Merge pull request #2776 from tunmx/bugfix/det_preprocess_error

Fix the RGA image-processing issue in face-detection preprocessing
This commit is contained in:
Jia Guo
2025-06-16 13:23:39 +08:00
committed by GitHub
20 changed files with 387 additions and 327 deletions

View File

@@ -768,6 +768,14 @@ For different scenarios, we currently provide several Packs, each containing mul
- [x] Add the RKNPU backend support for Android.
- [ ] Example app project for Android and iOS samples.
- [ ] Add the batch forward feature.
- [ ] Design a scheme that can be adapted to multiple CUDA devices.
- Continue to provide more support for Rockchip NPU2 devices:
- [ ] RK3576 Series
- [ ] RK3562 Series
- [ ] RV1103B/RV1106B
- [ ] RV1126B
- [ ] RK2118
## Acknowledgement

View File

@@ -694,6 +694,7 @@ HResult HFExecuteFaceTrack(HFSession session, HFImageStream streamHandle, PHFMul
results->rects = (HFaceRect *)ctx->impl.GetFaceRectsCache().data();
results->trackIds = (HInt32 *)ctx->impl.GetTrackIDCache().data();
results->detConfidence = (HFloat *)ctx->impl.GetDetConfidenceCache().data();
results->trackCounts = (HInt32 *)ctx->impl.GetTrackCountCache().data();
results->angles.pitch = (HFloat *)ctx->impl.GetPitchResultsCache().data();
results->angles.roll = (HFloat *)ctx->impl.GetRollResultsCache().data();
results->angles.yaw = (HFloat *)ctx->impl.GetYawResultsCache().data();

View File

@@ -513,6 +513,7 @@ typedef struct HFMultipleFaceData {
HInt32 detectedNum; ///< Number of faces detected.
HFaceRect *rects; ///< Array of bounding rectangles for each face.
HInt32 *trackIds; ///< Array of track IDs for each face.
HInt32 *trackCounts; ///< Array of track counts for each face.
HFloat *detConfidence; ///< Array of detection confidence for each face.
HFFaceEulerAngle angles; ///< Euler angles for each face.
PHFFaceBasicToken tokens; ///< Tokens associated with each face.

View File

@@ -6,7 +6,7 @@
#ifndef INSPIRE_FACE_SERIALIZE_TOOLS_H
#define INSPIRE_FACE_SERIALIZE_TOOLS_H
#include "face_warpper.h"
#include "face_wrapper.h"
#include "../face_info/face_object_internal.h"
#include "herror.h"
#include "data_type.h"

View File

@@ -52,6 +52,7 @@ int32_t FaceSession::FaceDetectAndTrack(inspirecv::FrameProcess& process) {
m_face_basic_data_cache_.clear();
m_face_rects_cache_.clear();
m_track_id_cache_.clear();
m_track_count_cache_.clear();
m_quality_results_cache_.clear();
m_roll_results_cache_.clear();
m_yaw_results_cache_.clear();
@@ -86,6 +87,7 @@ int32_t FaceSession::FaceDetectAndTrack(inspirecv::FrameProcess& process) {
m_det_confidence_cache_.push_back(face.GetConfidence());
m_detect_cache_.push_back(byteArray);
m_track_id_cache_.push_back(face.GetTrackingId());
m_track_count_cache_.push_back(face.GetTrackingCount());
m_face_rects_cache_.push_back(data.rect);
m_quality_results_cache_.push_back(face.high_result);
m_roll_results_cache_.push_back(face.high_result.roll);
@@ -273,6 +275,10 @@ const std::vector<int32_t>& FaceSession::GetTrackIDCache() const {
return m_track_id_cache_;
}
const std::vector<int32_t>& FaceSession::GetTrackCountCache() const {
return m_track_count_cache_;
}
const std::vector<float>& FaceSession::GetRollResultsCache() const {
return m_roll_results_cache_;
}

View File

@@ -214,6 +214,12 @@ public:
*/
const std::vector<int32_t>& GetTrackIDCache() const;
/**
* @brief Retrieves the cache of tracking count.
* @return std::vector<int32_t> Cache of tracking count.
*/
const std::vector<int32_t>& GetTrackCountCache() const;
/**
* @brief Retrieves the cache of roll results from face pose estimation.
* @return std::vector<float> Cache of roll results.
@@ -396,6 +402,7 @@ private:
std::vector<FaceBasicData> m_face_basic_data_cache_; ///< Cache for basic face data extracted from detection
std::vector<FaceRect> m_face_rects_cache_; ///< Cache for face rectangle data from detection
std::vector<int32_t> m_track_id_cache_; ///< Cache for tracking IDs of detected faces
std::vector<int32_t> m_track_count_cache_; ///< Cache for tracking count of detected faces
std::vector<float> m_det_confidence_cache_; ///< Cache for face detection confidence of detected faces
std::vector<float> m_roll_results_cache_; ///< Cache for storing roll results from face pose estimation
std::vector<float> m_yaw_results_cache_; ///< Cache for storing yaw results from face pose estimation

View File

@@ -11,4 +11,4 @@
#include "similarity_converter.h"
#include "spend_timer.h"
#include "information.h"
#include "face_warpper.h"
#include "face_wrapper.h"

View File

@@ -3,7 +3,7 @@
#include <memory>
#include "data_type.h"
#include "frame_process.h"
#include "face_warpper.h"
#include "face_wrapper.h"
namespace inspire {

View File

@@ -13,7 +13,7 @@
#include "liveness/rgb_anti_spoofing_adapt.h"
#include "liveness/blink_predict_adapt.h"
#include "middleware/model_archive/inspire_archive.h"
#include "face_warpper.h"
#include "face_wrapper.h"
#include "track_module/landmark/landmark_param.h"
#include "attribute/face_emotion_adapt.h"

View File

@@ -8,7 +8,7 @@
#include <mutex>
#include "extract/extract_adapt.h"
#include "common/face_info/face_object_internal.h"
#include "face_warpper.h"
#include "face_wrapper.h"
#include "middleware/model_archive/inspire_archive.h"
#include "frame_process.h"

View File

@@ -22,7 +22,12 @@ FaceLocList FaceDetectAdapt::operator()(const inspirecv::Image &bgr) {
inspirecv::Image pad;
uint8_t *resized_data = nullptr;
m_processor_->ResizeAndPadding(bgr.Data(), bgr.Width(), bgr.Height(), bgr.Channels(), m_input_size_, m_input_size_, &resized_data, scale);
if (ori_w == m_input_size_ && ori_h == m_input_size_) {
scale = 1.0f;
resized_data = (uint8_t *)bgr.Data();
} else {
m_processor_->ResizeAndPadding(bgr.Data(), bgr.Width(), bgr.Height(), bgr.Channels(), m_input_size_, m_input_size_, &resized_data, scale);
}
pad = inspirecv::Image::Create(m_input_size_, m_input_size_, bgr.Channels(), resized_data, false);

View File

@@ -451,9 +451,9 @@ void FaceTrackModule::SetTrackPreviewSize(int preview_size) {
track_preview_size_ = preview_size;
if (track_preview_size_ == -1) {
track_preview_size_ = m_face_detector_->GetInputSize();
} else if (track_preview_size_ < 192) {
INSPIRE_LOGW("Track preview size %d is less than the minimum input size %d", track_preview_size_, 192);
track_preview_size_ = 192;
} else if (track_preview_size_ < 160) {
INSPIRE_LOGW("Track preview size %d is less than the minimum input size %d", track_preview_size_, 160);
track_preview_size_ = 160;
}
}

View File

@@ -98,7 +98,7 @@ int main(int argc, char* argv[]) {
/* Maximum number of faces detected */
maxDetectNum = 20;
/* Face detection image input level */
detectPixelLevel = 160;
detectPixelLevel = 320;
/* Handle of the current face SDK algorithm context */
session = NULL;
ret = HFCreateInspireFaceSessionOptional(option, detMode, maxDetectNum, detectPixelLevel, -1, &session);
@@ -156,6 +156,7 @@ int main(int argc, char* argv[]) {
HFLogPrint(HF_LOG_INFO, "Token size: %d", multipleFaceData.tokens[index].size);
HFLogPrint(HF_LOG_INFO, "Process face index: %d", index);
HFLogPrint(HF_LOG_INFO, "DetConfidence: %f", multipleFaceData.detConfidence[index]);
HFLogPrint(HF_LOG_INFO, "TrackCount: %d", multipleFaceData.trackCounts[index]);
HFImageBitmapDrawRect(drawImage, multipleFaceData.rects[index], (HColor){0, 100, 255}, 4);

View File

@@ -63,7 +63,7 @@ int main(int argc, char* argv[]) {
HOption option = HF_ENABLE_QUALITY | HF_ENABLE_MASK_DETECT | HF_ENABLE_LIVENESS;
/* Non-video or frame sequence mode uses IMAGE-MODE, which is always face detection without
* tracking */
HFDetectMode detMode = HF_DETECT_MODE_ALWAYS_DETECT;
HFDetectMode detMode = HF_DETECT_MODE_LIGHT_TRACK;
/* Maximum number of faces detected */
HInt32 maxDetectNum = 20;
/* Face detection image input level */
@@ -112,10 +112,28 @@ int main(int argc, char* argv[]) {
HFLogPrint(HF_LOG_INFO, "Number of Detection: %d", multipleFaceData.detectedNum);
HFSessionPrintTrackCostSpend(session);
if (multipleFaceData.detectedNum > 0) {
HFLogPrint(HF_LOG_INFO, "========================================");
for (i = 0; i < multipleFaceData.detectedNum; i++) {
HFLogPrint(HF_LOG_INFO, "TrackId: %d", multipleFaceData.trackIds[i]);
HFLogPrint(HF_LOG_INFO, "TrackCount: %d", multipleFaceData.trackCounts[i]);
}
} else {
HFLogPrint(HF_LOG_WARN, "The face cannot be detected, and the tracking test results may be invalid!");
}
ret = HFReleaseImageStream(imageHandle);
if (ret != HSUCCEED) {
HFLogPrint(HF_LOG_ERROR, "Release image stream error: %d", ret);
}
ret = HFReleaseImageBitmap(image);
if (ret != HSUCCEED) {
HFLogPrint(HF_LOG_ERROR, "Release image bitmap error: %d", ret);
return ret;
}
/* The memory must be freed at the end of the program */
ret = HFReleaseInspireFaceSession(session);
if (ret != HSUCCEED) {
@@ -123,11 +141,5 @@ int main(int argc, char* argv[]) {
return ret;
}
ret = HFReleaseImageBitmap(image);
if (ret != HSUCCEED) {
HFLogPrint(HF_LOG_ERROR, "Release image bitmap error: %d", ret);
return ret;
}
return 0;
}

View File

@@ -1,5 +1,5 @@
<mxfile host="Electron" agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) draw.io/24.7.5 Chrome/126.0.6478.183 Electron/31.3.0 Safari/537.36" version="24.7.5">
<diagram name="第 1 页" id="FP0FvKHnDjYPDM2ZKx7P">
<diagram name="page 1" id="FP0FvKHnDjYPDM2ZKx7P">
<mxGraphModel dx="781" dy="688" grid="1" gridSize="10" guides="1" tooltips="1" connect="1" arrows="1" fold="1" page="1" pageScale="1" pageWidth="827" pageHeight="1169" math="0" shadow="0">
<root>
<mxCell id="0" />

View File

@@ -1,102 +1,102 @@
# InspireFace Python API
InspireFace 提供了简单易用的 Python API通过 ctypes 封装底层动态链接库实现。您可以通过 pip 安装最新发布版本,或使用项目自行编译的动态库进行配置。
InspireFace provides an easy-to-use Python API that wraps the underlying dynamic link library through ctypes. You can install the latest release version via pip or configure it using the project's self-compiled dynamic library.
## 快速安装
## Quick Installation
### 通过 pip 安装(推荐)
### Install via pip (Recommended)
```bash
pip install inspireface
```
### 手动安装
### Manual Installation
1. 首先安装必要的依赖:
1. First install the necessary dependencies:
```bash
pip install loguru tqdm opencv-python
```
2. 将编译好的动态库复制到指定目录:
2. Copy the compiled dynamic library to the specified directory:
```bash
# 将编译好的动态库复制到对应系统架构目录
# Copy the compiled dynamic library to the corresponding system architecture directory
cp YOUR_BUILD_DIR/libInspireFace.so inspireface/modules/core/SYSTEM/CORE_ARCH/
```
3. 安装 Python 包:
3. Install the Python package:
```bash
python setup.py install
```
## 快速开始
## Quick Start
以下是一个简单的示例,展示如何使用 InspireFace 进行人脸检测和关键点绘制:
Here's a simple example showing how to use InspireFace for face detection and landmark drawing:
```python
import cv2
import inspireface as isf
# 创建会话,启用所需功能
# Create session with required features enabled
session = isf.InspireFaceSession(
opt=isf.HF_ENABLE_NONE, # 可选功能
detect_mode=isf.HF_DETECT_MODE_ALWAYS_DETECT # 检测模式
opt=isf.HF_ENABLE_NONE, # Optional features
detect_mode=isf.HF_DETECT_MODE_ALWAYS_DETECT # Detection mode
)
# 设置检测置信度阈值
# Set detection confidence threshold
session.set_detection_confidence_threshold(0.5)
# 读取图像
# Read image
image = cv2.imread("path/to/your/image.jpg")
assert image is not None, "请检查图像路径是否正确"
assert image is not None, "Please check if the image path is correct"
# 执行人脸检测
# Perform face detection
faces = session.face_detection(image)
print(f"检测到 {len(faces)} 个人脸")
print(f"Detected {len(faces)} faces")
# 在图像上绘制检测结果
# Draw detection results on image
draw = image.copy()
for idx, face in enumerate(faces):
# 获取人脸框位置
# Get face bounding box coordinates
x1, y1, x2, y2 = face.location
# 计算旋转框参数
# Calculate rotated box parameters
center = ((x1 + x2) / 2, (y1 + y2) / 2)
size = (x2 - x1, y2 - y1)
angle = face.roll
# 绘制旋转框
# Draw rotated box
rect = ((center[0], center[1]), (size[0], size[1]), angle)
box = cv2.boxPoints(rect)
box = box.astype(int)
cv2.drawContours(draw, [box], 0, (100, 180, 29), 2)
# 绘制关键点
# Draw landmarks
landmarks = session.get_face_dense_landmark(face)
for x, y in landmarks.astype(int):
cv2.circle(draw, (x, y), 0, (220, 100, 0), 2)
```
## 更多示例
## More Examples
项目提供了多个示例文件,展示了不同的功能:
The project provides multiple example files demonstrating different features:
- `sample_face_detection.py`: 基础人脸检测
- `sample_face_track_from_video.py`: 视频人脸跟踪
- `sample_face_recognition.py`: 人脸识别
- `sample_face_comparison.py`: 人脸比对
- `sample_feature_hub.py`: 特征提取
- `sample_system_resource_statistics.py`: 系统资源统计
- `sample_face_detection.py`: Basic face detection
- `sample_face_track_from_video.py`: Video face tracking
- `sample_face_recognition.py`: Face recognition
- `sample_face_comparison.py`: Face comparison
- `sample_feature_hub.py`: Feature extraction
- `sample_system_resource_statistics.py`: System resource statistics
## 运行测试
## Running Tests
项目包含单元测试,您可以通过修改 `test/test_settings.py` 中的参数来调整测试内容:
The project includes unit tests. You can adjust test content by modifying parameters in `test/test_settings.py`:
```bash
python -m unittest discover -s test
```
## 注意事项
## Notes
1. 确保系统已安装 OpenCV 和其他必要依赖
2. 使用前请确保动态库已正确安装
3. 建议使用 Python 3.7 或更高版本
1. Ensure that OpenCV and other necessary dependencies are installed on your system
2. Make sure the dynamic library is correctly installed before use
3. Python 3.7 or higher is recommended

View File

@@ -197,6 +197,7 @@ class FaceInformation:
def __init__(self,
track_id: int,
track_count: int,
detection_confidence: float,
location: Tuple,
roll: float,
@@ -205,6 +206,7 @@ class FaceInformation:
_token: HFFaceBasicToken,
_feature: np.array = None):
self.track_id = track_id
self.track_count = track_count
self.detection_confidence = detection_confidence
self.location = location
self.roll = roll
@@ -226,7 +228,7 @@ class FaceInformation:
self._token.data = cast(addressof(self.buffer), c_void_p)
def __repr__(self) -> str:
return f"FaceInformation(track_id={self.track_id}, detection_confidence={self.detection_confidence}, location={self.location}, roll={self.roll}, yaw={self.yaw}, pitch={self.pitch})"
return f"FaceInformation(track_id={self.track_id}, track_count={self.track_count}, detection_confidence={self.detection_confidence}, location={self.location}, roll={self.roll}, yaw={self.yaw}, pitch={self.pitch})"
@dataclass
@@ -336,6 +338,7 @@ class InspireFaceSession(object):
track_ids = self._get_faces_track_ids()
euler_angle = self._get_faces_euler_angle()
tokens = self._get_faces_tokens()
track_counts = self._get_faces_track_counts()
infos = list()
for idx in range(self.multiple_faces.detectedNum):
@@ -347,6 +350,7 @@ class InspireFaceSession(object):
track_id = track_ids[idx]
_token = tokens[idx]
detection_confidence = self.multiple_faces.detConfidence[idx]
track_count = track_counts[idx]
info = FaceInformation(
location=(top_left[0], top_left[1], bottom_right[0], bottom_right[1]),
@@ -354,6 +358,7 @@ class InspireFaceSession(object):
yaw=yaw,
pitch=pitch,
track_id=track_id,
track_count=track_count,
_token=_token,
detection_confidence=detection_confidence,
)
@@ -641,6 +646,12 @@ class InspireFaceSession(object):
angles = [(euler_angle.roll[i], euler_angle.yaw[i], euler_angle.pitch[i]) for i in range(num_of_faces)]
return angles
def _get_faces_track_counts(self) -> List:
num_of_faces = self.multiple_faces.detectedNum
track_counts_ptr = self.multiple_faces.trackCounts
track_counts = [track_counts_ptr[i] for i in range(num_of_faces)]
return track_counts
def _get_faces_tokens(self) -> List[HFFaceBasicToken]:
num_of_faces = self.multiple_faces.detectedNum

View File

@@ -135,7 +135,7 @@ def case_face_tracker_from_video(source, show, out):
cv2.circle(frame, (x, y), 0, (255-color[0], 255-color[1], 255-color[2]), 6)
# Draw track ID at the top of the bounding box
text = f"ID: {face.track_id}"
text = f"ID: {face.track_id}, Count: {face.track_count}"
text_size, _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
text_x = min(box[:, 0])
text_y = min(box[:, 1]) - 10