diff --git a/.gitignore b/.gitignore index 2eb578f..7ea648a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ tmp_* .vscode/ +*.onnx # Byte-compiled / optimized / DLL files __pycache__/ diff --git a/uniface-cpp/.clang-format b/uniface-cpp/.clang-format new file mode 100644 index 0000000..f9fe946 --- /dev/null +++ b/uniface-cpp/.clang-format @@ -0,0 +1,180 @@ +--- +# Modern C++ style based on Google with enhancements +Language: Cpp +Standard: c++17 + +BasedOnStyle: Google +ColumnLimit: 100 +IndentWidth: 4 +TabWidth: 4 +UseTab: Never + +# Access modifiers +AccessModifierOffset: -4 +IndentAccessModifiers: false + +# Alignment +AlignAfterOpenBracket: BlockIndent +AlignArrayOfStructures: Right +AlignConsecutiveAssignments: + Enabled: false +AlignConsecutiveBitFields: + Enabled: true +AlignConsecutiveDeclarations: + Enabled: false +AlignConsecutiveMacros: + Enabled: true +AlignEscapedNewlines: Left +AlignOperands: AlignAfterOperator +AlignTrailingComments: + Kind: Always + OverEmptyLines: 1 + +# Arguments and parameters +AllowAllArgumentsOnNextLine: true +AllowAllParametersOfDeclarationOnNextLine: true +BinPackArguments: false +BinPackParameters: false + +# Short forms +AllowShortBlocksOnASingleLine: Empty +AllowShortCaseLabelsOnASingleLine: false +AllowShortEnumsOnASingleLine: false +AllowShortFunctionsOnASingleLine: Inline +AllowShortIfStatementsOnASingleLine: Never +AllowShortLambdasOnASingleLine: Inline +AllowShortLoopsOnASingleLine: false + +# Break behavior +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: true +AlwaysBreakTemplateDeclarations: Yes +BreakAfterAttributes: Leave +BreakBeforeBinaryOperators: None +BreakBeforeBraces: Attach +BreakBeforeConceptDeclarations: Always +BreakBeforeTernaryOperators: true +BreakConstructorInitializers: BeforeComma +BreakInheritanceList: BeforeComma +BreakStringLiterals: true + +# Braces +InsertBraces: false +RemoveBracesLLVM: false + +# Constructors +PackConstructorInitializers: CurrentLine +ConstructorInitializerIndentWidth: 4 + +# Empty lines +EmptyLineAfterAccessModifier: Never +EmptyLineBeforeAccessModifier: LogicalBlock +KeepEmptyLinesAtTheStartOfBlocks: false +MaxEmptyLinesToKeep: 1 +SeparateDefinitionBlocks: Always + +# Includes +IncludeBlocks: Regroup +IncludeCategories: + # Main header (same name as source file) + - Regex: '^"([a-zA-Z0-9_]+)\.(h|hpp)"$' + Priority: 1 + SortPriority: 1 + CaseSensitive: true + # Project headers + - Regex: '^".*"$' + Priority: 2 + SortPriority: 2 + # C system headers + - Regex: '^<(assert|complex|ctype|errno|fenv|float|inttypes|iso646|limits|locale|math|setjmp|signal|stdalign|stdarg|stdatomic|stdbool|stddef|stdint|stdio|stdlib|stdnoreturn|string|tgmath|threads|time|uchar|wchar|wctype)\.h>$' + Priority: 3 + SortPriority: 3 + # C++ standard library + - Regex: '^<[a-z_]+>$' + Priority: 4 + SortPriority: 4 + # External libraries + - Regex: '^<.*>$' + Priority: 5 + SortPriority: 5 +SortIncludes: CaseSensitive + +# Indentation +IndentCaseBlocks: false +IndentCaseLabels: true +IndentExternBlock: NoIndent +IndentGotoLabels: false +IndentPPDirectives: AfterHash +IndentRequiresClause: true +IndentWrappedFunctionNames: false + +# Lambdas +LambdaBodyIndentation: Signature + +# Namespaces +CompactNamespaces: false +FixNamespaceComments: true +NamespaceIndentation: None +ShortNamespaceLines: 0 + +# Penalties (guide formatting decisions) +PenaltyBreakAssignment: 25 +PenaltyBreakBeforeFirstCallParameter: 19 +PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 120 +PenaltyBreakOpenParenthesis: 0 +PenaltyBreakString: 1000 +PenaltyBreakTemplateDeclaration: 10 +PenaltyExcessCharacter: 1000000 +PenaltyIndentedWhitespace: 0 +PenaltyReturnTypeOnItsOwnLine: 200 + +# Pointers and references +DerivePointerAlignment: false +PointerAlignment: Left +ReferenceAlignment: Pointer +QualifierAlignment: Leave + +# Requires clause (C++20 concepts) +RequiresClausePosition: OwnLine +RequiresExpressionIndentation: OuterScope + +# Spacing +BitFieldColonSpacing: Both +SpaceAfterCStyleCast: false +SpaceAfterLogicalNot: false +SpaceAfterTemplateKeyword: true +SpaceAroundPointerQualifiers: Default +SpaceBeforeAssignmentOperators: true +SpaceBeforeCaseColon: false +SpaceBeforeCpp11BracedList: false +SpaceBeforeCtorInitializerColon: true +SpaceBeforeInheritanceColon: true +SpaceBeforeParens: ControlStatements +SpaceBeforeRangeBasedForLoopColon: true +SpaceBeforeSquareBrackets: false +SpaceInEmptyBlock: false +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 2 +SpacesInAngles: Never +SpacesInCStyleCastParentheses: false +SpacesInConditionalStatement: false +SpacesInContainerLiterals: false +SpacesInLineCommentPrefix: + Minimum: 1 + Maximum: -1 +SpacesInParentheses: false +SpacesInSquareBrackets: false + +# Other +Cpp11BracedListStyle: true +InsertNewlineAtEOF: true +InsertTrailingCommas: None +IntegerLiteralSeparator: + Binary: 4 + Decimal: 3 + Hex: 4 +ReflowComments: true +RemoveSemicolon: false +SortUsingDeclarations: LexicographicNumeric +... diff --git a/uniface-cpp/CMakeLists.txt b/uniface-cpp/CMakeLists.txt new file mode 100644 index 0000000..8ef8d8b --- /dev/null +++ b/uniface-cpp/CMakeLists.txt @@ -0,0 +1,47 @@ +cmake_minimum_required(VERSION 3.14) + +project(uniface + VERSION 1.0.0 + DESCRIPTION "Uniface C++ face analysis library" + LANGUAGES CXX +) + +# Options +option(UNIFACE_BUILD_EXAMPLES "Build example programs" ON) + +# C++ standard +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_CXX_EXTENSIONS OFF) +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) + +# Compiler warnings +if(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang") + add_compile_options(-Wall -Wextra -Wpedantic) +elseif(MSVC) + add_compile_options(/W4) +endif() + +# Find dependencies +find_package(OpenCV REQUIRED COMPONENTS core imgproc dnn) + +# Library +add_library(uniface + src/detector.cpp +) + +target_include_directories(uniface + PUBLIC + $ + $ +) + +target_link_libraries(uniface + PUBLIC + ${OpenCV_LIBS} +) + +# Examples +if(UNIFACE_BUILD_EXAMPLES) + add_subdirectory(examples) +endif() diff --git a/uniface-cpp/README.md b/uniface-cpp/README.md new file mode 100644 index 0000000..f323c10 --- /dev/null +++ b/uniface-cpp/README.md @@ -0,0 +1,69 @@ +# Uniface C++ + +C++ implementation of the Uniface face analysis library. + +## Features + +- **Face Detection** - RetinaFace detector with 5-point landmarks + +## Requirements + +- C++17 compiler +- CMake 3.14+ +- OpenCV 4.x + +## Build + +```bash +mkdir build && cd build +cmake .. +make -j$(nproc) +``` + +## Usage + +### Image Detection + +```bash +./examples/detect +``` + +### Webcam Demo + +```bash +./examples/webcam [camera_id] +``` + +### Code Example + +```cpp +#include +#include + +int main() { + uniface::RetinaFace detector("retinaface.onnx"); + + cv::Mat image = cv::imread("photo.jpg"); + auto faces = detector.detect(image); + + for (const auto& face : faces) { + cv::rectangle(image, face.bbox, cv::Scalar(0, 255, 0), 2); + } + + cv::imwrite("result.jpg", image); + return 0; +} +``` + +## Models + +Download models from the main uniface repository or use: + +```bash +# RetinaFace MobileNet V2 +wget https://github.com/your-repo/uniface/releases/download/v1.0/retinaface_mv2.onnx -P models/ +``` + +## License + +Same license as the main uniface project. diff --git a/uniface-cpp/examples/CMakeLists.txt b/uniface-cpp/examples/CMakeLists.txt new file mode 100644 index 0000000..4be1992 --- /dev/null +++ b/uniface-cpp/examples/CMakeLists.txt @@ -0,0 +1,11 @@ +# Examples + +find_package(OpenCV REQUIRED COMPONENTS highgui imgcodecs videoio) + +# Image detection example +add_executable(detect detect.cpp) +target_link_libraries(detect PRIVATE uniface ${OpenCV_LIBS}) + +# Webcam example +add_executable(webcam webcam.cpp) +target_link_libraries(webcam PRIVATE uniface ${OpenCV_LIBS}) diff --git a/uniface-cpp/examples/detect.cpp b/uniface-cpp/examples/detect.cpp new file mode 100644 index 0000000..b8e73b0 --- /dev/null +++ b/uniface-cpp/examples/detect.cpp @@ -0,0 +1,53 @@ +/** + * @file detect.cpp + * @brief Face detection on a single image + */ + +#include + +#include +#include +#include + +int main(int argc, char** argv) { + if (argc < 3) { + std::cout << "Usage: " << argv[0] << " " << std::endl; + return 1; + } + + const std::string model_path = argv[1]; + const std::string image_path = argv[2]; + + try { + uniface::RetinaFace detector(model_path); + + cv::Mat image = cv::imread(image_path); + if (image.empty()) { + std::cerr << "Failed to load image: " << image_path << std::endl; + return 1; + } + + const auto faces = detector.detect(image); + std::cout << "Detected " << faces.size() << " faces." << std::endl; + + // Draw results + for (const auto& face : faces) { + cv::rectangle(image, face.bbox, cv::Scalar(0, 255, 0), 2); + for (const auto& pt : face.landmarks) { + cv::circle(image, pt, 2, cv::Scalar(0, 0, 255), -1); + } + } + + cv::imwrite("result.jpg", image); + std::cout << "Saved result to result.jpg" << std::endl; + + } catch (const cv::Exception& e) { + std::cerr << "OpenCV Error: " << e.what() << std::endl; + return 1; + } catch (const std::exception& e) { + std::cerr << "Error: " << e.what() << std::endl; + return 1; + } + + return 0; +} diff --git a/uniface-cpp/examples/webcam.cpp b/uniface-cpp/examples/webcam.cpp new file mode 100644 index 0000000..a936f76 --- /dev/null +++ b/uniface-cpp/examples/webcam.cpp @@ -0,0 +1,137 @@ +/** + * @file webcam.cpp + * @brief Real-time face detection using webcam + */ + +#include +#include + +#include +#include +#include +#include + +int main(int argc, char** argv) { + if (argc < 2) { + std::cout << "Usage: " << argv[0] << " [camera_id]" << std::endl; + std::cout << " camera_id: Camera device ID (default: 0)" << std::endl; + return 1; + } + + const std::string model_path = argv[1]; + const int camera_id = (argc >= 3) ? std::atoi(argv[2]) : 0; + + try { + std::cout << "Loading model: " << model_path << std::endl; + uniface::RetinaFace detector(model_path); + std::cout << "Model loaded successfully!" << std::endl; + + cv::VideoCapture cap(camera_id); + if (!cap.isOpened()) { + std::cerr << "Error: Cannot open camera " << camera_id << std::endl; + return 1; + } + + const int frame_width = static_cast(cap.get(cv::CAP_PROP_FRAME_WIDTH)); + const int frame_height = static_cast(cap.get(cv::CAP_PROP_FRAME_HEIGHT)); + std::cout << "Camera opened: " << frame_width << "x" << frame_height << std::endl; + std::cout << "Press 'q' to quit, 's' to save screenshot" << std::endl; + + cv::Mat frame; + int frame_count = 0; + double total_time = 0.0; + + while (true) { + cap >> frame; + if (frame.empty()) { + std::cerr << "Error: Empty frame captured" << std::endl; + break; + } + + const auto start = std::chrono::high_resolution_clock::now(); + const auto faces = detector.detect(frame); + const auto end = std::chrono::high_resolution_clock::now(); + + const std::chrono::duration elapsed = end - start; + const double inference_time = elapsed.count(); + + ++frame_count; + total_time += inference_time; + const double avg_time = total_time / static_cast(frame_count); + const double fps = 1000.0 / avg_time; + + // Draw results + for (const auto& face : faces) { + cv::rectangle(frame, face.bbox, cv::Scalar(0, 255, 0), 2); + + for (size_t i = 0; i < face.landmarks.size(); ++i) { + cv::Scalar color; + if (i < 2) { + color = cv::Scalar(255, 0, 0); // Eyes - Blue + } else if (i == 2) { + color = cv::Scalar(0, 255, 0); // Nose - Green + } else { + color = cv::Scalar(0, 0, 255); // Mouth - Red + } + cv::circle(frame, face.landmarks[i], 3, color, -1); + } + + // Draw confidence + const std::string conf_text = cv::format("%.2f", face.confidence); + const cv::Point text_org( + static_cast(face.bbox.x), static_cast(face.bbox.y) - 5 + ); + cv::putText( + frame, + conf_text, + text_org, + cv::FONT_HERSHEY_SIMPLEX, + 0.5, + cv::Scalar(0, 255, 0), + 1 + ); + } + + // Draw FPS info + const std::string info_text = cv::format( + "FPS: %.1f | Faces: %zu | Time: %.1fms", fps, faces.size(), inference_time + ); + cv::putText( + frame, + info_text, + cv::Point(10, 30), + cv::FONT_HERSHEY_SIMPLEX, + 0.7, + cv::Scalar(0, 255, 0), + 2 + ); + + cv::imshow("Uniface - Face Detection", frame); + + const char key = static_cast(cv::waitKey(1)); + if (key == 'q' || key == 27) { + break; + } else if (key == 's') { + const std::string filename = cv::format("screenshot_%d.jpg", frame_count); + cv::imwrite(filename, frame); + std::cout << "Screenshot saved: " << filename << std::endl; + } + } + + cap.release(); + cv::destroyAllWindows(); + + std::cout << "\n=== Statistics ===" << std::endl; + std::cout << "Total frames: " << frame_count << std::endl; + std::cout << "Average inference time: " << (total_time / frame_count) << " ms" << std::endl; + + } catch (const cv::Exception& e) { + std::cerr << "OpenCV Error: " << e.what() << std::endl; + return 1; + } catch (const std::exception& e) { + std::cerr << "Error: " << e.what() << std::endl; + return 1; + } + + return 0; +} diff --git a/uniface-cpp/include/uniface/detector.hpp b/uniface-cpp/include/uniface/detector.hpp new file mode 100644 index 0000000..d58904f --- /dev/null +++ b/uniface-cpp/include/uniface/detector.hpp @@ -0,0 +1,74 @@ +/** + * @file detector.hpp + * @brief RetinaFace face detector + */ + +#ifndef UNIFACE_DETECTOR_HPP_ +#define UNIFACE_DETECTOR_HPP_ + +#include "uniface/types.hpp" + +#include +#include +#include + +#include + +namespace uniface { + +/** + * @brief RetinaFace detector using OpenCV DNN backend + * + * This class implements the RetinaFace face detection algorithm using + * ONNX models with OpenCV's DNN module for inference. + */ +class RetinaFace { +public: + /** + * @brief Construct a new RetinaFace detector + * + * @param model_path Path to the ONNX model file + * @param conf_thresh Confidence threshold for filtering detections (default: 0.5) + * @param nms_thresh Non-Maximum Suppression IoU threshold (default: 0.4) + * @param input_size Model input size (default: 640x640) + */ + explicit RetinaFace( + const std::string& model_path, + float conf_thresh = 0.5f, + float nms_thresh = 0.4f, + cv::Size input_size = cv::Size(640, 640) + ); + + /** + * @brief Detect faces in an image + * + * @param image Input BGR image + * @return std::vector Detected faces with bounding boxes, confidence scores, + * and facial landmarks + */ + [[nodiscard]] std::vector detect(const cv::Mat& image); + + // Accessors + [[nodiscard]] float getConfidenceThreshold() const noexcept { return confidence_threshold_; } + + [[nodiscard]] float getNmsThreshold() const noexcept { return nms_threshold_; } + + [[nodiscard]] cv::Size getInputSize() const noexcept { return input_size_; } + + void setConfidenceThreshold(float threshold) noexcept { confidence_threshold_ = threshold; } + + void setNmsThreshold(float threshold) noexcept { nms_threshold_ = threshold; } + +private: + cv::dnn::Net net_; + float confidence_threshold_; + float nms_threshold_; + cv::Size input_size_; + std::vector> anchors_; + + void generateAnchors(); +}; + +} // namespace uniface + +#endif // UNIFACE_DETECTOR_HPP_ diff --git a/uniface-cpp/include/uniface/types.hpp b/uniface-cpp/include/uniface/types.hpp new file mode 100644 index 0000000..8d5fd15 --- /dev/null +++ b/uniface-cpp/include/uniface/types.hpp @@ -0,0 +1,27 @@ +/** + * @file types.hpp + * @brief Common data types for uniface + */ + +#ifndef UNIFACE_TYPES_HPP_ +#define UNIFACE_TYPES_HPP_ + +#include + +#include + +namespace uniface { + +/** + * @brief Detected face data structure + */ +struct Face { + cv::Rect2f bbox; ///< Bounding box [x, y, width, height] in original image coordinates + float confidence; ///< Detection confidence score [0.0, 1.0] + std::vector landmarks; ///< 5 facial landmarks: + ///< [left_eye, right_eye, nose, left_mouth, right_mouth] +}; + +} // namespace uniface + +#endif // UNIFACE_TYPES_HPP_ diff --git a/uniface-cpp/include/uniface/uniface.hpp b/uniface-cpp/include/uniface/uniface.hpp new file mode 100644 index 0000000..e53249c --- /dev/null +++ b/uniface-cpp/include/uniface/uniface.hpp @@ -0,0 +1,14 @@ +/** + * @file uniface.hpp + * @brief Main header for uniface C++ library + * + * Include this header to access all uniface functionality. + */ + +#ifndef UNIFACE_HPP_ +#define UNIFACE_HPP_ + +#include "uniface/detector.hpp" +#include "uniface/types.hpp" + +#endif // UNIFACE_HPP_ diff --git a/uniface-cpp/models/.gitkeep b/uniface-cpp/models/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/uniface-cpp/src/detector.cpp b/uniface-cpp/src/detector.cpp new file mode 100644 index 0000000..03932f8 --- /dev/null +++ b/uniface-cpp/src/detector.cpp @@ -0,0 +1,275 @@ +/** + * @file detector.cpp + * @brief RetinaFace detector implementation + */ + +#include "uniface/detector.hpp" + +#include +#include + +#include + +namespace uniface { + +namespace { + +// Model configuration constants +constexpr std::array kFeatureStrides = {8, 16, 32}; +constexpr std::array kVariance = {0.1f, 0.2f}; +constexpr int kNumLandmarks = 5; + +// BGR mean values for image normalization +constexpr float kMeanB = 104.0f; +constexpr float kMeanG = 117.0f; +constexpr float kMeanR = 123.0f; + +// Anchor min sizes for each feature map level +const std::vector> kMinSizes = { + { 16, 32}, + { 64, 128}, + {256, 512} +}; + +/** + * @brief Resize image while preserving aspect ratio with letterbox padding + */ +float letterboxResize(const cv::Mat& src, cv::Mat& dst, const cv::Size& target_size) { + const auto src_height = static_cast(src.rows); + const auto src_width = static_cast(src.cols); + const auto target_height = static_cast(target_size.height); + const auto target_width = static_cast(target_size.width); + + const float im_ratio = src_height / src_width; + const float model_ratio = target_height / target_width; + + int new_width = 0; + int new_height = 0; + + if (im_ratio > model_ratio) { + new_height = static_cast(target_height); + new_width = static_cast(static_cast(new_height) / im_ratio); + } else { + new_width = static_cast(target_width); + new_height = static_cast(static_cast(new_width) * im_ratio); + } + + const float resize_factor = static_cast(new_height) / src_height; + + cv::Mat resized; + cv::resize(src, resized, cv::Size(new_width, new_height)); + + dst = cv::Mat::zeros(target_size, CV_8UC3); + resized.copyTo(dst(cv::Rect(0, 0, new_width, new_height))); + + return resize_factor; +} + +} // namespace + +RetinaFace::RetinaFace( + const std::string& model_path, float conf_thresh, float nms_thresh, cv::Size input_size +) + : net_(cv::dnn::readNetFromONNX(model_path)) + , confidence_threshold_(conf_thresh) + , nms_threshold_(nms_thresh) + , input_size_(input_size) { + generateAnchors(); +} + +void RetinaFace::generateAnchors() { + anchors_.clear(); + + // Pre-calculate approximate anchor count for reservation + size_t estimated_anchors = 0; + for (size_t k = 0; k < kFeatureStrides.size(); ++k) { + const int step = kFeatureStrides[k]; + const auto feature_h = static_cast( + std::ceil(static_cast(input_size_.height) / static_cast(step)) + ); + const auto feature_w = static_cast( + std::ceil(static_cast(input_size_.width) / static_cast(step)) + ); + estimated_anchors += feature_h * feature_w * kMinSizes[k].size(); + } + anchors_.reserve(estimated_anchors); + + // Generate anchors for each feature map level + for (size_t k = 0; k < kFeatureStrides.size(); ++k) { + const int step = kFeatureStrides[k]; + const int feature_h = static_cast( + std::ceil(static_cast(input_size_.height) / static_cast(step)) + ); + const int feature_w = static_cast( + std::ceil(static_cast(input_size_.width) / static_cast(step)) + ); + + for (int i = 0; i < feature_h; ++i) { + for (int j = 0; j < feature_w; ++j) { + for (const int min_size : kMinSizes[k]) { + const float s_kx = static_cast(min_size) / + static_cast(input_size_.height); + const float s_ky = static_cast(min_size) / + static_cast(input_size_.width); + const float cx = (static_cast(j) + 0.5f) * static_cast(step) / + static_cast(input_size_.height); + const float cy = (static_cast(i) + 0.5f) * static_cast(step) / + static_cast(input_size_.width); + + anchors_.push_back({cx, cy, s_kx, s_ky}); + } + } + } + } +} + +std::vector RetinaFace::detect(const cv::Mat& image) { + // Preprocess image + cv::Mat input_blob; + const float resize_factor = letterboxResize(image, input_blob, input_size_); + + // Create blob with mean subtraction + const cv::Mat blob = cv::dnn::blobFromImage( + input_blob, 1.0, cv::Size(), cv::Scalar(kMeanB, kMeanG, kMeanR), false, false + ); + + // Run inference + net_.setInput(blob); + const auto output_names = net_.getUnconnectedOutLayersNames(); + std::vector outputs; + net_.forward(outputs, output_names); + + // Validate output count + if (outputs.size() < 3) { + std::cerr << "Error: Model output count mismatch. Expected at least 3, got " + << outputs.size() << std::endl; + return {}; + } + + // Identify outputs by shape: loc(N,4), conf(N,2), landmarks(N,10) + cv::Mat loc_output; + cv::Mat conf_output; + cv::Mat land_output; + + for (const auto& output : outputs) { + switch (output.size[2]) { + case 4: + loc_output = output; + break; + case 2: + conf_output = output; + break; + case 10: + land_output = output; + break; + default: + break; + } + } + + // Fallback to positional outputs if shape matching failed + if (loc_output.empty()) + loc_output = outputs[0]; + if (conf_output.empty()) + conf_output = outputs[1]; + if (land_output.empty()) + land_output = outputs[2]; + + // Get raw data pointers + const auto* loc_data = reinterpret_cast(loc_output.data); + const auto* conf_data = reinterpret_cast(conf_output.data); + const auto* land_data = reinterpret_cast(land_output.data); + + const auto num_priors = static_cast(loc_output.size[1]); + + // Validate anchor count + if (num_priors != anchors_.size()) { + std::cerr << "Error: Anchor count mismatch! Expected " << anchors_.size() + << " anchors but model output has " << num_priors << " priors.\n" + << "This usually means the input size doesn't match the model's " + << "expected size." << std::endl; + return {}; + } + + // Decode detections + std::vector decoded_boxes; + std::vector scores; + std::vector> decoded_landmarks; + + decoded_boxes.reserve(num_priors); + scores.reserve(num_priors); + decoded_landmarks.reserve(num_priors); + + const auto scale_w = static_cast(input_size_.width); + const auto scale_h = static_cast(input_size_.height); + + for (size_t i = 0; i < num_priors; ++i) { + const float score = conf_data[i * 2 + 1]; + if (score < confidence_threshold_) { + continue; + } + + // Get anchor parameters + const float px = anchors_[i][0]; + const float py = anchors_[i][1]; + const float pw = anchors_[i][2]; + const float ph = anchors_[i][3]; + + // Decode bounding box + const float dx = loc_data[i * 4 + 0]; + const float dy = loc_data[i * 4 + 1]; + const float dw = loc_data[i * 4 + 2]; + const float dh = loc_data[i * 4 + 3]; + + const float cx = px + dx * kVariance[0] * pw; + const float cy = py + dy * kVariance[0] * ph; + const float w = pw * std::exp(dw * kVariance[1]); + const float h = ph * std::exp(dh * kVariance[1]); + + // Convert center format to corner format and scale to original image + const float x1 = (cx - w / 2.0f) * scale_w / resize_factor; + const float y1 = (cy - h / 2.0f) * scale_h / resize_factor; + const float x2 = (cx + w / 2.0f) * scale_w / resize_factor; + const float y2 = (cy + h / 2.0f) * scale_h / resize_factor; + + decoded_boxes.emplace_back(x1, y1, x2 - x1, y2 - y1); + scores.push_back(score); + + // Decode landmarks + std::vector landmarks; + landmarks.reserve(kNumLandmarks); + + for (int k = 0; k < kNumLandmarks; ++k) { + const float ldx = land_data[i * 10 + static_cast(k) * 2 + 0]; + const float ldy = land_data[i * 10 + static_cast(k) * 2 + 1]; + const float lx = (px + ldx * kVariance[0] * pw) * scale_w / resize_factor; + const float ly = (py + ldy * kVariance[0] * ph) * scale_h / resize_factor; + landmarks.emplace_back(lx, ly); + } + decoded_landmarks.push_back(std::move(landmarks)); + } + + // Apply Non-Maximum Suppression + std::vector boxes_for_nms; + boxes_for_nms.reserve(decoded_boxes.size()); + + for (const auto& box : decoded_boxes) { + boxes_for_nms.emplace_back(box.x, box.y, box.width, box.height); + } + + std::vector nms_indices; + cv::dnn::NMSBoxes(boxes_for_nms, scores, confidence_threshold_, nms_threshold_, nms_indices); + + // Build final results + std::vector results; + results.reserve(nms_indices.size()); + + for (const int idx : nms_indices) { + const auto uidx = static_cast(idx); + results.push_back({decoded_boxes[uidx], scores[uidx], decoded_landmarks[uidx]}); + } + + return results; +} + +} // namespace uniface