3 Commits

Author SHA1 Message Date
yakhyo cb81d2fcf8 fix: Cleaning up excessive comments 2026-03-11 12:22:42 +09:00
yakhyo f0bae6dd80 feat: Add landmark and recognition models to uniface-cpp 2026-03-11 12:22:42 +09:00
yakhyo eec8f99850 feat: Add uniface cpp support for detection only 2026-03-11 12:22:42 +09:00
23 changed files with 1638 additions and 0 deletions

1 .gitignore vendored

@@ -1,5 +1,6 @@
tmp_*
.vscode/
*.onnx
# Byte-compiled / optimized / DLL files
__pycache__/

180 uniface-cpp/.clang-format Normal file

@@ -0,0 +1,180 @@
---
# Modern C++ style based on Google with enhancements
Language: Cpp
Standard: c++17
BasedOnStyle: Google
ColumnLimit: 100
IndentWidth: 4
TabWidth: 4
UseTab: Never
# Access modifiers
AccessModifierOffset: -4
IndentAccessModifiers: false
# Alignment
AlignAfterOpenBracket: BlockIndent
AlignArrayOfStructures: Right
AlignConsecutiveAssignments:
Enabled: false
AlignConsecutiveBitFields:
Enabled: true
AlignConsecutiveDeclarations:
Enabled: false
AlignConsecutiveMacros:
Enabled: true
AlignEscapedNewlines: Left
AlignOperands: AlignAfterOperator
AlignTrailingComments:
Kind: Always
OverEmptyLines: 1
# Arguments and parameters
AllowAllArgumentsOnNextLine: true
AllowAllParametersOfDeclarationOnNextLine: true
BinPackArguments: false
BinPackParameters: false
# Short forms
AllowShortBlocksOnASingleLine: Empty
AllowShortCaseLabelsOnASingleLine: false
AllowShortEnumsOnASingleLine: false
AllowShortFunctionsOnASingleLine: Inline
AllowShortIfStatementsOnASingleLine: Never
AllowShortLambdasOnASingleLine: Inline
AllowShortLoopsOnASingleLine: false
# Break behavior
AlwaysBreakAfterReturnType: None
AlwaysBreakBeforeMultilineStrings: true
AlwaysBreakTemplateDeclarations: Yes
BreakAfterAttributes: Leave
BreakBeforeBinaryOperators: None
BreakBeforeBraces: Attach
BreakBeforeConceptDeclarations: Always
BreakBeforeTernaryOperators: true
BreakConstructorInitializers: BeforeComma
BreakInheritanceList: BeforeComma
BreakStringLiterals: true
# Braces
InsertBraces: false
RemoveBracesLLVM: false
# Constructors
PackConstructorInitializers: CurrentLine
ConstructorInitializerIndentWidth: 4
# Empty lines
EmptyLineAfterAccessModifier: Never
EmptyLineBeforeAccessModifier: LogicalBlock
KeepEmptyLinesAtTheStartOfBlocks: false
MaxEmptyLinesToKeep: 1
SeparateDefinitionBlocks: Always
# Includes
IncludeBlocks: Regroup
IncludeCategories:
# Main header (same name as source file)
- Regex: '^"([a-zA-Z0-9_]+)\.(h|hpp)"$'
Priority: 1
SortPriority: 1
CaseSensitive: true
# Project headers
- Regex: '^".*"$'
Priority: 2
SortPriority: 2
# C system headers
- Regex: '^<(assert|complex|ctype|errno|fenv|float|inttypes|iso646|limits|locale|math|setjmp|signal|stdalign|stdarg|stdatomic|stdbool|stddef|stdint|stdio|stdlib|stdnoreturn|string|tgmath|threads|time|uchar|wchar|wctype)\.h>$'
Priority: 3
SortPriority: 3
# C++ standard library
- Regex: '^<[a-z_]+>$'
Priority: 4
SortPriority: 4
# External libraries
- Regex: '^<.*>$'
Priority: 5
SortPriority: 5
SortIncludes: CaseSensitive
# Indentation
IndentCaseBlocks: false
IndentCaseLabels: true
IndentExternBlock: NoIndent
IndentGotoLabels: false
IndentPPDirectives: AfterHash
IndentRequiresClause: true
IndentWrappedFunctionNames: false
# Lambdas
LambdaBodyIndentation: Signature
# Namespaces
CompactNamespaces: false
FixNamespaceComments: true
NamespaceIndentation: None
ShortNamespaceLines: 0
# Penalties (guide formatting decisions)
PenaltyBreakAssignment: 25
PenaltyBreakBeforeFirstCallParameter: 19
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
PenaltyBreakOpenParenthesis: 0
PenaltyBreakString: 1000
PenaltyBreakTemplateDeclaration: 10
PenaltyExcessCharacter: 1000000
PenaltyIndentedWhitespace: 0
PenaltyReturnTypeOnItsOwnLine: 200
# Pointers and references
DerivePointerAlignment: false
PointerAlignment: Left
ReferenceAlignment: Pointer
QualifierAlignment: Leave
# Requires clause (C++20 concepts)
RequiresClausePosition: OwnLine
RequiresExpressionIndentation: OuterScope
# Spacing
BitFieldColonSpacing: Both
SpaceAfterCStyleCast: false
SpaceAfterLogicalNot: false
SpaceAfterTemplateKeyword: true
SpaceAroundPointerQualifiers: Default
SpaceBeforeAssignmentOperators: true
SpaceBeforeCaseColon: false
SpaceBeforeCpp11BracedList: false
SpaceBeforeCtorInitializerColon: true
SpaceBeforeInheritanceColon: true
SpaceBeforeParens: ControlStatements
SpaceBeforeRangeBasedForLoopColon: true
SpaceBeforeSquareBrackets: false
SpaceInEmptyBlock: false
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 2
SpacesInAngles: Never
SpacesInCStyleCastParentheses: false
SpacesInConditionalStatement: false
SpacesInContainerLiterals: false
SpacesInLineCommentPrefix:
Minimum: 1
Maximum: -1
SpacesInParentheses: false
SpacesInSquareBrackets: false
# Other
Cpp11BracedListStyle: true
InsertNewlineAtEOF: true
InsertTrailingCommas: None
IntegerLiteralSeparator:
Binary: 4
Decimal: 3
Hex: 4
ReflowComments: true
RemoveSemicolon: false
SortUsingDeclarations: LexicographicNumeric
...
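As a sanity check on the IncludeCategories rules above, this is the grouping they should produce for an illustrative set of includes (the source file and headers here are hypothetical, not from this PR); with IncludeBlocks: Regroup, clang-format separates each category with a blank line:
```cpp
// detector.cpp -- include order under the categories above
#include "detector.hpp"       // 1: main header (same basename as the source file)

#include "uniface/types.hpp"  // 2: other project headers

#include <math.h>             // 3: C system headers

#include <string>             // 4: C++ standard library
#include <vector>

#include <opencv2/dnn.hpp>    // 5: external libraries
```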

51 uniface-cpp/CMakeLists.txt Normal file

@@ -0,0 +1,51 @@
cmake_minimum_required(VERSION 3.14)
project(uniface
VERSION 1.0.0
DESCRIPTION "Uniface C++ face analysis library"
LANGUAGES CXX
)
# Options
option(UNIFACE_BUILD_EXAMPLES "Build example programs" ON)
# C++ standard
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
# Compiler warnings
if(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
add_compile_options(-Wall -Wextra -Wpedantic)
elseif(MSVC)
add_compile_options(/W4)
endif()
# Find dependencies
find_package(OpenCV REQUIRED COMPONENTS core imgproc dnn calib3d)
# Library
add_library(uniface
src/utils.cpp
src/detector.cpp
src/recognizer.cpp
src/landmarker.cpp
src/analyzer.cpp
)
target_include_directories(uniface
PUBLIC
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
$<INSTALL_INTERFACE:include>
)
target_link_libraries(uniface
PUBLIC
${OpenCV_LIBS}
)
# Examples
if(UNIFACE_BUILD_EXAMPLES)
add_subdirectory(examples)
endif()

69 uniface-cpp/README.md Normal file

@@ -0,0 +1,69 @@
# Uniface C++
C++ implementation of the Uniface face analysis library.
## Features
- **Face Detection** - RetinaFace detector with 5-point landmarks
- **Face Recognition** - ArcFace 512-dim embeddings with cosine similarity
- **Facial Landmarks** - 106-point landmark detection
## Requirements
- C++17 compiler
- CMake 3.14+
- OpenCV 4.x
## Build
```bash
mkdir build && cd build
cmake ..
make -j$(nproc)
```
## Usage
### Image Detection
```bash
./examples/detect <model_path> <image_path>
```
### Webcam Demo
```bash
./examples/webcam <detector_model> [landmark_model] [camera_id]
```
### Code Example
```cpp
#include <uniface/uniface.hpp>
#include <opencv2/highgui.hpp>
int main() {
uniface::RetinaFace detector("retinaface.onnx");
cv::Mat image = cv::imread("photo.jpg");
auto faces = detector.detect(image);
for (const auto& face : faces) {
cv::rectangle(image, face.bbox, cv::Scalar(0, 255, 0), 2);
}
cv::imwrite("result.jpg", image);
return 0;
}
```
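Recognition follows the same pattern; a minimal sketch mirroring `examples/recognize.cpp` (model and image paths are placeholders):
```cpp
#include <uniface/uniface.hpp>
#include <opencv2/imgcodecs.hpp>
#include <iostream>

int main() {
    uniface::RetinaFace detector("retinaface.onnx");
    uniface::ArcFace recognizer("arcface.onnx");
    cv::Mat a = cv::imread("person_a.jpg");
    cv::Mat b = cv::imread("person_b.jpg");
    const auto faces_a = detector.detect(a);
    const auto faces_b = detector.detect(b);
    if (faces_a.empty() || faces_b.empty()) return 1;
    // Embeddings are L2-normalized, so cosine similarity is a dot product in [-1, 1].
    const auto emb_a = recognizer.getNormalizedEmbedding(a, faces_a[0].landmarks);
    const auto emb_b = recognizer.getNormalizedEmbedding(b, faces_b[0].landmarks);
    std::cout << "similarity: " << uniface::cosineSimilarity(emb_a, emb_b) << std::endl;
    return 0;
}
```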
## Models
Download models from the main uniface repository or use:
```bash
# RetinaFace MobileNet V2
wget https://github.com/your-repo/uniface/releases/download/v1.0/retinaface_mv2.onnx -P models/
```
## License
Same license as the main uniface project.

23 uniface-cpp/examples/CMakeLists.txt Normal file

@@ -0,0 +1,23 @@
# Examples
find_package(OpenCV REQUIRED COMPONENTS highgui imgcodecs videoio)
# Image detection example
add_executable(detect detect.cpp)
target_link_libraries(detect PRIVATE uniface ${OpenCV_LIBS})
# Face recognition example
add_executable(recognize recognize.cpp)
target_link_libraries(recognize PRIVATE uniface ${OpenCV_LIBS})
# Facial landmarks example
add_executable(landmarks landmarks.cpp)
target_link_libraries(landmarks PRIVATE uniface ${OpenCV_LIBS})
# Face analyzer example
add_executable(analyzer analyzer.cpp)
target_link_libraries(analyzer PRIVATE uniface ${OpenCV_LIBS})
# Webcam example
add_executable(webcam webcam.cpp)
target_link_libraries(webcam PRIVATE uniface ${OpenCV_LIBS})

113 uniface-cpp/examples/analyzer.cpp Normal file

@@ -0,0 +1,113 @@
#include <iomanip>
#include <iostream>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc.hpp>
#include <uniface/uniface.hpp>
int main(int argc, char** argv) {
if (argc < 3) {
std::cout << "Usage: " << argv[0]
<< " <detector_model> <image_path> [recognizer_model] [landmark_model]"
<< std::endl;
std::cout << "\nAnalyzes faces in an image using available models." << std::endl;
std::cout << " - detector_model: Required. Path to face detector ONNX model." << std::endl;
std::cout << " - recognizer_model: Optional. Path to face recognizer ONNX model."
<< std::endl;
std::cout << " - landmark_model: Optional. Path to 106-point landmark ONNX model."
<< std::endl;
return 1;
}
const std::string detector_path = argv[1];
const std::string image_path = argv[2];
const std::string recognizer_path = (argc > 3) ? argv[3] : "";
const std::string landmark_path = (argc > 4) ? argv[4] : "";
try {
// Create analyzer and load components
uniface::FaceAnalyzer analyzer;
std::cout << "Loading detector: " << detector_path << std::endl;
analyzer.loadDetector(detector_path);
if (!recognizer_path.empty()) {
std::cout << "Loading recognizer: " << recognizer_path << std::endl;
analyzer.loadRecognizer(recognizer_path);
}
if (!landmark_path.empty()) {
std::cout << "Loading landmarker: " << landmark_path << std::endl;
analyzer.loadLandmarker(landmark_path);
}
// Load image
cv::Mat image = cv::imread(image_path);
if (image.empty()) {
std::cerr << "Failed to load image: " << image_path << std::endl;
return 1;
}
std::cout << "\nAnalyzing image..." << std::endl;
// Analyze faces
auto results = analyzer.analyze(image);
std::cout << "Found " << results.size() << " face(s)\n" << std::endl;
// Process each face
for (size_t i = 0; i < results.size(); ++i) {
const auto& result = results[i];
std::cout << "Face " << (i + 1) << ":" << std::endl;
std::cout << " BBox: [" << result.face.bbox.x << ", " << result.face.bbox.y << ", "
<< result.face.bbox.width << ", " << result.face.bbox.height << "]"
<< std::endl;
std::cout << std::fixed << std::setprecision(3);
std::cout << " Confidence: " << result.face.confidence << std::endl;
// Draw bounding box
cv::rectangle(image, result.face.bbox, cv::Scalar(0, 255, 0), 2);
// Draw 5-point landmarks from detector
for (const auto& pt : result.face.landmarks) {
cv::circle(image, pt, 3, cv::Scalar(0, 0, 255), -1);
}
// If 106-point landmarks available
if (result.landmarks) {
std::cout << " Landmarks: 106 points detected" << std::endl;
for (const auto& pt : result.landmarks->points) {
cv::circle(image, pt, 1, cv::Scalar(0, 255, 255), -1);
}
}
// If embedding available
if (result.embedding) {
// Show first few values of embedding
std::cout << " Embedding: [";
for (size_t j = 0; j < 5; ++j) {
std::cout << (*result.embedding)[j];
if (j < 4)
std::cout << ", ";
}
std::cout << ", ... ] (512-dim)" << std::endl;
}
std::cout << std::endl;
}
// Save result
cv::imwrite("analyzer_result.jpg", image);
std::cout << "Saved result to analyzer_result.jpg" << std::endl;
} catch (const cv::Exception& e) {
std::cerr << "OpenCV Error: " << e.what() << std::endl;
return 1;
} catch (const std::exception& e) {
std::cerr << "Error: " << e.what() << std::endl;
return 1;
}
return 0;
}

48 uniface-cpp/examples/detect.cpp Normal file

@@ -0,0 +1,48 @@
#include <iostream>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc.hpp>
#include <uniface/uniface.hpp>
int main(int argc, char** argv) {
if (argc < 3) {
std::cout << "Usage: " << argv[0] << " <model_path> <image_path>" << std::endl;
return 1;
}
const std::string model_path = argv[1];
const std::string image_path = argv[2];
try {
uniface::RetinaFace detector(model_path);
cv::Mat image = cv::imread(image_path);
if (image.empty()) {
std::cerr << "Failed to load image: " << image_path << std::endl;
return 1;
}
const auto faces = detector.detect(image);
std::cout << "Detected " << faces.size() << " faces." << std::endl;
// Draw results
for (const auto& face : faces) {
cv::rectangle(image, face.bbox, cv::Scalar(0, 255, 0), 2);
for (const auto& pt : face.landmarks) {
cv::circle(image, pt, 2, cv::Scalar(0, 0, 255), -1);
}
}
cv::imwrite("result.jpg", image);
std::cout << "Saved result to result.jpg" << std::endl;
} catch (const cv::Exception& e) {
std::cerr << "OpenCV Error: " << e.what() << std::endl;
return 1;
} catch (const std::exception& e) {
std::cerr << "Error: " << e.what() << std::endl;
return 1;
}
return 0;
}

66 uniface-cpp/examples/landmarks.cpp Normal file

@@ -0,0 +1,66 @@
#include <iostream>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc.hpp>
#include <uniface/uniface.hpp>
int main(int argc, char** argv) {
if (argc < 4) {
std::cout << "Usage: " << argv[0] << " <detector_model> <landmark_model> <image_path>"
<< std::endl;
std::cout << "\nDetects 106-point facial landmarks and saves visualization." << std::endl;
return 1;
}
const std::string detector_path = argv[1];
const std::string landmark_path = argv[2];
const std::string image_path = argv[3];
try {
// Load models
uniface::RetinaFace detector(detector_path);
uniface::Landmark106 landmarker(landmark_path);
// Load image
cv::Mat image = cv::imread(image_path);
if (image.empty()) {
std::cerr << "Failed to load image: " << image_path << std::endl;
return 1;
}
// Detect faces
auto faces = detector.detect(image);
std::cout << "Detected " << faces.size() << " face(s)" << std::endl;
// Process each face
for (size_t i = 0; i < faces.size(); ++i) {
const auto& face = faces[i];
// Draw bounding box
cv::rectangle(image, face.bbox, cv::Scalar(0, 255, 0), 2);
// Get 106-point landmarks
auto landmarks = landmarker.getLandmarks(image, face.bbox);
// Draw all 106 points
for (const auto& pt : landmarks.points) {
cv::circle(image, pt, 1, cv::Scalar(0, 255, 255), -1);
}
std::cout << "Face " << (i + 1) << ": 106 landmarks detected" << std::endl;
}
// Save result
cv::imwrite("landmarks_result.jpg", image);
std::cout << "Saved result to landmarks_result.jpg" << std::endl;
} catch (const cv::Exception& e) {
std::cerr << "OpenCV Error: " << e.what() << std::endl;
return 1;
} catch (const std::exception& e) {
std::cerr << "Error: " << e.what() << std::endl;
return 1;
}
return 0;
}

81 uniface-cpp/examples/recognize.cpp Normal file

@@ -0,0 +1,81 @@
#include <iomanip>
#include <iostream>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc.hpp>
#include <uniface/uniface.hpp>
int main(int argc, char** argv) {
if (argc < 5) {
std::cout << "Usage: " << argv[0]
<< " <detector_model> <recognizer_model> <image1> <image2>" << std::endl;
std::cout << "\nCompares faces from two images and outputs similarity score." << std::endl;
return 1;
}
const std::string detector_path = argv[1];
const std::string recognizer_path = argv[2];
const std::string image1_path = argv[3];
const std::string image2_path = argv[4];
try {
// Load models
uniface::RetinaFace detector(detector_path);
uniface::ArcFace recognizer(recognizer_path);
// Load images
cv::Mat image1 = cv::imread(image1_path);
cv::Mat image2 = cv::imread(image2_path);
if (image1.empty()) {
std::cerr << "Failed to load image: " << image1_path << std::endl;
return 1;
}
if (image2.empty()) {
std::cerr << "Failed to load image: " << image2_path << std::endl;
return 1;
}
// Detect faces
auto faces1 = detector.detect(image1);
auto faces2 = detector.detect(image2);
if (faces1.empty()) {
std::cerr << "No face detected in image1" << std::endl;
return 1;
}
if (faces2.empty()) {
std::cerr << "No face detected in image2" << std::endl;
return 1;
}
std::cout << "Detected " << faces1.size() << " face(s) in image1" << std::endl;
std::cout << "Detected " << faces2.size() << " face(s) in image2" << std::endl;
// Get embeddings for first face in each image
auto embedding1 = recognizer.getNormalizedEmbedding(image1, faces1[0].landmarks);
auto embedding2 = recognizer.getNormalizedEmbedding(image2, faces2[0].landmarks);
// Compute similarity
float similarity = uniface::cosineSimilarity(embedding1, embedding2);
std::cout << std::fixed << std::setprecision(4);
std::cout << "\nCosine Similarity: " << similarity << std::endl;
// Interpretation
if (similarity > 0.4f) {
std::cout << "Result: Same person (similarity > 0.4)" << std::endl;
} else {
std::cout << "Result: Different persons (similarity <= 0.4)" << std::endl;
}
} catch (const cv::Exception& e) {
std::cerr << "OpenCV Error: " << e.what() << std::endl;
return 1;
} catch (const std::exception& e) {
std::cerr << "Error: " << e.what() << std::endl;
return 1;
}
return 0;
}

209 uniface-cpp/examples/webcam.cpp Normal file

@@ -0,0 +1,209 @@
#include <cctype>
#include <chrono>
#include <cstring>
#include <iostream>
#include <memory>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/videoio.hpp>
#include <uniface/uniface.hpp>
int main(int argc, char** argv) {
if (argc < 2) {
std::cout << "Usage: " << argv[0] << " <detector_model> [landmark_model] [camera_id]"
<< std::endl;
std::cout << "\nArguments:" << std::endl;
std::cout << " detector_model : Path to face detector ONNX model (required)" << std::endl;
std::cout << " landmark_model : Path to 106-point landmark ONNX model (optional)"
<< std::endl;
std::cout << " camera_id : Camera device ID, default 0 (optional)" << std::endl;
std::cout << "\nExamples:" << std::endl;
std::cout << " " << argv[0] << " detector.onnx" << std::endl;
std::cout << " " << argv[0] << " detector.onnx landmark.onnx" << std::endl;
std::cout << " " << argv[0] << " detector.onnx landmark.onnx 1" << std::endl;
return 1;
}
const std::string detector_path = argv[1];
std::string landmark_path;
int camera_id = 0;
// Parse arguments - landmark_model is optional
if (argc >= 3) {
// Check if argv[2] is a number (camera_id) or a path (landmark_model)
if (std::isdigit(static_cast<unsigned char>(argv[2][0])) && std::strlen(argv[2]) <= 2) {
camera_id = std::atoi(argv[2]);
} else {
landmark_path = argv[2];
if (argc >= 4) {
camera_id = std::atoi(argv[3]);
}
}
}
try {
// Load detector
std::cout << "Loading detector: " << detector_path << std::endl;
uniface::RetinaFace detector(detector_path);
std::cout << "Detector loaded!" << std::endl;
// Load landmark model if provided
std::unique_ptr<uniface::Landmark106> landmarker;
if (!landmark_path.empty()) {
std::cout << "Loading landmarker: " << landmark_path << std::endl;
landmarker = std::make_unique<uniface::Landmark106>(landmark_path);
std::cout << "Landmarker loaded!" << std::endl;
}
// Open camera
cv::VideoCapture cap(camera_id);
if (!cap.isOpened()) {
std::cerr << "Error: Cannot open camera " << camera_id << std::endl;
return 1;
}
const int frame_width = static_cast<int>(cap.get(cv::CAP_PROP_FRAME_WIDTH));
const int frame_height = static_cast<int>(cap.get(cv::CAP_PROP_FRAME_HEIGHT));
std::cout << "\nCamera opened: " << frame_width << "x" << frame_height << std::endl;
std::cout << "Press 'q' to quit, 's' to save screenshot, 'l' to toggle landmarks"
<< std::endl;
cv::Mat frame;
int frame_count = 0;
double total_time = 0.0;
bool show_landmarks = true; // Toggle for 106-point landmarks
while (true) {
cap >> frame;
if (frame.empty()) {
std::cerr << "Error: Empty frame captured" << std::endl;
break;
}
const auto start = std::chrono::high_resolution_clock::now();
// Detect faces
const auto faces = detector.detect(frame);
// Get 106-point landmarks if available
std::vector<uniface::Landmarks> all_landmarks;
if (landmarker && show_landmarks) {
all_landmarks.reserve(faces.size());
for (const auto& face : faces) {
all_landmarks.push_back(landmarker->getLandmarks(frame, face.bbox));
}
}
const auto end = std::chrono::high_resolution_clock::now();
const std::chrono::duration<double, std::milli> elapsed = end - start;
const double inference_time = elapsed.count();
++frame_count;
total_time += inference_time;
const double avg_time = total_time / static_cast<double>(frame_count);
const double fps = 1000.0 / avg_time;
// Draw results
for (size_t i = 0; i < faces.size(); ++i) {
const auto& face = faces[i];
// Draw bounding box
cv::rectangle(frame, face.bbox, cv::Scalar(0, 255, 0), 2);
// Draw 5-point landmarks from detector
for (size_t j = 0; j < face.landmarks.size(); ++j) {
cv::Scalar color;
if (j < 2) {
color = cv::Scalar(255, 0, 0); // Eyes - Blue
} else if (j == 2) {
color = cv::Scalar(0, 255, 0); // Nose - Green
} else {
color = cv::Scalar(0, 0, 255); // Mouth - Red
}
cv::circle(frame, face.landmarks[j], 3, color, -1);
}
// Draw 106-point landmarks if available
if (i < all_landmarks.size()) {
const auto& lm = all_landmarks[i];
// Draw all 106 points
for (const auto& pt : lm.points) {
cv::circle(frame, pt, 1, cv::Scalar(0, 255, 255), -1);
}
}
// Draw confidence
const std::string conf_text = cv::format("%.2f", face.confidence);
const cv::Point text_org(
static_cast<int>(face.bbox.x), static_cast<int>(face.bbox.y) - 5
);
cv::putText(
frame,
conf_text,
text_org,
cv::FONT_HERSHEY_SIMPLEX,
0.5,
cv::Scalar(0, 255, 0),
1
);
}
// Draw info overlay
std::string mode = landmarker
? (show_landmarks ? "Detection + 106 Landmarks" : "Detection Only")
: "Detection Only";
const std::string info_text = cv::format(
"FPS: %.1f | Faces: %zu | Time: %.1fms", fps, faces.size(), inference_time
);
cv::putText(
frame,
info_text,
cv::Point(10, 30),
cv::FONT_HERSHEY_SIMPLEX,
0.7,
cv::Scalar(0, 255, 0),
2
);
cv::putText(
frame,
mode,
cv::Point(10, 60),
cv::FONT_HERSHEY_SIMPLEX,
0.6,
cv::Scalar(255, 255, 0),
2
);
cv::imshow("Uniface - Face Detection & Landmarks", frame);
const char key = static_cast<char>(cv::waitKey(1));
if (key == 'q' || key == 27) {
break;
} else if (key == 's') {
const std::string filename = cv::format("screenshot_%d.jpg", frame_count);
cv::imwrite(filename, frame);
std::cout << "Screenshot saved: " << filename << std::endl;
} else if (key == 'l' && landmarker) {
show_landmarks = !show_landmarks;
std::cout << "106-point landmarks: " << (show_landmarks ? "ON" : "OFF")
<< std::endl;
}
}
cap.release();
cv::destroyAllWindows();
std::cout << "\n=== Statistics ===" << std::endl;
std::cout << "Total frames: " << frame_count << std::endl;
std::cout << "Average inference time: " << (total_time / frame_count) << " ms" << std::endl;
} catch (const cv::Exception& e) {
std::cerr << "OpenCV Error: " << e.what() << std::endl;
return 1;
} catch (const std::exception& e) {
std::cerr << "Error: " << e.what() << std::endl;
return 1;
}
return 0;
}

63 uniface-cpp/include/uniface/analyzer.hpp Normal file

@@ -0,0 +1,63 @@
#ifndef UNIFACE_ANALYZER_HPP_
#define UNIFACE_ANALYZER_HPP_
#include "uniface/detector.hpp"
#include "uniface/landmarker.hpp"
#include "uniface/recognizer.hpp"
#include "uniface/types.hpp"
#include <memory>
#include <optional>
#include <string>
#include <vector>
namespace uniface {
// Result of face analysis
struct AnalyzedFace {
Face face; // detection result (bbox, confidence, 5-point landmarks)
std::optional<Landmarks> landmarks; // 106-point landmarks (if landmarker loaded)
std::optional<Embedding> embedding; // face embedding (if recognizer loaded)
};
// Unified face analysis combining detection, recognition, and landmarks
class FaceAnalyzer {
public:
FaceAnalyzer() = default;
~FaceAnalyzer() = default;
FaceAnalyzer(const FaceAnalyzer&) = delete;
FaceAnalyzer& operator=(const FaceAnalyzer&) = delete;
FaceAnalyzer(FaceAnalyzer&&) = default;
FaceAnalyzer& operator=(FaceAnalyzer&&) = default;
// Load components (returns *this for chaining)
FaceAnalyzer& loadDetector(const std::string& path, const DetectorConfig& config = DetectorConfig{});
FaceAnalyzer& loadRecognizer(const std::string& path, const RecognizerConfig& config = RecognizerConfig{});
FaceAnalyzer& loadLandmarker(const std::string& path, const LandmarkerConfig& config = LandmarkerConfig{});
// Analyze faces in BGR image (throws if detector not loaded)
[[nodiscard]] std::vector<AnalyzedFace> analyze(const cv::Mat& image);
// Component checks
[[nodiscard]] bool hasDetector() const noexcept { return detector_ != nullptr; }
[[nodiscard]] bool hasRecognizer() const noexcept { return recognizer_ != nullptr; }
[[nodiscard]] bool hasLandmarker() const noexcept { return landmarker_ != nullptr; }
// Direct component access
[[nodiscard]] RetinaFace* detector() noexcept { return detector_.get(); }
[[nodiscard]] ArcFace* recognizer() noexcept { return recognizer_.get(); }
[[nodiscard]] Landmark106* landmarker() noexcept { return landmarker_.get(); }
[[nodiscard]] const RetinaFace* detector() const noexcept { return detector_.get(); }
[[nodiscard]] const ArcFace* recognizer() const noexcept { return recognizer_.get(); }
[[nodiscard]] const Landmark106* landmarker() const noexcept { return landmarker_.get(); }
private:
std::unique_ptr<RetinaFace> detector_;
std::unique_ptr<ArcFace> recognizer_;
std::unique_ptr<Landmark106> landmarker_;
};
} // namespace uniface
#endif // UNIFACE_ANALYZER_HPP_
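A usage sketch for the chaining loaders and optional result fields declared above (model paths are placeholders; analyze() throws unless a detector is loaded):
```cpp
#include <uniface/uniface.hpp>
#include <opencv2/imgcodecs.hpp>
#include <iostream>

int main() {
    uniface::FaceAnalyzer analyzer;
    analyzer.loadDetector("retinaface.onnx")      // required
            .loadRecognizer("arcface.onnx")       // optional: fills AnalyzedFace::embedding
            .loadLandmarker("landmark106.onnx");  // optional: fills AnalyzedFace::landmarks
    cv::Mat image = cv::imread("photo.jpg");
    if (image.empty()) return 1;
    for (const auto& r : analyzer.analyze(image)) {
        std::cout << "confidence=" << r.face.confidence;
        if (r.landmarks) std::cout << " (+106-pt landmarks)";
        if (r.embedding) std::cout << " (+512-dim embedding)";
        std::cout << std::endl;
    }
    return 0;
}
```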

47 uniface-cpp/include/uniface/detector.hpp Normal file

@@ -0,0 +1,47 @@
#ifndef UNIFACE_DETECTOR_HPP_
#define UNIFACE_DETECTOR_HPP_
#include "uniface/types.hpp"
#include <array>
#include <string>
#include <vector>
#include <opencv2/dnn.hpp>
namespace uniface {
// RetinaFace detector using OpenCV DNN backend
class RetinaFace {
public:
explicit RetinaFace(
const std::string& model_path,
float conf_thresh = 0.5f,
float nms_thresh = 0.4f,
cv::Size input_size = cv::Size(640, 640)
);
// Detect faces in BGR image, returns bboxes + 5-point landmarks
[[nodiscard]] std::vector<Face> detect(const cv::Mat& image);
// Accessors
[[nodiscard]] float getConfidenceThreshold() const noexcept { return confidence_threshold_; }
[[nodiscard]] float getNmsThreshold() const noexcept { return nms_threshold_; }
[[nodiscard]] cv::Size getInputSize() const noexcept { return input_size_; }
void setConfidenceThreshold(float threshold) noexcept { confidence_threshold_ = threshold; }
void setNmsThreshold(float threshold) noexcept { nms_threshold_ = threshold; }
private:
cv::dnn::Net net_;
float confidence_threshold_;
float nms_threshold_;
cv::Size input_size_;
std::vector<std::array<float, 4>> anchors_;
void generateAnchors();
};
} // namespace uniface
#endif // UNIFACE_DETECTOR_HPP_
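The setters above allow tuning after construction rather than rebuilding the detector; a brief sketch (model and image paths are placeholders):
```cpp
#include <uniface/detector.hpp>
#include <opencv2/imgcodecs.hpp>
#include <iostream>

int main() {
    // Constructor defaults per the header above: conf 0.5, NMS 0.4, 640x640 input.
    uniface::RetinaFace detector("retinaface.onnx");
    detector.setConfidenceThreshold(0.3f);  // keep weaker candidates (e.g. small faces)
    detector.setNmsThreshold(0.3f);         // prune overlapping boxes more aggressively
    cv::Mat image = cv::imread("crowd.jpg");
    if (image.empty()) return 1;
    std::cout << detector.detect(image).size() << " faces" << std::endl;
    return 0;
}
```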

32 uniface-cpp/include/uniface/landmarker.hpp Normal file

@@ -0,0 +1,32 @@
#ifndef UNIFACE_LANDMARKER_HPP_
#define UNIFACE_LANDMARKER_HPP_
#include "uniface/types.hpp"
#include <string>
#include <opencv2/dnn.hpp>
namespace uniface {
// 106-point facial landmark detector
class Landmark106 {
public:
explicit Landmark106(const std::string& model_path, const LandmarkerConfig& config = LandmarkerConfig{});
// Detect 106 landmarks for a face, returns points in original image coordinates
[[nodiscard]] Landmarks getLandmarks(const cv::Mat& image, const cv::Rect2f& bbox);
[[nodiscard]] cv::Size getInputSize() const noexcept { return config_.input_size; }
private:
cv::dnn::Net net_;
LandmarkerConfig config_;
[[nodiscard]] cv::Mat preprocess(const cv::Mat& image, const cv::Rect2f& bbox, cv::Mat& transform);
[[nodiscard]] Landmarks postprocess(const cv::Mat& predictions, const cv::Mat& transform);
};
} // namespace uniface
#endif // UNIFACE_LANDMARKER_HPP_

37 uniface-cpp/include/uniface/recognizer.hpp Normal file

@@ -0,0 +1,37 @@
#ifndef UNIFACE_RECOGNIZER_HPP_
#define UNIFACE_RECOGNIZER_HPP_
#include "uniface/types.hpp"
#include <string>
#include <opencv2/dnn.hpp>
namespace uniface {
// ArcFace face recognition (MobileNet/ResNet backbones)
class ArcFace {
public:
explicit ArcFace(const std::string& model_path, const RecognizerConfig& config = RecognizerConfig{});
// Get 512-dim embedding from pre-aligned 112x112 face
[[nodiscard]] Embedding getEmbedding(const cv::Mat& aligned_face);
// Get 512-dim embedding with automatic alignment
[[nodiscard]] Embedding getEmbedding(const cv::Mat& image, const std::array<cv::Point2f, 5>& landmarks);
// Get L2-normalized embedding with automatic alignment
[[nodiscard]] Embedding getNormalizedEmbedding(const cv::Mat& image, const std::array<cv::Point2f, 5>& landmarks);
[[nodiscard]] cv::Size getInputSize() const noexcept { return config_.input_size; }
private:
cv::dnn::Net net_;
RecognizerConfig config_;
[[nodiscard]] cv::Mat preprocess(const cv::Mat& face_image);
};
} // namespace uniface
#endif // UNIFACE_RECOGNIZER_HPP_

45 uniface-cpp/include/uniface/types.hpp Normal file

@@ -0,0 +1,45 @@
#ifndef UNIFACE_TYPES_HPP_
#define UNIFACE_TYPES_HPP_
#include <array>
#include <vector>
#include <opencv2/core.hpp>
namespace uniface {
// Detected face with bbox, confidence, and 5-point landmarks
struct Face {
cv::Rect2f bbox;
float confidence;
std::array<cv::Point2f, 5> landmarks; // left_eye, right_eye, nose, left_mouth, right_mouth
};
// 512-dimensional face embedding
using Embedding = std::array<float, 512>;
// 106-point facial landmarks
struct Landmarks {
std::array<cv::Point2f, 106> points;
};
// Configuration structs
struct DetectorConfig {
float conf_thresh = 0.5f;
float nms_thresh = 0.4f;
cv::Size input_size = cv::Size(640, 640);
};
struct RecognizerConfig {
float input_mean = 127.5f;
float input_std = 127.5f;
cv::Size input_size = cv::Size(112, 112);
};
struct LandmarkerConfig {
cv::Size input_size = cv::Size(192, 192);
};
} // namespace uniface
#endif // UNIFACE_TYPES_HPP_
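Since the config structs carry default member initializers, callers override only the fields they care about and pass the struct to the matching loader or constructor (a sketch using the FaceAnalyzer API defined earlier in this PR; paths are placeholders):
```cpp
#include <uniface/uniface.hpp>

int main() {
    uniface::DetectorConfig det_cfg;    // conf 0.5, NMS 0.4, 640x640 by default
    det_cfg.conf_thresh = 0.6f;         // stricter detections, fewer false positives
    uniface::RecognizerConfig rec_cfg;  // mean/std 127.5, 112x112 by default
    uniface::FaceAnalyzer analyzer;
    analyzer.loadDetector("retinaface.onnx", det_cfg)
            .loadRecognizer("arcface.onnx", rec_cfg);
    return 0;
}
```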

11 uniface-cpp/include/uniface/uniface.hpp Normal file

@@ -0,0 +1,11 @@
#ifndef UNIFACE_HPP_
#define UNIFACE_HPP_
#include "uniface/analyzer.hpp"
#include "uniface/detector.hpp"
#include "uniface/landmarker.hpp"
#include "uniface/recognizer.hpp"
#include "uniface/types.hpp"
#include "uniface/utils.hpp"
#endif // UNIFACE_HPP_

58 uniface-cpp/include/uniface/utils.hpp Normal file

@@ -0,0 +1,58 @@
#ifndef UNIFACE_UTILS_HPP_
#define UNIFACE_UTILS_HPP_
#include "uniface/types.hpp"
#include <array>
#include <cmath>
#include <opencv2/core.hpp>
namespace uniface {
// Reference 5-point landmarks for ArcFace alignment (112x112)
inline constexpr std::array<float, 10> kReferenceAlignment = {
38.2946f, 51.6963f, // left eye
73.5318f, 51.5014f, // right eye
56.0252f, 71.7366f, // nose
41.5493f, 92.3655f, // left mouth
70.7299f, 92.2041f // right mouth
};
// Align face using 5-point landmarks (default 112x112 for ArcFace)
[[nodiscard]] cv::Mat alignFace(
const cv::Mat& image,
const std::array<cv::Point2f, 5>& landmarks,
cv::Size output_size = cv::Size(112, 112)
);
// Cosine similarity between embeddings, returns [-1, 1]
[[nodiscard]] float cosineSimilarity(const Embedding& a, const Embedding& b) noexcept;
// Apply 2x3 affine transform to points
template <size_t N>
[[nodiscard]] std::array<cv::Point2f, N> transformPoints2D(
const std::array<cv::Point2f, N>& points, const cv::Mat& transform
) {
std::array<cv::Point2f, N> result{};
for (size_t i = 0; i < N; ++i) {
const float x = points[i].x;
const float y = points[i].y;
result[i].x = static_cast<float>(
transform.at<double>(0, 0) * x + transform.at<double>(0, 1) * y +
transform.at<double>(0, 2)
);
result[i].y = static_cast<float>(
transform.at<double>(1, 0) * x + transform.at<double>(1, 1) * y +
transform.at<double>(1, 2)
);
}
return result;
}
// Letterbox resize preserving aspect ratio, returns scale factor
[[nodiscard]] float letterboxResize(const cv::Mat& src, cv::Mat& dst, cv::Size target_size);
} // namespace uniface
#endif // UNIFACE_UTILS_HPP_
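One property worth noting: getNormalizedEmbedding() returns unit-length vectors, so cosineSimilarity() reduces to a dot product for them; the function still normalizes internally, which keeps it correct for raw embeddings too. A small model-free check (link against the uniface target):
```cpp
#include <uniface/utils.hpp>
#include <cassert>
#include <cmath>

int main() {
    uniface::Embedding a{};    // zero-initialized 512-dim array
    a[0] = 3.0f; a[1] = 4.0f;  // ||a|| = 5
    const uniface::Embedding b = a;
    // Identical direction -> similarity 1, regardless of magnitude.
    assert(std::abs(uniface::cosineSimilarity(a, b) - 1.0f) < 1e-6f);
    uniface::Embedding c{};
    c[2] = 1.0f;               // orthogonal to a -> similarity 0
    assert(std::abs(uniface::cosineSimilarity(a, c)) < 1e-6f);
    return 0;
}
```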

55 uniface-cpp/src/analyzer.cpp Normal file

@@ -0,0 +1,55 @@
#include "uniface/analyzer.hpp"
#include <stdexcept>
namespace uniface {
FaceAnalyzer& FaceAnalyzer::loadDetector(const std::string& path, const DetectorConfig& config) {
detector_ = std::make_unique<RetinaFace>(
path, config.conf_thresh, config.nms_thresh, config.input_size
);
return *this;
}
FaceAnalyzer& FaceAnalyzer::loadRecognizer(
const std::string& path, const RecognizerConfig& config
) {
recognizer_ = std::make_unique<ArcFace>(path, config);
return *this;
}
FaceAnalyzer& FaceAnalyzer::loadLandmarker(
const std::string& path, const LandmarkerConfig& config
) {
landmarker_ = std::make_unique<Landmark106>(path, config);
return *this;
}
std::vector<AnalyzedFace> FaceAnalyzer::analyze(const cv::Mat& image) {
if (!detector_) {
throw std::runtime_error("FaceAnalyzer: detector not loaded. Call loadDetector() first.");
}
auto faces = detector_->detect(image);
std::vector<AnalyzedFace> results;
results.reserve(faces.size());
for (const auto& face : faces) {
AnalyzedFace result;
result.face = face;
if (landmarker_) {
result.landmarks = landmarker_->getLandmarks(image, face.bbox);
}
if (recognizer_) {
result.embedding = recognizer_->getNormalizedEmbedding(image, face.landmarks);
}
results.push_back(std::move(result));
}
return results;
}
} // namespace uniface

204 uniface-cpp/src/detector.cpp Normal file

@@ -0,0 +1,204 @@
#include "uniface/detector.hpp"
#include "uniface/utils.hpp"
#include <cmath>
#include <iostream>
#include <opencv2/imgproc.hpp>
namespace uniface {
namespace {
// Model configuration constants
constexpr std::array<int, 3> kFeatureStrides = {8, 16, 32};
constexpr std::array<float, 2> kVariance = {0.1f, 0.2f};
constexpr int kNumLandmarks = 5;
// BGR mean values for image normalization
constexpr float kMeanB = 104.0f;
constexpr float kMeanG = 117.0f;
constexpr float kMeanR = 123.0f;
// Anchor min sizes for each feature map level
const std::vector<std::vector<int>> kMinSizes = {
{ 16, 32},
{ 64, 128},
{256, 512}
};
} // namespace
RetinaFace::RetinaFace(
const std::string& model_path, float conf_thresh, float nms_thresh, cv::Size input_size
)
: net_(cv::dnn::readNetFromONNX(model_path))
, confidence_threshold_(conf_thresh)
, nms_threshold_(nms_thresh)
, input_size_(input_size) {
generateAnchors();
}
void RetinaFace::generateAnchors() {
anchors_.clear();
size_t estimated_anchors = 0;
for (size_t k = 0; k < kFeatureStrides.size(); ++k) {
const int step = kFeatureStrides[k];
const auto feature_h = static_cast<size_t>(
std::ceil(static_cast<float>(input_size_.height) / static_cast<float>(step))
);
const auto feature_w = static_cast<size_t>(
std::ceil(static_cast<float>(input_size_.width) / static_cast<float>(step))
);
estimated_anchors += feature_h * feature_w * kMinSizes[k].size();
}
anchors_.reserve(estimated_anchors);
for (size_t k = 0; k < kFeatureStrides.size(); ++k) {
const int step = kFeatureStrides[k];
const int feature_h = static_cast<int>(
std::ceil(static_cast<float>(input_size_.height) / static_cast<float>(step))
);
const int feature_w = static_cast<int>(
std::ceil(static_cast<float>(input_size_.width) / static_cast<float>(step))
);
for (int i = 0; i < feature_h; ++i) {
for (int j = 0; j < feature_w; ++j) {
for (const int min_size : kMinSizes[k]) {
// Anchors are normalized: x-terms by image width, y-terms by image height.
const float s_kx = static_cast<float>(min_size) /
static_cast<float>(input_size_.width);
const float s_ky = static_cast<float>(min_size) /
static_cast<float>(input_size_.height);
const float cx = (static_cast<float>(j) + 0.5f) * static_cast<float>(step) /
static_cast<float>(input_size_.width);
const float cy = (static_cast<float>(i) + 0.5f) * static_cast<float>(step) /
static_cast<float>(input_size_.height);
anchors_.push_back({cx, cy, s_kx, s_ky});
}
}
}
}
}
std::vector<Face> RetinaFace::detect(const cv::Mat& image) {
cv::Mat input_blob;
const float resize_factor = letterboxResize(image, input_blob, input_size_);
const cv::Mat blob = cv::dnn::blobFromImage(
input_blob, 1.0, cv::Size(), cv::Scalar(kMeanB, kMeanG, kMeanR), false, false
);
net_.setInput(blob);
const auto output_names = net_.getUnconnectedOutLayersNames();
std::vector<cv::Mat> outputs;
net_.forward(outputs, output_names);
if (outputs.size() < 3) {
std::cerr << "Error: Model output count mismatch. Expected at least 3, got "
<< outputs.size() << std::endl;
return {};
}
// Identify outputs by shape: loc(N,4), conf(N,2), landmarks(N,10)
cv::Mat loc_output, conf_output, land_output;
for (const auto& output : outputs) {
if (output.dims != 3) continue;  // expect (1, N, C) shaped tensors
switch (output.size[2]) {
case 4: loc_output = output; break;
case 2: conf_output = output; break;
case 10: land_output = output; break;
default: break;
}
}
// Fallback to positional outputs
if (loc_output.empty()) loc_output = outputs[0];
if (conf_output.empty()) conf_output = outputs[1];
if (land_output.empty()) land_output = outputs[2];
const auto* loc_data = reinterpret_cast<const float*>(loc_output.data);
const auto* conf_data = reinterpret_cast<const float*>(conf_output.data);
const auto* land_data = reinterpret_cast<const float*>(land_output.data);
const auto num_priors = static_cast<size_t>(loc_output.size[1]);
if (num_priors != anchors_.size()) {
std::cerr << "Error: Anchor count mismatch! Expected " << anchors_.size()
<< " anchors but model output has " << num_priors << " priors.\n"
<< "This usually means the input size doesn't match the model's "
<< "expected size." << std::endl;
return {};
}
std::vector<cv::Rect2f> decoded_boxes;
std::vector<float> scores;
std::vector<std::array<cv::Point2f, 5>> decoded_landmarks;
decoded_boxes.reserve(num_priors);
scores.reserve(num_priors);
decoded_landmarks.reserve(num_priors);
const auto scale_w = static_cast<float>(input_size_.width);
const auto scale_h = static_cast<float>(input_size_.height);
for (size_t i = 0; i < num_priors; ++i) {
const float score = conf_data[i * 2 + 1];
if (score < confidence_threshold_) continue;
const float px = anchors_[i][0];
const float py = anchors_[i][1];
const float pw = anchors_[i][2];
const float ph = anchors_[i][3];
const float dx = loc_data[i * 4 + 0];
const float dy = loc_data[i * 4 + 1];
const float dw = loc_data[i * 4 + 2];
const float dh = loc_data[i * 4 + 3];
const float cx = px + dx * kVariance[0] * pw;
const float cy = py + dy * kVariance[0] * ph;
const float w = pw * std::exp(dw * kVariance[1]);
const float h = ph * std::exp(dh * kVariance[1]);
const float x1 = (cx - w / 2.0f) * scale_w / resize_factor;
const float y1 = (cy - h / 2.0f) * scale_h / resize_factor;
const float x2 = (cx + w / 2.0f) * scale_w / resize_factor;
const float y2 = (cy + h / 2.0f) * scale_h / resize_factor;
decoded_boxes.emplace_back(x1, y1, x2 - x1, y2 - y1);
scores.push_back(score);
std::array<cv::Point2f, 5> landmarks{};
for (int k = 0; k < kNumLandmarks; ++k) {
const float ldx = land_data[i * 10 + static_cast<size_t>(k) * 2 + 0];
const float ldy = land_data[i * 10 + static_cast<size_t>(k) * 2 + 1];
const float lx = (px + ldx * kVariance[0] * pw) * scale_w / resize_factor;
const float ly = (py + ldy * kVariance[0] * ph) * scale_h / resize_factor;
landmarks[static_cast<size_t>(k)] = cv::Point2f(lx, ly);
}
decoded_landmarks.push_back(landmarks);
}
// NMS
std::vector<cv::Rect2d> boxes_for_nms;
boxes_for_nms.reserve(decoded_boxes.size());
for (const auto& box : decoded_boxes) {
boxes_for_nms.emplace_back(box.x, box.y, box.width, box.height);
}
std::vector<int> nms_indices;
cv::dnn::NMSBoxes(boxes_for_nms, scores, confidence_threshold_, nms_threshold_, nms_indices);
std::vector<Face> results;
results.reserve(nms_indices.size());
for (const int idx : nms_indices) {
const auto uidx = static_cast<size_t>(idx);
results.push_back({decoded_boxes[uidx], scores[uidx], decoded_landmarks[uidx]});
}
return results;
}
} // namespace uniface

90 uniface-cpp/src/landmarker.cpp Normal file

@@ -0,0 +1,90 @@
#include "uniface/landmarker.hpp"
#include <cmath>
#include <opencv2/imgproc.hpp>
namespace uniface {
namespace {
constexpr int kNumLandmarks = 106;
cv::Mat computeCenterTransform(const cv::Point2f& center, float scale, int output_size) {
cv::Mat transform = cv::Mat::zeros(2, 3, CV_64F);
transform.at<double>(0, 0) = scale;
transform.at<double>(1, 1) = scale;
transform.at<double>(0, 2) = -center.x * scale + output_size / 2.0;
transform.at<double>(1, 2) = -center.y * scale + output_size / 2.0;
return transform;
}
} // namespace
Landmark106::Landmark106(const std::string& model_path, const LandmarkerConfig& config)
: net_(cv::dnn::readNetFromONNX(model_path))
, config_(config) {}
cv::Mat Landmark106::preprocess(const cv::Mat& image, const cv::Rect2f& bbox, cv::Mat& transform) {
const float width = bbox.width;
const float height = bbox.height;
const float center_x = bbox.x + width / 2.0f;
const float center_y = bbox.y + height / 2.0f;
const float max_dim = std::max(width, height);
const float scale = static_cast<float>(config_.input_size.width) / (max_dim * 1.5f);
transform = computeCenterTransform(cv::Point2f(center_x, center_y), scale, config_.input_size.width);
cv::Mat aligned;
cv::warpAffine(image, aligned, transform, config_.input_size, cv::INTER_LINEAR, cv::BORDER_CONSTANT);
cv::Mat blob = cv::dnn::blobFromImage(aligned, 1.0, config_.input_size, cv::Scalar(0, 0, 0), true, false);
return blob;
}
Landmarks Landmark106::postprocess(const cv::Mat& predictions, const cv::Mat& transform) {
Landmarks result{};
const auto* pred_data = reinterpret_cast<const float*>(predictions.data);
cv::Mat inverse_transform;
cv::invertAffineTransform(transform, inverse_transform);
const int input_size = config_.input_size.width;
const float half_size = static_cast<float>(input_size) / 2.0f;
for (int i = 0; i < kNumLandmarks; ++i) {
// Denormalize from [-1, 1] to pixel coordinates
float x = (pred_data[i * 2 + 0] + 1.0f) * half_size;
float y = (pred_data[i * 2 + 1] + 1.0f) * half_size;
// Transform back to original image coordinates
const float orig_x = static_cast<float>(
inverse_transform.at<double>(0, 0) * x + inverse_transform.at<double>(0, 1) * y +
inverse_transform.at<double>(0, 2)
);
const float orig_y = static_cast<float>(
inverse_transform.at<double>(1, 0) * x + inverse_transform.at<double>(1, 1) * y +
inverse_transform.at<double>(1, 2)
);
result.points[static_cast<size_t>(i)] = cv::Point2f(orig_x, orig_y);
}
return result;
}
Landmarks Landmark106::getLandmarks(const cv::Mat& image, const cv::Rect2f& bbox) {
cv::Mat transform;
cv::Mat blob = preprocess(image, bbox, transform);
net_.setInput(blob);
cv::Mat output = net_.forward();
return postprocess(output, transform);
}
} // namespace uniface

73 uniface-cpp/src/recognizer.cpp Normal file

@@ -0,0 +1,73 @@
#include "uniface/recognizer.hpp"
#include "uniface/utils.hpp"
#include <cmath>
#include <opencv2/imgproc.hpp>
namespace uniface {
ArcFace::ArcFace(const std::string& model_path, const RecognizerConfig& config)
: net_(cv::dnn::readNetFromONNX(model_path))
, config_(config) {}
cv::Mat ArcFace::preprocess(const cv::Mat& face_image) {
cv::Mat resized;
if (face_image.size() != config_.input_size) {
cv::resize(face_image, resized, config_.input_size);
} else {
resized = face_image;
}
// Normalize: (pixel - mean) / std, BGR -> RGB
cv::Mat blob = cv::dnn::blobFromImage(
resized, 1.0 / config_.input_std, config_.input_size,
cv::Scalar(config_.input_mean, config_.input_mean, config_.input_mean), true, false
);
return blob;
}
Embedding ArcFace::getEmbedding(const cv::Mat& aligned_face) {
cv::Mat blob = preprocess(aligned_face);
net_.setInput(blob);
cv::Mat output = net_.forward();
Embedding embedding{};
const auto* output_data = reinterpret_cast<const float*>(output.data);
const size_t embedding_size = std::min(static_cast<size_t>(output.total()), embedding.size());
for (size_t i = 0; i < embedding_size; ++i) {
embedding[i] = output_data[i];
}
return embedding;
}
Embedding ArcFace::getEmbedding(const cv::Mat& image, const std::array<cv::Point2f, 5>& landmarks) {
cv::Mat aligned = alignFace(image, landmarks, config_.input_size);
return getEmbedding(aligned);
}
Embedding ArcFace::getNormalizedEmbedding(const cv::Mat& image, const std::array<cv::Point2f, 5>& landmarks) {
Embedding embedding = getEmbedding(image, landmarks);
// L2 normalize
float norm = 0.0f;
for (const float val : embedding) {
norm += val * val;
}
norm = std::sqrt(norm);
if (norm > 1e-8f) {
for (float& val : embedding) {
val /= norm;
}
}
return embedding;
}
} // namespace uniface

82 uniface-cpp/src/utils.cpp Normal file

@@ -0,0 +1,82 @@
#include "uniface/utils.hpp"
#include <opencv2/calib3d.hpp>
#include <opencv2/imgproc.hpp>
namespace uniface {
cv::Mat alignFace(const cv::Mat& image, const std::array<cv::Point2f, 5>& landmarks, cv::Size output_size) {
const float ratio = static_cast<float>(output_size.width) / 112.0f;
std::vector<cv::Point2f> dst_points(5);
for (int i = 0; i < 5; ++i) {
dst_points[i].x = kReferenceAlignment[static_cast<size_t>(i) * 2] * ratio;
dst_points[i].y = kReferenceAlignment[static_cast<size_t>(i) * 2 + 1] * ratio;
}
std::vector<cv::Point2f> src_points(landmarks.begin(), landmarks.end());
cv::Mat transform = cv::estimateAffinePartial2D(src_points, dst_points);
if (transform.empty()) {
cv::Mat resized;
cv::resize(image, resized, output_size);
return resized;
}
cv::Mat aligned;
cv::warpAffine(image, aligned, transform, output_size, cv::INTER_LINEAR, cv::BORDER_CONSTANT);
return aligned;
}
float cosineSimilarity(const Embedding& a, const Embedding& b) noexcept {
float dot = 0.0f;
float norm_a = 0.0f;
float norm_b = 0.0f;
for (size_t i = 0; i < a.size(); ++i) {
dot += a[i] * b[i];
norm_a += a[i] * a[i];
norm_b += b[i] * b[i];
}
const float denom = std::sqrt(norm_a) * std::sqrt(norm_b);
if (denom < 1e-8f) {
return 0.0f;
}
return dot / denom;
}
float letterboxResize(const cv::Mat& src, cv::Mat& dst, cv::Size target_size) {
const auto src_height = static_cast<float>(src.rows);
const auto src_width = static_cast<float>(src.cols);
const auto target_height = static_cast<float>(target_size.height);
const auto target_width = static_cast<float>(target_size.width);
const float im_ratio = src_height / src_width;
const float model_ratio = target_height / target_width;
int new_width = 0;
int new_height = 0;
if (im_ratio > model_ratio) {
new_height = static_cast<int>(target_height);
new_width = static_cast<int>(static_cast<float>(new_height) / im_ratio);
} else {
new_width = static_cast<int>(target_width);
new_height = static_cast<int>(static_cast<float>(new_width) * im_ratio);
}
const float resize_factor = static_cast<float>(new_height) / src_height;
cv::Mat resized;
cv::resize(src, resized, cv::Size(new_width, new_height));
dst = cv::Mat::zeros(target_size, src.type());
resized.copyTo(dst(cv::Rect(0, 0, new_width, new_height)));
return resize_factor;
}
} // namespace uniface