diff --git a/README.md b/README.md
index 532b2b2..5ff90c3 100644
--- a/README.md
+++ b/README.md
@@ -19,14 +19,14 @@
## Features
-| Date | Feature Description |
-| ---------- | --------------------------------------------------------------------------------------------------------------- |
-| Planned | 🎭 **Age and Gender Detection**: Planned feature for predicting age and gender from facial images. |
-| Planned | 🧩 **Face Recognition**: Upcoming capability to identify and verify faces. |
-| 2024-11-21 | 🔄 **Face Alignment**: Added precise face alignment for better downstream tasks. |
-| 2024-11-20 | ⚡ **High-Speed Face Detection**: ONNX model integration for faster and efficient face detection. |
-| 2024-11-20 | 🎯 **Facial Landmark Localization**: Accurate detection of key facial features like eyes, nose, and mouth. |
-| 2024-11-20 | 🛠 **API for Inference and Visualization**: Simplified API for seamless inference and visual results generation. |
+| Date | Feature Description |
+| ---------- | --------------------------------------------------------------------------------------------------------------------- |
+| Planned    | 🎭 **Age and Gender Detection**: Planned feature for predicting age and gender from facial images.                    |
+| Planned    | 🧩 **Face Recognition**: Upcoming capability to identify and verify faces.                                             |
+| 2024-11-21 | 🔄 **Face Alignment**: Added precise face alignment for better downstream tasks.                                       |
+| 2024-11-20 | ⚡ **High-Speed Face Detection**: ONNX model integration for fast and efficient face detection.                        |
+| 2024-11-20 | 🎯 **Facial Landmark Localization**: Accurate detection of key facial features like eyes, nose, and mouth.             |
+| 2024-11-20 | 🛠 **API for Inference and Visualization**: Simplified API for seamless inference and visual result generation.        |
---
@@ -43,7 +43,7 @@ To work with the latest version of **UniFace**, which may not yet be released on
```bash
git clone https://github.com/yakhyo/uniface.git
cd uniface
-pip install .
+pip install -e .
```
---
@@ -179,13 +179,13 @@ cv2.destroyAllWindows()
### Evaluation results of available models on WiderFace
-| RetinaFace Models | Easy | Medium | Hard |
-| ------------------ | ---------- | ---------- | ---------- |
-| retinaface_mnet025 | 88.48% | 87.02% | 80.61% |
-| retinaface_mnet050 | 89.42% | 87.97% | 82.40% |
-| retinaface_mnet_v1 | 90.59% | 89.14% | 84.13% |
-| retinaface_mnet_v2 | 91.70% | 91.03% | 86.60% |
-| retinaface_r18 | 92.50% | 91.02% | 86.63% |
+| RetinaFace Models | Easy | Medium | Hard |
+| ------------------ | ---------------- | ---------------- | ---------------- |
+| retinaface_mnet025 | 88.48% | 87.02% | 80.61% |
+| retinaface_mnet050 | 89.42% | 87.97% | 82.40% |
+| retinaface_mnet_v1 | 90.59% | 89.14% | 84.13% |
+| retinaface_mnet_v2 | 91.70% | 91.03% | 86.60% |
+| retinaface_r18 | 92.50% | 91.02% | 86.63% |
-| retinaface_r34     | **94.16%** | **93.12%** | **88.90%** |
+| retinaface_r34     | **94.16%**       | **93.12%**       | **88.90%**       |
diff --git a/docs/installation.md b/docs/installation.md
index c7df120..02ed78f 100644
--- a/docs/installation.md
+++ b/docs/installation.md
@@ -1,3 +1,37 @@
-# Installation
+# 🚀 Installation
-Instructions to install UniFace.
\ No newline at end of file
+## 📦 Install from PyPI
+
+### CPU-only (default):
+
+```bash
+pip install uniface
+```
+
+This installs the CPU-compatible version of ONNX Runtime (`onnxruntime`) and all core dependencies.
+
+### GPU support:
+
+```bash
+pip install "uniface[gpu]"
+```
+
+This installs `onnxruntime-gpu` for accelerated inference on supported NVIDIA GPUs.
+Make sure your system meets the [ONNX Runtime GPU requirements](https://onnxruntime.ai/docs/build/eps.html#cuda).
+
+---
+
+## 🔧 Install from GitHub (latest version)
+
+Clone the repository and install it manually:
+
+```bash
+git clone https://github.com/yakhyo/uniface.git
+cd uniface
+
+# CPU version
+pip install .
+
+# Or with GPU support
+pip install ".[gpu]"
+```
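
A quick way to confirm which install path took effect is to ask ONNX Runtime for its execution providers. A minimal sanity-check sketch (the provider names in the comments are typical; the exact list depends on your environment):

```python
# Post-install sanity check: verify the package imports and list ONNX Runtime providers.
import onnxruntime as ort

import uniface

print("uniface version:", uniface.__version__)

# The CPU package typically reports ['CPUExecutionProvider', ...];
# with uniface[gpu] and a working CUDA setup, 'CUDAExecutionProvider'
# should also appear in this list.
print("available providers:", ort.get_available_providers())
```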
diff --git a/docs/reference/attribute.md b/docs/reference/attribute.md
new file mode 100644
index 0000000..cd17483
--- /dev/null
+++ b/docs/reference/attribute.md
@@ -0,0 +1,9 @@
+# Facial Attribute API Reference
+
+## Age and Gender Model
+
+::: uniface.attribute.age_gender.AgeGender
+
+## Emotion Model
+
+::: uniface.attribute.emotion.Emotion
diff --git a/docs/reference/detection.md b/docs/reference/detection.md
index c013de3..b53a52d 100644
--- a/docs/reference/detection.md
+++ b/docs/reference/detection.md
@@ -1,3 +1,10 @@
-# Detection API Reference
+# Face Detection API Reference
-::: uniface.RetinaFace
+## RetinaFace
+
+::: uniface.detection.retinaface.RetinaFace
+
+
+## SCRFD
+
+::: uniface.detection.scrfd.SCRFD
diff --git a/docs/reference/landmark.md b/docs/reference/landmark.md
new file mode 100644
index 0000000..1ff3165
--- /dev/null
+++ b/docs/reference/landmark.md
@@ -0,0 +1,5 @@
+# Landmark API Reference
+
+# Landmark Model
+
+::: uniface.landmark.model.Landmark
diff --git a/docs/reference/recognition.md b/docs/reference/recognition.md
index 91933e0..5691d71 100644
--- a/docs/reference/recognition.md
+++ b/docs/reference/recognition.md
@@ -1,13 +1,17 @@
-# Recognition API Reference
+# Face Recognition API Reference
+## SphereFace
-# SphereFace Model
::: uniface.recognition.models.SphereFace
+## MobileFace
-# MobileFace Model
-:::uniface.recognition.models.MobileFace
+::: uniface.recognition.models.MobileFace
+## ArcFace
-# Base Face Encoder Model
-:::uniface.recognition.base.BaseFaceEncoder
\ No newline at end of file
+::: uniface.recognition.models.ArcFace
+
+## BaseModel (base encoder class)
+
+::: uniface.recognition.base.BaseModel
diff --git a/examples/face_detection.ipynb b/examples/face_detection.ipynb
index f3447b6..6f03d9c 100644
--- a/examples/face_detection.ipynb
+++ b/examples/face_detection.ipynb
@@ -30,7 +30,7 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
@@ -265,7 +265,7 @@
],
"metadata": {
"kernelspec": {
- "display_name": "base",
+ "display_name": "uniface",
"language": "python",
"name": "python3"
},
@@ -279,7 +279,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.12.2"
+ "version": "3.12.11"
}
},
"nbformat": 4,
diff --git a/mkdocs.yml b/mkdocs.yml
index d80ce67..fff0dd9 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -70,6 +70,8 @@ nav:
- API Reference:
- Detection: reference/detection.md
- Recognition: reference/recognition.md
+ - Landmark: reference/landmark.md
+ - Attribute: reference/attribute.md
- About:
- Changelog: about/changelog.md
- License: about/license.md
diff --git a/requirements_mkdocs.txt b/requirements_mkdocs.txt
index ac7dc76..fd499df 100644
--- a/requirements_mkdocs.txt
+++ b/requirements_mkdocs.txt
@@ -1 +1,3 @@
+mkdocs-material
+mkdocs-minify-plugin
mkdocstrings[python]
diff --git a/scripts/run_recognition.py b/scripts/run_recognition.py
index f1b82e4..183aead 100644
--- a/scripts/run_recognition.py
+++ b/scripts/run_recognition.py
@@ -29,11 +29,11 @@ def run_inference(detector, recognizer, image_path):
print(f"Detected {len(boxes)} face(s). Extracting embeddings...")
- for i, landmark in enumerate(landmarks):
+    for i, landmark in enumerate(landmarks[:1]):  # demo: embed only the first detected face
embedding = recognizer.get_embedding(image, landmark)
- norm = np.linalg.norm(embedding)
- print(f"\nFace {i} embedding (L2 norm = {norm:.4f}):")
- print(embedding)
+ norm_embedding = recognizer.get_normalized_embedding(image, landmark)
+ print("embedding:", np.sum(embedding))
+ print("norm embedding:",np.sum(norm_embedding))
def main():
diff --git a/uniface/__init__.py b/uniface/__init__.py
index 3250e33..094f77a 100644
--- a/uniface/__init__.py
+++ b/uniface/__init__.py
@@ -15,21 +15,32 @@ __license__ = "MIT"
__author__ = "Yakhyokhuja Valikhujaev"
__version__ = "0.1.8"
+from .detection import detect_faces, create_detector, list_available_detectors
-from uniface.retinaface import RetinaFace
-from uniface.log import Logger
-from uniface.model_store import verify_model_weights
from uniface.face_utils import face_alignment, compute_similarity
+from uniface.model_store import verify_model_weights
from uniface.visualization import draw_detections
+from uniface.log import Logger
+
+
__all__ = [
+ # Metadata
"__version__",
"__author__",
"__license__",
- "RetinaFace",
- "Logger",
- "verify_model_weights",
- "draw_detections",
+
+ # Core functions
+    "detect_faces",
+    "create_detector",
+    "list_available_detectors",
+
+ # Utility functions
"face_alignment",
"compute_similarity",
+ "verify_model_weights",
+ "draw_detections",
+
+ # Classes
+ "Logger",
]
diff --git a/uniface/attribute/age_gender.py b/uniface/attribute/age_gender.py
index faab4fd..efb844e 100644
--- a/uniface/attribute/age_gender.py
+++ b/uniface/attribute/age_gender.py
@@ -1,6 +1,11 @@
+# Copyright 2025 Yakhyokhuja Valikhujaev
+# Author: Yakhyokhuja Valikhujaev
+# GitHub: https://github.com/yakhyo
+
import cv2
import numpy as np
import onnxruntime as ort
+
from typing import Tuple
from uniface.log import Logger
@@ -14,10 +19,24 @@ __all__ = ["AgeGender"]
class AgeGender:
"""
- Age and Gender Prediction Model.
+ Age and gender prediction model using ONNX Runtime.
- This model predicts both a person's gender (male/female) and age from a facial image.
- Gender is returned as an integer (0: female, 1: male) and age as years.
+ Loads a pretrained ONNX model to predict both age (in years) and gender
+ (0: female, 1: male) from a detected face region. Handles model loading,
+ preprocessing, inference, and output interpretation.
+
+ Attributes:
+ input_size (Tuple[int, int]): Model's expected input resolution (width, height).
+ input_mean (float): Mean value used for input normalization.
+ input_std (float): Standard deviation used for input normalization.
+ model_path (str): Path to the verified ONNX model file.
+ session (onnxruntime.InferenceSession): ONNX Runtime session for inference.
+ input_names (List[str]): List of input node names.
+ output_names (List[str]): List of output node names.
+
+ Args:
+ model_name (AgeGenderWeights): Enum specifying the age-gender model to load.
+ input_size (Tuple[int, int]): Resolution for model input; defaults to (112, 112).
"""
def __init__(
diff --git a/uniface/attribute/emotion.py b/uniface/attribute/emotion.py
index 8d49993..17a3c19 100644
--- a/uniface/attribute/emotion.py
+++ b/uniface/attribute/emotion.py
@@ -3,9 +3,8 @@
# GitHub: https://github.com/yakhyo
import cv2
-import numpy as np
import torch
-from PIL import Image
+import numpy as np
from typing import Tuple, Union
diff --git a/uniface/detection/__init__.py b/uniface/detection/__init__.py
index 85f5beb..95ce09d 100644
--- a/uniface/detection/__init__.py
+++ b/uniface/detection/__init__.py
@@ -1,3 +1,145 @@
-from .retinaface import RetinaFace
+# Copyright 2025 Yakhyokhuja Valikhujaev
+# Author: Yakhyokhuja Valikhujaev
+# GitHub: https://github.com/yakhyo
+
+
+import numpy as np
+from typing import Any, Dict, List
+
from .scrfd import SCRFD
-from uniface.visualization import draw_detections
+from .base import BaseDetector
+from .retinaface import RetinaFace
+
+# Global cache for detector instances
+_detector_cache: Dict[str, BaseDetector] = {}
+
+
+def detect_faces(image: np.ndarray, method: str = 'retinaface', **kwargs) -> List[Dict[str, Any]]:
+ """
+ High-level face detection function.
+
+ Args:
+ image (np.ndarray): Input image as numpy array.
+ method (str): Detection method to use. Options: 'retinaface', 'scrfd'.
+ **kwargs: Additional arguments passed to the detector.
+
+ Returns:
+ List[Dict[str, Any]]: A list of dictionaries, where each dictionary represents a detected face and contains:
+ - 'bbox' (List[float]): [x1, y1, x2, y2] bounding box coordinates.
+ - 'confidence' (float): The confidence score of the detection.
+ - 'landmarks' (List[List[float]]): 5-point facial landmarks.
+
+ Example:
+        >>> import cv2
+        >>> from uniface import detect_faces
+ >>> image = cv2.imread("your_image.jpg")
+ >>> faces = detect_faces(image, method='retinaface', conf_thresh=0.8)
+ >>> for face in faces:
+ ... print(f"Found face with confidence: {face['confidence']}")
+ ... print(f"BBox: {face['bbox']}")
+ """
+ method_name = method.lower()
+
+ sorted_kwargs = sorted(kwargs.items())
+ cache_key = f"{method_name}_{str(sorted_kwargs)}"
+
+ if cache_key not in _detector_cache:
+ # Pass kwargs to create the correctly configured detector
+ _detector_cache[cache_key] = create_detector(method, **kwargs)
+
+ detector = _detector_cache[cache_key]
+ return detector.detect(image)
+
+
+def create_detector(method: str = 'retinaface', **kwargs) -> BaseDetector:
+ """
+ Factory function to create face detectors.
+
+ Args:
+ method (str): Detection method. Options:
+ - 'retinaface': RetinaFace detector (default)
+ - 'scrfd': SCRFD detector (fast and accurate)
+ **kwargs: Detector-specific parameters
+
+ Returns:
+ BaseDetector: Initialized detector instance
+
+ Raises:
+ ValueError: If method is not supported
+
+ Examples:
+ >>> # Basic usage
+ >>> detector = create_detector('retinaface')
+
+ >>> # SCRFD detector with custom parameters
+ >>> detector = create_detector(
+ ... 'scrfd',
+ ... model_name=SCRFDWeights.SCRFD_10G_KPS,
+ ... conf_thresh=0.8,
+ ... input_size=(640, 640)
+ ... )
+
+ >>> # RetinaFace detector
+ >>> detector = create_detector(
+ ... 'retinaface',
+ ... model_name=RetinaFaceWeights.MNET_V2,
+ ... conf_thresh=0.8,
+ ... nms_thresh=0.4
+ ... )
+ """
+ method = method.lower()
+
+ if method == 'retinaface':
+ return RetinaFace(**kwargs)
+
+ elif method == 'scrfd':
+ return SCRFD(**kwargs)
+
+ else:
+ available_methods = ['retinaface', 'scrfd']
+ raise ValueError(
+ f"Unsupported detection method: '{method}'. "
+ f"Available methods: {available_methods}"
+ )
+
+
+def list_available_detectors() -> Dict[str, Dict[str, Any]]:
+ """
+ List all available detection methods with their descriptions and parameters.
+
+ Returns:
+ Dict[str, Dict[str, Any]]: Dictionary of detector information
+ """
+ return {
+ 'retinaface': {
+ 'description': 'RetinaFace detector with high accuracy',
+ 'supports_landmarks': True,
+ 'paper': 'https://arxiv.org/abs/1905.00641',
+ 'default_params': {
+ 'model_name': 'mnet_v2',
+ 'conf_thresh': 0.5,
+ 'nms_thresh': 0.4,
+ 'input_size': (640, 640)
+ }
+ },
+ 'scrfd': {
+ 'description': 'SCRFD detector - fast and accurate with efficient architecture',
+ 'supports_landmarks': True,
+ 'paper': 'https://arxiv.org/abs/2105.04714',
+ 'default_params': {
+ 'model_name': 'scrfd_10g_kps',
+ 'conf_thresh': 0.5,
+ 'nms_thresh': 0.4,
+ 'input_size': (640, 640)
+ }
+ }
+ }
+
+
+__all__ = [
+ 'detect_faces',
+ 'create_detector',
+ 'list_available_detectors',
+ 'SCRFD',
+ 'RetinaFace',
+ 'BaseDetector',
+]
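
Taken together, `detect_faces`, `create_detector`, and the module-level cache give a one-call detection API. A minimal usage sketch, assuming a local image file `face.jpg` (the filename and thresholds are illustrative):

```python
import cv2

from uniface import create_detector, detect_faces, list_available_detectors

image = cv2.imread("face.jpg")  # illustrative input file

# One-shot API: the detector is built on first use and cached, so repeated
# calls with the same method and kwargs reuse the same instance.
faces = detect_faces(image, method="scrfd", conf_thresh=0.8)
for face in faces:
    print(f"confidence={face['confidence']:.3f}, bbox={face['bbox']}")

# Explicit construction when a configured detector should be kept around:
detector = create_detector("retinaface", conf_thresh=0.5, nms_thresh=0.4)
print(detector.get_info())

# Enumerate the registered methods and their documented defaults:
for name, info in list_available_detectors().items():
    print(name, "->", info["description"])
```

Note that the cache key includes the sorted kwargs, so detectors created with different thresholds are cached as separate instances.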
diff --git a/uniface/detection/base.py b/uniface/detection/base.py
new file mode 100644
index 0000000..509a06d
--- /dev/null
+++ b/uniface/detection/base.py
@@ -0,0 +1,100 @@
+# Copyright 2025 Yakhyokhuja Valikhujaev
+# Author: Yakhyokhuja Valikhujaev
+# GitHub: https://github.com/yakhyo
+
+"""
+Base classes for face detection.
+"""
+
+import numpy as np
+from abc import ABC, abstractmethod
+from typing import Any, Dict, List, Tuple
+
+
+class BaseDetector(ABC):
+ """
+ Abstract base class for all face detectors.
+
+ This class defines the interface that all face detectors must implement,
+ ensuring consistency across different detection methods.
+ """
+
+ def __init__(self, **kwargs):
+ """Initialize the detector with configuration parameters."""
+ self.config = kwargs
+
+ @abstractmethod
+    def detect(self, image: np.ndarray, **kwargs) -> List[Dict[str, Any]]:
+ """
+ Detect faces in an image.
+
+ Args:
+ image (np.ndarray): Input image as numpy array with shape (H, W, C)
+ **kwargs: Additional detection parameters
+
+        Returns:
+            List[Dict[str, Any]]: One dictionary per detected face, containing:
+                - 'bbox': [x_min, y_min, x_max, y_max] bounding box coordinates
+                - 'confidence': detection confidence score (float)
+                - 'landmarks': 5-point facial landmarks as [x, y] pairs,
+                  or an empty list if the detector does not support landmarks
+ """
+ pass
+
+ @abstractmethod
+ def preprocess(self, image: np.ndarray) -> np.ndarray:
+ """
+ Preprocess input image for detection.
+
+ Args:
+ image (np.ndarray): Input image
+
+ Returns:
+ np.ndarray: Preprocessed image tensor
+ """
+ pass
+
+ @abstractmethod
+ def postprocess(self, outputs, **kwargs) -> Tuple[np.ndarray, np.ndarray]:
+ """
+ Postprocess model outputs to get final detections.
+
+ Args:
+ outputs: Raw model outputs
+ **kwargs: Additional postprocessing parameters
+
+ Returns:
+ Tuple[np.ndarray, np.ndarray]: (detections, landmarks)
+ """
+ pass
+
+ def __str__(self) -> str:
+ """String representation of the detector."""
+ return f"{self.__class__.__name__}({self.config})"
+
+ def __repr__(self) -> str:
+ """Detailed string representation."""
+ return self.__str__()
+
+ @property
+ def supports_landmarks(self) -> bool:
+ """
+ Whether this detector supports landmark detection.
+
+ Returns:
+ bool: True if landmarks are supported, False otherwise
+ """
+ return hasattr(self, '_supports_landmarks') and self._supports_landmarks
+
+ def get_info(self) -> Dict[str, Any]:
+ """
+ Get detector information and configuration.
+
+ Returns:
+ Dict[str, Any]: Detector information
+ """
+ return {
+ 'name': self.__class__.__name__,
+            'supports_landmarks': self.supports_landmarks,
+ 'config': self.config
+ }
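
To make the abstract contract concrete, here is a hypothetical no-op subclass; `DummyDetector` is purely illustrative and not part of the library:

```python
import numpy as np
from typing import Any, Dict, List, Tuple

from uniface.detection.base import BaseDetector


class DummyDetector(BaseDetector):
    """Illustrative detector implementing the required interface; it finds nothing."""

    def __init__(self, **kwargs) -> None:
        super().__init__(**kwargs)
        self._supports_landmarks = False

    def preprocess(self, image: np.ndarray) -> np.ndarray:
        # A real detector would resize and normalize here.
        return image.astype(np.float32)

    def detect(self, image: np.ndarray, **kwargs) -> List[Dict[str, Any]]:
        # A real detector would chain preprocess -> inference -> postprocess.
        return []

    def postprocess(self, outputs, **kwargs) -> Tuple[np.ndarray, np.ndarray]:
        return np.empty((0, 5)), np.empty((0, 5, 2))


detector = DummyDetector(conf_thresh=0.5)
print(detector)                     # DummyDetector({'conf_thresh': 0.5})
print(detector.supports_landmarks)  # False
```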
diff --git a/uniface/detection/retinaface.py b/uniface/detection/retinaface.py
index 9b6ce1b..d5262da 100644
--- a/uniface/detection/retinaface.py
+++ b/uniface/detection/retinaface.py
@@ -2,16 +2,16 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
-import os
-import cv2
import numpy as np
import onnxruntime as ort
-from typing import Tuple, List, Optional, Literal
+from typing import Tuple, List, Literal, Dict, Any
from uniface.log import Logger
from uniface.model_store import verify_model_weights
from uniface.constants import RetinaFaceWeights
+
+from .base import BaseDetector
from .utils import (
non_max_supression,
resize_image,
@@ -21,64 +21,64 @@ from .utils import (
)
-class RetinaFace:
+class RetinaFace(BaseDetector):
"""
Face detector based on the RetinaFace architecture.
+ Title: "RetinaFace: Single-stage Dense Face Localisation in the Wild"
+ Paper: https://arxiv.org/abs/1905.00641
+
Args:
- model_name (RetinaFaceWeights): Model weights to use. Defaults to `RetinaFaceWeights.MNET_V2`.
- conf_thresh (float): Confidence threshold for filtering detections. Defaults to 0.5.
- nms_thresh (float): Non-maximum suppression (NMS) threshold. Defaults to 0.4.
- pre_nms_topk (int): Number of top-scoring boxes considered before applying NMS. Defaults to 5000.
- post_nms_topk (int): Maximum number of final detections retained after NMS. Defaults to 750.
- dynamic_size (bool): If True, anchors are generated dynamically per input image size. Defaults to False.
- input_size (Tuple[int, int]): Fixed input size (width, height) used when `dynamic_size` is False. Ignored if `dynamic_size=True`.
+ **kwargs: Keyword arguments passed to BaseDetector and RetinaFace. Supported keys include:
+ model_name (RetinaFaceWeights, optional): Model weights to use. Defaults to `RetinaFaceWeights.MNET_V2`.
+ conf_thresh (float, optional): Confidence threshold for filtering detections. Defaults to 0.5.
+ nms_thresh (float, optional): Non-maximum suppression (NMS) IoU threshold. Defaults to 0.4.
+ pre_nms_topk (int, optional): Number of top-scoring boxes considered before NMS. Defaults to 5000.
+ post_nms_topk (int, optional): Max number of detections kept after NMS. Defaults to 750.
+ dynamic_size (bool, optional): If True, generate anchors dynamically per input image. Defaults to False.
+ input_size (Tuple[int, int], optional): Fixed input size (width, height) if `dynamic_size=False`. Defaults to (640, 640).
Attributes:
- conf_thresh (float): Threshold for filtering detections based on confidence score.
- nms_thresh (float): IoU threshold for NMS.
- pre_nms_topk (int): Limit on boxes considered before NMS.
- post_nms_topk (int): Limit on detections kept after NMS.
- dynamic_size (bool): Whether anchors are generated dynamically.
- input_size (Tuple[int, int]): Static input size when `dynamic_size` is False.
- _model_path (str): Path to verified model weights. (Internal)
- _priors (np.ndarray): Anchor boxes used for detection. Precomputed if static input size is used. (Internal)
+ model_name (RetinaFaceWeights): Selected model variant.
+ conf_thresh (float): Threshold for confidence-based filtering.
+ nms_thresh (float): IoU threshold used for NMS.
+ pre_nms_topk (int): Limit on proposals before applying NMS.
+ post_nms_topk (int): Limit on retained detections after NMS.
+ dynamic_size (bool): Flag indicating dynamic or static input sizing.
+ input_size (Tuple[int, int]): Static input size if `dynamic_size=False`.
+ _model_path (str): Absolute path to the verified model weights.
+ _priors (np.ndarray): Precomputed anchor boxes (if static size).
+ _supports_landmarks (bool): Indicates landmark prediction support.
Raises:
- ValueError: If model weights are invalid or not found.
- RuntimeError: If the model fails to initialize.
+ ValueError: If the model weights are invalid or not found.
+ RuntimeError: If the ONNX model fails to load or initialize.
"""
- def __init__(
- self,
- model_name: RetinaFaceWeights = RetinaFaceWeights.MNET_V2,
- conf_thresh: float = 0.5,
- nms_thresh: float = 0.4,
- pre_nms_topk: int = 5000,
- post_nms_topk: int = 750,
- dynamic_size: bool = False,
- input_size: Tuple[int, int] = (640, 640), # Default input size if dynamic_size=False
- ) -> None:
+ def __init__(self, **kwargs) -> None:
+ super().__init__(**kwargs)
+ self._supports_landmarks = True # RetinaFace supports landmarks
- self.conf_thresh = conf_thresh
- self.nms_thresh = nms_thresh
- self.pre_nms_topk = pre_nms_topk
- self.post_nms_topk = post_nms_topk
- self.dynamic_size = dynamic_size
- self.input_size = input_size
+ self.model_name = kwargs.get('model_name', RetinaFaceWeights.MNET_V2)
+ self.conf_thresh = kwargs.get('conf_thresh', 0.5)
+ self.nms_thresh = kwargs.get('nms_thresh', 0.4)
+ self.pre_nms_topk = kwargs.get('pre_nms_topk', 5000)
+ self.post_nms_topk = kwargs.get('post_nms_topk', 750)
+ self.dynamic_size = kwargs.get('dynamic_size', False)
+ self.input_size = kwargs.get('input_size', (640, 640))
Logger.info(
- f"Initializing RetinaFace with model={model_name}, conf_thresh={conf_thresh}, nms_thresh={nms_thresh}, "
- f"input_size={input_size}"
+ f"Initializing RetinaFace with model={self.model_name}, conf_thresh={self.conf_thresh}, nms_thresh={self.nms_thresh}, "
+ f"input_size={self.input_size}"
)
# Get path to model weights
- self._model_path = verify_model_weights(model_name)
+ self._model_path = verify_model_weights(self.model_name)
Logger.info(f"Verified model weights located at: {self._model_path}")
# Precompute anchors if using static size
- if not dynamic_size and input_size is not None:
- self._priors = generate_anchors(image_size=input_size)
+ if not self.dynamic_size and self.input_size is not None:
+ self._priors = generate_anchors(image_size=self.input_size)
Logger.debug("Generated anchors for static input size.")
# Initialize model
@@ -137,7 +137,7 @@ class RetinaFace:
max_num: int = 0,
metric: Literal["default", "max"] = "max",
center_weight: float = 2.0
- ) -> Tuple[np.ndarray, np.ndarray]:
+ ) -> List[Dict[str, Any]]:
"""
Perform face detection on an input image and return bounding boxes and facial landmarks.
@@ -151,9 +151,10 @@ class RetinaFace:
when using the "default" metric. Defaults to 2.0.
Returns:
- Tuple[np.ndarray, np.ndarray]:
- - detections: Bounding boxes with confidence scores. Shape (N, 5), each row as [x_min, y_min, x_max, y_max, score].
- - landmarks: Facial landmark coordinates. Shape (N, 5, 2), where each row contains 5 (x, y) points.
+ List[Dict[str, Any]]: List of face detection dictionaries, each containing:
+ - 'bbox': [x1, y1, x2, y2] - Bounding box coordinates
+ - 'confidence': float - Detection confidence score
+ - 'landmarks': [[x1, y1], [x2, y2], [x3, y3], [x4, y4], [x5, y5]] - 5-point facial landmarks
"""
original_height, original_width = image.shape[:2]
@@ -198,7 +199,16 @@ class RetinaFace:
detections = detections[sorted_indices]
landmarks = landmarks[sorted_indices]
- return detections, landmarks
+ faces = []
+ for i in range(detections.shape[0]):
+ face_dict = {
+ 'bbox': detections[i, :4].astype(float).tolist(),
+ 'confidence': detections[i, 4].item(),
+ 'landmarks': landmarks[i].astype(float).tolist()
+ }
+ faces.append(face_dict)
+
+ return faces
def postprocess(self, outputs: List[np.ndarray], resize_factor: float, shape: Tuple[int, int]) -> Tuple[np.ndarray, np.ndarray]:
"""
@@ -259,3 +269,64 @@ class RetinaFace:
landmarks = landmarks * landmark_scale / resize_factor
return boxes, landmarks
+
+
+# TODO: below is only for testing, remove it later
+def draw_bbox(frame, bbox, score, color=(0, 255, 0), thickness=2):
+ x1, y1, x2, y2 = map(int, bbox) # Unpack 4 bbox values
+ cv2.rectangle(frame, (x1, y1), (x2, y2), color, thickness)
+ cv2.putText(frame, f"{score:.2f}", (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)
+
+
+def draw_keypoints(frame, points, color=(0, 0, 255), radius=2):
+ for (x, y) in points.astype(np.int32):
+ cv2.circle(frame, (int(x), int(y)), radius, color, -1)
+
+
+if __name__ == "__main__":
+ import cv2
+ detector = RetinaFace(model_name=RetinaFaceWeights.MNET_050)
+ print(detector.get_info())
+ cap = cv2.VideoCapture(0)
+
+ if not cap.isOpened():
+ print("❌ Failed to open webcam.")
+ exit()
+
+ print("📷 Webcam started. Press 'q' to exit.")
+
+ while True:
+ ret, frame = cap.read()
+ if not ret:
+ print("❌ Failed to read frame.")
+ break
+
+ # Get face detections as list of dictionaries
+ faces = detector.detect(frame)
+
+ # Process each detected face
+ for face in faces:
+ # Extract bbox and landmarks from dictionary
+ bbox = face['bbox'] # [x1, y1, x2, y2]
+ landmarks = face['landmarks'] # [[x1, y1], [x2, y2], ...]
+ confidence = face['confidence']
+
+ # Pass bbox and confidence separately
+ draw_bbox(frame, bbox, confidence)
+
+ # Convert landmarks to numpy array format if needed
+ if landmarks is not None and len(landmarks) > 0:
+ # Convert list of [x, y] pairs to numpy array
+ points = np.array(landmarks, dtype=np.float32) # Shape: (5, 2)
+ draw_keypoints(frame, points)
+
+ # Display face count
+ cv2.putText(frame, f"Faces: {len(faces)}", (10, 30),
+ cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
+
+ cv2.imshow("FaceDetection", frame)
+ if cv2.waitKey(1) & 0xFF == ord("q"):
+ break
+
+ cap.release()
+ cv2.destroyAllWindows()
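
Since `detect()` now returns a list of dictionaries rather than the former `(detections, landmarks)` array pair, downstream code written against the old API needs a small adapter. A sketch of such a conversion (the helper name is illustrative, not part of the library):

```python
import numpy as np


def faces_to_arrays(faces):
    """Convert list-of-dicts detect() output back to the legacy array pair:
    detections of shape (N, 5) as [x1, y1, x2, y2, score], landmarks of shape (N, 5, 2)."""
    if not faces:
        return np.empty((0, 5), dtype=np.float32), np.empty((0, 5, 2), dtype=np.float32)
    detections = np.array(
        [face["bbox"] + [face["confidence"]] for face in faces], dtype=np.float32
    )
    landmarks = np.array([face["landmarks"] for face in faces], dtype=np.float32)
    return detections, landmarks
```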
diff --git a/uniface/detection/scrfd.py b/uniface/detection/scrfd.py
index 610ba79..e4966ee 100644
--- a/uniface/detection/scrfd.py
+++ b/uniface/detection/scrfd.py
@@ -1,25 +1,24 @@
# Copyright 2025 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
-# Modified from insightface repository
-import os
import cv2
import numpy as np
import onnxruntime as ort
-from typing import Tuple, List, Literal
+from typing import Tuple, List, Literal, Dict, Any
from uniface.log import Logger
from uniface.constants import SCRFDWeights
from uniface.model_store import verify_model_weights
+from .base import BaseDetector
from .utils import non_max_supression, distance2bbox, distance2kps, resize_image
__all__ = ['SCRFD']
-class SCRFD:
+class SCRFD(BaseDetector):
"""
Face detector based on the SCRFD architecture.
@@ -27,10 +26,12 @@ class SCRFD:
Paper: https://arxiv.org/abs/2105.04714
Args:
- model_name (SCRFDWeights): Predefined model enum (e.g., `SCRFD_10G_KPS`). Specifies the SCRFD variant to load.
- conf_thresh (float): Confidence threshold for filtering detections. Defaults to 0.5.
- nms_thresh (float): Non-Maximum Suppression (NMS) threshold. Defaults to 0.4.
- input_size (Tuple[int, int]): Target input resolution (width, height) to resize images. Defaults to (640, 640).
+ **kwargs: Keyword arguments passed to BaseDetector and SCRFD. Supported keys include:
+ model_name (SCRFDWeights, optional): Predefined model enum (e.g., `SCRFD_10G_KPS`).
+ Specifies the SCRFD variant to load. Defaults to SCRFD_10G_KPS.
+ conf_thresh (float, optional): Confidence threshold for filtering detections. Defaults to 0.5.
+ nms_thresh (float, optional): Non-Maximum Suppression threshold. Defaults to 0.4.
+ input_size (Tuple[int, int], optional): Input image size (width, height). Defaults to (640, 640).
Attributes:
conf_thresh (float): Threshold used to filter low-confidence detections.
@@ -47,13 +48,14 @@ class SCRFD:
RuntimeError: If the ONNX model fails to load or initialize.
"""
- def __init__(
- self,
- model_name: SCRFDWeights = SCRFDWeights.SCRFD_10G_KPS,
- conf_thresh: float = 0.5,
- nms_thresh: float = 0.4,
- input_size: Tuple[int, int] = (640, 640),
- ) -> None:
+ def __init__(self, **kwargs) -> None:
+ super().__init__(**kwargs)
+ self._supports_landmarks = True # SCRFD supports landmarks
+
+ model_name = kwargs.get('model_name', SCRFDWeights.SCRFD_10G_KPS)
+ conf_thresh = kwargs.get('conf_thresh', 0.5)
+ nms_thresh = kwargs.get('nms_thresh', 0.4)
+ input_size = kwargs.get('input_size', (640, 640))
self.conf_thresh = conf_thresh
self.nms_thresh = nms_thresh
@@ -179,7 +181,7 @@ class SCRFD:
max_num: int = 0,
metric: Literal["default", "max"] = "max",
center_weight: float = 2
- ) -> Tuple[np.ndarray, np.ndarray]:
+ ) -> List[Dict[str, Any]]:
"""
Perform face detection on an input image and return bounding boxes and facial landmarks.
@@ -193,9 +195,10 @@ class SCRFD:
when using the "default" metric. Defaults to 2.0.
Returns:
- Tuple[np.ndarray, np.ndarray]:
- - detections: Bounding boxes with confidence scores. Shape (N, 5), each row as [x_min, y_min, x_max, y_max, score].
- - landmarks: Facial landmark coordinates. Shape (N, 5, 2), where each row contains 5 (x, y) points.
+ List[Dict[str, Any]]: List of face detection dictionaries, each containing:
+ - 'bbox': [x1, y1, x2, y2] - Bounding box coordinates
+ - 'confidence': float - Detection confidence score
+ - 'landmarks': [[x1, y1], [x2, y2], [x3, y3], [x4, y4], [x5, y5]] - 5-point facial landmarks
"""
original_height, original_width = image.shape[:2]
@@ -221,20 +224,20 @@ class SCRFD:
keep = non_max_supression(pre_det, threshold=self.nms_thresh)
- det = pre_det[keep, :]
+ detections = pre_det[keep, :]
landmarks = landmarks[order, :, :]
landmarks = landmarks[keep, :, :].astype(np.int32)
- if 0 < max_num < det.shape[0]:
+ if 0 < max_num < detections.shape[0]:
# Calculate area of detections
- area = (det[:, 2] - det[:, 0]) * (det[:, 3] - det[:, 1])
+ area = (detections[:, 2] - detections[:, 0]) * (detections[:, 3] - detections[:, 1])
# Calculate offsets from image center
center = (original_height // 2, original_width // 2)
offsets = np.vstack(
[
- (det[:, 0] + det[:, 2]) / 2 - center[1],
- (det[:, 1] + det[:, 3]) / 2 - center[0],
+ (detections[:, 0] + detections[:, 2]) / 2 - center[1],
+ (detections[:, 1] + detections[:, 3]) / 2 - center[0],
]
)
@@ -247,30 +250,36 @@ class SCRFD:
# Sort by scores and select top `max_num`
sorted_indices = np.argsort(values)[::-1][:max_num]
- det = det[sorted_indices]
+ detections = detections[sorted_indices]
landmarks = landmarks[sorted_indices]
- return det, landmarks
+ faces = []
+ for i in range(detections.shape[0]):
+ face_dict = {
+ 'bbox': detections[i, :4].astype(float).tolist(),
+ 'confidence': detections[i, 4].item(),
+ 'landmarks': landmarks[i].astype(float).tolist()
+ }
+ faces.append(face_dict)
+
+ return faces
+
# TODO: below is only for testing, remove it later
-
-
-def draw_bbox(frame, bbox, color=(0, 255, 0), thickness=2):
- x1, y1, x2, y2 = bbox[:4].astype(np.int32)
+def draw_bbox(frame, bbox, score, color=(0, 255, 0), thickness=2):
+ x1, y1, x2, y2 = map(int, bbox) # Unpack 4 bbox values
cv2.rectangle(frame, (x1, y1), (x2, y2), color, thickness)
- score = bbox[4]
cv2.putText(frame, f"{score:.2f}", (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)
def draw_keypoints(frame, points, color=(0, 0, 255), radius=2):
for (x, y) in points.astype(np.int32):
- cv2.circle(frame, (x, y), radius, color, -1)
+ cv2.circle(frame, (int(x), int(y)), radius, color, -1)
-# TODO: Remove late, just for testing
-
if __name__ == "__main__":
detector = SCRFD(model_name=SCRFDWeights.SCRFD_500M_KPS)
+ print(detector.get_info())
cap = cv2.VideoCapture(0)
if not cap.isOpened():
@@ -285,14 +294,29 @@ if __name__ == "__main__":
print("❌ Failed to read frame.")
break
- boxes_list, points_list = detector.detect(frame)
+ # Get face detections as list of dictionaries
+ faces = detector.detect(frame)
- for boxes, points in zip(boxes_list, points_list):
- draw_bbox(frame, boxes)
+ # Process each detected face
+ for face in faces:
+ # Extract bbox and landmarks from dictionary
+ bbox = face['bbox'] # [x1, y1, x2, y2]
+ landmarks = face['landmarks'] # [[x1, y1], [x2, y2], ...]
+ confidence = face['confidence']
- if points is not None:
+ # Pass bbox and confidence separately
+ draw_bbox(frame, bbox, confidence)
+
+ # Convert landmarks to numpy array format if needed
+ if landmarks is not None and len(landmarks) > 0:
+ # Convert list of [x, y] pairs to numpy array
+ points = np.array(landmarks, dtype=np.float32) # Shape: (5, 2)
draw_keypoints(frame, points)
+ # Display face count
+ cv2.putText(frame, f"Faces: {len(faces)}", (10, 30),
+ cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
+
cv2.imshow("FaceDetection", frame)
if cv2.waitKey(1) & 0xFF == ord("q"):
break
diff --git a/uniface/landmark/__init__.py b/uniface/landmark/__init__.py
index e69de29..2271f13 100644
--- a/uniface/landmark/__init__.py
+++ b/uniface/landmark/__init__.py
@@ -0,0 +1 @@
+from .model import Landmark
diff --git a/uniface/landmark/model.py b/uniface/landmark/model.py
index 955c7eb..39630f8 100644
--- a/uniface/landmark/model.py
+++ b/uniface/landmark/model.py
@@ -1,7 +1,10 @@
+# Copyright 2025 Yakhyokhuja Valikhujaev
+# Author: Yakhyokhuja Valikhujaev
+# GitHub: https://github.com/yakhyo
+
import cv2
-import onnx
-import onnxruntime as ort
import numpy as np
+import onnxruntime as ort
from typing import Tuple
@@ -15,12 +18,32 @@ __all__ = ['Landmark']
class Landmark:
"""
- Facial landmark detection model for predicting facial keypoints.
- """
+    Facial landmark detection model for predicting 106 facial keypoints with an ONNX model.
+
+ This class wraps a pretrained facial landmark model to detect 106 key facial points
+ such as eyes, eyebrows, nose, lips, and jawline from a given face bounding box.
+ It handles model verification, input preprocessing, ONNX inference execution,
+ and projection of landmark coordinates back to the original image space.
+
+ Attributes:
+ input_size (Tuple[int, int]): Model's expected input resolution (width, height).
+ input_mean (float): Mean value used for input normalization.
+ input_std (float): Standard deviation used for input normalization.
+ model_path (str): Path to the verified ONNX model file.
+ session (onnxruntime.InferenceSession): ONNX Runtime session for inference.
+ input_names (List[str]): List of input node names.
+ output_names (List[str]): List of output node names.
+ lmk_dim (int): Number of dimensions per landmark point (typically 2 for x, y).
+        lmk_num (int): Total number of landmark points predicted by the model (106).
+
+    Args:
+ model_name (LandmarkWeights): Enum specifying the landmark model to load.
+ input_size (Tuple[int, int]): Resolution for model input; defaults to (192, 192).
+ """
+
def __init__(
- self,
- model_name: LandmarkWeights = LandmarkWeights.DEFAULT,
+ self,
+ model_name: LandmarkWeights = LandmarkWeights.DEFAULT,
input_size: Tuple[int, int] = (192, 192)
) -> None:
"""
@@ -50,7 +73,7 @@ class Landmark:
def _initialize_model(self):
"""
Initialize the ONNX model from the stored model path.
-
+
Raises:
RuntimeError: If the model fails to load or initialize.
"""
@@ -73,7 +96,7 @@ class Landmark:
output_shape = self.session.get_outputs()[0].shape
self.lmk_dim = 2 # x,y coordinates
self.lmk_num = output_shape[1] // self.lmk_dim # Number of landmarks
-
+
Logger.info(f"Model initialized with {self.lmk_num} landmarks")
except Exception as e:
@@ -96,7 +119,7 @@ class Landmark:
# Calculate face dimensions and center
width, height = bbox[2] - bbox[0], bbox[3] - bbox[1]
center = (bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2
-
+
# Determine scale to fit face with some margin
scale = self.input_size[0] / (max(width, height) * 1.5)
rotation = 0.0
@@ -105,7 +128,7 @@ class Landmark:
aligned_face, transform_matrix = bbox_center_alignment(
image, center, self.input_size[0], scale, rotation
)
-
+
# Convert to blob format for inference
face_blob = cv2.dnn.blobFromImage(
aligned_face,
@@ -114,7 +137,7 @@ class Landmark:
(self.input_mean, self.input_mean, self.input_mean),
swapRB=True # Convert BGR to RGB
)
-
+
return face_blob, transform_matrix
def postprocess(self, predictions: np.ndarray, transform_matrix: np.ndarray) -> np.ndarray:
@@ -154,13 +177,13 @@ class Landmark:
"""
# Preprocess image
face_blob, transform_matrix = self.preprocess(image, bbox)
-
+
# Run inference
raw_predictions = self.session.run(
- self.output_names,
+ self.output_names,
{self.input_names[0]: face_blob}
)[0][0]
-
+
# Postprocess to get landmarks in original image space
landmarks = self.postprocess(raw_predictions, transform_matrix)
@@ -172,7 +195,7 @@ class Landmark:
if __name__ == "__main__":
from uniface.detection import RetinaFace
from uniface.constants import RetinaFaceWeights
-
+
face_detector = RetinaFace(
model_name=RetinaFaceWeights.MNET_V2,
conf_thresh=0.5,
diff --git a/uniface/recognition/base.py b/uniface/recognition/base.py
index b804cdb..3f98a2c 100644
--- a/uniface/recognition/base.py
+++ b/uniface/recognition/base.py
@@ -1,22 +1,21 @@
# Copyright 2025 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
-# Modified from insightface repository
-import os
import cv2
import numpy as np
import onnxruntime as ort
-from typing import Tuple, Optional, Union, List
from dataclasses import dataclass
+from typing import Tuple, Union, List
+
from uniface.log import Logger
from uniface.model_store import verify_model_weights
-from uniface.face_utils import compute_similarity, face_alignment
+from uniface.face_utils import face_alignment
from uniface.constants import SphereFaceWeights, MobileFaceWeights, ArcFaceWeights
-__all__ = ["BaseFaceEncoder", "PreprocessConfig"]
+__all__ = ["BaseModel", "PreprocessConfig"]
@dataclass
@@ -29,7 +28,7 @@ class PreprocessConfig:
input_size: Tuple[int, int] = (112, 112)
-class BaseFaceEncoder:
+class BaseModel:
"""
Unified Face Encoder supporting multiple model families (e.g., SphereFace, MobileFace).
"""
@@ -133,7 +132,7 @@ class BaseFaceEncoder:
def get_embedding(self, image: np.ndarray, landmarks: np.ndarray) -> np.ndarray:
"""
- Extracts face embedding from an aligned image.
+ Extracts face embedding from an image.
Args:
image: Input face image (BGR format).
@@ -150,3 +149,17 @@ class BaseFaceEncoder:
embedding = self.session.run(self.output_names, {self.input_name: face_blob})[0]
return embedding
+
+ def get_normalized_embedding(self, image: np.ndarray, landmarks: np.ndarray) -> np.ndarray:
+ """
+        Extracts an L2-normalized face embedding vector from an image.
+
+ Args:
+ image: Input face image (BGR format).
+ landmarks: Facial landmarks (5 points for alignment).
+
+ Returns:
+            Normalized face embedding vector (typically 512-dimensional).
+ """
+ embedding = self.get_embedding(image, landmarks)
+ return embedding / np.linalg.norm(embedding)
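
Because `get_normalized_embedding` returns unit-length vectors, cosine similarity between two faces reduces to a dot product. A minimal end-to-end sketch (file names are illustrative; any encoder built on this base class works the same way):

```python
import cv2
import numpy as np

from uniface import detect_faces
from uniface.recognition.models import ArcFace

recognizer = ArcFace()


def embed(path):
    image = cv2.imread(path)  # illustrative input file
    faces = detect_faces(image)
    assert faces, f"no face found in {path}"
    landmarks = np.array(faces[0]["landmarks"], dtype=np.float32)
    return recognizer.get_normalized_embedding(image, landmarks)


emb_a = embed("person_a.jpg")
emb_b = embed("person_b.jpg")

# Unit-length embeddings: the dot product equals the cosine similarity.
similarity = float(np.dot(emb_a.ravel(), emb_b.ravel()))
print(f"cosine similarity: {similarity:.4f}")
```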
diff --git a/uniface/recognition/models.py b/uniface/recognition/models.py
index 9b93176..593f18f 100644
--- a/uniface/recognition/models.py
+++ b/uniface/recognition/models.py
@@ -5,13 +5,24 @@
from typing import Optional
from uniface.constants import SphereFaceWeights, MobileFaceWeights, ArcFaceWeights
-from .base import BaseFaceEncoder, PreprocessConfig
+from .base import BaseModel, PreprocessConfig
__all__ = ["SphereFace", "MobileFace", "ArcFace"]
-class SphereFace(BaseFaceEncoder):
+class SphereFace(BaseModel):
+ """
+ SphereFace face encoder class.
+
+ This class loads a SphereFace model for face embedding extraction.
+ It supports configurable preprocessing, with a default mean/std and input size of 112x112.
+
+ Args:
+ model_name (SphereFaceWeights): Enum value representing the model to load. Defaults to SphereFaceWeights.SPHERE20.
+ preprocessing (Optional[PreprocessConfig]): Preprocessing config (mean, std, size). Defaults to standard 112x112 with normalization.
+ """
+
def __init__(
self, model_name: SphereFaceWeights = SphereFaceWeights.SPHERE20,
preprocessing: Optional[PreprocessConfig] = None
@@ -25,7 +36,18 @@ class SphereFace(BaseFaceEncoder):
super().__init__(model_name=model_name, preprocessing=preprocessing)
-class MobileFace(BaseFaceEncoder):
+class MobileFace(BaseModel):
+ """
+ MobileFace face encoder class.
+
+ Loads a lightweight MobileFaceNet model for fast face embedding extraction.
+    Default input normalization and resizing are applied if preprocessing is not provided.
+
+ Args:
+ model_name (MobileFaceWeights): Enum value specifying the MobileFace model. Defaults to MobileFaceWeights.MNET_V2.
+ preprocessing (Optional[PreprocessConfig]): Preprocessing config. If None, uses standard normalization and 112x112 input size.
+ """
+
def __init__(
self, model_name: MobileFaceWeights = MobileFaceWeights.MNET_V2,
preprocessing: Optional[PreprocessConfig] = None
@@ -39,7 +61,18 @@ class MobileFace(BaseFaceEncoder):
super().__init__(model_name=model_name)
-class ArcFace(BaseFaceEncoder):
+class ArcFace(BaseModel):
+ """
+ ArcFace face encoder class.
+
+ Loads an ArcFace model (e.g., ResNet-based) for robust face recognition embedding generation.
+ Applies standard preprocessing unless overridden.
+
+ Args:
+ model_name (ArcFaceWeights): Enum for the ArcFace model variant. Defaults to ArcFaceWeights.MNET.
+ preprocessing (Optional[PreprocessConfig]): Preprocessing settings. Defaults to standard normalization and resizing if not specified.
+ """
+
def __init__(
self, model_name: ArcFaceWeights = ArcFaceWeights.MNET,
preprocessing: Optional[PreprocessConfig] = None