From b15504dfc5c06ccb3efabf8a0040e1b0b365d6b2 Mon Sep 17 00:00:00 2001
From: yakhyo
Date: Wed, 2 Jul 2025 16:32:50 +0900
Subject: [PATCH] feat: update face detection module

---
 README.md                       |  32 +++----
 docs/installation.md            |  38 +++++++-
 docs/reference/attribute.md     |   9 ++
 docs/reference/detection.md     |  11 ++-
 docs/reference/landmark.md      |   5 +
 docs/reference/recognition.md   |  14 ++-
 examples/face_detection.ipynb   |   6 +-
 mkdocs.yml                      |   2 +
 requirements_mkdocs.txt         |   2 +
 scripts/run_recognition.py      |   8 +-
 uniface/__init__.py             |  25 +++--
 uniface/attribute/age_gender.py |  25 ++++-
 uniface/attribute/emotion.py    |   3 +-
 uniface/detection/__init__.py   | 146 +++++++++++++++++++++++++++-
 uniface/detection/base.py       | 100 +++++++++++++++++++
 uniface/detection/retinaface.py | 165 +++++++++++++++++++++++---------
 uniface/detection/scrfd.py      | 100 +++++++++++--------
 uniface/landmark/__init__.py    |   1 +
 uniface/landmark/model.py       |  53 +++++++---
 uniface/recognition/base.py     |  27 ++++--
 uniface/recognition/models.py   |  41 +++++++-
 21 files changed, 656 insertions(+), 157 deletions(-)
 create mode 100644 docs/reference/attribute.md
 create mode 100644 docs/reference/landmark.md
 create mode 100644 uniface/detection/base.py

diff --git a/README.md b/README.md
index 532b2b2..5ff90c3 100644
--- a/README.md
+++ b/README.md
@@ -19,14 +19,14 @@
 
 ## Features
 
-| Date       | Feature Description                                                                                               |
-| ---------- | ----------------------------------------------------------------------------------------------------------------- |
-| Planned    | 🎭 **Age and Gender Detection**: Planned feature for predicting age and gender from facial images.                 |
-| Planned    | 🧩 **Face Recognition**: Upcoming capability to identify and verify faces.                                         |
-| 2024-11-21 | 🔄 **Face Alignment**: Added precise face alignment for better downstream tasks.                                   |
-| 2024-11-20 | ⚡ **High-Speed Face Detection**: ONNX model integration for faster and efficient face detection.                  |
-| 2024-11-20 | 🎯 **Facial Landmark Localization**: Accurate detection of key facial features like eyes, nose, and mouth.         |
-| 2024-11-20 | 🛠 **API for Inference and Visualization**: Simplified API for seamless inference and visual results generation.   |
+| Date       | Feature Description                                                                                                     |
+| ---------- | ----------------------------------------------------------------------------------------------------------------------- |
+| Planned    | 🎭**Age and Gender Detection**: Planned feature for predicting age and gender from facial images.                        |
+| Planned    | 🧩**Face Recognition**: Upcoming capability to identify and verify faces.                                                |
+| 2024-11-21 | 🔄**Face Alignment**: Added precise face alignment for better downstream tasks.                                          |
+| 2024-11-20 | ⚡**High-Speed Face Detection**: ONNX model integration for fast and efficient face detection.                           |
+| 2024-11-20 | 🎯**Facial Landmark Localization**: Accurate detection of key facial features like eyes, nose, and mouth.                |
+| 2024-11-20 | 🛠**API for Inference and Visualization**: Simplified API for seamless inference and visual results generation.          |
 
 ---
 
@@ -43,7 +43,7 @@ To work with the latest version of **UniFace**, which may not yet be released on
 ```bash
 git clone https://github.com/yakhyo/uniface.git
 cd uniface
-pip install .
+pip install -e .
 ```
 
 ---
 
@@ -179,13 +179,13 @@ cv2.destroyAllWindows()
 
 ### Evaluation results of available models on WiderFace
 
-| RetinaFace Models  | Easy       | Medium     | Hard       |
-| ------------------ | ---------- | ---------- | ---------- |
-| retinaface_mnet025 | 88.48%     | 87.02%     | 80.61%     |
-| retinaface_mnet050 | 89.42%     | 87.97%     | 82.40%     |
-| retinaface_mnet_v1 | 90.59%     | 89.14%     | 84.13%     |
-| retinaface_mnet_v2 | 91.70%     | 91.03%     | 86.60%     |
-| retinaface_r18     | 92.50%     | 91.02%     | 86.63%     |
+| RetinaFace Models  | Easy             | Medium           | Hard             |
+| ------------------ | ---------------- | ---------------- | ---------------- |
+| retinaface_mnet025 | 88.48%           | 87.02%           | 80.61%           |
+| retinaface_mnet050 | 89.42%           | 87.97%           | 82.40%           |
+| retinaface_mnet_v1 | 90.59%           | 89.14%           | 84.13%           |
+| retinaface_mnet_v2 | 91.70%           | 91.03%           | 86.60%           |
+| retinaface_r18     | 92.50%           | 91.02%           | 86.63%           |
 | retinaface_r34     | **94.16%**       | **93.12%**       | **88.90%**       |
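The models in the table above are all reachable through the new high-level API this patch introduces. As a quick orientation, here is a minimal sketch of the dictionary-based detection flow (the image path is a placeholder and the printed values are illustrative):

```python
import cv2

from uniface import detect_faces

image = cv2.imread("assets/test.jpg")  # placeholder path

# One call; the configured detector instance is created once and cached internally.
faces = detect_faces(image, method="retinaface", conf_thresh=0.8)

for face in faces:
    x1, y1, x2, y2 = face["bbox"]   # box corners as floats
    score = face["confidence"]      # detection confidence
    points = face["landmarks"]      # five [x, y] landmark pairs
    print(f"({x1:.0f}, {y1:.0f})-({x2:.0f}, {y2:.0f})  score={score:.2f}  landmarks={len(points)}")
```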
diff --git a/docs/installation.md b/docs/installation.md
index c7df120..02ed78f 100644
--- a/docs/installation.md
+++ b/docs/installation.md
@@ -1,3 +1,37 @@
-# Installation
+# 🚀 Installation
 
-Instructions to install UniFace.
\ No newline at end of file
+## 📦 Install from PyPI
+
+### CPU-only (default):
+
+```bash
+pip install uniface
+```
+
+This installs the CPU build of ONNX Runtime (`onnxruntime`) and all core dependencies.
+
+### GPU support:
+
+```bash
+pip install "uniface[gpu]"
+```
+
+This installs `onnxruntime-gpu` for accelerated inference on supported NVIDIA GPUs.
+Make sure your system meets the [ONNX Runtime GPU requirements](https://onnxruntime.ai/docs/build/eps.html#cuda).
+
+---
+
+## 🔧 Install from GitHub (latest version)
+
+Clone the repository and install it manually:
+
+```bash
+git clone https://github.com/yakhyo/uniface.git
+cd uniface
+
+# CPU version
+pip install .
+
+# Or with GPU support
+pip install ".[gpu]"
+```
diff --git a/docs/reference/attribute.md b/docs/reference/attribute.md
new file mode 100644
index 0000000..cd17483
--- /dev/null
+++ b/docs/reference/attribute.md
@@ -0,0 +1,9 @@
+# Facial Attribute API Reference
+
+## Age and Gender Model
+
+::: uniface.attribute.age_gender.AgeGender
+
+## Emotion Model
+
+::: uniface.attribute.emotion.Emotion
diff --git a/docs/reference/detection.md b/docs/reference/detection.md
index c013de3..b53a52d 100644
--- a/docs/reference/detection.md
+++ b/docs/reference/detection.md
@@ -1,3 +1,10 @@
-# Detection API Reference
+# Face Detection API Reference
 
-::: uniface.RetinaFace
+## RetinaFace
+
+::: uniface.detection.retinaface.RetinaFace
+
+
+## SCRFD
+
+::: uniface.detection.scrfd.SCRFD
diff --git a/docs/reference/landmark.md b/docs/reference/landmark.md
new file mode 100644
index 0000000..1ff3165
--- /dev/null
+++ b/docs/reference/landmark.md
@@ -0,0 +1,5 @@
+# Landmark API Reference
+
+## Landmark Model
+
+::: uniface.landmark.model.Landmark
diff --git a/docs/reference/recognition.md b/docs/reference/recognition.md
index 91933e0..5691d71 100644
--- a/docs/reference/recognition.md
+++ b/docs/reference/recognition.md
@@ -1,13 +1,17 @@
-# Recognition API Reference
+# Face Recognition API Reference
 
+## SphereFace
 
-# SphereFace Model
 ::: uniface.recognition.models.SphereFace
 
+## MobileFace
 
-# MobileFace Model
 :::uniface.recognition.models.MobileFace
 
+## ArcFace
 
-# Base Face Encoder Model
-:::uniface.recognition.base.BaseFaceEncoder
\ No newline at end of file
+::: uniface.recognition.models.ArcFace
+
+## BaseModel class
+
+::: uniface.recognition.base.BaseModel
diff --git a/examples/face_detection.ipynb b/examples/face_detection.ipynb
index f3447b6..6f03d9c 100644
--- a/examples/face_detection.ipynb
+++ b/examples/face_detection.ipynb
@@ -30,7 +30,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -265,7 +265,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "base",
+   "display_name": "uniface",
    "language": "python",
    "name": "python3"
   },
@@ -279,7 +279,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.12.2"
+   "version": "3.12.11"
   }
  },
 "nbformat": 4,
diff --git a/mkdocs.yml b/mkdocs.yml
index d80ce67..fff0dd9 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -70,6 +70,8 @@ nav:
   - API Reference:
       - Detection: reference/detection.md
       - Recognition: reference/recognition.md
+      - Landmark: reference/landmark.md
+      - Attribute: reference/attribute.md
   - About:
       - Changelog: about/changelog.md
      - License: about/license.md
diff --git a/requirements_mkdocs.txt b/requirements_mkdocs.txt
index ac7dc76..fd499df 100644
--- a/requirements_mkdocs.txt
+++ b/requirements_mkdocs.txt
@@ -1 +1,3 @@
+mkdocs-material
+mkdocs-minify-plugin
 mkdocstrings[python]
diff --git a/scripts/run_recognition.py b/scripts/run_recognition.py
index f1b82e4..183aead 100644
--- a/scripts/run_recognition.py
+++ b/scripts/run_recognition.py
@@ -29,11 +29,11 @@ def run_inference(detector, recognizer, image_path):
 
     print(f"Detected {len(boxes)} face(s). Extracting embeddings...")
 
-    for i, landmark in enumerate(landmarks):
+    for i, landmark in enumerate(landmarks[:1]):
         embedding = recognizer.get_embedding(image, landmark)
-        norm = np.linalg.norm(embedding)
-        print(f"\nFace {i} embedding (L2 norm = {norm:.4f}):")
-        print(embedding)
+        norm_embedding = recognizer.get_normalized_embedding(image, landmark)
+        print("embedding:", np.sum(embedding))
+        print("norm embedding:", np.sum(norm_embedding))
 
 
 def main():
diff --git a/uniface/__init__.py b/uniface/__init__.py
index 3250e33..094f77a 100644
--- a/uniface/__init__.py
+++ b/uniface/__init__.py
@@ -15,21 +15,32 @@
 __license__ = "MIT"
 __author__ = "Yakhyokhuja Valikhujaev"
 __version__ = "0.1.8"
 
+from .detection import detect_faces, create_detector, list_available_detectors
 
-from uniface.retinaface import RetinaFace
-from uniface.log import Logger
-from uniface.model_store import verify_model_weights
 from uniface.face_utils import face_alignment, compute_similarity
+from uniface.model_store import verify_model_weights
 from uniface.visualization import draw_detections
+from uniface.log import Logger
+
+
 __all__ = [
+    # Metadata
     "__version__",
     "__author__",
     "__license__",
-    "RetinaFace",
-    "Logger",
-    "verify_model_weights",
-    "draw_detections",
+
+    # Core functions
+    "detect_faces",
+    "create_detector",
+    "list_available_detectors",
+
+    # Utility functions
     "face_alignment",
     "compute_similarity",
+    "verify_model_weights",
+    "draw_detections",
+
+    # Classes
+    "Logger",
 ]
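With the reorganized exports above, everything stays importable from the package root. A quick sanity check (a sketch, assuming the package is installed from this branch):

```python
import uniface

print(uniface.__version__)                         # "0.1.8" at this commit
print(sorted(uniface.list_available_detectors()))  # ['retinaface', 'scrfd']
```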
diff --git a/uniface/attribute/age_gender.py b/uniface/attribute/age_gender.py
index faab4fd..efb844e 100644
--- a/uniface/attribute/age_gender.py
+++ b/uniface/attribute/age_gender.py
@@ -1,6 +1,11 @@
+# Copyright 2025 Yakhyokhuja Valikhujaev
+# Author: Yakhyokhuja Valikhujaev
+# GitHub: https://github.com/yakhyo
+
 import cv2
 import numpy as np
 import onnxruntime as ort
+
 from typing import Tuple
 
 from uniface.log import Logger
@@ -14,10 +19,24 @@ __all__ = ["AgeGender"]
 
 class AgeGender:
     """
-    Age and Gender Prediction Model.
+    Age and gender prediction model using ONNX Runtime.
 
-    This model predicts both a person's gender (male/female) and age from a facial image.
-    Gender is returned as an integer (0: female, 1: male) and age as years.
+    Loads a pretrained ONNX model to predict both age (in years) and gender
+    (0: female, 1: male) from a detected face region. Handles model loading,
+    preprocessing, inference, and output interpretation.
+
+    Attributes:
+        input_size (Tuple[int, int]): Model's expected input resolution (width, height).
+        input_mean (float): Mean value used for input normalization.
+        input_std (float): Standard deviation used for input normalization.
+        model_path (str): Path to the verified ONNX model file.
+        session (onnxruntime.InferenceSession): ONNX Runtime session for inference.
+        input_names (List[str]): List of input node names.
+        output_names (List[str]): List of output node names.
+
+    Args:
+        model_name (AgeGenderWeights): Enum specifying the age-gender model to load.
+        input_size (Tuple[int, int]): Resolution for model input; defaults to (112, 112).
     """
 
     def __init__(
diff --git a/uniface/attribute/emotion.py b/uniface/attribute/emotion.py
index 8d49993..17a3c19 100644
--- a/uniface/attribute/emotion.py
+++ b/uniface/attribute/emotion.py
@@ -3,9 +3,8 @@
 # GitHub: https://github.com/yakhyo
 
 import cv2
-import numpy as np
 import torch
-from PIL import Image
+import numpy as np
 
 from typing import Tuple, Union
 
diff --git a/uniface/detection/__init__.py b/uniface/detection/__init__.py
index 85f5beb..95ce09d 100644
--- a/uniface/detection/__init__.py
+++ b/uniface/detection/__init__.py
@@ -1,3 +1,145 @@
-from .retinaface import RetinaFace
+# Copyright 2025 Yakhyokhuja Valikhujaev
+# Author: Yakhyokhuja Valikhujaev
+# GitHub: https://github.com/yakhyo
+
+
+import numpy as np
+from typing import Tuple, Dict, Any, List
+
 from .scrfd import SCRFD
-from uniface.visualization import draw_detections
+from .base import BaseDetector
+from .retinaface import RetinaFace
+
+# Global cache for detector instances
+_detector_cache: Dict[str, BaseDetector] = {}
+
+
+def detect_faces(image: np.ndarray, method: str = 'retinaface', **kwargs) -> List[Dict[str, Any]]:
+    """
+    High-level face detection function.
+
+    Args:
+        image (np.ndarray): Input image as a numpy array.
+        method (str): Detection method to use. Options: 'retinaface', 'scrfd'.
+        **kwargs: Additional arguments passed to the detector.
+
+    Returns:
+        List[Dict[str, Any]]: A list of dictionaries, where each dictionary represents a detected face and contains:
+            - 'bbox' (List[float]): [x1, y1, x2, y2] bounding box coordinates.
+            - 'confidence' (float): The confidence score of the detection.
+            - 'landmarks' (List[List[float]]): 5-point facial landmarks.
+
+    Example:
+        >>> from uniface import detect_faces
+        >>> image = cv2.imread("your_image.jpg")
+        >>> faces = detect_faces(image, method='retinaface', conf_thresh=0.8)
+        >>> for face in faces:
+        ...     print(f"Found face with confidence: {face['confidence']}")
+        ...     print(f"BBox: {face['bbox']}")
+    """
+    method_name = method.lower()
+
+    sorted_kwargs = sorted(kwargs.items())
+    cache_key = f"{method_name}_{str(sorted_kwargs)}"
+
+    if cache_key not in _detector_cache:
+        # Pass kwargs through so the cached detector is configured correctly
+        _detector_cache[cache_key] = create_detector(method, **kwargs)
+
+    detector = _detector_cache[cache_key]
+    return detector.detect(image)
+
+
+def create_detector(method: str = 'retinaface', **kwargs) -> BaseDetector:
+    """
+    Factory function to create face detectors.
+
+    Args:
+        method (str): Detection method. Options:
+            - 'retinaface': RetinaFace detector (default)
+            - 'scrfd': SCRFD detector (fast and accurate)
+        **kwargs: Detector-specific parameters
+
+    Returns:
+        BaseDetector: Initialized detector instance
+
+    Raises:
+        ValueError: If the method is not supported
+
+    Examples:
+        >>> # Basic usage
+        >>> detector = create_detector('retinaface')
+
+        >>> # SCRFD detector with custom parameters
+        >>> detector = create_detector(
+        ...     'scrfd',
+        ...     model_name=SCRFDWeights.SCRFD_10G_KPS,
+        ...     conf_thresh=0.8,
+        ...     input_size=(640, 640)
+        ... )
+
+        >>> # RetinaFace detector
+        >>> detector = create_detector(
+        ...     'retinaface',
+        ...     model_name=RetinaFaceWeights.MNET_V2,
+        ...     conf_thresh=0.8,
+        ...     nms_thresh=0.4
) + """ + method = method.lower() + + if method == 'retinaface': + return RetinaFace(**kwargs) + + elif method == 'scrfd': + return SCRFD(**kwargs) + + else: + available_methods = ['retinaface', 'scrfd'] + raise ValueError( + f"Unsupported detection method: '{method}'. " + f"Available methods: {available_methods}" + ) + + +def list_available_detectors() -> Dict[str, Dict[str, Any]]: + """ + List all available detection methods with their descriptions and parameters. + + Returns: + Dict[str, Dict[str, Any]]: Dictionary of detector information + """ + return { + 'retinaface': { + 'description': 'RetinaFace detector with high accuracy', + 'supports_landmarks': True, + 'paper': 'https://arxiv.org/abs/1905.00641', + 'default_params': { + 'model_name': 'mnet_v2', + 'conf_thresh': 0.5, + 'nms_thresh': 0.4, + 'input_size': (640, 640) + } + }, + 'scrfd': { + 'description': 'SCRFD detector - fast and accurate with efficient architecture', + 'supports_landmarks': True, + 'paper': 'https://arxiv.org/abs/2105.04714', + 'default_params': { + 'model_name': 'scrfd_10g_kps', + 'conf_thresh': 0.5, + 'nms_thresh': 0.4, + 'input_size': (640, 640) + } + } + } + + +__all__ = [ + 'detect_faces', + 'create_detector', + 'list_available_detectors', + 'SCRFD', + 'RetinaFace', + 'BaseDetector', +] diff --git a/uniface/detection/base.py b/uniface/detection/base.py new file mode 100644 index 0000000..509a06d --- /dev/null +++ b/uniface/detection/base.py @@ -0,0 +1,100 @@ +# Copyright 2025 Yakhyokhuja Valikhujaev +# Author: Yakhyokhuja Valikhujaev +# GitHub: https://github.com/yakhyo + +""" +Base classes for face detection. +""" + +import numpy as np +from abc import ABC, abstractmethod +from typing import Tuple, Dict, Any + + +class BaseDetector(ABC): + """ + Abstract base class for all face detectors. + + This class defines the interface that all face detectors must implement, + ensuring consistency across different detection methods. + """ + + def __init__(self, **kwargs): + """Initialize the detector with configuration parameters.""" + self.config = kwargs + + @abstractmethod + def detect(self, image: np.ndarray, **kwargs) -> Tuple[np.ndarray, np.ndarray]: + """ + Detect faces in an image. + + Args: + image (np.ndarray): Input image as numpy array with shape (H, W, C) + **kwargs: Additional detection parameters + + Returns: + Tuple[np.ndarray, np.ndarray]: (detections, landmarks) + - detections: Bounding boxes with confidence scores, shape (N, 5) + Format: [x_min, y_min, x_max, y_max, confidence] + - landmarks: Facial landmark points, shape (N, 5, 2) for 5-point landmarks + or (N, 68, 2) for 68-point landmarks. Empty array if not supported. + """ + pass + + @abstractmethod + def preprocess(self, image: np.ndarray) -> np.ndarray: + """ + Preprocess input image for detection. + + Args: + image (np.ndarray): Input image + + Returns: + np.ndarray: Preprocessed image tensor + """ + pass + + @abstractmethod + def postprocess(self, outputs, **kwargs) -> Tuple[np.ndarray, np.ndarray]: + """ + Postprocess model outputs to get final detections. 
+
+        Args:
+            outputs: Raw model outputs
+            **kwargs: Additional postprocessing parameters
+
+        Returns:
+            Tuple[np.ndarray, np.ndarray]: (detections, landmarks)
+        """
+        pass
+
+    def __str__(self) -> str:
+        """String representation of the detector."""
+        return f"{self.__class__.__name__}({self.config})"
+
+    def __repr__(self) -> str:
+        """Detailed string representation."""
+        return self.__str__()
+
+    @property
+    def supports_landmarks(self) -> bool:
+        """
+        Whether this detector supports landmark detection.
+
+        Returns:
+            bool: True if landmarks are supported, False otherwise
+        """
+        return hasattr(self, '_supports_landmarks') and self._supports_landmarks
+
+    def get_info(self) -> Dict[str, Any]:
+        """
+        Get detector information and configuration.
+
+        Returns:
+            Dict[str, Any]: Detector information
+        """
+        return {
+            'name': self.__class__.__name__,
+            'supports_landmarks': self.supports_landmarks,
+            'config': self.config
+        }
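`BaseDetector` is the new extension point: a third-party detector only has to implement `detect`, `preprocess`, and `postprocess`, and set `_supports_landmarks`. A toy illustration (the `DummyDetector` name is hypothetical and not part of this patch):

```python
from typing import Any, Dict, List, Tuple

import numpy as np

from uniface.detection import BaseDetector


class DummyDetector(BaseDetector):
    """Toy detector that reports one face covering the whole image."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self._supports_landmarks = False

    def preprocess(self, image: np.ndarray) -> np.ndarray:
        return image  # no-op; a real detector would resize and normalize here

    def postprocess(self, outputs, **kwargs) -> Tuple[np.ndarray, np.ndarray]:
        return outputs, np.empty((0, 5, 2))  # no landmarks

    def detect(self, image: np.ndarray, **kwargs) -> List[Dict[str, Any]]:
        h, w = image.shape[:2]
        return [{"bbox": [0.0, 0.0, float(w), float(h)], "confidence": 1.0, "landmarks": []}]
```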
diff --git a/uniface/detection/retinaface.py b/uniface/detection/retinaface.py
index 9b6ce1b..d5262da 100644
--- a/uniface/detection/retinaface.py
+++ b/uniface/detection/retinaface.py
@@ -2,16 +2,16 @@
 # Author: Yakhyokhuja Valikhujaev
 # GitHub: https://github.com/yakhyo
 
-import os
-import cv2
 import numpy as np
 import onnxruntime as ort
-from typing import Tuple, List, Optional, Literal
+from typing import Tuple, List, Literal, Dict, Any
 
 from uniface.log import Logger
 from uniface.model_store import verify_model_weights
 from uniface.constants import RetinaFaceWeights
+
+from .base import BaseDetector
 from .utils import (
     non_max_supression,
     resize_image,
@@ -21,64 +21,64 @@ from .utils import (
 )
 
 
-class RetinaFace:
+class RetinaFace(BaseDetector):
     """
     Face detector based on the RetinaFace architecture.
 
+    Title: "RetinaFace: Single-stage Dense Face Localisation in the Wild"
+    Paper: https://arxiv.org/abs/1905.00641
+
     Args:
-        model_name (RetinaFaceWeights): Model weights to use. Defaults to `RetinaFaceWeights.MNET_V2`.
-        conf_thresh (float): Confidence threshold for filtering detections. Defaults to 0.5.
-        nms_thresh (float): Non-maximum suppression (NMS) threshold. Defaults to 0.4.
-        pre_nms_topk (int): Number of top-scoring boxes considered before applying NMS. Defaults to 5000.
-        post_nms_topk (int): Maximum number of final detections retained after NMS. Defaults to 750.
-        dynamic_size (bool): If True, anchors are generated dynamically per input image size. Defaults to False.
-        input_size (Tuple[int, int]): Fixed input size (width, height) used when `dynamic_size` is False. Ignored if `dynamic_size=True`.
+        **kwargs: Keyword arguments passed to BaseDetector. Supported keys include:
+            model_name (RetinaFaceWeights, optional): Model weights to use. Defaults to `RetinaFaceWeights.MNET_V2`.
+            conf_thresh (float, optional): Confidence threshold for filtering detections. Defaults to 0.5.
+            nms_thresh (float, optional): Non-maximum suppression (NMS) IoU threshold. Defaults to 0.4.
+            pre_nms_topk (int, optional): Number of top-scoring boxes considered before NMS. Defaults to 5000.
+            post_nms_topk (int, optional): Max number of detections kept after NMS. Defaults to 750.
+            dynamic_size (bool, optional): If True, generate anchors dynamically per input image. Defaults to False.
+            input_size (Tuple[int, int], optional): Fixed input size (width, height) if `dynamic_size=False`. Defaults to (640, 640).
 
     Attributes:
-        conf_thresh (float): Threshold for filtering detections based on confidence score.
-        nms_thresh (float): IoU threshold for NMS.
-        pre_nms_topk (int): Limit on boxes considered before NMS.
-        post_nms_topk (int): Limit on detections kept after NMS.
-        dynamic_size (bool): Whether anchors are generated dynamically.
-        input_size (Tuple[int, int]): Static input size when `dynamic_size` is False.
-        _model_path (str): Path to verified model weights. (Internal)
-        _priors (np.ndarray): Anchor boxes used for detection. Precomputed if static input size is used. (Internal)
+        model_name (RetinaFaceWeights): Selected model variant.
+        conf_thresh (float): Threshold for confidence-based filtering.
+        nms_thresh (float): IoU threshold used for NMS.
+        pre_nms_topk (int): Limit on proposals before applying NMS.
+        post_nms_topk (int): Limit on retained detections after NMS.
+        dynamic_size (bool): Flag indicating dynamic or static input sizing.
+        input_size (Tuple[int, int]): Static input size if `dynamic_size=False`.
+        _model_path (str): Absolute path to the verified model weights.
+        _priors (np.ndarray): Precomputed anchor boxes (if static size).
+        _supports_landmarks (bool): Indicates landmark prediction support.
 
     Raises:
-        ValueError: If model weights are invalid or not found.
-        RuntimeError: If the model fails to initialize.
+        ValueError: If the model weights are invalid or not found.
+        RuntimeError: If the ONNX model fails to load or initialize.
     """
 
-    def __init__(
-        self,
-        model_name: RetinaFaceWeights = RetinaFaceWeights.MNET_V2,
-        conf_thresh: float = 0.5,
-        nms_thresh: float = 0.4,
-        pre_nms_topk: int = 5000,
-        post_nms_topk: int = 750,
-        dynamic_size: bool = False,
-        input_size: Tuple[int, int] = (640, 640),  # Default input size if dynamic_size=False
-    ) -> None:
+    def __init__(self, **kwargs) -> None:
+        super().__init__(**kwargs)
+        self._supports_landmarks = True  # RetinaFace supports landmarks
 
-        self.conf_thresh = conf_thresh
-        self.nms_thresh = nms_thresh
-        self.pre_nms_topk = pre_nms_topk
-        self.post_nms_topk = post_nms_topk
-        self.dynamic_size = dynamic_size
-        self.input_size = input_size
+        self.model_name = kwargs.get('model_name', RetinaFaceWeights.MNET_V2)
+        self.conf_thresh = kwargs.get('conf_thresh', 0.5)
+        self.nms_thresh = kwargs.get('nms_thresh', 0.4)
+        self.pre_nms_topk = kwargs.get('pre_nms_topk', 5000)
+        self.post_nms_topk = kwargs.get('post_nms_topk', 750)
+        self.dynamic_size = kwargs.get('dynamic_size', False)
+        self.input_size = kwargs.get('input_size', (640, 640))
 
         Logger.info(
-            f"Initializing RetinaFace with model={model_name}, conf_thresh={conf_thresh}, nms_thresh={nms_thresh}, "
-            f"input_size={input_size}"
+            f"Initializing RetinaFace with model={self.model_name}, conf_thresh={self.conf_thresh}, nms_thresh={self.nms_thresh}, "
+            f"input_size={self.input_size}"
         )
 
         # Get path to model weights
-        self._model_path = verify_model_weights(model_name)
+        self._model_path = verify_model_weights(self.model_name)
         Logger.info(f"Verified model weights located at: {self._model_path}")
 
         # Precompute anchors if using static size
-        if not dynamic_size and input_size is not None:
-            self._priors = generate_anchors(image_size=input_size)
+        if not self.dynamic_size and self.input_size is not None:
+            self._priors = generate_anchors(image_size=self.input_size)
             Logger.debug("Generated anchors for static input size.")
 
         # Initialize model
@@ -137,7 +137,7 @@ class RetinaFace:
         max_num: int = 0,
         metric: Literal["default", "max"] = "max",
         center_weight: float = 2.0
-    ) -> Tuple[np.ndarray, np.ndarray]:
+    ) -> List[Dict[str, Any]]:
         """
         Perform face detection on an input image and return bounding boxes and facial landmarks.
@@ -151,9 +151,10 @@ class RetinaFace:
                 when using the "default" metric. Defaults to 2.0.
 
         Returns:
-            Tuple[np.ndarray, np.ndarray]:
-                - detections: Bounding boxes with confidence scores. Shape (N, 5), each row as [x_min, y_min, x_max, y_max, score].
-                - landmarks: Facial landmark coordinates. Shape (N, 5, 2), where each row contains 5 (x, y) points.
+            List[Dict[str, Any]]: List of face detection dictionaries, each containing:
+                - 'bbox': [x1, y1, x2, y2] - Bounding box coordinates
+                - 'confidence': float - Detection confidence score
+                - 'landmarks': [[x1, y1], [x2, y2], [x3, y3], [x4, y4], [x5, y5]] - 5-point facial landmarks
         """
         original_height, original_width = image.shape[:2]
 
@@ -198,7 +199,16 @@ class RetinaFace:
         detections = detections[sorted_indices]
         landmarks = landmarks[sorted_indices]
 
-        return detections, landmarks
+        faces = []
+        for i in range(detections.shape[0]):
+            face_dict = {
+                'bbox': detections[i, :4].astype(float).tolist(),
+                'confidence': detections[i, 4].item(),
+                'landmarks': landmarks[i].astype(float).tolist()
+            }
+            faces.append(face_dict)
+
+        return faces
 
     def postprocess(self, outputs: List[np.ndarray], resize_factor: float, shape: Tuple[int, int]) -> Tuple[np.ndarray, np.ndarray]:
         """
@@ -259,3 +269,64 @@ class RetinaFace:
             landmarks = landmarks * landmark_scale / resize_factor
 
         return boxes, landmarks
+
+
+# TODO: below is only for testing, remove it later
+def draw_bbox(frame, bbox, score, color=(0, 255, 0), thickness=2):
+    x1, y1, x2, y2 = map(int, bbox)  # unpack the 4 bbox values
+    cv2.rectangle(frame, (x1, y1), (x2, y2), color, thickness)
+    cv2.putText(frame, f"{score:.2f}", (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)
+
+
+def draw_keypoints(frame, points, color=(0, 0, 255), radius=2):
+    for (x, y) in points.astype(np.int32):
+        cv2.circle(frame, (int(x), int(y)), radius, color, -1)
+
+
+if __name__ == "__main__":
+    import cv2
+
+    detector = RetinaFace(model_name=RetinaFaceWeights.MNET_050)
+    print(detector.get_info())
+    cap = cv2.VideoCapture(0)
+
+    if not cap.isOpened():
+        print("❌ Failed to open webcam.")
+        exit()
+
+    print("📷 Webcam started. Press 'q' to exit.")
+
+    while True:
+        ret, frame = cap.read()
+        if not ret:
+            print("❌ Failed to read frame.")
+            break
+
+        # Get face detections as a list of dictionaries
+        faces = detector.detect(frame)
+
+        # Process each detected face
+        for face in faces:
+            # Extract bbox and landmarks from the dictionary
+            bbox = face['bbox']            # [x1, y1, x2, y2]
+            landmarks = face['landmarks']  # [[x1, y1], [x2, y2], ...]
+            confidence = face['confidence']
+
+            # Pass bbox and confidence separately
+            draw_bbox(frame, bbox, confidence)
+
+            # Convert landmarks to a numpy array before drawing
+            if landmarks is not None and len(landmarks) > 0:
+                points = np.array(landmarks, dtype=np.float32)  # shape: (5, 2)
+                draw_keypoints(frame, points)
+
+        # Display face count
+        cv2.putText(frame, f"Faces: {len(faces)}", (10, 30),
+                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
+
+        cv2.imshow("FaceDetection", frame)
+        if cv2.waitKey(1) & 0xFF == ord("q"):
+            break
+
+    cap.release()
+    cv2.destroyAllWindows()
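The `max_num`/`metric` selection logic from the old tuple-based API carries over unchanged, so callers can still ask for only the most prominent faces. For example (continuing the webcam sketch above):

```python
# Keep at most one face, ranked by box area weighted toward the image center.
faces = detector.detect(frame, max_num=1, metric="default", center_weight=2.0)
```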
diff --git a/uniface/detection/scrfd.py b/uniface/detection/scrfd.py
index 610ba79..e4966ee 100644
--- a/uniface/detection/scrfd.py
+++ b/uniface/detection/scrfd.py
@@ -1,25 +1,24 @@
 # Copyright 2025 Yakhyokhuja Valikhujaev
 # Author: Yakhyokhuja Valikhujaev
 # GitHub: https://github.com/yakhyo
-# Modified from insightface repository
 
-import os
 import cv2
 import numpy as np
 import onnxruntime as ort
-from typing import Tuple, List, Literal
+from typing import Tuple, List, Literal, Dict, Any
 
 from uniface.log import Logger
 from uniface.constants import SCRFDWeights
 from uniface.model_store import verify_model_weights
 
+from .base import BaseDetector
 from .utils import non_max_supression, distance2bbox, distance2kps, resize_image
 
 __all__ = ['SCRFD']
 
 
-class SCRFD:
+class SCRFD(BaseDetector):
     """
     Face detector based on the SCRFD architecture.
 
@@ -27,10 +26,12 @@ class SCRFD:
     Paper: https://arxiv.org/abs/2105.04714
 
     Args:
-        model_name (SCRFDWeights): Predefined model enum (e.g., `SCRFD_10G_KPS`). Specifies the SCRFD variant to load.
-        conf_thresh (float): Confidence threshold for filtering detections. Defaults to 0.5.
-        nms_thresh (float): Non-Maximum Suppression (NMS) threshold. Defaults to 0.4.
-        input_size (Tuple[int, int]): Target input resolution (width, height) to resize images. Defaults to (640, 640).
+        **kwargs: Keyword arguments passed to BaseDetector. Supported keys include:
+            model_name (SCRFDWeights, optional): Predefined model enum (e.g., `SCRFD_10G_KPS`).
+                Specifies the SCRFD variant to load. Defaults to SCRFD_10G_KPS.
+            conf_thresh (float, optional): Confidence threshold for filtering detections. Defaults to 0.5.
+            nms_thresh (float, optional): Non-Maximum Suppression threshold. Defaults to 0.4.
+            input_size (Tuple[int, int], optional): Input image size (width, height). Defaults to (640, 640).
 
     Attributes:
         conf_thresh (float): Threshold used to filter low-confidence detections.
@@ -47,13 +48,14 @@ class SCRFD:
         RuntimeError: If the ONNX model fails to load or initialize.
     """
 
-    def __init__(
-        self,
-        model_name: SCRFDWeights = SCRFDWeights.SCRFD_10G_KPS,
-        conf_thresh: float = 0.5,
-        nms_thresh: float = 0.4,
-        input_size: Tuple[int, int] = (640, 640),
-    ) -> None:
+    def __init__(self, **kwargs) -> None:
+        super().__init__(**kwargs)
+        self._supports_landmarks = True  # SCRFD supports landmarks
+
+        model_name = kwargs.get('model_name', SCRFDWeights.SCRFD_10G_KPS)
+        conf_thresh = kwargs.get('conf_thresh', 0.5)
+        nms_thresh = kwargs.get('nms_thresh', 0.4)
+        input_size = kwargs.get('input_size', (640, 640))
 
         self.conf_thresh = conf_thresh
         self.nms_thresh = nms_thresh
@@ -179,7 +181,7 @@ class SCRFD:
         max_num: int = 0,
         metric: Literal["default", "max"] = "max",
         center_weight: float = 2
-    ) -> Tuple[np.ndarray, np.ndarray]:
+    ) -> List[Dict[str, Any]]:
         """
         Perform face detection on an input image and return bounding boxes and facial landmarks.
@@ -193,9 +195,10 @@ class SCRFD:
                 when using the "default" metric. Defaults to 2.0.
 
         Returns:
-            Tuple[np.ndarray, np.ndarray]:
-                - detections: Bounding boxes with confidence scores. Shape (N, 5), each row as [x_min, y_min, x_max, y_max, score].
-                - landmarks: Facial landmark coordinates. Shape (N, 5, 2), where each row contains 5 (x, y) points.
+            List[Dict[str, Any]]: List of face detection dictionaries, each containing:
+                - 'bbox': [x1, y1, x2, y2] - Bounding box coordinates
+                - 'confidence': float - Detection confidence score
+                - 'landmarks': [[x1, y1], [x2, y2], [x3, y3], [x4, y4], [x5, y5]] - 5-point facial landmarks
         """
         original_height, original_width = image.shape[:2]
 
@@ -221,20 +224,20 @@ class SCRFD:
 
         keep = non_max_supression(pre_det, threshold=self.nms_thresh)
 
-        det = pre_det[keep, :]
+        detections = pre_det[keep, :]
         landmarks = landmarks[order, :, :]
         landmarks = landmarks[keep, :, :].astype(np.int32)
 
-        if 0 < max_num < det.shape[0]:
+        if 0 < max_num < detections.shape[0]:
             # Calculate area of detections
-            area = (det[:, 2] - det[:, 0]) * (det[:, 3] - det[:, 1])
+            area = (detections[:, 2] - detections[:, 0]) * (detections[:, 3] - detections[:, 1])
 
             # Calculate offsets from image center
             center = (original_height // 2, original_width // 2)
             offsets = np.vstack(
                 [
-                    (det[:, 0] + det[:, 2]) / 2 - center[1],
-                    (det[:, 1] + det[:, 3]) / 2 - center[0],
+                    (detections[:, 0] + detections[:, 2]) / 2 - center[1],
+                    (detections[:, 1] + detections[:, 3]) / 2 - center[0],
                 ]
             )
 
@@ -247,30 +250,36 @@ class SCRFD:
 
         # Sort by scores and select top `max_num`
         sorted_indices = np.argsort(values)[::-1][:max_num]
-        det = det[sorted_indices]
+        detections = detections[sorted_indices]
         landmarks = landmarks[sorted_indices]
 
-        return det, landmarks
+        faces = []
+        for i in range(detections.shape[0]):
+            face_dict = {
+                'bbox': detections[i, :4].astype(float).tolist(),
+                'confidence': detections[i, 4].item(),
+                'landmarks': landmarks[i].astype(float).tolist()
+            }
+            faces.append(face_dict)
+
+        return faces
+
+
+# TODO: below is only for testing, remove it later
-
-
-def draw_bbox(frame, bbox, color=(0, 255, 0), thickness=2):
-    x1, y1, x2, y2 = bbox[:4].astype(np.int32)
+def draw_bbox(frame, bbox, score, color=(0, 255, 0), thickness=2):
+    x1, y1, x2, y2 = map(int, bbox)  # unpack the 4 bbox values
     cv2.rectangle(frame, (x1, y1), (x2, y2), color, thickness)
-    score = bbox[4]
     cv2.putText(frame, f"{score:.2f}", (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)
 
 
 def draw_keypoints(frame, points, color=(0, 0, 255), radius=2):
     for (x, y) in points.astype(np.int32):
-        cv2.circle(frame, (x, y), radius, color, -1)
+        cv2.circle(frame, (int(x), int(y)), radius, color, -1)
 
 
-# TODO: Remove late, just for testing
-
 if __name__ == "__main__":
     detector = SCRFD(model_name=SCRFDWeights.SCRFD_500M_KPS)
+    print(detector.get_info())
     cap = cv2.VideoCapture(0)
 
     if not cap.isOpened():
@@ -285,14 +294,29 @@ if __name__ == "__main__":
             print("❌ Failed to read frame.")
             break
 
-        boxes_list, points_list = detector.detect(frame)
+        # Get face detections as a list of dictionaries
+        faces = detector.detect(frame)
 
-        for boxes, points in zip(boxes_list, points_list):
-            draw_bbox(frame, boxes)
+        # Process each detected face
+        for face in faces:
+            # Extract bbox and landmarks from the dictionary
+            bbox = face['bbox']            # [x1, y1, x2, y2]
+            landmarks = face['landmarks']  # [[x1, y1], [x2, y2], ...]
+            confidence = face['confidence']
 
-            if points is not None:
+            # Pass bbox and confidence separately
+            draw_bbox(frame, bbox, confidence)
+
+            # Convert landmarks to a numpy array before drawing
+            if landmarks is not None and len(landmarks) > 0:
+                points = np.array(landmarks, dtype=np.float32)  # shape: (5, 2)
                 draw_keypoints(frame, points)
 
+        # Display face count
+        cv2.putText(frame, f"Faces: {len(faces)}", (10, 30),
+                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
+
         cv2.imshow("FaceDetection", frame)
         if cv2.waitKey(1) & 0xFF == ord("q"):
             break
 
     cap.release()
     cv2.destroyAllWindows()
diff --git a/uniface/landmark/__init__.py b/uniface/landmark/__init__.py
index e69de29..2271f13 100644
--- a/uniface/landmark/__init__.py
+++ b/uniface/landmark/__init__.py
@@ -0,0 +1 @@
+from .model import Landmark
diff --git a/uniface/landmark/model.py b/uniface/landmark/model.py
index 955c7eb..39630f8 100644
--- a/uniface/landmark/model.py
+++ b/uniface/landmark/model.py
@@ -1,7 +1,10 @@
+# Copyright 2025 Yakhyokhuja Valikhujaev
+# Author: Yakhyokhuja Valikhujaev
+# GitHub: https://github.com/yakhyo
+
 import cv2
-import onnx
-import onnxruntime as ort
 import numpy as np
+import onnxruntime as ort
 
 from typing import Tuple
 
@@ -15,12 +18,32 @@ __all__ = ['Landmark']
 
 class Landmark:
     """
-    Facial landmark detection model for predicting facial keypoints.
-    """
+    Facial landmark detection model for predicting 106 facial keypoints using an ONNX model.
+
+    This class wraps a pretrained facial landmark model to detect 106 key facial points
+    such as eyes, eyebrows, nose, lips, and jawline from a given face bounding box.
+    It handles model verification, input preprocessing, ONNX inference execution,
+    and projection of landmark coordinates back to the original image space.
+
+    Attributes:
+        input_size (Tuple[int, int]): Model's expected input resolution (width, height).
+        input_mean (float): Mean value used for input normalization.
+        input_std (float): Standard deviation used for input normalization.
+        model_path (str): Path to the verified ONNX model file.
+        session (onnxruntime.InferenceSession): ONNX Runtime session for inference.
+        input_names (List[str]): List of input node names.
+        output_names (List[str]): List of output node names.
+        lmk_dim (int): Number of dimensions per landmark point (typically 2 for x, y).
+        lmk_num (int): Total number of landmark points predicted by the model (106).
+
+    Args:
+        model_name (LandmarkWeights): Enum specifying the landmark model to load.
+        input_size (Tuple[int, int]): Resolution for model input; defaults to (192, 192).
+    """
 
     def __init__(
-        self,
-        model_name: LandmarkWeights = LandmarkWeights.DEFAULT,
+        self,
+        model_name: LandmarkWeights = LandmarkWeights.DEFAULT,
         input_size: Tuple[int, int] = (192, 192)
     ) -> None:
         """
@@ -50,7 +73,7 @@ class Landmark:
     def _initialize_model(self):
         """
         Initialize the ONNX model from the stored model path.
-        
+
         Raises:
             RuntimeError: If the model fails to load or initialize.
""" @@ -73,7 +96,7 @@ class Landmark: output_shape = self.session.get_outputs()[0].shape self.lmk_dim = 2 # x,y coordinates self.lmk_num = output_shape[1] // self.lmk_dim # Number of landmarks - + Logger.info(f"Model initialized with {self.lmk_num} landmarks") except Exception as e: @@ -96,7 +119,7 @@ class Landmark: # Calculate face dimensions and center width, height = bbox[2] - bbox[0], bbox[3] - bbox[1] center = (bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2 - + # Determine scale to fit face with some margin scale = self.input_size[0] / (max(width, height) * 1.5) rotation = 0.0 @@ -105,7 +128,7 @@ class Landmark: aligned_face, transform_matrix = bbox_center_alignment( image, center, self.input_size[0], scale, rotation ) - + # Convert to blob format for inference face_blob = cv2.dnn.blobFromImage( aligned_face, @@ -114,7 +137,7 @@ class Landmark: (self.input_mean, self.input_mean, self.input_mean), swapRB=True # Convert BGR to RGB ) - + return face_blob, transform_matrix def postprocess(self, predictions: np.ndarray, transform_matrix: np.ndarray) -> np.ndarray: @@ -154,13 +177,13 @@ class Landmark: """ # Preprocess image face_blob, transform_matrix = self.preprocess(image, bbox) - + # Run inference raw_predictions = self.session.run( - self.output_names, + self.output_names, {self.input_names[0]: face_blob} )[0][0] - + # Postprocess to get landmarks in original image space landmarks = self.postprocess(raw_predictions, transform_matrix) @@ -172,7 +195,7 @@ class Landmark: if __name__ == "__main__": from uniface.detection import RetinaFace from uniface.constants import RetinaFaceWeights - + face_detector = RetinaFace( model_name=RetinaFaceWeights.MNET_V2, conf_thresh=0.5, diff --git a/uniface/recognition/base.py b/uniface/recognition/base.py index b804cdb..3f98a2c 100644 --- a/uniface/recognition/base.py +++ b/uniface/recognition/base.py @@ -1,22 +1,21 @@ # Copyright 2025 Yakhyokhuja Valikhujaev # Author: Yakhyokhuja Valikhujaev # GitHub: https://github.com/yakhyo -# Modified from insightface repository -import os import cv2 import numpy as np import onnxruntime as ort -from typing import Tuple, Optional, Union, List from dataclasses import dataclass +from typing import Tuple, Union, List + from uniface.log import Logger from uniface.model_store import verify_model_weights -from uniface.face_utils import compute_similarity, face_alignment +from uniface.face_utils import face_alignment from uniface.constants import SphereFaceWeights, MobileFaceWeights, ArcFaceWeights -__all__ = ["BaseFaceEncoder", "PreprocessConfig"] +__all__ = ["BaseModel", "PreprocessConfig"] @dataclass @@ -29,7 +28,7 @@ class PreprocessConfig: input_size: Tuple[int, int] = (112, 112) -class BaseFaceEncoder: +class BaseModel: """ Unified Face Encoder supporting multiple model families (e.g., SphereFace, MobileFace). """ @@ -133,7 +132,7 @@ class BaseFaceEncoder: def get_embedding(self, image: np.ndarray, landmarks: np.ndarray) -> np.ndarray: """ - Extracts face embedding from an aligned image. + Extracts face embedding from an image. Args: image: Input face image (BGR format). @@ -150,3 +149,17 @@ class BaseFaceEncoder: embedding = self.session.run(self.output_names, {self.input_name: face_blob})[0] return embedding + + def get_normalized_embedding(self, image: np.ndarray, landmarks: np.ndarray) -> np.ndarray: + """ + Extracts l2 normalized face embedding vector from an image + + Args: + image: Input face image (BGR format). + landmarks: Facial landmarks (5 points for alignment). 
+ + Returns: + Normalied face embedding vector (typically 512-dimensional). + """ + embedding = self.get_embedding(image, landmarks) + return embedding / np.linalg.norm(embedding) diff --git a/uniface/recognition/models.py b/uniface/recognition/models.py index 9b93176..593f18f 100644 --- a/uniface/recognition/models.py +++ b/uniface/recognition/models.py @@ -5,13 +5,24 @@ from typing import Optional from uniface.constants import SphereFaceWeights, MobileFaceWeights, ArcFaceWeights -from .base import BaseFaceEncoder, PreprocessConfig +from .base import BaseModel, PreprocessConfig __all__ = ["SphereFace", "MobileFace", "ArcFace"] -class SphereFace(BaseFaceEncoder): +class SphereFace(BaseModel): + """ + SphereFace face encoder class. + + This class loads a SphereFace model for face embedding extraction. + It supports configurable preprocessing, with a default mean/std and input size of 112x112. + + Args: + model_name (SphereFaceWeights): Enum value representing the model to load. Defaults to SphereFaceWeights.SPHERE20. + preprocessing (Optional[PreprocessConfig]): Preprocessing config (mean, std, size). Defaults to standard 112x112 with normalization. + """ + def __init__( self, model_name: SphereFaceWeights = SphereFaceWeights.SPHERE20, preprocessing: Optional[PreprocessConfig] = None @@ -25,7 +36,18 @@ class SphereFace(BaseFaceEncoder): super().__init__(model_name=model_name, preprocessing=preprocessing) -class MobileFace(BaseFaceEncoder): +class MobileFace(BaseModel): + """ + MobileFace face encoder class. + + Loads a lightweight MobileFaceNet model for fast face embedding extraction. + Default input normalization and resizing applied if preprocessing is not provided. + + Args: + model_name (MobileFaceWeights): Enum value specifying the MobileFace model. Defaults to MobileFaceWeights.MNET_V2. + preprocessing (Optional[PreprocessConfig]): Preprocessing config. If None, uses standard normalization and 112x112 input size. + """ + def __init__( self, model_name: MobileFaceWeights = MobileFaceWeights.MNET_V2, preprocessing: Optional[PreprocessConfig] = None @@ -39,7 +61,18 @@ class MobileFace(BaseFaceEncoder): super().__init__(model_name=model_name) -class ArcFace(BaseFaceEncoder): +class ArcFace(BaseModel): + """ + ArcFace face encoder class. + + Loads an ArcFace model (e.g., ResNet-based) for robust face recognition embedding generation. + Applies standard preprocessing unless overridden. + + Args: + model_name (ArcFaceWeights): Enum for the ArcFace model variant. Defaults to ArcFaceWeights.MNET. + preprocessing (Optional[PreprocessConfig]): Preprocessing settings. Defaults to standard normalization and resizing if not specified. + """ + def __init__( self, model_name: ArcFaceWeights = ArcFaceWeights.MNET, preprocessing: Optional[PreprocessConfig] = None