Initial commit

2025-12-30 00:52:25 +00:00 · 2024-11-20 08:43:25 +00:00
commit 6dbf4e6b87
18 changed files with 1345 additions and 0 deletions
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -0,0 +1,64 @@
 name: Build, Test, and Publish
 on:
  push:
    branches:
      - main
    tags:
      - "v*.*.*"  # Trigger publish on version tags
  pull_request:
    branches:
      - main
 jobs:
  # Run the test suite on every supported Python version
  build:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.8", "3.9", "3.10"]
    steps:
      - name: Checkout code
        uses: actions/checkout@v3
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          # Install the package with its "dev" extra (defined in setup.py).
          # No fallback: a broken install must fail the job here instead of
          # surfacing later as confusing import errors in the tests.
          python -m pip install ".[dev]"
      - name: Run Tests
        run: |
          pytest
  # Build the distribution and upload it to PyPI
  publish:
    runs-on: ubuntu-latest
    needs: build  # Publish only if tests pass
    # Publish only for pushed version tags. Without this guard the job would
    # attempt a PyPI upload on every push to main and every pull request.
    if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v')
    steps:
      - name: Checkout code
        uses: actions/checkout@v3
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: "3.10"  # Use a single Python version for publishing
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          python -m pip install build twine
      - name: Build Package
        run: python -m build
      - name: Publish to PyPI
        env:
          TWINE_USERNAME: __token__
          TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
        run: twine upload dist/*
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,162 @@
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
 *$py.class
 # C extensions
 *.so
 # Distribution / packaging
 .Python
 build/
 develop-eggs/
 dist/
 downloads/
 eggs/
 .eggs/
 lib/
 lib64/
 parts/
 sdist/
 var/
 wheels/
 share/python-wheels/
 *.egg-info/
 .installed.cfg
 *.egg
 MANIFEST
 # PyInstaller
 #  Usually these files are written by a python script from a template
 #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 *.manifest
 *.spec
 # Installer logs
 pip-log.txt
 pip-delete-this-directory.txt
 # Unit test / coverage reports
 htmlcov/
 .tox/
 .nox/
 .coverage
 .coverage.*
 .cache
 nosetests.xml
 coverage.xml
 *.cover
 *.py,cover
 .hypothesis/
 .pytest_cache/
 cover/
 # Translations
 *.mo
 *.pot
 # Django stuff:
 *.log
 local_settings.py
 db.sqlite3
 db.sqlite3-journal
 # Flask stuff:
 instance/
 .webassets-cache
 # Scrapy stuff:
 .scrapy
 # Sphinx documentation
 docs/_build/
 # PyBuilder
 .pybuilder/
 target/
 # Jupyter Notebook
 .ipynb_checkpoints
 # IPython
 profile_default/
 ipython_config.py
 # pyenv
 #   For a library or package, you might want to ignore these files since the code is
 #   intended to run in multiple environments; otherwise, check them in:
 # .python-version
 # pipenv
 #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 #   install all needed dependencies.
 #Pipfile.lock
 # poetry
 #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
 #   This is especially recommended for binary packages to ensure reproducibility, and is more
 #   commonly ignored for libraries.
 #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
 #poetry.lock
 # pdm
 #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
 #pdm.lock
 #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
 #   in version control.
 #   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
 .pdm.toml
 .pdm-python
 .pdm-build/
 # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
 __pypackages__/
 # Celery stuff
 celerybeat-schedule
 celerybeat.pid
 # SageMath parsed files
 *.sage.py
 # Environments
 .env
 .venv
 env/
 venv/
 ENV/
 env.bak/
 venv.bak/
 # Spyder project settings
 .spyderproject
 .spyproject
 # Rope project settings
 .ropeproject
 # mkdocs documentation
 /site
 # mypy
 .mypy_cache/
 .dmypy.json
 dmypy.json
 # Pyre type checker
 .pyre/
 # pytype static type analyzer
 .pytype/
 # Cython debug symbols
 cython_debug/
 # PyCharm
 #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
 #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
 #  and can be added to the global gitignore or merged into this file.  For a more nuclear
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
--- a/21
+++ b/21
@@ -0,0 +1,21 @@
 MIT License
 Copyright (c) 2024 Yakhyokhuja Valikhujaev
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
 in the Software without restriction, including without limitation the rights
 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 copies of the Software, and to permit persons to whom the Software is
 furnished to do so, subject to the following conditions:
 The above copyright notice and this permission notice shall be included in all
 copies or substantial portions of the Software.
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
--- a/README.md
+++ b/README.md
@@ -0,0 +1,239 @@
 # UniFace: All-in-One Face Analysis Library
 <div align="center">
 [![License](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)
 ![Python](https://img.shields.io/badge/Python-3.8%2B-blue)
 [![PyPI Version](https://img.shields.io/pypi/v/uniface.svg)](https://pypi.org/project/uniface/)
 [![Build Status](https://github.com/yakhyo/uniface/actions/workflows/build.yml/badge.svg)](https://github.com/yakhyo/uniface/actions)
 [![Downloads](https://pepy.tech/badge/uniface)](https://pepy.tech/project/uniface)
 [![Code Style: PEP8](https://img.shields.io/badge/code%20style-PEP8-green.svg)](https://www.python.org/dev/peps/pep-0008/)
 [![GitHub Release Downloads](https://img.shields.io/github/downloads/yakhyo/uniface/total.svg?label=Model%20Downloads)](https://github.com/yakhyo/uniface/releases)
 </div>
 **uniface** is a lightweight face detection library designed for high-performance face localization and landmark detection. The library supports ONNX models and provides utilities for bounding box visualization and landmark plotting. To train a RetinaFace model, see https://github.com/yakhyo/retinaface-pytorch.
 ---
 ## Features
 - [ ] Age and gender detection (Planned).
 - [ ] Face recognition (Planned).
 - [x] High-speed face detection using ONNX models (Added: 2024-11-20).
 - [x] Accurate facial landmark localization (e.g., eyes, nose, and mouth) (Added: 2024-11-20).
 - [x] Easy-to-use API for inference and visualization (Added: 2024-11-20).
 ---
 ## Installation
 ### Using pip
 ```bash
 pip install uniface
 ```
 ### Local installation using pip
 **Clone the repository**
 ```bash
 git clone https://github.com/yakhyo/uniface.git
 cd uniface
 ```
 **Install using pip**
 ```bash
 pip install .
 ```
 ---
 ## Quick Start
 ### Initialize the Model
 ```python
 from uniface import RetinaFace
 # Initialize the RetinaFace model
 uniface_inference = RetinaFace(
    model="retinaface_mnet_v2",  # Model name
    conf_thresh=0.5,             # Confidence threshold
    pre_nms_topk=5000,           # Pre-NMS Top-K detections
    nms_thresh=0.4,              # NMS IoU threshold
    post_nms_topk=750            # Post-NMS Top-K detections
 )
 ```
 ### Run Inference
 Inference on image:
 ```python
 import cv2
 from uniface.visualization import draw_detections
 # Load an image
 image_path = "assets/test.jpg"
 original_image = cv2.imread(image_path)
 # Perform inference
 boxes, landmarks = uniface_inference.detect(original_image)
 # Visualize results
 draw_detections(original_image, (boxes, landmarks), vis_threshold=0.6)
 # Save the output image
 output_path = "output.jpg"
 cv2.imwrite(output_path, original_image)
 print(f"Saved output image to {output_path}")
 ```
 Inference on video:
 ```python
 import cv2
 from uniface.visualization import draw_detections
 # Initialize the webcam
 cap = cv2.VideoCapture(0)
 if not cap.isOpened():
    print("Error: Unable to access the webcam.")
    exit()
 while True:
    # Capture a frame from the webcam
    ret, frame = cap.read()
    if not ret:
        print("Error: Failed to read frame.")
        break
    # Perform inference
    boxes, landmarks = uniface_inference.detect(frame)
    # Draw detections on the frame
    draw_detections(frame, (boxes, landmarks), vis_threshold=0.6)
    # Display the output
    cv2.imshow("Webcam Inference", frame)
    # Exit if 'q' is pressed
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
 # Release the webcam and close all OpenCV windows
 cap.release()
 cv2.destroyAllWindows()
 ```
 ---
 ### Evaluation results of available models on WiderFace
 | RetinaFace Models  | Easy       | Medium     | Hard       |
 | ------------------ | ---------- | ---------- | ---------- |
 | retinaface_mnet025 | 88.48%     | 87.02%     | 80.61%     |
 | retinaface_mnet050 | 89.42%     | 87.97%     | 82.40%     |
 | retinaface_mnet_v1 | 90.59%     | 89.14%     | 84.13%     |
 | retinaface_mnet_v2 | 91.70%     | 91.03%     | 86.60%     |
 | retinaface_r18     | 92.50%     | 91.02%     | 86.63%     |
 | retinaface_r34     | **94.16%** | **93.12%** | **88.90%** |
 ## API Reference
 ### `RetinaFace` Class
 #### Initialization
 ```python
 RetinaFace(
    model: str,
    conf_thresh: float = 0.5,
    nms_thresh: float = 0.4,
    pre_nms_topk: int = 5000,
    post_nms_topk: int = 750
 )
 ```
 **Parameters**:
 - `model` *(str)*: Name of the model to use. Supported models:
  - `retinaface_mnet025`, `retinaface_mnet050`, `retinaface_mnet_v1`, `retinaface_mnet_v2`
  - `retinaface_r18`, `retinaface_r34`
 - `conf_thresh` *(float, default=0.5)*: Minimum confidence score for detections.
 - `pre_nms_topk` *(int, default=5000)*: Max detections to keep before NMS.
 - `nms_thresh` *(float, default=0.4)*: IoU threshold for Non-Maximum Suppression.
 - `post_nms_topk` *(int, default=750)*: Max detections to keep after NMS.
 ---
 ### `detect` Method
 ```python
 detect(
    image: np.ndarray,
    max_num: int = 0,
    metric: str = "default",
    center_weight: float = 2.0
 ) -> Tuple[np.ndarray, np.ndarray]
 ```
 **Description**:
 Detects faces in the given image and returns bounding boxes and landmarks.
 **Parameters**:
 - `image` *(np.ndarray)*: Input image in BGR format.
 - `max_num` *(int, default=0)*: Maximum number of faces to return. `0` means return all.
 - `metric` *(str, default="default")*: Metric for prioritizing detections:
  - `"default"`: Prioritize detections closer to the image center.
  - `"max"`: Prioritize larger bounding box areas.
 - `center_weight` *(float, default=2.0)*: Weight for prioritizing center-aligned faces.
 **Returns**:
 - `bounding_boxes` *(np.ndarray)*: Array of detections as `[x_min, y_min, x_max, y_max, confidence]`.
 - `landmarks` *(np.ndarray)*: Array of landmarks as `[(x1, y1), ..., (x5, y5)]`.
 ---
 ### Visualization Utilities
 #### `draw_detections`
 ```python
 draw_detections(
    image: np.ndarray,
    detections: Tuple[np.ndarray, np.ndarray],
    vis_threshold: float
 ) -> None
 ```
 **Description**:
 Draws bounding boxes and landmarks on the given image.
 **Parameters**:
 - `image` *(np.ndarray)*: The input image in BGR format.
 - `detections` *(Tuple[np.ndarray, np.ndarray])*: A tuple of bounding boxes and landmarks.
 - `vis_threshold` *(float)*: Minimum confidence score for visualization.
 ---
 ## Contributing
 We welcome contributions to enhance the library! Feel free to:
 - Submit bug reports or feature requests.
 - Fork the repository and create a pull request.
 ---
 ## License
 This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for details.
 ---
 ## Acknowledgments
 - Based on the RetinaFace model for face detection ([https://github.com/yakhyo/retinaface-pytorch](https://github.com/yakhyo/retinaface-pytorch)).
 - Inspired by InsightFace and other face detection projects.
 ---
--- a/assets/test.jpg
+++ b/assets/test.jpg
--- a/requirements.txt
+++ b/requirements.txt
@@ -0,0 +1,8 @@
 pytest
 numpy
 opencv-python
 opencv-python-headless
 onnx
 onnxruntime
 requests
 torch
--- a/scripts/release.sh
+++ b/scripts/release.sh
@@ -0,0 +1,23 @@
 #!/bin/bash
 # Build the source distribution and wheel for the current version and upload
 # them to PyPI. Run from anywhere; the script switches to the repository root.
 # Exit on errors, on unset variables, and on failures inside pipelines
 set -euo pipefail
 cd "$(dirname "$0")"/..
 echo "Deleting existing release-related files..."
 rm -rf dist/ build/ *.egg-info
 # Use "python3 -m pip" so packages land in the same interpreter that builds
 python3 -m pip install --upgrade pip
 python3 -m pip install --upgrade build twine
 echo "Creating a package for the current release (PyPI compatible)..."
 # "python -m build" replaces the deprecated direct "setup.py sdist bdist_wheel"
 # call and does not depend on "wheel" being pre-installed in the environment.
 python3 -m build
 echo "Release package created successfully in the 'dist/' folder."
 echo "Uploading the package to PyPI..."
 python3 -m twine upload dist/*
 echo "Release uploaded successfully!"
--- a/setup.py
+++ b/setup.py
@@ -0,0 +1,43 @@
 import os
 from setuptools import setup, find_packages
 # Use README.md (when present) as the long description shown on PyPI
 readme_path = "README.md"
 long_description = ""
 if os.path.exists(readme_path):
    with open(readme_path, "r", encoding="utf-8") as readme:
        long_description = readme.read()
 # Runtime dependencies installed together with the package
 runtime_requirements = [
    "numpy",
    "opencv-python",
    "onnx",
    "onnxruntime",
    "requests",
    "torch"
 ]
 # Optional dependency groups, e.g. `pip install .[dev]`
 optional_requirements = {
    "dev": ["pytest"],
 }
 # Trove classifiers advertised on the package index
 trove_classifiers = [
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.8",
    "Programming Language :: Python :: 3.9",
    "Programming Language :: Python :: 3.10",
    "License :: OSI Approved :: MIT License",
    "Operating System :: OS Independent",
    "Topic :: Software Development :: Libraries :: Python Modules",
 ]
 setup(
    name="uniface",
    version="0.1.0",
    description="UniFace: A Comprehensive Library for Face Detection, Recognition, Landmark Analysis, Age, and Gender Detection",
    long_description=long_description,
    long_description_content_type="text/markdown",
    author="Yakhyokhuja Valikhujaev",
    author_email="yakhyo9696@gmail.com",
    url="https://github.com/yakhyo/uniface",
    license="MIT",
    keywords="face detection, face recognition, facial landmark, facial attribute, onnx, opencv, retinaface",
    python_requires=">=3.8",
    packages=find_packages(),
    install_requires=runtime_requirements,
    extras_require=optional_requirements,
    classifiers=trove_classifiers,
 )
--- a/test.py
+++ b/test.py
@@ -0,0 +1,57 @@
 import os
 import cv2
 import numpy as np
 from uniface import RetinaFace, draw_detections
 def run_inference(image_path, save_image=False, vis_threshold=0.6):
    """
    Detect faces in one image file, draw them, and optionally write the result.
    Relies on the module-level ``retinaface_inference`` detector, which is
    created in the ``__main__`` section of this script.
    Args:
        image_path (str): Location of the image to process.
        save_image (bool): When True, write the annotated image to
            ``<input name>_out.jpg`` in the current working directory.
        vis_threshold (float): Threshold forwarded to ``draw_detections``.
    """
    frame = cv2.imread(image_path)
    # cv2.imread signals an unreadable file by returning None
    if frame is None:
        print(f"Error: Could not read image from {image_path}")
        return
    # Detect, then annotate the frame in place
    boxes, landmarks = retinaface_inference.detect(frame)
    draw_detections(frame, (boxes, landmarks), vis_threshold)
    if not save_image:
        return
    # Derive the output name from the input file name without its extension
    stem, _ = os.path.splitext(os.path.basename(image_path))
    save_name = f"{stem}_out.jpg"
    cv2.imwrite(save_name, frame)
    print(f"Image saved at '{save_name}'")
 if __name__ == '__main__':
    import time
    # Detector shared with run_inference() through the module namespace
    retinaface_inference = RetinaFace(
        model="retinaface_mnet_v2",
        conf_thresh=0.5,
        pre_nms_topk=5000,
        nms_thresh=0.4,
        post_nms_topk=750,
    )
    img_path = "assets/test.jpg"
    # Time repeated end-to-end runs (read, detect, draw, save) and report the mean
    num_runs = 50
    total_seconds = 0
    for _ in range(num_runs):
        started = time.time()
        run_inference(img_path, save_image=True, vis_threshold=0.6)
        elapsed = time.time() - started
        print(elapsed)
        total_seconds += elapsed
    print("avg", total_seconds / num_runs)
--- a/tests/test_retinaface.py
+++ b/tests/test_retinaface.py
@@ -0,0 +1,78 @@
 import pytest
 import numpy as np
 from uniface import RetinaFace
@pytest.fixture
 def retinaface_model():
    """
    Provide a RetinaFace detector built with the settings shared by all tests.
    """
    settings = dict(
        model="retinaface_mnet_v2",
        conf_thresh=0.5,
        pre_nms_topk=5000,
        nms_thresh=0.4,
        post_nms_topk=750,
    )
    return RetinaFace(**settings)
 def test_model_initialization(retinaface_model):
    """
    The fixture must hand back a constructed detector object, not None.
    """
    model_created = retinaface_model is not None
    assert model_created, "Model initialization failed."
 def test_inference_on_640x640_image(retinaface_model):
    """
    Test inference on a random 640x640 BGR image and validate the output layout.
    """
    # Generate a mock 640x640 BGR image. randint's upper bound is exclusive,
    # so 256 is required for the full uint8 range 0..255.
    mock_image = np.random.randint(0, 256, (640, 640, 3), dtype=np.uint8)
    # Run inference
    detections, landmarks = retinaface_model.detect(mock_image)
    # Check output types
    assert isinstance(detections, np.ndarray), "Detections should be a numpy array."
    assert isinstance(landmarks, np.ndarray), "Landmarks should be a numpy array."
    # Shape checks only apply when something was detected in the noise image
    if detections.size > 0:
        assert detections.shape[1] == 5, "Each detection should have 5 values (x1, y1, x2, y2, score)."
    if landmarks.size > 0:
        assert landmarks.shape[1:] == (5, 2), "Landmarks should have shape (N, 5, 2)."
 def test_confidence_threshold(retinaface_model):
    """
    Test that every returned detection scores at least the 0.5 threshold
    the fixture configures.
    """
    # Generate a mock 640x640 BGR image. randint's upper bound is exclusive,
    # so 256 is required for the full uint8 range 0..255.
    mock_image = np.random.randint(0, 256, (640, 640, 3), dtype=np.uint8)
    # Run inference
    detections, _ = retinaface_model.detect(mock_image)
    # The score is the fifth column of each detection row
    if detections.size > 0:  # If faces are detected
        confidence_scores = detections[:, 4]
        assert (confidence_scores >= 0.5).all(), "Some detections have confidence below the threshold."
 def test_no_faces_detected(retinaface_model):
    """
    An all-black image must produce neither detections nor landmarks.
    """
    # All-zero 640x640 three-channel image
    blank_shape = (640, 640, 3)
    empty_image = np.zeros(blank_shape, dtype=np.uint8)
    detections, landmarks = retinaface_model.detect(empty_image)
    # Both outputs are expected to be empty arrays
    assert detections.size == 0, "Detections should be empty for a blank image."
    assert landmarks.size == 0, "Landmarks should be empty for a blank image."
--- a/uniface/init.py
+++ b/uniface/init.py
@@ -0,0 +1,28 @@
 # Copyright 2024 Yakhyokhuja Valikhujaev
 #
 # Licensed under the MIT License.
 # You may obtain a copy of the License at
 #
 #     https://opensource.org/licenses/MIT
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from uniface.retinaface import RetinaFace
 from uniface.log import Logger
 from uniface.model_store import verify_model_weights
 from uniface.version import __version__, __author__
 from uniface.visualization import draw_detections
 # Public API of the package. Every entry needs its own trailing comma:
 # adjacent string literals are silently concatenated, which would produce a
 # bogus name and break `from uniface import *`.
 __all__ = [
    "__version__",
    "__author__",
    "RetinaFace",
    "Logger",
    "verify_model_weights",
    "draw_detections",
 ]
--- a/uniface/common.py
+++ b/uniface/common.py
@@ -0,0 +1,178 @@
 # Copyright 2024 Yakhyokhuja Valikhujaev
 # Author: Yakhyokhuja Valikhujaev
 # GitHub: https://github.com/yakhyo
 import cv2
 import math
 import itertools
 import numpy as np
 import torch
 from typing import Tuple, List
 def resize_image(frame, target_shape: Tuple[int, int] = (640, 640)) -> Tuple[np.ndarray, float]:
    """
    Scale an image to fit inside ``target_shape`` without changing its aspect
    ratio and paste it into the top-left corner of a black canvas.
    Args:
        frame (np.ndarray): Input image.
        target_shape (Tuple[int, int]): Canvas size as (width, height). Defaults to (640, 640).
    Returns:
        Tuple[np.ndarray, float]: The ``(height, width, 3)`` uint8 canvas holding the
        resized image, and the factor by which the original height was scaled.
    """
    canvas_w, canvas_h = target_shape
    src_h, src_w = frame.shape[0], frame.shape[1]
    # Height/width ratios decide which side of the canvas limits the resize
    src_ratio = float(src_h) / src_w
    canvas_ratio = canvas_h / canvas_w
    if src_ratio > canvas_ratio:
        # Source is relatively taller: fill the height, derive the width
        scaled_h = canvas_h
        scaled_w = int(scaled_h / src_ratio)
    else:
        # Source is relatively wider (or equal): fill the width, derive the height
        scaled_w = canvas_w
        scaled_h = int(scaled_w * src_ratio)
    resize_factor = float(scaled_h) / src_h
    # Black canvas; the area not covered by the resized image stays zero
    canvas = np.zeros((canvas_h, canvas_w, 3), dtype=np.uint8)
    canvas[:scaled_h, :scaled_w, :] = cv2.resize(frame, (scaled_w, scaled_h))
    return canvas, resize_factor
 def generate_anchors(image_size: Tuple[int, int] = (640, 640)) -> torch.Tensor:
    """
    Generate normalized anchor (prior) boxes for a given input size.
    One anchor is produced per feature-map cell and per anchor size, for
    strides 8, 16 and 32 with two anchor sizes each.
    Args:
        image_size (Tuple[int, int]): Input size. NOTE: this function uses
            index 0 as the height and index 1 as the width, i.e. (height, width).
            The two orders are equivalent for square inputs such as the
            default (640, 640).
    Returns:
        torch.Tensor: Tensor of shape [num_anchors, 4] with rows
        (cx, cy, width, height), each divided by the corresponding image dimension.
    """
    steps = [8, 16, 32]
    min_sizes = [[16, 32], [64, 128], [256, 512]]
    img_height, img_width = image_size[0], image_size[1]
    anchors = []
    for step, sizes in zip(steps, min_sizes):
        # Feature-map resolution at this stride
        map_height = math.ceil(img_height / step)
        map_width = math.ceil(img_width / step)
        for i, j in itertools.product(range(map_height), range(map_width)):
            # Cell center, normalized to [0, 1]
            cx = (j + 0.5) * step / img_width
            cy = (i + 0.5) * step / img_height
            for min_size in sizes:
                anchors += [cx, cy, min_size / img_width, min_size / img_height]
    return torch.Tensor(anchors).view(-1, 4)
 def nms(dets: np.ndarray, threshold: float) -> List[int]:
    """
    Apply greedy Non-Maximum Suppression (NMS) to overlapping bounding boxes.
    Boxes are visited in descending score order; each kept box removes every
    remaining box whose IoU with it exceeds ``threshold``.
    Args:
        dets (numpy.ndarray): 2-D array of detections with each row as [x1, y1, x2, y2, score].
        threshold (float): IoU threshold for suppression.
    Returns:
        List[int]: Row indices into ``dets`` of the boxes retained, ordered by descending score.
    """
    x1, y1, x2, y2, scores = (dets[:, col] for col in range(5))
    # The "+ 1" follows the inclusive pixel-coordinate convention used below too
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]
    keep = []
    while order.size > 0:
        best, rest = order[0], order[1:]
        keep.append(int(best))
        # Intersection rectangle between the best box and each remaining box
        xx1 = np.maximum(x1[best], x1[rest])
        yy1 = np.maximum(y1[best], y1[rest])
        xx2 = np.minimum(x2[best], x2[rest])
        yy2 = np.minimum(y2[best], y2[rest])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        ovr = inter / (areas[best] + areas[rest] - inter)
        # Only boxes that do not overlap the kept one too much stay candidates
        order = rest[ovr <= threshold]
    return keep
 def decode_boxes(loc, priors, variances=(0.1, 0.2)) -> torch.Tensor:
    """
    Decode locations from predictions using priors to undo
    the encoding done for offset regression at train time.
    Args:
        loc (tensor): Location predictions for loc layers, shape: [num_priors, 4]
        priors (tensor): Prior boxes in center-offset form, shape: [num_priors, 4]
        variances (Sequence[float]): Variances of prior boxes; index 0 scales the
            center offsets and index 1 scales the size offsets. The default is an
            immutable tuple so it cannot be altered between calls.
    Returns:
        tensor: Decoded boxes as (xmin, ymin, xmax, ymax), shape: [num_priors, 4]
    """
    # Compute centers of predicted boxes
    cxcy = priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:]
    # Compute widths and heights of predicted boxes
    wh = priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])
    # Convert center, size to corner coordinates; the output keeps loc's dtype/device
    half_wh = wh / 2
    boxes = torch.empty_like(loc)
    boxes[:, :2] = cxcy - half_wh  # xmin, ymin
    boxes[:, 2:] = cxcy + half_wh  # xmax, ymax
    return boxes
 def decode_landmarks(predictions, priors, variances=(0.1, 0.2)) -> torch.Tensor:
    """
    Decode landmarks from predictions using prior boxes to reverse the encoding done during training.
    Args:
        predictions (tensor): Landmark predictions for localization layers.
            Shape: [num_priors, 10] where each prior contains 5 landmark (x, y) pairs.
        priors (tensor): Prior boxes in center-offset form.
            Shape: [num_priors, 4], where each prior has (cx, cy, width, height).
        variances (Sequence[float]): Variances of the prior boxes; only index 0 is
            used here. The default is an immutable tuple so it cannot be altered
            between calls.
    Returns:
        landmarks (tensor): Decoded landmark predictions.
            Shape: [num_priors, 10] where each row contains the decoded (x, y) pairs for 5 landmarks.
    """
    num_priors = predictions.size(0)
    # [num_priors, 5, 2] so all five (x, y) pairs are decoded in one broadcast;
    # reshape (rather than view) also accepts non-contiguous inputs
    offsets = predictions.reshape(num_priors, 5, 2)
    centers = priors[:, :2].unsqueeze(1)
    sizes = priors[:, 2:].unsqueeze(1)
    landmarks = centers + offsets * variances[0] * sizes
    # Flatten back to [num_priors, 10]
    return landmarks.reshape(num_priors, -1)
--- a/uniface/constants.py
+++ b/uniface/constants.py
@@ -0,0 +1,26 @@
 # Copyright 2024 Yakhyokhuja Valikhujaev
 # Author: Yakhyokhuja Valikhujaev
 # GitHub: https://github.com/yakhyo
 from typing import Dict
 # Download URL of the ONNX weights file for each supported model name.
 MODEL_URLS: Dict[str, str] = {
    'retinaface_mnet025': 'https://github.com/yakhyo/uniface/releases/download/v0.0.1/retinaface_mv1_0.25.onnx',
    'retinaface_mnet050': 'https://github.com/yakhyo/uniface/releases/download/v0.0.1/retinaface_mv1_0.50.onnx',
    'retinaface_mnet_v1': 'https://github.com/yakhyo/uniface/releases/download/v0.0.1/retinaface_mv1.onnx',
    'retinaface_mnet_v2': 'https://github.com/yakhyo/uniface/releases/download/v0.0.1/retinaface_mv2.onnx',
    'retinaface_r18': 'https://github.com/yakhyo/uniface/releases/download/v0.0.1/retinaface_r18.onnx',
    'retinaface_r34': 'https://github.com/yakhyo/uniface/releases/download/v0.0.1/retinaface_r34.onnx'
 }
 # Expected SHA-256 hex digest of each weights file, keyed by the same model
 # names as MODEL_URLS; compared against the downloaded file's digest.
 MODEL_SHA256: Dict[str, str] = {
    'retinaface_mnet025': 'b7a7acab55e104dce6f32cdfff929bd83946da5cd869b9e2e9bdffafd1b7e4a5',
    'retinaface_mnet050': 'd8977186f6037999af5b4113d42ba77a84a6ab0c996b17c713cc3d53b88bfc37',
    'retinaface_mnet_v1': '75c961aaf0aff03d13c074e9ec656e5510e174454dd4964a161aab4fe5f04153',
    'retinaface_mnet_v2': '3ca44c045651cabeed1193a1fae8946ad1f3a55da8fa74b341feab5a8319f757',
    'retinaface_r18': 'e8b5ddd7d2c3c8f7c942f9f10cec09d8e319f78f09725d3f709631de34fb649d',
    'retinaface_r34': 'bd0263dc2a465d32859555cb1741f2d98991eb0053696e8ee33fec583d30e630'
 }
 # Number of bytes handled per read/write when downloading and hashing files.
 CHUNK_SIZE = 8192
--- a/uniface/log.py
+++ b/uniface/log.py
@@ -0,0 +1,7 @@
 import logging
 # Configure logging once at import time: INFO level, timestamped messages.
 # NOTE(review): basicConfig acts on the root logger, so importing this module
 # can also affect the logging output of the host application — confirm that
 # this is intended for a library.
 logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s"
 )
 # Shared logger used across the package, registered under the name "retinaface".
 Logger = logging.getLogger("retinaface")
--- a/uniface/model_store.py
+++ b/uniface/model_store.py
@@ -0,0 +1,102 @@
 # Copyright 2024 Yakhyokhuja Valikhujaev
 # Author: Yakhyokhuja Valikhujaev
 # GitHub: https://github.com/yakhyo
 import os
 import hashlib
 import requests
 from uniface.log import Logger
 import uniface.constants as const
 def verify_model_weights(model_name: str, root: str = '~/.uniface/models') -> str:
    """
    Return the path to a model's weights, downloading and hash-checking them as needed.
    The weights are looked up as ``<root>/<model_name>.onnx``. When the file is absent it is
    fetched from the URL registered for ``model_name``. If a SHA-256 digest is registered for
    the model, the file is checked against it; on mismatch the file is deleted and an error
    is raised.
    Args:
        model_name (str): Name of the model weights to verify or download.
        root (str, optional): Directory to store the model weights. Defaults to '~/.uniface/models'.
    Returns:
        str: Path to the verified model weights file.
    Raises:
        ValueError: If no URL is registered for a missing model or if the hash check fails.
        ConnectionError: If downloading the file fails.
    Examples:
        >>> # Download and verify 'retinaface_mnet025' weights
        >>> verify_model_weights('retinaface_mnet025')
        '/home/user/.uniface/models/retinaface_mnet025.onnx'
        >>> # Use a custom directory
        >>> verify_model_weights('retinaface_r34', root='/custom/dir')
        '/custom/dir/retinaface_r34.onnx'
    """
    weights_dir = os.path.expanduser(root)
    os.makedirs(weights_dir, exist_ok=True)
    model_path = os.path.join(weights_dir, f'{model_name}.onnx')
    # Step 1: fetch the weights when they are not cached locally yet
    already_cached = os.path.exists(model_path)
    if not already_cached:
        url = const.MODEL_URLS.get(model_name)
        if not url:
            Logger.error(f"No URL found for model '{model_name}'")
            raise ValueError(f"No URL found for model '{model_name}'")
        Logger.info(f"Downloading '{model_name}' from {url}")
        download_file(url, model_path)
        Logger.info(f"Successfully '{model_name}' downloaded to {model_path}")
    # Step 2: integrity check, skipped when no digest is registered for the model
    expected_hash = const.MODEL_SHA256.get(model_name)
    if not expected_hash:
        return model_path
    if verify_file_hash(model_path, expected_hash):
        return model_path
    # Digest mismatch: delete the file so a later call downloads it again
    os.remove(model_path)
    Logger.warning("Corrupted weight detected. Removing...")
    raise ValueError(f"Hash mismatch for '{model_name}'. The file may be corrupted; please try downloading again.")
 def download_file(url: str, dest_path: str, timeout: float = 30.0) -> None:
    """
    Download a file from a URL in chunks and save it to the destination path.
    Args:
        url (str): Address of the file to download.
        dest_path (str): Local path the content is written to.
        timeout (float): Seconds to wait for the server to connect or to send
            data before giving up. This is not a limit on the total download time.
    Raises:
        ConnectionError: If the request fails or the transfer is interrupted.
            A partially written destination file is removed first.
    """
    file_opened = False
    try:
        # The context manager releases the connection even when an error occurs;
        # the timeout prevents an unresponsive server from blocking forever.
        with requests.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()
            with open(dest_path, "wb") as file:
                file_opened = True
                for chunk in response.iter_content(chunk_size=const.CHUNK_SIZE):
                    if chunk:
                        file.write(chunk)
    except requests.RequestException as e:
        # Do not leave a truncated file behind; only touch the destination if
        # this call actually started writing to it.
        if file_opened:
            try:
                os.remove(dest_path)
            except OSError:
                pass
        raise ConnectionError(f"Failed to download file from {url}. Error: {e}") from e
 def verify_file_hash(file_path: str, expected_hash: str) -> bool:
    """Return True when the file's SHA-256 hex digest equals ``expected_hash``; log a warning otherwise."""
    digest = hashlib.sha256()
    # Feed the file to the hash in fixed-size pieces instead of reading it whole
    with open(file_path, "rb") as stream:
        while True:
            block = stream.read(const.CHUNK_SIZE)
            if not block:
                break
            digest.update(block)
    actual_hash = digest.hexdigest()
    hashes_match = actual_hash == expected_hash
    if not hashes_match:
        Logger.warning(f"Expected hash: {expected_hash}, but got: {actual_hash}")
    return hashes_match
 if __name__ == "__main__":
    # Running the module as a script fetches and verifies each listed model in turn
    for model_name in (
        'retinaface_mnet025',
        'retinaface_mnet050',
        'retinaface_mnet_v1',
        'retinaface_mnet_v2',
        'retinaface_r18',
        'retinaface_r34',
    ):
        model_path = verify_model_weights(model_name)
--- a/uniface/retinaface.py
+++ b/uniface/retinaface.py
@@ -0,0 +1,256 @@
 # Copyright 2024 Yakhyokhuja Valikhujaev
 # Author: Yakhyokhuja Valikhujaev
 # GitHub: https://github.com/yakhyo
 import os
 import cv2
 import numpy as np
 import onnxruntime as ort
 import torch
 from typing import Tuple, List, Optional, Literal
 from uniface.log import Logger
 from uniface.model_store import verify_model_weights
 from uniface.common import (
    nms,
    resize_image,
    decode_boxes,
    generate_anchors,
    decode_landmarks
 )
 class RetinaFace:
    """
    A class for face detection using the RetinaFace model.

    Args:
        model (str): Path or identifier of the model weights.
        conf_thresh (float): Confidence threshold for detections. Defaults to 0.5.
        nms_thresh (float): Non-maximum suppression threshold. Defaults to 0.4.
        pre_nms_topk (int): Maximum number of detections before NMS. Defaults to 5000.
        post_nms_topk (int): Maximum number of detections after NMS. Defaults to 750.
        dynamic_size (Optional[bool]): Whether to adjust anchor generation dynamically based on image size. Defaults to False.
        input_size (Optional[Tuple[int, int]]): Static input size for the model (width, height). Defaults to (640, 640).

    Attributes:
        conf_thresh (float): Confidence threshold for filtering detections.
        nms_thresh (float): Threshold for NMS to remove duplicate detections.
        pre_nms_topk (int): Maximum detections to consider before applying NMS.
        post_nms_topk (int): Maximum detections retained after applying NMS.
        dynamic_size (bool): Indicates if input size and anchors are dynamically adjusted.
        input_size (Tuple[int, int]): The model's input image size.
        _model_path (str): Path to the model weights.
        _priors (torch.Tensor): Precomputed anchor boxes for static input size.
    """

    def __init__(
        self,
        model: str,
        conf_thresh: float = 0.5,
        nms_thresh: float = 0.4,
        pre_nms_topk: int = 5000,
        post_nms_topk: int = 750,
        dynamic_size: Optional[bool] = False,
        input_size: Optional[Tuple[int, int]] = (640, 640),  # Default input size if dynamic_size=False
    ) -> None:
        self.conf_thresh = conf_thresh
        self.nms_thresh = nms_thresh
        self.pre_nms_topk = pre_nms_topk
        self.post_nms_topk = post_nms_topk
        self.dynamic_size = dynamic_size
        self.input_size = input_size

        Logger.info(
            f"Initializing RetinaFace with model={model}, conf_thresh={conf_thresh}, nms_thresh={nms_thresh}, "
            f"pre_nms_topk={pre_nms_topk}, post_nms_topk={post_nms_topk}, dynamic_size={dynamic_size}, "
            f"input_size={input_size}"
        )

        # Get path to model weights
        self._model_path = verify_model_weights(model)
        Logger.info(f"Verified model weights located at: {self._model_path}")

        # Precompute anchors if using static size
        # NOTE(review): input_size is documented as (width, height) while detect()
        # passes (height, width) in dynamic mode; confirm the order generate_anchors expects.
        if not dynamic_size and input_size is not None:
            self._priors = generate_anchors(image_size=input_size)
            Logger.debug("Generated anchors for static input size.")

        # Initialize model
        self._initialize_model(self._model_path)

    def _initialize_model(self, model_path: str) -> None:
        """
        Initializes an ONNX model session from the given path.

        Args:
            model_path (str): The file path to the ONNX model.

        Raises:
            RuntimeError: If the model fails to load, logs an error and raises an exception.
        """
        try:
            self.session = ort.InferenceSession(model_path)
            self.input_name = self.session.get_inputs()[0].name
            Logger.info(f"Successfully initialized the model from {model_path}")
        except Exception as e:
            Logger.error(f"Failed to load model from '{model_path}': {e}")
            raise RuntimeError(f"Failed to initialize model session for '{model_path}'") from e

    def preprocess(self, image: np.ndarray) -> np.ndarray:
        """Preprocess input image for model inference.

        Subtracts the per-channel offsets (104, 117, 123) and reorders the
        array from HWC to a batched CHW layout.

        Args:
            image (np.ndarray): Input image of shape (H, W, 3).

        Returns:
            np.ndarray: Preprocessed float32 image tensor with shape (1, C, H, W)
        """
        image = np.float32(image) - np.array([104, 117, 123], dtype=np.float32)
        image = image.transpose(2, 0, 1)  # HWC to CHW
        image = np.expand_dims(image, axis=0)  # Add batch dimension (1, C, H, W)
        return image

    def inference(self, input_tensor: np.ndarray) -> List[np.ndarray]:
        """Perform model inference on the preprocessed image tensor.

        Args:
            input_tensor (np.ndarray): Preprocessed input tensor.

        Returns:
            List[np.ndarray]: Raw model outputs, in the order the session returns them.
        """
        return self.session.run(None, {self.input_name: input_tensor})

    def detect(
        self,
        image: np.ndarray,
        max_num: Optional[int] = 0,
        metric: Literal["default", "max"] = "default",
        center_weight: Optional[float] = 2.0
    ) -> Tuple[np.ndarray, np.ndarray]:
        """
        Perform face detection on an input image and return bounding boxes and landmarks.

        Args:
            image (np.ndarray): Input image as a NumPy array of shape (height, width, channels).
            max_num (int, optional): Maximum number of detections to return. ``0`` or ``None``
                returns every detection. Defaults to 0.
            metric (str, optional): Metric for ranking detections when `max_num` is specified.
                Options:
                - "default": Prioritize detections closer to the image center.
                - "max": Prioritize detections with larger bounding box areas.
            center_weight (float, optional): Weight for penalizing detections farther from the image center
                when using the "default" metric. Defaults to 2.0.

        Returns:
            Tuple[np.ndarray, np.ndarray]: Detection results containing:
                - detections (np.ndarray): Array of detected bounding boxes with confidence scores.
                Shape: (num_detections, 5), where each row is [x_min, y_min, x_max, y_max, score].
                - landmarks (np.ndarray): Array of detected facial landmarks.
                Shape: (num_detections, 5, 2), where each row contains 5 landmark points (x, y).
        """
        # Detections are reported in the caller's image coordinates, so keep the
        # original size around for center-based ranking.
        original_height, original_width = image.shape[:2]

        if self.dynamic_size:
            height, width, _ = image.shape
            self._priors = generate_anchors(image_size=(height, width))  # generate anchors for each input image
            resize_factor = 1.0  # No resizing
        else:
            image, resize_factor = resize_image(image, target_shape=self.input_size)

        height, width, _ = image.shape
        image_tensor = self.preprocess(image)

        # ONNXRuntime inference
        outputs = self.inference(image_tensor)

        # Postprocessing
        detections, landmarks = self.postprocess(outputs, resize_factor, shape=(width, height))

        if max_num is not None and max_num > 0 and detections.shape[0] > max_num:
            detections, landmarks = self._select_top_detections(
                detections,
                landmarks,
                max_num,
                metric,
                center_weight,
                image_shape=(original_height, original_width),
            )

        return detections, landmarks

    @staticmethod
    def _select_top_detections(
        detections: np.ndarray,
        landmarks: np.ndarray,
        max_num: int,
        metric: str,
        center_weight: float,
        image_shape: Tuple[int, int],
    ) -> Tuple[np.ndarray, np.ndarray]:
        """Keep the `max_num` best-ranked detections and their landmarks.

        Args:
            detections (np.ndarray): Rows of [x_min, y_min, x_max, y_max, score].
            landmarks (np.ndarray): Landmarks aligned row-by-row with `detections`.
            max_num (int): Number of detections to keep.
            metric (str): "max" ranks by box area; any other value ranks by area
                minus the weighted squared distance of the box center to the image center.
            center_weight (float): Multiplier of the squared center distance.
            image_shape (Tuple[int, int]): (height, width) of the image whose
                coordinate system the boxes are expressed in.

        Returns:
            Tuple[np.ndarray, np.ndarray]: The selected detections and landmarks,
            ordered from best to worst rank.
        """
        # Calculate area of detections
        areas = (detections[:, 2] - detections[:, 0]) * (detections[:, 3] - detections[:, 1])

        if metric == 'max':
            scores = areas
        else:
            # Calculate offsets of the box centers from the image center
            center_y, center_x = image_shape[0] // 2, image_shape[1] // 2
            offsets = np.vstack([
                (detections[:, 0] + detections[:, 2]) / 2 - center_x,
                (detections[:, 1] + detections[:, 3]) / 2 - center_y
            ])
            offset_dist_squared = np.sum(np.power(offsets, 2.0), axis=0)
            scores = areas - offset_dist_squared * center_weight

        # Sort by scores and select top `max_num`
        sorted_indices = np.argsort(scores)[::-1][:max_num]
        return detections[sorted_indices], landmarks[sorted_indices]

    def postprocess(self, outputs: List[np.ndarray], resize_factor: float, shape: Tuple[int, int]) -> Tuple[np.ndarray, np.ndarray]:
        """
        Process the model outputs into final detection results.

        Args:
            outputs (List[np.ndarray]): Raw outputs from the detection model.
                - outputs[0]: Location predictions (bounding box coordinates).
                - outputs[1]: Class confidence scores.
                - outputs[2]: Landmark predictions.
            resize_factor (float): Factor used to resize the input image during preprocessing.
            shape (Tuple[int, int]): Size of the image fed to the model as (width, height);
                shape[0] scales x coordinates and shape[1] scales y coordinates.

        Returns:
            Tuple[np.ndarray, np.ndarray]: Processed results containing:
                - detections (np.ndarray): Array of detected bounding boxes with confidence scores.
                Shape: (num_detections, 5), where each row is [x_min, y_min, x_max, y_max, score].
                - landmarks (np.ndarray): Array of detected facial landmarks.
                Shape: (num_detections, 5, 2), where each row contains 5 landmark points (x, y).
        """
        loc, conf, landmarks = outputs[0].squeeze(0), outputs[1].squeeze(0), outputs[2].squeeze(0)

        # Decode boxes and landmarks
        boxes = decode_boxes(torch.tensor(loc), self._priors).cpu().numpy()
        landmarks = decode_landmarks(torch.tensor(landmarks), self._priors).cpu().numpy()
        boxes, landmarks = self._scale_detections(boxes, landmarks, resize_factor, shape=(shape[0], shape[1]))

        # Extract confidence scores for the face class
        scores = conf[:, 1]
        mask = scores > self.conf_thresh

        # Filter by confidence threshold
        boxes, landmarks, scores = boxes[mask], landmarks[mask], scores[mask]

        # Sort by scores
        order = scores.argsort()[::-1][:self.pre_nms_topk]
        boxes, landmarks, scores = boxes[order], landmarks[order], scores[order]

        # Apply NMS
        detections = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
        keep = nms(detections, self.nms_thresh)
        detections, landmarks = detections[keep], landmarks[keep]

        # Keep top-k detections
        detections, landmarks = detections[:self.post_nms_topk], landmarks[:self.post_nms_topk]
        landmarks = landmarks.reshape(-1, 5, 2).astype(np.int32)
        return detections, landmarks

    def _scale_detections(self, boxes: np.ndarray, landmarks: np.ndarray, resize_factor: float, shape: Tuple[int, int]) -> Tuple[np.ndarray, np.ndarray]:
        """Scale bounding boxes and landmarks to the original image size.

        Each coordinate pair is multiplied by (shape[0], shape[1]) and divided
        by `resize_factor`.
        """
        bbox_scale = np.array([shape[0], shape[1]] * 2)
        boxes = boxes * bbox_scale / resize_factor
        landmark_scale = np.array([shape[0], shape[1]] * 5)
        landmarks = landmarks * landmark_scale / resize_factor
        return boxes, landmarks
--- a/uniface/version.py
+++ b/uniface/version.py
@@ -0,0 +1,15 @@
 # Copyright 2024 Yakhyokhuja Valikhujaev
 #
 # Licensed under the MIT License.
 # You may obtain a copy of the License at
 #
 #     https://opensource.org/licenses/MIT
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # Package metadata: version string and author name.
 __version__ = "0.1.2"
 __author__ = "Yakhyokhuja Valikhujaev"
--- a/uniface/visualization.py
+++ b/uniface/visualization.py
@@ -0,0 +1,38 @@
 # Copyright 2024 Yakhyokhuja Valikhujaev
 # Author: Yakhyokhuja Valikhujaev
 # GitHub: https://github.com/yakhyo
 import cv2
 import numpy as np
 def draw_detections(image, detections, vis_threshold=0.6):
    """
    Draw bounding boxes and landmarks on the image.

    The image is modified in place and nothing is returned. The number of
    detections that pass the threshold is printed to stdout.

    Args:
        image (ndarray): Image to draw detections on.
        detections (tuple): (bounding boxes, landmarks) as NumPy arrays. Column 4
            of the bounding-box array holds the confidence score.
        vis_threshold (float): Confidence threshold for filtering detections.
    """
    _colors = [(0, 0, 255), (0, 255, 255), (255, 0, 255), (0, 255, 0), (255, 0, 0)]

    # Unpack detections
    boxes, landmarks = detections
    scores = boxes[:, 4]

    # Filter detections by confidence threshold
    filtered = scores >= vis_threshold
    boxes = boxes[filtered, :4].astype(np.int32)
    landmarks = landmarks[filtered]
    scores = scores[filtered]

    print(f"#faces: {len(scores)}")

    # Draw bounding boxes, scores, and landmarks
    for box, score, landmark in zip(boxes, scores, landmarks):
        # Hand OpenCV plain Python ints: it does not accept every NumPy
        # scalar/array type as a point, so coerce the coordinates explicitly.
        x_min, y_min, x_max, y_max = (int(value) for value in box)
        cv2.rectangle(image, (x_min, y_min), (x_max, y_max), (0, 0, 255), 2)
        cv2.putText(image, f"{score:.2f}", (x_min, y_min + 12), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
        for point, color in zip(landmark, _colors):
            cv2.circle(image, (int(point[0]), int(point[1])), 2, color, -1)