commit 6dbf4e6b8799c1485f72754b20467aa0ca1cdb0a
Author: yakhyo
Date:   Wed Nov 20 08:43:25 2024 +0000

    Initial commit

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
new file mode 100644
index 0000000..0f86164
--- /dev/null
+++ b/.github/workflows/build.yml
@@ -0,0 +1,64 @@
+name: Build, Test, and Publish
+
+on:
+  push:
+    branches:
+      - main
+    tags:
+      - "v*.*.*"  # Trigger publish on version tags
+  pull_request:
+    branches:
+      - main
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+
+    strategy:
+      matrix:
+        python-version: ["3.8", "3.9", "3.10"]
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v3
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          python -m pip install .[dev] || pip install pytest  # Use extras_require if available
+
+      - name: Run Tests
+        run: |
+          pytest
+
+  publish:
+    runs-on: ubuntu-latest
+    needs: build  # Publish only if tests pass
+    if: startsWith(github.ref, 'refs/tags/v')  # Gate publishing on version tags, matching the trigger comment above
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v3
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.10"  # Use a single Python version for publishing
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          python -m pip install build twine
+
+      - name: Build Package
+        run: python -m build
+
+      - name: Publish to PyPI
+        env:
+          TWINE_USERNAME: __token__
+          TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
+        run: twine upload dist/*
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..82f9275
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,162 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/latest/usage/project/#working-with-version-control +.pdm.toml +.pdm-python +.pdm-build/ + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..7646fd5 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024 Yakhyokhuja Valikhujaev + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..c4e406e --- /dev/null +++ b/README.md @@ -0,0 +1,239 @@ +# UniFace: All-in-One Face Analysis Library + +
+ +[![License](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT) +![Python](https://img.shields.io/badge/Python-3.8%2B-blue) +[![PyPI Version](https://img.shields.io/pypi/v/uniface.svg)](https://pypi.org/project/uniface/) +[![Build Status](https://github.com/yakhyo/uniface/actions/workflows/build.yml/badge.svg)](https://github.com/yakhyo/uniface/actions) +[![Downloads](https://pepy.tech/badge/uniface)](https://pepy.tech/project/uniface) +[![Code Style: PEP8](https://img.shields.io/badge/code%20style-PEP8-green.svg)](https://www.python.org/dev/peps/pep-0008/) +[![GitHub Release Downloads](https://img.shields.io/github/downloads/yakhyo/uniface/total.svg?label=Model%20Downloads)](https://github.com/yakhyo/uniface/releases) + +
+
+**uniface** is a lightweight face detection library designed for high-performance face localization and landmark detection. The library supports ONNX models and provides utilities for bounding box visualization and landmark plotting. To train the RetinaFace model, see https://github.com/yakhyo/retinaface-pytorch.
+
+---
+
+## Features
+- [ ] Age and gender detection (Planned).
+- [ ] Face recognition (Planned).
+- [x] High-speed face detection using ONNX models (Added: 2024-11-20).
+- [x] Accurate facial landmark localization (e.g., eyes, nose, and mouth) (Added: 2024-11-20).
+- [x] Easy-to-use API for inference and visualization (Added: 2024-11-20).
+
+---
+
+## Installation
+
+### Using pip
+
+```bash
+pip install uniface
+```
+
+### Local installation using pip
+
+**Clone the repository**
+
+```bash
+git clone https://github.com/yakhyo/uniface.git
+cd uniface
+```
+
+**Install using pip**
+
+```bash
+pip install .
+```
+
+---
+
+## Quick Start
+
+### Initialize the Model
+
+```python
+from uniface import RetinaFace
+
+# Initialize the RetinaFace model
+uniface_inference = RetinaFace(
+    model="retinaface_mnet_v2",  # Model name
+    conf_thresh=0.5,             # Confidence threshold
+    pre_nms_topk=5000,           # Pre-NMS Top-K detections
+    nms_thresh=0.4,              # NMS IoU threshold
+    post_nms_topk=750            # Post-NMS Top-K detections
+)
+```
+
+### Run Inference
+
+Inference on an image (reusing `uniface_inference` from the previous step):
+
+```python
+import cv2
+from uniface.visualization import draw_detections
+
+# Load an image
+image_path = "assets/test.jpg"
+original_image = cv2.imread(image_path)
+
+# Perform inference
+boxes, landmarks = uniface_inference.detect(original_image)
+
+# Visualize results
+draw_detections(original_image, (boxes, landmarks), vis_threshold=0.6)
+
+# Save the output image
+output_path = "output.jpg"
+cv2.imwrite(output_path, original_image)
+print(f"Saved output image to {output_path}")
+```
+
+Inference on webcam video (again reusing `uniface_inference`):
+
+```python
+import cv2
+from uniface.visualization import draw_detections
+
+# Initialize the webcam
+cap = cv2.VideoCapture(0)
+
+if not cap.isOpened():
+    print("Error: Unable to access the webcam.")
+    exit()
+
+while True:
+    # Capture a frame from the webcam
+    ret, frame = cap.read()
+    if not ret:
+        print("Error: Failed to read frame.")
+        break
+
+    # Perform inference
+    boxes, landmarks = uniface_inference.detect(frame)
+
+    # Draw detections on the frame
+    draw_detections(frame, (boxes, landmarks), vis_threshold=0.6)
+
+    # Display the output
+    cv2.imshow("Webcam Inference", frame)
+
+    # Exit if 'q' is pressed
+    if cv2.waitKey(1) & 0xFF == ord('q'):
+        break
+
+# Release the webcam and close all OpenCV windows
+cap.release()
+cv2.destroyAllWindows()
+```
+
+---
+
+### Evaluation results of available models on WiderFace
+
+| RetinaFace Models  | Easy       | Medium     | Hard       |
+| ------------------ | ---------- | ---------- | ---------- |
+| retinaface_mnet025 | 88.48%     | 87.02%     | 80.61%     |
+| retinaface_mnet050 | 89.42%     | 87.97%     | 82.40%     |
+| retinaface_mnet_v1 | 90.59%     | 89.14%     | 84.13%     |
+| retinaface_mnet_v2 | 91.70%     | 91.03%     | 86.60%     |
+| retinaface_r18     | 92.50%     | 91.02%     | 86.63%     |
+| retinaface_r34     | **94.16%** | **93.12%** | **88.90%** |
+
+## API Reference
+
+### `RetinaFace` Class
+
+#### Initialization
+```python
+RetinaFace(
+    model: str,
+    conf_thresh: float = 0.5,
+    pre_nms_topk: int = 5000,
+    nms_thresh: float = 0.4,
+    post_nms_topk: int = 750
+)
+```
+
+**Parameters**:
+- `model` *(str)*: Name of the model to use. Supported models:
+  - `retinaface_mnet025`, `retinaface_mnet050`, `retinaface_mnet_v1`, `retinaface_mnet_v2`
+  - `retinaface_r18`, `retinaface_r34`
+- `conf_thresh` *(float, default=0.5)*: Minimum confidence score for detections.
+- `pre_nms_topk` *(int, default=5000)*: Max detections to keep before NMS.
+- `nms_thresh` *(float, default=0.4)*: IoU threshold for Non-Maximum Suppression.
+- `post_nms_topk` *(int, default=750)*: Max detections to keep after NMS.
+
+---
+
+### `detect` Method
+```python
+detect(
+    image: np.ndarray,
+    max_num: int = 0,
+    metric: str = "default",
+    center_weight: float = 2.0
+) -> Tuple[np.ndarray, np.ndarray]
+```
+
+**Description**:
+Detects faces in the given image and returns bounding boxes and landmarks.
+
+**Parameters**:
+- `image` *(np.ndarray)*: Input image in BGR format.
+- `max_num` *(int, default=0)*: Maximum number of faces to return. `0` means return all.
+- `metric` *(str, default="default")*: Metric for prioritizing detections:
+  - `"default"`: Prioritize detections closer to the image center.
+  - `"max"`: Prioritize larger bounding box areas.
+- `center_weight` *(float, default=2.0)*: Weight for prioritizing center-aligned faces.
+
+**Returns**:
+- `bounding_boxes` *(np.ndarray)*: Array of detections as `[x_min, y_min, x_max, y_max, confidence]`.
+- `landmarks` *(np.ndarray)*: Array of landmarks with shape `(N, 5, 2)`, i.e. `[(x1, y1), ..., (x5, y5)]` per face.
+
+---
+
+### Visualization Utilities
+
+#### `draw_detections`
+```python
+draw_detections(
+    image: np.ndarray,
+    detections: Tuple[np.ndarray, np.ndarray],
+    vis_threshold: float
+) -> None
+```
+
+**Description**:
+Draws bounding boxes and landmarks on the given image.
+
+**Parameters**:
+- `image` *(np.ndarray)*: The input image in BGR format.
+- `detections` *(Tuple[np.ndarray, np.ndarray])*: A tuple of bounding boxes and landmarks.
+- `vis_threshold` *(float)*: Minimum confidence score for visualization.
+
+---
+
+## Contributing
+
+We welcome contributions to enhance the library! Feel free to:
+
+- Submit bug reports or feature requests.
+- Fork the repository and create a pull request.
+
+---
+
+## License
+
+This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for details.
+
+---
+
+## Acknowledgments
+
+- Based on the RetinaFace model for face detection ([https://github.com/yakhyo/retinaface-pytorch](https://github.com/yakhyo/retinaface-pytorch)).
+- Inspired by InsightFace and other face detection projects.
+
+---
diff --git a/assets/test.jpg b/assets/test.jpg
new file mode 100644
index 0000000..4559d9f
Binary files /dev/null and b/assets/test.jpg differ
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..5ce9014
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,8 @@
+pytest
+numpy
+opencv-python
+opencv-python-headless
+onnx
+onnxruntime
+requests
+torch
diff --git a/scripts/release.sh b/scripts/release.sh
new file mode 100644
index 0000000..ec30f3b
--- /dev/null
+++ b/scripts/release.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+
+# Exit on errors
+set -e
+
+cd "$(dirname "$0")"/..
+
+echo "Deleting existing release-related files..."
+rm -rf dist/ build/ *.egg-info
+
+pip install --upgrade pip
+pip install build twine
+
+echo "Creating a package for the current release (PyPI compatible)..."
+python3 -m build
+
+echo "Release package created successfully in the 'dist/' folder."
+
+
+echo "Uploading the package to PyPI..."
+twine upload dist/*
+
+echo "Release uploaded successfully!"
\ No newline at end of file
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..9261470
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,43 @@
+import os
+from setuptools import setup, find_packages
+
+# Read the README file for the long description
+long_description = ""
+if os.path.exists("README.md"):
+    with open("README.md", "r", encoding="utf-8") as f:
+        long_description = f.read()
+
+# Single-source the package version from uniface/version.py so setup.py and
+# uniface.__version__ cannot drift apart
+version_ns = {}
+with open(os.path.join("uniface", "version.py"), "r", encoding="utf-8") as f:
+    exec(f.read(), version_ns)
+
+setup(
+    name="uniface",
+    version=version_ns["__version__"],
+    packages=find_packages(),
+    install_requires=[
+        "numpy",
+        "opencv-python",
+        "onnx",
+        "onnxruntime",
+        "requests",
+        "torch"
+    ],
+    extras_require={
+        "dev": ["pytest"],
+    },
+    description="UniFace: A Comprehensive Library for Face Detection, Recognition, Landmark Analysis, Age, and Gender Detection",
+    long_description=long_description,
+    long_description_content_type="text/markdown",
+    author="Yakhyokhuja Valikhujaev",
+    author_email="yakhyo9696@gmail.com",
+    url="https://github.com/yakhyo/uniface",
+    license="MIT",
+    classifiers=[
+        "Programming Language :: Python :: 3",
+        "Programming Language :: Python :: 3.8",
+        "Programming Language :: Python :: 3.9",
+        "Programming Language :: Python :: 3.10",
+        "License :: OSI Approved :: MIT License",
+        "Operating System :: OS Independent",
+        "Topic :: Software Development :: Libraries :: Python Modules",
+    ],
+    keywords="face detection, face recognition, facial landmark, facial attribute, onnx, opencv, retinaface",
+    python_requires=">=3.8",
+)
diff --git a/test.py b/test.py
new file mode 100644
index 0000000..c8f241d
--- /dev/null
+++ b/test.py
@@ -0,0 +1,57 @@
+import os
+import cv2
+
+from uniface import RetinaFace, draw_detections
+
+
+def run_inference(image_path, save_image=False, vis_threshold=0.6):
+    """
+    Perform inference on an image, draw detections, and optionally save the output image.
+
+    Args:
+        image_path (str): Path to the input image.
+        save_image (bool): Whether to save the output image with detections.
+        vis_threshold (float): Confidence threshold for displaying detections.
+    """
+    # Load the image
+    original_image = cv2.imread(image_path)
+    if original_image is None:
+        print(f"Error: Could not read image from {image_path}")
+        return
+
+    # Perform face detection
+    boxes, landmarks = retinaface_inference.detect(original_image)
+
+    # Draw detections on the image
+    draw_detections(original_image, (boxes, landmarks), vis_threshold)
+
+    # Save the output image if requested
+    if save_image:
+        im_name = os.path.splitext(os.path.basename(image_path))[0]
+        save_name = f"{im_name}_out.jpg"
+        cv2.imwrite(save_name, original_image)
+        print(f"Image saved at '{save_name}'")
+
+
+if __name__ == '__main__':
+    import time
+
+    # Initialize and run the ONNX inference
+    retinaface_inference = RetinaFace(
+        model="retinaface_mnet_v2",
+        conf_thresh=0.5,
+        pre_nms_topk=5000,
+        nms_thresh=0.4,
+        post_nms_topk=750,
+    )
+
+    img_path = "assets/test.jpg"
+    avg = 0
+    for _ in range(50):
+        st = time.time()
+        run_inference(img_path, save_image=True, vis_threshold=0.6)
+        d = time.time() - st
+        print(d)
+        avg += d
+    print("avg", avg / 50)
diff --git a/tests/test_retinaface.py b/tests/test_retinaface.py
new file mode 100644
index 0000000..e49ab17
--- /dev/null
+++ b/tests/test_retinaface.py
@@ -0,0 +1,78 @@
+import pytest
+import numpy as np
+from uniface import RetinaFace
+
+
+@pytest.fixture
+def retinaface_model():
+    """
+    Fixture to initialize the RetinaFace model for testing.
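+
+    Note: pytest fixtures default to function scope, so a fresh detector is
+    built for every test; pass scope="module" to reuse a single instance.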
+ """ + return RetinaFace( + model="retinaface_mnet_v2", + conf_thresh=0.5, + pre_nms_topk=5000, + nms_thresh=0.4, + post_nms_topk=750, + ) + + +def test_model_initialization(retinaface_model): + """ + Test that the RetinaFace model initializes correctly. + """ + assert retinaface_model is not None, "Model initialization failed." + + +def test_inference_on_640x640_image(retinaface_model): + """ + Test inference on a 640x640 BGR image. + """ + # Generate a mock 640x640 BGR image + mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8) + + # Run inference + detections, landmarks = retinaface_model.detect(mock_image) + + # Check output types + assert isinstance(detections, np.ndarray), "Detections should be a numpy array." + assert isinstance(landmarks, np.ndarray), "Landmarks should be a numpy array." + + # Check that detections have the expected shape + if detections.size > 0: # If faces are detected + assert detections.shape[1] == 5, "Each detection should have 5 values (x1, y1, x2, y2, score)." + + # Check landmarks shape + if landmarks.size > 0: + assert landmarks.shape[1:] == (5, 2), "Landmarks should have shape (N, 5, 2)." + + +def test_confidence_threshold(retinaface_model): + """ + Test that detections respect the confidence threshold. + """ + # Generate a mock 640x640 BGR image + mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8) + + # Run inference + detections, _ = retinaface_model.detect(mock_image) + + # Ensure all detections have confidence scores above the threshold + if detections.size > 0: # If faces are detected + confidence_scores = detections[:, 4] + assert (confidence_scores >= 0.5).all(), "Some detections have confidence below the threshold." + + +def test_no_faces_detected(retinaface_model): + """ + Test inference on an image without detectable faces. + """ + # Generate an empty (black) 640x640 image + empty_image = np.zeros((640, 640, 3), dtype=np.uint8) + + # Run inference + detections, landmarks = retinaface_model.detect(empty_image) + + # Ensure no detections or landmarks are found + assert detections.size == 0, "Detections should be empty for a blank image." + assert landmarks.size == 0, "Landmarks should be empty for a blank image." diff --git a/uniface/__init__.py b/uniface/__init__.py new file mode 100644 index 0000000..006c1c2 --- /dev/null +++ b/uniface/__init__.py @@ -0,0 +1,28 @@ +# Copyright 2024 Yakhyokhuja Valikhujaev +# +# Licensed under the MIT License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/MIT +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+
+from uniface.retinaface import RetinaFace
+from uniface.log import Logger
+from uniface.model_store import verify_model_weights
+from uniface.version import __version__, __author__
+from uniface.visualization import draw_detections
+
+__all__ = [
+    "__version__",
+    "__author__",
+    "RetinaFace",
+    "Logger",
+    "verify_model_weights",
+    "draw_detections"
+]
diff --git a/uniface/common.py b/uniface/common.py
new file mode 100644
index 0000000..b65a966
--- /dev/null
+++ b/uniface/common.py
@@ -0,0 +1,178 @@
+# Copyright 2024 Yakhyokhuja Valikhujaev
+# Author: Yakhyokhuja Valikhujaev
+# GitHub: https://github.com/yakhyo
+
+import cv2
+import math
+import itertools
+import numpy as np
+
+import torch
+from typing import Tuple, List
+
+
+def resize_image(frame: np.ndarray, target_shape: Tuple[int, int] = (640, 640)) -> Tuple[np.ndarray, float]:
+    """
+    Resize an image to fit within a target shape while keeping its aspect ratio.
+
+    Args:
+        frame (np.ndarray): Input image.
+        target_shape (Tuple[int, int]): Target size (width, height). Defaults to (640, 640).
+
+    Returns:
+        Tuple[np.ndarray, float]: Resized image on a blank canvas and the resize factor.
+    """
+    width, height = target_shape
+
+    # Aspect-ratio preserving resize
+    im_ratio = float(frame.shape[0]) / frame.shape[1]
+    model_ratio = height / width
+    if im_ratio > model_ratio:
+        new_height = height
+        new_width = int(new_height / im_ratio)
+    else:
+        new_width = width
+        new_height = int(new_width * im_ratio)
+
+    resize_factor = float(new_height) / frame.shape[0]
+    resized_frame = cv2.resize(frame, (new_width, new_height))
+
+    # Create blank image and place resized image on it
+    image = np.zeros((height, width, 3), dtype=np.uint8)
+    image[:new_height, :new_width, :] = resized_frame
+
+    return image, resize_factor
+
+
+def generate_anchors(image_size: Tuple[int, int] = (640, 640)) -> torch.Tensor:
+    """
+    Generate anchor boxes for a given image size.
+
+    Args:
+        image_size (Tuple[int, int]): Input image size (height, width). Defaults to (640, 640).
+
+    Returns:
+        torch.Tensor: Anchor box coordinates as a tensor.
+    """
+    steps = [8, 16, 32]
+    min_sizes = [[16, 32], [64, 128], [256, 512]]
+
+    anchors = []
+    feature_maps = [
+        [
+            math.ceil(image_size[0] / step),
+            math.ceil(image_size[1] / step)
+        ] for step in steps
+    ]
+
+    for k, (map_height, map_width) in enumerate(feature_maps):
+        step = steps[k]
+        for i, j in itertools.product(range(map_height), range(map_width)):
+            for min_size in min_sizes[k]:
+                s_kx = min_size / image_size[1]
+                s_ky = min_size / image_size[0]
+
+                dense_cx = [x * step / image_size[1] for x in [j + 0.5]]
+                dense_cy = [y * step / image_size[0] for y in [i + 0.5]]
+                for cy, cx in itertools.product(dense_cy, dense_cx):
+                    anchors += [cx, cy, s_kx, s_ky]
+
+    output = torch.Tensor(anchors).view(-1, 4)
+    return output
+
+
+def nms(dets: np.ndarray, threshold: float) -> List[int]:
+    """
+    Apply Non-Maximum Suppression (NMS) to reduce overlapping bounding boxes based on a threshold.
+
+    Args:
+        dets (np.ndarray): Array of detections with each row as [x1, y1, x2, y2, score].
+        threshold (float): IoU threshold for suppression.
+
+    Returns:
+        List[int]: Indices of bounding boxes retained after suppression.
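+
+    Example (illustrative):
+        >>> dets = np.array([[10., 10., 50., 50., 0.9],
+        ...                  [12., 12., 52., 52., 0.8]])
+        >>> keep = nms(dets, threshold=0.4)
+        >>> # keep == [0]: the two boxes overlap with IoU ~0.83, so the
+        >>> # lower-scoring one is suppressed.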
+ """ + x1 = dets[:, 0] + y1 = dets[:, 1] + x2 = dets[:, 2] + y2 = dets[:, 3] + scores = dets[:, 4] + + areas = (x2 - x1 + 1) * (y2 - y1 + 1) + order = scores.argsort()[::-1] + + keep = [] + while order.size > 0: + i = order[0] + keep.append(i) + xx1 = np.maximum(x1[i], x1[order[1:]]) + yy1 = np.maximum(y1[i], y1[order[1:]]) + xx2 = np.minimum(x2[i], x2[order[1:]]) + yy2 = np.minimum(y2[i], y2[order[1:]]) + + w = np.maximum(0.0, xx2 - xx1 + 1) + h = np.maximum(0.0, yy2 - yy1 + 1) + inter = w * h + ovr = inter / (areas[i] + areas[order[1:]] - inter) + + inds = np.where(ovr <= threshold)[0] + order = order[inds + 1] + + return keep + + +def decode_boxes(loc, priors, variances=[0.1, 0.2]) -> torch.Tensor: + """ + Decode locations from predictions using priors to undo + the encoding done for offset regression at train time. + + Args: + loc (tensor): Location predictions for loc layers, shape: [num_priors, 4] + priors (tensor): Prior boxes in center-offset form, shape: [num_priors, 4] + variances (list[float]): Variances of prior boxes + + Returns: + tensor: Decoded bounding box predictions + """ + # Compute centers of predicted boxes + cxcy = priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:] + + # Compute widths and heights of predicted boxes + wh = priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1]) + + # Convert center, size to corner coordinates + boxes = torch.empty_like(loc) + boxes[:, :2] = cxcy - wh / 2 # xmin, ymin + boxes[:, 2:] = cxcy + wh / 2 # xmax, ymax + + return boxes + + +def decode_landmarks(predictions, priors, variances=[0.1, 0.2]) -> torch.Tensor: + """ + Decode landmarks from predictions using prior boxes to reverse the encoding done during training. + + Args: + predictions (tensor): Landmark predictions for localization layers. + Shape: [num_priors, 10] where each prior contains 5 landmark (x, y) pairs. + priors (tensor): Prior boxes in center-offset form. + Shape: [num_priors, 4], where each prior has (cx, cy, width, height). + variances (list[float]): Variances of the prior boxes to scale the decoded values. + + Returns: + landmarks (tensor): Decoded landmark predictions. + Shape: [num_priors, 10] where each row contains the decoded (x, y) pairs for 5 landmarks. 
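+
+    Example (illustrative):
+        >>> priors = torch.tensor([[0.5, 0.5, 0.2, 0.2]])  # single prior at the image center
+        >>> preds = torch.zeros(1, 10)                      # zero offsets
+        >>> out = decode_landmarks(preds, priors)
+        >>> # out has shape (1, 10) and every value is 0.5: with zero
+        >>> # offsets, all five landmarks fall on the prior's center.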
+ """ + + # Reshape predictions to [num_priors, 5, 2] to handle each pair (x, y) in a batch + predictions = predictions.view(predictions.size(0), 5, 2) + + # Perform the same operation on all landmark pairs at once + landmarks = priors[:, :2].unsqueeze(1) + predictions * variances[0] * priors[:, 2:].unsqueeze(1) + + # Flatten back to [num_priors, 10] + landmarks = landmarks.view(landmarks.size(0), -1) + + return landmarks diff --git a/uniface/constants.py b/uniface/constants.py new file mode 100644 index 0000000..b4131d5 --- /dev/null +++ b/uniface/constants.py @@ -0,0 +1,26 @@ +# Copyright 2024 Yakhyokhuja Valikhujaev +# Author: Yakhyokhuja Valikhujaev +# GitHub: https://github.com/yakhyo + +from typing import Dict + + +MODEL_URLS: Dict[str, str] = { + 'retinaface_mnet025': 'https://github.com/yakhyo/uniface/releases/download/v0.0.1/retinaface_mv1_0.25.onnx', + 'retinaface_mnet050': 'https://github.com/yakhyo/uniface/releases/download/v0.0.1/retinaface_mv1_0.50.onnx', + 'retinaface_mnet_v1': 'https://github.com/yakhyo/uniface/releases/download/v0.0.1/retinaface_mv1.onnx', + 'retinaface_mnet_v2': 'https://github.com/yakhyo/uniface/releases/download/v0.0.1/retinaface_mv2.onnx', + 'retinaface_r18': 'https://github.com/yakhyo/uniface/releases/download/v0.0.1/retinaface_r18.onnx', + 'retinaface_r34': 'https://github.com/yakhyo/uniface/releases/download/v0.0.1/retinaface_r34.onnx' +} + +MODEL_SHA256: Dict[str, str] = { + 'retinaface_mnet025': 'b7a7acab55e104dce6f32cdfff929bd83946da5cd869b9e2e9bdffafd1b7e4a5', + 'retinaface_mnet050': 'd8977186f6037999af5b4113d42ba77a84a6ab0c996b17c713cc3d53b88bfc37', + 'retinaface_mnet_v1': '75c961aaf0aff03d13c074e9ec656e5510e174454dd4964a161aab4fe5f04153', + 'retinaface_mnet_v2': '3ca44c045651cabeed1193a1fae8946ad1f3a55da8fa74b341feab5a8319f757', + 'retinaface_r18': 'e8b5ddd7d2c3c8f7c942f9f10cec09d8e319f78f09725d3f709631de34fb649d', + 'retinaface_r34': 'bd0263dc2a465d32859555cb1741f2d98991eb0053696e8ee33fec583d30e630' +} + +CHUNK_SIZE = 8192 diff --git a/uniface/log.py b/uniface/log.py new file mode 100644 index 0000000..3ac8c9f --- /dev/null +++ b/uniface/log.py @@ -0,0 +1,7 @@ +import logging + +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(levelname)s - %(message)s" +) +Logger = logging.getLogger("retinaface") diff --git a/uniface/model_store.py b/uniface/model_store.py new file mode 100644 index 0000000..d267dad --- /dev/null +++ b/uniface/model_store.py @@ -0,0 +1,102 @@ +# Copyright 2024 Yakhyokhuja Valikhujaev +# Author: Yakhyokhuja Valikhujaev +# GitHub: https://github.com/yakhyo + +import os +import hashlib +import requests + +from uniface.log import Logger +import uniface.constants as const + + +def verify_model_weights(model_name: str, root: str = '~/.uniface/models') -> str: + """ + Ensures model weights are available by downloading if missing and verifying integrity with a SHA-256 hash. + + Checks if the specified model weights file exists in `root`. If missing, downloads from a predefined URL. + The file is then verified using its SHA-256 hash. If verification fails, the corrupted file is deleted, + and an error is raised. + + Args: + model_name (str): Name of the model weights to verify or download. + root (str, optional): Directory to store the model weights. Defaults to '~/.uniface/models'. + + Returns: + str: Path to the verified model weights file. + + Raises: + ValueError: If the model is not found or if verification fails. + ConnectionError: If downloading the file fails. 
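+
+    Note:
+        A cached file that fails hash verification is deleted, so the next
+        call will attempt a fresh download.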
+
+    Examples:
+        >>> # Download and verify 'retinaface_mnet025' weights
+        >>> verify_model_weights('retinaface_mnet025')
+        '/home/user/.uniface/models/retinaface_mnet025.onnx'
+
+        >>> # Use a custom directory
+        >>> verify_model_weights('retinaface_r34', root='/custom/dir')
+        '/custom/dir/retinaface_r34.onnx'
+    """
+
+    root = os.path.expanduser(root)
+    os.makedirs(root, exist_ok=True)
+    model_path = os.path.join(root, f'{model_name}.onnx')
+
+    if not os.path.exists(model_path):
+        url = const.MODEL_URLS.get(model_name)
+        if not url:
+            Logger.error(f"No URL found for model '{model_name}'")
+            raise ValueError(f"No URL found for model '{model_name}'")
+
+        Logger.info(f"Downloading '{model_name}' from {url}")
+        download_file(url, model_path)
+        Logger.info(f"Successfully downloaded '{model_name}' to {model_path}")
+
+    expected_hash = const.MODEL_SHA256.get(model_name)
+    if expected_hash and not verify_file_hash(model_path, expected_hash):
+        Logger.warning("Corrupted weight file detected. Removing...")
+        os.remove(model_path)  # Remove corrupted file so the next call re-downloads it
+        raise ValueError(f"Hash mismatch for '{model_name}'. The file may be corrupted; please try downloading again.")
+
+    return model_path
+
+
+def download_file(url: str, dest_path: str) -> None:
+    """Download a file from a URL in chunks and save it to the destination path."""
+    try:
+        response = requests.get(url, stream=True)
+        response.raise_for_status()
+        with open(dest_path, "wb") as file:
+            for chunk in response.iter_content(chunk_size=const.CHUNK_SIZE):
+                if chunk:
+                    file.write(chunk)
+    except requests.RequestException as e:
+        raise ConnectionError(f"Failed to download file from {url}. Error: {e}")
+
+
+def verify_file_hash(file_path: str, expected_hash: str) -> bool:
+    """Compute the SHA-256 hash of the file and compare it with the expected hash."""
+    file_hash = hashlib.sha256()
+    with open(file_path, "rb") as f:
+        for chunk in iter(lambda: f.read(const.CHUNK_SIZE), b""):
+            file_hash.update(chunk)
+    actual_hash = file_hash.hexdigest()
+    if actual_hash != expected_hash:
+        Logger.warning(f"Expected hash: {expected_hash}, but got: {actual_hash}")
+    return actual_hash == expected_hash
+
+
+if __name__ == "__main__":
+    model_names = [
+        'retinaface_mnet025',
+        'retinaface_mnet050',
+        'retinaface_mnet_v1',
+        'retinaface_mnet_v2',
+        'retinaface_r18',
+        'retinaface_r34'
+    ]
+
+    # Download each model in the list
+    for model_name in model_names:
+        model_path = verify_model_weights(model_name)
diff --git a/uniface/retinaface.py b/uniface/retinaface.py
new file mode 100644
index 0000000..570f5c3
--- /dev/null
+++ b/uniface/retinaface.py
@@ -0,0 +1,256 @@
+# Copyright 2024 Yakhyokhuja Valikhujaev
+# Author: Yakhyokhuja Valikhujaev
+# GitHub: https://github.com/yakhyo
+
+import numpy as np
+import onnxruntime as ort
+
+import torch
+from typing import Tuple, List, Optional, Literal
+
+from uniface.log import Logger
+from uniface.model_store import verify_model_weights
+
+from uniface.common import (
+    nms,
+    resize_image,
+    decode_boxes,
+    generate_anchors,
+    decode_landmarks
+)
+
+
+class RetinaFace:
+    """
+    A class for face detection using the RetinaFace model.
+
+    Args:
+        model (str): Path or identifier of the model weights.
+        conf_thresh (float): Confidence threshold for detections. Defaults to 0.5.
+        nms_thresh (float): Non-maximum suppression threshold. Defaults to 0.4.
+        pre_nms_topk (int): Maximum number of detections before NMS. Defaults to 5000.
+        post_nms_topk (int): Maximum number of detections after NMS. Defaults to 750.
+        dynamic_size (Optional[bool]): Whether to adjust anchor generation dynamically based on image size. Defaults to False.
+        input_size (Optional[Tuple[int, int]]): Static input size for the model (width, height). Defaults to (640, 640).
+
+    Attributes:
+        conf_thresh (float): Confidence threshold for filtering detections.
+        nms_thresh (float): Threshold for NMS to remove duplicate detections.
+        pre_nms_topk (int): Maximum detections to consider before applying NMS.
+        post_nms_topk (int): Maximum detections retained after applying NMS.
+        dynamic_size (bool): Indicates if input size and anchors are dynamically adjusted.
+        input_size (Tuple[int, int]): The model's input image size.
+        _model_path (str): Path to the model weights.
+        _priors (torch.Tensor): Precomputed anchor boxes for static input size.
+    """
+
+    def __init__(
+        self,
+        model: str,
+        conf_thresh: float = 0.5,
+        nms_thresh: float = 0.4,
+        pre_nms_topk: int = 5000,
+        post_nms_topk: int = 750,
+        dynamic_size: Optional[bool] = False,
+        input_size: Optional[Tuple[int, int]] = (640, 640),  # Default input size if dynamic_size=False
+    ) -> None:
+
+        self.conf_thresh = conf_thresh
+        self.nms_thresh = nms_thresh
+        self.pre_nms_topk = pre_nms_topk
+        self.post_nms_topk = post_nms_topk
+        self.dynamic_size = dynamic_size
+        self.input_size = input_size
+
+        Logger.info(
+            f"Initializing RetinaFace with model={model}, conf_thresh={conf_thresh}, nms_thresh={nms_thresh}, "
+            f"pre_nms_topk={pre_nms_topk}, post_nms_topk={post_nms_topk}, dynamic_size={dynamic_size}, "
+            f"input_size={input_size}"
+        )
+
+        # Get path to model weights
+        self._model_path = verify_model_weights(model)
+        Logger.info(f"Verified model weights located at: {self._model_path}")
+
+        # Precompute anchors if using static size
+        if not dynamic_size and input_size is not None:
+            self._priors = generate_anchors(image_size=input_size)
+            Logger.debug("Generated anchors for static input size.")
+
+        # Initialize model
+        self._initialize_model(self._model_path)
+
+    def _initialize_model(self, model_path: str) -> None:
+        """
+        Initializes an ONNX model session from the given path.
+
+        Args:
+            model_path (str): The file path to the ONNX model.
+
+        Raises:
+            RuntimeError: If the model fails to load, logs an error and raises an exception.
+        """
+        try:
+            self.session = ort.InferenceSession(model_path)
+            self.input_name = self.session.get_inputs()[0].name
+            Logger.info(f"Successfully initialized the model from {model_path}")
+        except Exception as e:
+            Logger.error(f"Failed to load model from '{model_path}': {e}")
+            raise RuntimeError(f"Failed to initialize model session for '{model_path}'") from e
+
+    def preprocess(self, image: np.ndarray) -> np.ndarray:
+        """Preprocess input image for model inference.
+
+        Args:
+            image (np.ndarray): Input image in BGR format.
+
+        Returns:
+            np.ndarray: Preprocessed image tensor with shape (1, C, H, W).
+        """
+        image = np.float32(image) - np.array([104, 117, 123], dtype=np.float32)
+        image = image.transpose(2, 0, 1)  # HWC to CHW
+        image = np.expand_dims(image, axis=0)  # Add batch dimension (1, C, H, W)
+        return image
+
+    def inference(self, input_tensor: np.ndarray) -> List[np.ndarray]:
+        """Perform model inference on the preprocessed image tensor.
+
+        Args:
+            input_tensor (np.ndarray): Preprocessed input tensor.
+
+        Returns:
+            List[np.ndarray]: Raw model outputs [loc, conf, landmarks].
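+
+        Example (illustrative; `detector` stands for a `RetinaFace` instance):
+            >>> tensor = detector.preprocess(image)  # image: BGR np.ndarray
+            >>> loc, conf, land = detector.inference(tensor)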
+        """
+        return self.session.run(None, {self.input_name: input_tensor})
+
+    def detect(
+        self,
+        image: np.ndarray,
+        max_num: int = 0,
+        metric: Literal["default", "max"] = "default",
+        center_weight: float = 2.0
+    ) -> Tuple[np.ndarray, np.ndarray]:
+        """
+        Perform face detection on an input image and return bounding boxes and landmarks.
+
+        Args:
+            image (np.ndarray): Input image as a NumPy array of shape (height, width, channels).
+            max_num (int, optional): Maximum number of detections to return. Defaults to 0,
+                which returns all detections.
+            metric (str, optional): Metric for ranking detections when `max_num` is specified.
+                Options:
+                    - "default": Prioritize detections closer to the image center.
+                    - "max": Prioritize detections with larger bounding box areas.
+            center_weight (float, optional): Weight for penalizing detections farther from the image center
+                when using the "default" metric. Defaults to 2.0.
+
+        Returns:
+            Tuple[np.ndarray, np.ndarray]: Detection results containing:
+                - detections (np.ndarray): Array of detected bounding boxes with confidence scores.
+                    Shape: (num_detections, 5), where each row is [x_min, y_min, x_max, y_max, score].
+                - landmarks (np.ndarray): Array of detected facial landmarks.
+                    Shape: (num_detections, 5, 2), where each row contains 5 landmark points (x, y).
+        """
+
+        if self.dynamic_size:
+            height, width, _ = image.shape
+            self._priors = generate_anchors(image_size=(height, width))  # Generate anchors for this input size
+            resize_factor = 1.0  # No resizing
+        else:
+            image, resize_factor = resize_image(image, target_shape=self.input_size)
+
+        height, width, _ = image.shape
+        image_tensor = self.preprocess(image)
+
+        # ONNXRuntime inference
+        outputs = self.inference(image_tensor)
+
+        # Postprocessing
+        detections, landmarks = self.postprocess(outputs, resize_factor, shape=(width, height))
+
+        if max_num > 0 and detections.shape[0] > max_num:
+            # Calculate area of detections
+            areas = (detections[:, 2] - detections[:, 0]) * (detections[:, 3] - detections[:, 1])
+
+            # Calculate offsets from image center
+            center = (height // 2, width // 2)
+            offsets = np.vstack([
+                (detections[:, 0] + detections[:, 2]) / 2 - center[1],
+                (detections[:, 1] + detections[:, 3]) / 2 - center[0]
+            ])
+            offset_dist_squared = np.sum(np.power(offsets, 2.0), axis=0)
+
+            # Calculate scores based on the chosen metric
+            if metric == 'max':
+                scores = areas
+            else:
+                scores = areas - offset_dist_squared * center_weight
+
+            # Sort by scores and select top `max_num`
+            sorted_indices = np.argsort(scores)[::-1][:max_num]
+
+            detections = detections[sorted_indices]
+            landmarks = landmarks[sorted_indices]
+
+        return detections, landmarks
+
+    def postprocess(self, outputs: List[np.ndarray], resize_factor: float, shape: Tuple[int, int]) -> Tuple[np.ndarray, np.ndarray]:
+        """
+        Process the model outputs into final detection results.
+
+        Args:
+            outputs (List[np.ndarray]): Raw outputs from the detection model.
+                - outputs[0]: Location predictions (bounding box coordinates).
+                - outputs[1]: Class confidence scores.
+                - outputs[2]: Landmark predictions.
+            resize_factor (float): Factor used to resize the input image during preprocessing.
+            shape (Tuple[int, int]): Model input size as (width, height), used to rescale
+                normalized coordinates back to pixels.
+
+        Returns:
+            Tuple[np.ndarray, np.ndarray]: Processed results containing:
+                - detections (np.ndarray): Array of detected bounding boxes with confidence scores.
+                    Shape: (num_detections, 5), where each row is [x_min, y_min, x_max, y_max, score].
+ - landmarks (np.ndarray): Array of detected facial landmarks. + Shape: (num_detections, 5, 2), where each row contains 5 landmark points (x, y). + """ + loc, conf, landmarks = outputs[0].squeeze(0), outputs[1].squeeze(0), outputs[2].squeeze(0) + + # Decode boxes and landmarks + boxes = decode_boxes(torch.tensor(loc), self._priors).cpu().numpy() + landmarks = decode_landmarks(torch.tensor(landmarks), self._priors).cpu().numpy() + + boxes, landmarks = self._scale_detections(boxes, landmarks, resize_factor, shape=(shape[0], shape[1])) + + # Extract confidence scores for the face class + scores = conf[:, 1] + mask = scores > self.conf_thresh + + # Filter by confidence threshold + boxes, landmarks, scores = boxes[mask], landmarks[mask], scores[mask] + + # Sort by scores + order = scores.argsort()[::-1][:self.pre_nms_topk] + boxes, landmarks, scores = boxes[order], landmarks[order], scores[order] + + # Apply NMS + detections = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False) + keep = nms(detections, self.nms_thresh) + detections, landmarks = detections[keep], landmarks[keep] + + # Keep top-k detections + detections, landmarks = detections[:self.post_nms_topk], landmarks[:self.post_nms_topk] + + landmarks = landmarks.reshape(-1, 5, 2).astype(np.int32) + + return detections, landmarks + + def _scale_detections(self, boxes: np.ndarray, landmarks: np.ndarray, resize_factor: float, shape: Tuple[int, int]) -> Tuple[np.ndarray, np.ndarray]: + """Scale bounding boxes and landmarks to the original image size.""" + bbox_scale = np.array([shape[0], shape[1]] * 2) + boxes = boxes * bbox_scale / resize_factor + + landmark_scale = np.array([shape[0], shape[1]] * 5) + landmarks = landmarks * landmark_scale / resize_factor + + return boxes, landmarks diff --git a/uniface/version.py b/uniface/version.py new file mode 100644 index 0000000..ef80910 --- /dev/null +++ b/uniface/version.py @@ -0,0 +1,15 @@ +# Copyright 2024 Yakhyokhuja Valikhujaev +# +# Licensed under the MIT License. +# You may obtain a copy of the License at +# +# https://opensource.org/licenses/MIT +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +__version__ = "0.1.2" +__author__ = "Yakhyokhuja Valikhujaev" diff --git a/uniface/visualization.py b/uniface/visualization.py new file mode 100644 index 0000000..5d73fda --- /dev/null +++ b/uniface/visualization.py @@ -0,0 +1,38 @@ +# Copyright 2024 Yakhyokhuja Valikhujaev +# Author: Yakhyokhuja Valikhujaev +# GitHub: https://github.com/yakhyo + +import cv2 +import numpy as np + + +def draw_detections(image, detections, vis_threshold=0.6): + """ + Draw bounding boxes and landmarks on the image. + + Args: + image (ndarray): Image to draw detections on. + detections (tuple): (bounding boxes, landmarks) as NumPy arrays. + vis_threshold (float): Confidence threshold for filtering detections. 
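+
+    Example (illustrative; `detector` is assumed to be a `RetinaFace` instance):
+        >>> boxes, landmarks = detector.detect(image)
+        >>> draw_detections(image, (boxes, landmarks), vis_threshold=0.6)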
+ """ + + _colors = [(0, 0, 255), (0, 255, 255), (255, 0, 255), (0, 255, 0), (255, 0, 0)] + + # Unpack detections + boxes, landmarks = detections + scores = boxes[:, 4] + + # Filter detections by confidence threshold + filtered = scores >= vis_threshold + boxes = boxes[filtered, :4].astype(np.int32) + landmarks = landmarks[filtered] + scores = scores[filtered] + + print(f"#faces: {len(scores)}") + + # Draw bounding boxes, scores, and landmarks + for box, score, landmark in zip(boxes, scores, landmarks): + cv2.rectangle(image, box[:2], box[2:], (0, 0, 255), 2) + cv2.putText(image, f"{score:.2f}", (box[0], box[1] + 12), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1) + for point, color in zip(landmark, _colors): + cv2.circle(image, tuple(point), 2, color, -1)
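+
+
+if __name__ == "__main__":
+    # Minimal manual check -- an illustrative sketch, not part of the library API.
+    # It assumes 'assets/test.jpg' exists and that model weights can be downloaded.
+    from uniface import RetinaFace
+
+    detector = RetinaFace(model="retinaface_mnet_v2")
+    img = cv2.imread("assets/test.jpg")
+    if img is None:
+        raise SystemExit("Could not read assets/test.jpg")
+    boxes, landmarks = detector.detect(img)
+    draw_detections(img, (boxes, landmarks), vis_threshold=0.6)
+    cv2.imwrite("visualization_demo.jpg", img)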