Initial commit

yakhyo
2024-11-20 08:43:25 +00:00
commit 6dbf4e6b87
18 changed files with 1345 additions and 0 deletions

64
.github/workflows/build.yml vendored Normal file

@@ -0,0 +1,64 @@
name: Build, Test, and Publish
on:
push:
branches:
- main
tags:
- "v*.*.*" # Trigger publish on version tags
pull_request:
branches:
- main
jobs:
build:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.8", "3.9", "3.10"]
steps:
- name: Checkout code
uses: actions/checkout@v3
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install .[dev] || pip install pytest # Use extras_require if available
- name: Run Tests
run: |
pytest
publish:
runs-on: ubuntu-latest
needs: build # Publish only if tests pass
if: startsWith(github.ref, 'refs/tags/') # Publish only for version tag pushes
steps:
- name: Checkout code
uses: actions/checkout@v3
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: "3.10" # Use a single Python version for publishing
- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install build twine
- name: Build Package
run: python -m build
- name: Publish to PyPI
env:
TWINE_USERNAME: __token__
TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
run: twine upload dist/*

162
.gitignore vendored Normal file

@@ -0,0 +1,162 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
.pdm.toml
.pdm-python
.pdm-build/
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

21
LICENSE Normal file

@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2024 Yakhyokhuja Valikhujaev
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

239
README.md Normal file

@@ -0,0 +1,239 @@
# UniFace: All-in-One Face Analysis Library
<div align="center">
[![License](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)
![Python](https://img.shields.io/badge/Python-3.8%2B-blue)
[![PyPI Version](https://img.shields.io/pypi/v/uniface.svg)](https://pypi.org/project/uniface/)
[![Build Status](https://github.com/yakhyo/uniface/actions/workflows/build.yml/badge.svg)](https://github.com/yakhyo/uniface/actions)
[![Downloads](https://pepy.tech/badge/uniface)](https://pepy.tech/project/uniface)
[![Code Style: PEP8](https://img.shields.io/badge/code%20style-PEP8-green.svg)](https://www.python.org/dev/peps/pep-0008/)
[![GitHub Release Downloads](https://img.shields.io/github/downloads/yakhyo/uniface/total.svg?label=Model%20Downloads)](https://github.com/yakhyo/uniface/releases)
</div>
**uniface** is a lightweight face detection library designed for high-performance face localization and landmark detection. The library supports ONNX models and provides utilities for bounding-box visualization and landmark plotting. To train the RetinaFace model itself, see [yakhyo/retinaface-pytorch](https://github.com/yakhyo/retinaface-pytorch).
---
## Features
- [ ] Age and gender detection (Planned).
- [ ] Face recognition (Planned).
- [x] High-speed face detection using ONNX models (Added: 2024-11-20).
- [x] Accurate facial landmark localization (e.g., eyes, nose, and mouth) (Added: 2024-11-20).
- [x] Easy-to-use API for inference and visualization (Added: 2024-11-20).
---
## Installation
### Using pip
```bash
pip install uniface
```
### Local installation using pip
**Clone the repository**
```bash
git clone https://github.com/yakhyo/uniface.git
cd uniface
```
**Install using pip**
```bash
pip install .
```
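For development, an editable install keeps local changes live without reinstalling. A minimal sketch, assuming the `dev` extra declared in `setup.py`:
```bash
# Editable install plus the optional dev dependencies (pytest)
pip install -e ".[dev]"
```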
---
## Quick Start
### Initialize the Model
```python
from uniface import RetinaFace
# Initialize the RetinaFace model
uniface_inference = RetinaFace(
model="retinaface_mnet_v2", # Model name
conf_thresh=0.5, # Confidence threshold
pre_nms_topk=5000, # Pre-NMS Top-K detections
nms_thresh=0.4, # NMS IoU threshold
post_nms_topk=750 # Post-NMS Top-K detections
)
```
### Run Inference
Inference on image:
```python
import cv2
from uniface.visualization import draw_detections
# Load an image
image_path = "assets/test.jpg"
original_image = cv2.imread(image_path)
# Perform inference
boxes, landmarks = uniface_inference.detect(original_image)
# Visualize results
draw_detections(original_image, (boxes, landmarks), vis_threshold=0.6)
# Save the output image
output_path = "output.jpg"
cv2.imwrite(output_path, original_image)
print(f"Saved output image to {output_path}")
```
Inference on video:
```python
import cv2
from uniface.visualization import draw_detections
# Initialize the webcam
cap = cv2.VideoCapture(0)
if not cap.isOpened():
print("Error: Unable to access the webcam.")
exit()
while True:
# Capture a frame from the webcam
ret, frame = cap.read()
if not ret:
print("Error: Failed to read frame.")
break
# Perform inference
boxes, landmarks = uniface_inference.detect(frame)
# Draw detections on the frame
draw_detections(frame, (boxes, landmarks), vis_threshold=0.6)
# Display the output
cv2.imshow("Webcam Inference", frame)
# Exit if 'q' is pressed
if cv2.waitKey(1) & 0xFF == ord('q'):
break
# Release the webcam and close all OpenCV windows
cap.release()
cv2.destroyAllWindows()
```
---
### Evaluation results of available models on WiderFace
| RetinaFace Models | Easy | Medium | Hard |
| ------------------ | ---------- | ---------- | ---------- |
| retinaface_mnet025 | 88.48% | 87.02% | 80.61% |
| retinaface_mnet050 | 89.42% | 87.97% | 82.40% |
| retinaface_mnet_v1 | 90.59% | 89.14% | 84.13% |
| retinaface_mnet_v2 | 91.70% | 91.03% | 86.60% |
| retinaface_r18 | 92.50% | 91.02% | 86.63% |
| retinaface_r34 | **94.16%** | **93.12%** | **88.90%** |
## API Reference
### `RetinaFace` Class
#### Initialization
```python
RetinaFace(
model: str,
conf_thresh: float = 0.5,
pre_nms_topk: int = 5000,
nms_thresh: float = 0.4,
post_nms_topk: int = 750
)
```
**Parameters**:
- `model` *(str)*: Name of the model to use. Supported models:
- `retinaface_mnet025`, `retinaface_mnet050`, `retinaface_mnet_v1`, `retinaface_mnet_v2`
- `retinaface_r18`, `retinaface_r34`
- `conf_thresh` *(float, default=0.5)*: Minimum confidence score for detections.
- `pre_nms_topk` *(int, default=5000)*: Max detections to keep before NMS.
- `nms_thresh` *(float, default=0.4)*: IoU threshold for Non-Maximum Suppression.
- `post_nms_topk` *(int, default=750)*: Max detections to keep after NMS.
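The constructor in `uniface/retinaface.py` also accepts `dynamic_size` and `input_size`, which are not shown in the signature above. A minimal sketch of per-image anchor generation:
```python
from uniface import RetinaFace

# Generate anchors from each frame's own shape instead of the static 640x640 grid
detector = RetinaFace(
    model="retinaface_mnet_v2",
    dynamic_size=True,
)
```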
---
### `detect` Method
```python
detect(
image: np.ndarray,
max_num: int = 0,
metric: str = "default",
center_weight: float = 2.0
) -> Tuple[np.ndarray, np.ndarray]
```
**Description**:
Detects faces in the given image and returns bounding boxes and landmarks.
**Parameters**:
- `image` *(np.ndarray)*: Input image in BGR format.
- `max_num` *(int, default=0)*: Maximum number of faces to return. `0` means return all.
- `metric` *(str, default="default")*: Metric for prioritizing detections:
- `"default"`: Prioritize detections closer to the image center.
- `"max"`: Prioritize larger bounding box areas.
- `center_weight` *(float, default=2.0)*: Weight for prioritizing center-aligned faces.
**Returns**:
- `bounding_boxes` *(np.ndarray)*: Array of detections as `[x_min, y_min, x_max, y_max, confidence]`.
- `landmarks` *(np.ndarray)*: Array of landmarks as `[(x1, y1), ..., (x5, y5)]`.
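A short usage sketch based on the signature above, keeping only the two largest faces in the bundled test image:
```python
import cv2
from uniface import RetinaFace

detector = RetinaFace(model="retinaface_mnet_v2")
image = cv2.imread("assets/test.jpg")

# "max" ranks detections by bounding-box area; max_num caps how many are returned
boxes, landmarks = detector.detect(image, max_num=2, metric="max")

for x_min, y_min, x_max, y_max, score in boxes:
    print(f"Face {score:.2f}: ({x_min:.0f}, {y_min:.0f}) -> ({x_max:.0f}, {y_max:.0f})")
```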
---
### Visualization Utilities
#### `draw_detections`
```python
draw_detections(
image: np.ndarray,
detections: Tuple[np.ndarray, np.ndarray],
vis_threshold: float
) -> None
```
**Description**:
Draws bounding boxes and landmarks on the given image.
**Parameters**:
- `image` *(np.ndarray)*: The input image in BGR format.
- `detections` *(Tuple[np.ndarray, np.ndarray])*: A tuple of bounding boxes and landmarks.
- `vis_threshold` *(float)*: Minimum confidence score for visualization.
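`draw_detections` draws in place and returns `None`, and it skips detections scoring below `vis_threshold` (see `uniface/visualization.py`), so you can detect once and tighten the threshold only for display. Continuing the sketch above:
```python
from uniface import draw_detections

boxes, landmarks = detector.detect(image)

# Draw only high-confidence faces; the underlying detection arrays are unchanged
draw_detections(image, (boxes, landmarks), vis_threshold=0.8)
cv2.imwrite("output.jpg", image)
```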
---
## Contributing
We welcome contributions to enhance the library! Feel free to:
- Submit bug reports or feature requests.
- Fork the repository and create a pull request.
---
## License
This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for details.
---
## Acknowledgments
- Based on the RetinaFace model for face detection ([https://github.com/yakhyo/retinaface-pytorch](https://github.com/yakhyo/retinaface-pytorch)).
- Inspired by InsightFace and other face detection projects.
---

BIN
assets/test.jpg Normal file

Binary file not shown (108 KiB).

8
requirements.txt Normal file

@@ -0,0 +1,8 @@
pytest
numpy
opencv-python
opencv-python-headless
onnx
onnxruntime
requests
torch

23
scripts/release.sh Normal file

@@ -0,0 +1,23 @@
#!/bin/bash
# Exit on errors
set -e
cd "$(dirname "$0")"/..
echo "Deleting existing release-related files..."
rm -rf dist/ build/ *.egg-info
pip install --upgrade pip
pip install setuptools wheel twine
echo "Creating a package for the current release (PyPI compatible)..."
python3 setup.py sdist bdist_wheel
echo "Release package created successfully in the 'dist/' folder."
echo "Uploading the package to PyPI..."
twine upload dist/*
echo "Release uploaded successfully!"

43
setup.py Normal file

@@ -0,0 +1,43 @@
import os
from setuptools import setup, find_packages
# Read the README file for the long description
long_description = ""
if os.path.exists("README.md"):
with open("README.md", "r", encoding="utf-8") as f:
long_description = f.read()
setup(
name="uniface",
version="0.1.0",
packages=find_packages(),
install_requires=[
"numpy",
"opencv-python",
"onnx",
"onnxruntime",
"requests",
"torch"
],
extras_require={
"dev": ["pytest"],
},
description="UniFace: A Comprehensive Library for Face Detection, Recognition, Landmark Analysis, Age, and Gender Detection",
long_description=long_description,
long_description_content_type="text/markdown",
author="Yakhyokhuja Valikhujaev",
author_email="yakhyo9696@gmail.com",
url="https://github.com/yakhyo/uniface",
license="MIT",
classifiers=[
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"License :: OSI Approved :: MIT License",
"Operating System :: OS Independent",
"Topic :: Software Development :: Libraries :: Python Modules",
],
keywords="face detection, face recognition, facial landmark, facial attribute, onnx, opencv, retinaface",
python_requires=">=3.8",
)

57
test.py Normal file

@@ -0,0 +1,57 @@
import os
import cv2
import numpy as np
from uniface import RetinaFace, draw_detections
def run_inference(image_path, save_image=False, vis_threshold=0.6):
"""
Perform inference on an image, draw detections, and optionally save the output image.
Args:
image_path (str): Path to the input image.
save_image (bool): Whether to save the output image with detections.
vis_threshold (float): Confidence threshold for displaying detections.
"""
# Load the image
original_image = cv2.imread(image_path)
if original_image is None:
print(f"Error: Could not read image from {image_path}")
return
# Perform face detection
boxes, landmarks = retinaface_inference.detect(original_image)
# Draw detections on the image
draw_detections(original_image, (boxes, landmarks), vis_threshold)
# Save the output image if requested
if save_image:
im_name = os.path.splitext(os.path.basename(image_path))[0]
save_name = f"{im_name}_out.jpg"
cv2.imwrite(save_name, original_image)
print(f"Image saved at '{save_name}'")
if __name__ == '__main__':
import time
# Initialize and run the ONNX inference
retinaface_inference = RetinaFace(
model="retinaface_mnet_v2",
conf_thresh=0.5,
pre_nms_topk=5000,
nms_thresh=0.4,
post_nms_topk=750,
)
img_path = "assets/test.jpg"
avg = 0
for _ in range(50):
st = time.time()
run_inference(img_path, save_image=True, vis_threshold=0.6)
d = time.time() - st
print(d)
avg += d
print("avg", avg / 50)

78
tests/test_retinaface.py Normal file

@@ -0,0 +1,78 @@
import pytest
import numpy as np
from uniface import RetinaFace
@pytest.fixture
def retinaface_model():
"""
Fixture to initialize the RetinaFace model for testing.
"""
return RetinaFace(
model="retinaface_mnet_v2",
conf_thresh=0.5,
pre_nms_topk=5000,
nms_thresh=0.4,
post_nms_topk=750,
)
def test_model_initialization(retinaface_model):
"""
Test that the RetinaFace model initializes correctly.
"""
assert retinaface_model is not None, "Model initialization failed."
def test_inference_on_640x640_image(retinaface_model):
"""
Test inference on a 640x640 BGR image.
"""
# Generate a mock 640x640 BGR image
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
# Run inference
detections, landmarks = retinaface_model.detect(mock_image)
# Check output types
assert isinstance(detections, np.ndarray), "Detections should be a numpy array."
assert isinstance(landmarks, np.ndarray), "Landmarks should be a numpy array."
# Check that detections have the expected shape
if detections.size > 0: # If faces are detected
assert detections.shape[1] == 5, "Each detection should have 5 values (x1, y1, x2, y2, score)."
# Check landmarks shape
if landmarks.size > 0:
assert landmarks.shape[1:] == (5, 2), "Landmarks should have shape (N, 5, 2)."
def test_confidence_threshold(retinaface_model):
"""
Test that detections respect the confidence threshold.
"""
# Generate a mock 640x640 BGR image
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
# Run inference
detections, _ = retinaface_model.detect(mock_image)
# Ensure all detections have confidence scores above the threshold
if detections.size > 0: # If faces are detected
confidence_scores = detections[:, 4]
assert (confidence_scores >= 0.5).all(), "Some detections have confidence below the threshold."
def test_no_faces_detected(retinaface_model):
"""
Test inference on an image without detectable faces.
"""
# Generate an empty (black) 640x640 image
empty_image = np.zeros((640, 640, 3), dtype=np.uint8)
# Run inference
detections, landmarks = retinaface_model.detect(empty_image)
# Ensure no detections or landmarks are found
assert detections.size == 0, "Detections should be empty for a blank image."
assert landmarks.size == 0, "Landmarks should be empty for a blank image."

28
uniface/__init__.py Normal file

@@ -0,0 +1,28 @@
# Copyright 2024 Yakhyokhuja Valikhujaev
#
# Licensed under the MIT License.
# You may obtain a copy of the License at
#
# https://opensource.org/licenses/MIT
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from uniface.retinaface import RetinaFace
from uniface.log import Logger
from uniface.model_store import verify_model_weights
from uniface.version import __version__, __author__
from uniface.visualization import draw_detections
__all__ = [
"__version__",
"__author__"
"RetinaFace",
"Logger",
"verify_model_weights",
"draw_detections"
]

178
uniface/common.py Normal file

@@ -0,0 +1,178 @@
# Copyright 2024 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
import cv2
import math
import itertools
import numpy as np
import torch
from typing import Tuple, List
def resize_image(frame, target_shape: Tuple[int, int] = (640, 640)) -> Tuple[np.ndarray, float]:
"""
Resize an image to fit within a target shape while keeping its aspect ratio.
Args:
frame (np.ndarray): Input image.
target_shape (Tuple[int, int]): Target size (width, height). Defaults to (640, 640).
Returns:
Tuple[np.ndarray, float]: Resized image on a blank canvas and the resize factor.
"""
width, height = target_shape
# Aspect-ratio preserving resize
im_ratio = float(frame.shape[0]) / frame.shape[1]
model_ratio = height / width
if im_ratio > model_ratio:
new_height = height
new_width = int(new_height / im_ratio)
else:
new_width = width
new_height = int(new_width * im_ratio)
resize_factor = float(new_height) / frame.shape[0]
resized_frame = cv2.resize(frame, (new_width, new_height))
# Create blank image and place resized image on it
image = np.zeros((height, width, 3), dtype=np.uint8)
image[:new_height, :new_width, :] = resized_frame
return image, resize_factor
def generate_anchors(image_size: Tuple[int, int] = (640, 640)) -> torch.Tensor:
"""
Generate anchor boxes for a given image size.
Args:
image_size (Tuple[int, int]): Input image size as (height, width). Defaults to (640, 640).
Returns:
torch.Tensor: Anchor box coordinates as a tensor.
"""
steps = [8, 16, 32]
min_sizes = [[16, 32], [64, 128], [256, 512]]
anchors = []
feature_maps = [
[
math.ceil(image_size[0] / step),
math.ceil(image_size[1] / step)
] for step in steps
]
for k, (map_height, map_width) in enumerate(feature_maps):
step = steps[k]
for i, j in itertools.product(range(map_height), range(map_width)):
for min_size in min_sizes[k]:
s_kx = min_size / image_size[1]
s_ky = min_size / image_size[0]
dense_cx = [x * step / image_size[1] for x in [j + 0.5]]
dense_cy = [y * step / image_size[0] for y in [i + 0.5]]
for cy, cx in itertools.product(dense_cy, dense_cx):
anchors += [cx, cy, s_kx, s_ky]
output = torch.Tensor(anchors).view(-1, 4)
return output
def nms(dets: np.ndarray, threshold: float) -> List[int]:
"""
Apply Non-Maximum Suppression (NMS) to reduce overlapping bounding boxes based on a threshold.
Args:
dets (numpy.ndarray): Array of detections with each row as [x1, y1, x2, y2, score].
threshold (float): IoU threshold for suppression.
Returns:
list: Indices of bounding boxes retained after suppression.
"""
x1 = dets[:, 0]
y1 = dets[:, 1]
x2 = dets[:, 2]
y2 = dets[:, 3]
scores = dets[:, 4]
areas = (x2 - x1 + 1) * (y2 - y1 + 1)
order = scores.argsort()[::-1]
keep = []
while order.size > 0:
i = order[0]
keep.append(i)
xx1 = np.maximum(x1[i], x1[order[1:]])
yy1 = np.maximum(y1[i], y1[order[1:]])
xx2 = np.minimum(x2[i], x2[order[1:]])
yy2 = np.minimum(y2[i], y2[order[1:]])
w = np.maximum(0.0, xx2 - xx1 + 1)
h = np.maximum(0.0, yy2 - yy1 + 1)
inter = w * h
ovr = inter / (areas[i] + areas[order[1:]] - inter)
inds = np.where(ovr <= threshold)[0]
order = order[inds + 1]
return keep
def decode_boxes(loc, priors, variances=[0.1, 0.2]) -> torch.Tensor:
"""
Decode locations from predictions using priors to undo
the encoding done for offset regression at train time.
Args:
loc (tensor): Location predictions for loc layers, shape: [num_priors, 4]
priors (tensor): Prior boxes in center-offset form, shape: [num_priors, 4]
variances (list[float]): Variances of prior boxes
Returns:
tensor: Decoded bounding box predictions
"""
# Compute centers of predicted boxes
cxcy = priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:]
# Compute widths and heights of predicted boxes
wh = priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])
# Convert center, size to corner coordinates
boxes = torch.empty_like(loc)
boxes[:, :2] = cxcy - wh / 2 # xmin, ymin
boxes[:, 2:] = cxcy + wh / 2 # xmax, ymax
return boxes
def decode_landmarks(predictions, priors, variances=[0.1, 0.2]) -> torch.Tensor:
"""
Decode landmarks from predictions using prior boxes to reverse the encoding done during training.
Args:
predictions (tensor): Landmark predictions for localization layers.
Shape: [num_priors, 10] where each prior contains 5 landmark (x, y) pairs.
priors (tensor): Prior boxes in center-offset form.
Shape: [num_priors, 4], where each prior has (cx, cy, width, height).
variances (list[float]): Variances of the prior boxes to scale the decoded values.
Returns:
landmarks (tensor): Decoded landmark predictions.
Shape: [num_priors, 10] where each row contains the decoded (x, y) pairs for 5 landmarks.
"""
# Reshape predictions to [num_priors, 5, 2] to handle each pair (x, y) in a batch
predictions = predictions.view(predictions.size(0), 5, 2)
# Perform the same operation on all landmark pairs at once
landmarks = priors[:, :2].unsqueeze(1) + predictions * variances[0] * priors[:, 2:].unsqueeze(1)
# Flatten back to [num_priors, 10]
landmarks = landmarks.view(landmarks.size(0), -1)
return landmarks

26
uniface/constants.py Normal file

@@ -0,0 +1,26 @@
# Copyright 2024 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
from typing import Dict
MODEL_URLS: Dict[str, str] = {
'retinaface_mnet025': 'https://github.com/yakhyo/uniface/releases/download/v0.0.1/retinaface_mv1_0.25.onnx',
'retinaface_mnet050': 'https://github.com/yakhyo/uniface/releases/download/v0.0.1/retinaface_mv1_0.50.onnx',
'retinaface_mnet_v1': 'https://github.com/yakhyo/uniface/releases/download/v0.0.1/retinaface_mv1.onnx',
'retinaface_mnet_v2': 'https://github.com/yakhyo/uniface/releases/download/v0.0.1/retinaface_mv2.onnx',
'retinaface_r18': 'https://github.com/yakhyo/uniface/releases/download/v0.0.1/retinaface_r18.onnx',
'retinaface_r34': 'https://github.com/yakhyo/uniface/releases/download/v0.0.1/retinaface_r34.onnx'
}
MODEL_SHA256: Dict[str, str] = {
'retinaface_mnet025': 'b7a7acab55e104dce6f32cdfff929bd83946da5cd869b9e2e9bdffafd1b7e4a5',
'retinaface_mnet050': 'd8977186f6037999af5b4113d42ba77a84a6ab0c996b17c713cc3d53b88bfc37',
'retinaface_mnet_v1': '75c961aaf0aff03d13c074e9ec656e5510e174454dd4964a161aab4fe5f04153',
'retinaface_mnet_v2': '3ca44c045651cabeed1193a1fae8946ad1f3a55da8fa74b341feab5a8319f757',
'retinaface_r18': 'e8b5ddd7d2c3c8f7c942f9f10cec09d8e319f78f09725d3f709631de34fb649d',
'retinaface_r34': 'bd0263dc2a465d32859555cb1741f2d98991eb0053696e8ee33fec583d30e630'
}
CHUNK_SIZE = 8192

7
uniface/log.py Normal file

@@ -0,0 +1,7 @@
import logging
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s - %(levelname)s - %(message)s"
)
Logger = logging.getLogger("retinaface")

102
uniface/model_store.py Normal file

@@ -0,0 +1,102 @@
# Copyright 2024 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
import os
import hashlib
import requests
from uniface.log import Logger
import uniface.constants as const
def verify_model_weights(model_name: str, root: str = '~/.uniface/models') -> str:
"""
Ensures model weights are available by downloading if missing and verifying integrity with a SHA-256 hash.
Checks if the specified model weights file exists in `root`. If missing, downloads from a predefined URL.
The file is then verified using its SHA-256 hash. If verification fails, the corrupted file is deleted,
and an error is raised.
Args:
model_name (str): Name of the model weights to verify or download.
root (str, optional): Directory to store the model weights. Defaults to '~/.uniface/models'.
Returns:
str: Path to the verified model weights file.
Raises:
ValueError: If the model is not found or if verification fails.
ConnectionError: If downloading the file fails.
Examples:
>>> # Download and verify 'retinaface_mnet025' weights
>>> verify_model_weights('retinaface_mnet025')
'/home/user/.uniface/models/retinaface_mnet025.onnx'
>>> # Use a custom directory
>>> verify_model_weights('retinaface_r34', root='/custom/dir')
'/custom/dir/retinaface_r34.onnx'
"""
root = os.path.expanduser(root)
os.makedirs(root, exist_ok=True)
model_path = os.path.join(root, f'{model_name}.onnx')
if not os.path.exists(model_path):
url = const.MODEL_URLS.get(model_name)
if not url:
Logger.error(f"No URL found for model '{model_name}'")
raise ValueError(f"No URL found for model '{model_name}'")
Logger.info(f"Downloading '{model_name}' from {url}")
download_file(url, model_path)
Logger.info(f"Successfully '{model_name}' downloaded to {model_path}")
expected_hash = const.MODEL_SHA256.get(model_name)
if expected_hash and not verify_file_hash(model_path, expected_hash):
Logger.warning("Corrupted weights file detected. Removing...")
os.remove(model_path)  # Remove the corrupted file
raise ValueError(f"Hash mismatch for '{model_name}'. The file may be corrupted; please try downloading again.")
return model_path
def download_file(url: str, dest_path: str) -> None:
"""Download a file from a URL in chunks and save it to the destination path."""
try:
response = requests.get(url, stream=True, timeout=30)  # Avoid hanging on a stalled connection
response.raise_for_status()
with open(dest_path, "wb") as file:
for chunk in response.iter_content(chunk_size=const.CHUNK_SIZE):
if chunk:
file.write(chunk)
except requests.RequestException as e:
raise ConnectionError(f"Failed to download file from {url}. Error: {e}")
def verify_file_hash(file_path: str, expected_hash: str) -> bool:
"""Compute the SHA-256 hash of the file and compare it with the expected hash."""
file_hash = hashlib.sha256()
with open(file_path, "rb") as f:
for chunk in iter(lambda: f.read(const.CHUNK_SIZE), b""):
file_hash.update(chunk)
actual_hash = file_hash.hexdigest()
if actual_hash != expected_hash:
Logger.warning(f"Expected hash: {expected_hash}, but got: {actual_hash}")
return actual_hash == expected_hash
if __name__ == "__main__":
model_names = [
'retinaface_mnet025',
'retinaface_mnet050',
'retinaface_mnet_v1',
'retinaface_mnet_v2',
'retinaface_r18',
'retinaface_r34'
]
# Download each model in the list
for model_name in model_names:
model_path = verify_model_weights(model_name)

256
uniface/retinaface.py Normal file

@@ -0,0 +1,256 @@
# Copyright 2024 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
import os
import cv2
import numpy as np
import onnxruntime as ort
import torch
from typing import Tuple, List, Optional, Literal
from uniface.log import Logger
from uniface.model_store import verify_model_weights
from uniface.common import (
nms,
resize_image,
decode_boxes,
generate_anchors,
decode_landmarks
)
class RetinaFace:
"""
A class for face detection using the RetinaFace model.
Args:
model (str): Path or identifier of the model weights.
conf_thresh (float): Confidence threshold for detections. Defaults to 0.5.
nms_thresh (float): Non-maximum suppression threshold. Defaults to 0.4.
pre_nms_topk (int): Maximum number of detections before NMS. Defaults to 5000.
post_nms_topk (int): Maximum number of detections after NMS. Defaults to 750.
dynamic_size (Optional[bool]): Whether to adjust anchor generation dynamically based on image size. Defaults to False.
input_size (Optional[Tuple[int, int]]): Static input size for the model (width, height). Defaults to (640, 640).
Attributes:
conf_thresh (float): Confidence threshold for filtering detections.
nms_thresh (float): Threshold for NMS to remove duplicate detections.
pre_nms_topk (int): Maximum detections to consider before applying NMS.
post_nms_topk (int): Maximum detections retained after applying NMS.
dynamic_size (bool): Indicates if input size and anchors are dynamically adjusted.
input_size (Tuple[int, int]): The model's input image size.
_model_path (str): Path to the model weights.
_priors (torch.Tensor): Precomputed anchor boxes for static input size.
"""
def __init__(
self,
model: str,
conf_thresh: float = 0.5,
nms_thresh: float = 0.4,
pre_nms_topk: int = 5000,
post_nms_topk: int = 750,
dynamic_size: Optional[bool] = False,
input_size: Optional[Tuple[int, int]] = (640, 640), # Default input size if dynamic_size=False
) -> None:
self.conf_thresh = conf_thresh
self.nms_thresh = nms_thresh
self.pre_nms_topk = pre_nms_topk
self.post_nms_topk = post_nms_topk
self.dynamic_size = dynamic_size
self.input_size = input_size
Logger.info(
f"Initializing RetinaFace with model={model}, conf_thresh={conf_thresh}, nms_thresh={nms_thresh}, "
f"pre_nms_topk={pre_nms_topk}, post_nms_topk={post_nms_topk}, dynamic_size={dynamic_size}, "
f"input_size={input_size}"
)
# Get path to model weights
self._model_path = verify_model_weights(model)
Logger.info(f"Verified model weights located at: {self._model_path}")
# Precompute anchors if using static size
if not dynamic_size and input_size is not None:
self._priors = generate_anchors(image_size=input_size)
Logger.debug("Generated anchors for static input size.")
# Initialize model
self._initialize_model(self._model_path)
def _initialize_model(self, model_path: str) -> None:
"""
Initializes an ONNX model session from the given path.
Args:
model_path (str): The file path to the ONNX model.
Raises:
RuntimeError: If the model fails to load, logs an error and raises an exception.
"""
try:
self.session = ort.InferenceSession(model_path)
self.input_name = self.session.get_inputs()[0].name
Logger.info(f"Successfully initialized the model from {model_path}")
except Exception as e:
Logger.error(f"Failed to load model from '{model_path}': {e}")
raise RuntimeError(f"Failed to initialize model session for '{model_path}'") from e
def preprocess(self, image: np.ndarray) -> np.ndarray:
"""Preprocess input image for model inference.
Args:
image (np.ndarray): Input image.
Returns:
np.ndarray: Preprocessed image tensor with shape (1, C, H, W)
"""
image = np.float32(image) - np.array([104, 117, 123], dtype=np.float32)
image = image.transpose(2, 0, 1) # HWC to CHW
image = np.expand_dims(image, axis=0) # Add batch dimension (1, C, H, W)
return image
def inference(self, input_tensor: np.ndarray) -> List[np.ndarray]:
"""Perform model inference on the preprocessed image tensor.
Args:
input_tensor (np.ndarray): Preprocessed input tensor.
Returns:
List[np.ndarray]: Raw model outputs.
"""
return self.session.run(None, {self.input_name: input_tensor})
def detect(
self,
image: np.ndarray,
max_num: int = 0,
metric: Literal["default", "max"] = "default",
center_weight: float = 2.0
) -> Tuple[np.ndarray, np.ndarray]:
"""
Perform face detection on an input image and return bounding boxes and landmarks.
Args:
image (np.ndarray): Input image as a NumPy array of shape (height, width, channels).
max_num (int, optional): Maximum number of detections to return; 0 returns all. Defaults to 0.
metric (str, optional): Metric for ranking detections when `max_num` is specified.
Options:
- "default": Prioritize detections closer to the image center.
- "max": Prioritize detections with larger bounding box areas.
center_weight (float, optional): Weight for penalizing detections farther from the image center
when using the "default" metric. Defaults to 2.0.
Returns:
Tuple[np.ndarray, np.ndarray]: Detection results containing:
- detections (np.ndarray): Array of detected bounding boxes with confidence scores.
Shape: (num_detections, 5), where each row is [x_min, y_min, x_max, y_max, score].
- landmarks (np.ndarray): Array of detected facial landmarks.
Shape: (num_detections, 5, 2), where each row contains 5 landmark points (x, y).
"""
if self.dynamic_size:
height, width, _ = image.shape
self._priors = generate_anchors(image_size=(height, width)) # generate anchors for each input image
resize_factor = 1.0 # No resizing
else:
image, resize_factor = resize_image(image, target_shape=self.input_size)
height, width, _ = image.shape
image_tensor = self.preprocess(image)
# ONNXRuntime inference
outputs = self.inference(image_tensor)
# Postprocessing
detections, landmarks = self.postprocess(outputs, resize_factor, shape=(width, height))
if max_num > 0 and detections.shape[0] > max_num:
# Calculate area of detections
areas = (detections[:, 2] - detections[:, 0]) * (detections[:, 3] - detections[:, 1])
# Calculate offsets from image center
center = (height // 2, width // 2)
offsets = np.vstack([
(detections[:, 0] + detections[:, 2]) / 2 - center[1],
(detections[:, 1] + detections[:, 3]) / 2 - center[0]
])
offset_dist_squared = np.sum(np.power(offsets, 2.0), axis=0)
# Calculate scores based on the chosen metric
if metric == 'max':
scores = areas
else:
scores = areas - offset_dist_squared * center_weight
# Sort by scores and select top `max_num`
sorted_indices = np.argsort(scores)[::-1][:max_num]
detections = detections[sorted_indices]
landmarks = landmarks[sorted_indices]
return detections, landmarks
def postprocess(self, outputs: List[np.ndarray], resize_factor: float, shape: Tuple[int, int]) -> Tuple[np.ndarray, np.ndarray]:
"""
Process the model outputs into final detection results.
Args:
outputs (List[np.ndarray]): Raw outputs from the detection model.
- outputs[0]: Location predictions (bounding box coordinates).
- outputs[1]: Class confidence scores.
- outputs[2]: Landmark predictions.
resize_factor (float): Factor used to resize the input image during preprocessing.
shape (Tuple[int, int]): Original shape of the image as (height, width).
Returns:
Tuple[np.ndarray, np.ndarray]: Processed results containing:
- detections (np.ndarray): Array of detected bounding boxes with confidence scores.
Shape: (num_detections, 5), where each row is [x_min, y_min, x_max, y_max, score].
- landmarks (np.ndarray): Array of detected facial landmarks.
Shape: (num_detections, 5, 2), where each row contains 5 landmark points (x, y).
"""
loc, conf, landmarks = outputs[0].squeeze(0), outputs[1].squeeze(0), outputs[2].squeeze(0)
# Decode boxes and landmarks
boxes = decode_boxes(torch.tensor(loc), self._priors).cpu().numpy()
landmarks = decode_landmarks(torch.tensor(landmarks), self._priors).cpu().numpy()
boxes, landmarks = self._scale_detections(boxes, landmarks, resize_factor, shape=shape)
# Extract confidence scores for the face class
scores = conf[:, 1]
mask = scores > self.conf_thresh
# Filter by confidence threshold
boxes, landmarks, scores = boxes[mask], landmarks[mask], scores[mask]
# Sort by scores
order = scores.argsort()[::-1][:self.pre_nms_topk]
boxes, landmarks, scores = boxes[order], landmarks[order], scores[order]
# Apply NMS
detections = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
keep = nms(detections, self.nms_thresh)
detections, landmarks = detections[keep], landmarks[keep]
# Keep top-k detections
detections, landmarks = detections[:self.post_nms_topk], landmarks[:self.post_nms_topk]
landmarks = landmarks.reshape(-1, 5, 2).astype(np.int32)
return detections, landmarks
def _scale_detections(self, boxes: np.ndarray, landmarks: np.ndarray, resize_factor: float, shape: Tuple[int, int]) -> Tuple[np.ndarray, np.ndarray]:
"""Scale bounding boxes and landmarks to the original image size."""
bbox_scale = np.array([shape[0], shape[1]] * 2)
boxes = boxes * bbox_scale / resize_factor
landmark_scale = np.array([shape[0], shape[1]] * 5)
landmarks = landmarks * landmark_scale / resize_factor
return boxes, landmarks

15
uniface/version.py Normal file

@@ -0,0 +1,15 @@
# Copyright 2024 Yakhyokhuja Valikhujaev
#
# Licensed under the MIT License.
# You may obtain a copy of the License at
#
# https://opensource.org/licenses/MIT
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
__version__ = "0.1.2"
__author__ = "Yakhyokhuja Valikhujaev"

38
uniface/visualization.py Normal file

@@ -0,0 +1,38 @@
# Copyright 2024 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
import cv2
import numpy as np
def draw_detections(image, detections, vis_threshold=0.6):
"""
Draw bounding boxes and landmarks on the image.
Args:
image (ndarray): Image to draw detections on.
detections (tuple): (bounding boxes, landmarks) as NumPy arrays.
vis_threshold (float): Confidence threshold for filtering detections.
"""
_colors = [(0, 0, 255), (0, 255, 255), (255, 0, 255), (0, 255, 0), (255, 0, 0)]
# Unpack detections
boxes, landmarks = detections
scores = boxes[:, 4]
# Filter detections by confidence threshold
filtered = scores >= vis_threshold
boxes = boxes[filtered, :4].astype(np.int32)
landmarks = landmarks[filtered]
scores = scores[filtered]
print(f"#faces: {len(scores)}")
# Draw bounding boxes, scores, and landmarks
for box, score, landmark in zip(boxes, scores, landmarks):
cv2.rectangle(image, tuple(box[:2]), tuple(box[2:]), (0, 0, 255), 2)
cv2.putText(image, f"{score:.2f}", (box[0], box[1] + 12), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
for point, color in zip(landmark, _colors):
cv2.circle(image, tuple(point), 2, color, -1)