Merge pull request #6 from yakhyo/np-adapt

Remove torch dependency and adapt numpy
This commit is contained in:
Yakhyokhuja Valikhujaev
2025-03-16 17:53:56 +09:00
committed by GitHub
11 changed files with 115 additions and 146 deletions

View File

@@ -70,7 +70,7 @@ from uniface import RetinaFace
# Initialize the RetinaFace model
uniface_inference = RetinaFace(
model="retinaface_mnet_v2", # Model name
model_name="retinaface_mnet_v2", # Model name
conf_thresh=0.5, # Confidence threshold
pre_nms_topk=5000, # Pre-NMS Top-K detections
nms_thresh=0.4, # NMS IoU threshold
@@ -94,6 +94,7 @@ original_image = cv2.imread(image_path)
# Perform inference
boxes, landmarks = uniface_inference.detect(original_image)
# boxes: [x_min, y_min, x_max, y_max, confidence]
# Visualize results
draw_detections(original_image, (boxes, landmarks), vis_threshold=0.6)
@@ -126,6 +127,8 @@ while True:
# Perform inference
boxes, landmarks = uniface_inference.detect(frame)
# 'boxes' contains bounding box coordinates and confidence scores:
# Format: [x_min, y_min, x_max, y_max, confidence]
# Draw detections on the frame
draw_detections(frame, (boxes, landmarks), vis_threshold=0.6)
@@ -169,7 +172,7 @@ cv2.destroyAllWindows()
from typing import Tuple
RetinaFace(
model: str,
model_name: str,
conf_thresh: float = 0.5,
pre_nms_topk: int = 5000,
nms_thresh: float = 0.4,
@@ -181,7 +184,7 @@ RetinaFace(
**Parameters**:
- `model` _(str)_: Name of the model to use. Supported models:
- `model_name` _(str)_: Name of the model to use. Supported models:
- `retinaface_mnet025`, `retinaface_mnet050`, `retinaface_mnet_v1`, `retinaface_mnet_v2`
- `retinaface_r18`, `retinaface_r34`
- `conf_thresh` _(float, default=0.5)_: Minimum confidence score for detections.

File diff suppressed because one or more lines are too long

View File

@@ -63,20 +63,20 @@
"name": "stderr",
"output_type": "stream",
"text": [
"2025-01-09 05:10:37,310 - INFO - Initializing RetinaFace with model=retinaface_mnet_v2, conf_thresh=0.5, nms_thresh=0.4, pre_nms_topk=5000, post_nms_topk=750, dynamic_size=False, input_size=(640, 640)\n",
"2025-01-09 05:10:37,349 - INFO - Verified model weights located at: /home/yakhyo/.uniface/models/retinaface_mnet_v2.onnx\n",
"2025-01-09 05:10:37,445 - INFO - Successfully initialized the model from /home/yakhyo/.uniface/models/retinaface_mnet_v2.onnx\n"
"2025-03-16 14:32:33,786 - INFO - Initializing RetinaFace with model=retinaface_mnet_v2, conf_thresh=0.5, nms_thresh=0.4, pre_nms_topk=5000, post_nms_topk=750, dynamic_size=False, input_size=(640, 640)\n",
"2025-03-16 14:32:33,830 - INFO - Verified model weights located at: C:\\Users\\yakhyo/.uniface/models\\retinaface_mnet_v2.onnx\n",
"2025-03-16 14:32:33,926 - INFO - Successfully initialized the model from C:\\Users\\yakhyo/.uniface/models\\retinaface_mnet_v2.onnx\n"
]
}
],
"source": [
"# Initialize the RetinaFace model\n",
"uniface_inference = RetinaFace(\n",
" model=\"retinaface_mnet_v2\", # Model name\n",
" conf_thresh=0.5, # Confidence threshold\n",
" pre_nms_topk=5000, # Pre-NMS Top-K detections\n",
" nms_thresh=0.4, # NMS IoU threshold\n",
" post_nms_topk=750 # Post-NMS Top-K detections,\n",
" model_name=\"retinaface_mnet_v2\", # Model name\n",
" conf_thresh=0.5, # Confidence threshold\n",
" pre_nms_topk=5000, # Pre-NMS Top-K detections\n",
" nms_thresh=0.4, # NMS IoU threshold\n",
" post_nms_topk=750 # Post-NMS Top-K detections,\n",
")"
]
},
@@ -262,7 +262,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "face",
"display_name": "torch",
"language": "python",
"name": "python3"
},
@@ -276,7 +276,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.10"
"version": "3.12.9"
}
},
"nbformat": 4,

35
pyproject.toml Normal file
View File

@@ -0,0 +1,35 @@
[project]
name = "uniface"
version = "0.1.7"
description = "UniFace: A Comprehensive Library for Face Detection, Recognition, Landmark Analysis, Age, and Gender Detection"
readme = "README.md"
license = { text = "MIT" }
authors = [
{ name = "Yakhyokhuja Valikhujaev", email = "yakhyo9696@gmail.com" }
]
dependencies = [
"numpy",
"opencv-python",
"onnx",
"onnxruntime-gpu",
"scikit-image",
"requests"
]
requires-python = ">=3.8"
[project.optional-dependencies]
dev = ["pytest"]
[project.urls]
Homepage = "https://github.com/yakhyo/uniface"
Repository = "https://github.com/yakhyo/uniface"
[build-system]
requires = ["setuptools>=64", "wheel"]
build-backend = "setuptools.build_meta"
[tool.setuptools]
packages = ["uniface"]
[tool.setuptools.package-data]
"uniface" = ["*.txt", "*.md"]

View File

@@ -1,9 +1,7 @@
pytest
numpy
opencv-python
opencv-python-headless
onnx
onnxruntime
onnxruntime-gpu
scikit-image
requests
torch
scikit-image
pytest

View File

@@ -1,23 +0,0 @@
#!/bin/bash
# Exit on errors
set -e
cd "$(dirname "$0")"/..
echo "Deleting existing release-related files..."
rm -rf dist/ build/ *.egg-info
pip install --upgrade pip
pip install twine
echo "Creating a package for the current release (PyPI compatible)..."
python3 setup.py sdist bdist_wheel
echo "Release package created successfully in the 'dist/' folder."
echo "Uploading the package to PyPI..."
twine upload dist/*
echo "Release uploaded successfully!"

View File

@@ -1,46 +0,0 @@
import os
from setuptools import setup, find_packages
# Read the README file for the long description
long_description = ""
if os.path.exists("README.md"):
with open("README.md", "r", encoding="utf-8") as f:
long_description = f.read()
setup(
name="uniface",
version="0.1.6",
packages=find_packages(),
install_requires=[
"numpy",
"opencv-python",
"onnx",
"onnxruntime",
"requests",
"torch",
"scikit-image"
],
extras_require={
"dev": ["pytest"],
},
description="UniFace: A Comprehensive Library for Face Detection, Recognition, Landmark Analysis, Age, and Gender Detection",
long_description=long_description,
long_description_content_type="text/markdown",
author="Yakhyokhuja Valikhujaev",
author_email="yakhyo9696@gmail.com",
url="https://github.com/yakhyo/uniface",
license="MIT",
classifiers=[
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"License :: OSI Approved :: MIT License",
"Operating System :: OS Independent",
"Topic :: Software Development :: Libraries :: Python Modules",
],
keywords="face detection, face recognition, facial landmark, facial attribute, onnx, opencv, retinaface",
python_requires=">=3.8",
)

View File

@@ -39,7 +39,7 @@ if __name__ == '__main__':
# Initialize and run the ONNX inference
retinaface_inference = RetinaFace(
model="retinaface_mnet_v2",
model_name="retinaface_mnet_v2",
conf_thresh=0.5,
pre_nms_topk=5000,
nms_thresh=0.4,

View File

@@ -13,7 +13,7 @@
__license__ = "MIT"
__author__ = "Yakhyokhuja Valikhujaev"
__version__ = "0.1.6"
__version__ = "0.1.7"
from uniface.retinaface import RetinaFace

View File

@@ -7,7 +7,6 @@ import math
import itertools
import numpy as np
import torch
from typing import Tuple, List
@@ -44,7 +43,7 @@ def resize_image(frame, target_shape: Tuple[int, int] = (640, 640)) -> Tuple[np.
return image, resize_factor
def generate_anchors(image_size: Tuple[int, int] = (640, 640)) -> torch.Tensor:
def generate_anchors(image_size: Tuple[int, int] = (640, 640)) -> np.ndarray:
"""
Generate anchor boxes for a given image size.
@@ -52,7 +51,7 @@ def generate_anchors(image_size: Tuple[int, int] = (640, 640)) -> torch.Tensor:
image_size (Tuple[int, int]): Input image size (width, height). Defaults to (640, 640).
Returns:
torch.Tensor: Anchor box coordinates as a tensor.
np.ndarray: Anchor box coordinates as a NumPy array.
"""
image_size = image_size
@@ -79,7 +78,7 @@ def generate_anchors(image_size: Tuple[int, int] = (640, 640)) -> torch.Tensor:
for cy, cx in itertools.product(dense_cy, dense_cx):
anchors += [cx, cy, s_kx, s_ky]
output = torch.Tensor(anchors).view(-1, 4)
output = np.array(anchors, dtype=np.float32).reshape(-1, 4)
return output
@@ -123,56 +122,57 @@ def nms(dets: List[np.ndarray], threshold: float):
return keep
def decode_boxes(loc, priors, variances=[0.1, 0.2]) -> torch.Tensor:
def decode_boxes(loc, priors, variances=[0.1, 0.2]) -> np.ndarray:
"""
Decode locations from predictions using priors to undo
the encoding done for offset regression at train time.
Args:
loc (tensor): Location predictions for loc layers, shape: [num_priors, 4]
priors (tensor): Prior boxes in center-offset form, shape: [num_priors, 4]
loc (np.ndarray): Location predictions for loc layers, shape: [num_priors, 4]
priors (np.ndarray): Prior boxes in center-offset form, shape: [num_priors, 4]
variances (list[float]): Variances of prior boxes
Returns:
tensor: Decoded bounding box predictions
np.ndarray: Decoded bounding box predictions
"""
# Compute centers of predicted boxes
cxcy = priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:]
# Compute widths and heights of predicted boxes
wh = priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])
wh = priors[:, 2:] * np.exp(loc[:, 2:] * variances[1])
# Convert center, size to corner coordinates
boxes = torch.empty_like(loc)
boxes = np.zeros_like(loc)
boxes[:, :2] = cxcy - wh / 2 # xmin, ymin
boxes[:, 2:] = cxcy + wh / 2 # xmax, ymax
return boxes
def decode_landmarks(predictions, priors, variances=[0.1, 0.2]) -> torch.Tensor:
def decode_landmarks(predictions, priors, variances=[0.1, 0.2]) -> np.ndarray:
"""
Decode landmarks from predictions using prior boxes to reverse the encoding done during training.
Decode landmark predictions using prior boxes.
Args:
predictions (tensor): Landmark predictions for localization layers.
Shape: [num_priors, 10] where each prior contains 5 landmark (x, y) pairs.
priors (tensor): Prior boxes in center-offset form.
Shape: [num_priors, 4], where each prior has (cx, cy, width, height).
variances (list[float]): Variances of the prior boxes to scale the decoded values.
predictions (np.ndarray): Landmark predictions, shape: [num_priors, 10]
priors (np.ndarray): Prior boxes, shape: [num_priors, 4]
variances (list): Scaling factors for landmark offsets.
Returns:
landmarks (tensor): Decoded landmark predictions.
Shape: [num_priors, 10] where each row contains the decoded (x, y) pairs for 5 landmarks.
np.ndarray: Decoded landmarks, shape: [num_priors, 10]
"""
# Reshape predictions to [num_priors, 5, 2] to handle each pair (x, y) in a batch
predictions = predictions.view(predictions.size(0), 5, 2)
# Reshape predictions to [num_priors, 5, 2] to process landmark points
predictions = predictions.reshape(predictions.shape[0], 5, 2)
# Perform the same operation on all landmark pairs at once
landmarks = priors[:, :2].unsqueeze(1) + predictions * variances[0] * priors[:, 2:].unsqueeze(1)
# Expand priors to match (num_priors, 5, 2)
priors_xy = np.repeat(priors[:, :2][:, np.newaxis, :], 5, axis=1) # (num_priors, 5, 2)
priors_wh = np.repeat(priors[:, 2:][:, np.newaxis, :], 5, axis=1) # (num_priors, 5, 2)
# Compute absolute landmark positions
landmarks = priors_xy + predictions * variances[0] * priors_wh
# Flatten back to [num_priors, 10]
landmarks = landmarks.view(landmarks.size(0), -1)
landmarks = landmarks.reshape(landmarks.shape[0], -1)
return landmarks

View File

@@ -7,12 +7,10 @@ import cv2
import numpy as np
import onnxruntime as ort
import torch
from typing import Tuple, List, Optional, Literal
from uniface.log import Logger
from uniface.model_store import verify_model_weights
from uniface.common import (
nms,
resize_image,
@@ -27,28 +25,32 @@ class RetinaFace:
A class for face detection using the RetinaFace model.
Args:
model (str): Path or identifier of the model weights.
conf_thresh (float): Confidence threshold for detections. Defaults to 0.5.
nms_thresh (float): Non-maximum suppression threshold. Defaults to 0.4.
pre_nms_topk (int): Maximum number of detections before NMS. Defaults to 5000.
post_nms_topk (int): Maximum number of detections after NMS. Defaults to 750.
dynamic_size (Optional[bool]): Whether to adjust anchor generation dynamically based on image size. Defaults to False.
input_size (Optional[Tuple[int, int]]): Static input size for the model (width, height). Defaults to (640, 640).
model_name (str): Name of the model.
conf_thresh (float, optional): Confidence threshold for detections. Defaults to 0.5.
nms_thresh (float, optional): Non-maximum suppression (NMS) threshold. Defaults to 0.4.
pre_nms_topk (int, optional): Maximum number of detections considered before applying NMS. Defaults to 5000.
post_nms_topk (int, optional): Maximum number of detections retained after NMS. Defaults to 750.
dynamic_size (bool, optional): Whether to dynamically adjust anchor generation based on image size. Defaults to False.
input_size (Tuple[int, int], optional): Static input size for the model (width, height). Used when `dynamic_size=False`. Defaults to (640, 640).
Attributes:
conf_thresh (float): Confidence threshold for filtering detections.
nms_thresh (float): Threshold for NMS to remove duplicate detections.
pre_nms_topk (int): Maximum detections to consider before applying NMS.
post_nms_topk (int): Maximum detections retained after applying NMS.
dynamic_size (bool): Indicates if input size and anchors are dynamically adjusted.
input_size (Tuple[int, int]): The model's input image size.
_model_path (str): Path to the model weights.
_priors (torch.Tensor): Precomputed anchor boxes for static input size.
nms_thresh (float): NMS threshold to remove duplicate detections.
pre_nms_topk (int): Number of detections considered before applying NMS.
post_nms_topk (int): Maximum number of detections retained after applying NMS.
dynamic_size (bool): Whether the model dynamically adjusts input size and anchors.
input_size (Tuple[int, int] or None): The model's fixed input size (if `dynamic_size=False`), otherwise None.
_model_path (str): Verified path to the model weights.
_priors (np.ndarray or None): Precomputed anchor boxes when using static input size. None if `dynamic_size=True`.
Raises:
ValueError: If the model weights cannot be found or verified.
RuntimeError: If there is an error initializing the model.
"""
def __init__(
self,
model: str,
model_name: str,
conf_thresh: float = 0.5,
nms_thresh: float = 0.4,
pre_nms_topk: int = 5000,
@@ -65,13 +67,13 @@ class RetinaFace:
self.input_size = input_size
Logger.info(
f"Initializing RetinaFace with model={model}, conf_thresh={conf_thresh}, nms_thresh={nms_thresh}, "
f"Initializing RetinaFace with model={model_name}, conf_thresh={conf_thresh}, nms_thresh={nms_thresh}, "
f"pre_nms_topk={pre_nms_topk}, post_nms_topk={post_nms_topk}, dynamic_size={dynamic_size}, "
f"input_size={input_size}"
)
# Get path to model weights
self._model_path = verify_model_weights(model)
self._model_path = verify_model_weights(model_name)
Logger.info(f"Verified model weights located at: {self._model_path}")
# Precompute anchors if using static size
@@ -217,8 +219,8 @@ class RetinaFace:
loc, conf, landmarks = outputs[0].squeeze(0), outputs[1].squeeze(0), outputs[2].squeeze(0)
# Decode boxes and landmarks
boxes = decode_boxes(torch.tensor(loc), self._priors).cpu().numpy()
landmarks = decode_landmarks(torch.tensor(landmarks), self._priors).cpu().numpy()
boxes = decode_boxes(loc, self._priors)
landmarks = decode_landmarks(landmarks, self._priors)
boxes, landmarks = self._scale_detections(boxes, landmarks, resize_factor, shape=(shape[0], shape[1]))