Mirror of https://github.com/yakhyo/uniface.git (synced 2025-12-30 09:02:25 +00:00)
Merge pull request #6 from yakhyo/np-adapt
Remove torch dependency and adapt numpy
This commit is contained in:
@@ -70,7 +70,7 @@ from uniface import RetinaFace

 # Initialize the RetinaFace model
 uniface_inference = RetinaFace(
-    model="retinaface_mnet_v2",       # Model name
+    model_name="retinaface_mnet_v2",  # Model name
     conf_thresh=0.5,                  # Confidence threshold
     pre_nms_topk=5000,                # Pre-NMS Top-K detections
     nms_thresh=0.4,                   # NMS IoU threshold
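For quick copy-paste, here is the updated initialization from this hunk as a complete snippet; the `post_nms_topk=750` value is taken from the notebook cell further down in this diff.

    from uniface import RetinaFace

    # Post-change API: the first keyword is now `model_name`
    uniface_inference = RetinaFace(
        model_name="retinaface_mnet_v2",  # Model name
        conf_thresh=0.5,                  # Confidence threshold
        pre_nms_topk=5000,                # Pre-NMS Top-K detections
        nms_thresh=0.4,                   # NMS IoU threshold
        post_nms_topk=750,                # Post-NMS Top-K detections
    )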
@@ -94,6 +94,7 @@ original_image = cv2.imread(image_path)

 # Perform inference
 boxes, landmarks = uniface_inference.detect(original_image)
+# boxes: [x_min, y_min, x_max, y_max, confidence]

 # Visualize results
 draw_detections(original_image, (boxes, landmarks), vis_threshold=0.6)
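A minimal end-to-end sketch of the image path documented above; the `draw_detections` import location and the file names are assumptions, only the call signatures come from the README snippet.

    import cv2
    from uniface import RetinaFace, draw_detections  # draw_detections import path assumed

    uniface_inference = RetinaFace(model_name="retinaface_mnet_v2")

    image_path = "assets/test.jpg"  # hypothetical input image
    original_image = cv2.imread(image_path)

    # Perform inference
    boxes, landmarks = uniface_inference.detect(original_image)
    # boxes: [x_min, y_min, x_max, y_max, confidence]

    # Visualize and save results
    draw_detections(original_image, (boxes, landmarks), vis_threshold=0.6)
    cv2.imwrite("output.jpg", original_image)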
@@ -126,6 +127,8 @@ while True:

     # Perform inference
     boxes, landmarks = uniface_inference.detect(frame)
+    # 'boxes' contains bounding box coordinates and confidence scores:
+    # Format: [x_min, y_min, x_max, y_max, confidence]

     # Draw detections on the frame
     draw_detections(frame, (boxes, landmarks), vis_threshold=0.6)
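And a self-contained version of the webcam loop this hunk annotates; again only a sketch, with the `draw_detections` import path assumed.

    import cv2
    from uniface import RetinaFace, draw_detections  # draw_detections import path assumed

    uniface_inference = RetinaFace(model_name="retinaface_mnet_v2")
    cap = cv2.VideoCapture(0)

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Perform inference
        boxes, landmarks = uniface_inference.detect(frame)
        # 'boxes' contains bounding box coordinates and confidence scores:
        # Format: [x_min, y_min, x_max, y_max, confidence]

        # Draw detections on the frame
        draw_detections(frame, (boxes, landmarks), vis_threshold=0.6)

        cv2.imshow("RetinaFace", frame)
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break

    cap.release()
    cv2.destroyAllWindows()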
@@ -169,7 +172,7 @@ cv2.destroyAllWindows()
 from typing import Tuple

 RetinaFace(
-    model: str,
+    model_name: str,
     conf_thresh: float = 0.5,
     pre_nms_topk: int = 5000,
     nms_thresh: float = 0.4,
@@ -181,7 +184,7 @@ RetinaFace(

 **Parameters**:

-- `model` _(str)_: Name of the model to use. Supported models:
+- `model_name` _(str)_: Name of the model to use. Supported models:
   - `retinaface_mnet025`, `retinaface_mnet050`, `retinaface_mnet_v1`, `retinaface_mnet_v2`
   - `retinaface_r18`, `retinaface_r34`
 - `conf_thresh` _(float, default=0.5)_: Minimum confidence score for detections.
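To make the renamed parameter concrete, a small sketch that instantiates every model listed above; each constructor call verifies (and, if needed, downloads) the corresponding ONNX weights, so network access is assumed.

    from uniface import RetinaFace

    SUPPORTED_MODELS = [
        "retinaface_mnet025", "retinaface_mnet050",
        "retinaface_mnet_v1", "retinaface_mnet_v2",
        "retinaface_r18", "retinaface_r34",
    ]

    for name in SUPPORTED_MODELS:
        detector = RetinaFace(model_name=name, conf_thresh=0.5)
        print(f"{name}: ready")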
File diff suppressed because one or more lines are too long
@@ -63,20 +63,20 @@
     "name": "stderr",
     "output_type": "stream",
     "text": [
-     "2025-01-09 05:10:37,310 - INFO - Initializing RetinaFace with model=retinaface_mnet_v2, conf_thresh=0.5, nms_thresh=0.4, pre_nms_topk=5000, post_nms_topk=750, dynamic_size=False, input_size=(640, 640)\n",
-     "2025-01-09 05:10:37,349 - INFO - Verified model weights located at: /home/yakhyo/.uniface/models/retinaface_mnet_v2.onnx\n",
-     "2025-01-09 05:10:37,445 - INFO - Successfully initialized the model from /home/yakhyo/.uniface/models/retinaface_mnet_v2.onnx\n"
+     "2025-03-16 14:32:33,786 - INFO - Initializing RetinaFace with model=retinaface_mnet_v2, conf_thresh=0.5, nms_thresh=0.4, pre_nms_topk=5000, post_nms_topk=750, dynamic_size=False, input_size=(640, 640)\n",
+     "2025-03-16 14:32:33,830 - INFO - Verified model weights located at: C:\\Users\\yakhyo/.uniface/models\\retinaface_mnet_v2.onnx\n",
+     "2025-03-16 14:32:33,926 - INFO - Successfully initialized the model from C:\\Users\\yakhyo/.uniface/models\\retinaface_mnet_v2.onnx\n"
     ]
    }
   ],
   "source": [
    "# Initialize the RetinaFace model\n",
    "uniface_inference = RetinaFace(\n",
-    "    model=\"retinaface_mnet_v2\",       # Model name\n",
-    "    conf_thresh=0.5,                  # Confidence threshold\n",
-    "    pre_nms_topk=5000,                # Pre-NMS Top-K detections\n",
-    "    nms_thresh=0.4,                   # NMS IoU threshold\n",
-    "    post_nms_topk=750                 # Post-NMS Top-K detections,\n",
+    "    model_name=\"retinaface_mnet_v2\",  # Model name\n",
+    "    conf_thresh=0.5,                  # Confidence threshold\n",
+    "    pre_nms_topk=5000,                # Pre-NMS Top-K detections\n",
+    "    nms_thresh=0.4,                   # NMS IoU threshold\n",
+    "    post_nms_topk=750                 # Post-NMS Top-K detections,\n",
    ")"
   ]
  },
@@ -262,7 +262,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "face",
+   "display_name": "torch",
    "language": "python",
    "name": "python3"
   },
@@ -276,7 +276,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.10"
+   "version": "3.12.9"
   }
  },
 "nbformat": 4,
pyproject.toml (new file, 35 additions)
@@ -0,0 +1,35 @@
+[project]
+name = "uniface"
+version = "0.1.7"
+description = "UniFace: A Comprehensive Library for Face Detection, Recognition, Landmark Analysis, Age, and Gender Detection"
+readme = "README.md"
+license = { text = "MIT" }
+authors = [
+    { name = "Yakhyokhuja Valikhujaev", email = "yakhyo9696@gmail.com" }
+]
+dependencies = [
+    "numpy",
+    "opencv-python",
+    "onnx",
+    "onnxruntime-gpu",
+    "scikit-image",
+    "requests"
+]
+requires-python = ">=3.8"
+
+[project.optional-dependencies]
+dev = ["pytest"]
+
+[project.urls]
+Homepage = "https://github.com/yakhyo/uniface"
+Repository = "https://github.com/yakhyo/uniface"
+
+[build-system]
+requires = ["setuptools>=64", "wheel"]
+build-backend = "setuptools.build_meta"
+
+[tool.setuptools]
+packages = ["uniface"]
+
+[tool.setuptools.package-data]
+"uniface" = ["*.txt", "*.md"]
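Once a wheel built from this pyproject.toml is installed, the declared metadata can be checked from Python. This is only a sanity-check sketch; it assumes an installed `uniface` 0.1.7 distribution.

    from importlib import metadata

    print(metadata.version("uniface"))       # expected: 0.1.7
    requires = metadata.requires("uniface") or []
    print(requires)                          # numpy, opencv-python, onnx, onnxruntime-gpu, ...

    # torch should no longer appear in the dependency list
    assert not any(req.split(";")[0].strip().startswith("torch") for req in requires)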
@@ -1,9 +1,7 @@
-pytest
 numpy
-opencv-python
+opencv-python-headless
 onnx
-onnxruntime
+onnxruntime-gpu
 scikit-image
 requests
-torch
-scikit-image
+pytest
@@ -1,23 +0,0 @@
-#!/bin/bash
-
-# Exit on errors
-set -e
-
-cd "$(dirname "$0")"/..
-
-echo "Deleting existing release-related files..."
-rm -rf dist/ build/ *.egg-info
-
-pip install --upgrade pip
-pip install twine
-
-echo "Creating a package for the current release (PyPI compatible)..."
-python3 setup.py sdist bdist_wheel
-
-echo "Release package created successfully in the 'dist/' folder."
-
-
-echo "Uploading the package to PyPI..."
-twine upload dist/*
-
-echo "Release uploaded successfully!"
setup.py (deleted file, 46 deletions)
@@ -1,46 +0,0 @@
-import os
-from setuptools import setup, find_packages
-
-# Read the README file for the long description
-long_description = ""
-if os.path.exists("README.md"):
-    with open("README.md", "r", encoding="utf-8") as f:
-        long_description = f.read()
-
-setup(
-    name="uniface",
-    version="0.1.6",
-    packages=find_packages(),
-    install_requires=[
-        "numpy",
-        "opencv-python",
-        "onnx",
-        "onnxruntime",
-        "requests",
-        "torch",
-        "scikit-image"
-    ],
-    extras_require={
-        "dev": ["pytest"],
-    },
-    description="UniFace: A Comprehensive Library for Face Detection, Recognition, Landmark Analysis, Age, and Gender Detection",
-    long_description=long_description,
-    long_description_content_type="text/markdown",
-    author="Yakhyokhuja Valikhujaev",
-    author_email="yakhyo9696@gmail.com",
-    url="https://github.com/yakhyo/uniface",
-    license="MIT",
-    classifiers=[
-        "Programming Language :: Python :: 3",
-        "Programming Language :: Python :: 3.8",
-        "Programming Language :: Python :: 3.9",
-        "Programming Language :: Python :: 3.10",
-        "Programming Language :: Python :: 3.11",
-        "Programming Language :: Python :: 3.12",
-        "License :: OSI Approved :: MIT License",
-        "Operating System :: OS Independent",
-        "Topic :: Software Development :: Libraries :: Python Modules",
-    ],
-    keywords="face detection, face recognition, facial landmark, facial attribute, onnx, opencv, retinaface",
-    python_requires=">=3.8",
-)
test.py (2 changes)
@@ -39,7 +39,7 @@ if __name__ == '__main__':

     # Initialize and run the ONNX inference
     retinaface_inference = RetinaFace(
-        model="retinaface_mnet_v2",
+        model_name="retinaface_mnet_v2",
         conf_thresh=0.5,
         pre_nms_topk=5000,
         nms_thresh=0.4,
@@ -13,7 +13,7 @@

 __license__ = "MIT"
 __author__ = "Yakhyokhuja Valikhujaev"
-__version__ = "0.1.6"
+__version__ = "0.1.7"


 from uniface.retinaface import RetinaFace
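A quick check after upgrading; this assumes the dunders above live in uniface/__init__.py, which the surrounding import suggests.

    import uniface
    from uniface import RetinaFace

    print(uniface.__version__)  # "0.1.7" after this change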
@@ -7,7 +7,6 @@ import math
 import itertools
 import numpy as np

-import torch
 from typing import Tuple, List

@@ -44,7 +43,7 @@ def resize_image(frame, target_shape: Tuple[int, int] = (640, 640)) -> Tuple[np.
     return image, resize_factor


-def generate_anchors(image_size: Tuple[int, int] = (640, 640)) -> torch.Tensor:
+def generate_anchors(image_size: Tuple[int, int] = (640, 640)) -> np.ndarray:
     """
     Generate anchor boxes for a given image size.

@@ -52,7 +51,7 @@ def generate_anchors(image_size: Tuple[int, int] = (640, 640)) -> torch.Tensor:
         image_size (Tuple[int, int]): Input image size (width, height). Defaults to (640, 640).

     Returns:
-        torch.Tensor: Anchor box coordinates as a tensor.
+        np.ndarray: Anchor box coordinates as a NumPy array.
     """
     image_size = image_size

@@ -79,7 +78,7 @@ def generate_anchors(image_size: Tuple[int, int] = (640, 640)) -> torch.Tensor:
         for cy, cx in itertools.product(dense_cy, dense_cx):
             anchors += [cx, cy, s_kx, s_ky]

-    output = torch.Tensor(anchors).view(-1, 4)
+    output = np.array(anchors, dtype=np.float32).reshape(-1, 4)
     return output

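For readers who want to see the full NumPy anchor routine in one place, here is a standalone sketch. The `min_sizes` and `steps` values are the usual RetinaFace defaults and are assumptions here (they are not part of this diff); only the final `np.array(...).reshape(-1, 4)` step mirrors the change above.

    import itertools
    import math
    from typing import Tuple

    import numpy as np

    def generate_anchors(image_size: Tuple[int, int] = (640, 640)) -> np.ndarray:
        # Assumed RetinaFace-style configuration (not shown in this diff)
        min_sizes = [[16, 32], [64, 128], [256, 512]]
        steps = [8, 16, 32]
        feature_maps = [[math.ceil(image_size[0] / s), math.ceil(image_size[1] / s)] for s in steps]

        anchors = []
        for k, fmap in enumerate(feature_maps):
            for i, j in itertools.product(range(fmap[0]), range(fmap[1])):
                for min_size in min_sizes[k]:
                    s_kx = min_size / image_size[1]
                    s_ky = min_size / image_size[0]
                    dense_cx = [(j + 0.5) * steps[k] / image_size[1]]
                    dense_cy = [(i + 0.5) * steps[k] / image_size[0]]
                    for cy, cx in itertools.product(dense_cy, dense_cx):
                        anchors += [cx, cy, s_kx, s_ky]

        # Same construction as in the diff: NumPy array instead of torch.Tensor(...).view(-1, 4)
        return np.array(anchors, dtype=np.float32).reshape(-1, 4)

    priors = generate_anchors((640, 640))
    print(priors.shape)  # (16800, 4) with these settings at 640x640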
@@ -123,56 +122,57 @@ def nms(dets: List[np.ndarray], threshold: float):
     return keep


-def decode_boxes(loc, priors, variances=[0.1, 0.2]) -> torch.Tensor:
+def decode_boxes(loc, priors, variances=[0.1, 0.2]) -> np.ndarray:
     """
     Decode locations from predictions using priors to undo
     the encoding done for offset regression at train time.

     Args:
-        loc (tensor): Location predictions for loc layers, shape: [num_priors, 4]
-        priors (tensor): Prior boxes in center-offset form, shape: [num_priors, 4]
+        loc (np.ndarray): Location predictions for loc layers, shape: [num_priors, 4]
+        priors (np.ndarray): Prior boxes in center-offset form, shape: [num_priors, 4]
         variances (list[float]): Variances of prior boxes

     Returns:
-        tensor: Decoded bounding box predictions
+        np.ndarray: Decoded bounding box predictions
     """
     # Compute centers of predicted boxes
     cxcy = priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:]

     # Compute widths and heights of predicted boxes
-    wh = priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])
+    wh = priors[:, 2:] * np.exp(loc[:, 2:] * variances[1])

     # Convert center, size to corner coordinates
-    boxes = torch.empty_like(loc)
+    boxes = np.zeros_like(loc)
     boxes[:, :2] = cxcy - wh / 2  # xmin, ymin
     boxes[:, 2:] = cxcy + wh / 2  # xmax, ymax

     return boxes

-def decode_landmarks(predictions, priors, variances=[0.1, 0.2]) -> torch.Tensor:
+def decode_landmarks(predictions, priors, variances=[0.1, 0.2]) -> np.ndarray:
     """
-    Decode landmarks from predictions using prior boxes to reverse the encoding done during training.
+    Decode landmark predictions using prior boxes.

     Args:
-        predictions (tensor): Landmark predictions for localization layers.
-            Shape: [num_priors, 10] where each prior contains 5 landmark (x, y) pairs.
-        priors (tensor): Prior boxes in center-offset form.
-            Shape: [num_priors, 4], where each prior has (cx, cy, width, height).
-        variances (list[float]): Variances of the prior boxes to scale the decoded values.
+        predictions (np.ndarray): Landmark predictions, shape: [num_priors, 10]
+        priors (np.ndarray): Prior boxes, shape: [num_priors, 4]
+        variances (list): Scaling factors for landmark offsets.

     Returns:
-        landmarks (tensor): Decoded landmark predictions.
-            Shape: [num_priors, 10] where each row contains the decoded (x, y) pairs for 5 landmarks.
+        np.ndarray: Decoded landmarks, shape: [num_priors, 10]
     """

-    # Reshape predictions to [num_priors, 5, 2] to handle each pair (x, y) in a batch
-    predictions = predictions.view(predictions.size(0), 5, 2)
+    # Reshape predictions to [num_priors, 5, 2] to process landmark points
+    predictions = predictions.reshape(predictions.shape[0], 5, 2)

-    # Perform the same operation on all landmark pairs at once
-    landmarks = priors[:, :2].unsqueeze(1) + predictions * variances[0] * priors[:, 2:].unsqueeze(1)
+    # Expand priors to match (num_priors, 5, 2)
+    priors_xy = np.repeat(priors[:, :2][:, np.newaxis, :], 5, axis=1)  # (num_priors, 5, 2)
+    priors_wh = np.repeat(priors[:, 2:][:, np.newaxis, :], 5, axis=1)  # (num_priors, 5, 2)
+
+    # Compute absolute landmark positions
+    landmarks = priors_xy + predictions * variances[0] * priors_wh

     # Flatten back to [num_priors, 10]
-    landmarks = landmarks.view(landmarks.size(0), -1)
+    landmarks = landmarks.reshape(landmarks.shape[0], -1)

     return landmarks

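Two quick checks of the NumPy decode path. The first verifies the box formula by hand: a zero offset against a prior centred at (0.5, 0.5) with width and height 0.2 must decode to [0.4, 0.4, 0.6, 0.6]. The second is a shape-level smoke test; importing the helpers from `uniface.common` is an inference from the imports in the next hunk.

    import numpy as np
    from uniface.common import decode_boxes, decode_landmarks  # module path inferred

    # 1) Hand-checkable box decode
    priors = np.array([[0.5, 0.5, 0.2, 0.2]], dtype=np.float32)
    loc = np.zeros((1, 4), dtype=np.float32)
    print(decode_boxes(loc, priors))  # [[0.4 0.4 0.6 0.6]]

    # 2) Shape smoke test with random inputs
    num_priors = 16800
    rng = np.random.default_rng(0)
    loc = rng.normal(size=(num_priors, 4)).astype(np.float32)
    ldm = rng.normal(size=(num_priors, 10)).astype(np.float32)
    priors = rng.uniform(0.0, 1.0, size=(num_priors, 4)).astype(np.float32)

    boxes = decode_boxes(loc, priors)          # -> (16800, 4): x_min, y_min, x_max, y_max
    landmarks = decode_landmarks(ldm, priors)  # -> (16800, 10): five (x, y) pairs per prior
    print(boxes.shape, landmarks.shape)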
@@ -7,12 +7,10 @@ import cv2
 import numpy as np
 import onnxruntime as ort
-import torch
-
 from typing import Tuple, List, Optional, Literal

 from uniface.log import Logger
 from uniface.model_store import verify_model_weights

 from uniface.common import (
     nms,
     resize_image,
@@ -27,28 +25,32 @@ class RetinaFace:
     A class for face detection using the RetinaFace model.

     Args:
-        model (str): Path or identifier of the model weights.
-        conf_thresh (float): Confidence threshold for detections. Defaults to 0.5.
-        nms_thresh (float): Non-maximum suppression threshold. Defaults to 0.4.
-        pre_nms_topk (int): Maximum number of detections before NMS. Defaults to 5000.
-        post_nms_topk (int): Maximum number of detections after NMS. Defaults to 750.
-        dynamic_size (Optional[bool]): Whether to adjust anchor generation dynamically based on image size. Defaults to False.
-        input_size (Optional[Tuple[int, int]]): Static input size for the model (width, height). Defaults to (640, 640).
+        model_name (str): Name of the model.
+        conf_thresh (float, optional): Confidence threshold for detections. Defaults to 0.5.
+        nms_thresh (float, optional): Non-maximum suppression (NMS) threshold. Defaults to 0.4.
+        pre_nms_topk (int, optional): Maximum number of detections considered before applying NMS. Defaults to 5000.
+        post_nms_topk (int, optional): Maximum number of detections retained after NMS. Defaults to 750.
+        dynamic_size (bool, optional): Whether to dynamically adjust anchor generation based on image size. Defaults to False.
+        input_size (Tuple[int, int], optional): Static input size for the model (width, height). Used when `dynamic_size=False`. Defaults to (640, 640).

     Attributes:
         conf_thresh (float): Confidence threshold for filtering detections.
-        nms_thresh (float): Threshold for NMS to remove duplicate detections.
-        pre_nms_topk (int): Maximum detections to consider before applying NMS.
-        post_nms_topk (int): Maximum detections retained after applying NMS.
-        dynamic_size (bool): Indicates if input size and anchors are dynamically adjusted.
-        input_size (Tuple[int, int]): The model's input image size.
-        _model_path (str): Path to the model weights.
-        _priors (torch.Tensor): Precomputed anchor boxes for static input size.
+        nms_thresh (float): NMS threshold to remove duplicate detections.
+        pre_nms_topk (int): Number of detections considered before applying NMS.
+        post_nms_topk (int): Maximum number of detections retained after applying NMS.
+        dynamic_size (bool): Whether the model dynamically adjusts input size and anchors.
+        input_size (Tuple[int, int] or None): The model's fixed input size (if `dynamic_size=False`), otherwise None.
+        _model_path (str): Verified path to the model weights.
+        _priors (np.ndarray or None): Precomputed anchor boxes when using static input size. None if `dynamic_size=True`.

     Raises:
         ValueError: If the model weights cannot be found or verified.
         RuntimeError: If there is an error initializing the model.
     """

     def __init__(
         self,
-        model: str,
+        model_name: str,
         conf_thresh: float = 0.5,
         nms_thresh: float = 0.4,
         pre_nms_topk: int = 5000,
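Based on the docstring above, the constructor can be used in either a static or a dynamic mode; a hedged sketch of both follows (the exact dynamic-size behaviour is inferred from the attribute descriptions, not from code in this diff).

    from uniface import RetinaFace

    # Static 640x640 input with precomputed anchors (the default)
    static_detector = RetinaFace(model_name="retinaface_mnet_v2", input_size=(640, 640))

    # Anchors regenerated from the image size at detect() time (inferred from the dynamic_size docs)
    dynamic_detector = RetinaFace(model_name="retinaface_mnet_v2", dynamic_size=True)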
@@ -65,13 +67,13 @@ class RetinaFace:
         self.input_size = input_size

         Logger.info(
-            f"Initializing RetinaFace with model={model}, conf_thresh={conf_thresh}, nms_thresh={nms_thresh}, "
+            f"Initializing RetinaFace with model={model_name}, conf_thresh={conf_thresh}, nms_thresh={nms_thresh}, "
             f"pre_nms_topk={pre_nms_topk}, post_nms_topk={post_nms_topk}, dynamic_size={dynamic_size}, "
             f"input_size={input_size}"
         )

         # Get path to model weights
-        self._model_path = verify_model_weights(model)
+        self._model_path = verify_model_weights(model_name)
         Logger.info(f"Verified model weights located at: {self._model_path}")

         # Precompute anchors if using static size
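For context, a hypothetical sketch of the ONNX Runtime session that sits behind the verified weights path. Provider selection, preprocessing, and output order inside RetinaFace may differ; the three outputs only mirror the `loc, conf, landmarks` unpacking in the next hunk, and the NCHW 640x640 input layout is an assumption.

    import os

    import numpy as np
    import onnxruntime as ort

    model_path = os.path.expanduser("~/.uniface/models/retinaface_mnet_v2.onnx")  # illustrative path, as logged above
    session = ort.InferenceSession(model_path, providers=["CUDAExecutionProvider", "CPUExecutionProvider"])

    input_name = session.get_inputs()[0].name
    dummy = np.zeros((1, 3, 640, 640), dtype=np.float32)  # assumed input layout
    loc, conf, landmarks = session.run(None, {input_name: dummy})
    print(loc.shape, conf.shape, landmarks.shape)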
@@ -217,8 +219,8 @@ class RetinaFace:
         loc, conf, landmarks = outputs[0].squeeze(0), outputs[1].squeeze(0), outputs[2].squeeze(0)

         # Decode boxes and landmarks
-        boxes = decode_boxes(torch.tensor(loc), self._priors).cpu().numpy()
-        landmarks = decode_landmarks(torch.tensor(landmarks), self._priors).cpu().numpy()
+        boxes = decode_boxes(loc, self._priors)
+        landmarks = decode_landmarks(landmarks, self._priors)

         boxes, landmarks = self._scale_detections(boxes, landmarks, resize_factor, shape=(shape[0], shape[1]))
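Putting the pieces together, a rough, hypothetical sketch of the post-processing flow now that everything stays in NumPy. Treating `conf[:, 1]` as the face probability and the module path for the helpers are assumptions; the `nms(dets, threshold)` signature comes from the hunk header above.

    import numpy as np
    from uniface.common import decode_boxes, decode_landmarks, nms  # module path inferred

    def postprocess(loc, conf, landmarks, priors, conf_thresh=0.5, nms_thresh=0.4):
        scores = conf[:, 1]                    # assumed: column 1 holds the face probability
        boxes = decode_boxes(loc, priors)      # stays in NumPy end to end
        landms = decode_landmarks(landmarks, priors)

        # Drop low-confidence detections, then suppress overlaps
        keep = scores > conf_thresh
        boxes, landms, scores = boxes[keep], landms[keep], scores[keep]

        dets = np.hstack([boxes, scores[:, None]]).astype(np.float32)
        keep_idx = nms(dets, nms_thresh)
        return dets[keep_idx], landms[keep_idx]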