Merge pull request #6 from yakhyo/np-adapt

Remove torch dependency and adapt to NumPy
2025-12-30 09:02:25 +00:00 · 2025-03-16 17:53:56 +09:00
parent 4256407044 64c9c2f452
commit 0ae5714f99
11 changed files with 115 additions and 146 deletions
--- a/README.md
+++ b/README.md
@@ -70,7 +70,7 @@ from uniface import RetinaFace

 # Initialize the RetinaFace model
 uniface_inference = RetinaFace(
-    model="retinaface_mnet_v2",  # Model name
+    model_name="retinaface_mnet_v2",  # Model name
    conf_thresh=0.5,             # Confidence threshold
    pre_nms_topk=5000,           # Pre-NMS Top-K detections
    nms_thresh=0.4,              # NMS IoU threshold
@@ -94,6 +94,7 @@ original_image = cv2.imread(image_path)

 # Perform inference
 boxes, landmarks = uniface_inference.detect(original_image)
+# boxes: [x_min, y_min, x_max, y_max, confidence]

 # Visualize results
 draw_detections(original_image, (boxes, landmarks), vis_threshold=0.6)
@@ -126,6 +127,8 @@ while True:

    # Perform inference
    boxes, landmarks = uniface_inference.detect(frame)
+    # 'boxes' contains bounding box coordinates and confidence scores:
+    # Format: [x_min, y_min, x_max, y_max, confidence]

    # Draw detections on the frame
    draw_detections(frame, (boxes, landmarks), vis_threshold=0.6)
@@ -169,7 +172,7 @@ cv2.destroyAllWindows()
 from typings import Tuple

 RetinaFace(
-    model: str,
+    model_name: str,
    conf_thresh: float = 0.5,
    pre_nms_topk: int = 5000,
    nms_thresh: float = 0.4,
@@ -181,7 +184,7 @@ RetinaFace(

 **Parameters**:

- `model` _(str)_: Name of the model to use. Supported models:
+- `model_name` _(str)_: Name of the model to use. Supported models:
  - `retinaface_mnet025`, `retinaface_mnet050`, `retinaface_mnet_v1`, `retinaface_mnet_v2`
  - `retinaface_r18`, `retinaface_r34`
 - `conf_thresh` _(float, default=0.5)_: Minimum confidence score for detections.
--- a/examples/face_alignment.ipynb
+++ b/examples/face_alignment.ipynb
--- a/examples/face_detection.ipynb
+++ b/examples/face_detection.ipynb
@@ -63,20 +63,20 @@
     "name": "stderr",
     "output_type": "stream",
     "text": [
-      "2025-01-09 05:10:37,310 - INFO - Initializing RetinaFace with model=retinaface_mnet_v2, conf_thresh=0.5, nms_thresh=0.4, pre_nms_topk=5000, post_nms_topk=750, dynamic_size=False, input_size=(640, 640)\n",
-      "2025-01-09 05:10:37,349 - INFO - Verified model weights located at: /home/yakhyo/.uniface/models/retinaface_mnet_v2.onnx\n",
-      "2025-01-09 05:10:37,445 - INFO - Successfully initialized the model from /home/yakhyo/.uniface/models/retinaface_mnet_v2.onnx\n"
+      "2025-03-16 14:32:33,786 - INFO - Initializing RetinaFace with model=retinaface_mnet_v2, conf_thresh=0.5, nms_thresh=0.4, pre_nms_topk=5000, post_nms_topk=750, dynamic_size=False, input_size=(640, 640)\n",
+      "2025-03-16 14:32:33,830 - INFO - Verified model weights located at: C:\\Users\\yakhyo/.uniface/models\\retinaface_mnet_v2.onnx\n",
+      "2025-03-16 14:32:33,926 - INFO - Successfully initialized the model from C:\\Users\\yakhyo/.uniface/models\\retinaface_mnet_v2.onnx\n"
     ]
    }
   ],
   "source": [
    "# Initialize the RetinaFace model\n",
    "uniface_inference = RetinaFace(\n",
-    "    model=\"retinaface_mnet_v2\",  # Model name\n",
-    "    conf_thresh=0.5,             # Confidence threshold\n",
-    "    pre_nms_topk=5000,           # Pre-NMS Top-K detections\n",
-    "    nms_thresh=0.4,              # NMS IoU threshold\n",
-    "    post_nms_topk=750            # Post-NMS Top-K detections,\n",
+    "    model_name=\"retinaface_mnet_v2\",    # Model name\n",
+    "    conf_thresh=0.5,                    # Confidence threshold\n",
+    "    pre_nms_topk=5000,                  # Pre-NMS Top-K detections\n",
+    "    nms_thresh=0.4,                     # NMS IoU threshold\n",
+    "    post_nms_topk=750                   # Post-NMS Top-K detections\n",
    ")"
   ]
  },
@@ -262,7 +262,7 @@
 ],
 "metadata": {
  "kernelspec": {
-   "display_name": "face",
+   "display_name": "torch",
   "language": "python",
   "name": "python3"
  },
@@ -276,7 +276,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.11.10"
+   "version": "3.12.9"
  }
 },
 "nbformat": 4,
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -0,0 +1,35 @@
+[project]
+name = "uniface"
+version = "0.1.7"
+description = "UniFace: A Comprehensive Library for Face Detection, Recognition, Landmark Analysis, Age, and Gender Detection"
+readme = "README.md"
+license = { text = "MIT" }
+authors = [
+    { name = "Yakhyokhuja Valikhujaev", email = "yakhyo9696@gmail.com" }
+]
+dependencies = [
+    "numpy",
+    "opencv-python",
+    "onnx",
+    "onnxruntime-gpu",
+    "scikit-image",
+    "requests"
+]
+requires-python = ">=3.8"
+
+[project.optional-dependencies]
+dev = ["pytest"]
+
+[project.urls]
+Homepage = "https://github.com/yakhyo/uniface"
+Repository = "https://github.com/yakhyo/uniface"
+
+[build-system]
+requires = ["setuptools>=64", "wheel"]
+build-backend = "setuptools.build_meta"
+
+[tool.setuptools]
+packages = ["uniface"]
+
+[tool.setuptools.package-data]
+"uniface" = ["*.txt", "*.md"]
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,9 +1,7 @@
-pytest
 numpy
 opencv-python
-opencv-python-headless
 onnx
-onnxruntime
+onnxruntime-gpu
+scikit-image
 requests
-torch
-scikit-image
+pytest
--- a/scripts/release.sh
+++ b/scripts/release.sh
@@ -1,23 +0,0 @@
-#!/bin/bash
-
-# Exit on errors
-set -e
-
-cd "$(dirname "$0")"/..
-
-echo "Deleting existing release-related files..."
-rm -rf dist/ build/ *.egg-info
-
-pip install --upgrade pip
-pip install twine
-
-echo "Creating a package for the current release (PyPI compatible)..."
-python3 setup.py sdist bdist_wheel
-
-echo "Release package created successfully in the 'dist/' folder."
-
-
-echo "Uploading the package to PyPI..."
-twine upload dist/*
-
-echo "Release uploaded successfully!"
--- a/setup.py
+++ b/setup.py
@@ -1,46 +0,0 @@
-import os
-from setuptools import setup, find_packages
-
-# Read the README file for the long description
-long_description = ""
-if os.path.exists("README.md"):
-    with open("README.md", "r", encoding="utf-8") as f:
-        long_description = f.read()
-
-setup(
-    name="uniface",
-    version="0.1.6",
-    packages=find_packages(),
-    install_requires=[
-        "numpy",
-        "opencv-python",
-        "onnx",
-        "onnxruntime",
-        "requests",
-        "torch",
-        "scikit-image"
-    ],
-    extras_require={
-        "dev": ["pytest"],
-    },
-    description="UniFace: A Comprehensive Library for Face Detection, Recognition, Landmark Analysis, Age, and Gender Detection",
-    long_description=long_description,
-    long_description_content_type="text/markdown",
-    author="Yakhyokhuja Valikhujaev",
-    author_email="yakhyo9696@gmail.com",
-    url="https://github.com/yakhyo/uniface",
-    license="MIT",
-    classifiers=[
-        "Programming Language :: Python :: 3",
-        "Programming Language :: Python :: 3.8",
-        "Programming Language :: Python :: 3.9",
-        "Programming Language :: Python :: 3.10",
-        "Programming Language :: Python :: 3.11",
-        "Programming Language :: Python :: 3.12",
-        "License :: OSI Approved :: MIT License",
-        "Operating System :: OS Independent",
-        "Topic :: Software Development :: Libraries :: Python Modules",
-    ],
-    keywords="face detection, face recognition, facial landmark, facial attribute, onnx, opencv, retinaface",
-    python_requires=">=3.8",
-)
--- a/test.py
+++ b/test.py
@@ -39,7 +39,7 @@ if __name__ == '__main__':

    # Initialize and run the ONNX inference
    retinaface_inference = RetinaFace(
-        model="retinaface_mnet_v2",
+        model_name="retinaface_mnet_v2",
        conf_thresh=0.5,
        pre_nms_topk=5000,
        nms_thresh=0.4,
--- a/uniface/__init__.py
+++ b/uniface/__init__.py
@@ -13,7 +13,7 @@

 __license__ = "MIT"
 __author__ = "Yakhyokhuja Valikhujaev"
-__version__ = "0.1.6"
+__version__ = "0.1.7"


 from uniface.retinaface import RetinaFace
--- a/uniface/common.py
+++ b/uniface/common.py
@@ -7,7 +7,6 @@ import math
 import itertools
 import numpy as np

-import torch
 from typing import Tuple, List


@@ -44,7 +43,7 @@ def resize_image(frame, target_shape: Tuple[int, int] = (640, 640)) -> Tuple[np.
    return image, resize_factor


-def generate_anchors(image_size: Tuple[int, int] = (640, 640)) -> torch.Tensor:
+def generate_anchors(image_size: Tuple[int, int] = (640, 640)) -> np.ndarray:
    """
    Generate anchor boxes for a given image size.

@@ -52,7 +51,7 @@ def generate_anchors(image_size: Tuple[int, int] = (640, 640)) -> torch.Tensor:
        image_size (Tuple[int, int]): Input image size (width, height). Defaults to (640, 640).

    Returns:
-        torch.Tensor: Anchor box coordinates as a tensor.
+        np.ndarray: Anchor box coordinates as a NumPy array.
    """
    image_size = image_size

@@ -79,7 +78,7 @@ def generate_anchors(image_size: Tuple[int, int] = (640, 640)) -> torch.Tensor:
                for cy, cx in itertools.product(dense_cy, dense_cx):
                    anchors += [cx, cy, s_kx, s_ky]

-    output = torch.Tensor(anchors).view(-1, 4)
+    output = np.array(anchors, dtype=np.float32).reshape(-1, 4)
    return output


@@ -123,56 +122,57 @@ def nms(dets: List[np.ndarray], threshold: float):
    return keep


-def decode_boxes(loc, priors, variances=[0.1, 0.2]) -> torch.Tensor:
+def decode_boxes(loc, priors, variances=[0.1, 0.2]) -> np.ndarray:
    """
    Decode locations from predictions using priors to undo
    the encoding done for offset regression at train time.

    Args:
-        loc (tensor): Location predictions for loc layers, shape: [num_priors, 4]
-        priors (tensor): Prior boxes in center-offset form, shape: [num_priors, 4]
+        loc (np.ndarray): Location predictions for loc layers, shape: [num_priors, 4]
+        priors (np.ndarray): Prior boxes in center-offset form, shape: [num_priors, 4]
        variances (list[float]): Variances of prior boxes

    Returns:
-        tensor: Decoded bounding box predictions
+        np.ndarray: Decoded bounding box predictions
    """
    # Compute centers of predicted boxes
    cxcy = priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:]

    # Compute widths and heights of predicted boxes
-    wh = priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])
+    wh = priors[:, 2:] * np.exp(loc[:, 2:] * variances[1])

    # Convert center, size to corner coordinates
-    boxes = torch.empty_like(loc)
+    boxes = np.zeros_like(loc)
    boxes[:, :2] = cxcy - wh / 2  # xmin, ymin
    boxes[:, 2:] = cxcy + wh / 2  # xmax, ymax

    return boxes


-def decode_landmarks(predictions, priors, variances=[0.1, 0.2]) -> torch.Tensor:
+def decode_landmarks(predictions, priors, variances=[0.1, 0.2]) -> np.ndarray:
    """
-    Decode landmarks from predictions using prior boxes to reverse the encoding done during training.
+    Decode landmark predictions using prior boxes.

    Args:
-        predictions (tensor): Landmark predictions for localization layers.
-            Shape: [num_priors, 10] where each prior contains 5 landmark (x, y) pairs.
-        priors (tensor): Prior boxes in center-offset form.
-            Shape: [num_priors, 4], where each prior has (cx, cy, width, height).
-        variances (list[float]): Variances of the prior boxes to scale the decoded values.
+        predictions (np.ndarray): Landmark predictions, shape: [num_priors, 10]
+        priors (np.ndarray): Prior boxes, shape: [num_priors, 4]
+        variances (list): Scaling factors for landmark offsets.

    Returns:
-        landmarks (tensor): Decoded landmark predictions.
-            Shape: [num_priors, 10] where each row contains the decoded (x, y) pairs for 5 landmarks.
+        np.ndarray: Decoded landmarks, shape: [num_priors, 10]
    """

-    # Reshape predictions to [num_priors, 5, 2] to handle each pair (x, y) in a batch
-    predictions = predictions.view(predictions.size(0), 5, 2)
+    # Reshape predictions to [num_priors, 5, 2] to process landmark points
+    predictions = predictions.reshape(predictions.shape[0], 5, 2)

-    # Perform the same operation on all landmark pairs at once
-    landmarks = priors[:, :2].unsqueeze(1) + predictions * variances[0] * priors[:, 2:].unsqueeze(1)
+    # Expand priors to match (num_priors, 5, 2)
+    priors_xy = np.repeat(priors[:, :2][:, np.newaxis, :], 5, axis=1)  # (num_priors, 5, 2)
+    priors_wh = np.repeat(priors[:, 2:][:, np.newaxis, :], 5, axis=1)  # (num_priors, 5, 2)
+
+    # Compute absolute landmark positions
+    landmarks = priors_xy + predictions * variances[0] * priors_wh

    # Flatten back to [num_priors, 10]
-    landmarks = landmarks.view(landmarks.size(0), -1)
+    landmarks = landmarks.reshape(landmarks.shape[0], -1)

    return landmarks
--- a/uniface/retinaface.py
+++ b/uniface/retinaface.py
@@ -7,12 +7,10 @@ import cv2
 import numpy as np
 import onnxruntime as ort

-import torch
 from typing import Tuple, List, Optional, Literal

 from uniface.log import Logger
 from uniface.model_store import verify_model_weights
-
 from uniface.common import (
    nms,
    resize_image,
@@ -27,28 +25,32 @@ class RetinaFace:
    A class for face detection using the RetinaFace model.

    Args:
-        model (str): Path or identifier of the model weights.
-        conf_thresh (float): Confidence threshold for detections. Defaults to 0.5.
-        nms_thresh (float): Non-maximum suppression threshold. Defaults to 0.4.
-        pre_nms_topk (int): Maximum number of detections before NMS. Defaults to 5000.
-        post_nms_topk (int): Maximum number of detections after NMS. Defaults to 750.
-        dynamic_size (Optional[bool]): Whether to adjust anchor generation dynamically based on image size. Defaults to False.
-        input_size (Optional[Tuple[int, int]]): Static input size for the model (width, height). Defaults to (640, 640).
+        model_name (str): Name of the model.
+        conf_thresh (float, optional): Confidence threshold for detections. Defaults to 0.5.
+        nms_thresh (float, optional): Non-maximum suppression (NMS) threshold. Defaults to 0.4.
+        pre_nms_topk (int, optional): Maximum number of detections considered before applying NMS. Defaults to 5000.
+        post_nms_topk (int, optional): Maximum number of detections retained after NMS. Defaults to 750.
+        dynamic_size (bool, optional): Whether to dynamically adjust anchor generation based on image size. Defaults to False.
+        input_size (Tuple[int, int], optional): Static input size for the model (width, height). Used when `dynamic_size=False`. Defaults to (640, 640).

    Attributes:
        conf_thresh (float): Confidence threshold for filtering detections.
-        nms_thresh (float): Threshold for NMS to remove duplicate detections.
-        pre_nms_topk (int): Maximum detections to consider before applying NMS.
-        post_nms_topk (int): Maximum detections retained after applying NMS.
-        dynamic_size (bool): Indicates if input size and anchors are dynamically adjusted.
-        input_size (Tuple[int, int]): The model's input image size.
-        _model_path (str): Path to the model weights.
-        _priors (torch.Tensor): Precomputed anchor boxes for static input size.
+        nms_thresh (float): NMS threshold to remove duplicate detections.
+        pre_nms_topk (int): Number of detections considered before applying NMS.
+        post_nms_topk (int): Maximum number of detections retained after applying NMS.
+        dynamic_size (bool): Whether the model dynamically adjusts input size and anchors.
+        input_size (Tuple[int, int] or None): The model's fixed input size (if `dynamic_size=False`), otherwise None.
+        _model_path (str): Verified path to the model weights.
+        _priors (np.ndarray or None): Precomputed anchor boxes when using static input size. None if `dynamic_size=True`.
+
+    Raises:
+        ValueError: If the model weights cannot be found or verified.
+        RuntimeError: If there is an error initializing the model.
    """

    def __init__(
        self,
-        model: str,
+        model_name: str,
        conf_thresh: float = 0.5,
        nms_thresh: float = 0.4,
        pre_nms_topk: int = 5000,
@@ -65,13 +67,13 @@ class RetinaFace:
        self.input_size = input_size

        Logger.info(
-            f"Initializing RetinaFace with model={model}, conf_thresh={conf_thresh}, nms_thresh={nms_thresh}, "
+            f"Initializing RetinaFace with model={model_name}, conf_thresh={conf_thresh}, nms_thresh={nms_thresh}, "
            f"pre_nms_topk={pre_nms_topk}, post_nms_topk={post_nms_topk}, dynamic_size={dynamic_size}, "
            f"input_size={input_size}"
        )

        # Get path to model weights
-        self._model_path = verify_model_weights(model)
+        self._model_path = verify_model_weights(model_name)
        Logger.info(f"Verified model weights located at: {self._model_path}")

        # Precompute anchors if using static size
@@ -217,8 +219,8 @@ class RetinaFace:
        loc, conf, landmarks = outputs[0].squeeze(0), outputs[1].squeeze(0), outputs[2].squeeze(0)

        # Decode boxes and landmarks
-        boxes = decode_boxes(torch.tensor(loc), self._priors).cpu().numpy()
-        landmarks = decode_landmarks(torch.tensor(landmarks), self._priors).cpu().numpy()
+        boxes = decode_boxes(loc, self._priors)
+        landmarks = decode_landmarks(landmarks, self._priors)

        boxes, landmarks = self._scale_detections(boxes, landmarks, resize_factor, shape=(shape[0], shape[1]))