feat: Change setup.py to pyproject.toml and remove torch dependency

2025-12-30 09:02:25 +00:00 · 2025-03-16 14:36:35 +09:00
parent 4256407044
commit d586cffb3a
10 changed files with 111 additions and 122 deletions
--- a/README.md
+++ b/README.md
@@ -70,7 +70,7 @@ from uniface import RetinaFace
 # Initialize the RetinaFace model
 uniface_inference = RetinaFace(
-    model="retinaface_mnet_v2",  # Model name
+    model_name="retinaface_mnet_v2",  # Model name
    conf_thresh=0.5,             # Confidence threshold
    pre_nms_topk=5000,           # Pre-NMS Top-K detections
    nms_thresh=0.4,              # NMS IoU threshold
@@ -169,7 +169,7 @@ cv2.destroyAllWindows()
 from typings import Tuple
 RetinaFace(
-    model: str,
+    model_name: str,
    conf_thresh: float = 0.5,
    pre_nms_topk: int = 5000,
    nms_thresh: float = 0.4,
--- a/examples/face_alignment.ipynb
+++ b/examples/face_alignment.ipynb
--- a/examples/face_detection.ipynb
+++ b/examples/face_detection.ipynb
@@ -63,20 +63,20 @@
     "name": "stderr",
     "output_type": "stream",
     "text": [
-      "2025-01-09 05:10:37,310 - INFO - Initializing RetinaFace with model=retinaface_mnet_v2, conf_thresh=0.5, nms_thresh=0.4, pre_nms_topk=5000, post_nms_topk=750, dynamic_size=False, input_size=(640, 640)\n",
+      "2025-03-16 14:32:33,786 - INFO - Initializing RetinaFace with model=retinaface_mnet_v2, conf_thresh=0.5, nms_thresh=0.4, pre_nms_topk=5000, post_nms_topk=750, dynamic_size=False, input_size=(640, 640)\n",
-      "2025-01-09 05:10:37,349 - INFO - Verified model weights located at: /home/yakhyo/.uniface/models/retinaface_mnet_v2.onnx\n",
+      "2025-03-16 14:32:33,830 - INFO - Verified model weights located at: C:\\Users\\yakhyo/.uniface/models\\retinaface_mnet_v2.onnx\n",
-      "2025-01-09 05:10:37,445 - INFO - Successfully initialized the model from /home/yakhyo/.uniface/models/retinaface_mnet_v2.onnx\n"
+      "2025-03-16 14:32:33,926 - INFO - Successfully initialized the model from C:\\Users\\yakhyo/.uniface/models\\retinaface_mnet_v2.onnx\n"
     ]
    }
   ],
   "source": [
    "# Initialize the RetinaFace model\n",
    "uniface_inference = RetinaFace(\n",
-    "    model=\"retinaface_mnet_v2\",  # Model name\n",
+    "    model_name=\"retinaface_mnet_v2\",    # Model name\n",
-    "    conf_thresh=0.5,             # Confidence threshold\n",
+    "    conf_thresh=0.5,                    # Confidence threshold\n",
-    "    pre_nms_topk=5000,           # Pre-NMS Top-K detections\n",
+    "    pre_nms_topk=5000,                  # Pre-NMS Top-K detections\n",
-    "    nms_thresh=0.4,              # NMS IoU threshold\n",
+    "    nms_thresh=0.4,                     # NMS IoU threshold\n",
-    "    post_nms_topk=750            # Post-NMS Top-K detections,\n",
+    "    post_nms_topk=750                   # Post-NMS Top-K detections,\n",
    ")"
   ]
  },
@@ -262,7 +262,7 @@
 ],
 "metadata": {
  "kernelspec": {
-   "display_name": "face",
+   "display_name": "torch",
   "language": "python",
   "name": "python3"
  },
@@ -276,7 +276,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.11.10"
+   "version": "3.12.9"
  }
 },
 "nbformat": 4,
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -0,0 +1,35 @@
 [project]
 name = "uniface"
 version = "0.1.7"
 description = "UniFace: A Comprehensive Library for Face Detection, Recognition, Landmark Analysis, Age, and Gender Detection"
 readme = "README.md"
 license = { text = "MIT" }
 authors = [
    { name = "Yakhyokhuja Valikhujaev", email = "yakhyo9696@gmail.com" }
 ]
 dependencies = [
    "numpy",
    "opencv-python",
    "onnx",
    "onnxruntime-gpu",
    "scikit-image",
    "requests"
 ]
 requires-python = ">=3.8"
 [project.optional-dependencies]
 dev = ["pytest"]
 [project.urls]
 Homepage = "https://github.com/yakhyo/uniface"
 Repository = "https://github.com/yakhyo/uniface"
 [build-system]
 requires = ["setuptools>=64", "wheel"]
 build-backend = "setuptools.build_meta"
 [tool.setuptools]
 packages = ["uniface"]
 [tool.setuptools.package-data]
 "uniface" = ["*.txt", "*.md"]
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,9 +1,7 @@
 pytest
 numpy
 opencv-python
 opencv-python-headless
 onnx
-onnxruntime
+onnxruntime-gpu
 scikit-image
 requests
-torch
+pytest
 scikit-image
--- a/setup.py
+++ b/setup.py
@@ -1,46 +0,0 @@
 import os
 from setuptools import setup, find_packages
 # Read the README file for the long description
 long_description = ""
 if os.path.exists("README.md"):
    with open("README.md", "r", encoding="utf-8") as f:
        long_description = f.read()
 setup(
    name="uniface",
    version="0.1.6",
    packages=find_packages(),
    install_requires=[
        "numpy",
        "opencv-python",
        "onnx",
        "onnxruntime",
        "requests",
        "torch",
        "scikit-image"
    ],
    extras_require={
        "dev": ["pytest"],
    },
    description="UniFace: A Comprehensive Library for Face Detection, Recognition, Landmark Analysis, Age, and Gender Detection",
    long_description=long_description,
    long_description_content_type="text/markdown",
    author="Yakhyokhuja Valikhujaev",
    author_email="yakhyo9696@gmail.com",
    url="https://github.com/yakhyo/uniface",
    license="MIT",
    classifiers=[
        "Programming Language :: Python :: 3",
        "Programming Language :: Python :: 3.8",
        "Programming Language :: Python :: 3.9",
        "Programming Language :: Python :: 3.10",
        "Programming Language :: Python :: 3.11",
        "Programming Language :: Python :: 3.12",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
        "Topic :: Software Development :: Libraries :: Python Modules",
    ],
    keywords="face detection, face recognition, facial landmark, facial attribute, onnx, opencv, retinaface",
    python_requires=">=3.8",
 )
--- a/test.py
+++ b/test.py
@@ -39,7 +39,7 @@ if __name__ == '__main__':
    # Initialize and run the ONNX inference
    retinaface_inference = RetinaFace(
-        model="retinaface_mnet_v2",
+        model_name="retinaface_mnet_v2",
        conf_thresh=0.5,
        pre_nms_topk=5000,
        nms_thresh=0.4,
--- a/uniface/__init__.py
+++ b/uniface/__init__.py
@@ -13,7 +13,7 @@
 __license__ = "MIT"
 __author__ = "Yakhyokhuja Valikhujaev"
-__version__ = "0.1.6"
+__version__ = "0.1.7"
 from uniface.retinaface import RetinaFace
--- a/uniface/common.py
+++ b/uniface/common.py
@@ -7,7 +7,6 @@ import math
 import itertools
 import numpy as np
 import torch
 from typing import Tuple, List
@@ -44,7 +43,7 @@ def resize_image(frame, target_shape: Tuple[int, int] = (640, 640)) -> Tuple[np.
    return image, resize_factor
-def generate_anchors(image_size: Tuple[int, int] = (640, 640)) -> torch.Tensor:
+def generate_anchors(image_size: Tuple[int, int] = (640, 640)) -> np.ndarray:
    """
    Generate anchor boxes for a given image size.
@@ -52,7 +51,7 @@ def generate_anchors(image_size: Tuple[int, int] = (640, 640)) -> torch.Tensor:
        image_size (Tuple[int, int]): Input image size (width, height). Defaults to (640, 640).
    Returns:
-        torch.Tensor: Anchor box coordinates as a tensor.
+        np.ndarray: Anchor box coordinates as a NumPy array.
    """
    image_size = image_size
@@ -79,7 +78,7 @@ def generate_anchors(image_size: Tuple[int, int] = (640, 640)) -> torch.Tensor:
                for cy, cx in itertools.product(dense_cy, dense_cx):
                    anchors += [cx, cy, s_kx, s_ky]
-    output = torch.Tensor(anchors).view(-1, 4)
+    output = np.array(anchors, dtype=np.float32).reshape(-1, 4)
    return output
@@ -123,56 +122,57 @@ def nms(dets: List[np.ndarray], threshold: float):
    return keep
-def decode_boxes(loc, priors, variances=[0.1, 0.2]) -> torch.Tensor:
+def decode_boxes(loc, priors, variances=[0.1, 0.2]) -> np.ndarray:
    """
    Decode locations from predictions using priors to undo
    the encoding done for offset regression at train time.
    Args:
-        loc (tensor): Location predictions for loc layers, shape: [num_priors, 4]
+        loc (np.ndarray): Location predictions for loc layers, shape: [num_priors, 4]
-        priors (tensor): Prior boxes in center-offset form, shape: [num_priors, 4]
+        priors (np.ndarray): Prior boxes in center-offset form, shape: [num_priors, 4]
        variances (list[float]): Variances of prior boxes
    Returns:
-        tensor: Decoded bounding box predictions
+        np.ndarray: Decoded bounding box predictions
    """
    # Compute centers of predicted boxes
    cxcy = priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:]
    # Compute widths and heights of predicted boxes
-    wh = priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])
+    wh = priors[:, 2:] * np.exp(loc[:, 2:] * variances[1])
    # Convert center, size to corner coordinates
-    boxes = torch.empty_like(loc)
+    boxes = np.zeros_like(loc)
    boxes[:, :2] = cxcy - wh / 2  # xmin, ymin
    boxes[:, 2:] = cxcy + wh / 2  # xmax, ymax
    return boxes
-def decode_landmarks(predictions, priors, variances=[0.1, 0.2]) -> torch.Tensor:
+def decode_landmarks(predictions, priors, variances=[0.1, 0.2]) -> np.ndarray:
    """
-    Decode landmarks from predictions using prior boxes to reverse the encoding done during training.
+    Decode landmark predictions using prior boxes.
    Args:
-        predictions (tensor): Landmark predictions for localization layers.
+        predictions (np.ndarray): Landmark predictions, shape: [num_priors, 10]
-            Shape: [num_priors, 10] where each prior contains 5 landmark (x, y) pairs.
+        priors (np.ndarray): Prior boxes, shape: [num_priors, 4]
-        priors (tensor): Prior boxes in center-offset form.
+        variances (list): Scaling factors for landmark offsets.
            Shape: [num_priors, 4], where each prior has (cx, cy, width, height).
        variances (list[float]): Variances of the prior boxes to scale the decoded values.
    Returns:
-        landmarks (tensor): Decoded landmark predictions.
+        np.ndarray: Decoded landmarks, shape: [num_priors, 10]
            Shape: [num_priors, 10] where each row contains the decoded (x, y) pairs for 5 landmarks.
    """
-    # Reshape predictions to [num_priors, 5, 2] to handle each pair (x, y) in a batch
+    # Reshape predictions to [num_priors, 5, 2] to process landmark points
-    predictions = predictions.view(predictions.size(0), 5, 2)
+    predictions = predictions.reshape(predictions.shape[0], 5, 2)
-    # Perform the same operation on all landmark pairs at once
+    # Expand priors to match (num_priors, 5, 2)
-    landmarks = priors[:, :2].unsqueeze(1) + predictions * variances[0] * priors[:, 2:].unsqueeze(1)
+    priors_xy = np.repeat(priors[:, :2][:, np.newaxis, :], 5, axis=1)  # (num_priors, 5, 2)
    priors_wh = np.repeat(priors[:, 2:][:, np.newaxis, :], 5, axis=1)  # (num_priors, 5, 2)
    # Compute absolute landmark positions
    landmarks = priors_xy + predictions * variances[0] * priors_wh
    # Flatten back to [num_priors, 10]
-    landmarks = landmarks.view(landmarks.size(0), -1)
+    landmarks = landmarks.reshape(landmarks.shape[0], -1)
    return landmarks
--- a/uniface/retinaface.py
+++ b/uniface/retinaface.py
@@ -7,12 +7,10 @@ import cv2
 import numpy as np
 import onnxruntime as ort
 import torch
 from typing import Tuple, List, Optional, Literal
 from uniface.log import Logger
 from uniface.model_store import verify_model_weights
 from uniface.common import (
    nms,
    resize_image,
@@ -27,28 +25,32 @@ class RetinaFace:
    A class for face detection using the RetinaFace model.
    Args:
-        model (str): Path or identifier of the model weights.
+        model_name (str): Name of the model.
-        conf_thresh (float): Confidence threshold for detections. Defaults to 0.5.
+        conf_thresh (float, optional): Confidence threshold for detections. Defaults to 0.5.
-        nms_thresh (float): Non-maximum suppression threshold. Defaults to 0.4.
+        nms_thresh (float, optional): Non-maximum suppression (NMS) threshold. Defaults to 0.4.
-        pre_nms_topk (int): Maximum number of detections before NMS. Defaults to 5000.
+        pre_nms_topk (int, optional): Maximum number of detections considered before applying NMS. Defaults to 5000.
-        post_nms_topk (int): Maximum number of detections after NMS. Defaults to 750.
+        post_nms_topk (int, optional): Maximum number of detections retained after NMS. Defaults to 750.
-        dynamic_size (Optional[bool]): Whether to adjust anchor generation dynamically based on image size. Defaults to False.
+        dynamic_size (bool, optional): Whether to dynamically adjust anchor generation based on image size. Defaults to False.
-        input_size (Optional[Tuple[int, int]]): Static input size for the model (width, height). Defaults to (640, 640).
+        input_size (Tuple[int, int], optional): Static input size for the model (width, height). Used when `dynamic_size=False`. Defaults to (640, 640).
    Attributes:
        conf_thresh (float): Confidence threshold for filtering detections.
-        nms_thresh (float): Threshold for NMS to remove duplicate detections.
+        nms_thresh (float): NMS threshold to remove duplicate detections.
-        pre_nms_topk (int): Maximum detections to consider before applying NMS.
+        pre_nms_topk (int): Number of detections considered before applying NMS.
-        post_nms_topk (int): Maximum detections retained after applying NMS.
+        post_nms_topk (int): Maximum number of detections retained after applying NMS.
-        dynamic_size (bool): Indicates if input size and anchors are dynamically adjusted.
+        dynamic_size (bool): Whether the model dynamically adjusts input size and anchors.
-        input_size (Tuple[int, int]): The model's input image size.
+        input_size (Tuple[int, int] or None): The model's fixed input size (if `dynamic_size=False`), otherwise None.
-        _model_path (str): Path to the model weights.
+        _model_path (str): Verified path to the model weights.
-        _priors (torch.Tensor): Precomputed anchor boxes for static input size.
+        _priors (np.ndarray or None): Precomputed anchor boxes when using static input size. None if `dynamic_size=True`.
    Raises:
        ValueError: If the model weights cannot be found or verified.
        RuntimeError: If there is an error initializing the model.
    """
    def __init__(
        self,
-        model: str,
+        model_name: str,
        conf_thresh: float = 0.5,
        nms_thresh: float = 0.4,
        pre_nms_topk: int = 5000,
@@ -65,13 +67,13 @@ class RetinaFace:
        self.input_size = input_size
        Logger.info(
-            f"Initializing RetinaFace with model={model}, conf_thresh={conf_thresh}, nms_thresh={nms_thresh}, "
+            f"Initializing RetinaFace with model={model_name}, conf_thresh={conf_thresh}, nms_thresh={nms_thresh}, "
            f"pre_nms_topk={pre_nms_topk}, post_nms_topk={post_nms_topk}, dynamic_size={dynamic_size}, "
            f"input_size={input_size}"
        )
        # Get path to model weights
-        self._model_path = verify_model_weights(model)
+        self._model_path = verify_model_weights(model_name)
        Logger.info(f"Verified model weights located at: {self._model_path}")
        # Precompute anchors if using static size
@@ -217,8 +219,8 @@ class RetinaFace:
        loc, conf, landmarks = outputs[0].squeeze(0), outputs[1].squeeze(0), outputs[2].squeeze(0)
        # Decode boxes and landmarks
-        boxes = decode_boxes(torch.tensor(loc), self._priors).cpu().numpy()
+        boxes = decode_boxes(loc, self._priors)
-        landmarks = decode_landmarks(torch.tensor(landmarks), self._priors).cpu().numpy()
+        landmarks = decode_landmarks(landmarks, self._priors)
        boxes, landmarks = self._scale_detections(boxes, landmarks, resize_factor, shape=(shape[0], shape[1]))