feat: Bump to version v0.1.3, face alignment feature added

2025-12-30 09:02:25 +00:00 · 2024-11-21 02:34:01 +00:00
parent 6a69739e8e
commit a158e47f52
9 changed files with 314 additions and 27 deletions
--- a/README.md
+++ b/README.md
@@ -17,8 +17,10 @@
 ---

 ## Features
+
 - [ ] Age and gender detection (Planned).
 - [ ] Face recognition (Planned).
+- [x] Face alignment (Added: 2024-11-21).
 - [x] High-speed face detection using ONNX models (Added: 2024-11-20).
 - [x] Accurate facial landmark localization (e.g., eyes, nose, and mouth) (Added: 2024-11-20).
 - [x] Easy-to-use API for inference and visualization (Added: 2024-11-20).
@@ -27,24 +29,17 @@

 ## Installation

-### Using pip
+The easiest way to install **UniFace** is via [PyPI](https://pypi.org/project/uniface/). This will automatically install the library along with its prerequisites.

 ```bash
 pip install uniface
 ```

-### Local installation using pip
-
-**Clone the repository**
+To work with the latest version of **UniFace**, which may not yet be released on PyPI, you can install it directly from the repository:

 ```bash
 git clone https://github.com/yakhyo/uniface.git
 cd uniface
-```
-
-**Install using pip**
-
-```bash
 pip install .
 ```

@@ -147,6 +142,7 @@ cv2.destroyAllWindows()
 ### `RetinaFace` Class

 #### Initialization
+
 ```python
 RetinaFace(
    model: str,
@@ -158,17 +154,19 @@ RetinaFace(
 ```

 **Parameters**:
- `model` *(str)*: Name of the model to use. Supported models:
+
+- `model` _(str)_: Name of the model to use. Supported models:
  - `retinaface_mnet025`, `retinaface_mnet050`, `retinaface_mnet_v1`, `retinaface_mnet_v2`
  - `retinaface_r18`, `retinaface_r34`
- `conf_thresh` *(float, default=0.5)*: Minimum confidence score for detections.
- `pre_nms_topk` *(int, default=5000)*: Max detections to keep before NMS.
- `nms_thresh` *(float, default=0.4)*: IoU threshold for Non-Maximum Suppression.
- `post_nms_topk` *(int, default=750)*: Max detections to keep after NMS.
+- `conf_thresh` _(float, default=0.5)_: Minimum confidence score for detections.
+- `pre_nms_topk` _(int, default=5000)_: Max detections to keep before NMS.
+- `nms_thresh` _(float, default=0.4)_: IoU threshold for Non-Maximum Suppression.
+- `post_nms_topk` _(int, default=750)_: Max detections to keep after NMS.

 ---

 ### `detect` Method
+
 ```python
 detect(
    image: np.ndarray,
@@ -182,22 +180,25 @@ detect(
 Detects faces in the given image and returns bounding boxes and landmarks.

 **Parameters**:
- `image` *(np.ndarray)*: Input image in BGR format.
- `max_num` *(int, default=0)*: Maximum number of faces to return. `0` means return all.
- `metric` *(str, default="default")*: Metric for prioritizing detections:
+
+- `image` _(np.ndarray)_: Input image in BGR format.
+- `max_num` _(int, default=0)_: Maximum number of faces to return. `0` means return all.
+- `metric` _(str, default="default")_: Metric for prioritizing detections:
  - `"default"`: Prioritize detections closer to the image center.
  - `"max"`: Prioritize larger bounding box areas.
- `center_weight` *(float, default=2.0)*: Weight for prioritizing center-aligned faces.
+- `center_weight` _(float, default=2.0)_: Weight for prioritizing center-aligned faces.

 **Returns**:
- `bounding_boxes` *(np.ndarray)*: Array of detections as `[x_min, y_min, x_max, y_max, confidence]`.
- `landmarks` *(np.ndarray)*: Array of landmarks as `[(x1, y1), ..., (x5, y5)]`.
+
+- `bounding_boxes` _(np.ndarray)_: Array of detections as `[x_min, y_min, x_max, y_max, confidence]`.
+- `landmarks` _(np.ndarray)_: Array of landmarks as `[(x1, y1), ..., (x5, y5)]`.

 ---

 ### Visualization Utilities

 #### `draw_detections`
+
 ```python
 draw_detections(
    image: np.ndarray,
@@ -210,9 +211,10 @@ draw_detections(
 Draws bounding boxes and landmarks on the given image.

 **Parameters**:
- `image` *(np.ndarray)*: The input image in BGR format.
- `detections` *(Tuple[np.ndarray, np.ndarray])*: A tuple of bounding boxes and landmarks.
- `vis_threshold` *(float)*: Minimum confidence score for visualization.
+
+- `image` _(np.ndarray)_: The input image in BGR format.
+- `detections` _(Tuple[np.ndarray, np.ndarray])_: A tuple of bounding boxes and landmarks.
+- `vis_threshold` _(float)_: Minimum confidence score for visualization.

 ---

--- a/examples/.gitkeep
+++ b/examples/.gitkeep
--- a/examples/face_alignment.ipynb
+++ b/examples/face_alignment.ipynb
--- a/examples/face_detection.ipynb
+++ b/examples/face_detection.ipynb
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,3 +6,4 @@ onnx
 onnxruntime
 requests
 torch
+scikit-image
--- a/setup.py
+++ b/setup.py
@@ -9,7 +9,7 @@ if os.path.exists("README.md"):

 setup(
    name="uniface",
-    version="0.1.2",
+    version="0.1.3",
    packages=find_packages(),
    install_requires=[
        "numpy",
@@ -17,7 +17,8 @@ setup(
        "onnx",
        "onnxruntime",
        "requests",
-        "torch"
+        "torch",
+        "scikit-image"
    ],
    extras_require={
        "dev": ["pytest"],
--- a/uniface/__init__.py
+++ b/uniface/__init__.py
@@ -16,6 +16,7 @@ from uniface.retinaface import RetinaFace
 from uniface.log import Logger
 from uniface.model_store import verify_model_weights
 from uniface.version import __version__, __author__
+from uniface.alignment import face_alignment
 from uniface.visualization import draw_detections

 __all__ = [
@@ -24,5 +25,6 @@ __all__ = [
    "RetinaFace",
    "Logger",
    "verify_model_weights",
-    "draw_detections"
+    "draw_detections",
+    "face_alignment"
 ]
--- a/uniface/alignment.py
+++ b/uniface/alignment.py
@@ -0,0 +1,84 @@
+# Copyright 2024 Yakhyokhuja Valikhujaev
+# Author: Yakhyokhuja Valikhujaev
+# GitHub: https://github.com/yakhyo
+
+import cv2
+import numpy as np
+from skimage.transform import SimilarityTransform
+from typing import Tuple
+
+# Reference landmark template (ArcFace): five (x, y) points for a 112x112 crop; estimate_norm rescales it by image_size / 112.
+reference_alignment: np.ndarray = np.array(
+    [[  # leading axis has length 1 (one template); estimate_norm iterates over this axis
+        [38.2946, 51.6963],  # NOTE(review): presumably left eye - confirm landmark order against the detector
+        [73.5318, 51.5014],  # NOTE(review): presumably right eye
+        [56.0252, 71.7366],  # NOTE(review): presumably nose tip
+        [41.5493, 92.3655],  # NOTE(review): presumably left mouth corner
+        [70.7299, 92.2041]  # NOTE(review): presumably right mouth corner
+    ]],
+    dtype=np.float32
+)
+
+
+def estimate_norm(landmark: np.ndarray, image_size: int = 112) -> Tuple[np.ndarray, int]:
+    """
+    Estimate the normalization transformation matrix for facial landmarks.
+
+    Args:
+        landmark (np.ndarray): Array of shape (5, 2) representing the coordinates of the facial landmarks.
+        image_size (int, optional): The size of the output image. Default is 112.
+
+    Returns:
+        Tuple[np.ndarray, int]: A tuple containing:
+            - min_matrix (np.ndarray): The 2x3 transformation matrix for aligning the landmarks.
+            - min_index (int): The index of the reference alignment that resulted in the minimum error.
+
+    Raises:
+        AssertionError: If the input landmark array does not have the shape (5, 2).
+    """
+    assert landmark.shape == (5, 2), "Landmark array must have shape (5, 2)."
+    min_matrix: np.ndarray = np.empty((2, 3))
+    min_index: int = -1
+    min_error: float = float('inf')
+
+    # Prepare landmarks for transformation
+    landmark_transform = np.insert(landmark, 2, values=np.ones(5), axis=1)
+    transform = SimilarityTransform()
+
+    # Adjust alignment based on image size
+    if image_size == 112:
+        alignment = reference_alignment
+    else:
+        alignment = (image_size / 112) * reference_alignment
+
+    # Iterate through reference alignments
+    for idx in np.arange(alignment.shape[0]):
+        transform.estimate(landmark, alignment[idx])
+        matrix = transform.params[0:2, :]
+        results = np.dot(matrix, landmark_transform.T).T
+        error = np.sum(np.sqrt(np.sum((results - alignment[idx]) ** 2, axis=1)))
+        if error < min_error:
+            min_error = error
+            min_matrix = matrix
+            min_index = idx
+
+    return min_matrix, min_index
+
+
+def face_alignment(image: np.ndarray, landmark: np.ndarray, image_size: int = 112) -> np.ndarray:
+    """
+    Align the face in the input image based on the given facial landmarks.
+
+    Args:
+        image (np.ndarray): Input image as a NumPy array.
+        landmark (np.ndarray): Array of shape (5, 2) representing the coordinates of the facial landmarks.
+        image_size (int, optional): The size of the aligned output image. Default is 112.
+
+    Returns:
+        np.ndarray: The aligned face as a NumPy array.
+    """
+    # Get the transformation matrix and pose index
+    M, pose_index = estimate_norm(landmark, image_size)
+    # Warp the input image to align the face
+    warped = cv2.warpAffine(image, M, (image_size, image_size), borderValue=0.0)
+    return warped
--- a/uniface/version.py
+++ b/uniface/version.py
@@ -11,5 +11,5 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-__version__ = "0.1.2"
+__version__ = "0.1.3"
 __author__ = "Yakhyokhuja Valikhujaev"