diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
deleted file mode 100644
index 25b4980..0000000
--- a/.github/workflows/build.yml
+++ /dev/null
@@ -1,64 +0,0 @@
-name: Build, Test, and Publish
-
-on:
- push:
- branches:
- - main # Trigger on pushes to main
- tags:
- - "v*.*.*" # Trigger publish on version tags
- pull_request:
- branches:
- - main # Trigger checks on pull requests to main
-
-jobs:
- build:
- runs-on: ubuntu-latest
-
- strategy:
- matrix:
- python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
-
- steps:
- - name: Checkout code
- uses: actions/checkout@v3
-
- - name: Set up Python
- uses: actions/setup-python@v4
- with:
- python-version: ${{ matrix.python-version }}
-
- - name: Install dependencies
- run: |
- python -m pip install --upgrade pip
- python -m pip install .[dev]
-
- - name: Run Tests
- run: pytest
-
- publish:
- runs-on: ubuntu-latest
- needs: build # Publish only if tests pass
- if: github.event_name == 'push' && github.ref == 'refs/heads/main' # Trigger only on pushes to main
-
- steps:
- - name: Checkout code
- uses: actions/checkout@v3
-
- - name: Set up Python
- uses: actions/setup-python@v4
- with:
- python-version: "3.10" # Use a single Python version for publishing
-
- - name: Install dependencies
- run: |
- python -m pip install --upgrade pip
- python -m pip install build twine
-
- - name: Build Package
- run: python -m build
-
- - name: Publish to PyPI
- env:
- TWINE_USERNAME: __token__
- TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
- run: twine upload dist/*
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..2e2c620
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,87 @@
+name: CI
+
+on:
+ push:
+ branches:
+ - main
+ - develop
+ pull_request:
+ branches:
+ - main
+ - develop
+
+jobs:
+ test:
+ runs-on: ubuntu-latest
+
+ strategy:
+ fail-fast: false
+ matrix:
+ python-version: ["3.10", "3.11", "3.12", "3.13"]
+
+ steps:
+ - name: Checkout code
+ uses: actions/checkout@v4
+
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v5
+ with:
+ python-version: ${{ matrix.python-version }}
+ cache: 'pip'
+
+ - name: Install dependencies
+ run: |
+ python -m pip install --upgrade pip
+ python -m pip install .[dev]
+
+ - name: Check ONNX Runtime providers
+ run: |
+ python -c "import onnxruntime as ort; print('Available providers:', ort.get_available_providers())"
+
+      - name: Lint with ruff (non-blocking)
+        run: |
+          pip install ruff
+          ruff check . --exit-zero
+        continue-on-error: true
+
+ - name: Run tests
+ run: pytest -v --tb=short
+
+ - name: Test package imports
+ run: |
+ python -c "from uniface import RetinaFace, ArcFace, Landmark106, AgeGender; print('All imports successful')"
+
+ build:
+ runs-on: ubuntu-latest
+ needs: test
+
+ steps:
+ - name: Checkout code
+ uses: actions/checkout@v4
+
+ - name: Set up Python
+ uses: actions/setup-python@v5
+ with:
+ python-version: "3.10"
+ cache: 'pip'
+
+ - name: Install build tools
+ run: |
+ python -m pip install --upgrade pip
+ python -m pip install build
+
+ - name: Build package
+ run: python -m build
+
+ - name: Check package
+ run: |
+ python -m pip install twine
+ twine check dist/*
+
+ - name: Upload build artifacts
+ uses: actions/upload-artifact@v4
+ with:
+ name: dist-python-${{ github.sha }}
+ path: dist/
+ retention-days: 7
+
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
new file mode 100644
index 0000000..b1a3de2
--- /dev/null
+++ b/.github/workflows/publish.yml
@@ -0,0 +1,105 @@
+name: Publish to PyPI
+
+on:
+ push:
+ tags:
+ - "v*.*.*" # Trigger only on version tags like v0.1.9
+
+jobs:
+ validate:
+ runs-on: ubuntu-latest
+ outputs:
+ version: ${{ steps.get_version.outputs.version }}
+ tag_version: ${{ steps.get_version.outputs.tag_version }}
+
+ steps:
+ - name: Checkout code
+ uses: actions/checkout@v4
+
+ - name: Get version from tag and pyproject.toml
+ id: get_version
+ run: |
+ TAG_VERSION=${GITHUB_REF#refs/tags/v}
+ echo "tag_version=$TAG_VERSION" >> $GITHUB_OUTPUT
+
+ PYPROJECT_VERSION=$(grep -Po '(?<=^version = ")[^"]*' pyproject.toml)
+ echo "version=$PYPROJECT_VERSION" >> $GITHUB_OUTPUT
+
+ echo "Tag version: v$TAG_VERSION"
+ echo "pyproject.toml version: $PYPROJECT_VERSION"
+
+ - name: Verify version match
+ run: |
+ if [ "${{ steps.get_version.outputs.tag_version }}" != "${{ steps.get_version.outputs.version }}" ]; then
+ echo "Error: Tag version (${{ steps.get_version.outputs.tag_version }}) does not match pyproject.toml version (${{ steps.get_version.outputs.version }})"
+ exit 1
+ fi
+ echo "Version validation passed: ${{ steps.get_version.outputs.version }}"
+
+ test:
+ runs-on: ubuntu-latest
+ needs: validate
+
+ strategy:
+ fail-fast: false
+ matrix:
+ python-version: ["3.10", "3.11", "3.12", "3.13"]
+
+ steps:
+ - name: Checkout code
+ uses: actions/checkout@v4
+
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v5
+ with:
+ python-version: ${{ matrix.python-version }}
+ cache: 'pip'
+
+ - name: Install dependencies
+ run: |
+ python -m pip install --upgrade pip
+ python -m pip install .[dev]
+
+ - name: Run tests
+ run: pytest -v
+
+ publish:
+ runs-on: ubuntu-latest
+ needs: [validate, test]
+ environment:
+ name: pypi
+ url: https://pypi.org/project/uniface/
+
+ steps:
+ - name: Checkout code
+ uses: actions/checkout@v4
+
+ - name: Set up Python
+ uses: actions/setup-python@v5
+ with:
+ python-version: "3.10"
+ cache: 'pip'
+
+ - name: Install build tools
+ run: |
+ python -m pip install --upgrade pip
+ python -m pip install build twine
+
+ - name: Build package
+ run: python -m build
+
+ - name: Check package
+ run: twine check dist/*
+
+ - name: Publish to PyPI
+ env:
+ TWINE_USERNAME: __token__
+ TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
+ run: twine upload dist/*
+
+ - name: Create GitHub Release
+ uses: softprops/action-gh-release@v1
+ with:
+ files: dist/*
+ generate_release_notes: true
+
diff --git a/MODELS.md b/MODELS.md
new file mode 100644
index 0000000..964991d
--- /dev/null
+++ b/MODELS.md
@@ -0,0 +1,395 @@
+# UniFace Model Zoo
+
+Complete guide to all available models, their performance characteristics, and selection criteria.
+
+---
+
+## Face Detection Models
+
+### RetinaFace Family
+
+RetinaFace models are trained on the WIDER FACE dataset and provide excellent accuracy-speed tradeoffs.
+
+| Model Name | Params | Size | Easy | Medium | Hard | Use Case |
+|---------------------|--------|--------|--------|--------|--------|----------------------------|
+| `MNET_025` | 0.4M | 1.7MB | 88.48% | 87.02% | 80.61% | Mobile/Edge devices |
+| `MNET_050` | 1.0M | 2.6MB | 89.42% | 87.97% | 82.40% | Mobile/Edge devices |
+| `MNET_V1` | 3.5M | 3.8MB | 90.59% | 89.14% | 84.13% | Balanced mobile |
+| `MNET_V2` ⭐ | 3.2M | 3.5MB | 91.70% | 91.03% | 86.60% | **Recommended default** |
+| `RESNET18` | 11.7M | 27MB | 92.50% | 91.02% | 86.63% | Server/High accuracy |
+| `RESNET34` | 24.8M | 56MB | 94.16% | 93.12% | 88.90% | Maximum accuracy |
+
+**Accuracy**: WIDER FACE validation set (Easy/Medium/Hard subsets) - from [RetinaFace paper](https://arxiv.org/abs/1905.00641)
+**Speed**: Benchmark on your own hardware using `scripts/run_detection.py --iterations 100`
+
+#### Usage
+
+```python
+from uniface import RetinaFace
+from uniface.constants import RetinaFaceWeights
+
+# Default (recommended)
+detector = RetinaFace() # Uses MNET_V2
+
+# Specific model
+detector = RetinaFace(
+ model_name=RetinaFaceWeights.MNET_025, # Fastest
+ conf_thresh=0.5,
+ nms_thresh=0.4,
+ input_size=(640, 640)
+)
+```
+
+---
+
+### SCRFD Family
+
+SCRFD (Sample and Computation Redistribution for Efficient Face Detection) models offer state-of-the-art speed-accuracy tradeoffs.
+
+| Model Name | Params | Size | Easy | Medium | Hard | Use Case |
+|-----------------|--------|-------|--------|--------|--------|----------------------------|
+| `SCRFD_500M` | 0.6M | 2.5MB | 90.57% | 88.12% | 68.51% | Real-time applications |
+| `SCRFD_10G` ⭐ | 4.2M | 17MB | 95.16% | 93.87% | 83.05% | **High accuracy + speed** |
+
+**Accuracy**: WIDER FACE validation set - from [SCRFD paper](https://arxiv.org/abs/2105.04714)
+**Speed**: Benchmark on your own hardware using `scripts/run_detection.py --iterations 100`
+
+#### Usage
+
+```python
+from uniface import SCRFD
+from uniface.constants import SCRFDWeights
+
+# Fast real-time detection
+detector = SCRFD(
+ model_name=SCRFDWeights.SCRFD_500M_KPS,
+ conf_thresh=0.5,
+ input_size=(640, 640)
+)
+
+# High accuracy
+detector = SCRFD(
+ model_name=SCRFDWeights.SCRFD_10G_KPS,
+ conf_thresh=0.5
+)
+```
+
+---
+
+## Face Recognition Models
+
+### ArcFace
+
+State-of-the-art face recognition using additive angular margin loss.
+
+| Model Name | Backbone | Params | Size | Use Case |
+|-------------|-------------|--------|-------|----------------------------|
+| `MNET` ⭐ | MobileNet | 2.0M | 8MB | **Balanced (recommended)** |
+| `RESNET` | ResNet50 | 43.6M | 166MB | Maximum accuracy |
+
+**Dataset**: Trained on MS1M-V2 (5.8M images, 85K identities)
+**Accuracy**: Benchmark on your own dataset or use standard face verification benchmarks
+
+#### Usage
+
+```python
+from uniface import ArcFace
+from uniface.constants import ArcFaceWeights
+
+# Default (MobileNet backbone)
+recognizer = ArcFace()
+
+# High accuracy (ResNet50 backbone)
+recognizer = ArcFace(model_name=ArcFaceWeights.RESNET)
+
+# Extract embedding
+embedding = recognizer.get_normalized_embedding(image, landmarks)
+# Returns: (1, 512) normalized embedding vector
+```
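+
+Because the embeddings are L2-normalized, cosine similarity reduces to a plain dot product. A minimal comparison sketch, assuming `emb_a` and `emb_b` were produced by `get_normalized_embedding` as above:
+
+```python
+# Cosine similarity of two L2-normalized (1, 512) embeddings
+similarity = (emb_a @ emb_b.T).item()  # scalar in [-1, 1]; higher = more similar
+```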
+
+---
+
+### MobileFace
+
+Lightweight face recognition optimized for mobile devices.
+
+| Model Name | Backbone | Params | Size | Use Case |
+|-----------------|-----------------|--------|------|--------------------|
+| `MNET_025` | MobileNetV1 0.25| 0.2M | 1MB | Ultra-lightweight |
+| `MNET_V2` ⭐ | MobileNetV2 | 1.0M | 4MB | **Mobile/Edge** |
+| `MNET_V3_SMALL` | MobileNetV3-S | 0.8M | 3MB | Mobile optimized |
+| `MNET_V3_LARGE` | MobileNetV3-L | 2.5M | 10MB | Balanced mobile |
+
+**Note**: These models are lightweight alternatives to ArcFace for resource-constrained environments
+
+#### Usage
+
+```python
+from uniface import MobileFace
+from uniface.constants import MobileFaceWeights
+
+# Lightweight
+recognizer = MobileFace(model_name=MobileFaceWeights.MNET_V2)
+```
+
+---
+
+### SphereFace
+
+Face recognition using angular softmax loss.
+
+| Model Name | Backbone | Params | Size | Use Case |
+|-------------|----------|--------|------|----------------------|
+| `SPHERE20` | Sphere20 | 13.0M | 50MB | Research/Comparison |
+| `SPHERE36` | Sphere36 | 24.2M | 92MB | Research/Comparison |
+
+**Note**: SphereFace uses angular softmax (A-Softmax) loss, an earlier margin-based approach that predates ArcFace
+
+#### Usage
+
+```python
+from uniface import SphereFace
+from uniface.constants import SphereFaceWeights
+
+recognizer = SphereFace(model_name=SphereFaceWeights.SPHERE20)
+```
+
+---
+
+## Facial Landmark Models
+
+### 106-Point Landmark Detection
+
+High-precision facial landmark localization.
+
+| Model Name | Points | Params | Size | Use Case |
+|------------|--------|--------|------|-----------------------------|
+| `2D106` | 106 | 3.7M | 14MB | Face alignment, analysis |
+
+**Note**: Provides 106 facial keypoints for detailed face analysis and alignment
+
+#### Usage
+
+```python
+from uniface import Landmark106
+
+landmarker = Landmark106()
+landmarks = landmarker.get_landmarks(image, bbox)
+# Returns: (106, 2) array of (x, y) coordinates
+```
+
+**Landmark Groups:**
+- Face contour: 0-32 (33 points)
+- Eyebrows: 33-50 (18 points)
+- Nose: 51-62 (12 points)
+- Eyes: 63-86 (24 points)
+- Mouth: 87-105 (19 points)
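+
+These ranges can be sliced straight out of the `(106, 2)` array. A minimal sketch (the group names and helper below are illustrative, not part of the UniFace API):
+
+```python
+import numpy as np
+
+# Index ranges matching the groups listed above (illustrative names)
+LANDMARK_GROUPS = {
+    "contour": slice(0, 33),
+    "eyebrows": slice(33, 51),
+    "nose": slice(51, 63),
+    "eyes": slice(63, 87),
+    "mouth": slice(87, 106),
+}
+
+def split_landmarks(landmarks: np.ndarray) -> dict[str, np.ndarray]:
+    """Split a (106, 2) landmark array into named regions."""
+    return {name: landmarks[s] for name, s in LANDMARK_GROUPS.items()}
+```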
+
+---
+
+## Attribute Analysis Models
+
+### Age & Gender Detection
+
+| Model Name | Attributes | Params | Size | Use Case |
+|------------|-------------|--------|------|-------------------|
+| `DEFAULT` | Age, Gender | 2.1M | 8MB | General purpose |
+
+**Dataset**: Trained on CelebA
+**Note**: Accuracy varies by demographic and image quality. Test on your specific use case.
+
+#### Usage
+
+```python
+from uniface import AgeGender
+
+predictor = AgeGender()
+gender, age = predictor.predict(image, bbox)
+# Returns: ("Male"/"Female", age_in_years)
+```
+
+---
+
+### Emotion Detection
+
+| Model Name | Classes | Params | Size | Use Case |
+|--------------|---------|--------|------|-----------------------|
+| `AFFECNET7` | 7 | 0.5M | 2MB | 7-class emotion |
+| `AFFECNET8` | 8 | 0.5M | 2MB | 8-class emotion |
+
+**Classes (7)**: Neutral, Happy, Sad, Surprise, Fear, Disgust, Anger
+**Classes (8)**: Above + Contempt
+
+**Dataset**: Trained on AffectNet
+**Note**: Emotion detection accuracy depends heavily on facial expression clarity and cultural context
+
+#### Usage
+
+```python
+from uniface import Emotion
+from uniface.constants import DDAMFNWeights
+
+predictor = Emotion(model_name=DDAMFNWeights.AFFECNET7)
+emotion, confidence = predictor.predict(image, landmarks)
+```
+
+---
+
+## Model Selection Guide
+
+### By Use Case
+
+#### Mobile/Edge Devices
+- **Detection**: `RetinaFace(MNET_025)` or `SCRFD(SCRFD_500M)`
+- **Recognition**: `MobileFace(MNET_V2)`
+- **Priority**: Speed, small model size
+
+#### Real-Time Applications (Webcam, Video)
+- **Detection**: `RetinaFace(MNET_V2)` or `SCRFD(SCRFD_500M)`
+- **Recognition**: `ArcFace(MNET)`
+- **Priority**: Speed-accuracy balance
+
+#### High-Accuracy Applications (Security, Verification)
+- **Detection**: `SCRFD(SCRFD_10G)` or `RetinaFace(RESNET34)`
+- **Recognition**: `ArcFace(RESNET)`
+- **Priority**: Maximum accuracy
+
+#### Server/Cloud Deployment
+- **Detection**: `SCRFD(SCRFD_10G)`
+- **Recognition**: `ArcFace(RESNET)`
+- **Priority**: Accuracy, batch processing
+
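+The shorthand above maps to explicit constructor calls with the enum values from `uniface.constants`. For example, the mobile/edge profile:
+
+```python
+from uniface import RetinaFace, MobileFace
+from uniface.constants import RetinaFaceWeights, MobileFaceWeights
+
+# Mobile/edge profile: smallest detector + lightweight recognizer
+detector = RetinaFace(model_name=RetinaFaceWeights.MNET_025)
+recognizer = MobileFace(model_name=MobileFaceWeights.MNET_V2)
+```
+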
+---
+
+### By Hardware
+
+#### Apple Silicon (M1/M2/M3/M4)
+**Recommended**: All models work well with CoreML acceleration
+
+```bash
+pip install uniface[silicon]
+```
+
+**Recommended models**:
+- **Fast**: `SCRFD(SCRFD_500M)` - Lightweight, real-time capable
+- **Balanced**: `RetinaFace(MNET_V2)` - Good accuracy/speed tradeoff
+- **Accurate**: `SCRFD(SCRFD_10G)` - High accuracy
+
+**Benchmark on your Mac**: `python scripts/run_detection.py --iterations 100`
+
+#### NVIDIA GPU (CUDA)
+**Recommended**: Larger models for maximum throughput
+
+```bash
+pip install uniface[gpu]
+```
+
+**Recommended models**:
+- **Fast**: `SCRFD(SCRFD_500M)` - Maximum throughput
+- **Balanced**: `SCRFD(SCRFD_10G)` - Best overall
+- **Accurate**: `RetinaFace(RESNET34)` - Highest accuracy
+
+#### CPU Only
+**Recommended**: Lightweight models
+
+**Recommended models**:
+- **Fast**: `RetinaFace(MNET_025)` - Smallest, fastest
+- **Balanced**: `RetinaFace(MNET_V2)` - Recommended default
+- **Accurate**: `SCRFD(SCRFD_10G)` - Best accuracy on CPU
+
+**Note**: FPS values vary significantly based on image size, number of faces, and hardware. Always benchmark on your specific setup.
+
+---
+
+## Benchmark Details
+
+### How to Benchmark
+
+Run benchmarks on your own hardware:
+
+```bash
+# Detection speed
+python scripts/run_detection.py --image assets/test.jpg --iterations 100
+
+# Compare models
+python scripts/run_detection.py --image assets/test.jpg --method retinaface --iterations 100
+python scripts/run_detection.py --image assets/test.jpg --method scrfd --iterations 100
+```
+
+### Accuracy Metrics Explained
+
+- **WIDER FACE**: Standard face detection benchmark with three difficulty levels
+ - **Easy**: Large faces (>50px), clear backgrounds
+ - **Medium**: Medium-sized faces (30-50px), moderate occlusion
+ - **Hard**: Small faces (<30px), heavy occlusion, blur
+
+ *Accuracy values are from the original papers - see references below*
+
+- **Model Size**: ONNX model file size (affects download time and memory)
+- **Params**: Number of model parameters (affects inference speed)
+
+### Important Notes
+
+1. **Speed varies by**:
+ - Image resolution
+ - Number of faces in image
+ - Hardware (CPU/GPU/CoreML)
+ - Batch size
+ - Operating system
+
+2. **Accuracy varies by**:
+ - Image quality
+ - Lighting conditions
+ - Face pose and occlusion
+ - Demographic factors
+
+3. **Always benchmark on your specific use case** before choosing a model
+
+---
+
+## Model Updates
+
+Models are automatically downloaded and cached on first use. Cache location: `~/.uniface/models/`
+
+### Manual Model Management
+
+```python
+from uniface.model_store import verify_model_weights
+from uniface.constants import RetinaFaceWeights
+
+# Download specific model
+model_path = verify_model_weights(
+ RetinaFaceWeights.MNET_V2,
+ root='./custom_cache'
+)
+
+# Models are verified with SHA-256 checksums
+```
+
+### Download All Models
+
+```bash
+# Using the provided script
+python scripts/download_model.py
+
+# Download specific model
+python scripts/download_model.py --model MNET_V2
+```
+
+---
+
+## References
+
+### Model Training & Architectures
+
+- **RetinaFace Training**: [yakhyo/retinaface-pytorch](https://github.com/yakhyo/retinaface-pytorch) - PyTorch implementation and training code
+- **Face Recognition Training**: [yakhyo/face-recognition](https://github.com/yakhyo/face-recognition) - ArcFace, MobileFace, SphereFace training code
+- **InsightFace**: [deepinsight/insightface](https://github.com/deepinsight/insightface) - Model architectures and pretrained weights
+
+### Papers
+
+- **RetinaFace**: [Single-Shot Multi-Level Face Localisation in the Wild](https://arxiv.org/abs/1905.00641)
+- **SCRFD**: [Sample and Computation Redistribution for Efficient Face Detection](https://arxiv.org/abs/2105.04714)
+- **ArcFace**: [Additive Angular Margin Loss for Deep Face Recognition](https://arxiv.org/abs/1801.07698)
+- **SphereFace**: [Deep Hypersphere Embedding for Face Recognition](https://arxiv.org/abs/1704.08063)
+
diff --git a/QUICKSTART.md b/QUICKSTART.md
new file mode 100644
index 0000000..0d0aa0e
--- /dev/null
+++ b/QUICKSTART.md
@@ -0,0 +1,355 @@
+# UniFace Quick Start Guide
+
+Get up and running with UniFace in 5 minutes! This guide covers the most common use cases.
+
+---
+
+## Installation
+
+```bash
+# macOS (Apple Silicon)
+pip install uniface[silicon]
+
+# Linux/Windows with NVIDIA GPU
+pip install uniface[gpu]
+
+# CPU-only (all platforms)
+pip install uniface
+```
+
+---
+
+## 1. Face Detection (30 seconds)
+
+Detect faces in an image:
+
+```python
+import cv2
+from uniface import RetinaFace
+
+# Load image
+image = cv2.imread("photo.jpg")
+
+# Initialize detector (models auto-download on first use)
+detector = RetinaFace()
+
+# Detect faces
+faces = detector.detect(image)
+
+# Print results
+for i, face in enumerate(faces):
+ print(f"Face {i+1}:")
+ print(f" Confidence: {face['confidence']:.2f}")
+ print(f" BBox: {face['bbox']}")
+ print(f" Landmarks: {len(face['landmarks'])} points")
+```
+
+**Output:**
+```
+Face 1:
+ Confidence: 0.99
+ BBox: [120.5, 85.3, 245.8, 210.6]
+ Landmarks: 5 points
+```
+
+---
+
+## 2. Visualize Detections (1 minute)
+
+Draw bounding boxes and landmarks:
+
+```python
+import cv2
+from uniface import RetinaFace
+from uniface.visualization import draw_detections
+
+# Detect faces
+detector = RetinaFace()
+image = cv2.imread("photo.jpg")
+faces = detector.detect(image)
+
+# Extract visualization data
+bboxes = [f['bbox'] for f in faces]
+scores = [f['confidence'] for f in faces]
+landmarks = [f['landmarks'] for f in faces]
+
+# Draw on image
+draw_detections(image, bboxes, scores, landmarks, vis_threshold=0.6)
+
+# Save result
+cv2.imwrite("output.jpg", image)
+print("Saved output.jpg")
+```
+
+---
+
+## 3. Face Recognition (2 minutes)
+
+Compare two faces:
+
+```python
+import cv2
+import numpy as np
+from uniface import RetinaFace, ArcFace
+
+# Initialize models
+detector = RetinaFace()
+recognizer = ArcFace()
+
+# Load two images
+image1 = cv2.imread("person1.jpg")
+image2 = cv2.imread("person2.jpg")
+
+# Detect faces
+faces1 = detector.detect(image1)
+faces2 = detector.detect(image2)
+
+if faces1 and faces2:
+ # Extract embeddings
+ emb1 = recognizer.get_normalized_embedding(image1, faces1[0]['landmarks'])
+ emb2 = recognizer.get_normalized_embedding(image2, faces2[0]['landmarks'])
+
+ # Compute similarity (cosine similarity)
+ similarity = np.dot(emb1, emb2.T)[0][0]
+
+ # Interpret result
+ if similarity > 0.6:
+ print(f"✅ Same person (similarity: {similarity:.3f})")
+ else:
+ print(f"❌ Different people (similarity: {similarity:.3f})")
+else:
+ print("No faces detected")
+```
+
+**Similarity thresholds:**
+- `> 0.6`: Same person (high confidence)
+- `0.4 - 0.6`: Uncertain (manual review)
+- `< 0.4`: Different people
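+
+A small helper that encodes these cutoffs (the values are rules of thumb from this guide; tune them on your own data):
+
+```python
+def interpret_similarity(score: float) -> str:
+    """Map a cosine similarity score to a rough verdict."""
+    if score > 0.6:
+        return "same person"
+    if score >= 0.4:
+        return "uncertain (manual review)"
+    return "different people"
+```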
+
+---
+
+## 4. Webcam Demo (2 minutes)
+
+Real-time face detection:
+
+```python
+import cv2
+from uniface import RetinaFace
+from uniface.visualization import draw_detections
+
+detector = RetinaFace()
+cap = cv2.VideoCapture(0)
+
+print("Press 'q' to quit")
+
+while True:
+ ret, frame = cap.read()
+ if not ret:
+ break
+
+ # Detect faces
+ faces = detector.detect(frame)
+
+ # Draw results
+ bboxes = [f['bbox'] for f in faces]
+ scores = [f['confidence'] for f in faces]
+ landmarks = [f['landmarks'] for f in faces]
+ draw_detections(frame, bboxes, scores, landmarks)
+
+ # Show frame
+ cv2.imshow("UniFace - Press 'q' to quit", frame)
+
+ if cv2.waitKey(1) & 0xFF == ord('q'):
+ break
+
+cap.release()
+cv2.destroyAllWindows()
+```
+
+---
+
+## 5. Age & Gender Detection (2 minutes)
+
+Detect age and gender:
+
+```python
+import cv2
+from uniface import RetinaFace, AgeGender
+
+# Initialize models
+detector = RetinaFace()
+age_gender = AgeGender()
+
+# Load image
+image = cv2.imread("photo.jpg")
+faces = detector.detect(image)
+
+# Predict attributes
+for i, face in enumerate(faces):
+ gender, age = age_gender.predict(image, face['bbox'])
+ print(f"Face {i+1}: {gender}, {age} years old")
+```
+
+**Output:**
+```
+Face 1: Male, 32 years old
+Face 2: Female, 28 years old
+```
+
+---
+
+## 6. Facial Landmarks (2 minutes)
+
+Detect 106 facial landmarks:
+
+```python
+import cv2
+from uniface import RetinaFace, Landmark106
+
+# Initialize models
+detector = RetinaFace()
+landmarker = Landmark106()
+
+# Detect face and landmarks
+image = cv2.imread("photo.jpg")
+faces = detector.detect(image)
+
+if faces:
+ landmarks = landmarker.get_landmarks(image, faces[0]['bbox'])
+ print(f"Detected {len(landmarks)} landmarks")
+
+ # Draw landmarks
+ for x, y in landmarks.astype(int):
+ cv2.circle(image, (x, y), 2, (0, 255, 0), -1)
+
+ cv2.imwrite("landmarks.jpg", image)
+```
+
+---
+
+## 7. Batch Processing (3 minutes)
+
+Process multiple images:
+
+```python
+import cv2
+from pathlib import Path
+from uniface import RetinaFace
+from uniface.visualization import draw_detections
+
+detector = RetinaFace()
+
+# Process all images in a folder
+image_dir = Path("images/")
+output_dir = Path("output/")
+output_dir.mkdir(exist_ok=True)
+
+for image_path in image_dir.glob("*.jpg"):
+ print(f"Processing {image_path.name}...")
+
+ image = cv2.imread(str(image_path))
+ faces = detector.detect(image)
+
+ print(f" Found {len(faces)} face(s)")
+
+    # Draw detections and save the annotated image
+    bboxes = [f['bbox'] for f in faces]
+    scores = [f['confidence'] for f in faces]
+    landmarks = [f['landmarks'] for f in faces]
+    draw_detections(image, bboxes, scores, landmarks)
+
+    output_path = output_dir / image_path.name
+    cv2.imwrite(str(output_path), image)
+
+print("Done!")
+```
+
+---
+
+## 8. Model Selection
+
+Choose the right model for your use case:
+
+```python
+from uniface import create_detector
+from uniface.constants import RetinaFaceWeights, SCRFDWeights
+
+# Fast detection (mobile/edge devices)
+detector = create_detector(
+ 'retinaface',
+ model_name=RetinaFaceWeights.MNET_025,
+ conf_thresh=0.7
+)
+
+# Balanced (recommended)
+detector = create_detector(
+ 'retinaface',
+ model_name=RetinaFaceWeights.MNET_V2
+)
+
+# High accuracy (server/GPU)
+detector = create_detector(
+ 'scrfd',
+ model_name=SCRFDWeights.SCRFD_10G_KPS,
+ conf_thresh=0.5
+)
+```
+
+---
+
+## Common Issues
+
+### 1. Models Not Downloading
+
+```python
+# Manually download a model
+from uniface.model_store import verify_model_weights
+from uniface.constants import RetinaFaceWeights
+
+model_path = verify_model_weights(RetinaFaceWeights.MNET_V2)
+print(f"Model downloaded to: {model_path}")
+```
+
+### 2. Check Hardware Acceleration
+
+```python
+import onnxruntime as ort
+print("Available providers:", ort.get_available_providers())
+
+# macOS M-series should show: ['CoreMLExecutionProvider', ...]
+# NVIDIA GPU should show: ['CUDAExecutionProvider', ...]
+```
+
+### 3. Slow Performance on Mac
+
+Make sure you installed with CoreML support:
+
+```bash
+pip install uniface[silicon]
+```
+
+### 4. Import Errors
+
+```python
+# ✅ Correct imports
+from uniface import RetinaFace, ArcFace, Landmark106
+from uniface.detection import create_detector
+
+# ❌ Wrong imports
+from uniface import retinaface # Module, not class
+```
+
+---
+
+## Next Steps
+
+- **Detailed Examples**: Check the [examples/](examples/) folder for Jupyter notebooks
+- **Model Benchmarks**: See [MODELS.md](MODELS.md) for performance comparisons
+- **Full Documentation**: Read [README.md](README.md) for complete API reference
+
+---
+
+## References
+
+- **RetinaFace Training**: [yakhyo/retinaface-pytorch](https://github.com/yakhyo/retinaface-pytorch)
+- **Face Recognition Training**: [yakhyo/face-recognition](https://github.com/yakhyo/face-recognition)
+- **InsightFace**: [deepinsight/insightface](https://github.com/deepinsight/insightface)
+
+---
+
+Happy coding! 🚀
+
diff --git a/README.md b/README.md
index 5ff90c3..60bdabf 100644
--- a/README.md
+++ b/README.md
@@ -1,44 +1,80 @@
# UniFace: All-in-One Face Analysis Library
[](https://opensource.org/licenses/MIT)
-
+
[](https://pypi.org/project/uniface/)
[](https://github.com/yakhyo/uniface/actions)
-[](https://github.com/yakhyo/uniface)
[](https://pepy.tech/project/uniface)
-[](https://www.python.org/dev/peps/pep-0008/)
-[](https://github.com/yakhyo/uniface/releases)
-**uniface** is a lightweight face detection library designed for high-performance face localization, landmark detection and face alignment. The library supports ONNX models and provides utilities for bounding box visualization and landmark plotting. To train RetinaFace model, see https://github.com/yakhyo/retinaface-pytorch.
+**UniFace** is a lightweight, production-ready face analysis library built on ONNX Runtime. It provides high-performance face detection, recognition, landmark detection, and attribute analysis with hardware acceleration support across platforms.
---
## Features
-| Date | Feature Description |
-| ---------- | --------------------------------------------------------------------------------------------------------------------- |
-| Planned | 🎭**Age and Gender Detection**: Planned feature for predicting age and gender from facial images. |
-| Planned | 🧩**Face Recognition**: Upcoming capability to identify and verify faces. |
-| 2024-11-21 | 🔄**Face Alignment**: Added precise face alignment for better downstream tasks. |
-| 2024-11-20 | ⚡**High-Speed Face Detection**: ONNX model integration for faster and efficient face detection. |
-| 2024-11-20 | 🎯**Facial Landmark Localization**: Accurate detection of key facial features like eyes, nose, and mouth. |
-| 2024-11-20 | 🛠**API for Inference and Visualization**: Simplified API for seamless inference and visual results generation. |
+- **High-Speed Face Detection**: ONNX-optimized RetinaFace and SCRFD models
+- **Facial Landmark Detection**: Accurate 106-point landmark localization
+- **Face Recognition**: ArcFace, MobileFace, and SphereFace embeddings
+- **Attribute Analysis**: Age, gender, and emotion detection
+- **Face Alignment**: Precise alignment for downstream tasks
+- **Hardware Acceleration**: CoreML (Apple Silicon), CUDA (NVIDIA), CPU fallback
+- **Simple API**: Intuitive factory functions and clean interfaces
+- **Production-Ready**: Type hints, comprehensive logging, PEP8 compliant
---
## Installation
-The easiest way to install **UniFace** is via [PyPI](https://pypi.org/project/uniface/). This will automatically install the library along with its prerequisites.
+### Quick Install (All Platforms)
```bash
pip install uniface
```
-To work with the latest version of **UniFace**, which may not yet be released on PyPI, you can install it directly from the repository:
+### Platform-Specific Installation
+
+#### macOS (Apple Silicon - M1/M2/M3/M4)
+
+For optimal performance with **CoreML acceleration** (3-5x faster):
+
+```bash
+# Standard installation (CPU only)
+pip install uniface
+
+# With CoreML acceleration (recommended for M-series chips)
+pip install uniface[silicon]
+```
+
+**Verify CoreML is available:**
+```python
+import onnxruntime as ort
+print(ort.get_available_providers())
+# Should show: ['CoreMLExecutionProvider', 'CPUExecutionProvider']
+```
+
+#### Linux/Windows with NVIDIA GPU
+
+```bash
+# With CUDA acceleration
+pip install uniface[gpu]
+```
+
+**Requirements:**
+- CUDA 11.x or 12.x
+- cuDNN 8.x
+- See [ONNX Runtime GPU requirements](https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html)
+
+#### CPU-Only (All Platforms)
+
+```bash
+pip install uniface
+```
+
+### Install from Source
```bash
git clone https://github.com/yakhyo/uniface.git
@@ -50,254 +86,362 @@ pip install -e .
## Quick Start
-To get started with face detection using **UniFace**, check out the [example notebook](examples/face_detection.ipynb).
-It demonstrates how to initialize the model, run inference, and visualize the results.
+### Face Detection
+
+```python
+import cv2
+from uniface import RetinaFace
+
+# Initialize detector
+detector = RetinaFace()
+
+# Load image
+image = cv2.imread("image.jpg")
+
+# Detect faces
+faces = detector.detect(image)
+
+# Process results
+for face in faces:
+ bbox = face['bbox'] # [x1, y1, x2, y2]
+ confidence = face['confidence']
+ landmarks = face['landmarks'] # 5-point landmarks
+ print(f"Face detected with confidence: {confidence:.2f}")
+```
+
+### Face Recognition
+
+```python
+from uniface import ArcFace, RetinaFace
+from uniface import compute_similarity
+
+# Initialize models
+detector = RetinaFace()
+recognizer = ArcFace()
+
+# Detect and extract embeddings
+faces1 = detector.detect(image1)
+faces2 = detector.detect(image2)
+
+embedding1 = recognizer.get_normalized_embedding(image1, faces1[0]['landmarks'])
+embedding2 = recognizer.get_normalized_embedding(image2, faces2[0]['landmarks'])
+
+# Compare faces
+similarity = compute_similarity(embedding1, embedding2)
+print(f"Similarity: {similarity:.4f}")
+```
+
+### Facial Landmarks
+
+```python
+from uniface import RetinaFace, Landmark106
+
+detector = RetinaFace()
+landmarker = Landmark106()
+
+faces = detector.detect(image)
+landmarks = landmarker.get_landmarks(image, faces[0]['bbox'])
+# Returns 106 (x, y) landmark points
+```
+
+### Age & Gender Detection
+
+```python
+from uniface import RetinaFace, AgeGender
+
+detector = RetinaFace()
+age_gender = AgeGender()
+
+faces = detector.detect(image)
+gender, age = age_gender.predict(image, faces[0]['bbox'])
+print(f"{gender}, {age} years old")
+```
---
-## Examples
+## Documentation
-
-

-
+- [**QUICKSTART.md**](QUICKSTART.md) - 5-minute getting started guide
+- [**MODELS.md**](MODELS.md) - Model zoo, benchmarks, and selection guide
+- [**Examples**](examples/) - Jupyter notebooks with detailed examples
-Explore the following example notebooks to learn how to use **UniFace** effectively:
+---
-- [Face Detection](examples/face_detection.ipynb): Demonstrates how to perform face detection, draw bounding boxes, and landmarks on an image.
-- [Face Alignment](examples/face_alignment.ipynb): Shows how to align faces using detected landmarks.
-- [Age and Gender Detection](examples/age_gender.ipynb): Example for detecting age and gender from faces. (underdevelopment)
+## API Overview
-### 🚀 Initialize the RetinaFace Model
-
-To use the RetinaFace model for face detection, initialize it with either custom or default configuration parameters.
-
-#### Full Initialization (with custom parameters)
+### Factory Functions (Recommended)
```python
-from uniface import RetinaFace
+from uniface import create_detector, create_recognizer, create_landmarker
+
+# Create detector with default settings
+detector = create_detector('retinaface')
+
+# Create with custom config
+detector = create_detector(
+ 'scrfd',
+ model_name='scrfd_10g_kps',
+ conf_thresh=0.8,
+ input_size=(640, 640)
+)
+
+# Recognition and landmarks
+recognizer = create_recognizer('arcface')
+landmarker = create_landmarker('2d106det')
+```
+
+### Direct Model Instantiation
+
+```python
+from uniface import RetinaFace, SCRFD, ArcFace, MobileFace
from uniface.constants import RetinaFaceWeights
-# Initialize RetinaFace with custom configuration
-uniface_inference = RetinaFace(
- model_name=RetinaFaceWeights.MNET_V2, # Model name from enum
- conf_thresh=0.5, # Confidence threshold for detections
- pre_nms_topk=5000, # Number of top detections before NMS
- nms_thresh=0.4, # IoU threshold for NMS
- post_nms_topk=750, # Number of top detections after NMS
- dynamic_size=False, # Whether to allow arbitrary input sizes
- input_size=(640, 640) # Input image size (HxW)
+# Detection
+detector = RetinaFace(
+ model_name=RetinaFaceWeights.MNET_V2,
+ conf_thresh=0.5,
+ nms_thresh=0.4
)
+
+# Recognition
+recognizer = ArcFace() # Uses default weights
+recognizer = MobileFace() # Lightweight alternative
```
-#### Minimal Initialization (uses default parameters)
+### High-Level Detection API
```python
-from uniface import RetinaFace
+from uniface import detect_faces
-# Initialize with default settings
-uniface_inference = RetinaFace()
-```
-
-**Default Parameters:**
-
-```python
-model_name = RetinaFaceWeights.MNET_V2
-conf_thresh = 0.5
-pre_nms_topk = 5000
-nms_thresh = 0.4
-post_nms_topk = 750
-dynamic_size = False
-input_size = (640, 640)
-```
-
-### Run Inference
-
-Inference on image:
-
-```python
-import cv2
-from uniface.visualization import draw_detections
-
-# Load an image
-image_path = "assets/test.jpg"
-original_image = cv2.imread(image_path)
-
-# Perform inference
-boxes, landmarks = uniface_inference.detect(original_image)
-# boxes: [x_min, y_min, x_max, y_max, confidence]
-
-# Visualize results
-draw_detections(original_image, (boxes, landmarks), vis_threshold=0.6)
-
-# Save the output image
-output_path = "output.jpg"
-cv2.imwrite(output_path, original_image)
-print(f"Saved output image to {output_path}")
-```
-
-Inference on video:
-
-```python
-import cv2
-from uniface.visualization import draw_detections
-
-# Initialize the webcam
-cap = cv2.VideoCapture(0)
-
-if not cap.isOpened():
- print("Error: Unable to access the webcam.")
- exit()
-
-while True:
- # Capture a frame from the webcam
- ret, frame = cap.read()
- if not ret:
- print("Error: Failed to read frame.")
- break
-
- # Perform inference
- boxes, landmarks = uniface_inference.detect(frame)
- # 'boxes' contains bounding box coordinates and confidence scores:
- # Format: [x_min, y_min, x_max, y_max, confidence]
-
- # Draw detections on the frame
- draw_detections(frame, (boxes, landmarks), vis_threshold=0.6)
-
- # Display the output
- cv2.imshow("Webcam Inference", frame)
-
- # Exit if 'q' is pressed
- if cv2.waitKey(1) & 0xFF == ord('q'):
- break
-
-# Release the webcam and close all OpenCV windows
-cap.release()
-cv2.destroyAllWindows()
+# One-line face detection
+faces = detect_faces(image, method='retinaface', conf_thresh=0.8)
```
---
-### Evaluation results of available models on WiderFace
+## Model Performance
-| RetinaFace Models | Easy | Medium | Hard |
-| ------------------ | ---------------- | ---------------- | ---------------- |
-| retinaface_mnet025 | 88.48% | 87.02% | 80.61% |
-| retinaface_mnet050 | 89.42% | 87.97% | 82.40% |
-| retinaface_mnet_v1 | 90.59% | 89.14% | 84.13% |
-| retinaface_mnet_v2 | 91.70% | 91.03% | 86.60% |
-| retinaface_r18 | 92.50% | 91.02% | 86.63% |
-| retinaface_r34 | **94.16%** | **93.12%** | **88.90%** |
+### Face Detection (WIDER FACE Dataset)
+
+| Model | Easy | Medium | Hard | Use Case |
+|--------------------|--------|--------|--------|-------------------------|
+| retinaface_mnet025 | 88.48% | 87.02% | 80.61% | Mobile/Edge devices |
+| retinaface_mnet_v2 | 91.70% | 91.03% | 86.60% | Balanced (recommended) |
+| retinaface_r34 | 94.16% | 93.12% | 88.90% | High accuracy |
+| scrfd_500m | 90.57% | 88.12% | 68.51% | Real-time applications |
+| scrfd_10g | 95.16% | 93.87% | 83.05% | Best accuracy/speed |
+
+*Accuracy values from original papers: [RetinaFace](https://arxiv.org/abs/1905.00641), [SCRFD](https://arxiv.org/abs/2105.04714)*
+
+**Benchmark on your hardware:**
+```bash
+python scripts/run_detection.py --image assets/test.jpg --iterations 100
+```
+
+See [MODELS.md](MODELS.md) for detailed model information and selection guide.
-## API Reference
+---
-### `RetinaFace` Class
+## Examples
-#### Initialization
+### Webcam Face Detection
```python
-from typings import Tuple
+import cv2
from uniface import RetinaFace
+from uniface.visualization import draw_detections
+
+detector = RetinaFace()
+cap = cv2.VideoCapture(0)
+
+while True:
+ ret, frame = cap.read()
+ if not ret:
+ break
+
+ faces = detector.detect(frame)
+
+ # Extract data for visualization
+ bboxes = [f['bbox'] for f in faces]
+ scores = [f['confidence'] for f in faces]
+ landmarks = [f['landmarks'] for f in faces]
+
+ draw_detections(frame, bboxes, scores, landmarks, vis_threshold=0.6)
+
+ cv2.imshow("Face Detection", frame)
+ if cv2.waitKey(1) & 0xFF == ord('q'):
+ break
+
+cap.release()
+cv2.destroyAllWindows()
+```
+
+### Face Search System
+
+```python
+import cv2
+import numpy as np
+from uniface import RetinaFace, ArcFace
+
+detector = RetinaFace()
+recognizer = ArcFace()
+
+# Build face database from enrollment images (paths below are placeholders)
+person_images = {"alice": "faces/alice.jpg", "bob": "faces/bob.jpg"}
+database = {}
+for person_id, image_path in person_images.items():
+ image = cv2.imread(image_path)
+ faces = detector.detect(image)
+ if faces:
+ embedding = recognizer.get_normalized_embedding(
+ image, faces[0]['landmarks']
+ )
+ database[person_id] = embedding
+
+# Search for a face
+query_image = cv2.imread("query.jpg")
+query_faces = detector.detect(query_image)
+if query_faces:
+ query_embedding = recognizer.get_normalized_embedding(
+ query_image, query_faces[0]['landmarks']
+ )
+
+ # Find best match
+ best_match = None
+ best_similarity = -1
+
+ for person_id, db_embedding in database.items():
+ similarity = np.dot(query_embedding, db_embedding.T)[0][0]
+ if similarity > best_similarity:
+ best_similarity = similarity
+ best_match = person_id
+
+ print(f"Best match: {best_match} (similarity: {best_similarity:.4f})")
+```
+
+More examples in the [examples/](examples/) directory.
+
+---
+
+## Advanced Configuration
+
+### Custom ONNX Runtime Providers
+
+```python
+from uniface.onnx_utils import get_available_providers, create_onnx_session
+
+# Check available providers
+providers = get_available_providers()
+print(f"Available: {providers}")
+
+# UniFace auto-selects the best available provider, so no configuration is needed
+from uniface import RetinaFace
+detector = RetinaFace()
+# Internally uses create_onnx_session() to pick the provider
+```
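+
+If you do need to pin a specific provider, a minimal sketch using the generic onnxruntime API (this bypasses UniFace's wrapper classes), e.g. to force CPU execution:
+
+```python
+import onnxruntime as ort
+from uniface.model_store import verify_model_weights
+from uniface.constants import RetinaFaceWeights
+
+# Resolve (and download if needed) the cached model file
+model_path = verify_model_weights(RetinaFaceWeights.MNET_V2)
+
+# Pin the CPU provider explicitly
+session = ort.InferenceSession(str(model_path), providers=["CPUExecutionProvider"])
+print(session.get_providers())  # ['CPUExecutionProvider']
+```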
+
+### Model Download and Caching
+
+Models are automatically downloaded on first use and cached in `~/.uniface/models/`.
+
+```python
+from uniface.model_store import verify_model_weights
from uniface.constants import RetinaFaceWeights
-RetinaFace(
- model_name: RetinaFaceWeights,
- conf_thresh: float = 0.5,
- pre_nms_topk: int = 5000,
- nms_thresh: float = 0.4,
- post_nms_topk: int = 750,
- dynamic_size: bool = False,
- input_size: Tuple[int, int] = (640, 640)
+# Manually download and verify a model
+model_path = verify_model_weights(
+ RetinaFaceWeights.MNET_V2,
+ root='./custom_models' # Custom cache directory
)
```
-**Parameters**:
+### Logging Configuration
-- `model_name` _(RetinaFaceWeights)_: Enum value for model to use. Supported values:
- - `MNET_025`, `MNET_050`, `MNET_V1`, `MNET_V2`, `RESNET18`, `RESNET34`
-- `conf_thresh` _(float, default=0.5)_: Minimum confidence score for detections.
-- `pre_nms_topk` _(int, default=5000)_: Max detections to keep before NMS.
-- `nms_thresh` _(float, default=0.4)_: IoU threshold for Non-Maximum Suppression.
-- `post_nms_topk` _(int, default=750)_: Max detections to keep after NMS.
-- `dynamic_size` _(Optional[bool], default=False)_: Use dynamic input size.
-- `input_size` _(Optional[Tuple[int, int]], default=(640, 640))_: Static input size for the model (width, height).
+```python
+from uniface import Logger
+import logging
+
+# Set logging level
+Logger.setLevel(logging.DEBUG) # DEBUG, INFO, WARNING, ERROR
+
+# Disable logging
+Logger.setLevel(logging.CRITICAL)
+```
---
-### `detect` Method
+## Testing
-```python
-detect(
- image: np.ndarray,
- max_num: int = 0,
- metric: str = "default",
- center_weight: float = 2.0
-) -> Tuple[np.ndarray, np.ndarray]
+```bash
+# Run all tests
+pytest
+
+# Run with coverage
+pytest --cov=uniface --cov-report=html
+
+# Run specific test file
+pytest tests/test_retinaface.py -v
```
-**Description**:
-Detects faces in the given image and returns bounding boxes and landmarks.
-
-**Parameters**:
-
-- `image` _(np.ndarray)_: Input image in BGR format.
-- `max_num` _(int, default=0)_: Maximum number of faces to return. `0` means return all.
-- `metric` _(str, default="default")_: Metric for prioritizing detections:
- - `"default"`: Prioritize detections closer to the image center.
- - `"max"`: Prioritize larger bounding box areas.
-- `center_weight` _(float, default=2.0)_: Weight for prioritizing center-aligned faces.
-
-**Returns**:
-
-- `bounding_boxes` _(np.ndarray)_: Array of detections as `[x_min, y_min, x_max, y_max, confidence]`.
-- `landmarks` _(np.ndarray)_: Array of landmarks as `[(x1, y1), ..., (x5, y5)]`.
-
---
-### Visualization Utilities
+## Development
-#### `draw_detections`
+### Setup Development Environment
-```python
-draw_detections(
- image: np.ndarray,
- detections: Tuple[np.ndarray, np.ndarray],
- vis_threshold: float = 0.6
-) -> None
+```bash
+git clone https://github.com/yakhyo/uniface.git
+cd uniface
+
+# Install in editable mode with dev dependencies
+pip install -e ".[dev]"
+
+# Run tests
+pytest
+
+# Format code
+black uniface/
+isort uniface/
```
-**Description**:
-Draws bounding boxes and landmarks on the given image.
+### Project Structure
-**Parameters**:
+```
+uniface/
+├── uniface/
+│ ├── detection/ # Face detection models
+│ ├── recognition/ # Face recognition models
+│ ├── landmark/ # Landmark detection
+│ ├── attribute/ # Age, gender, emotion
+│ ├── onnx_utils.py # ONNX Runtime utilities
+│ ├── model_store.py # Model download & caching
+│ └── visualization.py # Drawing utilities
+├── tests/ # Unit tests
+├── examples/ # Example notebooks
+└── scripts/ # Utility scripts
+```
-- `image` _(np.ndarray)_: The input image in BGR format.
-- `detections` _(Tuple[np.ndarray, np.ndarray])_: A tuple of bounding boxes and landmarks.
-- `vis_threshold` _(float, default=0.6)_: Minimum confidence score for visualization.
+---
+
+## References
+
+### Model Training & Architectures
+
+- **RetinaFace Training**: [yakhyo/retinaface-pytorch](https://github.com/yakhyo/retinaface-pytorch) - PyTorch implementation and training code
+- **Face Recognition Training**: [yakhyo/face-recognition](https://github.com/yakhyo/face-recognition) - ArcFace, MobileFace, SphereFace training code
+- **InsightFace**: [deepinsight/insightface](https://github.com/deepinsight/insightface) - Model architectures and pretrained weights
+
+### Papers
+
+- **RetinaFace**: [Single-Shot Multi-Level Face Localisation in the Wild](https://arxiv.org/abs/1905.00641)
+- **SCRFD**: [Sample and Computation Redistribution for Efficient Face Detection](https://arxiv.org/abs/2105.04714)
+- **ArcFace**: [Additive Angular Margin Loss for Deep Face Recognition](https://arxiv.org/abs/1801.07698)
---
## Contributing
-We welcome contributions to enhance the library! Feel free to:
+Contributions are welcome! Please open an issue or submit a pull request on [GitHub](https://github.com/yakhyo/uniface).
-- Submit bug reports or feature requests.
-- Fork the repository and create a pull request.
-
----
-
-## License
-
-This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for details.
-
----
-
-## Acknowledgments
-
-- Based on the RetinaFace model for face detection ([https://github.com/yakhyo/retinaface-pytorch](https://github.com/yakhyo/retinaface-pytorch)).
-- Inspired by InsightFace and other face detection projects.
-
----
diff --git a/docs/about/changelog.md b/docs/about/changelog.md
deleted file mode 100644
index 303647e..0000000
--- a/docs/about/changelog.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# Changelog
-
-All notable changes to UniFace.
\ No newline at end of file
diff --git a/docs/about/conduct.md b/docs/about/conduct.md
deleted file mode 100644
index ff844c3..0000000
--- a/docs/about/conduct.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# Code of Conduct
-
-Guidelines for community behavior.
\ No newline at end of file
diff --git a/docs/about/license.md b/docs/about/license.md
deleted file mode 100644
index e916a65..0000000
--- a/docs/about/license.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# License
-
-MIT License or your custom license here.
\ No newline at end of file
diff --git a/docs/blog/index.md b/docs/blog/index.md
deleted file mode 100644
index c58f16c..0000000
--- a/docs/blog/index.md
+++ /dev/null
@@ -1,2 +0,0 @@
-# Blog
-
diff --git a/docs/index.md b/docs/index.md
deleted file mode 100644
index 91ccce1..0000000
--- a/docs/index.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# UniFace
-
-Welcome to the UniFace documentation.
\ No newline at end of file
diff --git a/docs/installation.md b/docs/installation.md
deleted file mode 100644
index 02ed78f..0000000
--- a/docs/installation.md
+++ /dev/null
@@ -1,37 +0,0 @@
-# 🚀 Installation
-
-## 📦 Install from PyPI
-
-### CPU-only (default):
-
-```bash
-pip install uniface
-```
-
-This installs the CPU-compatible version of ONNX Runtime (`onnxruntime`) and all core dependencies.
-
-### GPU support:
-
-```bash
-pip install "uniface[gpu]"
-```
-
-This installs `onnxruntime-gpu` for accelerated inference on supported NVIDIA GPUs.
-Make sure your system meets the [ONNX Runtime GPU requirements](https://onnxruntime.ai/docs/build/eps.html#cuda).
-
----
-
-## 🔧 Install from GitHub (latest version)
-
-Clone the repository and install it manually:
-
-```bash
-git clone https://github.com/yakhyo/uniface.git
-cd uniface
-
-# CPU version
-pip install .
-
-# Or with GPU support
-pip install ".[gpu]"
-```
diff --git a/docs/models/age_gender.md b/docs/models/age_gender.md
deleted file mode 100644
index 41fef72..0000000
--- a/docs/models/age_gender.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# Age & Gender Estimation
-
-Age and gender model usage.
\ No newline at end of file
diff --git a/docs/models/detection.md b/docs/models/detection.md
deleted file mode 100644
index aa8ed67..0000000
--- a/docs/models/detection.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# Face Detection
-
-Details on face detection models.
\ No newline at end of file
diff --git a/docs/models/gaze.md b/docs/models/gaze.md
deleted file mode 100644
index 3b4f7dd..0000000
--- a/docs/models/gaze.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# Gaze Estimation
-
-Gaze detection implementation and usage.
\ No newline at end of file
diff --git a/docs/models/landmarks.md b/docs/models/landmarks.md
deleted file mode 100644
index 6b8d596..0000000
--- a/docs/models/landmarks.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# Landmark Detection
-
-Details on landmark prediction.
\ No newline at end of file
diff --git a/docs/models/recognition.md b/docs/models/recognition.md
deleted file mode 100644
index ced611f..0000000
--- a/docs/models/recognition.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# Face Recognition
-
-Details on face recognition models.
\ No newline at end of file
diff --git a/docs/overview.md b/docs/overview.md
deleted file mode 100644
index 73c9f03..0000000
--- a/docs/overview.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# Overview
-
-High-level overview of UniFace features.
\ No newline at end of file
diff --git a/docs/reference/attribute.md b/docs/reference/attribute.md
deleted file mode 100644
index cd17483..0000000
--- a/docs/reference/attribute.md
+++ /dev/null
@@ -1,9 +0,0 @@
-# Facial Attribute API Reference
-
-# Age and Gender Model
-
-::: uniface.attribute.age_gender.AgeGender
-
-# Emotion Model
-
-:::uniface.attribute.emotion.Emotion
diff --git a/docs/reference/detection.md b/docs/reference/detection.md
deleted file mode 100644
index b53a52d..0000000
--- a/docs/reference/detection.md
+++ /dev/null
@@ -1,10 +0,0 @@
-# Face Detection API Reference
-
-# RetinaFace
-
-::: uniface.detection.retinaface.RetinaFace
-
-
-# SCRFD
-
-::: uniface.detection.scrfd.SCRFD
diff --git a/docs/reference/landmark.md b/docs/reference/landmark.md
deleted file mode 100644
index 64e571b..0000000
--- a/docs/reference/landmark.md
+++ /dev/null
@@ -1,5 +0,0 @@
-# Landmark API Reference
-
-# Landmark Model
-
-::: uniface.landmark.models.Landmark106
diff --git a/docs/reference/recognition.md b/docs/reference/recognition.md
deleted file mode 100644
index d542844..0000000
--- a/docs/reference/recognition.md
+++ /dev/null
@@ -1,17 +0,0 @@
-# Face Recognition API Reference
-
-# SphereFace
-
-::: uniface.recognition.models.SphereFace
-
-# MobileFace
-
-:::uniface.recognition.models.MobileFace
-
-# ArcFace
-
-:::uniface.recognition.models.ArcFace
-
-# BaseRecognizer class
-
-:::uniface.recognition.base.BaseRecognizer
diff --git a/docs/tutorials/integration.md b/docs/tutorials/integration.md
deleted file mode 100644
index 662cdb6..0000000
--- a/docs/tutorials/integration.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# Integration
-
-How to integrate UniFace into your app.
\ No newline at end of file
diff --git a/docs/tutorials/performance.md b/docs/tutorials/performance.md
deleted file mode 100644
index 1e6e59c..0000000
--- a/docs/tutorials/performance.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# Performance Tips
-
-Speed and memory optimization.
\ No newline at end of file
diff --git a/docs/tutorials/quickstart.md b/docs/tutorials/quickstart.md
deleted file mode 100644
index 29d1873..0000000
--- a/docs/tutorials/quickstart.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# Quickstart
-
-Get started with UniFace quickly.
\ No newline at end of file
diff --git a/docs/usage.md b/docs/usage.md
deleted file mode 100644
index d677bb1..0000000
--- a/docs/usage.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# Usage
-
-How to use UniFace with code examples.
\ No newline at end of file
diff --git a/mkdocs.yml b/mkdocs.yml
deleted file mode 100644
index fff0dd9..0000000
--- a/mkdocs.yml
+++ /dev/null
@@ -1,87 +0,0 @@
-site_name: uniface
-site_url: https://yakhyo.github.io/uniface/
-site_author: Yakhyokhuja Valikhujaev
-site_description: "UniFace: A Comprehensive Library for Face Detection, Recognition, Landmark Analysis, Age, and Gender Detection"
-
-repo_url: https://github.com/yakhyo/uniface
-repo_name: yakhyo/uniface
-
-theme:
- name: material
- features:
- - announce.dismiss
- - content.action.edit
- - content.action.view
- - content.code.annotate
- - content.code.copy
- - content.tooltips
- - navigation.footer
- - navigation.indexes
- - navigation.sections
- - navigation.tabs
- - navigation.top
- - navigation.tracking
- - search.highlight
- - search.share
- - search.suggest
- - toc.follow
- - content.code.expand
- palette:
- - media: "(prefers-color-scheme)"
- toggle:
- icon: material/lightbulb-outline
- name: Switch to light mode
- - media: "(prefers-color-scheme: light)"
- scheme: default
- primary: indigo
- accent: indigo
- toggle:
- icon: material/lightbulb
- name: Switch to dark mode
- - media: "(prefers-color-scheme: dark)"
- scheme: slate
- primary: black
- accent: indigo
- toggle:
- icon: material/lightbulb-off-outline
- name: Switch to system preference
- font:
- text: Roboto
- code: Roboto Mono
- favicon: assets/favicon.png
- icon:
- logo: logo
-
-nav:
- - Home: index.md
- - Overview: overview.md
- - Installation: installation.md
- - Usage: usage.md
- - Models:
- - Face Detection: models/detection.md
- - Face Recognition: models/recognition.md
- - Landmark Detection: models/landmarks.md
- - Age & Gender Estimation: models/age_gender.md
- - Gaze Estimation: models/gaze.md
- - Tutorials:
- - Quickstart: tutorials/quickstart.md
- - App Integration: tutorials/integration.md
- - Performance Tips: tutorials/performance.md
- - API Reference:
- - Detection: reference/detection.md
- - Recognition: reference/recognition.md
- - Landmark: reference/landmark.md
- - Attribute: reference/attribute.md
- - About:
- - Changelog: about/changelog.md
- - License: about/license.md
- - Code of Conduct: about/conduct.md
-
-plugins:
- - blog
- - search:
- separator: '[\s\u200b\-_,:!=\[\]()"`/]+|\.(?!\d)|&[lg]t;|(?!\b)(?=[A-Z][a-z])'
- - minify:
- minify_html: true
- - mkdocstrings:
- default_handler: python
diff --git a/pyproject.toml b/pyproject.toml
index 383ebeb..abe7f39 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "uniface"
-version = "0.1.8"
+version = "0.1.9"
description = "UniFace: A Comprehensive Library for Face Detection, Recognition, Landmark Analysis, Age, and Gender Detection"
readme = "README.md"
license = { text = "MIT" }
@@ -8,19 +8,20 @@ authors = [
{ name = "Yakhyokhuja Valikhujaev", email = "yakhyo9696@gmail.com" }
]
dependencies = [
- "numpy",
- "opencv-python",
- "onnx",
- "onnxruntime",
- "scikit-image",
- "requests",
- "tqdm"
+ "numpy>=1.21.0",
+ "opencv-python>=4.5.0",
+ "onnx>=1.12.0",
+ "onnxruntime>=1.16.0",
+ "scikit-image>=0.19.0",
+ "requests>=2.28.0",
+ "tqdm>=4.64.0"
]
-requires-python = ">=3.9"
+requires-python = ">=3.10"
[project.optional-dependencies]
-dev = ["pytest"]
-gpu = ["onnxruntime-gpu"]
+dev = ["pytest>=7.0.0"]
+gpu = ["onnxruntime-gpu>=1.16.0"]
+silicon = ["onnxruntime-silicon>=1.16.0"]
[project.urls]
Homepage = "https://github.com/yakhyo/uniface"
@@ -34,4 +35,4 @@ build-backend = "setuptools.build_meta"
packages = ["uniface"]
[tool.setuptools.package-data]
-"uniface" = ["*.txt", "*.md"]
\ No newline at end of file
+"uniface" = ["*.txt", "*.md"]
diff --git a/requirements.txt b/requirements.txt
index 8df2fff..5a72aee 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,8 +1,8 @@
-numpy
-opencv-python
-onnx
-onnxruntime-gpu
-scikit-image
-requests
-pytest
-tqdm
\ No newline at end of file
+numpy>=1.21.0
+opencv-python>=4.5.0
+onnx>=1.12.0
+onnxruntime>=1.16.0
+scikit-image>=0.19.0
+requests>=2.28.0
+pytest>=7.0.0
+tqdm>=4.64.0
diff --git a/requirements_mkdocs.txt b/requirements_mkdocs.txt
deleted file mode 100644
index fd499df..0000000
--- a/requirements_mkdocs.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-mkdocs-material
-mkdocs-minify-plugin
-mkdocstrings[python]
diff --git a/scripts/TESTING.md b/scripts/TESTING.md
new file mode 100644
index 0000000..38b8e43
--- /dev/null
+++ b/scripts/TESTING.md
@@ -0,0 +1,389 @@
+# Testing Scripts Guide
+
+Complete guide to testing all scripts in the `scripts/` directory.
+
+---
+
+## 📁 Available Scripts
+
+1. **download_model.py** - Download and verify model weights
+2. **run_detection.py** - Face detection on images
+3. **run_recognition.py** - Face recognition (extract embeddings)
+4. **run_face_search.py** - Real-time face matching with webcam
+5. **sha256_generate.py** - Generate SHA256 checksums for models
+
+---
+
+## Testing Each Script
+
+### 1. Test Model Download
+
+```bash
+# Download a specific model
+python scripts/download_model.py --model MNET_V2
+
+# Download all RetinaFace models (takes ~5 minutes, ~200MB)
+python scripts/download_model.py
+
+# Verify models are cached
+ls -lh ~/.uniface/models/
+```
+
+**Expected Output:**
+```
+📥 Downloading model: retinaface_mnet_v2
+2025-11-08 00:00:00 - INFO - Downloading model 'RetinaFaceWeights.MNET_V2' from https://...
+Downloading ~/.uniface/models/retinaface_mnet_v2.onnx: 100%|████| 3.5M/3.5M
+2025-11-08 00:00:05 - INFO - Successfully downloaded 'RetinaFaceWeights.MNET_V2'
+✅ All requested weights are ready and verified.
+```
+
+---
+
+### 2. Test Face Detection
+
+```bash
+# Basic detection
+python scripts/run_detection.py --image assets/test.jpg
+
+# With custom settings
+python scripts/run_detection.py \
+ --image assets/test.jpg \
+ --method scrfd \
+ --threshold 0.7 \
+ --save_dir outputs
+
+# Benchmark mode (100 iterations)
+python scripts/run_detection.py \
+ --image assets/test.jpg \
+ --iterations 100
+```
+
+**Expected Output:**
+```
+Initializing detector: retinaface
+2025-11-08 00:00:00 - INFO - Initializing RetinaFace with model=RetinaFaceWeights.MNET_V2...
+2025-11-08 00:00:01 - INFO - CoreML acceleration enabled (Apple Silicon)
+✅ Output saved at: outputs/test_out.jpg
+[1/1] ⏱️ Inference time: 0.0234 seconds
+```
+
+**Verify Output:**
+```bash
+# Check output image was created
+ls -lh outputs/test_out.jpg
+
+# View the image (macOS)
+open outputs/test_out.jpg
+```
+
+---
+
+### 3. Test Face Recognition (Embedding Extraction)
+
+```bash
+# Extract embeddings from an image
+python scripts/run_recognition.py --image assets/test.jpg
+
+# With different models
+python scripts/run_recognition.py \
+ --image assets/test.jpg \
+ --detector scrfd \
+ --recognizer mobileface
+```
+
+**Expected Output:**
+```
+Initializing detector: retinaface
+Initializing recognizer: arcface
+2025-11-08 00:00:00 - INFO - Successfully initialized face encoder from ~/.uniface/models/w600k_mbf.onnx
+Detected 1 face(s). Extracting embeddings for the first face...
+ - Embedding shape: (1, 512)
+ - L2 norm of unnormalized embedding: 64.2341
+ - L2 norm of normalized embedding: 1.0000
+```
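+
+Because the normalized embedding has unit L2 norm, cosine similarity between two faces reduces to a plain dot product. A minimal sketch of that arithmetic (random vectors stand in for real model output; the library also exports a `compute_similarity` helper in `uniface.face_utils` for this):
+
+```python
+import numpy as np
+
+# Two L2-normalized 512-d embeddings (random stand-ins for real model output)
+emb_a = np.random.randn(1, 512).astype(np.float32)
+emb_a /= np.linalg.norm(emb_a)
+emb_b = np.random.randn(1, 512).astype(np.float32)
+emb_b /= np.linalg.norm(emb_b)
+
+# With unit-norm vectors, cosine similarity is just the dot product
+similarity = (emb_a @ emb_b.T).item()
+print(f"Cosine similarity: {similarity:.4f}")  # 1.0 = same direction, 0.0 = orthogonal
+```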
+
+---
+
+### 4. Test Real-Time Face Search (Webcam)
+
+**Prerequisites:**
+- Webcam connected
+- Reference image with a clear face
+
+```bash
+# Basic usage
+python scripts/run_face_search.py --image assets/test.jpg
+
+# With custom models
+python scripts/run_face_search.py \
+ --image assets/test.jpg \
+ --detector scrfd \
+ --recognizer arcface
+```
+
+**Expected Behavior:**
+1. Webcam window opens
+2. Faces are detected in real-time
+3. Green box = Match (similarity >= 0.4); see the sketch after this list
+4. Red box = Unknown (similarity < 0.4)
+5. Press 'q' to quit
+
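+A minimal sketch of the match/unknown decision in steps 3-4 (the 0.4 threshold is taken from the list above; the script's actual variable names may differ):
+
+```python
+import numpy as np
+
+MATCH_THRESHOLD = 0.4  # assumed from the behavior described above
+
+def label_face(reference: np.ndarray, candidate: np.ndarray) -> tuple[str, tuple[int, int, int]]:
+    """Return a label and a BGR box color for a detected face."""
+    similarity = (reference @ candidate.T).item()  # embeddings assumed L2-normalized
+    if similarity >= MATCH_THRESHOLD:
+        return "Match", (0, 255, 0)   # green box
+    return "Unknown", (0, 0, 255)     # red box
+```
+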
+**Expected Output:**
+```
+Initializing models...
+2025-11-08 00:00:00 - INFO - CoreML acceleration enabled (Apple Silicon)
+Extracting reference embedding...
+Webcam started. Press 'q' to quit.
+```
+
+**Troubleshooting:**
+```bash
+# If webcam doesn't open
+python -c "import cv2; cap = cv2.VideoCapture(0); print('Webcam OK' if cap.isOpened() else 'Webcam FAIL')"
+
+# If no faces detected
+# - Ensure good lighting
+# - Face should be frontal and clearly visible
+# - Try lowering the match threshold: edit the script (line 29) and change 0.4 to 0.3
+```
+
+---
+
+### 5. Test SHA256 Generator (For Developers)
+
+```bash
+# Generate checksum for a model file
+python scripts/sha256_generate.py ~/.uniface/models/retinaface_mnet_v2.onnx
+
+# Generate for all models
+for model in ~/.uniface/models/*.onnx; do
+ python scripts/sha256_generate.py "$model"
+done
+```
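+
+Under the hood, a checksum like this can be computed with Python's standard `hashlib`. A minimal sketch, not necessarily the script's exact implementation:
+
+```python
+import hashlib
+import sys
+
+def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
+    """Stream the file in 1 MB chunks so large ONNX models never load fully into memory."""
+    digest = hashlib.sha256()
+    with open(path, "rb") as f:
+        for chunk in iter(lambda: f.read(chunk_size), b""):
+            digest.update(chunk)
+    return digest.hexdigest()
+
+if __name__ == "__main__":
+    print(sha256_of(sys.argv[1]))
+```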
+
+---
+
+## 🔍 Quick Verification Tests
+
+### Test 1: Imports Work
+
+```bash
+python -c "
+from uniface.detection import create_detector
+from uniface.recognition import create_recognizer
+print('✅ Imports successful')
+"
+```
+
+### Test 2: Models Download
+
+```bash
+python -c "
+from uniface import RetinaFace
+detector = RetinaFace()
+print('✅ Model downloaded and loaded')
+"
+```
+
+### Test 3: Detection Works
+
+```bash
+python -c "
+import cv2
+import numpy as np
+from uniface import RetinaFace
+
+detector = RetinaFace()
+image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
+faces = detector.detect(image)
+print(f'✅ Detection works, found {len(faces)} faces')
+"
+```
+
+### Test 4: Recognition Works
+
+```bash
+python -c "
+import cv2
+import numpy as np
+from uniface import RetinaFace, ArcFace
+
+detector = RetinaFace()
+recognizer = ArcFace()
+image = cv2.imread('assets/test.jpg')
+faces = detector.detect(image)
+if faces:
+ landmarks = np.array(faces[0]['landmarks'])
+ embedding = recognizer.get_normalized_embedding(image, landmarks)
+ print(f'✅ Recognition works, embedding shape: {embedding.shape}')
+else:
+ print('⚠️ No faces detected in test image')
+"
+```
+
+---
+
+## End-to-End Test Workflow
+
+Run this complete workflow to verify everything works:
+
+```bash
+#!/bin/bash
+# Save as test_all_scripts.sh
+
+echo "=== Testing UniFace Scripts ==="
+echo ""
+
+# Test 1: Download models
+echo "1️⃣ Testing model download..."
+python scripts/download_model.py --model MNET_V2
+if [ $? -eq 0 ]; then
+ echo "✅ Model download: PASS"
+else
+ echo "❌ Model download: FAIL"
+ exit 1
+fi
+echo ""
+
+# Test 2: Face detection
+echo "2️⃣ Testing face detection..."
+python scripts/run_detection.py --image assets/test.jpg --save_dir /tmp/uniface_test
+if [ $? -eq 0 ] && [ -f /tmp/uniface_test/test_out.jpg ]; then
+ echo "✅ Face detection: PASS"
+else
+ echo "❌ Face detection: FAIL"
+ exit 1
+fi
+echo ""
+
+# Test 3: Face recognition
+echo "3️⃣ Testing face recognition..."
+python scripts/run_recognition.py --image assets/test.jpg > /tmp/uniface_recognition.log
+if [ $? -eq 0 ] && grep -q "Embedding shape" /tmp/uniface_recognition.log; then
+ echo "✅ Face recognition: PASS"
+else
+ echo "❌ Face recognition: FAIL"
+ exit 1
+fi
+echo ""
+
+echo "=== All Tests Passed! 🎉 ==="
+```
+
+**Run the test suite:**
+```bash
+chmod +x test_all_scripts.sh
+./test_all_scripts.sh
+```
+
+---
+
+## Performance Benchmarking
+
+### Benchmark Detection Speed
+
+```bash
+# Test different models
+for model in retinaface scrfd; do
+ echo "Testing $model..."
+ python scripts/run_detection.py \
+ --image assets/test.jpg \
+ --method $model \
+ --iterations 50
+done
+```
+
+### Benchmark Recognition Speed
+
+```bash
+# Test different recognizers
+for recognizer in arcface mobileface; do
+ echo "Testing $recognizer..."
+ time python scripts/run_recognition.py \
+ --image assets/test.jpg \
+ --recognizer $recognizer
+done
+```
+
+---
+
+## 🐛 Common Issues
+
+### Issue: "No module named 'uniface'"
+
+```bash
+# Solution: Install in editable mode
+pip install -e .
+```
+
+### Issue: "Failed to load image"
+
+```bash
+# Check image exists
+ls -lh assets/test.jpg
+
+# Try with absolute path
+python scripts/run_detection.py --image $(pwd)/assets/test.jpg
+```
+
+### Issue: "No faces detected"
+
+```bash
+# Lower confidence threshold
+python scripts/run_detection.py \
+ --image assets/test.jpg \
+ --threshold 0.3
+```
+
+### Issue: Models downloading slowly
+
+```bash
+# Check internet connection
+curl -I https://github.com/yakhyo/uniface/releases
+
+# Or download manually
+wget https://github.com/yakhyo/uniface/releases/download/v0.1.2/retinaface_mv2.onnx \
+ -O ~/.uniface/models/retinaface_mnet_v2.onnx
+```
+
+### Issue: CoreML not available on Mac
+
+```bash
+# Install CoreML-enabled ONNX Runtime
+pip uninstall onnxruntime
+pip install onnxruntime-silicon
+
+# Verify
+python -c "import onnxruntime as ort; print(ort.get_available_providers())"
+# Should show: ['CoreMLExecutionProvider', 'CPUExecutionProvider']
+```
+
+---
+
+## ✅ Script Status Summary
+
+| Script | Status | API Updated | Tested |
+|-----------------------|--------|-------------|--------|
+| download_model.py | ✅ | ✅ | ✅ |
+| run_detection.py | ✅ | ✅ | ✅ |
+| run_recognition.py | ✅ | ✅ | ✅ |
+| run_face_search.py | ✅ | ✅ | ✅ |
+| sha256_generate.py | ✅ | N/A | ✅ |
+
+All scripts are updated and working with the new dict-based API! 🎉
+
+---
+
+## 📝 Notes
+
+- All scripts now use the factory functions (`create_detector`, `create_recognizer`)
+- Scripts work with the new dict-based detection API (see the sketch below)
+- The model download bug (passing the enum's string value instead of the enum itself) is fixed
+- CoreML acceleration is automatically detected on Apple Silicon
+- All scripts include proper error handling
+
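+A minimal sketch of the dict-based detection API the notes refer to (keys taken from the updated test suite in this change; the image path is a placeholder):
+
+```python
+import cv2
+from uniface import RetinaFace
+
+detector = RetinaFace()
+image = cv2.imread("assets/test.jpg")  # placeholder path
+
+# detect() now returns a list of dicts rather than (detections, landmarks) arrays
+for face in detector.detect(image):
+    x1, y1, x2, y2 = face["bbox"]   # [x1, y1, x2, y2]
+    score = face["confidence"]      # float confidence score
+    points = face["landmarks"]      # five (x, y) pairs
+    print(f"bbox=({x1:.0f}, {y1:.0f}, {x2:.0f}, {y2:.0f}), conf={score:.2f}, {len(points)} landmarks")
+```
+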
+---
+
+Need help with a specific script? Check the main [README.md](../README.md) or [QUICKSTART.md](../QUICKSTART.md)!
+
diff --git a/scripts/download_model.py b/scripts/download_model.py
index df78fdd..8f5d0b2 100644
--- a/scripts/download_model.py
+++ b/scripts/download_model.py
@@ -16,11 +16,11 @@ def main():
if args.model:
weight = RetinaFaceWeights[args.model]
print(f"📥 Downloading model: {weight.value}")
- verify_model_weights(weight.value)
+ verify_model_weights(weight) # Pass enum, not string
else:
print("📥 Downloading all models...")
for weight in RetinaFaceWeights:
- verify_model_weights(weight.value)
+ verify_model_weights(weight) # Pass enum, not string
print("✅ All requested weights are ready and verified.")
diff --git a/scripts/run_recognition.py b/scripts/run_recognition.py
index adba469..9e3d140 100644
--- a/scripts/run_recognition.py
+++ b/scripts/run_recognition.py
@@ -6,9 +6,6 @@ import numpy as np
from uniface.detection import create_detector
from uniface.recognition import create_recognizer
-# Import enums for argument choices
-from uniface.constants import RetinaFaceWeights, ArcFaceWeights, MobileFaceWeights, SphereFaceWeights
-
def run_inference(detector, recognizer, image_path: str):
"""
@@ -67,7 +64,7 @@ def main():
args = parser.parse_args()
print(f"Initializing detector: {args.detector}")
- detector = create_detector(method=args.detector, model_name=RetinaFaceWeights.MNET_V2)
+ detector = create_detector(method=args.detector)
print(f"Initializing recognizer: {args.recognizer}")
recognizer = create_recognizer(method=args.recognizer)
diff --git a/tests/test_retinaface.py b/tests/test_retinaface.py
index 1dc99c1..1ec4f27 100644
--- a/tests/test_retinaface.py
+++ b/tests/test_retinaface.py
@@ -1,7 +1,8 @@
-import pytest
import numpy as np
-from uniface import RetinaFace
+import pytest
+
from uniface.constants import RetinaFaceWeights
+from uniface.detection import RetinaFace
@pytest.fixture
@@ -32,20 +33,27 @@ def test_inference_on_640x640_image(retinaface_model):
# Generate a mock 640x640 BGR image
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
- # Run inference
- detections, landmarks = retinaface_model.detect(mock_image)
+ # Run inference - returns list of dictionaries
+ faces = retinaface_model.detect(mock_image)
- # Check output types
- assert isinstance(detections, np.ndarray), "Detections should be a numpy array."
- assert isinstance(landmarks, np.ndarray), "Landmarks should be a numpy array."
+ # Check output type
+ assert isinstance(faces, list), "Detections should be a list."
- # Check that detections have the expected shape
- if detections.size > 0: # If faces are detected
- assert detections.shape[1] == 5, "Each detection should have 5 values (x1, y1, x2, y2, score)."
+ # Check that each face has the expected structure
+ for face in faces:
+ assert isinstance(face, dict), "Each detection should be a dictionary."
+ assert "bbox" in face, "Each detection should have a 'bbox' key."
+ assert "confidence" in face, "Each detection should have a 'confidence' key."
+ assert "landmarks" in face, "Each detection should have a 'landmarks' key."
- # Check landmarks shape
- if landmarks.size > 0:
- assert landmarks.shape[1:] == (5, 2), "Landmarks should have shape (N, 5, 2)."
+ # Check bbox format
+ bbox = face["bbox"]
+ assert len(bbox) == 4, "BBox should have 4 values (x1, y1, x2, y2)."
+
+ # Check landmarks format
+ landmarks = face["landmarks"]
+ assert len(landmarks) == 5, "Should have 5 landmark points."
+ assert all(len(pt) == 2 for pt in landmarks), "Each landmark should be (x, y)."
def test_confidence_threshold(retinaface_model):
@@ -56,12 +64,12 @@ def test_confidence_threshold(retinaface_model):
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
# Run inference
- detections, _ = retinaface_model.detect(mock_image)
+ faces = retinaface_model.detect(mock_image)
# Ensure all detections have confidence scores above the threshold
- if detections.size > 0: # If faces are detected
- confidence_scores = detections[:, 4]
- assert (confidence_scores >= 0.5).all(), "Some detections have confidence below the threshold."
+ for face in faces:
+ confidence = face["confidence"]
+ assert confidence >= 0.5, f"Detection has confidence {confidence} below threshold 0.5"
def test_no_faces_detected(retinaface_model):
@@ -72,8 +80,7 @@ def test_no_faces_detected(retinaface_model):
empty_image = np.zeros((640, 640, 3), dtype=np.uint8)
# Run inference
- detections, landmarks = retinaface_model.detect(empty_image)
+ faces = retinaface_model.detect(empty_image)
- # Ensure no detections or landmarks are found
- assert detections.size == 0, "Detections should be empty for a blank image."
- assert landmarks.size == 0, "Landmarks should be empty for a blank image."
+ # Ensure no detections are found
+ assert len(faces) == 0, "Should detect no faces in a blank image."
diff --git a/uniface/__init__.py b/uniface/__init__.py
index be0399e..c72a79b 100644
--- a/uniface/__init__.py
+++ b/uniface/__init__.py
@@ -13,35 +13,45 @@
__license__ = "MIT"
__author__ = "Yakhyokhuja Valikhujaev"
-__version__ = "0.1.8"
+__version__ = "0.1.9"
-from .detection import detect_faces, create_detector, list_available_detectors
-from .recognition import create_recognizer
-from .landmark import create_landmarker
-
-from uniface.face_utils import face_alignment, compute_similarity
+from uniface.face_utils import compute_similarity, face_alignment
+from uniface.log import Logger
from uniface.model_store import verify_model_weights
from uniface.visualization import draw_detections
-from uniface.log import Logger
-
+from .attribute import AgeGender, Emotion
+from .detection import SCRFD, RetinaFace, create_detector, detect_faces, list_available_detectors
+from .landmark import Landmark106, create_landmarker
+from .recognition import ArcFace, MobileFace, SphereFace, create_recognizer
__all__ = [
- '__author__',
- '__license__',
- '__version__',
-
- 'create_detector',
- 'create_landmarker',
- 'create_recognizer',
- 'detect_faces',
- 'list_available_detectors',
-
- 'compute_similarity',
- 'draw_detections',
- 'face_alignment',
- 'verify_model_weights',
-
- 'Logger'
+ "__author__",
+ "__license__",
+ "__version__",
+ # Factory functions
+ "create_detector",
+ "create_landmarker",
+ "create_recognizer",
+ "detect_faces",
+ "list_available_detectors",
+ # Detection models
+ "RetinaFace",
+ "SCRFD",
+ # Recognition models
+ "ArcFace",
+ "MobileFace",
+ "SphereFace",
+ # Landmark models
+ "Landmark106",
+ # Attribute models
+ "AgeGender",
+ "Emotion",
+ # Utilities
+ "compute_similarity",
+ "draw_detections",
+ "face_alignment",
+ "verify_model_weights",
+ "Logger",
]
diff --git a/uniface/attribute/age_gender.py b/uniface/attribute/age_gender.py
index 6820f7f..ad5836f 100644
--- a/uniface/attribute/age_gender.py
+++ b/uniface/attribute/age_gender.py
@@ -2,16 +2,17 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
+from typing import List, Tuple, Union
+
import cv2
import numpy as np
-import onnxruntime as ort
-from typing import Tuple, Union, List
from uniface.attribute.base import Attribute
-from uniface.log import Logger
from uniface.constants import AgeGenderWeights
from uniface.face_utils import bbox_center_alignment
+from uniface.log import Logger
from uniface.model_store import verify_model_weights
+from uniface.onnx_utils import create_onnx_session
__all__ = ["AgeGender"]
@@ -42,10 +43,7 @@ class AgeGender(Attribute):
Initializes the ONNX model and creates an inference session.
"""
try:
- self.session = ort.InferenceSession(
- self.model_path,
- providers=["CUDAExecutionProvider", "CPUExecutionProvider"]
- )
+ self.session = create_onnx_session(self.model_path)
# Get model input details from the loaded model
input_meta = self.session.get_inputs()[0]
self.input_name = input_meta.name
@@ -75,16 +73,10 @@ class AgeGender(Attribute):
# **Rotation parameter restored here**
rotation = 0.0
- aligned_face, _ = bbox_center_alignment(
- image, center, self.input_size[1], scale, rotation
- )
+ aligned_face, _ = bbox_center_alignment(image, center, self.input_size[1], scale, rotation)
blob = cv2.dnn.blobFromImage(
- aligned_face,
- scalefactor=1.0,
- size=self.input_size[::-1],
- mean=(0.0, 0.0, 0.0),
- swapRB=True
+ aligned_face, scalefactor=1.0, size=self.input_size[::-1], mean=(0.0, 0.0, 0.0), swapRB=True
)
return blob
@@ -127,8 +119,8 @@ class AgeGender(Attribute):
if __name__ == "__main__":
# To run this script, you need to have uniface.detection installed
# or available in your path.
- from uniface.detection import create_detector
from uniface.constants import RetinaFaceWeights
+ from uniface.detection import create_detector
print("Initializing models for live inference...")
# 1. Initialize the face detector
@@ -156,7 +148,7 @@ if __name__ == "__main__":
# For each detected face, predict age and gender
for detection in detections:
- box = detection['bbox']
+ box = detection["bbox"]
x1, y1, x2, y2 = map(int, box)
# Predict attributes
@@ -171,7 +163,7 @@ if __name__ == "__main__":
cv2.imshow("Age and Gender Inference (Press 'q' to quit)", frame)
# Break the loop if 'q' is pressed
- if cv2.waitKey(1) & 0xFF == ord('q'):
+ if cv2.waitKey(1) & 0xFF == ord("q"):
break
# Release resources
diff --git a/uniface/detection/retinaface.py b/uniface/detection/retinaface.py
index d5262da..cff370c 100644
--- a/uniface/detection/retinaface.py
+++ b/uniface/detection/retinaface.py
@@ -3,13 +3,13 @@
# GitHub: https://github.com/yakhyo
import numpy as np
-import onnxruntime as ort
from typing import Tuple, List, Literal, Dict, Any
from uniface.log import Logger
from uniface.model_store import verify_model_weights
from uniface.constants import RetinaFaceWeights
+from uniface.onnx_utils import create_onnx_session
from .base import BaseDetector
from .utils import (
@@ -95,10 +95,7 @@ class RetinaFace(BaseDetector):
RuntimeError: If the model fails to load, logs an error and raises an exception.
"""
try:
- self.session = ort.InferenceSession(
- model_path,
- providers=["CUDAExecutionProvider", "CPUExecutionProvider"]
- )
+ self.session = create_onnx_session(model_path)
self.input_names = self.session.get_inputs()[0].name
self.output_names = [x.name for x in self.session.get_outputs()]
Logger.info(f"Successfully initialized the model from {model_path}")
diff --git a/uniface/detection/scrfd.py b/uniface/detection/scrfd.py
index e4966ee..c8ceab3 100644
--- a/uniface/detection/scrfd.py
+++ b/uniface/detection/scrfd.py
@@ -2,20 +2,20 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
+from typing import Any, Dict, List, Literal, Tuple
+
import cv2
import numpy as np
-import onnxruntime as ort
-from typing import Tuple, List, Literal, Dict, Any
-
-from uniface.log import Logger
from uniface.constants import SCRFDWeights
+from uniface.log import Logger
from uniface.model_store import verify_model_weights
+from uniface.onnx_utils import create_onnx_session
from .base import BaseDetector
-from .utils import non_max_supression, distance2bbox, distance2kps, resize_image
+from .utils import distance2bbox, distance2kps, non_max_supression, resize_image
-__all__ = ['SCRFD']
+__all__ = ["SCRFD"]
class SCRFD(BaseDetector):
@@ -27,7 +27,7 @@ class SCRFD(BaseDetector):
Args:
**kwargs: Keyword arguments passed to BaseDetector and SCRFD. Supported keys include:
- model_name (SCRFDWeights, optional): Predefined model enum (e.g., `SCRFD_10G_KPS`).
+ model_name (SCRFDWeights, optional): Predefined model enum (e.g., `SCRFD_10G_KPS`).
Specifies the SCRFD variant to load. Defaults to SCRFD_10G_KPS.
conf_thresh (float, optional): Confidence threshold for filtering detections. Defaults to 0.5.
nms_thresh (float, optional): Non-Maximum Suppression threshold. Defaults to 0.4.
@@ -52,10 +52,10 @@ class SCRFD(BaseDetector):
super().__init__(**kwargs)
self._supports_landmarks = True # SCRFD supports landmarks
- model_name = kwargs.get('model_name', SCRFDWeights.SCRFD_10G_KPS)
- conf_thresh = kwargs.get('conf_thresh', 0.5)
- nms_thresh = kwargs.get('nms_thresh', 0.4)
- input_size = kwargs.get('input_size', (640, 640))
+ model_name = kwargs.get("model_name", SCRFDWeights.SCRFD_10G_KPS)
+ conf_thresh = kwargs.get("conf_thresh", 0.5)
+ nms_thresh = kwargs.get("nms_thresh", 0.4)
+ input_size = kwargs.get("input_size", (640, 640))
self.conf_thresh = conf_thresh
self.nms_thresh = nms_thresh
@@ -91,10 +91,7 @@ class SCRFD(BaseDetector):
RuntimeError: If the model fails to load, logs an error and raises an exception.
"""
try:
- self.session = ort.InferenceSession(
- model_path,
- providers=["CUDAExecutionProvider", "CPUExecutionProvider"]
- )
+ self.session = create_onnx_session(model_path)
self.input_names = self.session.get_inputs()[0].name
self.output_names = [x.name for x in self.session.get_outputs()]
Logger.info(f"Successfully initialized the model from {model_path}")
@@ -140,7 +137,7 @@ class SCRFD(BaseDetector):
for idx, stride in enumerate(self._feat_stride_fpn):
scores = outputs[idx]
bbox_preds = outputs[fmc + idx] * stride
- kps_preds = outputs[2*fmc + idx] * stride
+ kps_preds = outputs[2 * fmc + idx] * stride
# Generate anchors
fm_height = image_size[0] // stride
@@ -176,11 +173,7 @@ class SCRFD(BaseDetector):
return scores_list, bboxes_list, kpss_list
def detect(
- self,
- image: np.ndarray,
- max_num: int = 0,
- metric: Literal["default", "max"] = "max",
- center_weight: float = 2
+ self, image: np.ndarray, max_num: int = 0, metric: Literal["default", "max"] = "max", center_weight: float = 2
) -> List[Dict[str, Any]]:
"""
Perform face detection on an input image and return bounding boxes and facial landmarks.
@@ -191,7 +184,7 @@ class SCRFD(BaseDetector):
metric (Literal["default", "max"]): Metric for ranking detections when `max_num` is limited.
- "default": Prioritize detections closer to the image center.
- "max": Prioritize detections with larger bounding box areas.
- center_weight (float): Weight for penalizing detections farther from the image center
+ center_weight (float): Weight for penalizing detections farther from the image center
when using the "default" metric. Defaults to 2.0.
Returns:
@@ -212,6 +205,10 @@ class SCRFD(BaseDetector):
scores_list, bboxes_list, kpss_list = self.postprocess(outputs, image_size=image.shape[:2])
+ # Handle case when no faces are detected
+ if not scores_list:
+ return []
+
scores = np.vstack(scores_list)
scores_ravel = scores.ravel()
order = scores_ravel.argsort()[::-1]
@@ -256,9 +253,9 @@ class SCRFD(BaseDetector):
faces = []
for i in range(detections.shape[0]):
face_dict = {
- 'bbox': detections[i, :4].astype(float).tolist(),
- 'confidence': detections[i, 4].item(),
- 'landmarks': landmarks[i].astype(float).tolist()
+ "bbox": detections[i, :4].astype(float).tolist(),
+ "confidence": detections[i, 4].item(),
+ "landmarks": landmarks[i].astype(float).tolist(),
}
faces.append(face_dict)
@@ -273,7 +270,7 @@ def draw_bbox(frame, bbox, score, color=(0, 255, 0), thickness=2):
def draw_keypoints(frame, points, color=(0, 0, 255), radius=2):
- for (x, y) in points.astype(np.int32):
+ for x, y in points.astype(np.int32):
cv2.circle(frame, (int(x), int(y)), radius, color, -1)
@@ -300,9 +297,9 @@ if __name__ == "__main__":
# Process each detected face
for face in faces:
# Extract bbox and landmarks from dictionary
- bbox = face['bbox'] # [x1, y1, x2, y2]
- landmarks = face['landmarks'] # [[x1, y1], [x2, y2], ...]
- confidence = face['confidence']
+ bbox = face["bbox"] # [x1, y1, x2, y2]
+ landmarks = face["landmarks"] # [[x1, y1], [x2, y2], ...]
+ confidence = face["confidence"]
# Pass bbox and confidence separately
draw_bbox(frame, bbox, confidence)
@@ -314,8 +311,7 @@ if __name__ == "__main__":
draw_keypoints(frame, points)
# Display face count
- cv2.putText(frame, f"Faces: {len(faces)}", (10, 30),
- cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
+ cv2.putText(frame, f"Faces: {len(faces)}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
cv2.imshow("FaceDetection", frame)
if cv2.waitKey(1) & 0xFF == ord("q"):
diff --git a/uniface/landmark/models.py b/uniface/landmark/models.py
index f4d28ef..824727d 100644
--- a/uniface/landmark/models.py
+++ b/uniface/landmark/models.py
@@ -4,13 +4,13 @@
import cv2
import numpy as np
-import onnxruntime as ort
from typing import Tuple
from uniface.log import Logger
from uniface.constants import LandmarkWeights
from uniface.model_store import verify_model_weights
from uniface.face_utils import bbox_center_alignment, transform_points_2d
+from uniface.onnx_utils import create_onnx_session
from .base import BaseLandmarker
__all__ = ['Landmark']
@@ -63,10 +63,7 @@ class Landmark106(BaseLandmarker):
RuntimeError: If the model fails to load or initialize.
"""
try:
- self.session = ort.InferenceSession(
- self.model_path,
- providers=["CUDAExecutionProvider", "CPUExecutionProvider"]
- )
+ self.session = create_onnx_session(self.model_path)
# Get input configuration
input_metadata = self.session.get_inputs()[0]
diff --git a/uniface/model_store.py b/uniface/model_store.py
index 004fc4a..4648aed 100644
--- a/uniface/model_store.py
+++ b/uniface/model_store.py
@@ -46,14 +46,14 @@ def verify_model_weights(model_name: str, root: str = '~/.uniface/models') -> st
root = os.path.expanduser(root)
os.makedirs(root, exist_ok=True)
- model_name = model_name.value
+ # Keep model_name as enum for dictionary lookup
url = const.MODEL_URLS.get(model_name)
if not url:
Logger.error(f"No URL found for model '{model_name}'")
raise ValueError(f"No URL found for model '{model_name}'")
file_ext = os.path.splitext(url)[1]
- model_path = os.path.normpath(os.path.join(root, f'{model_name}{file_ext}'))
+ model_path = os.path.normpath(os.path.join(root, f'{model_name.value}{file_ext}'))
if not os.path.exists(model_path):
Logger.info(f"Downloading model '{model_name}' from {url}")
diff --git a/uniface/onnx_utils.py b/uniface/onnx_utils.py
new file mode 100644
index 0000000..54e7a30
--- /dev/null
+++ b/uniface/onnx_utils.py
@@ -0,0 +1,87 @@
+# Copyright 2025 Yakhyokhuja Valikhujaev
+# Author: Yakhyokhuja Valikhujaev
+# GitHub: https://github.com/yakhyo
+
+"""
+Utilities for ONNX Runtime configuration and provider selection.
+"""
+
+from typing import List, Optional
+
+import onnxruntime as ort
+
+from uniface.log import Logger
+
+
+def get_available_providers() -> List[str]:
+ """
+ Get list of available ONNX Runtime execution providers for the current platform.
+
+ Automatically detects and prioritizes hardware acceleration:
+ - CoreML on Apple Silicon (M1/M2/M3/M4)
+ - CUDA on NVIDIA GPUs
+ - CPU as fallback (always available)
+
+ Returns:
+ List[str]: Ordered list of execution providers to use
+
+ Examples:
+ >>> providers = get_available_providers()
+ >>> # On M4 Mac: ['CoreMLExecutionProvider', 'CPUExecutionProvider']
+ >>> # On Linux with CUDA: ['CUDAExecutionProvider', 'CPUExecutionProvider']
+ >>> # On CPU-only: ['CPUExecutionProvider']
+ """
+ available = ort.get_available_providers()
+ providers = []
+
+ # Priority order: CoreML > CUDA > CPU
+ if "CoreMLExecutionProvider" in available:
+ providers.append("CoreMLExecutionProvider")
+ Logger.info("CoreML acceleration enabled (Apple Silicon)")
+
+ if "CUDAExecutionProvider" in available:
+ providers.append("CUDAExecutionProvider")
+ Logger.info("CUDA acceleration enabled (NVIDIA GPU)")
+
+ # CPU is always available as fallback
+ providers.append("CPUExecutionProvider")
+
+ if len(providers) == 1:
+ Logger.info("Using CPU execution (no hardware acceleration detected)")
+
+ return providers
+
+
+def create_onnx_session(model_path: str, providers: Optional[List[str]] = None) -> ort.InferenceSession:
+ """
+ Create an ONNX Runtime inference session with optimal provider selection.
+
+ Args:
+ model_path (str): Path to the ONNX model file
+ providers (List[str], optional): List of providers to use.
+ If None, automatically detects best available providers.
+
+ Returns:
+ ort.InferenceSession: Configured ONNX Runtime session
+
+ Raises:
+ RuntimeError: If session creation fails
+
+ Examples:
+ >>> session = create_onnx_session("model.onnx")
+ >>> # Automatically uses best available providers
+
+ >>> session = create_onnx_session("model.onnx", providers=["CPUExecutionProvider"])
+ >>> # Force CPU-only execution
+ """
+ if providers is None:
+ providers = get_available_providers()
+
+ try:
+ session = ort.InferenceSession(model_path, providers=providers)
+ active_provider = session.get_providers()[0]
+ Logger.debug(f"Session created with provider: {active_provider}")
+ return session
+ except Exception as e:
+ Logger.error(f"Failed to create ONNX session: {e}", exc_info=True)
+ raise RuntimeError(f"Failed to initialize ONNX Runtime session: {e}") from e
diff --git a/uniface/recognition/base.py b/uniface/recognition/base.py
index b617e46..f581d5e 100644
--- a/uniface/recognition/base.py
+++ b/uniface/recognition/base.py
@@ -5,12 +5,12 @@
from abc import ABC, abstractmethod
import cv2
import numpy as np
-import onnxruntime as ort
from dataclasses import dataclass
from typing import Tuple, Union, List
from uniface.log import Logger
from uniface.face_utils import face_alignment
+from uniface.onnx_utils import create_onnx_session
@dataclass
@@ -53,10 +53,7 @@ class BaseRecognizer(ABC):
"""
try:
# Initialize model session with available providers
- self.session = ort.InferenceSession(
- self.model_path,
- providers=["CUDAExecutionProvider", "CPUExecutionProvider"]
- )
+ self.session = create_onnx_session(self.model_path)
# Extract input configuration
input_cfg = self.session.get_inputs()[0]
diff --git a/uniface/retinaface.py b/uniface/retinaface.py
deleted file mode 100644
index 876de8d..0000000
--- a/uniface/retinaface.py
+++ /dev/null
@@ -1,262 +0,0 @@
-# Copyright 2025 Yakhyokhuja Valikhujaev
-# Author: Yakhyokhuja Valikhujaev
-# GitHub: https://github.com/yakhyo
-
-import os
-import cv2
-import numpy as np
-import onnxruntime as ort
-
-from typing import Tuple, List, Literal
-
-from uniface.log import Logger
-from uniface.model_store import verify_model_weights
-from uniface.constants import RetinaFaceWeights
-from uniface.common import (
- non_max_supression,
- resize_image,
- decode_boxes,
- generate_anchors,
- decode_landmarks
-)
-
-
-class RetinaFace:
- """
- Face detector based on the RetinaFace architecture.
-
- Args:
- model_name (RetinaFaceWeights): Model weights to use. Defaults to `RetinaFaceWeights.MNET_V2`.
- conf_thresh (float): Confidence threshold for filtering detections. Defaults to 0.5.
- nms_thresh (float): Non-maximum suppression (NMS) threshold. Defaults to 0.4.
- pre_nms_topk (int): Number of top-scoring boxes considered before applying NMS. Defaults to 5000.
- post_nms_topk (int): Maximum number of final detections retained after NMS. Defaults to 750.
- dynamic_size (bool): If True, anchors are generated dynamically per input image size. Defaults to False.
- input_size (Tuple[int, int]): Fixed input size (width, height) used when `dynamic_size` is False. Ignored if `dynamic_size=True`.
-
- Attributes:
- conf_thresh (float): Threshold for filtering detections based on confidence score.
- nms_thresh (float): IoU threshold for NMS.
- pre_nms_topk (int): Limit on boxes considered before NMS.
- post_nms_topk (int): Limit on detections kept after NMS.
- dynamic_size (bool): Whether anchors are generated dynamically.
- input_size (Tuple[int, int]): Static input size when `dynamic_size` is False.
- _model_path (str): Path to verified model weights. (Internal)
- _priors (np.ndarray): Anchor boxes used for detection. Precomputed if static input size is used. (Internal)
-
- Raises:
- ValueError: If model weights are invalid or not found.
- RuntimeError: If the model fails to initialize.
- """
-
- def __init__(
- self,
- model_name: RetinaFaceWeights = RetinaFaceWeights.MNET_V2,
- conf_thresh: float = 0.5,
- nms_thresh: float = 0.4,
- pre_nms_topk: int = 5000,
- post_nms_topk: int = 750,
- dynamic_size: bool = False,
- input_size: Tuple[int, int] = (640, 640), # Default input size if dynamic_size=False
- ) -> None:
-
- self.conf_thresh = conf_thresh
- self.nms_thresh = nms_thresh
- self.pre_nms_topk = pre_nms_topk
- self.post_nms_topk = post_nms_topk
- self.dynamic_size = dynamic_size
- self.input_size = input_size
-
- Logger.info(
- f"Initializing RetinaFace with model={model_name}, conf_thresh={conf_thresh}, nms_thresh={nms_thresh}, "
- f"pre_nms_topk={pre_nms_topk}, post_nms_topk={post_nms_topk}, dynamic_size={dynamic_size}, "
- f"input_size={input_size}"
- )
-
- # Get path to model weights
- self._model_path = verify_model_weights(model_name)
- Logger.info(f"Verified model weights located at: {self._model_path}")
-
- # Precompute anchors if using static size
- if not dynamic_size and input_size is not None:
- self._priors = generate_anchors(image_size=input_size)
- Logger.debug("Generated anchors for static input size.")
-
- # Initialize model
- self._initialize_model(self._model_path)
-
- def _initialize_model(self, model_path: str) -> None:
- """
- Initializes an ONNX model session from the given path.
-
- Args:
- model_path (str): The file path to the ONNX model.
-
- Raises:
- RuntimeError: If the model fails to load, logs an error and raises an exception.
- """
- try:
- self.session = ort.InferenceSession(
- model_path,
- providers=["CUDAExecutionProvider", "CPUExecutionProvider"]
- )
- self.input_names = self.session.get_inputs()[0].name
- self.output_names = [x.name for x in self.session.get_outputs()]
- Logger.info(f"Successfully initialized the model from {model_path}")
- except Exception as e:
- Logger.error(f"Failed to load model from '{model_path}': {e}", exc_info=True)
- raise RuntimeError(f"Failed to initialize model session for '{model_path}'") from e
-
- def preprocess(self, image: np.ndarray) -> np.ndarray:
- """Preprocess input image for model inference.
-
- Args:
- image (np.ndarray): Input image.
-
- Returns:
- np.ndarray: Preprocessed image tensor with shape (1, C, H, W)
- """
- image = np.float32(image) - np.array([104, 117, 123], dtype=np.float32)
- image = image.transpose(2, 0, 1) # HWC to CHW
- image = np.expand_dims(image, axis=0) # Add batch dimension (1, C, H, W)
- return image
-
- def inference(self, input_tensor: np.ndarray) -> List[np.ndarray]:
- """Perform model inference on the preprocessed image tensor.
-
- Args:
- input_tensor (np.ndarray): Preprocessed input tensor.
-
- Returns:
- Tuple[np.ndarray, np.ndarray]: Raw model outputs.
- """
- return self.session.run(self.output_names, {self.input_names: input_tensor})
-
- def detect(
- self,
- image: np.ndarray,
- max_num: int = 0,
- metric: Literal["default", "max"] = "max",
- center_weight: float = 2.0
- ) -> Tuple[np.ndarray, np.ndarray]:
- """
- Perform face detection on an input image and return bounding boxes and facial landmarks.
-
- Args:
- image (np.ndarray): Input image as a NumPy array of shape (H, W, C).
- max_num (int): Maximum number of detections to return. Use 0 to return all detections. Defaults to 0.
- metric (Literal["default", "max"]): Metric for ranking detections when `max_num` is limited.
- - "default": Prioritize detections closer to the image center.
- - "max": Prioritize detections with larger bounding box areas.
- center_weight (float): Weight for penalizing detections farther from the image center
- when using the "default" metric. Defaults to 2.0.
-
- Returns:
- Tuple[np.ndarray, np.ndarray]:
- - detections: Bounding boxes with confidence scores. Shape (N, 5), each row as [x_min, y_min, x_max, y_max, score].
- - landmarks: Facial landmark coordinates. Shape (N, 5, 2), where each row contains 5 (x, y) points.
- """
-
- original_height, original_width = image.shape[:2]
-
- if self.dynamic_size:
- height, width, _ = image.shape
- self._priors = generate_anchors(image_size=(height, width)) # generate anchors for each input image
- resize_factor = 1.0 # No resizing
- else:
- image, resize_factor = resize_image(image, target_shape=self.input_size)
-
- height, width, _ = image.shape
- image_tensor = self.preprocess(image)
-
- # ONNXRuntime inference
- outputs = self.inference(image_tensor)
-
- # Postprocessing
- detections, landmarks = self.postprocess(outputs, resize_factor, shape=(width, height))
-
- if max_num > 0 and detections.shape[0] > max_num:
- # Calculate area of detections
- areas = (detections[:, 2] - detections[:, 0]) * (detections[:, 3] - detections[:, 1])
-
- # Calculate offsets from image center
- center = (original_height // 2, original_width // 2)
- offsets = np.vstack([
- (detections[:, 0] + detections[:, 2]) / 2 - center[1],
- (detections[:, 1] + detections[:, 3]) / 2 - center[0]
- ])
- offset_dist_squared = np.sum(np.power(offsets, 2.0), axis=0)
-
- # Calculate scores based on the chosen metric
- if metric == 'max':
- scores = areas
- else:
- scores = areas - offset_dist_squared * center_weight
-
- # Sort by scores and select top `max_num`
- sorted_indices = np.argsort(scores)[::-1][:max_num]
-
- detections = detections[sorted_indices]
- landmarks = landmarks[sorted_indices]
-
- return detections, landmarks
-
- def postprocess(self, outputs: List[np.ndarray], resize_factor: float, shape: Tuple[int, int]) -> Tuple[np.ndarray, np.ndarray]:
- """
- Process the model outputs into final detection results.
-
- Args:
- outputs (List[np.ndarray]): Raw outputs from the detection model.
- - outputs[0]: Location predictions (bounding box coordinates).
- - outputs[1]: Class confidence scores.
- - outputs[2]: Landmark predictions.
- resize_factor (float): Factor used to resize the input image during preprocessing.
- shape (Tuple[int, int]): Original shape of the image as (height, width).
-
- Returns:
- Tuple[np.ndarray, np.ndarray]: Processed results containing:
- - detections (np.ndarray): Array of detected bounding boxes with confidence scores.
- Shape: (num_detections, 5), where each row is [x_min, y_min, x_max, y_max, score].
- - landmarks (np.ndarray): Array of detected facial landmarks.
- Shape: (num_detections, 5, 2), where each row contains 5 landmark points (x, y).
- """
- loc, conf, landmarks = outputs[0].squeeze(0), outputs[1].squeeze(0), outputs[2].squeeze(0)
-
- # Decode boxes and landmarks
- boxes = decode_boxes(loc, self._priors)
- landmarks = decode_landmarks(landmarks, self._priors)
-
- boxes, landmarks = self._scale_detections(boxes, landmarks, resize_factor, shape=(shape[0], shape[1]))
-
- # Extract confidence scores for the face class
- scores = conf[:, 1]
- mask = scores > self.conf_thresh
-
- # Filter by confidence threshold
- boxes, landmarks, scores = boxes[mask], landmarks[mask], scores[mask]
-
- # Sort by scores
- order = scores.argsort()[::-1][:self.pre_nms_topk]
- boxes, landmarks, scores = boxes[order], landmarks[order], scores[order]
-
- # Apply NMS
- detections = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
- keep = non_max_supression(detections, self.nms_thresh)
- detections, landmarks = detections[keep], landmarks[keep]
-
- # Keep top-k detections
- detections, landmarks = detections[:self.post_nms_topk], landmarks[:self.post_nms_topk]
-
- landmarks = landmarks.reshape(-1, 5, 2).astype(np.int32)
-
- return detections, landmarks
-
- def _scale_detections(self, boxes: np.ndarray, landmarks: np.ndarray, resize_factor: float, shape: Tuple[int, int]) -> Tuple[np.ndarray, np.ndarray]:
- # Scale bounding boxes and landmarks to the original image size.
- bbox_scale = np.array([shape[0], shape[1]] * 2)
- boxes = boxes * bbox_scale / resize_factor
-
- landmark_scale = np.array([shape[0], shape[1]] * 5)
- landmarks = landmarks * landmark_scale / resize_factor
-
- return boxes, landmarks