mirror of https://github.com/yakhyo/uniface.git
synced 2025-12-30 00:52:25 +00:00

add apple silicon support and update documentation

- add dynamic onnx provider selection for m1/m2/m3/m4 macs
- replace mkdocs with simple markdown files
- fix model download and scrfd detection issues
- update ci/cd workflows
64 .github/workflows/build.yml vendored
@@ -1,64 +0,0 @@
name: Build, Test, and Publish

on:
  push:
    branches:
      - main # Trigger on pushes to main
    tags:
      - "v*.*.*" # Trigger publish on version tags
  pull_request:
    branches:
      - main # Trigger checks on pull requests to main

jobs:
  build:
    runs-on: ubuntu-latest

    strategy:
      matrix:
        python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]

    steps:
      - name: Checkout code
        uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          python -m pip install .[dev]

      - name: Run Tests
        run: pytest

  publish:
    runs-on: ubuntu-latest
    needs: build # Publish only if tests pass
    if: github.event_name == 'push' && github.ref == 'refs/heads/main' # Trigger only on pushes to main

    steps:
      - name: Checkout code
        uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: "3.10" # Use a single Python version for publishing

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          python -m pip install build twine

      - name: Build Package
        run: python -m build

      - name: Publish to PyPI
        env:
          TWINE_USERNAME: __token__
          TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
        run: twine upload dist/*
87 .github/workflows/ci.yml vendored Normal file
@@ -0,0 +1,87 @@
name: CI

on:
  push:
    branches:
      - main
      - develop
  pull_request:
    branches:
      - main
      - develop

jobs:
  test:
    runs-on: ubuntu-latest

    strategy:
      fail-fast: false
      matrix:
        python-version: ["3.10", "3.11", "3.12", "3.13"]

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
          cache: 'pip'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          python -m pip install .[dev]

      - name: Check ONNX Runtime providers
        run: |
          python -c "import onnxruntime as ort; print('Available providers:', ort.get_available_providers())"

      - name: Lint with ruff (if available)
        run: |
          pip install ruff || true
          ruff check . --exit-zero || true
        continue-on-error: true

      - name: Run tests
        run: pytest -v --tb=short

      - name: Test package imports
        run: |
          python -c "from uniface import RetinaFace, ArcFace, Landmark106, AgeGender; print('All imports successful')"

  build:
    runs-on: ubuntu-latest
    needs: test

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"
          cache: 'pip'

      - name: Install build tools
        run: |
          python -m pip install --upgrade pip
          python -m pip install build

      - name: Build package
        run: python -m build

      - name: Check package
        run: |
          python -m pip install twine
          twine check dist/*

      - name: Upload build artifacts
        uses: actions/upload-artifact@v4
        with:
          name: dist-python-${{ github.sha }}
          path: dist/
          retention-days: 7
105 .github/workflows/publish.yml vendored Normal file
@@ -0,0 +1,105 @@
name: Publish to PyPI

on:
  push:
    tags:
      - "v*.*.*" # Trigger only on version tags like v0.1.9

jobs:
  validate:
    runs-on: ubuntu-latest
    outputs:
      version: ${{ steps.get_version.outputs.version }}
      tag_version: ${{ steps.get_version.outputs.tag_version }}

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Get version from tag and pyproject.toml
        id: get_version
        run: |
          TAG_VERSION=${GITHUB_REF#refs/tags/v}
          echo "tag_version=$TAG_VERSION" >> $GITHUB_OUTPUT

          PYPROJECT_VERSION=$(grep -Po '(?<=^version = ")[^"]*' pyproject.toml)
          echo "version=$PYPROJECT_VERSION" >> $GITHUB_OUTPUT

          echo "Tag version: v$TAG_VERSION"
          echo "pyproject.toml version: $PYPROJECT_VERSION"

      - name: Verify version match
        run: |
          if [ "${{ steps.get_version.outputs.tag_version }}" != "${{ steps.get_version.outputs.version }}" ]; then
            echo "Error: Tag version (${{ steps.get_version.outputs.tag_version }}) does not match pyproject.toml version (${{ steps.get_version.outputs.version }})"
            exit 1
          fi
          echo "Version validation passed: ${{ steps.get_version.outputs.version }}"

  test:
    runs-on: ubuntu-latest
    needs: validate

    strategy:
      fail-fast: false
      matrix:
        python-version: ["3.10", "3.11", "3.12", "3.13"]

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
          cache: 'pip'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          python -m pip install .[dev]

      - name: Run tests
        run: pytest -v

  publish:
    runs-on: ubuntu-latest
    needs: [validate, test]
    environment:
      name: pypi
      url: https://pypi.org/project/uniface/

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"
          cache: 'pip'

      - name: Install build tools
        run: |
          python -m pip install --upgrade pip
          python -m pip install build twine

      - name: Build package
        run: python -m build

      - name: Check package
        run: twine check dist/*

      - name: Publish to PyPI
        env:
          TWINE_USERNAME: __token__
          TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
        run: twine upload dist/*

      - name: Create GitHub Release
        uses: softprops/action-gh-release@v1
        with:
          files: dist/*
          generate_release_notes: true
395 MODELS.md Normal file
@@ -0,0 +1,395 @@
# UniFace Model Zoo

Complete guide to all available models, their performance characteristics, and selection criteria.

---

## Face Detection Models

### RetinaFace Family

RetinaFace models are trained on the WIDER FACE dataset and provide excellent accuracy-speed tradeoffs.

| Model Name   | Params | Size  | Easy   | Medium | Hard   | Use Case                |
|--------------|--------|-------|--------|--------|--------|-------------------------|
| `MNET_025`   | 0.4M   | 1.7MB | 88.48% | 87.02% | 80.61% | Mobile/Edge devices     |
| `MNET_050`   | 1.0M   | 2.6MB | 89.42% | 87.97% | 82.40% | Mobile/Edge devices     |
| `MNET_V1`    | 3.5M   | 3.8MB | 90.59% | 89.14% | 84.13% | Balanced mobile         |
| `MNET_V2` ⭐ | 3.2M   | 3.5MB | 91.70% | 91.03% | 86.60% | **Recommended default** |
| `RESNET18`   | 11.7M  | 27MB  | 92.50% | 91.02% | 86.63% | Server/High accuracy    |
| `RESNET34`   | 24.8M  | 56MB  | 94.16% | 93.12% | 88.90% | Maximum accuracy        |

**Accuracy**: WIDER FACE validation set (Easy/Medium/Hard subsets) - from the [RetinaFace paper](https://arxiv.org/abs/1905.00641)
**Speed**: Benchmark on your own hardware using `scripts/run_detection.py --iterations 100`

#### Usage

```python
from uniface import RetinaFace
from uniface.constants import RetinaFaceWeights

# Default (recommended)
detector = RetinaFace()  # Uses MNET_V2

# Specific model
detector = RetinaFace(
    model_name=RetinaFaceWeights.MNET_025,  # Fastest
    conf_thresh=0.5,
    nms_thresh=0.4,
    input_size=(640, 640)
)
```

---

### SCRFD Family

SCRFD (Sample and Computation Redistribution for Efficient Face Detection) models offer state-of-the-art speed-accuracy tradeoffs.

| Model Name     | Params | Size  | Easy   | Medium | Hard   | Use Case                  |
|----------------|--------|-------|--------|--------|--------|---------------------------|
| `SCRFD_500M`   | 0.6M   | 2.5MB | 90.57% | 88.12% | 68.51% | Real-time applications    |
| `SCRFD_10G` ⭐ | 4.2M   | 17MB  | 95.16% | 93.87% | 83.05% | **High accuracy + speed** |

**Accuracy**: WIDER FACE validation set - from the [SCRFD paper](https://arxiv.org/abs/2105.04714)
**Speed**: Benchmark on your own hardware using `scripts/run_detection.py --iterations 100`

#### Usage

```python
from uniface import SCRFD
from uniface.constants import SCRFDWeights

# Fast real-time detection
detector = SCRFD(
    model_name=SCRFDWeights.SCRFD_500M_KPS,
    conf_thresh=0.5,
    input_size=(640, 640)
)

# High accuracy
detector = SCRFD(
    model_name=SCRFDWeights.SCRFD_10G_KPS,
    conf_thresh=0.5
)
```

---

## Face Recognition Models

### ArcFace

State-of-the-art face recognition using additive angular margin loss.

| Model Name | Backbone  | Params | Size  | Use Case                   |
|------------|-----------|--------|-------|----------------------------|
| `MNET` ⭐  | MobileNet | 2.0M   | 8MB   | **Balanced (recommended)** |
| `RESNET`   | ResNet50  | 43.6M  | 166MB | Maximum accuracy           |

**Dataset**: Trained on MS1M-V2 (5.8M images, 85K identities)
**Accuracy**: Benchmark on your own dataset or use standard face verification benchmarks

#### Usage

```python
from uniface import ArcFace
from uniface.constants import ArcFaceWeights

# Default (MobileNet backbone)
recognizer = ArcFace()

# High accuracy (ResNet50 backbone)
recognizer = ArcFace(model_name=ArcFaceWeights.RESNET)

# Extract embedding
embedding = recognizer.get_normalized_embedding(image, landmarks)
# Returns: (1, 512) normalized embedding vector
```
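
Since the embeddings come back L2-normalized, cosine similarity reduces to a plain dot product. A minimal end-to-end comparison sketch, assuming the dict-based `detect()` output shown in the quick start guide (the image paths are placeholders):

```python
import cv2
import numpy as np
from uniface import RetinaFace, ArcFace

detector, recognizer = RetinaFace(), ArcFace()
img_a, img_b = cv2.imread("person_a.jpg"), cv2.imread("person_b.jpg")

# Use the first detected face in each image.
emb_a = recognizer.get_normalized_embedding(img_a, detector.detect(img_a)[0]["landmarks"])
emb_b = recognizer.get_normalized_embedding(img_b, detector.detect(img_b)[0]["landmarks"])

# Embeddings are L2-normalized, so (1, 512) @ (512, 1) is the cosine similarity.
similarity = np.dot(emb_a, emb_b.T).item()
print(f"Cosine similarity: {similarity:.3f}")
```
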
---

### MobileFace

Lightweight face recognition optimized for mobile devices.

| Model Name      | Backbone         | Params | Size | Use Case          |
|-----------------|------------------|--------|------|-------------------|
| `MNET_025`      | MobileNetV1 0.25 | 0.2M   | 1MB  | Ultra-lightweight |
| `MNET_V2` ⭐    | MobileNetV2      | 1.0M   | 4MB  | **Mobile/Edge**   |
| `MNET_V3_SMALL` | MobileNetV3-S    | 0.8M   | 3MB  | Mobile optimized  |
| `MNET_V3_LARGE` | MobileNetV3-L    | 2.5M   | 10MB | Balanced mobile   |

**Note**: These models are lightweight alternatives to ArcFace for resource-constrained environments.

#### Usage

```python
from uniface import MobileFace
from uniface.constants import MobileFaceWeights

# Lightweight
recognizer = MobileFace(model_name=MobileFaceWeights.MNET_V2)
```

---

### SphereFace

Face recognition using angular softmax loss.

| Model Name | Backbone | Params | Size | Use Case            |
|------------|----------|--------|------|---------------------|
| `SPHERE20` | Sphere20 | 13.0M  | 50MB | Research/Comparison |
| `SPHERE36` | Sphere36 | 24.2M  | 92MB | Research/Comparison |

**Note**: SphereFace uses angular softmax loss, an earlier approach that predates ArcFace.

#### Usage

```python
from uniface import SphereFace
from uniface.constants import SphereFaceWeights

recognizer = SphereFace(model_name=SphereFaceWeights.SPHERE20)
```

---

## Facial Landmark Models

### 106-Point Landmark Detection

High-precision facial landmark localization.

| Model Name | Points | Params | Size | Use Case                 |
|------------|--------|--------|------|--------------------------|
| `2D106`    | 106    | 3.7M   | 14MB | Face alignment, analysis |

**Note**: Provides 106 facial keypoints for detailed face analysis and alignment.

#### Usage

```python
from uniface import Landmark106

landmarker = Landmark106()
landmarks = landmarker.get_landmarks(image, bbox)
# Returns: (106, 2) array of (x, y) coordinates
```

**Landmark Groups** (sliced programmatically in the sketch below):

- Face contour: 0-32 (33 points)
- Eyebrows: 33-50 (18 points)
- Nose: 51-62 (12 points)
- Eyes: 63-86 (24 points)
- Mouth: 87-105 (19 points)
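
A short sketch splitting a `(106, 2)` landmark array into those groups; the slice boundaries come directly from the list above, and `split_landmarks` is an illustrative helper, not a uniface API:

```python
import numpy as np

# Half-open slices matching the group ranges listed above.
GROUPS = {
    "contour": slice(0, 33),   # 33 points
    "eyebrows": slice(33, 51), # 18 points
    "nose": slice(51, 63),     # 12 points
    "eyes": slice(63, 87),     # 24 points
    "mouth": slice(87, 106),   # 19 points
}

def split_landmarks(landmarks: np.ndarray) -> dict:
    """Split a (106, 2) landmark array into named facial regions."""
    return {name: landmarks[idx] for name, idx in GROUPS.items()}
```
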
---

## Attribute Analysis Models

### Age & Gender Detection

| Model Name | Attributes  | Params | Size | Use Case        |
|------------|-------------|--------|------|-----------------|
| `DEFAULT`  | Age, Gender | 2.1M   | 8MB  | General purpose |

**Dataset**: Trained on CelebA
**Note**: Accuracy varies by demographic and image quality. Test on your specific use case.

#### Usage

```python
from uniface import AgeGender

predictor = AgeGender()
gender, age = predictor.predict(image, bbox)
# Returns: ("Male"/"Female", age_in_years)
```

---

### Emotion Detection

| Model Name  | Classes | Params | Size | Use Case        |
|-------------|---------|--------|------|-----------------|
| `AFFECNET7` | 7       | 0.5M   | 2MB  | 7-class emotion |
| `AFFECNET8` | 8       | 0.5M   | 2MB  | 8-class emotion |

**Classes (7)**: Neutral, Happy, Sad, Surprise, Fear, Disgust, Anger
**Classes (8)**: Above + Contempt

**Dataset**: Trained on AffectNet
**Note**: Emotion detection accuracy depends heavily on facial expression clarity and cultural context.

#### Usage

```python
from uniface import Emotion
from uniface.constants import DDAMFNWeights

predictor = Emotion(model_name=DDAMFNWeights.AFFECNET7)
emotion, confidence = predictor.predict(image, landmarks)
```

---

## Model Selection Guide

### By Use Case

#### Mobile/Edge Devices

- **Detection**: `RetinaFace(MNET_025)` or `SCRFD(SCRFD_500M)`
- **Recognition**: `MobileFace(MNET_V2)`
- **Priority**: Speed, small model size

#### Real-Time Applications (Webcam, Video)

- **Detection**: `RetinaFace(MNET_V2)` or `SCRFD(SCRFD_500M)`
- **Recognition**: `ArcFace(MNET)`
- **Priority**: Speed-accuracy balance

#### High-Accuracy Applications (Security, Verification)

- **Detection**: `SCRFD(SCRFD_10G)` or `RetinaFace(RESNET34)`
- **Recognition**: `ArcFace(RESNET)`
- **Priority**: Maximum accuracy

#### Server/Cloud Deployment

- **Detection**: `SCRFD(SCRFD_10G)`
- **Recognition**: `ArcFace(RESNET)`
- **Priority**: Accuracy, batch processing (wired up in the sketch below)
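
As a concrete example, the server/cloud pairing above can be instantiated directly, using the constructor arguments and enum names documented in the model sections of this guide:

```python
from uniface import SCRFD, ArcFace
from uniface.constants import SCRFDWeights, ArcFaceWeights

# Accuracy-first stack for server/cloud deployment.
detector = SCRFD(model_name=SCRFDWeights.SCRFD_10G_KPS, conf_thresh=0.5)
recognizer = ArcFace(model_name=ArcFaceWeights.RESNET)
```
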
---

### By Hardware

#### Apple Silicon (M1/M2/M3/M4)

**Recommended**: All models work well with CoreML acceleration.

```bash
pip install uniface[silicon]
```

**Recommended models**:

- **Fast**: `SCRFD(SCRFD_500M)` - Lightweight, real-time capable
- **Balanced**: `RetinaFace(MNET_V2)` - Good accuracy/speed tradeoff
- **Accurate**: `SCRFD(SCRFD_10G)` - High accuracy

**Benchmark on your Mac**: `python scripts/run_detection.py --iterations 100`

#### NVIDIA GPU (CUDA)

**Recommended**: Larger models for maximum throughput.

```bash
pip install uniface[gpu]
```

**Recommended models**:

- **Fast**: `SCRFD(SCRFD_500M)` - Maximum throughput
- **Balanced**: `SCRFD(SCRFD_10G)` - Best overall
- **Accurate**: `RetinaFace(RESNET34)` - Highest accuracy

#### CPU Only

**Recommended**: Lightweight models.

**Recommended models**:

- **Fast**: `RetinaFace(MNET_025)` - Smallest, fastest
- **Balanced**: `RetinaFace(MNET_V2)` - Recommended default
- **Accurate**: `SCRFD(SCRFD_10G)` - Best accuracy on CPU

**Note**: FPS values vary significantly based on image size, number of faces, and hardware. Always benchmark on your specific setup.

---

## Benchmark Details

### How to Benchmark

Run benchmarks on your own hardware:

```bash
# Detection speed
python scripts/run_detection.py --image assets/test.jpg --iterations 100

# Compare models
python scripts/run_detection.py --image assets/test.jpg --method retinaface --iterations 100
python scripts/run_detection.py --image assets/test.jpg --method scrfd --iterations 100
```
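
If you prefer an in-process measurement to the script, a rough timing sketch (assumes `assets/test.jpg` exists; the first call is excluded as warm-up since it may include model download and provider compilation):

```python
import time
import cv2
from uniface import RetinaFace

detector = RetinaFace()
image = cv2.imread("assets/test.jpg")

detector.detect(image)  # warm-up call, not timed

iterations = 100
start = time.perf_counter()
for _ in range(iterations):
    detector.detect(image)
elapsed = time.perf_counter() - start

print(f"{iterations / elapsed:.1f} FPS ({elapsed / iterations * 1000:.1f} ms/frame)")
```
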
### Accuracy Metrics Explained

- **WIDER FACE**: Standard face detection benchmark with three difficulty levels
  - **Easy**: Large faces (>50px), clear backgrounds
  - **Medium**: Medium-sized faces (30-50px), moderate occlusion
  - **Hard**: Small faces (<30px), heavy occlusion, blur

  *Accuracy values are from the original papers - see references below*

- **Model Size**: ONNX model file size (affects download time and memory)
- **Params**: Number of model parameters (affects inference speed)

### Important Notes

1. **Speed varies by**:
   - Image resolution
   - Number of faces in image
   - Hardware (CPU/GPU/CoreML)
   - Batch size
   - Operating system

2. **Accuracy varies by**:
   - Image quality
   - Lighting conditions
   - Face pose and occlusion
   - Demographic factors

3. **Always benchmark on your specific use case** before choosing a model.

---

## Model Updates

Models are automatically downloaded and cached on first use. Cache location: `~/.uniface/models/`
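
To inspect what is already cached, listing the directory is enough. A sketch assuming the default layout of one `.onnx` file per model under the cache location above:

```python
from pathlib import Path

cache_dir = Path.home() / ".uniface" / "models"
for model_file in sorted(cache_dir.glob("*.onnx")):
    size_mb = model_file.stat().st_size / 1e6
    print(f"{model_file.name:<40} {size_mb:6.1f} MB")
```
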
### Manual Model Management

```python
from uniface.model_store import verify_model_weights
from uniface.constants import RetinaFaceWeights

# Download specific model
model_path = verify_model_weights(
    RetinaFaceWeights.MNET_V2,
    root='./custom_cache'
)

# Models are verified with SHA-256 checksums
```

### Download All Models

```bash
# Using the provided script
python scripts/download_model.py

# Download specific model
python scripts/download_model.py --model MNET_V2
```

---

## References

### Model Training & Architectures

- **RetinaFace Training**: [yakhyo/retinaface-pytorch](https://github.com/yakhyo/retinaface-pytorch) - PyTorch implementation and training code
- **Face Recognition Training**: [yakhyo/face-recognition](https://github.com/yakhyo/face-recognition) - ArcFace, MobileFace, SphereFace training code
- **InsightFace**: [deepinsight/insightface](https://github.com/deepinsight/insightface) - Model architectures and pretrained weights

### Papers

- **RetinaFace**: [Single-Shot Multi-Level Face Localisation in the Wild](https://arxiv.org/abs/1905.00641)
- **SCRFD**: [Sample and Computation Redistribution for Efficient Face Detection](https://arxiv.org/abs/2105.04714)
- **ArcFace**: [Additive Angular Margin Loss for Deep Face Recognition](https://arxiv.org/abs/1801.07698)
- **SphereFace**: [Deep Hypersphere Embedding for Face Recognition](https://arxiv.org/abs/1704.08063)
355 QUICKSTART.md Normal file
@@ -0,0 +1,355 @@
# UniFace Quick Start Guide

Get up and running with UniFace in 5 minutes! This guide covers the most common use cases.

---

## Installation

```bash
# macOS (Apple Silicon)
pip install uniface[silicon]

# Linux/Windows with NVIDIA GPU
pip install uniface[gpu]

# CPU-only (all platforms)
pip install uniface
```

---

## 1. Face Detection (30 seconds)

Detect faces in an image:

```python
import cv2
from uniface import RetinaFace

# Load image
image = cv2.imread("photo.jpg")

# Initialize detector (models auto-download on first use)
detector = RetinaFace()

# Detect faces
faces = detector.detect(image)

# Print results
for i, face in enumerate(faces):
    print(f"Face {i+1}:")
    print(f"  Confidence: {face['confidence']:.2f}")
    print(f"  BBox: {face['bbox']}")
    print(f"  Landmarks: {len(face['landmarks'])} points")
```

**Output:**

```
Face 1:
  Confidence: 0.99
  BBox: [120.5, 85.3, 245.8, 210.6]
  Landmarks: 5 points
```

---

## 2. Visualize Detections (1 minute)

Draw bounding boxes and landmarks:

```python
import cv2
from uniface import RetinaFace
from uniface.visualization import draw_detections

# Detect faces
detector = RetinaFace()
image = cv2.imread("photo.jpg")
faces = detector.detect(image)

# Extract visualization data
bboxes = [f['bbox'] for f in faces]
scores = [f['confidence'] for f in faces]
landmarks = [f['landmarks'] for f in faces]

# Draw on image
draw_detections(image, bboxes, scores, landmarks, vis_threshold=0.6)

# Save result
cv2.imwrite("output.jpg", image)
print("Saved output.jpg")
```

---

## 3. Face Recognition (2 minutes)

Compare two faces:

```python
import cv2
import numpy as np
from uniface import RetinaFace, ArcFace

# Initialize models
detector = RetinaFace()
recognizer = ArcFace()

# Load two images
image1 = cv2.imread("person1.jpg")
image2 = cv2.imread("person2.jpg")

# Detect faces
faces1 = detector.detect(image1)
faces2 = detector.detect(image2)

if faces1 and faces2:
    # Extract embeddings
    emb1 = recognizer.get_normalized_embedding(image1, faces1[0]['landmarks'])
    emb2 = recognizer.get_normalized_embedding(image2, faces2[0]['landmarks'])

    # Compute similarity (cosine similarity)
    similarity = np.dot(emb1, emb2.T)[0][0]

    # Interpret result
    if similarity > 0.6:
        print(f"✅ Same person (similarity: {similarity:.3f})")
    else:
        print(f"❌ Different people (similarity: {similarity:.3f})")
else:
    print("No faces detected")
```

**Similarity thresholds** (wrapped into a small helper below):

- `> 0.6`: Same person (high confidence)
- `0.4 - 0.6`: Uncertain (manual review)
- `< 0.4`: Different people
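
These cutoffs fold naturally into a small helper. A sketch using the thresholds above; `interpret_similarity` is illustrative, not a uniface API, and the cutoffs should be tuned on your own data:

```python
def interpret_similarity(similarity: float) -> str:
    """Map a cosine similarity score to a verification decision."""
    if similarity > 0.6:
        return "same person"
    if similarity >= 0.4:
        return "uncertain - manual review"
    return "different people"

print(interpret_similarity(0.72))  # -> same person
```
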
---

## 4. Webcam Demo (2 minutes)

Real-time face detection:

```python
import cv2
from uniface import RetinaFace
from uniface.visualization import draw_detections

detector = RetinaFace()
cap = cv2.VideoCapture(0)

print("Press 'q' to quit")

while True:
    ret, frame = cap.read()
    if not ret:
        break

    # Detect faces
    faces = detector.detect(frame)

    # Draw results
    bboxes = [f['bbox'] for f in faces]
    scores = [f['confidence'] for f in faces]
    landmarks = [f['landmarks'] for f in faces]
    draw_detections(frame, bboxes, scores, landmarks)

    # Show frame
    cv2.imshow("UniFace - Press 'q' to quit", frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
```

---

## 5. Age & Gender Detection (2 minutes)

Detect age and gender:

```python
import cv2
from uniface import RetinaFace, AgeGender

# Initialize models
detector = RetinaFace()
age_gender = AgeGender()

# Load image
image = cv2.imread("photo.jpg")
faces = detector.detect(image)

# Predict attributes
for i, face in enumerate(faces):
    gender, age = age_gender.predict(image, face['bbox'])
    print(f"Face {i+1}: {gender}, {age} years old")
```

**Output:**

```
Face 1: Male, 32 years old
Face 2: Female, 28 years old
```

---

## 6. Facial Landmarks (2 minutes)

Detect 106 facial landmarks:

```python
import cv2
from uniface import RetinaFace, Landmark106

# Initialize models
detector = RetinaFace()
landmarker = Landmark106()

# Detect face and landmarks
image = cv2.imread("photo.jpg")
faces = detector.detect(image)

if faces:
    landmarks = landmarker.get_landmarks(image, faces[0]['bbox'])
    print(f"Detected {len(landmarks)} landmarks")

    # Draw landmarks
    for x, y in landmarks.astype(int):
        cv2.circle(image, (x, y), 2, (0, 255, 0), -1)

    cv2.imwrite("landmarks.jpg", image)
```

---

## 7. Batch Processing (3 minutes)

Process multiple images:

```python
import cv2
from pathlib import Path
from uniface import RetinaFace

detector = RetinaFace()

# Process all images in a folder
image_dir = Path("images/")
output_dir = Path("output/")
output_dir.mkdir(exist_ok=True)

for image_path in image_dir.glob("*.jpg"):
    print(f"Processing {image_path.name}...")

    image = cv2.imread(str(image_path))
    faces = detector.detect(image)

    print(f"  Found {len(faces)} face(s)")

    # Save results
    output_path = output_dir / image_path.name
    # ... draw and save ...

print("Done!")
```

---

## 8. Model Selection

Choose the right model for your use case:

```python
from uniface import create_detector
from uniface.constants import RetinaFaceWeights, SCRFDWeights

# Fast detection (mobile/edge devices)
detector = create_detector(
    'retinaface',
    model_name=RetinaFaceWeights.MNET_025,
    conf_thresh=0.7
)

# Balanced (recommended)
detector = create_detector(
    'retinaface',
    model_name=RetinaFaceWeights.MNET_V2
)

# High accuracy (server/GPU)
detector = create_detector(
    'scrfd',
    model_name=SCRFDWeights.SCRFD_10G_KPS,
    conf_thresh=0.5
)
```

---

## Common Issues

### 1. Models Not Downloading

```python
# Manually download a model
from uniface.model_store import verify_model_weights
from uniface.constants import RetinaFaceWeights

model_path = verify_model_weights(RetinaFaceWeights.MNET_V2)
print(f"Model downloaded to: {model_path}")
```

### 2. Check Hardware Acceleration

```python
import onnxruntime as ort
print("Available providers:", ort.get_available_providers())

# macOS M-series should show: ['CoreMLExecutionProvider', ...]
# NVIDIA GPU should show: ['CUDAExecutionProvider', ...]
```
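
If the provider you expect is listed but you want to pin or exclude one, ONNX Runtime sessions accept an explicit provider list. A minimal sketch using plain `onnxruntime` (the model path is illustrative, not a uniface API; adjust it to a model cached on your machine):

```python
import os
import onnxruntime as ort

# Hypothetical cached model path under the default uniface cache.
model_path = os.path.expanduser("~/.uniface/models/retinaface_mnet_v2.onnx")

# Provider order expresses priority: try CoreML first, fall back to CPU.
session = ort.InferenceSession(
    model_path,
    providers=["CoreMLExecutionProvider", "CPUExecutionProvider"],
)
print(session.get_providers())  # the providers actually attached to this session
```
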
### 3. Slow Performance on Mac

Make sure you installed with CoreML support:

```bash
pip install uniface[silicon]
```

### 4. Import Errors

```python
# ✅ Correct imports
from uniface import RetinaFace, ArcFace, Landmark106
from uniface.detection import create_detector

# ❌ Wrong imports
from uniface import retinaface  # Module, not class
```

---

## Next Steps

- **Detailed Examples**: Check the [examples/](examples/) folder for Jupyter notebooks
- **Model Benchmarks**: See [MODELS.md](MODELS.md) for performance comparisons
- **Full Documentation**: Read [README.md](README.md) for complete API reference

---

## References

- **RetinaFace Training**: [yakhyo/retinaface-pytorch](https://github.com/yakhyo/retinaface-pytorch)
- **Face Recognition Training**: [yakhyo/face-recognition](https://github.com/yakhyo/face-recognition)
- **InsightFace**: [deepinsight/insightface](https://github.com/deepinsight/insightface)

---

Happy coding! 🚀
560 README.md
@@ -1,44 +1,80 @@
 # UniFace: All-in-One Face Analysis Library

 [](https://opensource.org/licenses/MIT)
 [](https://pypi.org/project/uniface/)
 [](https://github.com/yakhyo/uniface/actions)
 [](https://github.com/yakhyo/uniface)
 [](https://pepy.tech/project/uniface)
 [](https://www.python.org/dev/peps/pep-0008/)
 [](https://github.com/yakhyo/uniface/releases)

 <div align="center">
   <img src=".github/logos/logo_web.webp" width=75%>
 </div>

-**uniface** is a lightweight face detection library designed for high-performance face localization, landmark detection and face alignment. The library supports ONNX models and provides utilities for bounding box visualization and landmark plotting. To train RetinaFace model, see https://github.com/yakhyo/retinaface-pytorch.
+**UniFace** is a lightweight, production-ready face analysis library built on ONNX Runtime. It provides high-performance face detection, recognition, landmark detection, and attribute analysis with hardware acceleration support across platforms.

 ---

 ## Features

-| Date       | Feature Description |
-| ---------- | ------------------- |
-| Planned    | 🎭**Age and Gender Detection**: Planned feature for predicting age and gender from facial images. |
-| Planned    | 🧩**Face Recognition**: Upcoming capability to identify and verify faces. |
-| 2024-11-21 | 🔄**Face Alignment**: Added precise face alignment for better downstream tasks. |
-| 2024-11-20 | ⚡**High-Speed Face Detection**: ONNX model integration for faster and efficient face detection. |
-| 2024-11-20 | 🎯**Facial Landmark Localization**: Accurate detection of key facial features like eyes, nose, and mouth. |
-| 2024-11-20 | 🛠**API for Inference and Visualization**: Simplified API for seamless inference and visual results generation. |
+- **High-Speed Face Detection**: ONNX-optimized RetinaFace and SCRFD models
+- **Facial Landmark Detection**: Accurate 106-point landmark localization
+- **Face Recognition**: ArcFace, MobileFace, and SphereFace embeddings
+- **Attribute Analysis**: Age, gender, and emotion detection
+- **Face Alignment**: Precise alignment for downstream tasks
+- **Hardware Acceleration**: CoreML (Apple Silicon), CUDA (NVIDIA), CPU fallback
+- **Simple API**: Intuitive factory functions and clean interfaces
+- **Production-Ready**: Type hints, comprehensive logging, PEP8 compliant

 ---

 ## Installation

-The easiest way to install **UniFace** is via [PyPI](https://pypi.org/project/uniface/). This will automatically install the library along with its prerequisites.
+### Quick Install (All Platforms)

 ```bash
 pip install uniface
 ```

-To work with the latest version of **UniFace**, which may not yet be released on PyPI, you can install it directly from the repository:
+### Platform-Specific Installation
+
+#### macOS (Apple Silicon - M1/M2/M3/M4)
+
+For optimal performance with **CoreML acceleration** (3-5x faster):
+
+```bash
+# Standard installation (CPU only)
+pip install uniface
+
+# With CoreML acceleration (recommended for M-series chips)
+pip install uniface[silicon]
+```
+
+**Verify CoreML is available:**
+
+```python
+import onnxruntime as ort
+print(ort.get_available_providers())
+# Should show: ['CoreMLExecutionProvider', 'CPUExecutionProvider']
+```
+
+#### Linux/Windows with NVIDIA GPU
+
+```bash
+# With CUDA acceleration
+pip install uniface[gpu]
+```
+
+**Requirements:**
+
+- CUDA 11.x or 12.x
+- cuDNN 8.x
+- See [ONNX Runtime GPU requirements](https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html)
+
+#### CPU-Only (All Platforms)
+
+```bash
+pip install uniface
+```
+
+### Install from Source

 ```bash
 git clone https://github.com/yakhyo/uniface.git
@@ -50,254 +86,362 @@ pip install -e .

 ## Quick Start

-To get started with face detection using **UniFace**, check out the [example notebook](examples/face_detection.ipynb).
-It demonstrates how to initialize the model, run inference, and visualize the results.
+### Face Detection
+
+```python
+import cv2
+from uniface import RetinaFace
+
+# Initialize detector
+detector = RetinaFace()
+
+# Load image
+image = cv2.imread("image.jpg")
+
+# Detect faces
+faces = detector.detect(image)
+
+# Process results
+for face in faces:
+    bbox = face['bbox']              # [x1, y1, x2, y2]
+    confidence = face['confidence']
+    landmarks = face['landmarks']    # 5-point landmarks
+    print(f"Face detected with confidence: {confidence:.2f}")
+```
+
+### Face Recognition
+
+```python
+from uniface import ArcFace, RetinaFace
+from uniface import compute_similarity
+
+# Initialize models
+detector = RetinaFace()
+recognizer = ArcFace()
+
+# Detect and extract embeddings
+faces1 = detector.detect(image1)
+faces2 = detector.detect(image2)
+
+embedding1 = recognizer.get_normalized_embedding(image1, faces1[0]['landmarks'])
+embedding2 = recognizer.get_normalized_embedding(image2, faces2[0]['landmarks'])
+
+# Compare faces
+similarity = compute_similarity(embedding1, embedding2)
+print(f"Similarity: {similarity:.4f}")
+```
+
+### Facial Landmarks
+
+```python
+from uniface import RetinaFace, Landmark106
+
+detector = RetinaFace()
+landmarker = Landmark106()
+
+faces = detector.detect(image)
+landmarks = landmarker.get_landmarks(image, faces[0]['bbox'])
+# Returns 106 (x, y) landmark points
+```
+
+### Age & Gender Detection
+
+```python
+from uniface import RetinaFace, AgeGender
+
+detector = RetinaFace()
+age_gender = AgeGender()
+
+faces = detector.detect(image)
+gender, age = age_gender.predict(image, faces[0]['bbox'])
+print(f"{gender}, {age} years old")
+```

 ---

-## Examples
+## Documentation

-<div align="center">
-  <img src="assets/alignment_result.png">
-</div>
+- [**QUICKSTART.md**](QUICKSTART.md) - 5-minute getting started guide
+- [**MODELS.md**](MODELS.md) - Model zoo, benchmarks, and selection guide
+- [**Examples**](examples/) - Jupyter notebooks with detailed examples

-Explore the following example notebooks to learn how to use **UniFace** effectively:
+---

-- [Face Detection](examples/face_detection.ipynb): Demonstrates how to perform face detection, draw bounding boxes, and landmarks on an image.
-- [Face Alignment](examples/face_alignment.ipynb): Shows how to align faces using detected landmarks.
-- [Age and Gender Detection](examples/age_gender.ipynb): Example for detecting age and gender from faces. (underdevelopment)
+## API Overview

-### 🚀 Initialize the RetinaFace Model
-
-To use the RetinaFace model for face detection, initialize it with either custom or default configuration parameters.
-
-#### Full Initialization (with custom parameters)
+### Factory Functions (Recommended)

 ```python
-from uniface import RetinaFace
+from uniface import create_detector, create_recognizer, create_landmarker
+
+# Create detector with default settings
+detector = create_detector('retinaface')
+
+# Create with custom config
+detector = create_detector(
+    'scrfd',
+    model_name='scrfd_10g_kps',
+    conf_thresh=0.8,
+    input_size=(640, 640)
+)
+
+# Recognition and landmarks
+recognizer = create_recognizer('arcface')
+landmarker = create_landmarker('2d106det')
+```
+
+### Direct Model Instantiation
+
+```python
+from uniface import RetinaFace, SCRFD, ArcFace, MobileFace
 from uniface.constants import RetinaFaceWeights

-# Initialize RetinaFace with custom configuration
-uniface_inference = RetinaFace(
-    model_name=RetinaFaceWeights.MNET_V2,  # Model name from enum
-    conf_thresh=0.5,                       # Confidence threshold for detections
-    pre_nms_topk=5000,                     # Number of top detections before NMS
-    nms_thresh=0.4,                        # IoU threshold for NMS
-    post_nms_topk=750,                     # Number of top detections after NMS
-    dynamic_size=False,                    # Whether to allow arbitrary input sizes
-    input_size=(640, 640)                  # Input image size (HxW)
-)
-```
-
-#### Minimal Initialization (uses default parameters)
-
-```python
-from uniface import RetinaFace
-
-# Initialize with default settings
-uniface_inference = RetinaFace()
-```
-
-**Default Parameters:**
-
-```python
-model_name = RetinaFaceWeights.MNET_V2
-conf_thresh = 0.5
-pre_nms_topk = 5000
-post_nms_topk = 750
-dynamic_size = False
-input_size = (640, 640)
+# Detection
+detector = RetinaFace(
+    model_name=RetinaFaceWeights.MNET_V2,
+    conf_thresh=0.5,
+    nms_thresh=0.4
+)
+
+# Recognition
+recognizer = ArcFace()        # Uses default weights
+recognizer = MobileFace()     # Lightweight alternative
 ```

-### Run Inference
-
-Inference on image:
+### High-Level Detection API

 ```python
-import cv2
-from uniface.visualization import draw_detections
+from uniface import detect_faces

-# Load an image
-image_path = "assets/test.jpg"
-original_image = cv2.imread(image_path)
-
-# Perform inference
-boxes, landmarks = uniface_inference.detect(original_image)
-# boxes: [x_min, y_min, x_max, y_max, confidence]
-
-# Visualize results
-draw_detections(original_image, (boxes, landmarks), vis_threshold=0.6)
-
-# Save the output image
-output_path = "output.jpg"
-cv2.imwrite(output_path, original_image)
-print(f"Saved output image to {output_path}")
-```
-
-Inference on video:
-
-```python
-import cv2
-from uniface.visualization import draw_detections
-
-# Initialize the webcam
-cap = cv2.VideoCapture(0)
-
-if not cap.isOpened():
-    print("Error: Unable to access the webcam.")
-    exit()
-
-while True:
-    # Capture a frame from the webcam
-    ret, frame = cap.read()
-    if not ret:
-        print("Error: Failed to read frame.")
-        break
-
-    # Perform inference
-    boxes, landmarks = uniface_inference.detect(frame)
-    # 'boxes' contains bounding box coordinates and confidence scores:
-    # Format: [x_min, y_min, x_max, y_max, confidence]
-
-    # Draw detections on the frame
-    draw_detections(frame, (boxes, landmarks), vis_threshold=0.6)
-
-    # Display the output
-    cv2.imshow("Webcam Inference", frame)
-
-    # Exit if 'q' is pressed
-    if cv2.waitKey(1) & 0xFF == ord('q'):
-        break
-
-# Release the webcam and close all OpenCV windows
-cap.release()
-cv2.destroyAllWindows()
+# One-line face detection
+faces = detect_faces(image, method='retinaface', conf_thresh=0.8)
 ```

 ---

-### Evaluation results of available models on WiderFace
+## Model Performance

-| RetinaFace Models  | Easy       | Medium     | Hard       |
-| ------------------ | ---------- | ---------- | ---------- |
-| retinaface_mnet025 | 88.48%     | 87.02%     | 80.61%     |
-| retinaface_mnet050 | 89.42%     | 87.97%     | 82.40%     |
-| retinaface_mnet_v1 | 90.59%     | 89.14%     | 84.13%     |
-| retinaface_mnet_v2 | 91.70%     | 91.03%     | 86.60%     |
-| retinaface_r18     | 92.50%     | 91.02%     | 86.63%     |
-| retinaface_r34     | **94.16%** | **93.12%** | **88.90%** |
+### Face Detection (WIDER FACE Dataset)
+
+| Model              | Easy   | Medium | Hard   | Use Case               |
+|--------------------|--------|--------|--------|------------------------|
+| retinaface_mnet025 | 88.48% | 87.02% | 80.61% | Mobile/Edge devices    |
+| retinaface_mnet_v2 | 91.70% | 91.03% | 86.60% | Balanced (recommended) |
+| retinaface_r34     | 94.16% | 93.12% | 88.90% | High accuracy          |
+| scrfd_500m         | 90.57% | 88.12% | 68.51% | Real-time applications |
+| scrfd_10g          | 95.16% | 93.87% | 83.05% | Best accuracy/speed    |
+
+*Accuracy values from original papers: [RetinaFace](https://arxiv.org/abs/1905.00641), [SCRFD](https://arxiv.org/abs/2105.04714)*
+
+**Benchmark on your hardware:**
+
+```bash
+python scripts/run_detection.py --image assets/test.jpg --iterations 100
+```
+
+See [MODELS.md](MODELS.md) for detailed model information and selection guide.

 <div align="center">
   <img src="assets/test_result.png">
 </div>

-## API Reference
+---

-### `RetinaFace` Class
+## Examples

-#### Initialization
+### Webcam Face Detection

 ```python
-from typings import Tuple
+import cv2
+from uniface import RetinaFace
+from uniface.visualization import draw_detections
+
+detector = RetinaFace()
+cap = cv2.VideoCapture(0)
+
+while True:
+    ret, frame = cap.read()
+    if not ret:
+        break
+
+    faces = detector.detect(frame)
+
+    # Extract data for visualization
+    bboxes = [f['bbox'] for f in faces]
+    scores = [f['confidence'] for f in faces]
+    landmarks = [f['landmarks'] for f in faces]
+
+    draw_detections(frame, bboxes, scores, landmarks, vis_threshold=0.6)
+
+    cv2.imshow("Face Detection", frame)
+    if cv2.waitKey(1) & 0xFF == ord('q'):
+        break
+
+cap.release()
+cv2.destroyAllWindows()
+```
+
+### Face Search System
+
+```python
+import numpy as np
+from uniface import RetinaFace, ArcFace
+
+detector = RetinaFace()
+recognizer = ArcFace()
+
+# Build face database
+database = {}
+for person_id, image_path in person_images.items():
+    image = cv2.imread(image_path)
+    faces = detector.detect(image)
+    if faces:
+        embedding = recognizer.get_normalized_embedding(
+            image, faces[0]['landmarks']
+        )
+        database[person_id] = embedding
+
+# Search for a face
+query_image = cv2.imread("query.jpg")
+query_faces = detector.detect(query_image)
+if query_faces:
+    query_embedding = recognizer.get_normalized_embedding(
+        query_image, query_faces[0]['landmarks']
+    )
+
+    # Find best match
+    best_match = None
+    best_similarity = -1
+
+    for person_id, db_embedding in database.items():
+        similarity = np.dot(query_embedding, db_embedding.T)[0][0]
+        if similarity > best_similarity:
+            best_similarity = similarity
+            best_match = person_id
+
+    print(f"Best match: {best_match} (similarity: {best_similarity:.4f})")
+```
+
+More examples in the [examples/](examples/) directory.

 ---

+## Advanced Configuration
+
+### Custom ONNX Runtime Providers
+
+```python
+from uniface.onnx_utils import get_available_providers, create_onnx_session
+
+# Check available providers
+providers = get_available_providers()
+print(f"Available: {providers}")
+
+# Models need no provider argument:
+from uniface import RetinaFace
+detector = RetinaFace()
+# Internally uses create_onnx_session(), which auto-selects the best provider
+```
+
+### Model Download and Caching
+
+Models are automatically downloaded on first use and cached in `~/.uniface/models/`.

 ```python
 from uniface.model_store import verify_model_weights
 from uniface.constants import RetinaFaceWeights

-RetinaFace(
-    model_name: RetinaFaceWeights,
-    conf_thresh: float = 0.5,
-    pre_nms_topk: int = 5000,
-    nms_thresh: float = 0.4,
-    post_nms_topk: int = 750,
-    dynamic_size: bool = False,
-    input_size: Tuple[int, int] = (640, 640)
-)
+# Manually download and verify a model
+model_path = verify_model_weights(
+    RetinaFaceWeights.MNET_V2,
+    root='./custom_models'  # Custom cache directory
+)
 ```

-**Parameters**:
+### Logging Configuration

-- `model_name` _(RetinaFaceWeights)_: Enum value for model to use. Supported values:
-  - `MNET_025`, `MNET_050`, `MNET_V1`, `MNET_V2`, `RESNET18`, `RESNET34`
-- `conf_thresh` _(float, default=0.5)_: Minimum confidence score for detections.
-- `pre_nms_topk` _(int, default=5000)_: Max detections to keep before NMS.
-- `nms_thresh` _(float, default=0.4)_: IoU threshold for Non-Maximum Suppression.
-- `post_nms_topk` _(int, default=750)_: Max detections to keep after NMS.
-- `dynamic_size` _(Optional[bool], default=False)_: Use dynamic input size.
-- `input_size` _(Optional[Tuple[int, int]], default=(640, 640))_: Static input size for the model (width, height).
+```python
+from uniface import Logger
+import logging
+
+# Set logging level
+Logger.setLevel(logging.DEBUG)  # DEBUG, INFO, WARNING, ERROR
+
+# Disable logging
+Logger.setLevel(logging.CRITICAL)
+```

 ---

-### `detect` Method
+## Testing

-```python
-detect(
-    image: np.ndarray,
-    max_num: int = 0,
-    metric: str = "default",
-    center_weight: float = 2.0
-) -> Tuple[np.ndarray, np.ndarray]
-```
+```bash
+# Run all tests
+pytest
+
+# Run with coverage
+pytest --cov=uniface --cov-report=html
+
+# Run specific test file
+pytest tests/test_retinaface.py -v
+```

-**Description**:
-Detects faces in the given image and returns bounding boxes and landmarks.
-
-**Parameters**:
-
-- `image` _(np.ndarray)_: Input image in BGR format.
-- `max_num` _(int, default=0)_: Maximum number of faces to return. `0` means return all.
-- `metric` _(str, default="default")_: Metric for prioritizing detections:
-  - `"default"`: Prioritize detections closer to the image center.
-  - `"max"`: Prioritize larger bounding box areas.
-- `center_weight` _(float, default=2.0)_: Weight for prioritizing center-aligned faces.
-
-**Returns**:
-
-- `bounding_boxes` _(np.ndarray)_: Array of detections as `[x_min, y_min, x_max, y_max, confidence]`.
-- `landmarks` _(np.ndarray)_: Array of landmarks as `[(x1, y1), ..., (x5, y5)]`.

 ---

-### Visualization Utilities
+## Development

-#### `draw_detections`
+### Setup Development Environment

-```python
-draw_detections(
-    image: np.ndarray,
-    detections: Tuple[np.ndarray, np.ndarray],
-    vis_threshold: float = 0.6
-) -> None
-```
+```bash
+git clone https://github.com/yakhyo/uniface.git
+cd uniface
+
+# Install in editable mode with dev dependencies
+pip install -e ".[dev]"
+
+# Run tests
+pytest
+
+# Format code
+black uniface/
+isort uniface/
+```

-**Description**:
-Draws bounding boxes and landmarks on the given image.
+### Project Structure

-**Parameters**:
-
-- `image` _(np.ndarray)_: The input image in BGR format.
-- `detections` _(Tuple[np.ndarray, np.ndarray])_: A tuple of bounding boxes and landmarks.
-- `vis_threshold` _(float, default=0.6)_: Minimum confidence score for visualization.
+```
+uniface/
+├── uniface/
+│   ├── detection/       # Face detection models
+│   ├── recognition/     # Face recognition models
+│   ├── landmark/        # Landmark detection
+│   ├── attribute/       # Age, gender, emotion
+│   ├── onnx_utils.py    # ONNX Runtime utilities
+│   ├── model_store.py   # Model download & caching
+│   └── visualization.py # Drawing utilities
+├── tests/               # Unit tests
+├── examples/            # Example notebooks
+└── scripts/             # Utility scripts
+```
+
+---
+
+## References
+
+### Model Training & Architectures
+
+- **RetinaFace Training**: [yakhyo/retinaface-pytorch](https://github.com/yakhyo/retinaface-pytorch) - PyTorch implementation and training code
+- **Face Recognition Training**: [yakhyo/face-recognition](https://github.com/yakhyo/face-recognition) - ArcFace, MobileFace, SphereFace training code
+- **InsightFace**: [deepinsight/insightface](https://github.com/deepinsight/insightface) - Model architectures and pretrained weights
+
+### Papers
+
+- **RetinaFace**: [Single-Shot Multi-Level Face Localisation in the Wild](https://arxiv.org/abs/1905.00641)
+- **SCRFD**: [Sample and Computation Redistribution for Efficient Face Detection](https://arxiv.org/abs/2105.04714)
+- **ArcFace**: [Additive Angular Margin Loss for Deep Face Recognition](https://arxiv.org/abs/1801.07698)

 ---

 ## Contributing

-We welcome contributions to enhance the library! Feel free to:
+Contributions are welcome! Please open an issue or submit a pull request on [GitHub](https://github.com/yakhyo/uniface).

-- Submit bug reports or feature requests.
-- Fork the repository and create a pull request.

 ---

 ## License

 This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for details.

 ---

 ## Acknowledgments

 - Based on the RetinaFace model for face detection ([https://github.com/yakhyo/retinaface-pytorch](https://github.com/yakhyo/retinaface-pytorch)).
 - Inspired by InsightFace and other face detection projects.

 ---
@@ -1,3 +0,0 @@
# Changelog

All notable changes to UniFace.

@@ -1,3 +0,0 @@
# Code of Conduct

Guidelines for community behavior.

@@ -1,3 +0,0 @@
# License

MIT License or your custom license here.

@@ -1,2 +0,0 @@
# Blog

@@ -1,3 +0,0 @@
# UniFace

Welcome to the UniFace documentation.

@@ -1,37 +0,0 @@
# 🚀 Installation

## 📦 Install from PyPI

### CPU-only (default):

```bash
pip install uniface
```

This installs the CPU-compatible version of ONNX Runtime (`onnxruntime`) and all core dependencies.

### GPU support:

```bash
pip install "uniface[gpu]"
```

This installs `onnxruntime-gpu` for accelerated inference on supported NVIDIA GPUs.
Make sure your system meets the [ONNX Runtime GPU requirements](https://onnxruntime.ai/docs/build/eps.html#cuda).

---

## 🔧 Install from GitHub (latest version)

Clone the repository and install it manually:

```bash
git clone https://github.com/yakhyo/uniface.git
cd uniface

# CPU version
pip install .

# Or with GPU support
pip install ".[gpu]"
```

@@ -1,3 +0,0 @@
# Age & Gender Estimation

Age and gender model usage.

@@ -1,3 +0,0 @@
# Face Detection

Details on face detection models.

@@ -1,3 +0,0 @@
# Gaze Estimation

Gaze detection implementation and usage.

@@ -1,3 +0,0 @@
# Landmark Detection

Details on landmark prediction.

@@ -1,3 +0,0 @@
# Face Recognition

Details on face recognition models.

@@ -1,3 +0,0 @@
# Overview

High-level overview of UniFace features.

@@ -1,9 +0,0 @@
# Facial Attribute API Reference

# Age and Gender Model

::: uniface.attribute.age_gender.AgeGender

# Emotion Model

:::uniface.attribute.emotion.Emotion

@@ -1,10 +0,0 @@
# Face Detection API Reference

# RetinaFace

::: uniface.detection.retinaface.RetinaFace

# SCRFD

::: uniface.detection.scrfd.SCRFD

@@ -1,5 +0,0 @@
# Landmark API Reference

# Landmark Model

::: uniface.landmark.models.Landmark106

@@ -1,17 +0,0 @@
# Face Recognition API Reference

# SphereFace

::: uniface.recognition.models.SphereFace

# MobileFace

:::uniface.recognition.models.MobileFace

# ArcFace

:::uniface.recognition.models.ArcFace

# BaseRecognizer class

:::uniface.recognition.base.BaseRecognizer

@@ -1,3 +0,0 @@
# Integration

How to integrate UniFace into your app.

@@ -1,3 +0,0 @@
# Performance Tips

Speed and memory optimization.

@@ -1,3 +0,0 @@
# Quickstart

Get started with UniFace quickly.

@@ -1,3 +0,0 @@
# Usage

How to use UniFace with code examples.
87
mkdocs.yml
@@ -1,87 +0,0 @@
site_name: uniface
site_url: https://yakhyo.github.io/uniface/
site_author: Yakhyokhuja Valikhujaev
site_description: "UniFace: A Comprehensive Library for Face Detection, Recognition, Landmark Analysis, Age, and Gender Detection"

repo_url: https://github.com/yakhyo/uniface
repo_name: yakhyo/uniface

theme:
  name: material
  features:
    - announce.dismiss
    - content.action.edit
    - content.action.view
    - content.code.annotate
    - content.code.copy
    - content.tooltips
    - navigation.footer
    - navigation.indexes
    - navigation.sections
    - navigation.tabs
    - navigation.top
    - navigation.tracking
    - search.highlight
    - search.share
    - search.suggest
    - toc.follow
    - content.code.expand
  palette:
    - media: "(prefers-color-scheme)"
      toggle:
        icon: material/lightbulb-outline
        name: Switch to light mode
    - media: "(prefers-color-scheme: light)"
      scheme: default
      primary: indigo
      accent: indigo
      toggle:
        icon: material/lightbulb
        name: Switch to dark mode
    - media: "(prefers-color-scheme: dark)"
      scheme: slate
      primary: black
      accent: indigo
      toggle:
        icon: material/lightbulb-off-outline
        name: Switch to system preference
  font:
    text: Roboto
    code: Roboto Mono
  favicon: assets/favicon.png
  icon:
    logo: logo

nav:
  - Home: index.md
  - Overview: overview.md
  - Installation: installation.md
  - Usage: usage.md
  - Models:
      - Face Detection: models/detection.md
      - Face Recognition: models/recognition.md
      - Landmark Detection: models/landmarks.md
      - Age & Gender Estimation: models/age_gender.md
      - Gaze Estimation: models/gaze.md
  - Tutorials:
      - Quickstart: tutorials/quickstart.md
      - App Integration: tutorials/integration.md
      - Performance Tips: tutorials/performance.md
  - API Reference:
      - Detection: reference/detection.md
      - Recognition: reference/recognition.md
      - Landmark: reference/landmark.md
      - Attribute: reference/attribute.md
  - About:
      - Changelog: about/changelog.md
      - License: about/license.md
      - Code of Conduct: about/conduct.md

plugins:
  - blog
  - search:
      separator: '[\s\u200b\-_,:!=\[\]()"`/]+|\.(?!\d)|&[lg]t;|(?!\b)(?=[A-Z][a-z])'
  - minify:
      minify_html: true
  - mkdocstrings:
      default_handler: python
@@ -1,6 +1,6 @@
[project]
name = "uniface"
version = "0.1.8"
version = "0.1.9"
description = "UniFace: A Comprehensive Library for Face Detection, Recognition, Landmark Analysis, Age, and Gender Detection"
readme = "README.md"
license = { text = "MIT" }
@@ -8,19 +8,20 @@ authors = [
    { name = "Yakhyokhuja Valikhujaev", email = "yakhyo9696@gmail.com" }
]
dependencies = [
    "numpy",
    "opencv-python",
    "onnx",
    "onnxruntime",
    "scikit-image",
    "requests",
    "tqdm"
    "numpy>=1.21.0",
    "opencv-python>=4.5.0",
    "onnx>=1.12.0",
    "onnxruntime>=1.16.0",
    "scikit-image>=0.19.0",
    "requests>=2.28.0",
    "tqdm>=4.64.0"
]
requires-python = ">=3.9"
requires-python = ">=3.10"

[project.optional-dependencies]
dev = ["pytest"]
gpu = ["onnxruntime-gpu"]
dev = ["pytest>=7.0.0"]
gpu = ["onnxruntime-gpu>=1.16.0"]
silicon = ["onnxruntime-silicon>=1.16.0"]

[project.urls]
Homepage = "https://github.com/yakhyo/uniface"

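With the new `silicon` extra defined above, Apple Silicon users should be able to pull in `onnxruntime-silicon` via `pip install "uniface[silicon]"` (standard pip extras syntax). A quick way to confirm CoreML is visible afterwards:

```python
import onnxruntime as ort

# On an M-series Mac with onnxruntime-silicon installed, this list
# should include 'CoreMLExecutionProvider'
print(ort.get_available_providers())
```
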
@@ -1,8 +1,8 @@
numpy
opencv-python
onnx
onnxruntime-gpu
scikit-image
requests
pytest
tqdm
numpy>=1.21.0
opencv-python>=4.5.0
onnx>=1.12.0
onnxruntime>=1.16.0
scikit-image>=0.19.0
requests>=2.28.0
pytest>=7.0.0
tqdm>=4.64.0

@@ -1,3 +0,0 @@
mkdocs-material
mkdocs-minify-plugin
mkdocstrings[python]
389
scripts/TESTING.md
Normal file
@@ -0,0 +1,389 @@
# Testing Scripts Guide

Complete guide to testing all scripts in the `scripts/` directory.

---

## 📁 Available Scripts

1. **download_model.py** - Download and verify model weights
2. **run_detection.py** - Face detection on images
3. **run_recognition.py** - Face recognition (extract embeddings)
4. **run_face_search.py** - Real-time face matching with webcam
5. **sha256_generate.py** - Generate SHA256 checksums for models

---

## Testing Each Script

### 1. Test Model Download

```bash
# Download a specific model
python scripts/download_model.py --model MNET_V2

# Download all RetinaFace models (takes ~5 minutes, ~200MB)
python scripts/download_model.py

# Verify models are cached
ls -lh ~/.uniface/models/
```

**Expected Output:**
```
📥 Downloading model: retinaface_mnet_v2
2025-11-08 00:00:00 - INFO - Downloading model 'RetinaFaceWeights.MNET_V2' from https://...
Downloading ~/.uniface/models/retinaface_mnet_v2.onnx: 100%|████| 3.5M/3.5M
2025-11-08 00:00:05 - INFO - Successfully downloaded 'RetinaFaceWeights.MNET_V2'
✅ All requested weights are ready and verified.
```

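The same check can be done from Python. A minimal sketch using `verify_model_weights` (exported by the package) and the `RetinaFaceWeights` enum used by the script above; it returns the cached path, downloading first if needed:

```python
from uniface.constants import RetinaFaceWeights
from uniface.model_store import verify_model_weights

# Downloads to ~/.uniface/models/ on first call, then reuses the cache
model_path = verify_model_weights(RetinaFaceWeights.MNET_V2)
print(model_path)
```
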
---

### 2. Test Face Detection

```bash
# Basic detection
python scripts/run_detection.py --image assets/test.jpg

# With custom settings
python scripts/run_detection.py \
    --image assets/test.jpg \
    --method scrfd \
    --threshold 0.7 \
    --save_dir outputs

# Benchmark mode (100 iterations)
python scripts/run_detection.py \
    --image assets/test.jpg \
    --iterations 100
```

**Expected Output:**
```
Initializing detector: retinaface
2025-11-08 00:00:00 - INFO - Initializing RetinaFace with model=RetinaFaceWeights.MNET_V2...
2025-11-08 00:00:01 - INFO - CoreML acceleration enabled (Apple Silicon)
✅ Output saved at: outputs/test_out.jpg
[1/1] ⏱️ Inference time: 0.0234 seconds
```

**Verify Output:**
```bash
# Check output image was created
ls -lh outputs/test_out.jpg

# View the image (macOS)
open outputs/test_out.jpg
```

---

### 3. Test Face Recognition (Embedding Extraction)

```bash
# Extract embeddings from an image
python scripts/run_recognition.py --image assets/test.jpg

# With different models
python scripts/run_recognition.py \
    --image assets/test.jpg \
    --detector scrfd \
    --recognizer mobileface
```

**Expected Output:**
```
Initializing detector: retinaface
Initializing recognizer: arcface
2025-11-08 00:00:00 - INFO - Successfully initialized face encoder from ~/.uniface/models/w600k_mbf.onnx
Detected 1 face(s). Extracting embeddings for the first face...
- Embedding shape: (1, 512)
- L2 norm of unnormalized embedding: 64.2341
- L2 norm of normalized embedding: 1.0000
```

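The norm lines at the end are the key sanity check. A small illustration of what "normalized" means here, in plain NumPy with a random vector standing in for a real embedding:

```python
import numpy as np

embedding = np.random.rand(1, 512).astype(np.float32)  # stand-in for a raw model output
normalized = embedding / np.linalg.norm(embedding)

print(np.linalg.norm(embedding))   # arbitrary magnitude (roughly 13 for this vector)
print(np.linalg.norm(normalized))  # 1.0 (unit length, ready for cosine similarity)
```
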
---

### 4. Test Real-Time Face Search (Webcam)

**Prerequisites:**

- Webcam connected
- Reference image with a clear face

```bash
# Basic usage
python scripts/run_face_search.py --image assets/test.jpg

# With custom models
python scripts/run_face_search.py \
    --image assets/test.jpg \
    --detector scrfd \
    --recognizer arcface
```

**Expected Behavior:**
1. Webcam window opens
2. Faces are detected in real-time
3. Green box = Match (similarity above the 0.4 threshold)
4. Red box = Unknown (similarity below the 0.4 threshold)
5. Press 'q' to quit

**Expected Output:**
```
Initializing models...
2025-11-08 00:00:00 - INFO - CoreML acceleration enabled (Apple Silicon)
Extracting reference embedding...
Webcam started. Press 'q' to quit.
```

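For reference, a hypothetical sketch of the matching logic behind the green/red boxes. It assumes `compute_similarity` (exported by `uniface`) returns a cosine-similarity score between two embeddings, and it reuses the dict-based detection API shown elsewhere in this guide; a still image stands in for a webcam frame:

```python
import cv2
import numpy as np
from uniface import RetinaFace, ArcFace, compute_similarity

detector = RetinaFace()
recognizer = ArcFace()

# Build the reference embedding once (assumes at least one face is found)
reference = cv2.imread("assets/test.jpg")
ref_face = detector.detect(reference)[0]
ref_embedding = recognizer.get_normalized_embedding(reference, np.array(ref_face["landmarks"]))

# Then, for each frame:
frame = cv2.imread("assets/test.jpg")
for face in detector.detect(frame):
    embedding = recognizer.get_normalized_embedding(frame, np.array(face["landmarks"]))
    similarity = compute_similarity(ref_embedding, embedding)
    color = (0, 255, 0) if similarity > 0.4 else (0, 0, 255)  # green = match, red = unknown
    x1, y1, x2, y2 = map(int, face["bbox"])
    cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
```
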
**Troubleshooting:**
```bash
# If webcam doesn't open
python -c "import cv2; cap = cv2.VideoCapture(0); print('Webcam OK' if cap.isOpened() else 'Webcam FAIL')"

# If no faces detected
# - Ensure good lighting
# - Face should be frontal and clearly visible
# - Try lowering threshold: edit script line 29, change 0.4 to 0.3
```

---

### 5. Test SHA256 Generator (For Developers)

```bash
# Generate checksum for a model file
python scripts/sha256_generate.py ~/.uniface/models/retinaface_mnet_v2.onnx

# Generate for all models
for model in ~/.uniface/models/*.onnx; do
    python scripts/sha256_generate.py "$model"
done
```

---

## 🔍 Quick Verification Tests

### Test 1: Imports Work

```bash
python -c "
from uniface.detection import create_detector
from uniface.recognition import create_recognizer
print('✅ Imports successful')
"
```

### Test 2: Models Download

```bash
python -c "
from uniface import RetinaFace
detector = RetinaFace()
print('✅ Model downloaded and loaded')
"
```

### Test 3: Detection Works

```bash
python -c "
import cv2
import numpy as np
from uniface import RetinaFace

detector = RetinaFace()
image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
faces = detector.detect(image)
print(f'✅ Detection works, found {len(faces)} faces')
"
```

### Test 4: Recognition Works

```bash
python -c "
import cv2
import numpy as np
from uniface import RetinaFace, ArcFace

detector = RetinaFace()
recognizer = ArcFace()
image = cv2.imread('assets/test.jpg')
faces = detector.detect(image)
if faces:
    landmarks = np.array(faces[0]['landmarks'])
    embedding = recognizer.get_normalized_embedding(image, landmarks)
    print(f'✅ Recognition works, embedding shape: {embedding.shape}')
else:
    print('⚠️ No faces detected in test image')
"
```

---

## End-to-End Test Workflow

Run this complete workflow to verify everything works:

```bash
#!/bin/bash
# Save as test_all_scripts.sh

echo "=== Testing UniFace Scripts ==="
echo ""

# Test 1: Download models
echo "1️⃣ Testing model download..."
python scripts/download_model.py --model MNET_V2
if [ $? -eq 0 ]; then
    echo "✅ Model download: PASS"
else
    echo "❌ Model download: FAIL"
    exit 1
fi
echo ""

# Test 2: Face detection
echo "2️⃣ Testing face detection..."
python scripts/run_detection.py --image assets/test.jpg --save_dir /tmp/uniface_test
if [ $? -eq 0 ] && [ -f /tmp/uniface_test/test_out.jpg ]; then
    echo "✅ Face detection: PASS"
else
    echo "❌ Face detection: FAIL"
    exit 1
fi
echo ""

# Test 3: Face recognition
echo "3️⃣ Testing face recognition..."
python scripts/run_recognition.py --image assets/test.jpg > /tmp/uniface_recognition.log
if [ $? -eq 0 ] && grep -q "Embedding shape" /tmp/uniface_recognition.log; then
    echo "✅ Face recognition: PASS"
else
    echo "❌ Face recognition: FAIL"
    exit 1
fi
echo ""

echo "=== All Tests Passed! 🎉 ==="
```

**Run the test suite:**
```bash
chmod +x test_all_scripts.sh
./test_all_scripts.sh
```

---

## Performance Benchmarking

### Benchmark Detection Speed

```bash
# Test different models
for model in retinaface scrfd; do
    echo "Testing $model..."
    python scripts/run_detection.py \
        --image assets/test.jpg \
        --method $model \
        --iterations 50
done
```

### Benchmark Recognition Speed

```bash
# Test different recognizers
for recognizer in arcface mobileface; do
    echo "Testing $recognizer..."
    time python scripts/run_recognition.py \
        --image assets/test.jpg \
        --recognizer $recognizer
done
```

---

## 🐛 Common Issues

### Issue: "No module named 'uniface'"

```bash
# Solution: Install in editable mode
pip install -e .
```

### Issue: "Failed to load image"

```bash
# Check image exists
ls -lh assets/test.jpg

# Try with absolute path
python scripts/run_detection.py --image $(pwd)/assets/test.jpg
```

### Issue: "No faces detected"

```bash
# Lower confidence threshold
python scripts/run_detection.py \
    --image assets/test.jpg \
    --threshold 0.3
```

### Issue: Models downloading slowly

```bash
# Check internet connection
curl -I https://github.com/yakhyo/uniface/releases

# Or download manually
wget https://github.com/yakhyo/uniface/releases/download/v0.1.2/retinaface_mv2.onnx \
    -O ~/.uniface/models/retinaface_mnet_v2.onnx
```

### Issue: CoreML not available on Mac

```bash
# Install CoreML-enabled ONNX Runtime
pip uninstall onnxruntime
pip install onnxruntime-silicon

# Verify
python -c "import onnxruntime as ort; print(ort.get_available_providers())"
# Should show: ['CoreMLExecutionProvider', 'CPUExecutionProvider']
```

---

## ✅ Script Status Summary

| Script             | Status | API Updated | Tested |
|--------------------|--------|-------------|--------|
| download_model.py  | ✅     | ✅          | ✅     |
| run_detection.py   | ✅     | ✅          | ✅     |
| run_recognition.py | ✅     | ✅          | ✅     |
| run_face_search.py | ✅     | ✅          | ✅     |
| sha256_generate.py | ✅     | N/A         | ✅     |

All scripts are updated and working with the new dict-based API! 🎉

---

## 📝 Notes

- All scripts now use the factory functions (`create_detector`, `create_recognizer`); see the sketch below
- Scripts work with the new dict-based detection API
- Model download bug is fixed (enum vs string issue)
- CoreML acceleration is automatically detected on Apple Silicon
- All scripts include proper error handling
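A short sketch of that factory-function, dict-based flow; the method names match those used in the commands above:

```python
import cv2
from uniface.detection import create_detector
from uniface.recognition import create_recognizer

detector = create_detector(method="retinaface")
recognizer = create_recognizer(method="arcface")

image = cv2.imread("assets/test.jpg")
for face in detector.detect(image):
    # Each detection is a dict with 'bbox', 'confidence', and 'landmarks' keys
    print(face["bbox"], face["confidence"])
```
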
---

Need help with a specific script? Check the main [README.md](../README.md) or [QUICKSTART.md](../QUICKSTART.md)!

@@ -16,11 +16,11 @@ def main():
    if args.model:
        weight = RetinaFaceWeights[args.model]
        print(f"📥 Downloading model: {weight.value}")
        verify_model_weights(weight.value)
        verify_model_weights(weight)  # Pass enum, not string
    else:
        print("📥 Downloading all models...")
        for weight in RetinaFaceWeights:
            verify_model_weights(weight.value)
            verify_model_weights(weight)  # Pass enum, not string

    print("✅ All requested weights are ready and verified.")

@@ -6,9 +6,6 @@ import numpy as np
from uniface.detection import create_detector
from uniface.recognition import create_recognizer

# Import enums for argument choices
from uniface.constants import RetinaFaceWeights, ArcFaceWeights, MobileFaceWeights, SphereFaceWeights


def run_inference(detector, recognizer, image_path: str):
    """
@@ -67,7 +64,7 @@ def main():
    args = parser.parse_args()

    print(f"Initializing detector: {args.detector}")
    detector = create_detector(method=args.detector, model_name=RetinaFaceWeights.MNET_V2)
    detector = create_detector(method=args.detector)

    print(f"Initializing recognizer: {args.recognizer}")
    recognizer = create_recognizer(method=args.recognizer)

@@ -1,7 +1,8 @@
import pytest
import numpy as np
from uniface import RetinaFace
import pytest

from uniface.constants import RetinaFaceWeights
from uniface.detection import RetinaFace


@pytest.fixture
@@ -32,20 +33,27 @@ def test_inference_on_640x640_image(retinaface_model):
    # Generate a mock 640x640 BGR image
    mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)

    # Run inference
    detections, landmarks = retinaface_model.detect(mock_image)
    # Run inference - returns list of dictionaries
    faces = retinaface_model.detect(mock_image)

    # Check output types
    assert isinstance(detections, np.ndarray), "Detections should be a numpy array."
    assert isinstance(landmarks, np.ndarray), "Landmarks should be a numpy array."
    # Check output type
    assert isinstance(faces, list), "Detections should be a list."

    # Check that detections have the expected shape
    if detections.size > 0:  # If faces are detected
        assert detections.shape[1] == 5, "Each detection should have 5 values (x1, y1, x2, y2, score)."
    # Check that each face has the expected structure
    for face in faces:
        assert isinstance(face, dict), "Each detection should be a dictionary."
        assert "bbox" in face, "Each detection should have a 'bbox' key."
        assert "confidence" in face, "Each detection should have a 'confidence' key."
        assert "landmarks" in face, "Each detection should have a 'landmarks' key."

    # Check landmarks shape
    if landmarks.size > 0:
        assert landmarks.shape[1:] == (5, 2), "Landmarks should have shape (N, 5, 2)."
        # Check bbox format
        bbox = face["bbox"]
        assert len(bbox) == 4, "BBox should have 4 values (x1, y1, x2, y2)."

        # Check landmarks format
        landmarks = face["landmarks"]
        assert len(landmarks) == 5, "Should have 5 landmark points."
        assert all(len(pt) == 2 for pt in landmarks), "Each landmark should be (x, y)."

def test_confidence_threshold(retinaface_model):
@@ -56,12 +64,12 @@ def test_confidence_threshold(retinaface_model):
    mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)

    # Run inference
    detections, _ = retinaface_model.detect(mock_image)
    faces = retinaface_model.detect(mock_image)

    # Ensure all detections have confidence scores above the threshold
    if detections.size > 0:  # If faces are detected
        confidence_scores = detections[:, 4]
        assert (confidence_scores >= 0.5).all(), "Some detections have confidence below the threshold."
    for face in faces:
        confidence = face["confidence"]
        assert confidence >= 0.5, f"Detection has confidence {confidence} below threshold 0.5"


def test_no_faces_detected(retinaface_model):
@@ -72,8 +80,7 @@ def test_no_faces_detected(retinaface_model):
    empty_image = np.zeros((640, 640, 3), dtype=np.uint8)

    # Run inference
    detections, landmarks = retinaface_model.detect(empty_image)
    faces = retinaface_model.detect(empty_image)

    # Ensure no detections or landmarks are found
    assert detections.size == 0, "Detections should be empty for a blank image."
    assert landmarks.size == 0, "Landmarks should be empty for a blank image."
    # Ensure no detections are found
    assert len(faces) == 0, "Should detect no faces in a blank image."

@@ -13,35 +13,45 @@

__license__ = "MIT"
__author__ = "Yakhyokhuja Valikhujaev"
__version__ = "0.1.8"
__version__ = "0.1.9"


from .detection import detect_faces, create_detector, list_available_detectors
from .recognition import create_recognizer
from .landmark import create_landmarker

from uniface.face_utils import face_alignment, compute_similarity
from uniface.face_utils import compute_similarity, face_alignment
from uniface.log import Logger
from uniface.model_store import verify_model_weights
from uniface.visualization import draw_detections

from uniface.log import Logger

from .attribute import AgeGender, Emotion
from .detection import SCRFD, RetinaFace, create_detector, detect_faces, list_available_detectors
from .landmark import Landmark106, create_landmarker
from .recognition import ArcFace, MobileFace, SphereFace, create_recognizer

__all__ = [
    '__author__',
    '__license__',
    '__version__',

    'create_detector',
    'create_landmarker',
    'create_recognizer',
    'detect_faces',
    'list_available_detectors',

    'compute_similarity',
    'draw_detections',
    'face_alignment',
    'verify_model_weights',

    'Logger'
    "__author__",
    "__license__",
    "__version__",
    # Factory functions
    "create_detector",
    "create_landmarker",
    "create_recognizer",
    "detect_faces",
    "list_available_detectors",
    # Detection models
    "RetinaFace",
    "SCRFD",
    # Recognition models
    "ArcFace",
    "MobileFace",
    "SphereFace",
    # Landmark models
    "Landmark106",
    # Attribute models
    "AgeGender",
    "Emotion",
    # Utilities
    "compute_similarity",
    "draw_detections",
    "face_alignment",
    "verify_model_weights",
    "Logger",
]

@@ -2,16 +2,17 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

from typing import List, Tuple, Union

import cv2
import numpy as np
import onnxruntime as ort
from typing import Tuple, Union, List

from uniface.attribute.base import Attribute
from uniface.log import Logger
from uniface.constants import AgeGenderWeights
from uniface.face_utils import bbox_center_alignment
from uniface.log import Logger
from uniface.model_store import verify_model_weights
from uniface.onnx_utils import create_onnx_session

__all__ = ["AgeGender"]

@@ -42,10 +43,7 @@ class AgeGender(Attribute):
        Initializes the ONNX model and creates an inference session.
        """
        try:
            self.session = ort.InferenceSession(
                self.model_path,
                providers=["CUDAExecutionProvider", "CPUExecutionProvider"]
            )
            self.session = create_onnx_session(self.model_path)
            # Get model input details from the loaded model
            input_meta = self.session.get_inputs()[0]
            self.input_name = input_meta.name
@@ -75,16 +73,10 @@ class AgeGender(Attribute):

        # **Rotation parameter restored here**
        rotation = 0.0
        aligned_face, _ = bbox_center_alignment(
            image, center, self.input_size[1], scale, rotation
        )
        aligned_face, _ = bbox_center_alignment(image, center, self.input_size[1], scale, rotation)

        blob = cv2.dnn.blobFromImage(
            aligned_face,
            scalefactor=1.0,
            size=self.input_size[::-1],
            mean=(0.0, 0.0, 0.0),
            swapRB=True
            aligned_face, scalefactor=1.0, size=self.input_size[::-1], mean=(0.0, 0.0, 0.0), swapRB=True
        )
        return blob

@@ -127,8 +119,8 @@ class AgeGender(Attribute):
if __name__ == "__main__":
    # To run this script, you need to have uniface.detection installed
    # or available in your path.
    from uniface.detection import create_detector
    from uniface.constants import RetinaFaceWeights
    from uniface.detection import create_detector

    print("Initializing models for live inference...")
    # 1. Initialize the face detector
@@ -156,7 +148,7 @@ if __name__ == "__main__":

    # For each detected face, predict age and gender
    for detection in detections:
        box = detection['bbox']
        box = detection["bbox"]
        x1, y1, x2, y2 = map(int, box)

        # Predict attributes
@@ -171,7 +163,7 @@ if __name__ == "__main__":
        cv2.imshow("Age and Gender Inference (Press 'q' to quit)", frame)

        # Break the loop if 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break

    # Release resources

@@ -3,13 +3,13 @@
# GitHub: https://github.com/yakhyo

import numpy as np
import onnxruntime as ort

from typing import Tuple, List, Literal, Dict, Any

from uniface.log import Logger
from uniface.model_store import verify_model_weights
from uniface.constants import RetinaFaceWeights
from uniface.onnx_utils import create_onnx_session

from .base import BaseDetector
from .utils import (
@@ -95,10 +95,7 @@ class RetinaFace(BaseDetector):
            RuntimeError: If the model fails to load, logs an error and raises an exception.
        """
        try:
            self.session = ort.InferenceSession(
                model_path,
                providers=["CUDAExecutionProvider", "CPUExecutionProvider"]
            )
            self.session = create_onnx_session(model_path)
            self.input_names = self.session.get_inputs()[0].name
            self.output_names = [x.name for x in self.session.get_outputs()]
            Logger.info(f"Successfully initialized the model from {model_path}")

@@ -2,20 +2,20 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

from typing import Any, Dict, List, Literal, Tuple

import cv2
import numpy as np
import onnxruntime as ort

from typing import Tuple, List, Literal, Dict, Any

from uniface.log import Logger
from uniface.constants import SCRFDWeights
from uniface.log import Logger
from uniface.model_store import verify_model_weights
from uniface.onnx_utils import create_onnx_session

from .base import BaseDetector
from .utils import non_max_supression, distance2bbox, distance2kps, resize_image
from .utils import distance2bbox, distance2kps, non_max_supression, resize_image

__all__ = ['SCRFD']
__all__ = ["SCRFD"]


class SCRFD(BaseDetector):
@@ -52,10 +52,10 @@ class SCRFD(BaseDetector):
        super().__init__(**kwargs)
        self._supports_landmarks = True  # SCRFD supports landmarks

        model_name = kwargs.get('model_name', SCRFDWeights.SCRFD_10G_KPS)
        conf_thresh = kwargs.get('conf_thresh', 0.5)
        nms_thresh = kwargs.get('nms_thresh', 0.4)
        input_size = kwargs.get('input_size', (640, 640))
        model_name = kwargs.get("model_name", SCRFDWeights.SCRFD_10G_KPS)
        conf_thresh = kwargs.get("conf_thresh", 0.5)
        nms_thresh = kwargs.get("nms_thresh", 0.4)
        input_size = kwargs.get("input_size", (640, 640))

        self.conf_thresh = conf_thresh
        self.nms_thresh = nms_thresh
@@ -91,10 +91,7 @@ class SCRFD(BaseDetector):
            RuntimeError: If the model fails to load, logs an error and raises an exception.
        """
        try:
            self.session = ort.InferenceSession(
                model_path,
                providers=["CUDAExecutionProvider", "CPUExecutionProvider"]
            )
            self.session = create_onnx_session(model_path)
            self.input_names = self.session.get_inputs()[0].name
            self.output_names = [x.name for x in self.session.get_outputs()]
            Logger.info(f"Successfully initialized the model from {model_path}")
@@ -176,11 +173,7 @@ class SCRFD(BaseDetector):
        return scores_list, bboxes_list, kpss_list

    def detect(
        self,
        image: np.ndarray,
        max_num: int = 0,
        metric: Literal["default", "max"] = "max",
        center_weight: float = 2
        self, image: np.ndarray, max_num: int = 0, metric: Literal["default", "max"] = "max", center_weight: float = 2
    ) -> List[Dict[str, Any]]:
        """
        Perform face detection on an input image and return bounding boxes and facial landmarks.
@@ -212,6 +205,10 @@ class SCRFD(BaseDetector):

        scores_list, bboxes_list, kpss_list = self.postprocess(outputs, image_size=image.shape[:2])

        # Handle case when no faces are detected
        if not scores_list:
            return []

        scores = np.vstack(scores_list)
        scores_ravel = scores.ravel()
        order = scores_ravel.argsort()[::-1]
@@ -256,9 +253,9 @@ class SCRFD(BaseDetector):
        faces = []
        for i in range(detections.shape[0]):
            face_dict = {
                'bbox': detections[i, :4].astype(float).tolist(),
                'confidence': detections[i, 4].item(),
                'landmarks': landmarks[i].astype(float).tolist()
                "bbox": detections[i, :4].astype(float).tolist(),
                "confidence": detections[i, 4].item(),
                "landmarks": landmarks[i].astype(float).tolist(),
            }
            faces.append(face_dict)

@@ -273,7 +270,7 @@ def draw_bbox(frame, bbox, score, color=(0, 255, 0), thickness=2):


def draw_keypoints(frame, points, color=(0, 0, 255), radius=2):
    for (x, y) in points.astype(np.int32):
    for x, y in points.astype(np.int32):
        cv2.circle(frame, (int(x), int(y)), radius, color, -1)


@@ -300,9 +297,9 @@ if __name__ == "__main__":
    # Process each detected face
    for face in faces:
        # Extract bbox and landmarks from dictionary
        bbox = face['bbox']  # [x1, y1, x2, y2]
        landmarks = face['landmarks']  # [[x1, y1], [x2, y2], ...]
        confidence = face['confidence']
        bbox = face["bbox"]  # [x1, y1, x2, y2]
        landmarks = face["landmarks"]  # [[x1, y1], [x2, y2], ...]
        confidence = face["confidence"]

        # Pass bbox and confidence separately
        draw_bbox(frame, bbox, confidence)
@@ -314,8 +311,7 @@ if __name__ == "__main__":
        draw_keypoints(frame, points)

        # Display face count
        cv2.putText(frame, f"Faces: {len(faces)}", (10, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
        cv2.putText(frame, f"Faces: {len(faces)}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)

        cv2.imshow("FaceDetection", frame)
        if cv2.waitKey(1) & 0xFF == ord("q"):

@@ -4,13 +4,13 @@

import cv2
import numpy as np
import onnxruntime as ort
from typing import Tuple

from uniface.log import Logger
from uniface.constants import LandmarkWeights
from uniface.model_store import verify_model_weights
from uniface.face_utils import bbox_center_alignment, transform_points_2d
from uniface.onnx_utils import create_onnx_session
from .base import BaseLandmarker

__all__ = ['Landmark']
@@ -63,10 +63,7 @@ class Landmark106(BaseLandmarker):
            RuntimeError: If the model fails to load or initialize.
        """
        try:
            self.session = ort.InferenceSession(
                self.model_path,
                providers=["CUDAExecutionProvider", "CPUExecutionProvider"]
            )
            self.session = create_onnx_session(self.model_path)

            # Get input configuration
            input_metadata = self.session.get_inputs()[0]

@@ -46,14 +46,14 @@ def verify_model_weights(model_name: str, root: str = '~/.uniface/models') -> str
    root = os.path.expanduser(root)
    os.makedirs(root, exist_ok=True)

    model_name = model_name.value
    # Keep model_name as enum for dictionary lookup
    url = const.MODEL_URLS.get(model_name)
    if not url:
        Logger.error(f"No URL found for model '{model_name}'")
        raise ValueError(f"No URL found for model '{model_name}'")

    file_ext = os.path.splitext(url)[1]
    model_path = os.path.normpath(os.path.join(root, f'{model_name}{file_ext}'))
    model_path = os.path.normpath(os.path.join(root, f'{model_name.value}{file_ext}'))

    if not os.path.exists(model_path):
        Logger.info(f"Downloading model '{model_name}' from {url}")

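The fix above hinges on `MODEL_URLS` being keyed by enum members rather than strings. A hypothetical minimal reproduction of the old bug; the enum name and value mirror the constants referenced in this diff, and the URL is a placeholder:

```python
from enum import Enum

class RetinaFaceWeights(Enum):
    MNET_V2 = "retinaface_mnet_v2"

# Enum-keyed, as in uniface.constants (URL elided)
MODEL_URLS = {RetinaFaceWeights.MNET_V2: "https://..."}

weight = RetinaFaceWeights.MNET_V2
assert MODEL_URLS.get(weight) is not None    # enum key: found
assert MODEL_URLS.get(weight.value) is None  # string key: the old "No URL found" failure
```
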
87
uniface/onnx_utils.py
Normal file
@@ -0,0 +1,87 @@
# Copyright 2025 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

"""
Utilities for ONNX Runtime configuration and provider selection.
"""

from typing import List

import onnxruntime as ort

from uniface.log import Logger


def get_available_providers() -> List[str]:
    """
    Get list of available ONNX Runtime execution providers for the current platform.

    Automatically detects and prioritizes hardware acceleration:
    - CoreML on Apple Silicon (M1/M2/M3/M4)
    - CUDA on NVIDIA GPUs
    - CPU as fallback (always available)

    Returns:
        List[str]: Ordered list of execution providers to use

    Examples:
        >>> providers = get_available_providers()
        >>> # On M4 Mac: ['CoreMLExecutionProvider', 'CPUExecutionProvider']
        >>> # On Linux with CUDA: ['CUDAExecutionProvider', 'CPUExecutionProvider']
        >>> # On CPU-only: ['CPUExecutionProvider']
    """
    available = ort.get_available_providers()
    providers = []

    # Priority order: CoreML > CUDA > CPU
    if "CoreMLExecutionProvider" in available:
        providers.append("CoreMLExecutionProvider")
        Logger.info("CoreML acceleration enabled (Apple Silicon)")

    if "CUDAExecutionProvider" in available:
        providers.append("CUDAExecutionProvider")
        Logger.info("CUDA acceleration enabled (NVIDIA GPU)")

    # CPU is always available as fallback
    providers.append("CPUExecutionProvider")

    if len(providers) == 1:
        Logger.info("Using CPU execution (no hardware acceleration detected)")

    return providers


def create_onnx_session(model_path: str, providers: List[str] = None) -> ort.InferenceSession:
    """
    Create an ONNX Runtime inference session with optimal provider selection.

    Args:
        model_path (str): Path to the ONNX model file
        providers (List[str], optional): List of providers to use.
            If None, automatically detects best available providers.

    Returns:
        ort.InferenceSession: Configured ONNX Runtime session

    Raises:
        RuntimeError: If session creation fails

    Examples:
        >>> session = create_onnx_session("model.onnx")
        >>> # Automatically uses best available providers

        >>> session = create_onnx_session("model.onnx", providers=["CPUExecutionProvider"])
        >>> # Force CPU-only execution
    """
    if providers is None:
        providers = get_available_providers()

    try:
        session = ort.InferenceSession(model_path, providers=providers)
        active_provider = session.get_providers()[0]
        Logger.debug(f"Session created with provider: {active_provider}")
        return session
    except Exception as e:
        Logger.error(f"Failed to create ONNX session: {e}", exc_info=True)
        raise RuntimeError(f"Failed to initialize ONNX Runtime session: {e}") from e
@@ -5,12 +5,12 @@
from abc import ABC, abstractmethod
import cv2
import numpy as np
import onnxruntime as ort
from dataclasses import dataclass
from typing import Tuple, Union, List

from uniface.log import Logger
from uniface.face_utils import face_alignment
from uniface.onnx_utils import create_onnx_session


@dataclass
@@ -53,10 +53,7 @@ class BaseRecognizer(ABC):
        """
        try:
            # Initialize model session with available providers
            self.session = ort.InferenceSession(
                self.model_path,
                providers=["CUDAExecutionProvider", "CPUExecutionProvider"]
            )
            self.session = create_onnx_session(self.model_path)

            # Extract input configuration
            input_cfg = self.session.get_inputs()[0]

@@ -1,262 +0,0 @@
# Copyright 2025 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

import os
import cv2
import numpy as np
import onnxruntime as ort

from typing import Tuple, List, Literal

from uniface.log import Logger
from uniface.model_store import verify_model_weights
from uniface.constants import RetinaFaceWeights
from uniface.common import (
    non_max_supression,
    resize_image,
    decode_boxes,
    generate_anchors,
    decode_landmarks
)


class RetinaFace:
    """
    Face detector based on the RetinaFace architecture.

    Args:
        model_name (RetinaFaceWeights): Model weights to use. Defaults to `RetinaFaceWeights.MNET_V2`.
        conf_thresh (float): Confidence threshold for filtering detections. Defaults to 0.5.
        nms_thresh (float): Non-maximum suppression (NMS) threshold. Defaults to 0.4.
        pre_nms_topk (int): Number of top-scoring boxes considered before applying NMS. Defaults to 5000.
        post_nms_topk (int): Maximum number of final detections retained after NMS. Defaults to 750.
        dynamic_size (bool): If True, anchors are generated dynamically per input image size. Defaults to False.
        input_size (Tuple[int, int]): Fixed input size (width, height) used when `dynamic_size` is False. Ignored if `dynamic_size=True`.

    Attributes:
        conf_thresh (float): Threshold for filtering detections based on confidence score.
        nms_thresh (float): IoU threshold for NMS.
        pre_nms_topk (int): Limit on boxes considered before NMS.
        post_nms_topk (int): Limit on detections kept after NMS.
        dynamic_size (bool): Whether anchors are generated dynamically.
        input_size (Tuple[int, int]): Static input size when `dynamic_size` is False.
        _model_path (str): Path to verified model weights. (Internal)
        _priors (np.ndarray): Anchor boxes used for detection. Precomputed if static input size is used. (Internal)

    Raises:
        ValueError: If model weights are invalid or not found.
        RuntimeError: If the model fails to initialize.
    """

    def __init__(
        self,
        model_name: RetinaFaceWeights = RetinaFaceWeights.MNET_V2,
        conf_thresh: float = 0.5,
        nms_thresh: float = 0.4,
        pre_nms_topk: int = 5000,
        post_nms_topk: int = 750,
        dynamic_size: bool = False,
        input_size: Tuple[int, int] = (640, 640),  # Default input size if dynamic_size=False
    ) -> None:

        self.conf_thresh = conf_thresh
        self.nms_thresh = nms_thresh
        self.pre_nms_topk = pre_nms_topk
        self.post_nms_topk = post_nms_topk
        self.dynamic_size = dynamic_size
        self.input_size = input_size

        Logger.info(
            f"Initializing RetinaFace with model={model_name}, conf_thresh={conf_thresh}, nms_thresh={nms_thresh}, "
            f"pre_nms_topk={pre_nms_topk}, post_nms_topk={post_nms_topk}, dynamic_size={dynamic_size}, "
            f"input_size={input_size}"
        )

        # Get path to model weights
        self._model_path = verify_model_weights(model_name)
        Logger.info(f"Verified model weights located at: {self._model_path}")

        # Precompute anchors if using static size
        if not dynamic_size and input_size is not None:
            self._priors = generate_anchors(image_size=input_size)
            Logger.debug("Generated anchors for static input size.")

        # Initialize model
        self._initialize_model(self._model_path)

    def _initialize_model(self, model_path: str) -> None:
        """
        Initializes an ONNX model session from the given path.

        Args:
            model_path (str): The file path to the ONNX model.

        Raises:
            RuntimeError: If the model fails to load, logs an error and raises an exception.
        """
        try:
            self.session = ort.InferenceSession(
                model_path,
                providers=["CUDAExecutionProvider", "CPUExecutionProvider"]
            )
            self.input_names = self.session.get_inputs()[0].name
            self.output_names = [x.name for x in self.session.get_outputs()]
            Logger.info(f"Successfully initialized the model from {model_path}")
        except Exception as e:
            Logger.error(f"Failed to load model from '{model_path}': {e}", exc_info=True)
            raise RuntimeError(f"Failed to initialize model session for '{model_path}'") from e

    def preprocess(self, image: np.ndarray) -> np.ndarray:
        """Preprocess input image for model inference.

        Args:
            image (np.ndarray): Input image.

        Returns:
            np.ndarray: Preprocessed image tensor with shape (1, C, H, W)
        """
        image = np.float32(image) - np.array([104, 117, 123], dtype=np.float32)
        image = image.transpose(2, 0, 1)  # HWC to CHW
        image = np.expand_dims(image, axis=0)  # Add batch dimension (1, C, H, W)
        return image

    def inference(self, input_tensor: np.ndarray) -> List[np.ndarray]:
        """Perform model inference on the preprocessed image tensor.

        Args:
            input_tensor (np.ndarray): Preprocessed input tensor.

        Returns:
            Tuple[np.ndarray, np.ndarray]: Raw model outputs.
        """
        return self.session.run(self.output_names, {self.input_names: input_tensor})

    def detect(
        self,
        image: np.ndarray,
        max_num: int = 0,
        metric: Literal["default", "max"] = "max",
        center_weight: float = 2.0
    ) -> Tuple[np.ndarray, np.ndarray]:
        """
        Perform face detection on an input image and return bounding boxes and facial landmarks.

        Args:
            image (np.ndarray): Input image as a NumPy array of shape (H, W, C).
            max_num (int): Maximum number of detections to return. Use 0 to return all detections. Defaults to 0.
            metric (Literal["default", "max"]): Metric for ranking detections when `max_num` is limited.
                - "default": Prioritize detections closer to the image center.
                - "max": Prioritize detections with larger bounding box areas.
            center_weight (float): Weight for penalizing detections farther from the image center
                when using the "default" metric. Defaults to 2.0.

        Returns:
            Tuple[np.ndarray, np.ndarray]:
                - detections: Bounding boxes with confidence scores. Shape (N, 5), each row as [x_min, y_min, x_max, y_max, score].
                - landmarks: Facial landmark coordinates. Shape (N, 5, 2), where each row contains 5 (x, y) points.
        """

        original_height, original_width = image.shape[:2]

        if self.dynamic_size:
            height, width, _ = image.shape
            self._priors = generate_anchors(image_size=(height, width))  # generate anchors for each input image
            resize_factor = 1.0  # No resizing
        else:
            image, resize_factor = resize_image(image, target_shape=self.input_size)

        height, width, _ = image.shape
        image_tensor = self.preprocess(image)

        # ONNXRuntime inference
        outputs = self.inference(image_tensor)

        # Postprocessing
        detections, landmarks = self.postprocess(outputs, resize_factor, shape=(width, height))

        if max_num > 0 and detections.shape[0] > max_num:
            # Calculate area of detections
            areas = (detections[:, 2] - detections[:, 0]) * (detections[:, 3] - detections[:, 1])

            # Calculate offsets from image center
            center = (original_height // 2, original_width // 2)
            offsets = np.vstack([
                (detections[:, 0] + detections[:, 2]) / 2 - center[1],
                (detections[:, 1] + detections[:, 3]) / 2 - center[0]
            ])
            offset_dist_squared = np.sum(np.power(offsets, 2.0), axis=0)

            # Calculate scores based on the chosen metric
            if metric == 'max':
                scores = areas
            else:
                scores = areas - offset_dist_squared * center_weight

            # Sort by scores and select top `max_num`
            sorted_indices = np.argsort(scores)[::-1][:max_num]

            detections = detections[sorted_indices]
            landmarks = landmarks[sorted_indices]

        return detections, landmarks

    def postprocess(self, outputs: List[np.ndarray], resize_factor: float, shape: Tuple[int, int]) -> Tuple[np.ndarray, np.ndarray]:
        """
        Process the model outputs into final detection results.

        Args:
            outputs (List[np.ndarray]): Raw outputs from the detection model.
                - outputs[0]: Location predictions (bounding box coordinates).
                - outputs[1]: Class confidence scores.
                - outputs[2]: Landmark predictions.
            resize_factor (float): Factor used to resize the input image during preprocessing.
            shape (Tuple[int, int]): Original shape of the image as (height, width).

        Returns:
            Tuple[np.ndarray, np.ndarray]: Processed results containing:
                - detections (np.ndarray): Array of detected bounding boxes with confidence scores.
                    Shape: (num_detections, 5), where each row is [x_min, y_min, x_max, y_max, score].
                - landmarks (np.ndarray): Array of detected facial landmarks.
                    Shape: (num_detections, 5, 2), where each row contains 5 landmark points (x, y).
        """
        loc, conf, landmarks = outputs[0].squeeze(0), outputs[1].squeeze(0), outputs[2].squeeze(0)

        # Decode boxes and landmarks
        boxes = decode_boxes(loc, self._priors)
        landmarks = decode_landmarks(landmarks, self._priors)

        boxes, landmarks = self._scale_detections(boxes, landmarks, resize_factor, shape=(shape[0], shape[1]))

        # Extract confidence scores for the face class
        scores = conf[:, 1]
        mask = scores > self.conf_thresh

        # Filter by confidence threshold
        boxes, landmarks, scores = boxes[mask], landmarks[mask], scores[mask]

        # Sort by scores
        order = scores.argsort()[::-1][:self.pre_nms_topk]
        boxes, landmarks, scores = boxes[order], landmarks[order], scores[order]

        # Apply NMS
        detections = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
        keep = non_max_supression(detections, self.nms_thresh)
        detections, landmarks = detections[keep], landmarks[keep]

        # Keep top-k detections
        detections, landmarks = detections[:self.post_nms_topk], landmarks[:self.post_nms_topk]

        landmarks = landmarks.reshape(-1, 5, 2).astype(np.int32)

        return detections, landmarks

    def _scale_detections(self, boxes: np.ndarray, landmarks: np.ndarray, resize_factor: float, shape: Tuple[int, int]) -> Tuple[np.ndarray, np.ndarray]:
        # Scale bounding boxes and landmarks to the original image size.
        bbox_scale = np.array([shape[0], shape[1]] * 2)
        boxes = boxes * bbox_scale / resize_factor

        landmark_scale = np.array([shape[0], shape[1]] * 5)
        landmarks = landmarks * landmark_scale / resize_factor

        return boxes, landmarks