Compare commits
74 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
39b50b62bd | ||
|
|
db7532ecf1 | ||
|
|
4b8dc2c0f9 | ||
|
|
0a2a10e165 | ||
|
|
84cda5f56c | ||
|
|
0771a7959a | ||
|
|
15947eb605 | ||
|
|
1ccc4f6b77 | ||
|
|
189755a1a6 | ||
|
|
11363fe0a8 | ||
|
|
fe3e70a352 | ||
|
|
8e218321a4 | ||
|
|
2c78f39e5d | ||
|
|
df673c4a3f | ||
|
|
496de7a491 | ||
|
|
89a05e4689 | ||
|
|
d3c2d959d0 | ||
|
|
f3d28d5ef5 | ||
|
|
c9f8215e28 | ||
|
|
0aea17d14d | ||
|
|
3cf13f70d4 | ||
|
|
666438909d | ||
|
|
77f14a616a | ||
|
|
98f8acc51b | ||
|
|
30a177981d | ||
|
|
0417f7531f | ||
|
|
b15504dfc5 | ||
|
|
fb29a919b1 | ||
|
|
b35b1a3f7c | ||
|
|
5bd6bb1673 | ||
|
|
cf5d06729d | ||
|
|
29964df259 | ||
|
|
eef4a0624a | ||
|
|
6a7ba6fc0a | ||
|
|
282737e0e9 | ||
|
|
597c86f997 | ||
|
|
d0446827e9 | ||
|
|
5f88345830 | ||
|
|
ab56589f77 | ||
|
|
d05e609ddf | ||
|
|
cee2b692ad | ||
|
|
67bb13c082 | ||
|
|
f9b4ea492b | ||
|
|
08f79e7d47 | ||
|
|
3a0b0e21b1 | ||
|
|
85cf413cb8 | ||
|
|
d1830c7058 | ||
|
|
204b1d75e1 | ||
|
|
777333eb2d | ||
|
|
c2d52e305a | ||
|
|
cd3ff79c2e | ||
|
|
fbca77e050 | ||
|
|
8dd7f3f101 | ||
|
|
0ae5714f99 | ||
|
|
64c9c2f452 | ||
|
|
31f97da783 | ||
|
|
d586cffb3a | ||
|
|
4256407044 | ||
|
|
e54607292f | ||
|
|
56ac8af432 | ||
|
|
904ba2be83 | ||
|
|
ad661da2f3 | ||
|
|
084b1132ad | ||
|
|
f22e8f01fb | ||
|
|
da09d7497d | ||
|
|
7330b4fd6e | ||
|
|
fa179c6a7a | ||
|
|
9acc6e344c | ||
|
|
9819520d76 | ||
|
|
db544d1a29 | ||
|
|
12ccac11b7 | ||
|
|
a158e47f52 | ||
|
|
5b4148f824 | ||
|
|
6a69739e8e |
BIN
.github/logos/logo_preview.jpg
vendored
Normal file
|
After Width: | Height: | Size: 826 KiB |
BIN
.github/logos/logo_readme.png
vendored
Normal file
|
After Width: | Height: | Size: 563 KiB |
BIN
.github/logos/logo_web.webp
vendored
Normal file
|
After Width: | Height: | Size: 33 KiB |
64
.github/workflows/build.yml
vendored
@@ -1,64 +0,0 @@
|
||||
name: Build, Test, and Publish
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
tags:
|
||||
- "v*.*.*" # Trigger publish on version tags
|
||||
pull_request:
|
||||
branches:
|
||||
- main
|
||||
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: ["3.8", "3.9", "3.10"]
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
python -m pip install .[dev] || pip install pytest # Use extras_require if available
|
||||
|
||||
- name: Run Tests
|
||||
run: |
|
||||
pytest
|
||||
|
||||
publish:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build # Publish only if tests pass
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: "3.10" # Use a single Python version for publishing
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
python -m pip install build twine
|
||||
|
||||
- name: Build Package
|
||||
run: python -m build
|
||||
|
||||
- name: Publish to PyPI
|
||||
env:
|
||||
TWINE_USERNAME: __token__
|
||||
TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
|
||||
run: twine upload dist/*
|
||||
87
.github/workflows/ci.yml
vendored
Normal file
@@ -0,0 +1,87 @@
|
||||
name: CI
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
- develop
|
||||
pull_request:
|
||||
branches:
|
||||
- main
|
||||
- develop
|
||||
|
||||
jobs:
|
||||
test:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
python-version: ["3.10", "3.11", "3.12", "3.13"]
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
cache: 'pip'
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
python -m pip install .[dev]
|
||||
|
||||
- name: Check ONNX Runtime providers
|
||||
run: |
|
||||
python -c "import onnxruntime as ort; print('Available providers:', ort.get_available_providers())"
|
||||
|
||||
- name: Lint with ruff (if available)
|
||||
run: |
|
||||
pip install ruff || true
|
||||
ruff check . --exit-zero || true
|
||||
continue-on-error: true
|
||||
|
||||
- name: Run tests
|
||||
run: pytest -v --tb=short
|
||||
|
||||
- name: Test package imports
|
||||
run: |
|
||||
python -c "from uniface import RetinaFace, ArcFace, Landmark106, AgeGender; print('All imports successful')"
|
||||
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
needs: test
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.10"
|
||||
cache: 'pip'
|
||||
|
||||
- name: Install build tools
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
python -m pip install build
|
||||
|
||||
- name: Build package
|
||||
run: python -m build
|
||||
|
||||
- name: Check package
|
||||
run: |
|
||||
python -m pip install twine
|
||||
twine check dist/*
|
||||
|
||||
- name: Upload build artifacts
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: dist-python-${{ github.sha }}
|
||||
path: dist/
|
||||
retention-days: 7
|
||||
|
||||
108
.github/workflows/publish.yml
vendored
Normal file
@@ -0,0 +1,108 @@
|
||||
name: Publish to PyPI
|
||||
|
||||
on:
|
||||
push:
|
||||
tags:
|
||||
- "v*.*.*" # Trigger only on version tags like v0.1.9
|
||||
|
||||
jobs:
|
||||
validate:
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
version: ${{ steps.get_version.outputs.version }}
|
||||
tag_version: ${{ steps.get_version.outputs.tag_version }}
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Get version from tag and pyproject.toml
|
||||
id: get_version
|
||||
run: |
|
||||
TAG_VERSION=${GITHUB_REF#refs/tags/v}
|
||||
echo "tag_version=$TAG_VERSION" >> $GITHUB_OUTPUT
|
||||
|
||||
PYPROJECT_VERSION=$(grep -Po '(?<=^version = ")[^"]*' pyproject.toml)
|
||||
echo "version=$PYPROJECT_VERSION" >> $GITHUB_OUTPUT
|
||||
|
||||
echo "Tag version: v$TAG_VERSION"
|
||||
echo "pyproject.toml version: $PYPROJECT_VERSION"
|
||||
|
||||
- name: Verify version match
|
||||
run: |
|
||||
if [ "${{ steps.get_version.outputs.tag_version }}" != "${{ steps.get_version.outputs.version }}" ]; then
|
||||
echo "Error: Tag version (${{ steps.get_version.outputs.tag_version }}) does not match pyproject.toml version (${{ steps.get_version.outputs.version }})"
|
||||
exit 1
|
||||
fi
|
||||
echo "Version validation passed: ${{ steps.get_version.outputs.version }}"
|
||||
|
||||
test:
|
||||
runs-on: ubuntu-latest
|
||||
needs: validate
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
python-version: ["3.10", "3.11", "3.12", "3.13"]
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
cache: 'pip'
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
python -m pip install .[dev]
|
||||
|
||||
- name: Run tests
|
||||
run: pytest -v
|
||||
|
||||
publish:
|
||||
runs-on: ubuntu-latest
|
||||
needs: [validate, test]
|
||||
permissions:
|
||||
contents: write
|
||||
id-token: write
|
||||
environment:
|
||||
name: pypi
|
||||
url: https://pypi.org/project/uniface/
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.10"
|
||||
cache: 'pip'
|
||||
|
||||
- name: Install build tools
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
python -m pip install build twine
|
||||
|
||||
- name: Build package
|
||||
run: python -m build
|
||||
|
||||
- name: Check package
|
||||
run: twine check dist/*
|
||||
|
||||
- name: Publish to PyPI
|
||||
env:
|
||||
TWINE_USERNAME: __token__
|
||||
TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
|
||||
run: twine upload dist/*
|
||||
|
||||
- name: Create GitHub Release
|
||||
uses: softprops/action-gh-release@v1
|
||||
with:
|
||||
files: dist/*
|
||||
generate_release_notes: true
|
||||
|
||||
2
.gitignore
vendored
@@ -1,3 +1,5 @@
|
||||
tmp_*
|
||||
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
|
||||
399
MODELS.md
Normal file
@@ -0,0 +1,399 @@
|
||||
# UniFace Model Zoo
|
||||
|
||||
Complete guide to all available models, their performance characteristics, and selection criteria.
|
||||
|
||||
---
|
||||
|
||||
## Face Detection Models
|
||||
|
||||
### RetinaFace Family
|
||||
|
||||
RetinaFace models are trained on the WIDER FACE dataset and provide excellent accuracy-speed tradeoffs.
|
||||
|
||||
| Model Name | Params | Size | Easy | Medium | Hard | Use Case |
|
||||
|---------------------|--------|--------|--------|--------|--------|----------------------------|
|
||||
| `MNET_025` | 0.4M | 1.7MB | 88.48% | 87.02% | 80.61% | Mobile/Edge devices |
|
||||
| `MNET_050` | 1.0M | 2.6MB | 89.42% | 87.97% | 82.40% | Mobile/Edge devices |
|
||||
| `MNET_V1` | 3.5M | 3.8MB | 90.59% | 89.14% | 84.13% | Balanced mobile |
|
||||
| `MNET_V2` ⭐ | 3.2M | 3.5MB | 91.70% | 91.03% | 86.60% | **Recommended default** |
|
||||
| `RESNET18` | 11.7M | 27MB | 92.50% | 91.02% | 86.63% | Server/High accuracy |
|
||||
| `RESNET34` | 24.8M | 56MB | 94.16% | 93.12% | 88.90% | Maximum accuracy |
|
||||
|
||||
**Accuracy**: WIDER FACE validation set (Easy/Medium/Hard subsets) - from [RetinaFace paper](https://arxiv.org/abs/1905.00641)
|
||||
**Speed**: Benchmark on your own hardware using `scripts/run_detection.py --iterations 100`
|
||||
|
||||
#### Usage
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace
|
||||
from uniface.constants import RetinaFaceWeights
|
||||
|
||||
# Default (recommended)
|
||||
detector = RetinaFace() # Uses MNET_V2
|
||||
|
||||
# Specific model
|
||||
detector = RetinaFace(
|
||||
model_name=RetinaFaceWeights.MNET_025, # Fastest
|
||||
conf_thresh=0.5,
|
||||
nms_thresh=0.4,
|
||||
input_size=(640, 640)
|
||||
)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### SCRFD Family
|
||||
|
||||
SCRFD (Sample and Computation Redistribution for Efficient Face Detection) models offer state-of-the-art speed-accuracy tradeoffs.
|
||||
|
||||
| Model Name | Params | Size | Easy | Medium | Hard | Use Case |
|
||||
|-----------------|--------|-------|--------|--------|--------|----------------------------|
|
||||
| `SCRFD_500M` | 0.6M | 2.5MB | 90.57% | 88.12% | 68.51% | Real-time applications |
|
||||
| `SCRFD_10G` ⭐ | 4.2M | 17MB | 95.16% | 93.87% | 83.05% | **High accuracy + speed** |
|
||||
|
||||
**Accuracy**: WIDER FACE validation set - from [SCRFD paper](https://arxiv.org/abs/2105.04714)
|
||||
**Speed**: Benchmark on your own hardware using `scripts/run_detection.py --iterations 100`
|
||||
|
||||
#### Usage
|
||||
|
||||
```python
|
||||
from uniface import SCRFD
|
||||
from uniface.constants import SCRFDWeights
|
||||
|
||||
# Fast real-time detection
|
||||
detector = SCRFD(
|
||||
model_name=SCRFDWeights.SCRFD_500M_KPS,
|
||||
conf_thresh=0.5,
|
||||
input_size=(640, 640)
|
||||
)
|
||||
|
||||
# High accuracy
|
||||
detector = SCRFD(
|
||||
model_name=SCRFDWeights.SCRFD_10G_KPS,
|
||||
conf_thresh=0.5
|
||||
)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Face Recognition Models
|
||||
|
||||
### ArcFace
|
||||
|
||||
State-of-the-art face recognition using additive angular margin loss.
|
||||
|
||||
| Model Name | Backbone | Params | Size | Use Case |
|
||||
|-------------|-------------|--------|-------|----------------------------|
|
||||
| `MNET` ⭐ | MobileNet | 2.0M | 8MB | **Balanced (recommended)** |
|
||||
| `RESNET` | ResNet50 | 43.6M | 166MB | Maximum accuracy |
|
||||
|
||||
**Dataset**: Trained on MS1M-V2 (5.8M images, 85K identities)
|
||||
**Accuracy**: Benchmark on your own dataset or use standard face verification benchmarks
|
||||
|
||||
#### Usage
|
||||
|
||||
```python
|
||||
from uniface import ArcFace
|
||||
from uniface.constants import ArcFaceWeights
|
||||
|
||||
# Default (MobileNet backbone)
|
||||
recognizer = ArcFace()
|
||||
|
||||
# High accuracy (ResNet50 backbone)
|
||||
recognizer = ArcFace(model_name=ArcFaceWeights.RESNET)
|
||||
|
||||
# Extract embedding
|
||||
embedding = recognizer.get_normalized_embedding(image, landmarks)
|
||||
# Returns: (1, 512) normalized embedding vector
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### MobileFace
|
||||
|
||||
Lightweight face recognition optimized for mobile devices.
|
||||
|
||||
| Model Name | Backbone | Params | Size | LFW | CALFW | CPLFW | AgeDB-30 | Use Case |
|
||||
|-----------------|-----------------|--------|------|-------|-------|-------|----------|--------------------|
|
||||
| `MNET_025` | MobileNetV1 0.25| 0.36M | 1MB | 98.76%| 92.02%| 82.37%| 90.02% | Ultra-lightweight |
|
||||
| `MNET_V2` ⭐ | MobileNetV2 | 2.29M | 4MB | 99.55%| 94.87%| 86.89%| 95.16% | **Mobile/Edge** |
|
||||
| `MNET_V3_SMALL` | MobileNetV3-S | 1.25M | 3MB | 99.30%| 93.77%| 85.29%| 92.79% | Mobile optimized |
|
||||
| `MNET_V3_LARGE` | MobileNetV3-L | 3.52M | 10MB | 99.53%| 94.56%| 86.79%| 95.13% | Balanced mobile |
|
||||
|
||||
**Dataset**: Trained on MS1M-V2 (5.8M images, 85K identities)
|
||||
**Accuracy**: Evaluated on LFW, CALFW, CPLFW, and AgeDB-30 benchmarks
|
||||
**Note**: These models are lightweight alternatives to ArcFace for resource-constrained environments
|
||||
|
||||
#### Usage
|
||||
|
||||
```python
|
||||
from uniface import MobileFace
|
||||
from uniface.constants import MobileFaceWeights
|
||||
|
||||
# Lightweight
|
||||
recognizer = MobileFace(model_name=MobileFaceWeights.MNET_V2)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### SphereFace
|
||||
|
||||
Face recognition using angular softmax loss.
|
||||
|
||||
| Model Name | Backbone | Params | Size | LFW | CALFW | CPLFW | AgeDB-30 | Use Case |
|
||||
|-------------|----------|--------|------|-------|-------|-------|----------|----------------------|
|
||||
| `SPHERE20` | Sphere20 | 24.5M | 50MB | 99.67%| 95.61%| 88.75%| 96.58% | Research/Comparison |
|
||||
| `SPHERE36` | Sphere36 | 34.6M | 92MB | 99.72%| 95.64%| 89.92%| 96.83% | Research/Comparison |
|
||||
|
||||
**Dataset**: Trained on MS1M-V2 (5.8M images, 85K identities)
|
||||
**Accuracy**: Evaluated on LFW, CALFW, CPLFW, and AgeDB-30 benchmarks
|
||||
**Note**: SphereFace uses angular softmax loss, an earlier approach before ArcFace. These models provide good accuracy with moderate resource requirements.
|
||||
|
||||
#### Usage
|
||||
|
||||
```python
|
||||
from uniface import SphereFace
|
||||
from uniface.constants import SphereFaceWeights
|
||||
|
||||
recognizer = SphereFace(model_name=SphereFaceWeights.SPHERE20)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Facial Landmark Models
|
||||
|
||||
### 106-Point Landmark Detection
|
||||
|
||||
High-precision facial landmark localization.
|
||||
|
||||
| Model Name | Points | Params | Size | Use Case |
|
||||
|------------|--------|--------|------|-----------------------------|
|
||||
| `2D106` | 106 | 3.7M | 14MB | Face alignment, analysis |
|
||||
|
||||
**Note**: Provides 106 facial keypoints for detailed face analysis and alignment
|
||||
|
||||
#### Usage
|
||||
|
||||
```python
|
||||
from uniface import Landmark106
|
||||
|
||||
landmarker = Landmark106()
|
||||
landmarks = landmarker.get_landmarks(image, bbox)
|
||||
# Returns: (106, 2) array of (x, y) coordinates
|
||||
```
|
||||
|
||||
**Landmark Groups:**
|
||||
- Face contour: 0-32 (33 points)
|
||||
- Eyebrows: 33-50 (18 points)
|
||||
- Nose: 51-62 (12 points)
|
||||
- Eyes: 63-86 (24 points)
|
||||
- Mouth: 87-105 (19 points)
|
||||
|
||||
---
|
||||
|
||||
## Attribute Analysis Models
|
||||
|
||||
### Age & Gender Detection
|
||||
|
||||
| Model Name | Attributes | Params | Size | Use Case |
|
||||
|------------|-------------|--------|------|-------------------|
|
||||
| `DEFAULT` | Age, Gender | 2.1M | 8MB | General purpose |
|
||||
|
||||
**Dataset**: Trained on CelebA
|
||||
**Note**: Accuracy varies by demographic and image quality. Test on your specific use case.
|
||||
|
||||
#### Usage
|
||||
|
||||
```python
|
||||
from uniface import AgeGender
|
||||
|
||||
predictor = AgeGender()
|
||||
gender, age = predictor.predict(image, bbox)
|
||||
# Returns: ("Male"/"Female", age_in_years)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Emotion Detection
|
||||
|
||||
| Model Name | Classes | Params | Size | Use Case |
|
||||
|--------------|---------|--------|------|-----------------------|
|
||||
| `AFFECNET7` | 7 | 0.5M | 2MB | 7-class emotion |
|
||||
| `AFFECNET8` | 8 | 0.5M | 2MB | 8-class emotion |
|
||||
|
||||
**Classes (7)**: Neutral, Happy, Sad, Surprise, Fear, Disgust, Anger
|
||||
**Classes (8)**: Above + Contempt
|
||||
|
||||
**Dataset**: Trained on AffectNet
|
||||
**Note**: Emotion detection accuracy depends heavily on facial expression clarity and cultural context
|
||||
|
||||
#### Usage
|
||||
|
||||
```python
|
||||
from uniface import Emotion
|
||||
from uniface.constants import DDAMFNWeights
|
||||
|
||||
predictor = Emotion(model_name=DDAMFNWeights.AFFECNET7)
|
||||
emotion, confidence = predictor.predict(image, landmarks)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Model Selection Guide
|
||||
|
||||
### By Use Case
|
||||
|
||||
#### Mobile/Edge Devices
|
||||
- **Detection**: `RetinaFace(MNET_025)` or `SCRFD(SCRFD_500M)`
|
||||
- **Recognition**: `MobileFace(MNET_V2)`
|
||||
- **Priority**: Speed, small model size
|
||||
|
||||
#### Real-Time Applications (Webcam, Video)
|
||||
- **Detection**: `RetinaFace(MNET_V2)` or `SCRFD(SCRFD_500M)`
|
||||
- **Recognition**: `ArcFace(MNET)`
|
||||
- **Priority**: Speed-accuracy balance
|
||||
|
||||
#### High-Accuracy Applications (Security, Verification)
|
||||
- **Detection**: `SCRFD(SCRFD_10G)` or `RetinaFace(RESNET34)`
|
||||
- **Recognition**: `ArcFace(RESNET)`
|
||||
- **Priority**: Maximum accuracy
|
||||
|
||||
#### Server/Cloud Deployment
|
||||
- **Detection**: `SCRFD(SCRFD_10G)`
|
||||
- **Recognition**: `ArcFace(RESNET)`
|
||||
- **Priority**: Accuracy, batch processing
|
||||
|
||||
---
|
||||
|
||||
### By Hardware
|
||||
|
||||
#### Apple Silicon (M1/M2/M3/M4)
|
||||
**Recommended**: All models work well with ARM64 optimizations (automatically included)
|
||||
|
||||
```bash
|
||||
pip install uniface
|
||||
```
|
||||
|
||||
**Recommended models**:
|
||||
- **Fast**: `SCRFD(SCRFD_500M)` - Lightweight, real-time capable
|
||||
- **Balanced**: `RetinaFace(MNET_V2)` - Good accuracy/speed tradeoff
|
||||
- **Accurate**: `SCRFD(SCRFD_10G)` - High accuracy
|
||||
|
||||
**Benchmark on your M4**: `python scripts/run_detection.py --iterations 100`
|
||||
|
||||
#### NVIDIA GPU (CUDA)
|
||||
**Recommended**: Larger models for maximum throughput
|
||||
|
||||
```bash
|
||||
pip install uniface[gpu]
|
||||
```
|
||||
|
||||
**Recommended models**:
|
||||
- **Fast**: `SCRFD(SCRFD_500M)` - Maximum throughput
|
||||
- **Balanced**: `SCRFD(SCRFD_10G)` - Best overall
|
||||
- **Accurate**: `RetinaFace(RESNET34)` - Highest accuracy
|
||||
|
||||
#### CPU Only
|
||||
**Recommended**: Lightweight models
|
||||
|
||||
**Recommended models**:
|
||||
- **Fast**: `RetinaFace(MNET_025)` - Smallest, fastest
|
||||
- **Balanced**: `RetinaFace(MNET_V2)` - Recommended default
|
||||
- **Accurate**: `SCRFD(SCRFD_10G)` - Best accuracy on CPU
|
||||
|
||||
**Note**: FPS values vary significantly based on image size, number of faces, and hardware. Always benchmark on your specific setup.
|
||||
|
||||
---
|
||||
|
||||
## Benchmark Details
|
||||
|
||||
### How to Benchmark
|
||||
|
||||
Run benchmarks on your own hardware:
|
||||
|
||||
```bash
|
||||
# Detection speed
|
||||
python scripts/run_detection.py --image assets/test.jpg --iterations 100
|
||||
|
||||
# Compare models
|
||||
python scripts/run_detection.py --image assets/test.jpg --method retinaface --iterations 100
|
||||
python scripts/run_detection.py --image assets/test.jpg --method scrfd --iterations 100
|
||||
```
|
||||
|
||||
### Accuracy Metrics Explained
|
||||
|
||||
- **WIDER FACE**: Standard face detection benchmark with three difficulty levels
|
||||
- **Easy**: Large faces (>50px), clear backgrounds
|
||||
- **Medium**: Medium-sized faces (30-50px), moderate occlusion
|
||||
- **Hard**: Small faces (<30px), heavy occlusion, blur
|
||||
|
||||
*Accuracy values are from the original papers - see references below*
|
||||
|
||||
- **Model Size**: ONNX model file size (affects download time and memory)
|
||||
- **Params**: Number of model parameters (affects inference speed)
|
||||
|
||||
### Important Notes
|
||||
|
||||
1. **Speed varies by**:
|
||||
- Image resolution
|
||||
- Number of faces in image
|
||||
- Hardware (CPU/GPU/CoreML)
|
||||
- Batch size
|
||||
- Operating system
|
||||
|
||||
2. **Accuracy varies by**:
|
||||
- Image quality
|
||||
- Lighting conditions
|
||||
- Face pose and occlusion
|
||||
- Demographic factors
|
||||
|
||||
3. **Always benchmark on your specific use case** before choosing a model
|
||||
|
||||
---
|
||||
|
||||
## Model Updates
|
||||
|
||||
Models are automatically downloaded and cached on first use. Cache location: `~/.uniface/models/`
|
||||
|
||||
### Manual Model Management
|
||||
|
||||
```python
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.constants import RetinaFaceWeights
|
||||
|
||||
# Download specific model
|
||||
model_path = verify_model_weights(
|
||||
RetinaFaceWeights.MNET_V2,
|
||||
root='./custom_cache'
|
||||
)
|
||||
|
||||
# Models are verified with SHA-256 checksums
|
||||
```
|
||||
|
||||
### Download All Models
|
||||
|
||||
```bash
|
||||
# Using the provided script
|
||||
python scripts/download_model.py
|
||||
|
||||
# Download specific model
|
||||
python scripts/download_model.py --model MNET_V2
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## References
|
||||
|
||||
### Model Training & Architectures
|
||||
|
||||
- **RetinaFace Training**: [yakhyo/retinaface-pytorch](https://github.com/yakhyo/retinaface-pytorch) - PyTorch implementation and training code
|
||||
- **Face Recognition Training**: [yakhyo/face-recognition](https://github.com/yakhyo/face-recognition) - ArcFace, MobileFace, SphereFace training code
|
||||
- **InsightFace**: [deepinsight/insightface](https://github.com/deepinsight/insightface) - Model architectures and pretrained weights
|
||||
|
||||
### Papers
|
||||
|
||||
- **RetinaFace**: [Single-Shot Multi-Level Face Localisation in the Wild](https://arxiv.org/abs/1905.00641)
|
||||
- **SCRFD**: [Sample and Computation Redistribution for Efficient Face Detection](https://arxiv.org/abs/2105.04714)
|
||||
- **ArcFace**: [Additive Angular Margin Loss for Deep Face Recognition](https://arxiv.org/abs/1801.07698)
|
||||
- **SphereFace**: [Deep Hypersphere Embedding for Face Recognition](https://arxiv.org/abs/1704.08063)
|
||||
|
||||
372
QUICKSTART.md
Normal file
@@ -0,0 +1,372 @@
|
||||
# UniFace Quick Start Guide
|
||||
|
||||
Get up and running with UniFace in 5 minutes! This guide covers the most common use cases.
|
||||
|
||||
---
|
||||
|
||||
## Installation
|
||||
|
||||
```bash
|
||||
# macOS (Apple Silicon) - automatically includes ARM64 optimizations
|
||||
pip install uniface
|
||||
|
||||
# Linux/Windows with NVIDIA GPU
|
||||
pip install uniface[gpu]
|
||||
|
||||
# CPU-only (all platforms)
|
||||
pip install uniface
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 1. Face Detection (30 seconds)
|
||||
|
||||
Detect faces in an image:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace
|
||||
|
||||
# Load image
|
||||
image = cv2.imread("photo.jpg")
|
||||
|
||||
# Initialize detector (models auto-download on first use)
|
||||
detector = RetinaFace()
|
||||
|
||||
# Detect faces
|
||||
faces = detector.detect(image)
|
||||
|
||||
# Print results
|
||||
for i, face in enumerate(faces):
|
||||
print(f"Face {i+1}:")
|
||||
print(f" Confidence: {face['confidence']:.2f}")
|
||||
print(f" BBox: {face['bbox']}")
|
||||
print(f" Landmarks: {len(face['landmarks'])} points")
|
||||
```
|
||||
|
||||
**Output:**
|
||||
```
|
||||
Face 1:
|
||||
Confidence: 0.99
|
||||
BBox: [120.5, 85.3, 245.8, 210.6]
|
||||
Landmarks: 5 points
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 2. Visualize Detections (1 minute)
|
||||
|
||||
Draw bounding boxes and landmarks:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
# Detect faces
|
||||
detector = RetinaFace()
|
||||
image = cv2.imread("photo.jpg")
|
||||
faces = detector.detect(image)
|
||||
|
||||
# Extract visualization data
|
||||
bboxes = [f['bbox'] for f in faces]
|
||||
scores = [f['confidence'] for f in faces]
|
||||
landmarks = [f['landmarks'] for f in faces]
|
||||
|
||||
# Draw on image
|
||||
draw_detections(image, bboxes, scores, landmarks, vis_threshold=0.6)
|
||||
|
||||
# Save result
|
||||
cv2.imwrite("output.jpg", image)
|
||||
print("Saved output.jpg")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 3. Face Recognition (2 minutes)
|
||||
|
||||
Compare two faces:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
import numpy as np
|
||||
from uniface import RetinaFace, ArcFace
|
||||
|
||||
# Initialize models
|
||||
detector = RetinaFace()
|
||||
recognizer = ArcFace()
|
||||
|
||||
# Load two images
|
||||
image1 = cv2.imread("person1.jpg")
|
||||
image2 = cv2.imread("person2.jpg")
|
||||
|
||||
# Detect faces
|
||||
faces1 = detector.detect(image1)
|
||||
faces2 = detector.detect(image2)
|
||||
|
||||
if faces1 and faces2:
|
||||
# Extract embeddings
|
||||
emb1 = recognizer.get_normalized_embedding(image1, faces1[0]['landmarks'])
|
||||
emb2 = recognizer.get_normalized_embedding(image2, faces2[0]['landmarks'])
|
||||
|
||||
# Compute similarity (cosine similarity)
|
||||
similarity = np.dot(emb1, emb2.T)[0][0]
|
||||
|
||||
# Interpret result
|
||||
if similarity > 0.6:
|
||||
print(f"Same person (similarity: {similarity:.3f})")
|
||||
else:
|
||||
print(f"Different people (similarity: {similarity:.3f})")
|
||||
else:
|
||||
print("No faces detected")
|
||||
```
|
||||
|
||||
**Similarity thresholds:**
|
||||
- `> 0.6`: Same person (high confidence)
|
||||
- `0.4 - 0.6`: Uncertain (manual review)
|
||||
- `< 0.4`: Different people
|
||||
|
||||
---
|
||||
|
||||
## 4. Webcam Demo (2 minutes)
|
||||
|
||||
Real-time face detection:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
detector = RetinaFace()
|
||||
cap = cv2.VideoCapture(0)
|
||||
|
||||
print("Press 'q' to quit")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
# Detect faces
|
||||
faces = detector.detect(frame)
|
||||
|
||||
# Draw results
|
||||
bboxes = [f['bbox'] for f in faces]
|
||||
scores = [f['confidence'] for f in faces]
|
||||
landmarks = [f['landmarks'] for f in faces]
|
||||
draw_detections(frame, bboxes, scores, landmarks)
|
||||
|
||||
# Show frame
|
||||
cv2.imshow("UniFace - Press 'q' to quit", frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 5. Age & Gender Detection (2 minutes)
|
||||
|
||||
Detect age and gender:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace, AgeGender
|
||||
|
||||
# Initialize models
|
||||
detector = RetinaFace()
|
||||
age_gender = AgeGender()
|
||||
|
||||
# Load image
|
||||
image = cv2.imread("photo.jpg")
|
||||
faces = detector.detect(image)
|
||||
|
||||
# Predict attributes
|
||||
for i, face in enumerate(faces):
|
||||
gender, age = age_gender.predict(image, face['bbox'])
|
||||
print(f"Face {i+1}: {gender}, {age} years old")
|
||||
```
|
||||
|
||||
**Output:**
|
||||
```
|
||||
Face 1: Male, 32 years old
|
||||
Face 2: Female, 28 years old
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 6. Facial Landmarks (2 minutes)
|
||||
|
||||
Detect 106 facial landmarks:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace, Landmark106
|
||||
|
||||
# Initialize models
|
||||
detector = RetinaFace()
|
||||
landmarker = Landmark106()
|
||||
|
||||
# Detect face and landmarks
|
||||
image = cv2.imread("photo.jpg")
|
||||
faces = detector.detect(image)
|
||||
|
||||
if faces:
|
||||
landmarks = landmarker.get_landmarks(image, faces[0]['bbox'])
|
||||
print(f"Detected {len(landmarks)} landmarks")
|
||||
|
||||
# Draw landmarks
|
||||
for x, y in landmarks.astype(int):
|
||||
cv2.circle(image, (x, y), 2, (0, 255, 0), -1)
|
||||
|
||||
cv2.imwrite("landmarks.jpg", image)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 7. Batch Processing (3 minutes)
|
||||
|
||||
Process multiple images:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from pathlib import Path
|
||||
from uniface import RetinaFace
|
||||
|
||||
detector = RetinaFace()
|
||||
|
||||
# Process all images in a folder
|
||||
image_dir = Path("images/")
|
||||
output_dir = Path("output/")
|
||||
output_dir.mkdir(exist_ok=True)
|
||||
|
||||
for image_path in image_dir.glob("*.jpg"):
|
||||
print(f"Processing {image_path.name}...")
|
||||
|
||||
image = cv2.imread(str(image_path))
|
||||
faces = detector.detect(image)
|
||||
|
||||
print(f" Found {len(faces)} face(s)")
|
||||
|
||||
# Save results
|
||||
output_path = output_dir / image_path.name
|
||||
# ... draw and save ...
|
||||
|
||||
print("Done!")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 8. Model Selection
|
||||
|
||||
Choose the right model for your use case:
|
||||
|
||||
### Detection Models
|
||||
|
||||
```python
|
||||
from uniface.detection import RetinaFace, SCRFD
|
||||
from uniface.constants import RetinaFaceWeights, SCRFDWeights
|
||||
|
||||
# Fast detection (mobile/edge devices)
|
||||
detector = RetinaFace(
|
||||
model_name=RetinaFaceWeights.MNET_025,
|
||||
conf_thresh=0.7
|
||||
)
|
||||
|
||||
# Balanced (recommended)
|
||||
detector = RetinaFace(
|
||||
model_name=RetinaFaceWeights.MNET_V2
|
||||
)
|
||||
|
||||
# High accuracy (server/GPU)
|
||||
detector = SCRFD(
|
||||
model_name=SCRFDWeights.SCRFD_10G_KPS,
|
||||
conf_thresh=0.5
|
||||
)
|
||||
```
|
||||
|
||||
### Recognition Models
|
||||
|
||||
```python
|
||||
from uniface import ArcFace, MobileFace, SphereFace
|
||||
from uniface.constants import MobileFaceWeights, SphereFaceWeights
|
||||
|
||||
# ArcFace (recommended for most use cases)
|
||||
recognizer = ArcFace() # Best accuracy
|
||||
|
||||
# MobileFace (lightweight for mobile/edge)
|
||||
recognizer = MobileFace(model_name=MobileFaceWeights.MNET_V2) # Fast, small size
|
||||
|
||||
# SphereFace (angular margin approach)
|
||||
recognizer = SphereFace(model_name=SphereFaceWeights.SPHERE20) # Alternative method
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Common Issues
|
||||
|
||||
### 1. Models Not Downloading
|
||||
|
||||
```python
|
||||
# Manually download a model
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.constants import RetinaFaceWeights
|
||||
|
||||
model_path = verify_model_weights(RetinaFaceWeights.MNET_V2)
|
||||
print(f"Model downloaded to: {model_path}")
|
||||
```
|
||||
|
||||
### 2. Check Hardware Acceleration
|
||||
|
||||
```python
|
||||
import onnxruntime as ort
|
||||
print("Available providers:", ort.get_available_providers())
|
||||
|
||||
# macOS M-series should show: ['CoreMLExecutionProvider', ...]
|
||||
# NVIDIA GPU should show: ['CUDAExecutionProvider', ...]
|
||||
```
|
||||
|
||||
### 3. Slow Performance on Mac
|
||||
|
||||
The standard installation includes ARM64 optimizations for Apple Silicon. If performance is slow, verify you're using the ARM64 build of Python:
|
||||
|
||||
```bash
|
||||
python -c "import platform; print(platform.machine())"
|
||||
# Should show: arm64 (not x86_64)
|
||||
```
|
||||
|
||||
### 4. Import Errors
|
||||
|
||||
```python
|
||||
# Correct imports
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.recognition import ArcFace
|
||||
from uniface.landmark import Landmark106
|
||||
|
||||
# Wrong imports
|
||||
from uniface import retinaface # Module, not class
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Next Steps
|
||||
|
||||
- **Detailed Examples**: Check the [examples/](examples/) folder for Jupyter notebooks
|
||||
- **Model Benchmarks**: See [MODELS.md](MODELS.md) for performance comparisons
|
||||
- **Full Documentation**: Read [README.md](README.md) for complete API reference
|
||||
|
||||
---
|
||||
|
||||
## References
|
||||
|
||||
- **RetinaFace Training**: [yakhyo/retinaface-pytorch](https://github.com/yakhyo/retinaface-pytorch)
|
||||
- **Face Recognition Training**: [yakhyo/face-recognition](https://github.com/yakhyo/face-recognition)
|
||||
- **InsightFace**: [deepinsight/insightface](https://github.com/deepinsight/insightface)
|
||||
|
||||
---
|
||||
|
||||
Happy coding! 🚀
|
||||
|
||||
520
README.md
@@ -1,239 +1,457 @@
|
||||
# UniFace: All-in-One Face Analysis Library
|
||||
|
||||
<div align="center">
|
||||
|
||||
[](https://opensource.org/licenses/MIT)
|
||||

|
||||

|
||||
[](https://pypi.org/project/uniface/)
|
||||
[](https://github.com/yakhyo/uniface/actions)
|
||||
[](https://github.com/yakhyo/uniface/actions)
|
||||
[](https://pepy.tech/project/uniface)
|
||||
[](https://www.python.org/dev/peps/pep-0008/)
|
||||
[](https://github.com/yakhyo/uniface/releases)
|
||||
|
||||
<div align="center">
|
||||
<img src=".github/logos/logo_web.webp" width=75%>
|
||||
</div>
|
||||
|
||||
**uniface** is a lightweight face detection library designed for high-performance face localization and landmark detection. The library supports ONNX models and provides utilities for bounding box visualization and landmark plotting. To train RetinaFace model, see https://github.com/yakhyo/retinaface-pytorch.
|
||||
**UniFace** is a lightweight, production-ready face analysis library built on ONNX Runtime. It provides high-performance face detection, recognition, landmark detection, and attribute analysis with hardware acceleration support across platforms.
|
||||
|
||||
---
|
||||
|
||||
## Features
|
||||
- [ ] Age and gender detection (Planned).
|
||||
- [ ] Face recognition (Planned).
|
||||
- [x] High-speed face detection using ONNX models (Added: 2024-11-20).
|
||||
- [x] Accurate facial landmark localization (e.g., eyes, nose, and mouth) (Added: 2024-11-20).
|
||||
- [x] Easy-to-use API for inference and visualization (Added: 2024-11-20).
|
||||
|
||||
- **High-Speed Face Detection**: ONNX-optimized RetinaFace and SCRFD models
|
||||
- **Facial Landmark Detection**: Accurate 106-point landmark localization
|
||||
- **Face Recognition**: ArcFace, MobileFace, and SphereFace embeddings
|
||||
- **Attribute Analysis**: Age, gender, and emotion detection
|
||||
- **Face Alignment**: Precise alignment for downstream tasks
|
||||
- **Hardware Acceleration**: ARM64 optimizations (Apple Silicon), CUDA (NVIDIA), CPU fallback
|
||||
- **Simple API**: Intuitive factory functions and clean interfaces
|
||||
- **Production-Ready**: Type hints, comprehensive logging, PEP8 compliant
|
||||
|
||||
---
|
||||
|
||||
## Installation
|
||||
|
||||
### Using pip
|
||||
### Quick Install (All Platforms)
|
||||
|
||||
```bash
|
||||
pip install uniface
|
||||
```
|
||||
|
||||
### Local installation using pip
|
||||
### Platform-Specific Installation
|
||||
|
||||
**Clone the repository**
|
||||
#### macOS (Apple Silicon - M1/M2/M3/M4)
|
||||
|
||||
For Apple Silicon Macs, the standard installation automatically includes optimized ARM64 support:
|
||||
|
||||
```bash
|
||||
pip install uniface
|
||||
```
|
||||
|
||||
The base `onnxruntime` package (included with uniface) has native Apple Silicon support with ARM64 optimizations built-in since version 1.13+.
|
||||
|
||||
#### Linux/Windows with NVIDIA GPU
|
||||
|
||||
For CUDA acceleration on NVIDIA GPUs:
|
||||
|
||||
```bash
|
||||
pip install uniface[gpu]
|
||||
```
|
||||
|
||||
**Requirements:**
|
||||
- CUDA 11.x or 12.x
|
||||
- cuDNN 8.x
|
||||
- See [ONNX Runtime GPU requirements](https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html)
|
||||
|
||||
#### CPU-Only (All Platforms)
|
||||
|
||||
```bash
|
||||
pip install uniface
|
||||
```
|
||||
|
||||
### Install from Source
|
||||
|
||||
```bash
|
||||
git clone https://github.com/yakhyo/uniface.git
|
||||
cd uniface
|
||||
```
|
||||
|
||||
**Install using pip**
|
||||
|
||||
```bash
|
||||
pip install .
|
||||
pip install -e .
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Quick Start
|
||||
|
||||
### Initialize the Model
|
||||
### Face Detection
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace
|
||||
|
||||
# Initialize the RetinaFace model
|
||||
uniface_inference = RetinaFace(
|
||||
model="retinaface_mnet_v2", # Model name
|
||||
conf_thresh=0.5, # Confidence threshold
|
||||
pre_nms_topk=5000, # Pre-NMS Top-K detections
|
||||
nms_thresh=0.4, # NMS IoU threshold
|
||||
post_nms_topk=750 # Post-NMS Top-K detections
|
||||
# Initialize detector
|
||||
detector = RetinaFace()
|
||||
|
||||
# Load image
|
||||
image = cv2.imread("image.jpg")
|
||||
|
||||
# Detect faces
|
||||
faces = detector.detect(image)
|
||||
|
||||
# Process results
|
||||
for face in faces:
|
||||
bbox = face['bbox'] # [x1, y1, x2, y2]
|
||||
confidence = face['confidence']
|
||||
landmarks = face['landmarks'] # 5-point landmarks
|
||||
print(f"Face detected with confidence: {confidence:.2f}")
|
||||
```
|
||||
|
||||
### Face Recognition
|
||||
|
||||
```python
|
||||
from uniface import ArcFace, RetinaFace
|
||||
from uniface import compute_similarity
|
||||
|
||||
# Initialize models
|
||||
detector = RetinaFace()
|
||||
recognizer = ArcFace()
|
||||
|
||||
# Detect and extract embeddings
|
||||
faces1 = detector.detect(image1)
|
||||
faces2 = detector.detect(image2)
|
||||
|
||||
embedding1 = recognizer.get_normalized_embedding(image1, faces1[0]['landmarks'])
|
||||
embedding2 = recognizer.get_normalized_embedding(image2, faces2[0]['landmarks'])
|
||||
|
||||
# Compare faces
|
||||
similarity = compute_similarity(embedding1, embedding2)
|
||||
print(f"Similarity: {similarity:.4f}")
|
||||
```
|
||||
|
||||
### Facial Landmarks
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace, Landmark106
|
||||
|
||||
detector = RetinaFace()
|
||||
landmarker = Landmark106()
|
||||
|
||||
faces = detector.detect(image)
|
||||
landmarks = landmarker.get_landmarks(image, faces[0]['bbox'])
|
||||
# Returns 106 (x, y) landmark points
|
||||
```
|
||||
|
||||
### Age & Gender Detection
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace, AgeGender
|
||||
|
||||
detector = RetinaFace()
|
||||
age_gender = AgeGender()
|
||||
|
||||
faces = detector.detect(image)
|
||||
gender, age = age_gender.predict(image, faces[0]['bbox'])
|
||||
print(f"{gender}, {age} years old")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Documentation
|
||||
|
||||
- [**QUICKSTART.md**](QUICKSTART.md) - 5-minute getting started guide
|
||||
- [**MODELS.md**](MODELS.md) - Model zoo, benchmarks, and selection guide
|
||||
- [**Examples**](examples/) - Jupyter notebooks with detailed examples
|
||||
|
||||
---
|
||||
|
||||
## API Overview
|
||||
|
||||
### Factory Functions (Recommended)
|
||||
|
||||
```python
|
||||
from uniface.detection import RetinaFace, SCRFD
|
||||
from uniface.recognition import ArcFace
|
||||
from uniface.landmark import Landmark106
|
||||
|
||||
# Create detector with default settings
|
||||
detector = RetinaFace()
|
||||
|
||||
# Create with custom config
|
||||
detector = SCRFD(
|
||||
model_name='scrfd_10g_kps',
|
||||
conf_thresh=0.8,
|
||||
input_size=(640, 640)
|
||||
)
|
||||
|
||||
# Recognition and landmarks
|
||||
recognizer = ArcFace()
|
||||
landmarker = Landmark106()
|
||||
```
|
||||
|
||||
### Run Inference
|
||||
### Direct Model Instantiation
|
||||
|
||||
Inference on image:
|
||||
```python
|
||||
from uniface import RetinaFace, SCRFD, ArcFace, MobileFace, SphereFace
|
||||
from uniface.constants import RetinaFaceWeights
|
||||
|
||||
# Detection
|
||||
detector = RetinaFace(
|
||||
model_name=RetinaFaceWeights.MNET_V2,
|
||||
conf_thresh=0.5,
|
||||
nms_thresh=0.4
|
||||
)
|
||||
|
||||
# Recognition
|
||||
recognizer = ArcFace() # Uses default weights
|
||||
recognizer = MobileFace() # Lightweight alternative
|
||||
recognizer = SphereFace() # Angular softmax alternative
|
||||
```
|
||||
|
||||
### High-Level Detection API
|
||||
|
||||
```python
|
||||
from uniface import detect_faces
|
||||
|
||||
# One-line face detection
|
||||
faces = detect_faces(image, method='retinaface', conf_thresh=0.8)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Model Performance
|
||||
|
||||
### Face Detection (WIDER FACE Dataset)
|
||||
|
||||
| Model | Easy | Medium | Hard | Use Case |
|
||||
|--------------------|--------|--------|--------|-------------------------|
|
||||
| retinaface_mnet025 | 88.48% | 87.02% | 80.61% | Mobile/Edge devices |
|
||||
| retinaface_mnet_v2 | 91.70% | 91.03% | 86.60% | Balanced (recommended) |
|
||||
| retinaface_r34 | 94.16% | 93.12% | 88.90% | High accuracy |
|
||||
| scrfd_500m | 90.57% | 88.12% | 68.51% | Real-time applications |
|
||||
| scrfd_10g | 95.16% | 93.87% | 83.05% | Best accuracy/speed |
|
||||
|
||||
*Accuracy values from original papers: [RetinaFace](https://arxiv.org/abs/1905.00641), [SCRFD](https://arxiv.org/abs/2105.04714)*
|
||||
|
||||
**Benchmark on your hardware:**
|
||||
```bash
|
||||
python scripts/run_detection.py --image assets/test.jpg --iterations 100
|
||||
```
|
||||
|
||||
See [MODELS.md](MODELS.md) for detailed model information and selection guide.
|
||||
|
||||
<div align="center">
|
||||
<img src="assets/test_result.png">
|
||||
</div>
|
||||
|
||||
---
|
||||
|
||||
## Examples
|
||||
|
||||
### Webcam Face Detection
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
# Load an image
|
||||
image_path = "assets/test.jpg"
|
||||
original_image = cv2.imread(image_path)
|
||||
|
||||
# Perform inference
|
||||
boxes, landmarks = uniface_inference.detect(original_image)
|
||||
|
||||
# Visualize results
|
||||
draw_detections(original_image, (boxes, landmarks), vis_threshold=0.6)
|
||||
|
||||
# Save the output image
|
||||
output_path = "output.jpg"
|
||||
cv2.imwrite(output_path, original_image)
|
||||
print(f"Saved output image to {output_path}")
|
||||
```
|
||||
|
||||
Inference on video:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
# Initialize the webcam
|
||||
detector = RetinaFace()
|
||||
cap = cv2.VideoCapture(0)
|
||||
|
||||
if not cap.isOpened():
|
||||
print("Error: Unable to access the webcam.")
|
||||
exit()
|
||||
|
||||
while True:
|
||||
# Capture a frame from the webcam
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
print("Error: Failed to read frame.")
|
||||
break
|
||||
|
||||
# Perform inference
|
||||
boxes, landmarks = uniface_inference.detect(frame)
|
||||
faces = detector.detect(frame)
|
||||
|
||||
# Draw detections on the frame
|
||||
draw_detections(frame, (boxes, landmarks), vis_threshold=0.6)
|
||||
# Extract data for visualization
|
||||
bboxes = [f['bbox'] for f in faces]
|
||||
scores = [f['confidence'] for f in faces]
|
||||
landmarks = [f['landmarks'] for f in faces]
|
||||
|
||||
# Display the output
|
||||
cv2.imshow("Webcam Inference", frame)
|
||||
draw_detections(frame, bboxes, scores, landmarks, vis_threshold=0.6)
|
||||
|
||||
# Exit if 'q' is pressed
|
||||
cv2.imshow("Face Detection", frame)
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
# Release the webcam and close all OpenCV windows
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
```
|
||||
|
||||
### Face Search System
|
||||
|
||||
```python
|
||||
import numpy as np
|
||||
from uniface import RetinaFace, ArcFace
|
||||
|
||||
detector = RetinaFace()
|
||||
recognizer = ArcFace()
|
||||
|
||||
# Build face database
|
||||
database = {}
|
||||
for person_id, image_path in person_images.items():
|
||||
image = cv2.imread(image_path)
|
||||
faces = detector.detect(image)
|
||||
if faces:
|
||||
embedding = recognizer.get_normalized_embedding(
|
||||
image, faces[0]['landmarks']
|
||||
)
|
||||
database[person_id] = embedding
|
||||
|
||||
# Search for a face
|
||||
query_image = cv2.imread("query.jpg")
|
||||
query_faces = detector.detect(query_image)
|
||||
if query_faces:
|
||||
query_embedding = recognizer.get_normalized_embedding(
|
||||
query_image, query_faces[0]['landmarks']
|
||||
)
|
||||
|
||||
# Find best match
|
||||
best_match = None
|
||||
best_similarity = -1
|
||||
|
||||
for person_id, db_embedding in database.items():
|
||||
similarity = np.dot(query_embedding, db_embedding.T)[0][0]
|
||||
if similarity > best_similarity:
|
||||
best_similarity = similarity
|
||||
best_match = person_id
|
||||
|
||||
print(f"Best match: {best_match} (similarity: {best_similarity:.4f})")
|
||||
```
|
||||
|
||||
More examples in the [examples/](examples/) directory.
|
||||
|
||||
---
|
||||
|
||||
### Evaluation results of available models on WiderFace
|
||||
## Advanced Configuration
|
||||
|
||||
| RetinaFace Models | Easy | Medium | Hard |
|
||||
| ------------------ | ---------- | ---------- | ---------- |
|
||||
| retinaface_mnet025 | 88.48% | 87.02% | 80.61% |
|
||||
| retinaface_mnet050 | 89.42% | 87.97% | 82.40% |
|
||||
| retinaface_mnet_v1 | 90.59% | 89.14% | 84.13% |
|
||||
| retinaface_mnet_v2 | 91.70% | 91.03% | 86.60% |
|
||||
| retinaface_r18 | 92.50% | 91.02% | 86.63% |
|
||||
| retinaface_r34 | **94.16%** | **93.12%** | **88.90%** |
|
||||
### Custom ONNX Runtime Providers
|
||||
|
||||
## API Reference
|
||||
|
||||
### `RetinaFace` Class
|
||||
|
||||
#### Initialization
|
||||
```python
|
||||
RetinaFace(
|
||||
model: str,
|
||||
conf_thresh: float = 0.5,
|
||||
pre_nms_topk: int = 5000,
|
||||
nms_thresh: float = 0.4,
|
||||
post_nms_topk: int = 750
|
||||
from uniface.onnx_utils import get_available_providers, create_onnx_session
|
||||
|
||||
# Check available providers
|
||||
providers = get_available_providers()
|
||||
print(f"Available: {providers}")
|
||||
|
||||
# Force CPU-only execution
|
||||
from uniface import RetinaFace
|
||||
detector = RetinaFace()
|
||||
# Internally uses create_onnx_session() which auto-selects best provider
|
||||
```
|
||||
|
||||
### Model Download and Caching
|
||||
|
||||
Models are automatically downloaded on first use and cached in `~/.uniface/models/`.
|
||||
|
||||
```python
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.constants import RetinaFaceWeights
|
||||
|
||||
# Manually download and verify a model
|
||||
model_path = verify_model_weights(
|
||||
RetinaFaceWeights.MNET_V2,
|
||||
root='./custom_models' # Custom cache directory
|
||||
)
|
||||
```
|
||||
|
||||
**Parameters**:
|
||||
- `model` *(str)*: Name of the model to use. Supported models:
|
||||
- `retinaface_mnet025`, `retinaface_mnet050`, `retinaface_mnet_v1`, `retinaface_mnet_v2`
|
||||
- `retinaface_r18`, `retinaface_r34`
|
||||
- `conf_thresh` *(float, default=0.5)*: Minimum confidence score for detections.
|
||||
- `pre_nms_topk` *(int, default=5000)*: Max detections to keep before NMS.
|
||||
- `nms_thresh` *(float, default=0.4)*: IoU threshold for Non-Maximum Suppression.
|
||||
- `post_nms_topk` *(int, default=750)*: Max detections to keep after NMS.
|
||||
### Logging Configuration
|
||||
|
||||
```python
|
||||
from uniface import Logger
|
||||
import logging
|
||||
|
||||
# Set logging level
|
||||
Logger.setLevel(logging.DEBUG) # DEBUG, INFO, WARNING, ERROR
|
||||
|
||||
# Disable logging
|
||||
Logger.setLevel(logging.CRITICAL)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### `detect` Method
|
||||
```python
|
||||
detect(
|
||||
image: np.ndarray,
|
||||
max_num: int = 0,
|
||||
metric: str = "default",
|
||||
center_weight: float = 2.0
|
||||
) -> Tuple[np.ndarray, np.ndarray]
|
||||
## Testing
|
||||
|
||||
```bash
|
||||
# Run all tests
|
||||
pytest
|
||||
|
||||
# Run with coverage
|
||||
pytest --cov=uniface --cov-report=html
|
||||
|
||||
# Run specific test file
|
||||
pytest tests/test_retinaface.py -v
|
||||
```
|
||||
|
||||
**Description**:
|
||||
Detects faces in the given image and returns bounding boxes and landmarks.
|
||||
|
||||
**Parameters**:
|
||||
- `image` *(np.ndarray)*: Input image in BGR format.
|
||||
- `max_num` *(int, default=0)*: Maximum number of faces to return. `0` means return all.
|
||||
- `metric` *(str, default="default")*: Metric for prioritizing detections:
|
||||
- `"default"`: Prioritize detections closer to the image center.
|
||||
- `"max"`: Prioritize larger bounding box areas.
|
||||
- `center_weight` *(float, default=2.0)*: Weight for prioritizing center-aligned faces.
|
||||
|
||||
**Returns**:
|
||||
- `bounding_boxes` *(np.ndarray)*: Array of detections as `[x_min, y_min, x_max, y_max, confidence]`.
|
||||
- `landmarks` *(np.ndarray)*: Array of landmarks as `[(x1, y1), ..., (x5, y5)]`.
|
||||
|
||||
---
|
||||
|
||||
### Visualization Utilities
|
||||
## Development
|
||||
|
||||
#### `draw_detections`
|
||||
```python
|
||||
draw_detections(
|
||||
image: np.ndarray,
|
||||
detections: Tuple[np.ndarray, np.ndarray],
|
||||
vis_threshold: float
|
||||
) -> None
|
||||
### Setup Development Environment
|
||||
|
||||
```bash
|
||||
git clone https://github.com/yakhyo/uniface.git
|
||||
cd uniface
|
||||
|
||||
# Install in editable mode with dev dependencies
|
||||
pip install -e ".[dev]"
|
||||
|
||||
# Run tests
|
||||
pytest
|
||||
```
|
||||
|
||||
**Description**:
|
||||
Draws bounding boxes and landmarks on the given image.
|
||||
### Code Formatting
|
||||
|
||||
**Parameters**:
|
||||
- `image` *(np.ndarray)*: The input image in BGR format.
|
||||
- `detections` *(Tuple[np.ndarray, np.ndarray])*: A tuple of bounding boxes and landmarks.
|
||||
- `vis_threshold` *(float)*: Minimum confidence score for visualization.
|
||||
This project uses [Ruff](https://docs.astral.sh/ruff/) for linting and formatting.
|
||||
|
||||
```bash
|
||||
# Format code
|
||||
ruff format .
|
||||
|
||||
# Check for linting errors
|
||||
ruff check .
|
||||
|
||||
# Auto-fix linting errors
|
||||
ruff check . --fix
|
||||
```
|
||||
|
||||
Ruff configuration is in `pyproject.toml`. Key settings:
|
||||
- Line length: 120
|
||||
- Python target: 3.10+
|
||||
- Import sorting: `uniface` as first-party
|
||||
|
||||
### Project Structure
|
||||
|
||||
```
|
||||
uniface/
|
||||
├── uniface/
|
||||
│ ├── detection/ # Face detection models
|
||||
│ ├── recognition/ # Face recognition models
|
||||
│ ├── landmark/ # Landmark detection
|
||||
│ ├── attribute/ # Age, gender, emotion
|
||||
│ ├── onnx_utils.py # ONNX Runtime utilities
|
||||
│ ├── model_store.py # Model download & caching
|
||||
│ └── visualization.py # Drawing utilities
|
||||
├── tests/ # Unit tests
|
||||
├── examples/ # Example notebooks
|
||||
└── scripts/ # Utility scripts
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## References
|
||||
|
||||
### Model Training & Architectures
|
||||
|
||||
- **RetinaFace Training**: [yakhyo/retinaface-pytorch](https://github.com/yakhyo/retinaface-pytorch) - PyTorch implementation and training code
|
||||
- **Face Recognition Training**: [yakhyo/face-recognition](https://github.com/yakhyo/face-recognition) - ArcFace, MobileFace, SphereFace training code
|
||||
- **InsightFace**: [deepinsight/insightface](https://github.com/deepinsight/insightface) - Model architectures and pretrained weights
|
||||
|
||||
### Papers
|
||||
|
||||
- **RetinaFace**: [Single-Shot Multi-Level Face Localisation in the Wild](https://arxiv.org/abs/1905.00641)
|
||||
- **SCRFD**: [Sample and Computation Redistribution for Efficient Face Detection](https://arxiv.org/abs/2105.04714)
|
||||
- **ArcFace**: [Additive Angular Margin Loss for Deep Face Recognition](https://arxiv.org/abs/1801.07698)
|
||||
|
||||
---
|
||||
|
||||
## Contributing
|
||||
|
||||
We welcome contributions to enhance the library! Feel free to:
|
||||
Contributions are welcome! Please open an issue or submit a pull request on [GitHub](https://github.com/yakhyo/uniface).
|
||||
|
||||
- Submit bug reports or feature requests.
|
||||
- Fork the repository and create a pull request.
|
||||
|
||||
---
|
||||
|
||||
## License
|
||||
|
||||
This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for details.
|
||||
|
||||
---
|
||||
|
||||
## Acknowledgments
|
||||
|
||||
- Based on the RetinaFace model for face detection ([https://github.com/yakhyo/retinaface-pytorch](https://github.com/yakhyo/retinaface-pytorch)).
|
||||
- Inspired by InsightFace and other face detection projects.
|
||||
|
||||
---
|
||||
|
||||
BIN
assets/alignment_result.png
Normal file
|
After Width: | Height: | Size: 996 KiB |
BIN
assets/test_images/image0.jpg
Normal file
|
After Width: | Height: | Size: 26 KiB |
BIN
assets/test_images/image1.jpg
Normal file
|
After Width: | Height: | Size: 6.6 KiB |
BIN
assets/test_images/image2.jpg
Normal file
|
After Width: | Height: | Size: 9.1 KiB |
BIN
assets/test_images/image3.jpg
Normal file
|
After Width: | Height: | Size: 9.8 KiB |
BIN
assets/test_images/image4.jpg
Normal file
|
After Width: | Height: | Size: 11 KiB |
BIN
assets/test_result.png
Normal file
|
After Width: | Height: | Size: 1.1 MiB |
0
examples/.gitkeep
Normal file
220
examples/face_alignment.ipynb
Normal file
292
examples/face_detection.ipynb
Normal file
47
pyproject.toml
Normal file
@@ -0,0 +1,47 @@
|
||||
[project]
|
||||
name = "uniface"
|
||||
version = "1.1.1"
|
||||
description = "UniFace: A Comprehensive Library for Face Detection, Recognition, Landmark Analysis, Age, and Gender Detection"
|
||||
readme = "README.md"
|
||||
license = { text = "MIT" }
|
||||
authors = [
|
||||
{ name = "Yakhyokhuja Valikhujaev", email = "yakhyo9696@gmail.com" }
|
||||
]
|
||||
dependencies = [
|
||||
"numpy>=1.21.0",
|
||||
"opencv-python>=4.5.0",
|
||||
"onnx>=1.12.0",
|
||||
"onnxruntime>=1.16.0",
|
||||
"scikit-image>=0.19.0",
|
||||
"requests>=2.28.0",
|
||||
"tqdm>=4.64.0"
|
||||
]
|
||||
requires-python = ">=3.10"
|
||||
|
||||
[project.optional-dependencies]
|
||||
dev = ["pytest>=7.0.0", "ruff>=0.4.0"]
|
||||
gpu = ["onnxruntime-gpu>=1.16.0"]
|
||||
|
||||
[project.urls]
|
||||
Homepage = "https://github.com/yakhyo/uniface"
|
||||
Repository = "https://github.com/yakhyo/uniface"
|
||||
|
||||
[build-system]
|
||||
requires = ["setuptools>=64", "wheel"]
|
||||
build-backend = "setuptools.build_meta"
|
||||
|
||||
[tool.setuptools]
|
||||
packages = { find = {} }
|
||||
|
||||
[tool.setuptools.package-data]
|
||||
"uniface" = ["*.txt", "*.md"]
|
||||
|
||||
[tool.ruff]
|
||||
line-length = 120
|
||||
target-version = "py310"
|
||||
|
||||
[tool.ruff.lint]
|
||||
select = ["E", "F", "I", "W"]
|
||||
|
||||
[tool.ruff.lint.isort]
|
||||
known-first-party = ["uniface"]
|
||||
@@ -1,8 +1,8 @@
|
||||
pytest
|
||||
numpy
|
||||
opencv-python
|
||||
opencv-python-headless
|
||||
onnx
|
||||
onnxruntime
|
||||
requests
|
||||
torch
|
||||
numpy>=1.21.0
|
||||
opencv-python>=4.5.0
|
||||
onnx>=1.12.0
|
||||
onnxruntime>=1.16.0
|
||||
scikit-image>=0.19.0
|
||||
requests>=2.28.0
|
||||
pytest>=7.0.0
|
||||
tqdm>=4.64.0
|
||||
|
||||
68
scripts/README.md
Normal file
@@ -0,0 +1,68 @@
|
||||
# Scripts
|
||||
|
||||
Scripts for testing UniFace features.
|
||||
|
||||
## Available Scripts
|
||||
|
||||
| Script | Description |
|
||||
|--------|-------------|
|
||||
| `run_detection.py` | Face detection on image or webcam |
|
||||
| `run_age_gender.py` | Age and gender prediction |
|
||||
| `run_landmarks.py` | 106-point facial landmark detection |
|
||||
| `run_recognition.py` | Face embedding extraction and comparison |
|
||||
| `run_face_search.py` | Real-time face matching against reference |
|
||||
| `run_video_detection.py` | Face detection on video files |
|
||||
| `batch_process.py` | Batch process folder of images |
|
||||
| `download_model.py` | Download model weights |
|
||||
| `sha256_generate.py` | Generate SHA256 hash for model files |
|
||||
|
||||
## Usage Examples
|
||||
|
||||
```bash
|
||||
# Face detection
|
||||
python scripts/run_detection.py --image assets/test.jpg
|
||||
python scripts/run_detection.py --webcam
|
||||
|
||||
# Age and gender
|
||||
python scripts/run_age_gender.py --image assets/test.jpg
|
||||
python scripts/run_age_gender.py --webcam
|
||||
|
||||
# Landmarks
|
||||
python scripts/run_landmarks.py --image assets/test.jpg
|
||||
python scripts/run_landmarks.py --webcam
|
||||
|
||||
# Face recognition (extract embedding)
|
||||
python scripts/run_recognition.py --image assets/test.jpg
|
||||
|
||||
# Face comparison
|
||||
python scripts/run_recognition.py --image1 face1.jpg --image2 face2.jpg
|
||||
|
||||
# Face search (match webcam against reference)
|
||||
python scripts/run_face_search.py --image reference.jpg
|
||||
|
||||
# Video processing
|
||||
python scripts/run_video_detection.py --input video.mp4 --output output.mp4
|
||||
|
||||
# Batch processing
|
||||
python scripts/batch_process.py --input images/ --output results/
|
||||
|
||||
# Download models
|
||||
python scripts/download_model.py --model-type retinaface
|
||||
python scripts/download_model.py # downloads all
|
||||
```
|
||||
|
||||
## Common Options
|
||||
|
||||
| Option | Description |
|
||||
|--------|-------------|
|
||||
| `--image` | Path to input image |
|
||||
| `--webcam` | Use webcam instead of image |
|
||||
| `--detector` | Choose detector: `retinaface` or `scrfd` |
|
||||
| `--threshold` | Visualization confidence threshold (default: 0.6) |
|
||||
| `--save_dir` | Output directory (default: `outputs`) |
|
||||
|
||||
## Quick Test
|
||||
|
||||
```bash
|
||||
python scripts/run_detection.py --image assets/test.jpg
|
||||
```
|
||||
96
scripts/batch_process.py
Normal file
@@ -0,0 +1,96 @@
|
||||
# Batch face detection on a folder of images
|
||||
# Usage: python batch_process.py --input images/ --output results/
|
||||
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
from tqdm import tqdm
|
||||
|
||||
from uniface import SCRFD, RetinaFace
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
|
||||
def get_image_files(input_dir: Path, extensions: tuple) -> list:
|
||||
files = []
|
||||
for ext in extensions:
|
||||
files.extend(input_dir.glob(f"*.{ext}"))
|
||||
files.extend(input_dir.glob(f"*.{ext.upper()}"))
|
||||
return sorted(files)
|
||||
|
||||
|
||||
def process_image(detector, image_path: Path, output_path: Path, threshold: float) -> int:
|
||||
"""Process single image. Returns face count or -1 on error."""
|
||||
image = cv2.imread(str(image_path))
|
||||
if image is None:
|
||||
return -1
|
||||
|
||||
faces = detector.detect(image)
|
||||
|
||||
# unpack face data for visualization
|
||||
bboxes = [f["bbox"] for f in faces]
|
||||
scores = [f["confidence"] for f in faces]
|
||||
landmarks = [f["landmarks"] for f in faces]
|
||||
draw_detections(image, bboxes, scores, landmarks, vis_threshold=threshold)
|
||||
|
||||
cv2.putText(
|
||||
image,
|
||||
f"Faces: {len(faces)}",
|
||||
(10, 30),
|
||||
cv2.FONT_HERSHEY_SIMPLEX,
|
||||
1,
|
||||
(0, 255, 0),
|
||||
2,
|
||||
)
|
||||
cv2.imwrite(str(output_path), image)
|
||||
|
||||
return len(faces)
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="Batch process images with face detection")
|
||||
parser.add_argument("--input", type=str, required=True, help="Input directory")
|
||||
parser.add_argument("--output", type=str, required=True, help="Output directory")
|
||||
parser.add_argument("--detector", type=str, default="retinaface", choices=["retinaface", "scrfd"])
|
||||
parser.add_argument("--threshold", type=float, default=0.6, help="Visualization threshold")
|
||||
parser.add_argument("--extensions", type=str, default="jpg,jpeg,png,bmp", help="Image extensions")
|
||||
args = parser.parse_args()
|
||||
|
||||
input_path = Path(args.input)
|
||||
output_path = Path(args.output)
|
||||
|
||||
if not input_path.exists():
|
||||
print(f"Error: Input directory '{args.input}' does not exist")
|
||||
return
|
||||
|
||||
output_path.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
extensions = tuple(ext.strip() for ext in args.extensions.split(","))
|
||||
image_files = get_image_files(input_path, extensions)
|
||||
|
||||
if not image_files:
|
||||
print(f"No images found with extensions {extensions}")
|
||||
return
|
||||
|
||||
print(f"Found {len(image_files)} images")
|
||||
|
||||
detector = RetinaFace() if args.detector == "retinaface" else SCRFD()
|
||||
|
||||
success, errors, total_faces = 0, 0, 0
|
||||
|
||||
for img_path in tqdm(image_files, desc="Processing", unit="img"):
|
||||
out_path = output_path / f"{img_path.stem}_detected{img_path.suffix}"
|
||||
result = process_image(detector, img_path, out_path, args.threshold)
|
||||
|
||||
if result >= 0:
|
||||
success += 1
|
||||
total_faces += result
|
||||
else:
|
||||
errors += 1
|
||||
print(f"\nFailed: {img_path.name}")
|
||||
|
||||
print(f"\nDone! {success} processed, {errors} errors, {total_faces} faces total")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
60
scripts/download_model.py
Normal file
@@ -0,0 +1,60 @@
|
||||
import argparse
|
||||
|
||||
from uniface.constants import (
|
||||
AgeGenderWeights,
|
||||
ArcFaceWeights,
|
||||
DDAMFNWeights,
|
||||
LandmarkWeights,
|
||||
MobileFaceWeights,
|
||||
RetinaFaceWeights,
|
||||
SCRFDWeights,
|
||||
SphereFaceWeights,
|
||||
)
|
||||
from uniface.model_store import verify_model_weights
|
||||
|
||||
MODEL_TYPES = {
|
||||
"retinaface": RetinaFaceWeights,
|
||||
"sphereface": SphereFaceWeights,
|
||||
"mobileface": MobileFaceWeights,
|
||||
"arcface": ArcFaceWeights,
|
||||
"scrfd": SCRFDWeights,
|
||||
"ddamfn": DDAMFNWeights,
|
||||
"agegender": AgeGenderWeights,
|
||||
"landmark": LandmarkWeights,
|
||||
}
|
||||
|
||||
|
||||
def download_models(model_enum):
|
||||
for weight in model_enum:
|
||||
print(f"Downloading: {weight.value}")
|
||||
try:
|
||||
verify_model_weights(weight)
|
||||
print(f" Done: {weight.value}")
|
||||
except Exception as e:
|
||||
print(f" Failed: {e}")
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="Download model weights")
|
||||
parser.add_argument(
|
||||
"--model-type",
|
||||
type=str,
|
||||
choices=list(MODEL_TYPES.keys()),
|
||||
help="Model type to download. If not specified, downloads all.",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.model_type:
|
||||
print(f"Downloading {args.model_type} models...")
|
||||
download_models(MODEL_TYPES[args.model_type])
|
||||
else:
|
||||
print("Downloading all models...")
|
||||
for name, model_enum in MODEL_TYPES.items():
|
||||
print(f"\n{name}:")
|
||||
download_models(model_enum)
|
||||
|
||||
print("\nDone!")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -1,23 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Exit on errors
|
||||
set -e
|
||||
|
||||
cd "$(dirname "$0")"/..
|
||||
|
||||
echo "Deleting existing release-related files..."
|
||||
rm -rf dist/ build/ *.egg-info
|
||||
|
||||
pip install --upgrade pip
|
||||
pip install twine
|
||||
|
||||
echo "Creating a package for the current release (PyPI compatible)..."
|
||||
python3 setup.py sdist bdist_wheel
|
||||
|
||||
echo "Release package created successfully in the 'dist/' folder."
|
||||
|
||||
|
||||
echo "Uploading the package to PyPI..."
|
||||
twine upload dist/*
|
||||
|
||||
echo "Release uploaded successfully!"
|
||||
124
scripts/run_age_gender.py
Normal file
@@ -0,0 +1,124 @@
|
||||
# Age and gender prediction on detected faces
|
||||
# Usage: python run_age_gender.py --image path/to/image.jpg
|
||||
# python run_age_gender.py --webcam
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
|
||||
from uniface import SCRFD, AgeGender, RetinaFace
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
|
||||
def draw_age_gender_label(image, bbox, gender: str, age: int):
|
||||
"""Draw age/gender label above the bounding box."""
|
||||
x1, y1 = int(bbox[0]), int(bbox[1])
|
||||
text = f"{gender}, {age}y"
|
||||
(tw, th), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
|
||||
cv2.rectangle(image, (x1, y1 - th - 10), (x1 + tw + 10, y1), (0, 255, 0), -1)
|
||||
cv2.putText(image, text, (x1 + 5, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 2)
|
||||
|
||||
|
||||
def process_image(
|
||||
detector,
|
||||
age_gender,
|
||||
image_path: str,
|
||||
save_dir: str = "outputs",
|
||||
threshold: float = 0.6,
|
||||
):
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
print(f"Error: Failed to load image from '{image_path}'")
|
||||
return
|
||||
|
||||
faces = detector.detect(image)
|
||||
print(f"Detected {len(faces)} face(s)")
|
||||
|
||||
if not faces:
|
||||
return
|
||||
|
||||
bboxes = [f["bbox"] for f in faces]
|
||||
scores = [f["confidence"] for f in faces]
|
||||
landmarks = [f["landmarks"] for f in faces]
|
||||
draw_detections(image, bboxes, scores, landmarks, vis_threshold=threshold)
|
||||
|
||||
for i, face in enumerate(faces):
|
||||
gender, age = age_gender.predict(image, face["bbox"])
|
||||
print(f" Face {i + 1}: {gender}, {age} years old")
|
||||
draw_age_gender_label(image, face["bbox"], gender, age)
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f"{Path(image_path).stem}_age_gender.jpg")
|
||||
cv2.imwrite(output_path, image)
|
||||
print(f"Output saved: {output_path}")
|
||||
|
||||
|
||||
def run_webcam(detector, age_gender, threshold: float = 0.6):
|
||||
cap = cv2.VideoCapture(0) # 0 = default webcam
|
||||
if not cap.isOpened():
|
||||
print("Cannot open webcam")
|
||||
return
|
||||
|
||||
print("Press 'q' to quit")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
frame = cv2.flip(frame, 1) # mirror for natural interaction
|
||||
if not ret:
|
||||
break
|
||||
|
||||
faces = detector.detect(frame)
|
||||
|
||||
# unpack face data for visualization
|
||||
bboxes = [f["bbox"] for f in faces]
|
||||
scores = [f["confidence"] for f in faces]
|
||||
landmarks = [f["landmarks"] for f in faces]
|
||||
draw_detections(frame, bboxes, scores, landmarks, vis_threshold=threshold)
|
||||
|
||||
for face in faces:
|
||||
gender, age = age_gender.predict(frame, face["bbox"]) # predict per face
|
||||
draw_age_gender_label(frame, face["bbox"], gender, age)
|
||||
|
||||
cv2.putText(
|
||||
frame,
|
||||
f"Faces: {len(faces)}",
|
||||
(10, 30),
|
||||
cv2.FONT_HERSHEY_SIMPLEX,
|
||||
1,
|
||||
(0, 255, 0),
|
||||
2,
|
||||
)
|
||||
cv2.imshow("Age & Gender Detection", frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord("q"):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="Run age and gender detection")
|
||||
parser.add_argument("--image", type=str, help="Path to input image")
|
||||
parser.add_argument("--webcam", action="store_true", help="Use webcam")
|
||||
parser.add_argument("--detector", type=str, default="retinaface", choices=["retinaface", "scrfd"])
|
||||
parser.add_argument("--threshold", type=float, default=0.6, help="Visualization threshold")
|
||||
parser.add_argument("--save_dir", type=str, default="outputs")
|
||||
args = parser.parse_args()
|
||||
|
||||
if not args.image and not args.webcam:
|
||||
parser.error("Either --image or --webcam must be specified")
|
||||
|
||||
detector = RetinaFace() if args.detector == "retinaface" else SCRFD()
|
||||
age_gender = AgeGender()
|
||||
|
||||
if args.webcam:
|
||||
run_webcam(detector, age_gender, args.threshold)
|
||||
else:
|
||||
process_image(detector, age_gender, args.image, args.save_dir, args.threshold)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
95
scripts/run_detection.py
Normal file
@@ -0,0 +1,95 @@
|
||||
# Face detection on image or webcam
|
||||
# Usage: python run_detection.py --image path/to/image.jpg
|
||||
# python run_detection.py --webcam
|
||||
|
||||
import argparse
|
||||
import os
|
||||
|
||||
import cv2
|
||||
|
||||
from uniface.detection import SCRFD, RetinaFace
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
|
||||
def process_image(detector, image_path: str, threshold: float = 0.6, save_dir: str = "outputs"):
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
print(f"Error: Failed to load image from '{image_path}'")
|
||||
return
|
||||
|
||||
faces = detector.detect(image)
|
||||
|
||||
if faces:
|
||||
bboxes = [face["bbox"] for face in faces]
|
||||
scores = [face["confidence"] for face in faces]
|
||||
landmarks = [face["landmarks"] for face in faces]
|
||||
draw_detections(image, bboxes, scores, landmarks, vis_threshold=threshold)
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f"{os.path.splitext(os.path.basename(image_path))[0]}_out.jpg")
|
||||
cv2.imwrite(output_path, image)
|
||||
print(f"Output saved: {output_path}")
|
||||
|
||||
|
||||
def run_webcam(detector, threshold: float = 0.6):
|
||||
cap = cv2.VideoCapture(0) # 0 = default webcam
|
||||
if not cap.isOpened():
|
||||
print("Cannot open webcam")
|
||||
return
|
||||
|
||||
print("Press 'q' to quit")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
frame = cv2.flip(frame, 1) # mirror for natural interaction
|
||||
if not ret:
|
||||
break
|
||||
|
||||
faces = detector.detect(frame)
|
||||
|
||||
# unpack face data for visualization
|
||||
bboxes = [f["bbox"] for f in faces]
|
||||
scores = [f["confidence"] for f in faces]
|
||||
landmarks = [f["landmarks"] for f in faces]
|
||||
draw_detections(frame, bboxes, scores, landmarks, vis_threshold=threshold)
|
||||
|
||||
cv2.putText(
|
||||
frame,
|
||||
f"Faces: {len(faces)}",
|
||||
(10, 30),
|
||||
cv2.FONT_HERSHEY_SIMPLEX,
|
||||
1,
|
||||
(0, 255, 0),
|
||||
2,
|
||||
)
|
||||
cv2.imshow("Face Detection", frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord("q"):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="Run face detection")
|
||||
parser.add_argument("--image", type=str, help="Path to input image")
|
||||
parser.add_argument("--webcam", action="store_true", help="Use webcam")
|
||||
parser.add_argument("--method", type=str, default="retinaface", choices=["retinaface", "scrfd"])
|
||||
parser.add_argument("--threshold", type=float, default=0.6, help="Visualization threshold")
|
||||
parser.add_argument("--save_dir", type=str, default="outputs")
|
||||
args = parser.parse_args()
|
||||
|
||||
if not args.image and not args.webcam:
|
||||
parser.error("Either --image or --webcam must be specified")
|
||||
|
||||
detector = RetinaFace() if args.method == "retinaface" else SCRFD()
|
||||
|
||||
if args.webcam:
|
||||
run_webcam(detector, args.threshold)
|
||||
else:
|
||||
process_image(detector, args.image, args.threshold, args.save_dir)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
97
scripts/run_face_search.py
Normal file
@@ -0,0 +1,97 @@
|
||||
# Real-time face search: match webcam faces against a reference image
|
||||
# Usage: python run_face_search.py --image reference.jpg
|
||||
|
||||
import argparse
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from uniface.detection import SCRFD, RetinaFace
|
||||
from uniface.face_utils import compute_similarity
|
||||
from uniface.recognition import ArcFace, MobileFace, SphereFace
|
||||
|
||||
|
||||
def get_recognizer(name: str):
|
||||
if name == "arcface":
|
||||
return ArcFace()
|
||||
elif name == "mobileface":
|
||||
return MobileFace()
|
||||
else:
|
||||
return SphereFace()
|
||||
|
||||
|
||||
def extract_reference_embedding(detector, recognizer, image_path: str) -> np.ndarray:
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
raise RuntimeError(f"Failed to load image: {image_path}")
|
||||
|
||||
faces = detector.detect(image)
|
||||
if not faces:
|
||||
raise RuntimeError("No faces found in reference image.")
|
||||
|
||||
landmarks = np.array(faces[0]["landmarks"])
|
||||
return recognizer.get_normalized_embedding(image, landmarks)
|
||||
|
||||
|
||||
def run_webcam(detector, recognizer, ref_embedding: np.ndarray, threshold: float = 0.4):
|
||||
cap = cv2.VideoCapture(0) # 0 = default webcam
|
||||
if not cap.isOpened():
|
||||
raise RuntimeError("Webcam could not be opened.")
|
||||
|
||||
print("Press 'q' to quit")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
frame = cv2.flip(frame, 1) # mirror for natural interaction
|
||||
if not ret:
|
||||
break
|
||||
|
||||
faces = detector.detect(frame)
|
||||
|
||||
for face in faces:
|
||||
bbox = face["bbox"]
|
||||
landmarks = np.array(face["landmarks"])
|
||||
x1, y1, x2, y2 = map(int, bbox)
|
||||
|
||||
embedding = recognizer.get_normalized_embedding(frame, landmarks)
|
||||
sim = compute_similarity(ref_embedding, embedding) # compare with reference
|
||||
|
||||
# green = match, red = unknown
|
||||
label = f"Match ({sim:.2f})" if sim > threshold else f"Unknown ({sim:.2f})"
|
||||
color = (0, 255, 0) if sim > threshold else (0, 0, 255)
|
||||
|
||||
cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
|
||||
cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)
|
||||
|
||||
cv2.imshow("Face Recognition", frame)
|
||||
if cv2.waitKey(1) & 0xFF == ord("q"):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="Face search using a reference image")
|
||||
parser.add_argument("--image", type=str, required=True, help="Reference face image")
|
||||
parser.add_argument("--threshold", type=float, default=0.4, help="Match threshold")
|
||||
parser.add_argument("--detector", type=str, default="scrfd", choices=["retinaface", "scrfd"])
|
||||
parser.add_argument(
|
||||
"--recognizer",
|
||||
type=str,
|
||||
default="arcface",
|
||||
choices=["arcface", "mobileface", "sphereface"],
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
detector = RetinaFace() if args.detector == "retinaface" else SCRFD()
|
||||
recognizer = get_recognizer(args.recognizer)
|
||||
|
||||
print(f"Loading reference: {args.image}")
|
||||
ref_embedding = extract_reference_embedding(detector, recognizer, args.image)
|
||||
|
||||
run_webcam(detector, recognizer, ref_embedding, args.threshold)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
117
scripts/run_landmarks.py
Normal file
@@ -0,0 +1,117 @@
|
||||
# 106-point facial landmark detection
|
||||
# Usage: python run_landmarks.py --image path/to/image.jpg
|
||||
# python run_landmarks.py --webcam
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
|
||||
from uniface import SCRFD, Landmark106, RetinaFace
|
||||
|
||||
|
||||
def process_image(detector, landmarker, image_path: str, save_dir: str = "outputs"):
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
print(f"Error: Failed to load image from '{image_path}'")
|
||||
return
|
||||
|
||||
faces = detector.detect(image)
|
||||
print(f"Detected {len(faces)} face(s)")
|
||||
|
||||
if not faces:
|
||||
return
|
||||
|
||||
for i, face in enumerate(faces):
|
||||
bbox = face["bbox"]
|
||||
x1, y1, x2, y2 = map(int, bbox)
|
||||
cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
|
||||
|
||||
landmarks = landmarker.get_landmarks(image, bbox)
|
||||
print(f" Face {i + 1}: {len(landmarks)} landmarks")
|
||||
|
||||
for x, y in landmarks.astype(int):
|
||||
cv2.circle(image, (x, y), 1, (0, 255, 0), -1)
|
||||
|
||||
cv2.putText(
|
||||
image,
|
||||
f"Face {i + 1}",
|
||||
(x1, y1 - 10),
|
||||
cv2.FONT_HERSHEY_SIMPLEX,
|
||||
0.5,
|
||||
(0, 255, 0),
|
||||
2,
|
||||
)
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f"{Path(image_path).stem}_landmarks.jpg")
|
||||
cv2.imwrite(output_path, image)
|
||||
print(f"Output saved: {output_path}")
|
||||
|
||||
|
||||
def run_webcam(detector, landmarker):
|
||||
cap = cv2.VideoCapture(0) # 0 = default webcam
|
||||
if not cap.isOpened():
|
||||
print("Cannot open webcam")
|
||||
return
|
||||
|
||||
print("Press 'q' to quit")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
frame = cv2.flip(frame, 1) # mirror for natural interaction
|
||||
if not ret:
|
||||
break
|
||||
|
||||
faces = detector.detect(frame)
|
||||
|
||||
for face in faces:
|
||||
bbox = face["bbox"]
|
||||
x1, y1, x2, y2 = map(int, bbox)
|
||||
cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
|
||||
|
||||
landmarks = landmarker.get_landmarks(frame, bbox) # 106 points
|
||||
for x, y in landmarks.astype(int):
|
||||
cv2.circle(frame, (x, y), 1, (0, 255, 0), -1)
|
||||
|
||||
cv2.putText(
|
||||
frame,
|
||||
f"Faces: {len(faces)}",
|
||||
(10, 30),
|
||||
cv2.FONT_HERSHEY_SIMPLEX,
|
||||
1,
|
||||
(0, 255, 0),
|
||||
2,
|
||||
)
|
||||
cv2.imshow("106-Point Landmarks", frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord("q"):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="Run facial landmark detection")
|
||||
parser.add_argument("--image", type=str, help="Path to input image")
|
||||
parser.add_argument("--webcam", action="store_true", help="Use webcam")
|
||||
parser.add_argument("--detector", type=str, default="retinaface", choices=["retinaface", "scrfd"])
|
||||
parser.add_argument("--save_dir", type=str, default="outputs")
|
||||
args = parser.parse_args()
|
||||
|
||||
if not args.image and not args.webcam:
|
||||
parser.error("Either --image or --webcam must be specified")
|
||||
|
||||
detector = RetinaFace() if args.detector == "retinaface" else SCRFD()
|
||||
landmarker = Landmark106()
|
||||
|
||||
if args.webcam:
|
||||
run_webcam(detector, landmarker)
|
||||
else:
|
||||
process_image(detector, landmarker, args.image, args.save_dir)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
103
scripts/run_recognition.py
Normal file
@@ -0,0 +1,103 @@
|
||||
# Face recognition: extract embeddings or compare two faces
|
||||
# Usage: python run_recognition.py --image path/to/image.jpg
|
||||
# python run_recognition.py --image1 face1.jpg --image2 face2.jpg
|
||||
|
||||
import argparse
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from uniface.detection import SCRFD, RetinaFace
|
||||
from uniface.face_utils import compute_similarity
|
||||
from uniface.recognition import ArcFace, MobileFace, SphereFace
|
||||
|
||||
|
||||
def get_recognizer(name: str):
|
||||
if name == "arcface":
|
||||
return ArcFace()
|
||||
elif name == "mobileface":
|
||||
return MobileFace()
|
||||
else:
|
||||
return SphereFace()
|
||||
|
||||
|
||||
def run_inference(detector, recognizer, image_path: str):
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
print(f"Error: Failed to load image from '{image_path}'")
|
||||
return
|
||||
|
||||
faces = detector.detect(image)
|
||||
if not faces:
|
||||
print("No faces detected.")
|
||||
return
|
||||
|
||||
print(f"Detected {len(faces)} face(s). Extracting embedding for the first face...")
|
||||
|
||||
landmarks = np.array(faces[0]["landmarks"]) # 5-point landmarks for alignment
|
||||
embedding = recognizer.get_embedding(image, landmarks)
|
||||
norm_embedding = recognizer.get_normalized_embedding(image, landmarks) # L2 normalized
|
||||
|
||||
print(f" Embedding shape: {embedding.shape}")
|
||||
print(f" L2 norm (raw): {np.linalg.norm(embedding):.4f}")
|
||||
print(f" L2 norm (normalized): {np.linalg.norm(norm_embedding):.4f}")
|
||||
|
||||
|
||||
def compare_faces(detector, recognizer, image1_path: str, image2_path: str, threshold: float = 0.35):
|
||||
img1 = cv2.imread(image1_path)
|
||||
img2 = cv2.imread(image2_path)
|
||||
|
||||
if img1 is None or img2 is None:
|
||||
print("Error: Failed to load one or both images")
|
||||
return
|
||||
|
||||
faces1 = detector.detect(img1)
|
||||
faces2 = detector.detect(img2)
|
||||
|
||||
if not faces1 or not faces2:
|
||||
print("Error: No faces detected in one or both images")
|
||||
return
|
||||
|
||||
landmarks1 = np.array(faces1[0]["landmarks"])
|
||||
landmarks2 = np.array(faces2[0]["landmarks"])
|
||||
|
||||
embedding1 = recognizer.get_normalized_embedding(img1, landmarks1)
|
||||
embedding2 = recognizer.get_normalized_embedding(img2, landmarks2)
|
||||
|
||||
# cosine similarity for normalized embeddings
|
||||
similarity = compute_similarity(embedding1, embedding2, normalized=True)
|
||||
is_match = similarity > threshold
|
||||
|
||||
print(f"Similarity: {similarity:.4f}")
|
||||
print(f"Result: {'Same person' if is_match else 'Different person'} (threshold: {threshold})")
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="Face recognition and comparison")
|
||||
parser.add_argument("--image", type=str, help="Single image for embedding extraction")
|
||||
parser.add_argument("--image1", type=str, help="First image for comparison")
|
||||
parser.add_argument("--image2", type=str, help="Second image for comparison")
|
||||
parser.add_argument("--threshold", type=float, default=0.35, help="Similarity threshold")
|
||||
parser.add_argument("--detector", type=str, default="retinaface", choices=["retinaface", "scrfd"])
|
||||
parser.add_argument(
|
||||
"--recognizer",
|
||||
type=str,
|
||||
default="arcface",
|
||||
choices=["arcface", "mobileface", "sphereface"],
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
detector = RetinaFace() if args.detector == "retinaface" else SCRFD()
|
||||
recognizer = get_recognizer(args.recognizer)
|
||||
|
||||
if args.image1 and args.image2:
|
||||
compare_faces(detector, recognizer, args.image1, args.image2, args.threshold)
|
||||
elif args.image:
|
||||
run_inference(detector, recognizer, args.image)
|
||||
else:
|
||||
print("Error: Provide --image or both --image1 and --image2")
|
||||
parser.print_help()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
107
scripts/run_video_detection.py
Normal file
@@ -0,0 +1,107 @@
|
||||
# Face detection on video files
|
||||
# Usage: python run_video_detection.py --input video.mp4 --output output.mp4
|
||||
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
from tqdm import tqdm
|
||||
|
||||
from uniface import SCRFD, RetinaFace
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
|
||||
def process_video(
|
||||
detector,
|
||||
input_path: str,
|
||||
output_path: str,
|
||||
threshold: float = 0.6,
|
||||
show_preview: bool = False,
|
||||
):
|
||||
cap = cv2.VideoCapture(input_path)
|
||||
if not cap.isOpened():
|
||||
print(f"Error: Cannot open video file '{input_path}'")
|
||||
return
|
||||
|
||||
# get video properties
|
||||
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
||||
fps = cap.get(cv2.CAP_PROP_FPS)
|
||||
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
||||
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
||||
|
||||
print(f"Input: {input_path} ({width}x{height}, {fps:.1f} fps, {total_frames} frames)")
|
||||
print(f"Output: {output_path}")
|
||||
|
||||
fourcc = cv2.VideoWriter_fourcc(*"mp4v") # codec for .mp4
|
||||
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
|
||||
|
||||
if not out.isOpened():
|
||||
print(f"Error: Cannot create output video '{output_path}'")
|
||||
cap.release()
|
||||
return
|
||||
|
||||
frame_count = 0
|
||||
total_faces = 0
|
||||
|
||||
for _ in tqdm(range(total_frames), desc="Processing", unit="frames"):
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
frame_count += 1
|
||||
faces = detector.detect(frame)
|
||||
total_faces += len(faces)
|
||||
|
||||
bboxes = [f["bbox"] for f in faces]
|
||||
scores = [f["confidence"] for f in faces]
|
||||
landmarks = [f["landmarks"] for f in faces]
|
||||
draw_detections(frame, bboxes, scores, landmarks, vis_threshold=threshold)
|
||||
|
||||
cv2.putText(
|
||||
frame,
|
||||
f"Faces: {len(faces)}",
|
||||
(10, 30),
|
||||
cv2.FONT_HERSHEY_SIMPLEX,
|
||||
1,
|
||||
(0, 255, 0),
|
||||
2,
|
||||
)
|
||||
out.write(frame)
|
||||
|
||||
if show_preview:
|
||||
cv2.imshow("Processing - Press 'q' to cancel", frame)
|
||||
if cv2.waitKey(1) & 0xFF == ord("q"):
|
||||
print("\nCancelled by user")
|
||||
break
|
||||
|
||||
cap.release()
|
||||
out.release()
|
||||
if show_preview:
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
avg_faces = total_faces / frame_count if frame_count > 0 else 0
|
||||
print(f"\nDone! {frame_count} frames, {total_faces} faces ({avg_faces:.1f} avg/frame)")
|
||||
print(f"Saved: {output_path}")
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="Process video with face detection")
|
||||
parser.add_argument("--input", type=str, required=True, help="Input video path")
|
||||
parser.add_argument("--output", type=str, required=True, help="Output video path")
|
||||
parser.add_argument("--detector", type=str, default="retinaface", choices=["retinaface", "scrfd"])
|
||||
parser.add_argument("--threshold", type=float, default=0.6, help="Visualization threshold")
|
||||
parser.add_argument("--preview", action="store_true", help="Show live preview")
|
||||
args = parser.parse_args()
|
||||
|
||||
if not Path(args.input).exists():
|
||||
print(f"Error: Input file '{args.input}' does not exist")
|
||||
return
|
||||
|
||||
Path(args.output).parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
detector = RetinaFace() if args.detector == "retinaface" else SCRFD()
|
||||
process_video(detector, args.input, args.output, args.threshold, args.preview)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
28
scripts/sha256_generate.py
Normal file
@@ -0,0 +1,28 @@
|
||||
import argparse
|
||||
import hashlib
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def compute_sha256(file_path: Path, chunk_size: int = 8192) -> str:
|
||||
sha256_hash = hashlib.sha256()
|
||||
with file_path.open("rb") as f:
|
||||
for chunk in iter(lambda: f.read(chunk_size), b""):
|
||||
sha256_hash.update(chunk)
|
||||
return sha256_hash.hexdigest()
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="Compute SHA256 hash of a file")
|
||||
parser.add_argument("file", type=Path, help="Path to file")
|
||||
args = parser.parse_args()
|
||||
|
||||
if not args.file.exists() or not args.file.is_file():
|
||||
print(f"File does not exist: {args.file}")
|
||||
return
|
||||
|
||||
sha256 = compute_sha256(args.file)
|
||||
print(f"SHA256 hash for '{args.file.name}':\n{sha256}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
43
setup.py
@@ -1,43 +0,0 @@
|
||||
import os
|
||||
from setuptools import setup, find_packages
|
||||
|
||||
# Read the README file for the long description
|
||||
long_description = ""
|
||||
if os.path.exists("README.md"):
|
||||
with open("README.md", "r", encoding="utf-8") as f:
|
||||
long_description = f.read()
|
||||
|
||||
setup(
|
||||
name="uniface",
|
||||
version="0.1.1",
|
||||
packages=find_packages(),
|
||||
install_requires=[
|
||||
"numpy",
|
||||
"opencv-python",
|
||||
"onnx",
|
||||
"onnxruntime",
|
||||
"requests",
|
||||
"torch"
|
||||
],
|
||||
extras_require={
|
||||
"dev": ["pytest"],
|
||||
},
|
||||
description="UniFace: A Comprehensive Library for Face Detection, Recognition, Landmark Analysis, Age, and Gender Detection",
|
||||
long_description=long_description,
|
||||
long_description_content_type="text/markdown",
|
||||
author="Yakhyokhuja Valikhujaev",
|
||||
author_email="yakhyo9696@gmail.com",
|
||||
url="https://github.com/yakhyo/uniface",
|
||||
license="MIT",
|
||||
classifiers=[
|
||||
"Programming Language :: Python :: 3",
|
||||
"Programming Language :: Python :: 3.8",
|
||||
"Programming Language :: Python :: 3.9",
|
||||
"Programming Language :: Python :: 3.10",
|
||||
"License :: OSI Approved :: MIT License",
|
||||
"Operating System :: OS Independent",
|
||||
"Topic :: Software Development :: Libraries :: Python Modules",
|
||||
],
|
||||
keywords="face detection, face recognition, facial landmark, facial attribute, onnx, opencv, retinaface",
|
||||
python_requires=">=3.8",
|
||||
)
|
||||
57
test.py
@@ -1,57 +0,0 @@
|
||||
import os
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from uniface import RetinaFace, draw_detections
|
||||
|
||||
|
||||
def run_inference(image_path, save_image=False, vis_threshold=0.6):
|
||||
"""
|
||||
Perform inference on an image, draw detections, and optionally save the output image.
|
||||
|
||||
Args:
|
||||
image_path (str): Path to the input image.
|
||||
save_image (bool): Whether to save the output image with detections.
|
||||
vis_threshold (float): Confidence threshold for displaying detections.
|
||||
"""
|
||||
# Load the image
|
||||
original_image = cv2.imread(image_path)
|
||||
if original_image is None:
|
||||
print(f"Error: Could not read image from {image_path}")
|
||||
return
|
||||
|
||||
# Perform face detection
|
||||
boxes, landmarks = retinaface_inference.detect(original_image)
|
||||
|
||||
# Draw detections on the image
|
||||
draw_detections(original_image, (boxes, landmarks), vis_threshold)
|
||||
|
||||
# Save the output image if requested
|
||||
if save_image:
|
||||
im_name = os.path.splitext(os.path.basename(image_path))[0]
|
||||
save_name = f"{im_name}_out.jpg"
|
||||
cv2.imwrite(save_name, original_image)
|
||||
print(f"Image saved at '{save_name}'")
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
import time
|
||||
|
||||
# Initialize and run the ONNX inference
|
||||
retinaface_inference = RetinaFace(
|
||||
model="retinaface_mnet_v2",
|
||||
conf_thresh=0.5,
|
||||
pre_nms_topk=5000,
|
||||
nms_thresh=0.4,
|
||||
post_nms_topk=750,
|
||||
)
|
||||
|
||||
img_path = "assets/test.jpg"
|
||||
avg = 0
|
||||
for _ in range(50):
|
||||
st = time.time()
|
||||
run_inference(img_path, save_image=True, vis_threshold=0.6)
|
||||
d = time.time() - st
|
||||
print(d)
|
||||
avg += d
|
||||
print("avg", avg / 50)
|
||||
116
tests/test_age_gender.py
Normal file
@@ -0,0 +1,116 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from uniface.attribute import AgeGender
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def age_gender_model():
|
||||
return AgeGender()
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_image():
|
||||
return np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_bbox():
|
||||
return [100, 100, 300, 300]
|
||||
|
||||
|
||||
def test_model_initialization(age_gender_model):
|
||||
assert age_gender_model is not None, "AgeGender model initialization failed."
|
||||
|
||||
|
||||
def test_prediction_output_format(age_gender_model, mock_image, mock_bbox):
|
||||
gender, age = age_gender_model.predict(mock_image, mock_bbox)
|
||||
assert isinstance(gender, str), f"Gender should be string, got {type(gender)}"
|
||||
assert isinstance(age, int), f"Age should be int, got {type(age)}"
|
||||
|
||||
|
||||
def test_gender_values(age_gender_model, mock_image, mock_bbox):
|
||||
gender, age = age_gender_model.predict(mock_image, mock_bbox)
|
||||
assert gender in ["Male", "Female"], f"Gender should be 'Male' or 'Female', got '{gender}'"
|
||||
|
||||
|
||||
def test_age_range(age_gender_model, mock_image, mock_bbox):
|
||||
gender, age = age_gender_model.predict(mock_image, mock_bbox)
|
||||
assert 0 <= age <= 120, f"Age should be between 0 and 120, got {age}"
|
||||
|
||||
|
||||
def test_different_bbox_sizes(age_gender_model, mock_image):
|
||||
test_bboxes = [
|
||||
[50, 50, 150, 150],
|
||||
[100, 100, 300, 300],
|
||||
[50, 50, 400, 400],
|
||||
]
|
||||
|
||||
for bbox in test_bboxes:
|
||||
gender, age = age_gender_model.predict(mock_image, bbox)
|
||||
assert gender in ["Male", "Female"], f"Failed for bbox {bbox}"
|
||||
assert 0 <= age <= 120, f"Age out of range for bbox {bbox}"
|
||||
|
||||
|
||||
def test_different_image_sizes(age_gender_model, mock_bbox):
|
||||
test_sizes = [(480, 640, 3), (720, 1280, 3), (1080, 1920, 3)]
|
||||
|
||||
for size in test_sizes:
|
||||
mock_image = np.random.randint(0, 255, size, dtype=np.uint8)
|
||||
gender, age = age_gender_model.predict(mock_image, mock_bbox)
|
||||
assert gender in ["Male", "Female"], f"Failed for image size {size}"
|
||||
assert 0 <= age <= 120, f"Age out of range for image size {size}"
|
||||
|
||||
|
||||
def test_consistency(age_gender_model, mock_image, mock_bbox):
|
||||
gender1, age1 = age_gender_model.predict(mock_image, mock_bbox)
|
||||
gender2, age2 = age_gender_model.predict(mock_image, mock_bbox)
|
||||
|
||||
assert gender1 == gender2, "Same input should produce same gender prediction"
|
||||
assert age1 == age2, "Same input should produce same age prediction"
|
||||
|
||||
|
||||
def test_bbox_list_format(age_gender_model, mock_image):
|
||||
bbox_list = [100, 100, 300, 300]
|
||||
gender, age = age_gender_model.predict(mock_image, bbox_list)
|
||||
assert gender in ["Male", "Female"], "Should work with bbox as list"
|
||||
assert 0 <= age <= 120, "Age should be in valid range"
|
||||
|
||||
|
||||
def test_bbox_array_format(age_gender_model, mock_image):
|
||||
bbox_array = np.array([100, 100, 300, 300])
|
||||
gender, age = age_gender_model.predict(mock_image, bbox_array)
|
||||
assert gender in ["Male", "Female"], "Should work with bbox as numpy array"
|
||||
assert 0 <= age <= 120, "Age should be in valid range"
|
||||
|
||||
|
||||
def test_multiple_predictions(age_gender_model, mock_image):
|
||||
bboxes = [
|
||||
[50, 50, 150, 150],
|
||||
[200, 200, 350, 350],
|
||||
[400, 400, 550, 550],
|
||||
]
|
||||
|
||||
results = []
|
||||
for bbox in bboxes:
|
||||
gender, age = age_gender_model.predict(mock_image, bbox)
|
||||
results.append((gender, age))
|
||||
|
||||
assert len(results) == 3, "Should have 3 predictions"
|
||||
for gender, age in results:
|
||||
assert gender in ["Male", "Female"]
|
||||
assert 0 <= age <= 120
|
||||
|
||||
|
||||
def test_age_is_positive(age_gender_model, mock_image, mock_bbox):
|
||||
for _ in range(5):
|
||||
gender, age = age_gender_model.predict(mock_image, mock_bbox)
|
||||
assert age >= 0, f"Age should be non-negative, got {age}"
|
||||
|
||||
|
||||
def test_output_format_for_visualization(age_gender_model, mock_image, mock_bbox):
|
||||
gender, age = age_gender_model.predict(mock_image, mock_bbox)
|
||||
text = f"{gender}, {age}y"
|
||||
assert isinstance(text, str), "Should be able to format as string"
|
||||
assert "Male" in text or "Female" in text, "Text should contain gender"
|
||||
assert "y" in text, "Text should contain 'y' for years"
|
||||
274
tests/test_factory.py
Normal file
@@ -0,0 +1,274 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from uniface import (
|
||||
create_detector,
|
||||
create_landmarker,
|
||||
create_recognizer,
|
||||
detect_faces,
|
||||
list_available_detectors,
|
||||
)
|
||||
from uniface.constants import RetinaFaceWeights, SCRFDWeights
|
||||
|
||||
|
||||
# create_detector tests
|
||||
def test_create_detector_retinaface():
|
||||
"""
|
||||
Test creating a RetinaFace detector using factory function.
|
||||
"""
|
||||
detector = create_detector("retinaface")
|
||||
assert detector is not None, "Failed to create RetinaFace detector"
|
||||
|
||||
|
||||
def test_create_detector_scrfd():
|
||||
"""
|
||||
Test creating a SCRFD detector using factory function.
|
||||
"""
|
||||
detector = create_detector("scrfd")
|
||||
assert detector is not None, "Failed to create SCRFD detector"
|
||||
|
||||
|
||||
def test_create_detector_with_config():
|
||||
"""
|
||||
Test creating detector with custom configuration.
|
||||
"""
|
||||
detector = create_detector(
|
||||
"retinaface",
|
||||
model_name=RetinaFaceWeights.MNET_V2,
|
||||
conf_thresh=0.8,
|
||||
nms_thresh=0.3,
|
||||
)
|
||||
assert detector is not None, "Failed to create detector with custom config"
|
||||
|
||||
|
||||
def test_create_detector_invalid_method():
|
||||
"""
|
||||
Test that invalid detector method raises an error.
|
||||
"""
|
||||
with pytest.raises((ValueError, KeyError)):
|
||||
create_detector("invalid_method")
|
||||
|
||||
|
||||
def test_create_detector_scrfd_with_model():
|
||||
"""
|
||||
Test creating SCRFD detector with specific model.
|
||||
"""
|
||||
detector = create_detector("scrfd", model_name=SCRFDWeights.SCRFD_10G_KPS, conf_thresh=0.5)
|
||||
assert detector is not None, "Failed to create SCRFD with specific model"
|
||||
|
||||
|
||||
# create_recognizer tests
|
||||
def test_create_recognizer_arcface():
|
||||
"""
|
||||
Test creating an ArcFace recognizer using factory function.
|
||||
"""
|
||||
recognizer = create_recognizer("arcface")
|
||||
assert recognizer is not None, "Failed to create ArcFace recognizer"
|
||||
|
||||
|
||||
def test_create_recognizer_mobileface():
|
||||
"""
|
||||
Test creating a MobileFace recognizer using factory function.
|
||||
"""
|
||||
recognizer = create_recognizer("mobileface")
|
||||
assert recognizer is not None, "Failed to create MobileFace recognizer"
|
||||
|
||||
|
||||
def test_create_recognizer_sphereface():
|
||||
"""
|
||||
Test creating a SphereFace recognizer using factory function.
|
||||
"""
|
||||
recognizer = create_recognizer("sphereface")
|
||||
assert recognizer is not None, "Failed to create SphereFace recognizer"
|
||||
|
||||
|
||||
def test_create_recognizer_invalid_method():
|
||||
"""
|
||||
Test that invalid recognizer method raises an error.
|
||||
"""
|
||||
with pytest.raises((ValueError, KeyError)):
|
||||
create_recognizer("invalid_method")
|
||||
|
||||
|
||||
# create_landmarker tests
|
||||
def test_create_landmarker():
|
||||
"""
|
||||
Test creating a Landmark106 detector using factory function.
|
||||
"""
|
||||
landmarker = create_landmarker("2d106det")
|
||||
assert landmarker is not None, "Failed to create Landmark106 detector"
|
||||
|
||||
|
||||
def test_create_landmarker_default():
|
||||
"""
|
||||
Test creating landmarker with default parameters.
|
||||
"""
|
||||
landmarker = create_landmarker()
|
||||
assert landmarker is not None, "Failed to create default landmarker"
|
||||
|
||||
|
||||
def test_create_landmarker_invalid_method():
|
||||
"""
|
||||
Test that invalid landmarker method raises an error.
|
||||
"""
|
||||
with pytest.raises((ValueError, KeyError)):
|
||||
create_landmarker("invalid_method")
|
||||
|
||||
|
||||
# detect_faces tests
|
||||
def test_detect_faces_retinaface():
|
||||
"""
|
||||
Test high-level detect_faces function with RetinaFace.
|
||||
"""
|
||||
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
faces = detect_faces(mock_image, method="retinaface")
|
||||
|
||||
assert isinstance(faces, list), "detect_faces should return a list"
|
||||
|
||||
|
||||
def test_detect_faces_scrfd():
|
||||
"""
|
||||
Test high-level detect_faces function with SCRFD.
|
||||
"""
|
||||
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
faces = detect_faces(mock_image, method="scrfd")
|
||||
|
||||
assert isinstance(faces, list), "detect_faces should return a list"
|
||||
|
||||
|
||||
def test_detect_faces_with_threshold():
|
||||
"""
|
||||
Test detect_faces with custom confidence threshold.
|
||||
"""
|
||||
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
faces = detect_faces(mock_image, method="retinaface", conf_thresh=0.8)
|
||||
|
||||
assert isinstance(faces, list), "detect_faces should return a list"
|
||||
|
||||
# All detections should respect threshold
|
||||
for face in faces:
|
||||
assert face["confidence"] >= 0.8, "All detections should meet confidence threshold"
|
||||
|
||||
|
||||
def test_detect_faces_default_method():
|
||||
"""
|
||||
Test detect_faces with default method (should use retinaface).
|
||||
"""
|
||||
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
faces = detect_faces(mock_image) # No method specified
|
||||
|
||||
assert isinstance(faces, list), "detect_faces should return a list with default method"
|
||||
|
||||
|
||||
def test_detect_faces_empty_image():
|
||||
"""
|
||||
Test detect_faces on a blank image.
|
||||
"""
|
||||
empty_image = np.zeros((640, 640, 3), dtype=np.uint8)
|
||||
faces = detect_faces(empty_image, method="retinaface")
|
||||
|
||||
assert isinstance(faces, list), "Should return a list even for empty image"
|
||||
assert len(faces) == 0, "Should detect no faces in blank image"
|
||||
|
||||
|
||||
# list_available_detectors tests
|
||||
def test_list_available_detectors():
|
||||
"""
|
||||
Test that list_available_detectors returns a dictionary.
|
||||
"""
|
||||
detectors = list_available_detectors()
|
||||
|
||||
assert isinstance(detectors, dict), "Should return a dictionary of detectors"
|
||||
assert len(detectors) > 0, "Should have at least one detector available"
|
||||
|
||||
|
||||
def test_list_available_detectors_contents():
|
||||
"""
|
||||
Test that list includes known detectors.
|
||||
"""
|
||||
detectors = list_available_detectors()
|
||||
|
||||
# Should include at least these detectors
|
||||
assert "retinaface" in detectors, "Should include 'retinaface'"
|
||||
assert "scrfd" in detectors, "Should include 'scrfd'"
|
||||
|
||||
|
||||
# Integration tests
|
||||
def test_detector_inference_from_factory():
|
||||
"""
|
||||
Test that detector created from factory can perform inference.
|
||||
"""
|
||||
detector = create_detector("retinaface")
|
||||
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
|
||||
faces = detector.detect(mock_image)
|
||||
assert isinstance(faces, list), "Detector should return list of faces"
|
||||
|
||||
|
||||
def test_recognizer_inference_from_factory():
|
||||
"""
|
||||
Test that recognizer created from factory can perform inference.
|
||||
"""
|
||||
recognizer = create_recognizer("arcface")
|
||||
mock_image = np.random.randint(0, 255, (112, 112, 3), dtype=np.uint8)
|
||||
|
||||
embedding = recognizer.get_embedding(mock_image)
|
||||
assert embedding is not None, "Recognizer should return embedding"
|
||||
assert embedding.shape[1] == 512, "Should return 512-dimensional embedding"
|
||||
|
||||
|
||||
def test_landmarker_inference_from_factory():
|
||||
"""
|
||||
Test that landmarker created from factory can perform inference.
|
||||
"""
|
||||
landmarker = create_landmarker("2d106det")
|
||||
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
mock_bbox = [100, 100, 300, 300]
|
||||
|
||||
landmarks = landmarker.get_landmarks(mock_image, mock_bbox)
|
||||
assert landmarks is not None, "Landmarker should return landmarks"
|
||||
assert landmarks.shape == (106, 2), "Should return 106 landmarks"
|
||||
|
||||
|
||||
def test_multiple_detector_creation():
|
||||
"""
|
||||
Test that multiple detectors can be created independently.
|
||||
"""
|
||||
detector1 = create_detector("retinaface")
|
||||
detector2 = create_detector("scrfd")
|
||||
|
||||
assert detector1 is not None
|
||||
assert detector2 is not None
|
||||
assert detector1 is not detector2, "Should create separate instances"
|
||||
|
||||
|
||||
def test_detector_with_different_configs():
|
||||
"""
|
||||
Test creating multiple detectors with different configurations.
|
||||
"""
|
||||
detector_high_thresh = create_detector("retinaface", conf_thresh=0.9)
|
||||
detector_low_thresh = create_detector("retinaface", conf_thresh=0.3)
|
||||
|
||||
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
|
||||
faces_high = detector_high_thresh.detect(mock_image)
|
||||
faces_low = detector_low_thresh.detect(mock_image)
|
||||
|
||||
# Both should work
|
||||
assert isinstance(faces_high, list)
|
||||
assert isinstance(faces_low, list)
|
||||
|
||||
|
||||
def test_factory_returns_correct_types():
|
||||
"""
|
||||
Test that factory functions return instances of the correct types.
|
||||
"""
|
||||
from uniface import RetinaFace, ArcFace, Landmark106
|
||||
|
||||
detector = create_detector("retinaface")
|
||||
recognizer = create_recognizer("arcface")
|
||||
landmarker = create_landmarker("2d106det")
|
||||
|
||||
assert isinstance(detector, RetinaFace), "Should return RetinaFace instance"
|
||||
assert isinstance(recognizer, ArcFace), "Should return ArcFace instance"
|
||||
assert isinstance(landmarker, Landmark106), "Should return Landmark106 instance"
|
||||
107
tests/test_landmark.py
Normal file
@@ -0,0 +1,107 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from uniface.landmark import Landmark106
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def landmark_model():
|
||||
return Landmark106()
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_image():
|
||||
return np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_bbox():
|
||||
return [100, 100, 300, 300]
|
||||
|
||||
|
||||
def test_model_initialization(landmark_model):
|
||||
assert landmark_model is not None, "Landmark106 model initialization failed."
|
||||
|
||||
|
||||
def test_landmark_detection(landmark_model, mock_image, mock_bbox):
|
||||
landmarks = landmark_model.get_landmarks(mock_image, mock_bbox)
|
||||
assert landmarks.shape == (106, 2), f"Expected shape (106, 2), got {landmarks.shape}"
|
||||
|
||||
|
||||
def test_landmark_dtype(landmark_model, mock_image, mock_bbox):
|
||||
landmarks = landmark_model.get_landmarks(mock_image, mock_bbox)
|
||||
assert landmarks.dtype == np.float32, f"Expected float32, got {landmarks.dtype}"
|
||||
|
||||
|
||||
def test_landmark_coordinates_within_image(landmark_model, mock_image, mock_bbox):
|
||||
landmarks = landmark_model.get_landmarks(mock_image, mock_bbox)
|
||||
|
||||
x_coords = landmarks[:, 0]
|
||||
y_coords = landmarks[:, 1]
|
||||
|
||||
x1, y1, x2, y2 = mock_bbox
|
||||
margin = 50
|
||||
|
||||
x_in_bounds = np.sum((x_coords >= x1 - margin) & (x_coords <= x2 + margin))
|
||||
y_in_bounds = np.sum((y_coords >= y1 - margin) & (y_coords <= y2 + margin))
|
||||
|
||||
assert x_in_bounds >= 95, f"Only {x_in_bounds}/106 x-coordinates within bounds"
|
||||
assert y_in_bounds >= 95, f"Only {y_in_bounds}/106 y-coordinates within bounds"
|
||||
|
||||
|
||||
def test_different_bbox_sizes(landmark_model, mock_image):
|
||||
test_bboxes = [
|
||||
[50, 50, 150, 150],
|
||||
[100, 100, 300, 300],
|
||||
[50, 50, 400, 400],
|
||||
]
|
||||
|
||||
for bbox in test_bboxes:
|
||||
landmarks = landmark_model.get_landmarks(mock_image, bbox)
|
||||
assert landmarks.shape == (106, 2), f"Failed for bbox {bbox}"
|
||||
|
||||
|
||||
def test_landmark_array_format(landmark_model, mock_image, mock_bbox):
|
||||
landmarks = landmark_model.get_landmarks(mock_image, mock_bbox)
|
||||
landmarks_int = landmarks.astype(int)
|
||||
|
||||
assert landmarks_int.shape == (106, 2), "Integer conversion should preserve shape"
|
||||
assert landmarks_int.dtype in [np.int32, np.int64], "Should convert to integer type"
|
||||
|
||||
|
||||
def test_consistency(landmark_model, mock_image, mock_bbox):
|
||||
landmarks1 = landmark_model.get_landmarks(mock_image, mock_bbox)
|
||||
landmarks2 = landmark_model.get_landmarks(mock_image, mock_bbox)
|
||||
|
||||
assert np.allclose(landmarks1, landmarks2), "Same input should produce same landmarks"
|
||||
|
||||
|
||||
def test_different_image_sizes(landmark_model, mock_bbox):
|
||||
test_sizes = [(480, 640, 3), (720, 1280, 3), (1080, 1920, 3)]
|
||||
|
||||
for size in test_sizes:
|
||||
mock_image = np.random.randint(0, 255, size, dtype=np.uint8)
|
||||
landmarks = landmark_model.get_landmarks(mock_image, mock_bbox)
|
||||
assert landmarks.shape == (106, 2), f"Failed for image size {size}"
|
||||
|
||||
|
||||
def test_bbox_list_format(landmark_model, mock_image):
|
||||
bbox_list = [100, 100, 300, 300]
|
||||
landmarks = landmark_model.get_landmarks(mock_image, bbox_list)
|
||||
assert landmarks.shape == (106, 2), "Should work with bbox as list"
|
||||
|
||||
|
||||
def test_bbox_array_format(landmark_model, mock_image):
|
||||
bbox_array = np.array([100, 100, 300, 300])
|
||||
landmarks = landmark_model.get_landmarks(mock_image, bbox_array)
|
||||
assert landmarks.shape == (106, 2), "Should work with bbox as numpy array"
|
||||
|
||||
|
||||
def test_landmark_distribution(landmark_model, mock_image, mock_bbox):
|
||||
landmarks = landmark_model.get_landmarks(mock_image, mock_bbox)
|
||||
|
||||
x_variance = np.var(landmarks[:, 0])
|
||||
y_variance = np.var(landmarks[:, 1])
|
||||
|
||||
assert x_variance > 0, "Landmarks should have variation in x-coordinates"
|
||||
assert y_variance > 0, "Landmarks should have variation in y-coordinates"
|
||||
215
tests/test_recognition.py
Normal file
@@ -0,0 +1,215 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from uniface.recognition import ArcFace, MobileFace, SphereFace
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def arcface_model():
|
||||
"""
|
||||
Fixture to initialize the ArcFace model for testing.
|
||||
"""
|
||||
return ArcFace()
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mobileface_model():
|
||||
"""
|
||||
Fixture to initialize the MobileFace model for testing.
|
||||
"""
|
||||
return MobileFace()
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def sphereface_model():
|
||||
"""
|
||||
Fixture to initialize the SphereFace model for testing.
|
||||
"""
|
||||
return SphereFace()
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_aligned_face():
|
||||
"""
|
||||
Create a mock 112x112 aligned face image.
|
||||
"""
|
||||
return np.random.randint(0, 255, (112, 112, 3), dtype=np.uint8)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_landmarks():
|
||||
"""
|
||||
Create mock 5-point facial landmarks.
|
||||
"""
|
||||
return np.array(
|
||||
[
|
||||
[38.2946, 51.6963],
|
||||
[73.5318, 51.5014],
|
||||
[56.0252, 71.7366],
|
||||
[41.5493, 92.3655],
|
||||
[70.7299, 92.2041],
|
||||
],
|
||||
dtype=np.float32,
|
||||
)
|
||||
|
||||
|
||||
# ArcFace Tests
|
||||
def test_arcface_initialization(arcface_model):
|
||||
"""
|
||||
Test that the ArcFace model initializes correctly.
|
||||
"""
|
||||
assert arcface_model is not None, "ArcFace model initialization failed."
|
||||
|
||||
|
||||
def test_arcface_embedding_shape(arcface_model, mock_aligned_face):
|
||||
"""
|
||||
Test that ArcFace produces embeddings with the correct shape.
|
||||
"""
|
||||
embedding = arcface_model.get_embedding(mock_aligned_face)
|
||||
|
||||
# ArcFace typically produces 512-dimensional embeddings
|
||||
assert embedding.shape[1] == 512, f"Expected 512-dim embedding, got {embedding.shape[1]}"
|
||||
assert embedding.shape[0] == 1, "Embedding should have batch dimension of 1"
|
||||
|
||||
|
||||
def test_arcface_normalized_embedding(arcface_model, mock_landmarks):
|
||||
"""
|
||||
Test that normalized embeddings have unit length.
|
||||
"""
|
||||
# Create a larger mock image for alignment
|
||||
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
|
||||
embedding = arcface_model.get_normalized_embedding(mock_image, mock_landmarks)
|
||||
|
||||
# Check that embedding is normalized (L2 norm ≈ 1.0)
|
||||
norm = np.linalg.norm(embedding)
|
||||
assert np.isclose(norm, 1.0, atol=1e-5), f"Normalized embedding should have norm 1.0, got {norm}"
|
||||
|
||||
|
||||
def test_arcface_embedding_dtype(arcface_model, mock_aligned_face):
|
||||
"""
|
||||
Test that embeddings have the correct data type.
|
||||
"""
|
||||
embedding = arcface_model.get_embedding(mock_aligned_face)
|
||||
assert embedding.dtype == np.float32, f"Expected float32, got {embedding.dtype}"
|
||||
|
||||
|
||||
def test_arcface_consistency(arcface_model, mock_aligned_face):
|
||||
"""
|
||||
Test that the same input produces the same embedding.
|
||||
"""
|
||||
embedding1 = arcface_model.get_embedding(mock_aligned_face)
|
||||
embedding2 = arcface_model.get_embedding(mock_aligned_face)
|
||||
|
||||
assert np.allclose(embedding1, embedding2), "Same input should produce same embedding"
|
||||
|
||||
|
||||
# MobileFace Tests
|
||||
def test_mobileface_initialization(mobileface_model):
|
||||
"""
|
||||
Test that the MobileFace model initializes correctly.
|
||||
"""
|
||||
assert mobileface_model is not None, "MobileFace model initialization failed."
|
||||
|
||||
|
||||
def test_mobileface_embedding_shape(mobileface_model, mock_aligned_face):
|
||||
"""
|
||||
Test that MobileFace produces embeddings with the correct shape.
|
||||
"""
|
||||
embedding = mobileface_model.get_embedding(mock_aligned_face)
|
||||
|
||||
# MobileFace typically produces 512-dimensional embeddings
|
||||
assert embedding.shape[1] == 512, f"Expected 512-dim embedding, got {embedding.shape[1]}"
|
||||
assert embedding.shape[0] == 1, "Embedding should have batch dimension of 1"
|
||||
|
||||
|
||||
def test_mobileface_normalized_embedding(mobileface_model, mock_landmarks):
|
||||
"""
|
||||
Test that MobileFace normalized embeddings have unit length.
|
||||
"""
|
||||
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
|
||||
embedding = mobileface_model.get_normalized_embedding(mock_image, mock_landmarks)
|
||||
|
||||
norm = np.linalg.norm(embedding)
|
||||
assert np.isclose(norm, 1.0, atol=1e-5), f"Normalized embedding should have norm 1.0, got {norm}"
|
||||
|
||||
|
||||
# SphereFace Tests
|
||||
def test_sphereface_initialization(sphereface_model):
|
||||
"""
|
||||
Test that the SphereFace model initializes correctly.
|
||||
"""
|
||||
assert sphereface_model is not None, "SphereFace model initialization failed."
|
||||
|
||||
|
||||
def test_sphereface_embedding_shape(sphereface_model, mock_aligned_face):
|
||||
"""
|
||||
Test that SphereFace produces embeddings with the correct shape.
|
||||
"""
|
||||
embedding = sphereface_model.get_embedding(mock_aligned_face)
|
||||
|
||||
# SphereFace typically produces 512-dimensional embeddings
|
||||
assert embedding.shape[1] == 512, f"Expected 512-dim embedding, got {embedding.shape[1]}"
|
||||
assert embedding.shape[0] == 1, "Embedding should have batch dimension of 1"
|
||||
|
||||
|
||||
def test_sphereface_normalized_embedding(sphereface_model, mock_landmarks):
|
||||
"""
|
||||
Test that SphereFace normalized embeddings have unit length.
|
||||
"""
|
||||
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
|
||||
embedding = sphereface_model.get_normalized_embedding(mock_image, mock_landmarks)
|
||||
|
||||
norm = np.linalg.norm(embedding)
|
||||
assert np.isclose(norm, 1.0, atol=1e-5), f"Normalized embedding should have norm 1.0, got {norm}"
|
||||
|
||||
|
||||
# Cross-model comparison tests
|
||||
def test_different_models_different_embeddings(arcface_model, mobileface_model, mock_aligned_face):
|
||||
"""
|
||||
Test that different models produce different embeddings for the same input.
|
||||
"""
|
||||
arcface_emb = arcface_model.get_embedding(mock_aligned_face)
|
||||
mobileface_emb = mobileface_model.get_embedding(mock_aligned_face)
|
||||
|
||||
# Embeddings should be different (with high probability for random input)
|
||||
# We check that they're not identical
|
||||
assert not np.allclose(arcface_emb, mobileface_emb), "Different models should produce different embeddings"
|
||||
|
||||
|
||||
def test_embedding_similarity_computation(arcface_model, mock_aligned_face):
|
||||
"""
|
||||
Test computing similarity between embeddings.
|
||||
"""
|
||||
# Get two embeddings
|
||||
emb1 = arcface_model.get_embedding(mock_aligned_face)
|
||||
|
||||
# Create a slightly different image
|
||||
mock_aligned_face2 = mock_aligned_face.copy()
|
||||
mock_aligned_face2[:10, :10] = 0 # Modify a small region
|
||||
emb2 = arcface_model.get_embedding(mock_aligned_face2)
|
||||
|
||||
# Compute cosine similarity
|
||||
from uniface import compute_similarity
|
||||
|
||||
similarity = compute_similarity(emb1, emb2)
|
||||
|
||||
# Similarity should be between -1 and 1
|
||||
assert -1.0 <= similarity <= 1.0, f"Similarity should be in [-1, 1], got {similarity}"
|
||||
|
||||
|
||||
def test_same_face_high_similarity(arcface_model, mock_aligned_face):
|
||||
"""
|
||||
Test that the same face produces high similarity.
|
||||
"""
|
||||
emb1 = arcface_model.get_embedding(mock_aligned_face)
|
||||
emb2 = arcface_model.get_embedding(mock_aligned_face)
|
||||
|
||||
from uniface import compute_similarity
|
||||
|
||||
similarity = compute_similarity(emb1, emb2)
|
||||
|
||||
# Same image should have similarity close to 1.0
|
||||
assert similarity > 0.99, f"Same face should have similarity > 0.99, got {similarity}"
|
||||
@@ -1,15 +1,14 @@
|
||||
import pytest
|
||||
import numpy as np
|
||||
from uniface import RetinaFace
|
||||
import pytest
|
||||
|
||||
from uniface.constants import RetinaFaceWeights
|
||||
from uniface.detection import RetinaFace
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def retinaface_model():
|
||||
"""
|
||||
Fixture to initialize the RetinaFace model for testing.
|
||||
"""
|
||||
return RetinaFace(
|
||||
model="retinaface_mnet_v2",
|
||||
model_name=RetinaFaceWeights.MNET_V2,
|
||||
conf_thresh=0.5,
|
||||
pre_nms_topk=5000,
|
||||
nms_thresh=0.4,
|
||||
@@ -18,61 +17,39 @@ def retinaface_model():
|
||||
|
||||
|
||||
def test_model_initialization(retinaface_model):
|
||||
"""
|
||||
Test that the RetinaFace model initializes correctly.
|
||||
"""
|
||||
assert retinaface_model is not None, "Model initialization failed."
|
||||
|
||||
|
||||
def test_inference_on_640x640_image(retinaface_model):
|
||||
"""
|
||||
Test inference on a 640x640 BGR image.
|
||||
"""
|
||||
# Generate a mock 640x640 BGR image
|
||||
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
faces = retinaface_model.detect(mock_image)
|
||||
|
||||
# Run inference
|
||||
detections, landmarks = retinaface_model.detect(mock_image)
|
||||
assert isinstance(faces, list), "Detections should be a list."
|
||||
|
||||
# Check output types
|
||||
assert isinstance(detections, np.ndarray), "Detections should be a numpy array."
|
||||
assert isinstance(landmarks, np.ndarray), "Landmarks should be a numpy array."
|
||||
for face in faces:
|
||||
assert isinstance(face, dict), "Each detection should be a dictionary."
|
||||
assert "bbox" in face, "Each detection should have a 'bbox' key."
|
||||
assert "confidence" in face, "Each detection should have a 'confidence' key."
|
||||
assert "landmarks" in face, "Each detection should have a 'landmarks' key."
|
||||
|
||||
# Check that detections have the expected shape
|
||||
if detections.size > 0: # If faces are detected
|
||||
assert detections.shape[1] == 5, "Each detection should have 5 values (x1, y1, x2, y2, score)."
|
||||
bbox = face["bbox"]
|
||||
assert len(bbox) == 4, "BBox should have 4 values (x1, y1, x2, y2)."
|
||||
|
||||
# Check landmarks shape
|
||||
if landmarks.size > 0:
|
||||
assert landmarks.shape[1:] == (5, 2), "Landmarks should have shape (N, 5, 2)."
|
||||
landmarks = face["landmarks"]
|
||||
assert len(landmarks) == 5, "Should have 5 landmark points."
|
||||
assert all(len(pt) == 2 for pt in landmarks), "Each landmark should be (x, y)."
|
||||
|
||||
|
||||
def test_confidence_threshold(retinaface_model):
|
||||
"""
|
||||
Test that detections respect the confidence threshold.
|
||||
"""
|
||||
# Generate a mock 640x640 BGR image
|
||||
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
faces = retinaface_model.detect(mock_image)
|
||||
|
||||
# Run inference
|
||||
detections, _ = retinaface_model.detect(mock_image)
|
||||
|
||||
# Ensure all detections have confidence scores above the threshold
|
||||
if detections.size > 0: # If faces are detected
|
||||
confidence_scores = detections[:, 4]
|
||||
assert (confidence_scores >= 0.5).all(), "Some detections have confidence below the threshold."
|
||||
for face in faces:
|
||||
confidence = face["confidence"]
|
||||
assert confidence >= 0.5, f"Detection has confidence {confidence} below threshold 0.5"
|
||||
|
||||
|
||||
def test_no_faces_detected(retinaface_model):
|
||||
"""
|
||||
Test inference on an image without detectable faces.
|
||||
"""
|
||||
# Generate an empty (black) 640x640 image
|
||||
empty_image = np.zeros((640, 640, 3), dtype=np.uint8)
|
||||
|
||||
# Run inference
|
||||
detections, landmarks = retinaface_model.detect(empty_image)
|
||||
|
||||
# Ensure no detections or landmarks are found
|
||||
assert detections.size == 0, "Detections should be empty for a blank image."
|
||||
assert landmarks.size == 0, "Landmarks should be empty for a blank image."
|
||||
faces = retinaface_model.detect(empty_image)
|
||||
assert len(faces) == 0, "Should detect no faces in a blank image."
|
||||
|
||||
71
tests/test_scrfd.py
Normal file
@@ -0,0 +1,71 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from uniface.constants import SCRFDWeights
|
||||
from uniface.detection import SCRFD
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def scrfd_model():
|
||||
return SCRFD(
|
||||
model_name=SCRFDWeights.SCRFD_500M_KPS,
|
||||
conf_thresh=0.5,
|
||||
nms_thresh=0.4,
|
||||
)
|
||||
|
||||
|
||||
def test_model_initialization(scrfd_model):
|
||||
assert scrfd_model is not None, "Model initialization failed."
|
||||
|
||||
|
||||
def test_inference_on_640x640_image(scrfd_model):
|
||||
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
faces = scrfd_model.detect(mock_image)
|
||||
|
||||
assert isinstance(faces, list), "Detections should be a list."
|
||||
|
||||
for face in faces:
|
||||
assert isinstance(face, dict), "Each detection should be a dictionary."
|
||||
assert "bbox" in face, "Each detection should have a 'bbox' key."
|
||||
assert "confidence" in face, "Each detection should have a 'confidence' key."
|
||||
assert "landmarks" in face, "Each detection should have a 'landmarks' key."
|
||||
|
||||
bbox = face["bbox"]
|
||||
assert len(bbox) == 4, "BBox should have 4 values (x1, y1, x2, y2)."
|
||||
|
||||
landmarks = face["landmarks"]
|
||||
assert len(landmarks) == 5, "Should have 5 landmark points."
|
||||
assert all(len(pt) == 2 for pt in landmarks), "Each landmark should be (x, y)."
|
||||
|
||||
|
||||
def test_confidence_threshold(scrfd_model):
|
||||
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
faces = scrfd_model.detect(mock_image)
|
||||
|
||||
for face in faces:
|
||||
confidence = face["confidence"]
|
||||
assert confidence >= 0.5, f"Detection has confidence {confidence} below threshold 0.5"
|
||||
|
||||
|
||||
def test_no_faces_detected(scrfd_model):
|
||||
empty_image = np.zeros((640, 640, 3), dtype=np.uint8)
|
||||
faces = scrfd_model.detect(empty_image)
|
||||
assert len(faces) == 0, "Should detect no faces in a blank image."
|
||||
|
||||
|
||||
def test_different_input_sizes(scrfd_model):
|
||||
test_sizes = [(480, 640, 3), (720, 1280, 3), (1080, 1920, 3)]
|
||||
|
||||
for size in test_sizes:
|
||||
mock_image = np.random.randint(0, 255, size, dtype=np.uint8)
|
||||
faces = scrfd_model.detect(mock_image)
|
||||
assert isinstance(faces, list), f"Should return list for size {size}"
|
||||
|
||||
|
||||
def test_scrfd_10g_model():
|
||||
model = SCRFD(model_name=SCRFDWeights.SCRFD_10G_KPS, conf_thresh=0.5)
|
||||
assert model is not None, "SCRFD 10G model initialization failed."
|
||||
|
||||
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
faces = model.detect(mock_image)
|
||||
assert isinstance(faces, list), "SCRFD 10G should return list of detections."
|
||||
262
tests/test_utils.py
Normal file
@@ -0,0 +1,262 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from uniface import compute_similarity, face_alignment
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_image():
|
||||
"""
|
||||
Create a mock 640x640 BGR image.
|
||||
"""
|
||||
return np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_landmarks():
|
||||
"""
|
||||
Create mock 5-point facial landmarks.
|
||||
Standard positions for a face roughly centered at (112/2, 112/2).
|
||||
"""
|
||||
return np.array(
|
||||
[
|
||||
[38.2946, 51.6963], # Left eye
|
||||
[73.5318, 51.5014], # Right eye
|
||||
[56.0252, 71.7366], # Nose
|
||||
[41.5493, 92.3655], # Left mouth corner
|
||||
[70.7299, 92.2041], # Right mouth corner
|
||||
],
|
||||
dtype=np.float32,
|
||||
)
|
||||
|
||||
|
||||
# compute_similarity tests
|
||||
def test_compute_similarity_same_embedding():
|
||||
"""
|
||||
Test that similarity of an embedding with itself is 1.0.
|
||||
"""
|
||||
embedding = np.random.randn(1, 512).astype(np.float32)
|
||||
embedding = embedding / np.linalg.norm(embedding) # Normalize
|
||||
|
||||
similarity = compute_similarity(embedding, embedding)
|
||||
assert np.isclose(similarity, 1.0, atol=1e-5), f"Self-similarity should be 1.0, got {similarity}"
|
||||
|
||||
|
||||
def test_compute_similarity_range():
|
||||
"""
|
||||
Test that similarity is always in the range [-1, 1].
|
||||
"""
|
||||
# Test with multiple random embeddings
|
||||
for _ in range(10):
|
||||
emb1 = np.random.randn(1, 512).astype(np.float32)
|
||||
emb2 = np.random.randn(1, 512).astype(np.float32)
|
||||
|
||||
# Normalize
|
||||
emb1 = emb1 / np.linalg.norm(emb1)
|
||||
emb2 = emb2 / np.linalg.norm(emb2)
|
||||
|
||||
similarity = compute_similarity(emb1, emb2)
|
||||
assert -1.0 <= similarity <= 1.0, f"Similarity should be in [-1, 1], got {similarity}"
|
||||
|
||||
|
||||
def test_compute_similarity_orthogonal():
|
||||
"""
|
||||
Test that orthogonal embeddings have similarity close to 0.
|
||||
"""
|
||||
# Create orthogonal embeddings
|
||||
emb1 = np.zeros((1, 512), dtype=np.float32)
|
||||
emb1[0, 0] = 1.0 # [1, 0, 0, ..., 0]
|
||||
|
||||
emb2 = np.zeros((1, 512), dtype=np.float32)
|
||||
emb2[0, 1] = 1.0 # [0, 1, 0, ..., 0]
|
||||
|
||||
similarity = compute_similarity(emb1, emb2)
|
||||
assert np.isclose(similarity, 0.0, atol=1e-5), f"Orthogonal embeddings should have similarity 0.0, got {similarity}"
|
||||
|
||||
|
||||
def test_compute_similarity_opposite():
|
||||
"""
|
||||
Test that opposite embeddings have similarity close to -1.
|
||||
"""
|
||||
emb1 = np.ones((1, 512), dtype=np.float32)
|
||||
emb1 = emb1 / np.linalg.norm(emb1)
|
||||
|
||||
emb2 = -emb1 # Opposite direction
|
||||
|
||||
similarity = compute_similarity(emb1, emb2)
|
||||
assert np.isclose(similarity, -1.0, atol=1e-5), f"Opposite embeddings should have similarity -1.0, got {similarity}"
|
||||
|
||||
|
||||
def test_compute_similarity_symmetry():
|
||||
"""
|
||||
Test that similarity(A, B) == similarity(B, A).
|
||||
"""
|
||||
emb1 = np.random.randn(1, 512).astype(np.float32)
|
||||
emb2 = np.random.randn(1, 512).astype(np.float32)
|
||||
|
||||
# Normalize
|
||||
emb1 = emb1 / np.linalg.norm(emb1)
|
||||
emb2 = emb2 / np.linalg.norm(emb2)
|
||||
|
||||
sim_12 = compute_similarity(emb1, emb2)
|
||||
sim_21 = compute_similarity(emb2, emb1)
|
||||
|
||||
assert np.isclose(sim_12, sim_21), "Similarity should be symmetric"
|
||||
|
||||
|
||||
def test_compute_similarity_dtype():
|
||||
"""
|
||||
Test that compute_similarity returns a float.
|
||||
"""
|
||||
emb1 = np.random.randn(1, 512).astype(np.float32)
|
||||
emb2 = np.random.randn(1, 512).astype(np.float32)
|
||||
|
||||
# Normalize
|
||||
emb1 = emb1 / np.linalg.norm(emb1)
|
||||
emb2 = emb2 / np.linalg.norm(emb2)
|
||||
|
||||
similarity = compute_similarity(emb1, emb2)
|
||||
assert isinstance(similarity, (float, np.floating)), f"Similarity should be float, got {type(similarity)}"
|
||||
|
||||
|
||||
# face_alignment tests
|
||||
def test_face_alignment_output_shape(mock_image, mock_landmarks):
|
||||
"""
|
||||
Test that face_alignment produces output with the correct shape.
|
||||
"""
|
||||
aligned, _ = face_alignment(mock_image, mock_landmarks, image_size=(112, 112))
|
||||
|
||||
assert aligned.shape == (112, 112, 3), f"Expected shape (112, 112, 3), got {aligned.shape}"
|
||||
|
||||
|
||||
def test_face_alignment_dtype(mock_image, mock_landmarks):
|
||||
"""
|
||||
Test that aligned face has the correct data type.
|
||||
"""
|
||||
aligned, _ = face_alignment(mock_image, mock_landmarks, image_size=(112, 112))
|
||||
|
||||
assert aligned.dtype == np.uint8, f"Expected uint8, got {aligned.dtype}"
|
||||
|
||||
|
||||
def test_face_alignment_different_sizes(mock_image, mock_landmarks):
|
||||
"""
|
||||
Test face alignment with different output sizes.
|
||||
"""
|
||||
# Only test sizes that are multiples of 112 or 128 as required by the function
|
||||
test_sizes = [(112, 112), (128, 128), (224, 224)]
|
||||
|
||||
for size in test_sizes:
|
||||
aligned, _ = face_alignment(mock_image, mock_landmarks, image_size=size)
|
||||
assert aligned.shape == (*size, 3), f"Failed for size {size}"
|
||||
|
||||
|
||||
def test_face_alignment_consistency(mock_image, mock_landmarks):
|
||||
"""
|
||||
Test that the same input produces the same aligned face.
|
||||
"""
|
||||
aligned1, _ = face_alignment(mock_image, mock_landmarks, image_size=(112, 112))
|
||||
aligned2, _ = face_alignment(mock_image, mock_landmarks, image_size=(112, 112))
|
||||
|
||||
assert np.allclose(aligned1, aligned2), "Same input should produce same aligned face"
|
||||
|
||||
|
||||
def test_face_alignment_landmarks_as_list(mock_image):
|
||||
"""
|
||||
Test that landmarks can be passed as a list of lists (converted to array).
|
||||
"""
|
||||
landmarks_list = [
|
||||
[38.2946, 51.6963],
|
||||
[73.5318, 51.5014],
|
||||
[56.0252, 71.7366],
|
||||
[41.5493, 92.3655],
|
||||
[70.7299, 92.2041],
|
||||
]
|
||||
|
||||
# Convert list to numpy array before passing to face_alignment
|
||||
landmarks_array = np.array(landmarks_list, dtype=np.float32)
|
||||
aligned, _ = face_alignment(mock_image, landmarks_array, image_size=(112, 112))
|
||||
assert aligned.shape == (112, 112, 3), "Should work with landmarks as array"
|
||||
|
||||
|
||||
def test_face_alignment_value_range(mock_image, mock_landmarks):
|
||||
"""
|
||||
Test that aligned face pixel values are in valid range [0, 255].
|
||||
"""
|
||||
aligned, _ = face_alignment(mock_image, mock_landmarks, image_size=(112, 112))
|
||||
|
||||
assert np.all(aligned >= 0), "Pixel values should be >= 0"
|
||||
assert np.all(aligned <= 255), "Pixel values should be <= 255"
|
||||
|
||||
|
||||
def test_face_alignment_not_all_zeros(mock_image, mock_landmarks):
|
||||
"""
|
||||
Test that aligned face is not all zeros (actual transformation occurred).
|
||||
"""
|
||||
aligned, _ = face_alignment(mock_image, mock_landmarks, image_size=(112, 112))
|
||||
|
||||
# At least some pixels should be non-zero
|
||||
assert np.any(aligned > 0), "Aligned face should have some non-zero pixels"
|
||||
|
||||
|
||||
def test_face_alignment_from_different_positions(mock_image):
|
||||
"""
|
||||
Test alignment with landmarks at different positions in the image.
|
||||
"""
|
||||
# Landmarks at different positions
|
||||
positions = [
|
||||
np.array(
|
||||
[[100, 100], [150, 100], [125, 130], [110, 150], [140, 150]],
|
||||
dtype=np.float32,
|
||||
),
|
||||
np.array(
|
||||
[[300, 200], [350, 200], [325, 230], [310, 250], [340, 250]],
|
||||
dtype=np.float32,
|
||||
),
|
||||
np.array(
|
||||
[[500, 400], [550, 400], [525, 430], [510, 450], [540, 450]],
|
||||
dtype=np.float32,
|
||||
),
|
||||
]
|
||||
|
||||
for landmarks in positions:
|
||||
aligned, _ = face_alignment(mock_image, landmarks, image_size=(112, 112))
|
||||
assert aligned.shape == (112, 112, 3), f"Failed for landmarks at {landmarks[0]}"
|
||||
|
||||
|
||||
def test_face_alignment_landmark_count(mock_image):
|
||||
"""
|
||||
Test that face_alignment works specifically with 5-point landmarks.
|
||||
"""
|
||||
# Standard 5-point landmarks
|
||||
landmarks_5pt = np.array(
|
||||
[
|
||||
[38.2946, 51.6963],
|
||||
[73.5318, 51.5014],
|
||||
[56.0252, 71.7366],
|
||||
[41.5493, 92.3655],
|
||||
[70.7299, 92.2041],
|
||||
],
|
||||
dtype=np.float32,
|
||||
)
|
||||
|
||||
aligned, _ = face_alignment(mock_image, landmarks_5pt, image_size=(112, 112))
|
||||
assert aligned.shape == (112, 112, 3), "Should work with 5-point landmarks"
|
||||
|
||||
|
||||
def test_compute_similarity_with_recognition_embeddings():
|
||||
"""
|
||||
Test compute_similarity with realistic embedding dimensions.
|
||||
"""
|
||||
# Simulate ArcFace/MobileFace/SphereFace embeddings (512-dim)
|
||||
emb1 = np.random.randn(1, 512).astype(np.float32)
|
||||
emb2 = np.random.randn(1, 512).astype(np.float32)
|
||||
|
||||
# Normalize (as done in get_normalized_embedding)
|
||||
emb1 = emb1 / np.linalg.norm(emb1)
|
||||
emb2 = emb2 / np.linalg.norm(emb2)
|
||||
|
||||
similarity = compute_similarity(emb1, emb2)
|
||||
|
||||
# Should be a valid similarity score
|
||||
assert -1.0 <= similarity <= 1.0
|
||||
assert isinstance(similarity, (float, np.floating))
|
||||
@@ -1,4 +1,4 @@
|
||||
# Copyright 2024 Yakhyokhuja Valikhujaev
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
#
|
||||
# Licensed under the MIT License.
|
||||
# You may obtain a copy of the License at
|
||||
@@ -11,18 +11,59 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
__license__ = "MIT"
|
||||
__author__ = "Yakhyokhuja Valikhujaev"
|
||||
__version__ = "1.1.1"
|
||||
|
||||
from uniface.retinaface import RetinaFace
|
||||
from uniface.log import Logger
|
||||
|
||||
from uniface.face_utils import compute_similarity, face_alignment
|
||||
from uniface.log import Logger, enable_logging
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.version import __version__, __author__
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
from .attribute import AgeGender
|
||||
|
||||
try:
|
||||
from .attribute import Emotion
|
||||
except ImportError:
|
||||
Emotion = None # PyTorch not installed
|
||||
from .detection import (
|
||||
SCRFD,
|
||||
RetinaFace,
|
||||
create_detector,
|
||||
detect_faces,
|
||||
list_available_detectors,
|
||||
)
|
||||
from .landmark import Landmark106, create_landmarker
|
||||
from .recognition import ArcFace, MobileFace, SphereFace, create_recognizer
|
||||
|
||||
__all__ = [
|
||||
"__author__",
|
||||
"__license__",
|
||||
"__version__",
|
||||
"__author__"
|
||||
# Factory functions
|
||||
"create_detector",
|
||||
"create_landmarker",
|
||||
"create_recognizer",
|
||||
"detect_faces",
|
||||
"list_available_detectors",
|
||||
# Detection models
|
||||
"RetinaFace",
|
||||
"Logger",
|
||||
"SCRFD",
|
||||
# Recognition models
|
||||
"ArcFace",
|
||||
"MobileFace",
|
||||
"SphereFace",
|
||||
# Landmark models
|
||||
"Landmark106",
|
||||
# Attribute models
|
||||
"AgeGender",
|
||||
"Emotion",
|
||||
# Utilities
|
||||
"compute_similarity",
|
||||
"draw_detections",
|
||||
"face_alignment",
|
||||
"verify_model_weights",
|
||||
"draw_detections"
|
||||
"Logger",
|
||||
"enable_logging",
|
||||
]
|
||||
|
||||
99
uniface/attribute/__init__.py
Normal file
@@ -0,0 +1,99 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from typing import Any, Dict, List, Union
|
||||
|
||||
import numpy as np
|
||||
|
||||
from uniface.attribute.age_gender import AgeGender
|
||||
from uniface.attribute.base import Attribute
|
||||
from uniface.constants import AgeGenderWeights, DDAMFNWeights
|
||||
|
||||
# Emotion requires PyTorch - make it optional
|
||||
try:
|
||||
from uniface.attribute.emotion import Emotion
|
||||
|
||||
_EMOTION_AVAILABLE = True
|
||||
except ImportError:
|
||||
Emotion = None
|
||||
_EMOTION_AVAILABLE = False
|
||||
|
||||
# Public API for the attribute module
|
||||
__all__ = ["AgeGender", "Emotion", "create_attribute_predictor", "predict_attributes"]
|
||||
|
||||
# A mapping from model enums to their corresponding attribute classes
|
||||
_ATTRIBUTE_MODELS = {
|
||||
**{model: AgeGender for model in AgeGenderWeights},
|
||||
}
|
||||
|
||||
# Add Emotion models only if PyTorch is available
|
||||
if _EMOTION_AVAILABLE:
|
||||
_ATTRIBUTE_MODELS.update({model: Emotion for model in DDAMFNWeights})
|
||||
|
||||
|
||||
def create_attribute_predictor(model_name: Union[AgeGenderWeights, DDAMFNWeights], **kwargs: Any) -> Attribute:
|
||||
"""
|
||||
Factory function to create an attribute predictor instance.
|
||||
|
||||
This high-level API simplifies the creation of attribute models by
|
||||
dynamically selecting the correct class based on the provided model enum.
|
||||
|
||||
Args:
|
||||
model_name: The enum corresponding to the desired attribute model
|
||||
(e.g., AgeGenderWeights.DEFAULT or DDAMFNWeights.AFFECNET7).
|
||||
**kwargs: Additional keyword arguments to pass to the model's constructor.
|
||||
|
||||
Returns:
|
||||
An initialized instance of an Attribute predictor class (e.g., AgeGender).
|
||||
|
||||
Raises:
|
||||
ValueError: If the provided model_name is not a supported enum.
|
||||
"""
|
||||
model_class = _ATTRIBUTE_MODELS.get(model_name)
|
||||
|
||||
if model_class is None:
|
||||
raise ValueError(
|
||||
f"Unsupported attribute model: {model_name}. Please choose from AgeGenderWeights or DDAMFNWeights."
|
||||
)
|
||||
|
||||
# Pass model_name to the constructor, as some classes might need it
|
||||
return model_class(model_name=model_name, **kwargs)
|
||||
|
||||
|
||||
def predict_attributes(
|
||||
image: np.ndarray, detections: List[Dict[str, np.ndarray]], predictor: Attribute
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
High-level API to predict attributes for multiple detected faces.
|
||||
|
||||
This function iterates through a list of face detections, runs the
|
||||
specified attribute predictor on each one, and appends the results back
|
||||
into the detection dictionary.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): The full input image in BGR format.
|
||||
detections (List[Dict]): A list of detection results, where each dict
|
||||
must contain a 'bbox' and optionally 'landmark'.
|
||||
predictor (Attribute): An initialized attribute predictor instance,
|
||||
created by `create_attribute_predictor`.
|
||||
|
||||
Returns:
|
||||
The list of detections, where each dictionary is updated with a new
|
||||
'attributes' key containing the prediction result.
|
||||
"""
|
||||
for face in detections:
|
||||
# Initialize attributes dict if it doesn't exist
|
||||
if "attributes" not in face:
|
||||
face["attributes"] = {}
|
||||
|
||||
if isinstance(predictor, AgeGender):
|
||||
gender, age = predictor(image, face["bbox"])
|
||||
face["attributes"]["gender"] = gender
|
||||
face["attributes"]["age"] = age
|
||||
elif isinstance(predictor, Emotion):
|
||||
emotion, confidence = predictor(image, face["landmark"])
|
||||
face["attributes"]["emotion"] = emotion
|
||||
face["attributes"]["confidence"] = confidence
|
||||
|
||||
return detections
|
||||
187
uniface/attribute/age_gender.py
Normal file
@@ -0,0 +1,187 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from typing import List, Tuple, Union
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from uniface.attribute.base import Attribute
|
||||
from uniface.constants import AgeGenderWeights
|
||||
from uniface.face_utils import bbox_center_alignment
|
||||
from uniface.log import Logger
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.onnx_utils import create_onnx_session
|
||||
|
||||
__all__ = ["AgeGender"]
|
||||
|
||||
|
||||
class AgeGender(Attribute):
|
||||
"""
|
||||
Age and gender prediction model using ONNX Runtime.
|
||||
|
||||
This class inherits from the base `Attribute` class and implements the
|
||||
functionality for predicting age (in years) and gender (0 for female,
|
||||
1 for male) from a face image. It requires a bounding box to locate the face.
|
||||
"""
|
||||
|
||||
def __init__(self, model_name: AgeGenderWeights = AgeGenderWeights.DEFAULT) -> None:
|
||||
"""
|
||||
Initializes the AgeGender prediction model.
|
||||
|
||||
Args:
|
||||
model_name (AgeGenderWeights): The enum specifying the model weights
|
||||
to load.
|
||||
"""
|
||||
Logger.info(f"Initializing AgeGender with model={model_name.name}")
|
||||
self.model_path = verify_model_weights(model_name)
|
||||
self._initialize_model()
|
||||
|
||||
def _initialize_model(self) -> None:
|
||||
"""
|
||||
Initializes the ONNX model and creates an inference session.
|
||||
"""
|
||||
try:
|
||||
self.session = create_onnx_session(self.model_path)
|
||||
# Get model input details from the loaded model
|
||||
input_meta = self.session.get_inputs()[0]
|
||||
self.input_name = input_meta.name
|
||||
self.input_size = tuple(input_meta.shape[2:4]) # (height, width)
|
||||
self.output_names = [output.name for output in self.session.get_outputs()]
|
||||
Logger.info(f"Successfully initialized AgeGender model with input size {self.input_size}")
|
||||
except Exception as e:
|
||||
Logger.error(
|
||||
f"Failed to load AgeGender model from '{self.model_path}'",
|
||||
exc_info=True,
|
||||
)
|
||||
raise RuntimeError(f"Failed to initialize AgeGender model: {e}") from e
|
||||
|
||||
def preprocess(self, image: np.ndarray, bbox: Union[List, np.ndarray]) -> np.ndarray:
|
||||
"""
|
||||
Aligns the face based on the bounding box and preprocesses it for inference.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): The full input image in BGR format.
|
||||
bbox (Union[List, np.ndarray]): The face bounding box coordinates [x1, y1, x2, y2].
|
||||
|
||||
Returns:
|
||||
np.ndarray: The preprocessed image blob ready for inference.
|
||||
"""
|
||||
bbox = np.asarray(bbox)
|
||||
|
||||
width, height = bbox[2] - bbox[0], bbox[3] - bbox[1]
|
||||
center = ((bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2)
|
||||
scale = self.input_size[1] / (max(width, height) * 1.5)
|
||||
|
||||
# **Rotation parameter restored here**
|
||||
rotation = 0.0
|
||||
aligned_face, _ = bbox_center_alignment(image, center, self.input_size[1], scale, rotation)
|
||||
|
||||
blob = cv2.dnn.blobFromImage(
|
||||
aligned_face,
|
||||
scalefactor=1.0,
|
||||
size=self.input_size[::-1],
|
||||
mean=(0.0, 0.0, 0.0),
|
||||
swapRB=True,
|
||||
)
|
||||
return blob
|
||||
|
||||
def postprocess(self, prediction: np.ndarray) -> Tuple[str, int]:
|
||||
"""
|
||||
Processes the raw model output to extract gender and age.
|
||||
|
||||
Args:
|
||||
prediction (np.ndarray): The raw output from the model inference.
|
||||
|
||||
Returns:
|
||||
Tuple[str, int]: A tuple containing the predicted gender label ("Female" or "Male")
|
||||
and age (in years).
|
||||
"""
|
||||
# First two values are gender logits
|
||||
gender_id = int(np.argmax(prediction[:2]))
|
||||
gender = "Female" if gender_id == 0 else "Male"
|
||||
# Third value is normalized age, scaled by 100
|
||||
age = int(np.round(prediction[2] * 100))
|
||||
return gender, age
|
||||
|
||||
def predict(self, image: np.ndarray, bbox: Union[List, np.ndarray]) -> Tuple[str, int]:
|
||||
"""
|
||||
Predicts age and gender for a single face specified by a bounding box.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): The full input image in BGR format.
|
||||
bbox (Union[List, np.ndarray]): The face bounding box coordinates [x1, y1, x2, y2].
|
||||
|
||||
Returns:
|
||||
Tuple[str, int]: A tuple containing the predicted gender label and age.
|
||||
"""
|
||||
face_blob = self.preprocess(image, bbox)
|
||||
prediction = self.session.run(self.output_names, {self.input_name: face_blob})[0][0]
|
||||
gender, age = self.postprocess(prediction)
|
||||
return gender, age
|
||||
|
||||
|
||||
# TODO: below is only for testing, remove it later
|
||||
if __name__ == "__main__":
|
||||
# To run this script, you need to have uniface.detection installed
|
||||
# or available in your path.
|
||||
from uniface.constants import RetinaFaceWeights
|
||||
from uniface.detection import create_detector
|
||||
|
||||
print("Initializing models for live inference...")
|
||||
# 1. Initialize the face detector
|
||||
# Using a smaller model for faster real-time performance
|
||||
detector = create_detector(model_name=RetinaFaceWeights.MNET_V2)
|
||||
|
||||
# 2. Initialize the attribute predictor
|
||||
age_gender_predictor = AgeGender()
|
||||
|
||||
# 3. Start webcam capture
|
||||
cap = cv2.VideoCapture(0)
|
||||
if not cap.isOpened():
|
||||
print("Error: Could not open webcam.")
|
||||
exit()
|
||||
|
||||
print("Starting webcam feed. Press 'q' to quit.")
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
print("Error: Failed to capture frame.")
|
||||
break
|
||||
|
||||
# Detect faces in the current frame
|
||||
detections = detector.detect(frame)
|
||||
|
||||
# For each detected face, predict age and gender
|
||||
for detection in detections:
|
||||
box = detection["bbox"]
|
||||
x1, y1, x2, y2 = map(int, box)
|
||||
|
||||
# Predict attributes
|
||||
gender, age = age_gender_predictor.predict(frame, box)
|
||||
|
||||
# Prepare text and draw on the frame
|
||||
label = f"{gender}, {age}"
|
||||
cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
|
||||
cv2.putText(
|
||||
frame,
|
||||
label,
|
||||
(x1, y1 - 10),
|
||||
cv2.FONT_HERSHEY_SIMPLEX,
|
||||
0.8,
|
||||
(0, 255, 0),
|
||||
2,
|
||||
)
|
||||
|
||||
# Display the resulting frame
|
||||
cv2.imshow("Age and Gender Inference (Press 'q' to quit)", frame)
|
||||
|
||||
# Break the loop if 'q' is pressed
|
||||
if cv2.waitKey(1) & 0xFF == ord("q"):
|
||||
break
|
||||
|
||||
# Release resources
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
print("Inference stopped.")
|
||||
92
uniface/attribute/base.py
Normal file
@@ -0,0 +1,92 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Any
|
||||
|
||||
import numpy as np
|
||||
|
||||
|
||||
class Attribute(ABC):
|
||||
"""
|
||||
Abstract base class for face attribute models.
|
||||
|
||||
This class defines the common interface that all attribute models
|
||||
(e.g., age-gender, emotion) must implement. It ensures a consistent API
|
||||
across different attribute prediction modules in the library, making them
|
||||
interchangeable and easy to use.
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def _initialize_model(self) -> None:
|
||||
"""
|
||||
Initializes the underlying model for inference.
|
||||
|
||||
This method should handle loading model weights, creating the
|
||||
inference session (e.g., ONNX Runtime, PyTorch), and any necessary
|
||||
warm-up procedures to prepare the model for prediction.
|
||||
"""
|
||||
raise NotImplementedError("Subclasses must implement the _initialize_model method.")
|
||||
|
||||
@abstractmethod
|
||||
def preprocess(self, image: np.ndarray, *args: Any) -> Any:
|
||||
"""
|
||||
Preprocesses the input data for the model.
|
||||
|
||||
This method should take a raw image and any other necessary data
|
||||
(like bounding boxes or landmarks) and convert it into the format
|
||||
expected by the model's inference engine (e.g., a blob or tensor).
|
||||
|
||||
Args:
|
||||
image (np.ndarray): The input image containing the face, typically
|
||||
in BGR format.
|
||||
*args: Additional arguments required for preprocessing, such as
|
||||
bounding boxes or facial landmarks.
|
||||
|
||||
Returns:
|
||||
The preprocessed data ready for model inference.
|
||||
"""
|
||||
raise NotImplementedError("Subclasses must implement the preprocess method.")
|
||||
|
||||
@abstractmethod
|
||||
def postprocess(self, prediction: Any) -> Any:
|
||||
"""
|
||||
Postprocesses the raw model output into a human-readable format.
|
||||
|
||||
This method takes the raw output from the model's inference and
|
||||
converts it into a meaningful result, such as an age value, a gender
|
||||
label, or an emotion category.
|
||||
|
||||
Args:
|
||||
prediction (Any): The raw output from the model's inference.
|
||||
|
||||
Returns:
|
||||
The final, processed attributes.
|
||||
"""
|
||||
raise NotImplementedError("Subclasses must implement the postprocess method.")
|
||||
|
||||
@abstractmethod
|
||||
def predict(self, image: np.ndarray, *args: Any) -> Any:
|
||||
"""
|
||||
Performs end-to-end attribute prediction on a given image.
|
||||
|
||||
This method orchestrates the full pipeline: it calls the preprocess,
|
||||
inference, and postprocess steps to return the final, user-friendly
|
||||
attribute prediction.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): The input image containing the face.
|
||||
*args: Additional data required for prediction, such as a bounding
|
||||
box or landmarks.
|
||||
|
||||
Returns:
|
||||
The final predicted attributes.
|
||||
"""
|
||||
raise NotImplementedError("Subclasses must implement the predict method.")
|
||||
|
||||
def __call__(self, *args, **kwargs) -> Any:
|
||||
"""
|
||||
Provides a convenient, callable shortcut for the `predict` method.
|
||||
"""
|
||||
return self.predict(*args, **kwargs)
|
||||
187
uniface/attribute/emotion.py
Normal file
@@ -0,0 +1,187 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from typing import List, Tuple, Union
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
import torch
|
||||
|
||||
from uniface.attribute.base import Attribute
|
||||
from uniface.constants import DDAMFNWeights
|
||||
from uniface.face_utils import face_alignment
|
||||
from uniface.log import Logger
|
||||
from uniface.model_store import verify_model_weights
|
||||
|
||||
__all__ = ["Emotion"]
|
||||
|
||||
|
||||
class Emotion(Attribute):
|
||||
"""
|
||||
Emotion recognition model using a TorchScript model.
|
||||
|
||||
This class inherits from the base `Attribute` class and implements the
|
||||
functionality for predicting one of several emotion categories from a face
|
||||
image. It requires 5-point facial landmarks for alignment.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
model_weights: DDAMFNWeights = DDAMFNWeights.AFFECNET7,
|
||||
input_size: Tuple[int, int] = (112, 112),
|
||||
) -> None:
|
||||
"""
|
||||
Initializes the emotion recognition model.
|
||||
|
||||
Args:
|
||||
model_weights (DDAMFNWeights): The enum for the model weights to load.
|
||||
input_size (Tuple[int, int]): The expected input size for the model.
|
||||
"""
|
||||
Logger.info(f"Initializing Emotion with model={model_weights.name}")
|
||||
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
||||
self.input_size = input_size
|
||||
self.model_path = verify_model_weights(model_weights)
|
||||
|
||||
# Define emotion labels based on the selected model
|
||||
self.emotion_labels = [
|
||||
"Neutral",
|
||||
"Happy",
|
||||
"Sad",
|
||||
"Surprise",
|
||||
"Fear",
|
||||
"Disgust",
|
||||
"Angry",
|
||||
]
|
||||
if model_weights == DDAMFNWeights.AFFECNET8:
|
||||
self.emotion_labels.append("Contempt")
|
||||
|
||||
self._initialize_model()
|
||||
|
||||
def _initialize_model(self) -> None:
|
||||
"""
|
||||
Loads and initializes the TorchScript model for inference.
|
||||
"""
|
||||
try:
|
||||
self.model = torch.jit.load(self.model_path, map_location=self.device)
|
||||
self.model.eval()
|
||||
# Warm-up with a dummy input for faster first inference
|
||||
dummy_input = torch.randn(1, 3, *self.input_size).to(self.device)
|
||||
with torch.no_grad():
|
||||
self.model(dummy_input)
|
||||
Logger.info(f"Successfully initialized Emotion model on {self.device}")
|
||||
except Exception as e:
|
||||
Logger.error(f"Failed to load Emotion model from '{self.model_path}'", exc_info=True)
|
||||
raise RuntimeError(f"Failed to initialize Emotion model: {e}") from e
|
||||
|
||||
def preprocess(self, image: np.ndarray, landmark: Union[List, np.ndarray]) -> torch.Tensor:
|
||||
"""
|
||||
Aligns the face using landmarks and preprocesses it into a tensor.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): The full input image in BGR format.
|
||||
landmark (Union[List, np.ndarray]): The 5-point facial landmarks.
|
||||
|
||||
Returns:
|
||||
torch.Tensor: The preprocessed image tensor ready for inference.
|
||||
"""
|
||||
landmark = np.asarray(landmark)
|
||||
|
||||
aligned_image, _ = face_alignment(image, landmark)
|
||||
|
||||
# Convert BGR to RGB, resize, normalize, and convert to a CHW tensor
|
||||
rgb_image = cv2.cvtColor(aligned_image, cv2.COLOR_BGR2RGB)
|
||||
resized_image = cv2.resize(rgb_image, self.input_size).astype(np.float32) / 255.0
|
||||
mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
|
||||
std = np.array([0.229, 0.224, 0.225], dtype=np.float32)
|
||||
normalized_image = (resized_image - mean) / std
|
||||
transposed_image = normalized_image.transpose((2, 0, 1))
|
||||
|
||||
return torch.from_numpy(transposed_image).unsqueeze(0).to(self.device)
|
||||
|
||||
def postprocess(self, prediction: torch.Tensor) -> Tuple[str, float]:
|
||||
"""
|
||||
Processes the raw model output to get the emotion label and confidence score.
|
||||
"""
|
||||
probabilities = torch.nn.functional.softmax(prediction, dim=1).squeeze().cpu().numpy()
|
||||
pred_index = np.argmax(probabilities)
|
||||
emotion_label = self.emotion_labels[pred_index]
|
||||
confidence = float(probabilities[pred_index])
|
||||
return emotion_label, confidence
|
||||
|
||||
def predict(self, image: np.ndarray, landmark: Union[List, np.ndarray]) -> Tuple[str, float]:
|
||||
"""
|
||||
Predicts the emotion from a single face specified by its landmarks.
|
||||
"""
|
||||
input_tensor = self.preprocess(image, landmark)
|
||||
with torch.no_grad():
|
||||
output = self.model(input_tensor)
|
||||
if isinstance(output, tuple):
|
||||
output = output[0]
|
||||
|
||||
return self.postprocess(output)
|
||||
|
||||
|
||||
# TODO: below is only for testing, remove it later
|
||||
if __name__ == "__main__":
|
||||
from uniface.constants import RetinaFaceWeights
|
||||
from uniface.detection import create_detector
|
||||
|
||||
print("Initializing models for live inference...")
|
||||
# 1. Initialize the face detector
|
||||
# Using a smaller model for faster real-time performance
|
||||
detector = create_detector(model_name=RetinaFaceWeights.MNET_V2)
|
||||
|
||||
# 2. Initialize the attribute predictor
|
||||
emotion_predictor = Emotion()
|
||||
|
||||
# 3. Start webcam capture
|
||||
cap = cv2.VideoCapture(0)
|
||||
if not cap.isOpened():
|
||||
print("Error: Could not open webcam.")
|
||||
exit()
|
||||
|
||||
print("Starting webcam feed. Press 'q' to quit.")
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
print("Error: Failed to capture frame.")
|
||||
break
|
||||
|
||||
# Detect faces in the current frame.
|
||||
# This method returns a list of dictionaries for each detected face.
|
||||
detections = detector.detect(frame)
|
||||
|
||||
# For each detected face, predict the emotion
|
||||
for detection in detections:
|
||||
box = detection["bbox"]
|
||||
landmark = detection["landmarks"]
|
||||
x1, y1, x2, y2 = map(int, box)
|
||||
|
||||
# Predict attributes using the landmark
|
||||
emotion, confidence = emotion_predictor.predict(frame, landmark)
|
||||
|
||||
# Prepare text and draw on the frame
|
||||
label = f"{emotion} ({confidence:.2f})"
|
||||
cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)
|
||||
cv2.putText(
|
||||
frame,
|
||||
label,
|
||||
(x1, y1 - 10),
|
||||
cv2.FONT_HERSHEY_SIMPLEX,
|
||||
0.8,
|
||||
(255, 0, 0),
|
||||
2,
|
||||
)
|
||||
|
||||
# Display the resulting frame
|
||||
cv2.imshow("Emotion Inference (Press 'q' to quit)", frame)
|
||||
|
||||
# Break the loop if 'q' is pressed
|
||||
if cv2.waitKey(1) & 0xFF == ord("q"):
|
||||
break
|
||||
|
||||
# Release resources
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
print("Inference stopped.")
|
||||
@@ -1,14 +1,13 @@
|
||||
# Copyright 2024 Yakhyokhuja Valikhujaev
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
import cv2
|
||||
import math
|
||||
import itertools
|
||||
import numpy as np
|
||||
import math
|
||||
from typing import List, Tuple
|
||||
|
||||
import torch
|
||||
from typing import Tuple, List
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
|
||||
def resize_image(frame, target_shape: Tuple[int, int] = (640, 640)) -> Tuple[np.ndarray, float]:
|
||||
@@ -44,7 +43,7 @@ def resize_image(frame, target_shape: Tuple[int, int] = (640, 640)) -> Tuple[np.
|
||||
return image, resize_factor
|
||||
|
||||
|
||||
def generate_anchors(image_size: Tuple[int, int] = (640, 640)) -> torch.Tensor:
|
||||
def generate_anchors(image_size: Tuple[int, int] = (640, 640)) -> np.ndarray:
|
||||
"""
|
||||
Generate anchor boxes for a given image size.
|
||||
|
||||
@@ -52,7 +51,7 @@ def generate_anchors(image_size: Tuple[int, int] = (640, 640)) -> torch.Tensor:
|
||||
image_size (Tuple[int, int]): Input image size (width, height). Defaults to (640, 640).
|
||||
|
||||
Returns:
|
||||
torch.Tensor: Anchor box coordinates as a tensor.
|
||||
np.ndarray: Anchor box coordinates as a NumPy array.
|
||||
"""
|
||||
image_size = image_size
|
||||
|
||||
@@ -60,12 +59,7 @@ def generate_anchors(image_size: Tuple[int, int] = (640, 640)) -> torch.Tensor:
|
||||
min_sizes = [[16, 32], [64, 128], [256, 512]]
|
||||
|
||||
anchors = []
|
||||
feature_maps = [
|
||||
[
|
||||
math.ceil(image_size[0] / step),
|
||||
math.ceil(image_size[1] / step)
|
||||
] for step in steps
|
||||
]
|
||||
feature_maps = [[math.ceil(image_size[0] / step), math.ceil(image_size[1] / step)] for step in steps]
|
||||
|
||||
for k, (map_height, map_width) in enumerate(feature_maps):
|
||||
step = steps[k]
|
||||
@@ -79,11 +73,11 @@ def generate_anchors(image_size: Tuple[int, int] = (640, 640)) -> torch.Tensor:
|
||||
for cy, cx in itertools.product(dense_cy, dense_cx):
|
||||
anchors += [cx, cy, s_kx, s_ky]
|
||||
|
||||
output = torch.Tensor(anchors).view(-1, 4)
|
||||
output = np.array(anchors, dtype=np.float32).reshape(-1, 4)
|
||||
return output
|
||||
|
||||
|
||||
def nms(dets: List[np.ndarray], threshold: float):
|
||||
def non_max_supression(dets: List[np.ndarray], threshold: float):
|
||||
"""
|
||||
Apply Non-Maximum Suppression (NMS) to reduce overlapping bounding boxes based on a threshold.
|
||||
|
||||
@@ -123,56 +117,57 @@ def nms(dets: List[np.ndarray], threshold: float):
|
||||
return keep
|
||||
|
||||
|
||||
def decode_boxes(loc, priors, variances=[0.1, 0.2]) -> torch.Tensor:
|
||||
def decode_boxes(loc, priors, variances=[0.1, 0.2]) -> np.ndarray:
|
||||
"""
|
||||
Decode locations from predictions using priors to undo
|
||||
the encoding done for offset regression at train time.
|
||||
|
||||
Args:
|
||||
loc (tensor): Location predictions for loc layers, shape: [num_priors, 4]
|
||||
priors (tensor): Prior boxes in center-offset form, shape: [num_priors, 4]
|
||||
loc (np.ndarray): Location predictions for loc layers, shape: [num_priors, 4]
|
||||
priors (np.ndarray): Prior boxes in center-offset form, shape: [num_priors, 4]
|
||||
variances (list[float]): Variances of prior boxes
|
||||
|
||||
Returns:
|
||||
tensor: Decoded bounding box predictions
|
||||
np.ndarray: Decoded bounding box predictions
|
||||
"""
|
||||
# Compute centers of predicted boxes
|
||||
cxcy = priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:]
|
||||
|
||||
# Compute widths and heights of predicted boxes
|
||||
wh = priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])
|
||||
wh = priors[:, 2:] * np.exp(loc[:, 2:] * variances[1])
|
||||
|
||||
# Convert center, size to corner coordinates
|
||||
boxes = torch.empty_like(loc)
|
||||
boxes = np.zeros_like(loc)
|
||||
boxes[:, :2] = cxcy - wh / 2 # xmin, ymin
|
||||
boxes[:, 2:] = cxcy + wh / 2 # xmax, ymax
|
||||
|
||||
return boxes
|
||||
|
||||
|
||||
def decode_landmarks(predictions, priors, variances=[0.1, 0.2]) -> torch.Tensor:
|
||||
def decode_landmarks(predictions, priors, variances=[0.1, 0.2]) -> np.ndarray:
|
||||
"""
|
||||
Decode landmarks from predictions using prior boxes to reverse the encoding done during training.
|
||||
Decode landmark predictions using prior boxes.
|
||||
|
||||
Args:
|
||||
predictions (tensor): Landmark predictions for localization layers.
|
||||
Shape: [num_priors, 10] where each prior contains 5 landmark (x, y) pairs.
|
||||
priors (tensor): Prior boxes in center-offset form.
|
||||
Shape: [num_priors, 4], where each prior has (cx, cy, width, height).
|
||||
variances (list[float]): Variances of the prior boxes to scale the decoded values.
|
||||
predictions (np.ndarray): Landmark predictions, shape: [num_priors, 10]
|
||||
priors (np.ndarray): Prior boxes, shape: [num_priors, 4]
|
||||
variances (list): Scaling factors for landmark offsets.
|
||||
|
||||
Returns:
|
||||
landmarks (tensor): Decoded landmark predictions.
|
||||
Shape: [num_priors, 10] where each row contains the decoded (x, y) pairs for 5 landmarks.
|
||||
np.ndarray: Decoded landmarks, shape: [num_priors, 10]
|
||||
"""
|
||||
|
||||
# Reshape predictions to [num_priors, 5, 2] to handle each pair (x, y) in a batch
|
||||
predictions = predictions.view(predictions.size(0), 5, 2)
|
||||
# Reshape predictions to [num_priors, 5, 2] to process landmark points
|
||||
predictions = predictions.reshape(predictions.shape[0], 5, 2)
|
||||
|
||||
# Perform the same operation on all landmark pairs at once
|
||||
landmarks = priors[:, :2].unsqueeze(1) + predictions * variances[0] * priors[:, 2:].unsqueeze(1)
|
||||
# Expand priors to match (num_priors, 5, 2)
|
||||
priors_xy = np.repeat(priors[:, :2][:, np.newaxis, :], 5, axis=1) # (num_priors, 5, 2)
|
||||
priors_wh = np.repeat(priors[:, 2:][:, np.newaxis, :], 5, axis=1) # (num_priors, 5, 2)
|
||||
|
||||
# Compute absolute landmark positions
|
||||
landmarks = priors_xy + predictions * variances[0] * priors_wh
|
||||
|
||||
# Flatten back to [num_priors, 10]
|
||||
landmarks = landmarks.view(landmarks.size(0), -1)
|
||||
landmarks = landmarks.reshape(landmarks.shape[0], -1)
|
||||
|
||||
return landmarks
|
||||
|
||||
@@ -1,26 +1,147 @@
|
||||
# Copyright 2024 Yakhyokhuja Valikhujaev
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from enum import Enum
|
||||
from typing import Dict
|
||||
|
||||
|
||||
MODEL_URLS: Dict[str, str] = {
|
||||
'retinaface_mnet025': 'https://github.com/yakhyo/uniface/releases/download/v0.1.1/retinaface_mv1_0.25.onnx',
|
||||
'retinaface_mnet050': 'https://github.com/yakhyo/uniface/releases/download/v0.1.1/retinaface_mv1_0.50.onnx',
|
||||
'retinaface_mnet_v1': 'https://github.com/yakhyo/uniface/releases/download/v0.1.1/retinaface_mv1.onnx',
|
||||
'retinaface_mnet_v2': 'https://github.com/yakhyo/uniface/releases/download/v0.1.1/retinaface_mv2.onnx',
|
||||
'retinaface_r18': 'https://github.com/yakhyo/uniface/releases/download/v0.1.1/retinaface_r18.onnx',
|
||||
'retinaface_r34': 'https://github.com/yakhyo/uniface/releases/download/v0.1.1/retinaface_r34.onnx'
|
||||
# fmt: off
|
||||
class SphereFaceWeights(str, Enum):
|
||||
"""
|
||||
Trained on MS1M V2 dataset with 5.8 million images of 85k identities.
|
||||
https://github.com/yakhyo/face-recognition
|
||||
"""
|
||||
SPHERE20 = "sphere20"
|
||||
SPHERE36 = "sphere36"
|
||||
|
||||
class MobileFaceWeights(str, Enum):
|
||||
"""
|
||||
Trained on MS1M V2 dataset with 5.8 million images of 85k identities.
|
||||
https://github.com/yakhyo/face-recognition
|
||||
"""
|
||||
MNET_025 = "mobilenetv1_025"
|
||||
MNET_V2 = "mobilenetv2"
|
||||
MNET_V3_SMALL = "mobilenetv3_small"
|
||||
MNET_V3_LARGE = "mobilenetv3_large"
|
||||
|
||||
class ArcFaceWeights(str, Enum):
|
||||
"""
|
||||
Pretrained weights from ArcFace model (insightface).
|
||||
https://github.com/deepinsight/insightface
|
||||
"""
|
||||
MNET = "arcface_mnet"
|
||||
RESNET = "arcface_resnet"
|
||||
|
||||
class RetinaFaceWeights(str, Enum):
|
||||
"""
|
||||
Trained on WIDER FACE dataset.
|
||||
https://github.com/yakhyo/retinaface-pytorch
|
||||
"""
|
||||
MNET_025 = "retinaface_mnet025"
|
||||
MNET_050 = "retinaface_mnet050"
|
||||
MNET_V1 = "retinaface_mnet_v1"
|
||||
MNET_V2 = "retinaface_mnet_v2"
|
||||
RESNET18 = "retinaface_r18"
|
||||
RESNET34 = "retinaface_r34"
|
||||
|
||||
|
||||
class SCRFDWeights(str, Enum):
|
||||
"""
|
||||
Trained on WIDER FACE dataset.
|
||||
https://github.com/deepinsight/insightface
|
||||
"""
|
||||
SCRFD_10G_KPS = "scrfd_10g"
|
||||
SCRFD_500M_KPS = "scrfd_500m"
|
||||
|
||||
|
||||
class DDAMFNWeights(str, Enum):
|
||||
"""
|
||||
Trained on AffectNet dataset.
|
||||
https://github.com/SainingZhang/DDAMFN/tree/main/DDAMFN
|
||||
"""
|
||||
AFFECNET7 = "affecnet7"
|
||||
AFFECNET8 = "affecnet8"
|
||||
|
||||
|
||||
class AgeGenderWeights(str, Enum):
|
||||
"""
|
||||
Trained on CelebA dataset.
|
||||
https://github.com/deepinsight/insightface
|
||||
"""
|
||||
DEFAULT = "age_gender"
|
||||
|
||||
|
||||
class LandmarkWeights(str, Enum):
|
||||
"""
|
||||
MobileNet 0.5 from Insightface
|
||||
https://github.com/deepinsight/insightface/tree/master/alignment/coordinate_reg
|
||||
"""
|
||||
DEFAULT = "2d_106"
|
||||
|
||||
# fmt: on
|
||||
|
||||
|
||||
MODEL_URLS: Dict[Enum, str] = {
|
||||
# RetinaFace
|
||||
RetinaFaceWeights.MNET_025: "https://github.com/yakhyo/uniface/releases/download/weights/retinaface_mv1_0.25.onnx",
|
||||
RetinaFaceWeights.MNET_050: "https://github.com/yakhyo/uniface/releases/download/weights/retinaface_mv1_0.50.onnx",
|
||||
RetinaFaceWeights.MNET_V1: "https://github.com/yakhyo/uniface/releases/download/weights/retinaface_mv1.onnx",
|
||||
RetinaFaceWeights.MNET_V2: "https://github.com/yakhyo/uniface/releases/download/weights/retinaface_mv2.onnx",
|
||||
RetinaFaceWeights.RESNET18: "https://github.com/yakhyo/uniface/releases/download/weights/retinaface_r18.onnx",
|
||||
RetinaFaceWeights.RESNET34: "https://github.com/yakhyo/uniface/releases/download/weights/retinaface_r34.onnx",
|
||||
# MobileFace
|
||||
MobileFaceWeights.MNET_025: "https://github.com/yakhyo/uniface/releases/download/weights/mobilenetv1_0.25.onnx",
|
||||
MobileFaceWeights.MNET_V2: "https://github.com/yakhyo/uniface/releases/download/weights/mobilenetv2.onnx",
|
||||
MobileFaceWeights.MNET_V3_SMALL: "https://github.com/yakhyo/uniface/releases/download/weights/mobilenetv3_small.onnx",
|
||||
MobileFaceWeights.MNET_V3_LARGE: "https://github.com/yakhyo/uniface/releases/download/weights/mobilenetv3_large.onnx",
|
||||
# SphereFace
|
||||
SphereFaceWeights.SPHERE20: "https://github.com/yakhyo/uniface/releases/download/weights/sphere20.onnx",
|
||||
SphereFaceWeights.SPHERE36: "https://github.com/yakhyo/uniface/releases/download/weights/sphere36.onnx",
|
||||
# ArcFace
|
||||
ArcFaceWeights.MNET: "https://github.com/yakhyo/uniface/releases/download/weights/w600k_mbf.onnx",
|
||||
ArcFaceWeights.RESNET: "https://github.com/yakhyo/uniface/releases/download/weights/w600k_r50.onnx",
|
||||
# SCRFD
|
||||
SCRFDWeights.SCRFD_10G_KPS: "https://github.com/yakhyo/uniface/releases/download/weights/scrfd_10g_kps.onnx",
|
||||
SCRFDWeights.SCRFD_500M_KPS: "https://github.com/yakhyo/uniface/releases/download/weights/scrfd_500m_kps.onnx",
|
||||
# DDAFM
|
||||
DDAMFNWeights.AFFECNET7: "https://github.com/yakhyo/uniface/releases/download/weights/affecnet7.script",
|
||||
DDAMFNWeights.AFFECNET8: "https://github.com/yakhyo/uniface/releases/download/weights/affecnet8.script",
|
||||
# AgeGender
|
||||
AgeGenderWeights.DEFAULT: "https://github.com/yakhyo/uniface/releases/download/weights/genderage.onnx",
|
||||
# Landmarks
|
||||
LandmarkWeights.DEFAULT: "https://github.com/yakhyo/uniface/releases/download/weights/2d106det.onnx",
|
||||
}
|
||||
|
||||
MODEL_SHA256: Dict[str, str] = {
|
||||
'retinaface_mnet025': 'b7a7acab55e104dce6f32cdfff929bd83946da5cd869b9e2e9bdffafd1b7e4a5',
|
||||
'retinaface_mnet050': 'd8977186f6037999af5b4113d42ba77a84a6ab0c996b17c713cc3d53b88bfc37',
|
||||
'retinaface_mnet_v1': '75c961aaf0aff03d13c074e9ec656e5510e174454dd4964a161aab4fe5f04153',
|
||||
'retinaface_mnet_v2': '3ca44c045651cabeed1193a1fae8946ad1f3a55da8fa74b341feab5a8319f757',
|
||||
'retinaface_r18': 'e8b5ddd7d2c3c8f7c942f9f10cec09d8e319f78f09725d3f709631de34fb649d',
|
||||
'retinaface_r34': 'bd0263dc2a465d32859555cb1741f2d98991eb0053696e8ee33fec583d30e630'
|
||||
MODEL_SHA256: Dict[Enum, str] = {
|
||||
# RetinaFace
|
||||
RetinaFaceWeights.MNET_025: "b7a7acab55e104dce6f32cdfff929bd83946da5cd869b9e2e9bdffafd1b7e4a5",
|
||||
RetinaFaceWeights.MNET_050: "d8977186f6037999af5b4113d42ba77a84a6ab0c996b17c713cc3d53b88bfc37",
|
||||
RetinaFaceWeights.MNET_V1: "75c961aaf0aff03d13c074e9ec656e5510e174454dd4964a161aab4fe5f04153",
|
||||
RetinaFaceWeights.MNET_V2: "3ca44c045651cabeed1193a1fae8946ad1f3a55da8fa74b341feab5a8319f757",
|
||||
RetinaFaceWeights.RESNET18: "e8b5ddd7d2c3c8f7c942f9f10cec09d8e319f78f09725d3f709631de34fb649d",
|
||||
RetinaFaceWeights.RESNET34: "bd0263dc2a465d32859555cb1741f2d98991eb0053696e8ee33fec583d30e630",
|
||||
# MobileFace
|
||||
MobileFaceWeights.MNET_025: "eeda7d23d9c2b40cf77fa8da8e895b5697465192648852216074679657f8ee8b",
|
||||
MobileFaceWeights.MNET_V2: "38b148284dd48cc898d5d4453104252fbdcbacc105fe3f0b80e78954d9d20d89",
|
||||
MobileFaceWeights.MNET_V3_SMALL: "d4acafa1039a82957aa8a9a1dac278a401c353a749c39df43de0e29cc1c127c3",
|
||||
MobileFaceWeights.MNET_V3_LARGE: "0e48f8e11f070211716d03e5c65a3db35a5e917cfb5bc30552358629775a142a",
|
||||
# SphereFace
|
||||
SphereFaceWeights.SPHERE20: "c02878cf658eb1861f580b7e7144b0d27cc29c440bcaa6a99d466d2854f14c9d",
|
||||
SphereFaceWeights.SPHERE36: "13b3890cd5d7dec2b63f7c36fd7ce07403e5a0bbb701d9647c0289e6cbe7bb20",
|
||||
# ArcFace
|
||||
ArcFaceWeights.MNET: "9cc6e4a75f0e2bf0b1aed94578f144d15175f357bdc05e815e5c4a02b319eb4f",
|
||||
ArcFaceWeights.RESNET: "4c06341c33c2ca1f86781dab0e829f88ad5b64be9fba56e56bc9ebdefc619e43",
|
||||
# SCRFD
|
||||
SCRFDWeights.SCRFD_10G_KPS: "5838f7fe053675b1c7a08b633df49e7af5495cee0493c7dcf6697200b85b5b91",
|
||||
SCRFDWeights.SCRFD_500M_KPS: "5e4447f50245bbd7966bd6c0fa52938c61474a04ec7def48753668a9d8b4ea3a",
|
||||
# DDAFM
|
||||
DDAMFNWeights.AFFECNET7: "10535bf8b6afe8e9d6ae26cea6c3add9a93036e9addb6adebfd4a972171d015d",
|
||||
DDAMFNWeights.AFFECNET8: "8c66963bc71db42796a14dfcbfcd181b268b65a3fc16e87147d6a3a3d7e0f487",
|
||||
# AgeGender
|
||||
AgeGenderWeights.DEFAULT: "4fde69b1c810857b88c64a335084f1c3fe8f01246c9a191b48c7bb756d6652fb",
|
||||
# Landmark
|
||||
LandmarkWeights.DEFAULT: "f001b856447c413801ef5c42091ed0cd516fcd21f2d6b79635b1e733a7109dbf",
|
||||
}
|
||||
|
||||
CHUNK_SIZE = 8192
|
||||
|
||||
143
uniface/detection/__init__.py
Normal file
@@ -0,0 +1,143 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
|
||||
from typing import Any, Dict, List
|
||||
|
||||
import numpy as np
|
||||
|
||||
from .base import BaseDetector
|
||||
from .retinaface import RetinaFace
|
||||
from .scrfd import SCRFD
|
||||
|
||||
# Global cache for detector instances
|
||||
_detector_cache: Dict[str, BaseDetector] = {}
|
||||
|
||||
|
||||
def detect_faces(image: np.ndarray, method: str = "retinaface", **kwargs) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
High-level face detection function.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): Input image as numpy array.
|
||||
method (str): Detection method to use. Options: 'retinaface', 'scrfd'.
|
||||
**kwargs: Additional arguments passed to the detector.
|
||||
|
||||
Returns:
|
||||
List[Dict[str, Any]]: A list of dictionaries, where each dictionary represents a detected face and contains:
|
||||
- 'bbox' (List[float]): [x1, y1, x2, y2] bounding box coordinates.
|
||||
- 'confidence' (float): The confidence score of the detection.
|
||||
- 'landmarks' (List[List[float]]): 5-point facial landmarks.
|
||||
|
||||
Example:
|
||||
>>> from uniface import detect_faces
|
||||
>>> image = cv2.imread("your_image.jpg")
|
||||
>>> faces = detect_faces(image, method='retinaface', conf_thresh=0.8)
|
||||
>>> for face in faces:
|
||||
... print(f"Found face with confidence: {face['confidence']}")
|
||||
... print(f"BBox: {face['bbox']}")
|
||||
"""
|
||||
method_name = method.lower()
|
||||
|
||||
sorted_kwargs = sorted(kwargs.items())
|
||||
cache_key = f"{method_name}_{str(sorted_kwargs)}"
|
||||
|
||||
if cache_key not in _detector_cache:
|
||||
# Pass kwargs to create the correctly configured detector
|
||||
_detector_cache[cache_key] = create_detector(method, **kwargs)
|
||||
|
||||
detector = _detector_cache[cache_key]
|
||||
return detector.detect(image)
|
||||
|
||||
|
||||
def create_detector(method: str = "retinaface", **kwargs) -> BaseDetector:
|
||||
"""
|
||||
Factory function to create face detectors.
|
||||
|
||||
Args:
|
||||
method (str): Detection method. Options:
|
||||
- 'retinaface': RetinaFace detector (default)
|
||||
- 'scrfd': SCRFD detector (fast and accurate)
|
||||
**kwargs: Detector-specific parameters
|
||||
|
||||
Returns:
|
||||
BaseDetector: Initialized detector instance
|
||||
|
||||
Raises:
|
||||
ValueError: If method is not supported
|
||||
|
||||
Examples:
|
||||
>>> # Basic usage
|
||||
>>> detector = create_detector('retinaface')
|
||||
|
||||
>>> # SCRFD detector with custom parameters
|
||||
>>> detector = create_detector(
|
||||
... 'scrfd',
|
||||
... model_name=SCRFDWeights.SCRFD_10G_KPS,
|
||||
... conf_thresh=0.8,
|
||||
... input_size=(640, 640)
|
||||
... )
|
||||
|
||||
>>> # RetinaFace detector
|
||||
>>> detector = create_detector(
|
||||
... 'retinaface',
|
||||
... model_name=RetinaFaceWeights.MNET_V2,
|
||||
... conf_thresh=0.8,
|
||||
... nms_thresh=0.4
|
||||
... )
|
||||
"""
|
||||
method = method.lower()
|
||||
|
||||
if method == "retinaface":
|
||||
return RetinaFace(**kwargs)
|
||||
|
||||
elif method == "scrfd":
|
||||
return SCRFD(**kwargs)
|
||||
|
||||
else:
|
||||
available_methods = ["retinaface", "scrfd"]
|
||||
raise ValueError(f"Unsupported detection method: '{method}'. Available methods: {available_methods}")
|
||||
|
||||
|
||||
def list_available_detectors() -> Dict[str, Dict[str, Any]]:
|
||||
"""
|
||||
List all available detection methods with their descriptions and parameters.
|
||||
|
||||
Returns:
|
||||
Dict[str, Dict[str, Any]]: Dictionary of detector information
|
||||
"""
|
||||
return {
|
||||
"retinaface": {
|
||||
"description": "RetinaFace detector with high accuracy",
|
||||
"supports_landmarks": True,
|
||||
"paper": "https://arxiv.org/abs/1905.00641",
|
||||
"default_params": {
|
||||
"model_name": "mnet_v2",
|
||||
"conf_thresh": 0.5,
|
||||
"nms_thresh": 0.4,
|
||||
"input_size": (640, 640),
|
||||
},
|
||||
},
|
||||
"scrfd": {
|
||||
"description": "SCRFD detector - fast and accurate with efficient architecture",
|
||||
"supports_landmarks": True,
|
||||
"paper": "https://arxiv.org/abs/2105.04714",
|
||||
"default_params": {
|
||||
"model_name": "scrfd_10g_kps",
|
||||
"conf_thresh": 0.5,
|
||||
"nms_thresh": 0.4,
|
||||
"input_size": (640, 640),
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
__all__ = [
|
||||
"detect_faces",
|
||||
"create_detector",
|
||||
"list_available_detectors",
|
||||
"SCRFD",
|
||||
"RetinaFace",
|
||||
"BaseDetector",
|
||||
]
|
||||
101
uniface/detection/base.py
Normal file
@@ -0,0 +1,101 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""
|
||||
Base classes for face detection.
|
||||
"""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Any, Dict, Tuple
|
||||
|
||||
import numpy as np
|
||||
|
||||
|
||||
class BaseDetector(ABC):
|
||||
"""
|
||||
Abstract base class for all face detectors.
|
||||
|
||||
This class defines the interface that all face detectors must implement,
|
||||
ensuring consistency across different detection methods.
|
||||
"""
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
"""Initialize the detector with configuration parameters."""
|
||||
self.config = kwargs
|
||||
|
||||
@abstractmethod
|
||||
def detect(self, image: np.ndarray, **kwargs) -> Tuple[np.ndarray, np.ndarray]:
|
||||
"""
|
||||
Detect faces in an image.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): Input image as numpy array with shape (H, W, C)
|
||||
**kwargs: Additional detection parameters
|
||||
|
||||
Returns:
|
||||
Tuple[np.ndarray, np.ndarray]: (detections, landmarks)
|
||||
- detections: Bounding boxes with confidence scores, shape (N, 5)
|
||||
Format: [x_min, y_min, x_max, y_max, confidence]
|
||||
- landmarks: Facial landmark points, shape (N, 5, 2) for 5-point landmarks
|
||||
or (N, 68, 2) for 68-point landmarks. Empty array if not supported.
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def preprocess(self, image: np.ndarray) -> np.ndarray:
|
||||
"""
|
||||
Preprocess input image for detection.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): Input image
|
||||
|
||||
Returns:
|
||||
np.ndarray: Preprocessed image tensor
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def postprocess(self, outputs, **kwargs) -> Tuple[np.ndarray, np.ndarray]:
|
||||
"""
|
||||
Postprocess model outputs to get final detections.
|
||||
|
||||
Args:
|
||||
outputs: Raw model outputs
|
||||
**kwargs: Additional postprocessing parameters
|
||||
|
||||
Returns:
|
||||
Tuple[np.ndarray, np.ndarray]: (detections, landmarks)
|
||||
"""
|
||||
pass
|
||||
|
||||
def __str__(self) -> str:
|
||||
"""String representation of the detector."""
|
||||
return f"{self.__class__.__name__}({self.config})"
|
||||
|
||||
def __repr__(self) -> str:
|
||||
"""Detailed string representation."""
|
||||
return self.__str__()
|
||||
|
||||
@property
|
||||
def supports_landmarks(self) -> bool:
|
||||
"""
|
||||
Whether this detector supports landmark detection.
|
||||
|
||||
Returns:
|
||||
bool: True if landmarks are supported, False otherwise
|
||||
"""
|
||||
return hasattr(self, "_supports_landmarks") and self._supports_landmarks
|
||||
|
||||
def get_info(self) -> Dict[str, Any]:
|
||||
"""
|
||||
Get detector information and configuration.
|
||||
|
||||
Returns:
|
||||
Dict[str, Any]: Detector information
|
||||
"""
|
||||
return {
|
||||
"name": self.__class__.__name__,
|
||||
"supports_landmarks": self._supports_landmarks,
|
||||
"config": self.config,
|
||||
}
|
||||
354
uniface/detection/retinaface.py
Normal file
@@ -0,0 +1,354 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from typing import Any, Dict, List, Literal, Tuple
|
||||
|
||||
import numpy as np
|
||||
|
||||
from uniface.constants import RetinaFaceWeights
|
||||
from uniface.log import Logger
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.onnx_utils import create_onnx_session
|
||||
|
||||
from .base import BaseDetector
|
||||
from .utils import (
|
||||
decode_boxes,
|
||||
decode_landmarks,
|
||||
generate_anchors,
|
||||
non_max_supression,
|
||||
resize_image,
|
||||
)
|
||||
|
||||
|
||||
class RetinaFace(BaseDetector):
|
||||
"""
|
||||
Face detector based on the RetinaFace architecture.
|
||||
|
||||
Title: "RetinaFace: Single-stage Dense Face Localisation in the Wild"
|
||||
Paper: https://arxiv.org/abs/1905.00641
|
||||
|
||||
Args:
|
||||
**kwargs: Keyword arguments passed to BaseDetector and RetinaFace. Supported keys include:
|
||||
model_name (RetinaFaceWeights, optional): Model weights to use. Defaults to `RetinaFaceWeights.MNET_V2`.
|
||||
conf_thresh (float, optional): Confidence threshold for filtering detections. Defaults to 0.5.
|
||||
nms_thresh (float, optional): Non-maximum suppression (NMS) IoU threshold. Defaults to 0.4.
|
||||
pre_nms_topk (int, optional): Number of top-scoring boxes considered before NMS. Defaults to 5000.
|
||||
post_nms_topk (int, optional): Max number of detections kept after NMS. Defaults to 750.
|
||||
dynamic_size (bool, optional): If True, generate anchors dynamically per input image. Defaults to False.
|
||||
input_size (Tuple[int, int], optional): Fixed input size (width, height) if `dynamic_size=False`. Defaults to (640, 640).
|
||||
|
||||
Attributes:
|
||||
model_name (RetinaFaceWeights): Selected model variant.
|
||||
conf_thresh (float): Threshold for confidence-based filtering.
|
||||
nms_thresh (float): IoU threshold used for NMS.
|
||||
pre_nms_topk (int): Limit on proposals before applying NMS.
|
||||
post_nms_topk (int): Limit on retained detections after NMS.
|
||||
dynamic_size (bool): Flag indicating dynamic or static input sizing.
|
||||
input_size (Tuple[int, int]): Static input size if `dynamic_size=False`.
|
||||
_model_path (str): Absolute path to the verified model weights.
|
||||
_priors (np.ndarray): Precomputed anchor boxes (if static size).
|
||||
_supports_landmarks (bool): Indicates landmark prediction support.
|
||||
|
||||
Raises:
|
||||
ValueError: If the model weights are invalid or not found.
|
||||
RuntimeError: If the ONNX model fails to load or initialize.
|
||||
"""
|
||||
|
||||
def __init__(self, **kwargs) -> None:
|
||||
super().__init__(**kwargs)
|
||||
self._supports_landmarks = True # RetinaFace supports landmarks
|
||||
|
||||
self.model_name = kwargs.get("model_name", RetinaFaceWeights.MNET_V2)
|
||||
self.conf_thresh = kwargs.get("conf_thresh", 0.5)
|
||||
self.nms_thresh = kwargs.get("nms_thresh", 0.4)
|
||||
self.pre_nms_topk = kwargs.get("pre_nms_topk", 5000)
|
||||
self.post_nms_topk = kwargs.get("post_nms_topk", 750)
|
||||
self.dynamic_size = kwargs.get("dynamic_size", False)
|
||||
self.input_size = kwargs.get("input_size", (640, 640))
|
||||
|
||||
Logger.info(
|
||||
f"Initializing RetinaFace with model={self.model_name}, conf_thresh={self.conf_thresh}, nms_thresh={self.nms_thresh}, "
|
||||
f"input_size={self.input_size}"
|
||||
)
|
||||
|
||||
# Get path to model weights
|
||||
self._model_path = verify_model_weights(self.model_name)
|
||||
Logger.info(f"Verified model weights located at: {self._model_path}")
|
||||
|
||||
# Precompute anchors if using static size
|
||||
if not self.dynamic_size and self.input_size is not None:
|
||||
self._priors = generate_anchors(image_size=self.input_size)
|
||||
Logger.debug("Generated anchors for static input size.")
|
||||
|
||||
# Initialize model
|
||||
self._initialize_model(self._model_path)
|
||||
|
||||
def _initialize_model(self, model_path: str) -> None:
|
||||
"""
|
||||
Initializes an ONNX model session from the given path.
|
||||
|
||||
Args:
|
||||
model_path (str): The file path to the ONNX model.
|
||||
|
||||
Raises:
|
||||
RuntimeError: If the model fails to load, logs an error and raises an exception.
|
||||
"""
|
||||
try:
|
||||
self.session = create_onnx_session(model_path)
|
||||
self.input_names = self.session.get_inputs()[0].name
|
||||
self.output_names = [x.name for x in self.session.get_outputs()]
|
||||
Logger.info(f"Successfully initialized the model from {model_path}")
|
||||
except Exception as e:
|
||||
Logger.error(f"Failed to load model from '{model_path}': {e}", exc_info=True)
|
||||
raise RuntimeError(f"Failed to initialize model session for '{model_path}'") from e
|
||||
|
||||
def preprocess(self, image: np.ndarray) -> np.ndarray:
|
||||
"""Preprocess input image for model inference.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): Input image.
|
||||
|
||||
Returns:
|
||||
np.ndarray: Preprocessed image tensor with shape (1, C, H, W)
|
||||
"""
|
||||
image = np.float32(image) - np.array([104, 117, 123], dtype=np.float32)
|
||||
image = image.transpose(2, 0, 1) # HWC to CHW
|
||||
image = np.expand_dims(image, axis=0) # Add batch dimension (1, C, H, W)
|
||||
return image
|
||||
|
||||
def inference(self, input_tensor: np.ndarray) -> List[np.ndarray]:
|
||||
"""Perform model inference on the preprocessed image tensor.
|
||||
|
||||
Args:
|
||||
input_tensor (np.ndarray): Preprocessed input tensor.
|
||||
|
||||
Returns:
|
||||
Tuple[np.ndarray, np.ndarray]: Raw model outputs.
|
||||
"""
|
||||
return self.session.run(self.output_names, {self.input_names: input_tensor})
|
||||
|
||||
def detect(
|
||||
self,
|
||||
image: np.ndarray,
|
||||
max_num: int = 0,
|
||||
metric: Literal["default", "max"] = "max",
|
||||
center_weight: float = 2.0,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Perform face detection on an input image and return bounding boxes and facial landmarks.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): Input image as a NumPy array of shape (H, W, C).
|
||||
max_num (int): Maximum number of detections to return. Use 0 to return all detections. Defaults to 0.
|
||||
metric (Literal["default", "max"]): Metric for ranking detections when `max_num` is limited.
|
||||
- "default": Prioritize detections closer to the image center.
|
||||
- "max": Prioritize detections with larger bounding box areas.
|
||||
center_weight (float): Weight for penalizing detections farther from the image center
|
||||
when using the "default" metric. Defaults to 2.0.
|
||||
|
||||
Returns:
|
||||
List[Dict[str, Any]]: List of face detection dictionaries, each containing:
|
||||
- 'bbox': [x1, y1, x2, y2] - Bounding box coordinates
|
||||
- 'confidence': float - Detection confidence score
|
||||
- 'landmarks': [[x1, y1], [x2, y2], [x3, y3], [x4, y4], [x5, y5]] - 5-point facial landmarks
|
||||
"""
|
||||
|
||||
original_height, original_width = image.shape[:2]
|
||||
|
||||
if self.dynamic_size:
|
||||
height, width, _ = image.shape
|
||||
self._priors = generate_anchors(image_size=(height, width)) # generate anchors for each input image
|
||||
resize_factor = 1.0 # No resizing
|
||||
else:
|
||||
image, resize_factor = resize_image(image, target_shape=self.input_size)
|
||||
|
||||
height, width, _ = image.shape
|
||||
image_tensor = self.preprocess(image)
|
||||
|
||||
# ONNXRuntime inference
|
||||
outputs = self.inference(image_tensor)
|
||||
|
||||
# Postprocessing
|
||||
detections, landmarks = self.postprocess(outputs, resize_factor, shape=(width, height))
|
||||
|
||||
if max_num > 0 and detections.shape[0] > max_num:
|
||||
# Calculate area of detections
|
||||
areas = (detections[:, 2] - detections[:, 0]) * (detections[:, 3] - detections[:, 1])
|
||||
|
||||
# Calculate offsets from image center
|
||||
center = (original_height // 2, original_width // 2)
|
||||
offsets = np.vstack(
|
||||
[
|
||||
(detections[:, 0] + detections[:, 2]) / 2 - center[1],
|
||||
(detections[:, 1] + detections[:, 3]) / 2 - center[0],
|
||||
]
|
||||
)
|
||||
offset_dist_squared = np.sum(np.power(offsets, 2.0), axis=0)
|
||||
|
||||
# Calculate scores based on the chosen metric
|
||||
if metric == "max":
|
||||
scores = areas
|
||||
else:
|
||||
scores = areas - offset_dist_squared * center_weight
|
||||
|
||||
# Sort by scores and select top `max_num`
|
||||
sorted_indices = np.argsort(scores)[::-1][:max_num]
|
||||
|
||||
detections = detections[sorted_indices]
|
||||
landmarks = landmarks[sorted_indices]
|
||||
|
||||
faces = []
|
||||
for i in range(detections.shape[0]):
|
||||
face_dict = {
|
||||
"bbox": detections[i, :4].astype(float).tolist(),
|
||||
"confidence": detections[i, 4].item(),
|
||||
"landmarks": landmarks[i].astype(float).tolist(),
|
||||
}
|
||||
faces.append(face_dict)
|
||||
|
||||
return faces
|
||||
|
||||
def postprocess(
|
||||
self, outputs: List[np.ndarray], resize_factor: float, shape: Tuple[int, int]
|
||||
) -> Tuple[np.ndarray, np.ndarray]:
|
||||
"""
|
||||
Process the model outputs into final detection results.
|
||||
|
||||
Args:
|
||||
outputs (List[np.ndarray]): Raw outputs from the detection model.
|
||||
- outputs[0]: Location predictions (bounding box coordinates).
|
||||
- outputs[1]: Class confidence scores.
|
||||
- outputs[2]: Landmark predictions.
|
||||
resize_factor (float): Factor used to resize the input image during preprocessing.
|
||||
shape (Tuple[int, int]): Original shape of the image as (height, width).
|
||||
|
||||
Returns:
|
||||
Tuple[np.ndarray, np.ndarray]: Processed results containing:
|
||||
- detections (np.ndarray): Array of detected bounding boxes with confidence scores.
|
||||
Shape: (num_detections, 5), where each row is [x_min, y_min, x_max, y_max, score].
|
||||
- landmarks (np.ndarray): Array of detected facial landmarks.
|
||||
Shape: (num_detections, 5, 2), where each row contains 5 landmark points (x, y).
|
||||
"""
|
||||
loc, conf, landmarks = (
|
||||
outputs[0].squeeze(0),
|
||||
outputs[1].squeeze(0),
|
||||
outputs[2].squeeze(0),
|
||||
)
|
||||
|
||||
# Decode boxes and landmarks
|
||||
boxes = decode_boxes(loc, self._priors)
|
||||
landmarks = decode_landmarks(landmarks, self._priors)
|
||||
|
||||
boxes, landmarks = self._scale_detections(boxes, landmarks, resize_factor, shape=(shape[0], shape[1]))
|
||||
|
||||
# Extract confidence scores for the face class
|
||||
scores = conf[:, 1]
|
||||
mask = scores > self.conf_thresh
|
||||
|
||||
# Filter by confidence threshold
|
||||
boxes, landmarks, scores = boxes[mask], landmarks[mask], scores[mask]
|
||||
|
||||
# Sort by scores
|
||||
order = scores.argsort()[::-1][: self.pre_nms_topk]
|
||||
boxes, landmarks, scores = boxes[order], landmarks[order], scores[order]
|
||||
|
||||
# Apply NMS
|
||||
detections = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
|
||||
keep = non_max_supression(detections, self.nms_thresh)
|
||||
detections, landmarks = detections[keep], landmarks[keep]
|
||||
|
||||
# Keep top-k detections
|
||||
detections, landmarks = (
|
||||
detections[: self.post_nms_topk],
|
||||
landmarks[: self.post_nms_topk],
|
||||
)
|
||||
|
||||
landmarks = landmarks.reshape(-1, 5, 2).astype(np.int32)
|
||||
|
||||
return detections, landmarks
|
||||
|
||||
def _scale_detections(
|
||||
self,
|
||||
boxes: np.ndarray,
|
||||
landmarks: np.ndarray,
|
||||
resize_factor: float,
|
||||
shape: Tuple[int, int],
|
||||
) -> Tuple[np.ndarray, np.ndarray]:
|
||||
# Scale bounding boxes and landmarks to the original image size.
|
||||
bbox_scale = np.array([shape[0], shape[1]] * 2)
|
||||
boxes = boxes * bbox_scale / resize_factor
|
||||
|
||||
landmark_scale = np.array([shape[0], shape[1]] * 5)
|
||||
landmarks = landmarks * landmark_scale / resize_factor
|
||||
|
||||
return boxes, landmarks
|
||||
|
||||
|
||||
# TODO: below is only for testing, remove it later
|
||||
def draw_bbox(frame, bbox, score, color=(0, 255, 0), thickness=2):
|
||||
x1, y1, x2, y2 = map(int, bbox) # Unpack 4 bbox values
|
||||
cv2.rectangle(frame, (x1, y1), (x2, y2), color, thickness)
|
||||
cv2.putText(frame, f"{score:.2f}", (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)
|
||||
|
||||
|
||||
def draw_keypoints(frame, points, color=(0, 0, 255), radius=2):
|
||||
for x, y in points.astype(np.int32):
|
||||
cv2.circle(frame, (int(x), int(y)), radius, color, -1)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import cv2
|
||||
|
||||
detector = RetinaFace(model_name=RetinaFaceWeights.MNET_050)
|
||||
print(detector.get_info())
|
||||
cap = cv2.VideoCapture(0)
|
||||
|
||||
if not cap.isOpened():
|
||||
print("Failed to open webcam.")
|
||||
exit()
|
||||
|
||||
print("Webcam started. Press 'q' to exit.")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
print("Failed to read frame.")
|
||||
break
|
||||
|
||||
# Get face detections as list of dictionaries
|
||||
faces = detector.detect(frame)
|
||||
|
||||
# Process each detected face
|
||||
for face in faces:
|
||||
# Extract bbox and landmarks from dictionary
|
||||
bbox = face["bbox"] # [x1, y1, x2, y2]
|
||||
landmarks = face["landmarks"] # [[x1, y1], [x2, y2], ...]
|
||||
confidence = face["confidence"]
|
||||
|
||||
# Pass bbox and confidence separately
|
||||
draw_bbox(frame, bbox, confidence)
|
||||
|
||||
# Convert landmarks to numpy array format if needed
|
||||
if landmarks is not None and len(landmarks) > 0:
|
||||
# Convert list of [x, y] pairs to numpy array
|
||||
points = np.array(landmarks, dtype=np.float32) # Shape: (5, 2)
|
||||
draw_keypoints(frame, points)
|
||||
|
||||
# Display face count
|
||||
cv2.putText(
|
||||
frame,
|
||||
f"Faces: {len(faces)}",
|
||||
(10, 30),
|
||||
cv2.FONT_HERSHEY_SIMPLEX,
|
||||
0.7,
|
||||
(255, 255, 255),
|
||||
2,
|
||||
)
|
||||
|
||||
cv2.imshow("FaceDetection", frame)
|
||||
if cv2.waitKey(1) & 0xFF == ord("q"):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
333
uniface/detection/scrfd.py
Normal file
@@ -0,0 +1,333 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from typing import Any, Dict, List, Literal, Tuple
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from uniface.constants import SCRFDWeights
|
||||
from uniface.log import Logger
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.onnx_utils import create_onnx_session
|
||||
|
||||
from .base import BaseDetector
|
||||
from .utils import distance2bbox, distance2kps, non_max_supression, resize_image
|
||||
|
||||
__all__ = ["SCRFD"]
|
||||
|
||||
|
||||
class SCRFD(BaseDetector):
|
||||
"""
|
||||
Face detector based on the SCRFD architecture.
|
||||
|
||||
Title: "Sample and Computation Redistribution for Efficient Face Detection"
|
||||
Paper: https://arxiv.org/abs/2105.04714
|
||||
|
||||
Args:
|
||||
**kwargs: Keyword arguments passed to BaseDetector and SCRFD. Supported keys include:
|
||||
model_name (SCRFDWeights, optional): Predefined model enum (e.g., `SCRFD_10G_KPS`).
|
||||
Specifies the SCRFD variant to load. Defaults to SCRFD_10G_KPS.
|
||||
conf_thresh (float, optional): Confidence threshold for filtering detections. Defaults to 0.5.
|
||||
nms_thresh (float, optional): Non-Maximum Suppression threshold. Defaults to 0.4.
|
||||
input_size (Tuple[int, int], optional): Input image size (width, height). Defaults to (640, 640).
|
||||
|
||||
Attributes:
|
||||
conf_thresh (float): Threshold used to filter low-confidence detections.
|
||||
nms_thresh (float): Threshold used during NMS to suppress overlapping boxes.
|
||||
input_size (Tuple[int, int]): Image size to which inputs are resized before inference.
|
||||
_fmc (int): Number of feature map levels used in the model.
|
||||
_feat_stride_fpn (List[int]): Feature map strides corresponding to each detection level.
|
||||
_num_anchors (int): Number of anchors per feature location.
|
||||
_center_cache (Dict): Cached anchor centers for efficient forward passes.
|
||||
_model_path (str): Absolute path to the downloaded/verified model weights.
|
||||
|
||||
Raises:
|
||||
ValueError: If the model weights are invalid or not found.
|
||||
RuntimeError: If the ONNX model fails to load or initialize.
|
||||
"""
|
||||
|
||||
def __init__(self, **kwargs) -> None:
|
||||
super().__init__(**kwargs)
|
||||
self._supports_landmarks = True # SCRFD supports landmarks
|
||||
|
||||
model_name = kwargs.get("model_name", SCRFDWeights.SCRFD_10G_KPS)
|
||||
conf_thresh = kwargs.get("conf_thresh", 0.5)
|
||||
nms_thresh = kwargs.get("nms_thresh", 0.4)
|
||||
input_size = kwargs.get("input_size", (640, 640))
|
||||
|
||||
self.conf_thresh = conf_thresh
|
||||
self.nms_thresh = nms_thresh
|
||||
self.input_size = input_size
|
||||
|
||||
# ------- SCRFD model params ------
|
||||
self._fmc = 3
|
||||
self._feat_stride_fpn = [8, 16, 32]
|
||||
self._num_anchors = 2
|
||||
self._center_cache = {}
|
||||
# ---------------------------------
|
||||
|
||||
Logger.info(
|
||||
f"Initializing SCRFD with model={model_name}, conf_thresh={conf_thresh}, nms_thresh={nms_thresh}, "
|
||||
f"input_size={input_size}"
|
||||
)
|
||||
|
||||
# Get path to model weights
|
||||
self._model_path = verify_model_weights(model_name)
|
||||
Logger.info(f"Verified model weights located at: {self._model_path}")
|
||||
|
||||
# Initialize model
|
||||
self._initialize_model(self._model_path)
|
||||
|
||||
def _initialize_model(self, model_path: str) -> None:
|
||||
"""
|
||||
Initializes an ONNX model session from the given path.
|
||||
|
||||
Args:
|
||||
model_path (str): The file path to the ONNX model.
|
||||
|
||||
Raises:
|
||||
RuntimeError: If the model fails to load, logs an error and raises an exception.
|
||||
"""
|
||||
try:
|
||||
self.session = create_onnx_session(model_path)
|
||||
self.input_names = self.session.get_inputs()[0].name
|
||||
self.output_names = [x.name for x in self.session.get_outputs()]
|
||||
Logger.info(f"Successfully initialized the model from {model_path}")
|
||||
except Exception as e:
|
||||
Logger.error(f"Failed to load model from '{model_path}': {e}", exc_info=True)
|
||||
raise RuntimeError(f"Failed to initialize model session for '{model_path}'") from e
|
||||
|
||||
def preprocess(self, image: np.ndarray) -> Tuple[np.ndarray, Tuple[int, int]]:
|
||||
"""Preprocess image for inference.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): Input image
|
||||
|
||||
Returns:
|
||||
Tuple[np.ndarray, Tuple[int, int]]: Preprocessed blob and input size
|
||||
"""
|
||||
image = image.astype(np.float32)
|
||||
image = (image - 127.5) / 127.5
|
||||
image = image.transpose(2, 0, 1) # HWC to CHW
|
||||
image = np.expand_dims(image, axis=0)
|
||||
|
||||
return image
|
||||
|
||||
def inference(self, input_tensor: np.ndarray) -> List[np.ndarray]:
|
||||
"""Perform model inference on the preprocessed image tensor.
|
||||
|
||||
Args:
|
||||
input_tensor (np.ndarray): Preprocessed input tensor.
|
||||
|
||||
Returns:
|
||||
Tuple[np.ndarray, np.ndarray]: Raw model outputs.
|
||||
"""
|
||||
return self.session.run(self.output_names, {self.input_names: input_tensor})
|
||||
|
||||
def postprocess(self, outputs: List[np.ndarray], image_size: Tuple[int, int]):
|
||||
scores_list = []
|
||||
bboxes_list = []
|
||||
kpss_list = []
|
||||
|
||||
image_size = image_size
|
||||
|
||||
fmc = self._fmc
|
||||
for idx, stride in enumerate(self._feat_stride_fpn):
|
||||
scores = outputs[idx]
|
||||
bbox_preds = outputs[fmc + idx] * stride
|
||||
kps_preds = outputs[2 * fmc + idx] * stride
|
||||
|
||||
# Generate anchors
|
||||
fm_height = image_size[0] // stride
|
||||
fm_width = image_size[1] // stride
|
||||
cache_key = (fm_height, fm_width, stride)
|
||||
|
||||
if cache_key in self._center_cache:
|
||||
anchor_centers = self._center_cache[cache_key]
|
||||
else:
|
||||
y, x = np.mgrid[:fm_height, :fm_width]
|
||||
anchor_centers = np.stack((x, y), axis=-1).astype(np.float32)
|
||||
anchor_centers = (anchor_centers * stride).reshape(-1, 2)
|
||||
|
||||
if self._num_anchors > 1:
|
||||
anchor_centers = np.tile(anchor_centers[:, None, :], (1, self._num_anchors, 1)).reshape(-1, 2)
|
||||
|
||||
if len(self._center_cache) < 100:
|
||||
self._center_cache[cache_key] = anchor_centers
|
||||
|
||||
pos_indices = np.where(scores >= self.conf_thresh)[0]
|
||||
if len(pos_indices) == 0:
|
||||
continue
|
||||
|
||||
bboxes = distance2bbox(anchor_centers, bbox_preds)[pos_indices]
|
||||
scores_selected = scores[pos_indices]
|
||||
scores_list.append(scores_selected)
|
||||
bboxes_list.append(bboxes)
|
||||
|
||||
landmarks = distance2kps(anchor_centers, kps_preds)
|
||||
landmarks = landmarks.reshape((landmarks.shape[0], -1, 2))
|
||||
kpss_list.append(landmarks[pos_indices])
|
||||
|
||||
return scores_list, bboxes_list, kpss_list
|
||||
|
||||
def detect(
|
||||
self,
|
||||
image: np.ndarray,
|
||||
max_num: int = 0,
|
||||
metric: Literal["default", "max"] = "max",
|
||||
center_weight: float = 2,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Perform face detection on an input image and return bounding boxes and facial landmarks.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): Input image as a NumPy array of shape (H, W, C).
|
||||
max_num (int): Maximum number of detections to return. Use 0 to return all detections. Defaults to 0.
|
||||
metric (Literal["default", "max"]): Metric for ranking detections when `max_num` is limited.
|
||||
- "default": Prioritize detections closer to the image center.
|
||||
- "max": Prioritize detections with larger bounding box areas.
|
||||
center_weight (float): Weight for penalizing detections farther from the image center
|
||||
when using the "default" metric. Defaults to 2.0.
|
||||
|
||||
Returns:
|
||||
List[Dict[str, Any]]: List of face detection dictionaries, each containing:
|
||||
- 'bbox': [x1, y1, x2, y2] - Bounding box coordinates
|
||||
- 'confidence': float - Detection confidence score
|
||||
- 'landmarks': [[x1, y1], [x2, y2], [x3, y3], [x4, y4], [x5, y5]] - 5-point facial landmarks
|
||||
"""
|
||||
|
||||
original_height, original_width = image.shape[:2]
|
||||
|
||||
image, resize_factor = resize_image(image, target_shape=self.input_size)
|
||||
|
||||
image_tensor = self.preprocess(image)
|
||||
|
||||
# ONNXRuntime inference
|
||||
outputs = self.inference(image_tensor)
|
||||
|
||||
scores_list, bboxes_list, kpss_list = self.postprocess(outputs, image_size=image.shape[:2])
|
||||
|
||||
# Handle case when no faces are detected
|
||||
if not scores_list:
|
||||
return []
|
||||
|
||||
scores = np.vstack(scores_list)
|
||||
scores_ravel = scores.ravel()
|
||||
order = scores_ravel.argsort()[::-1]
|
||||
|
||||
bboxes = np.vstack(bboxes_list) / resize_factor
|
||||
landmarks = np.vstack(kpss_list) / resize_factor
|
||||
|
||||
pre_det = np.hstack((bboxes, scores)).astype(np.float32, copy=False)
|
||||
pre_det = pre_det[order, :]
|
||||
|
||||
keep = non_max_supression(pre_det, threshold=self.nms_thresh)
|
||||
|
||||
detections = pre_det[keep, :]
|
||||
landmarks = landmarks[order, :, :]
|
||||
landmarks = landmarks[keep, :, :].astype(np.int32)
|
||||
|
||||
if 0 < max_num < detections.shape[0]:
|
||||
# Calculate area of detections
|
||||
area = (detections[:, 2] - detections[:, 0]) * (detections[:, 3] - detections[:, 1])
|
||||
|
||||
# Calculate offsets from image center
|
||||
center = (original_height // 2, original_width // 2)
|
||||
offsets = np.vstack(
|
||||
[
|
||||
(detections[:, 0] + detections[:, 2]) / 2 - center[1],
|
||||
(detections[:, 1] + detections[:, 3]) / 2 - center[0],
|
||||
]
|
||||
)
|
||||
|
||||
# Calculate scores based on the chosen metric
|
||||
offset_dist_squared = np.sum(np.power(offsets, 2.0), axis=0)
|
||||
if metric == "max":
|
||||
values = area
|
||||
else:
|
||||
values = area - offset_dist_squared * center_weight
|
||||
|
||||
# Sort by scores and select top `max_num`
|
||||
sorted_indices = np.argsort(values)[::-1][:max_num]
|
||||
detections = detections[sorted_indices]
|
||||
landmarks = landmarks[sorted_indices]
|
||||
|
||||
faces = []
|
||||
for i in range(detections.shape[0]):
|
||||
face_dict = {
|
||||
"bbox": detections[i, :4].astype(float).tolist(),
|
||||
"confidence": detections[i, 4].item(),
|
||||
"landmarks": landmarks[i].astype(float).tolist(),
|
||||
}
|
||||
faces.append(face_dict)
|
||||
|
||||
return faces
|
||||
|
||||
|
||||
# TODO: below is only for testing, remove it later
|
||||
def draw_bbox(frame, bbox, score, color=(0, 255, 0), thickness=2):
|
||||
x1, y1, x2, y2 = map(int, bbox) # Unpack 4 bbox values
|
||||
cv2.rectangle(frame, (x1, y1), (x2, y2), color, thickness)
|
||||
cv2.putText(frame, f"{score:.2f}", (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)
|
||||
|
||||
|
||||
def draw_keypoints(frame, points, color=(0, 0, 255), radius=2):
|
||||
for x, y in points.astype(np.int32):
|
||||
cv2.circle(frame, (int(x), int(y)), radius, color, -1)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
detector = SCRFD(model_name=SCRFDWeights.SCRFD_500M_KPS)
|
||||
print(detector.get_info())
|
||||
cap = cv2.VideoCapture(0)
|
||||
|
||||
if not cap.isOpened():
|
||||
print("Failed to open webcam.")
|
||||
exit()
|
||||
|
||||
print("Webcam started. Press 'q' to exit.")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
print("Failed to read frame.")
|
||||
break
|
||||
|
||||
# Get face detections as list of dictionaries
|
||||
faces = detector.detect(frame)
|
||||
|
||||
# Process each detected face
|
||||
for face in faces:
|
||||
# Extract bbox and landmarks from dictionary
|
||||
bbox = face["bbox"] # [x1, y1, x2, y2]
|
||||
landmarks = face["landmarks"] # [[x1, y1], [x2, y2], ...]
|
||||
confidence = face["confidence"]
|
||||
|
||||
# Pass bbox and confidence separately
|
||||
draw_bbox(frame, bbox, confidence)
|
||||
|
||||
# Convert landmarks to numpy array format if needed
|
||||
if landmarks is not None and len(landmarks) > 0:
|
||||
# Convert list of [x, y] pairs to numpy array
|
||||
points = np.array(landmarks, dtype=np.float32) # Shape: (5, 2)
|
||||
draw_keypoints(frame, points)
|
||||
|
||||
# Display face count
|
||||
cv2.putText(
|
||||
frame,
|
||||
f"Faces: {len(faces)}",
|
||||
(10, 30),
|
||||
cv2.FONT_HERSHEY_SIMPLEX,
|
||||
0.7,
|
||||
(255, 255, 255),
|
||||
2,
|
||||
)
|
||||
|
||||
cv2.imshow("FaceDetection", frame)
|
||||
if cv2.waitKey(1) & 0xFF == ord("q"):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
227
uniface/detection/utils.py
Normal file
@@ -0,0 +1,227 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
import itertools
|
||||
import math
|
||||
from typing import List, Tuple
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
|
||||
def resize_image(frame, target_shape: Tuple[int, int] = (640, 640)) -> Tuple[np.ndarray, float]:
|
||||
"""
|
||||
Resize an image to fit within a target shape while keeping its aspect ratio.
|
||||
|
||||
Args:
|
||||
frame (np.ndarray): Input image.
|
||||
target_shape (Tuple[int, int]): Target size (width, height). Defaults to (640, 640).
|
||||
|
||||
Returns:
|
||||
Tuple[np.ndarray, float]: Resized image on a blank canvas and the resize factor.
|
||||
"""
|
||||
width, height = target_shape
|
||||
|
||||
# Aspect-ratio preserving resize
|
||||
im_ratio = float(frame.shape[0]) / frame.shape[1]
|
||||
model_ratio = height / width
|
||||
if im_ratio > model_ratio:
|
||||
new_height = height
|
||||
new_width = int(new_height / im_ratio)
|
||||
else:
|
||||
new_width = width
|
||||
new_height = int(new_width * im_ratio)
|
||||
|
||||
resize_factor = float(new_height) / frame.shape[0]
|
||||
resized_frame = cv2.resize(frame, (new_width, new_height))
|
||||
|
||||
# Create blank image and place resized image on it
|
||||
image = np.zeros((height, width, 3), dtype=np.uint8)
|
||||
image[:new_height, :new_width, :] = resized_frame
|
||||
|
||||
return image, resize_factor
|
||||
|
||||
|
||||
def generate_anchors(image_size: Tuple[int, int] = (640, 640)) -> np.ndarray:
|
||||
"""
|
||||
Generate anchor boxes for a given image size.
|
||||
|
||||
Args:
|
||||
image_size (Tuple[int, int]): Input image size (width, height). Defaults to (640, 640).
|
||||
|
||||
Returns:
|
||||
np.ndarray: Anchor box coordinates as a NumPy array.
|
||||
"""
|
||||
image_size = image_size
|
||||
|
||||
steps = [8, 16, 32]
|
||||
min_sizes = [[16, 32], [64, 128], [256, 512]]
|
||||
|
||||
anchors = []
|
||||
feature_maps = [[math.ceil(image_size[0] / step), math.ceil(image_size[1] / step)] for step in steps]
|
||||
|
||||
for k, (map_height, map_width) in enumerate(feature_maps):
|
||||
step = steps[k]
|
||||
for i, j in itertools.product(range(map_height), range(map_width)):
|
||||
for min_size in min_sizes[k]:
|
||||
s_kx = min_size / image_size[1]
|
||||
s_ky = min_size / image_size[0]
|
||||
|
||||
dense_cx = [x * step / image_size[1] for x in [j + 0.5]]
|
||||
dense_cy = [y * step / image_size[0] for y in [i + 0.5]]
|
||||
for cy, cx in itertools.product(dense_cy, dense_cx):
|
||||
anchors += [cx, cy, s_kx, s_ky]
|
||||
|
||||
output = np.array(anchors, dtype=np.float32).reshape(-1, 4)
|
||||
return output
|
||||
|
||||
|
||||
def non_max_supression(dets: List[np.ndarray], threshold: float):
|
||||
"""
|
||||
Apply Non-Maximum Suppression (NMS) to reduce overlapping bounding boxes based on a threshold.
|
||||
|
||||
Args:
|
||||
dets (numpy.ndarray): Array of detections with each row as [x1, y1, x2, y2, score].
|
||||
threshold (float): IoU threshold for suppression.
|
||||
|
||||
Returns:
|
||||
list: Indices of bounding boxes retained after suppression.
|
||||
"""
|
||||
x1 = dets[:, 0]
|
||||
y1 = dets[:, 1]
|
||||
x2 = dets[:, 2]
|
||||
y2 = dets[:, 3]
|
||||
scores = dets[:, 4]
|
||||
|
||||
areas = (x2 - x1 + 1) * (y2 - y1 + 1)
|
||||
order = scores.argsort()[::-1]
|
||||
|
||||
keep = []
|
||||
while order.size > 0:
|
||||
i = order[0]
|
||||
keep.append(i)
|
||||
xx1 = np.maximum(x1[i], x1[order[1:]])
|
||||
yy1 = np.maximum(y1[i], y1[order[1:]])
|
||||
xx2 = np.minimum(x2[i], x2[order[1:]])
|
||||
yy2 = np.minimum(y2[i], y2[order[1:]])
|
||||
|
||||
w = np.maximum(0.0, xx2 - xx1 + 1)
|
||||
h = np.maximum(0.0, yy2 - yy1 + 1)
|
||||
inter = w * h
|
||||
ovr = inter / (areas[i] + areas[order[1:]] - inter)
|
||||
|
||||
inds = np.where(ovr <= threshold)[0]
|
||||
order = order[inds + 1]
|
||||
|
||||
return keep
|
||||
|
||||
|
||||
def decode_boxes(loc, priors, variances=[0.1, 0.2]) -> np.ndarray:
|
||||
"""
|
||||
Decode locations from predictions using priors to undo
|
||||
the encoding done for offset regression at train time.
|
||||
|
||||
Args:
|
||||
loc (np.ndarray): Location predictions for loc layers, shape: [num_priors, 4]
|
||||
priors (np.ndarray): Prior boxes in center-offset form, shape: [num_priors, 4]
|
||||
variances (list[float]): Variances of prior boxes
|
||||
|
||||
Returns:
|
||||
np.ndarray: Decoded bounding box predictions
|
||||
"""
|
||||
# Compute centers of predicted boxes
|
||||
cxcy = priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:]
|
||||
|
||||
# Compute widths and heights of predicted boxes
|
||||
wh = priors[:, 2:] * np.exp(loc[:, 2:] * variances[1])
|
||||
|
||||
# Convert center, size to corner coordinates
|
||||
boxes = np.zeros_like(loc)
|
||||
boxes[:, :2] = cxcy - wh / 2 # xmin, ymin
|
||||
boxes[:, 2:] = cxcy + wh / 2 # xmax, ymax
|
||||
|
||||
return boxes
|
||||
|
||||
|
||||
def decode_landmarks(predictions, priors, variances=[0.1, 0.2]) -> np.ndarray:
|
||||
"""
|
||||
Decode landmark predictions using prior boxes.
|
||||
|
||||
Args:
|
||||
predictions (np.ndarray): Landmark predictions, shape: [num_priors, 10]
|
||||
priors (np.ndarray): Prior boxes, shape: [num_priors, 4]
|
||||
variances (list): Scaling factors for landmark offsets.
|
||||
|
||||
Returns:
|
||||
np.ndarray: Decoded landmarks, shape: [num_priors, 10]
|
||||
"""
|
||||
|
||||
# Reshape predictions to [num_priors, 5, 2] to process landmark points
|
||||
predictions = predictions.reshape(predictions.shape[0], 5, 2)
|
||||
|
||||
# Expand priors to match (num_priors, 5, 2)
|
||||
priors_xy = np.repeat(priors[:, :2][:, np.newaxis, :], 5, axis=1) # (num_priors, 5, 2)
|
||||
priors_wh = np.repeat(priors[:, 2:][:, np.newaxis, :], 5, axis=1) # (num_priors, 5, 2)
|
||||
|
||||
# Compute absolute landmark positions
|
||||
landmarks = priors_xy + predictions * variances[0] * priors_wh
|
||||
|
||||
# Flatten back to [num_priors, 10]
|
||||
landmarks = landmarks.reshape(landmarks.shape[0], -1)
|
||||
|
||||
return landmarks
|
||||
|
||||
|
||||
def distance2bbox(points, distance, max_shape=None):
|
||||
"""Decode distance prediction to bounding box.
|
||||
|
||||
Args:
|
||||
points (Tensor): Shape (n, 2), [x, y].
|
||||
distance (Tensor): Distance from the given point to 4
|
||||
boundaries (left, top, right, bottom).
|
||||
max_shape (tuple): Shape of the image.
|
||||
|
||||
Returns:
|
||||
Tensor: Decoded bounding boxes with shape (n, 4).
|
||||
"""
|
||||
x1 = points[:, 0] - distance[:, 0]
|
||||
y1 = points[:, 1] - distance[:, 1]
|
||||
x2 = points[:, 0] + distance[:, 2]
|
||||
y2 = points[:, 1] + distance[:, 3]
|
||||
if max_shape is not None:
|
||||
x1 = x1.clamp(min=0, max=max_shape[1])
|
||||
y1 = y1.clamp(min=0, max=max_shape[0])
|
||||
x2 = x2.clamp(min=0, max=max_shape[1])
|
||||
y2 = y2.clamp(min=0, max=max_shape[0])
|
||||
else:
|
||||
x1 = np.maximum(x1, 0)
|
||||
y1 = np.maximum(y1, 0)
|
||||
x2 = np.maximum(x2, 0)
|
||||
y2 = np.maximum(y2, 0)
|
||||
|
||||
return np.stack([x1, y1, x2, y2], axis=-1)
|
||||
|
||||
|
||||
def distance2kps(points, distance, max_shape=None):
|
||||
"""Decode distance prediction to keypoints.
|
||||
|
||||
Args:
|
||||
points (Tensor): Shape (n, 2), [x, y].
|
||||
distance (Tensor): Distance from the given point to 4
|
||||
boundaries (left, top, right, bottom).
|
||||
max_shape (tuple): Shape of the image.
|
||||
|
||||
Returns:
|
||||
Tensor: Decoded keypoints with shape (n, 2k).
|
||||
"""
|
||||
preds = []
|
||||
for i in range(0, distance.shape[1], 2):
|
||||
px = points[:, i % 2] + distance[:, i]
|
||||
py = points[:, i % 2 + 1] + distance[:, i + 1]
|
||||
if max_shape is not None:
|
||||
px = px.clamp(min=0, max=max_shape[1])
|
||||
py = py.clamp(min=0, max=max_shape[0])
|
||||
preds.append(px)
|
||||
preds.append(py)
|
||||
return np.stack(preds, axis=-1)
|
||||
194
uniface/face_utils.py
Normal file
@@ -0,0 +1,194 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from typing import Tuple, Union
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
from skimage.transform import SimilarityTransform
|
||||
|
||||
__all__ = [
|
||||
"face_alignment",
|
||||
"compute_similarity",
|
||||
"bbox_center_alignment",
|
||||
"transform_points_2d",
|
||||
]
|
||||
|
||||
|
||||
# Reference alignment for facial landmarks (ArcFace)
|
||||
reference_alignment: np.ndarray = np.array(
|
||||
[
|
||||
[38.2946, 51.6963],
|
||||
[73.5318, 51.5014],
|
||||
[56.0252, 71.7366],
|
||||
[41.5493, 92.3655],
|
||||
[70.7299, 92.2041],
|
||||
],
|
||||
dtype=np.float32,
|
||||
)
|
||||
|
||||
|
||||
def estimate_norm(landmark: np.ndarray, image_size: Union[int, Tuple[int, int]] = 112) -> Tuple[np.ndarray, np.ndarray]:
|
||||
"""
|
||||
Estimate the normalization transformation matrix for facial landmarks.
|
||||
|
||||
Args:
|
||||
landmark (np.ndarray): Array of shape (5, 2) representing the coordinates of the facial landmarks.
|
||||
image_size (Union[int, Tuple[int, int]], optional): The size of the output image.
|
||||
Can be an integer (for square images) or a tuple (width, height). Default is 112.
|
||||
|
||||
Returns:
|
||||
np.ndarray: The 2x3 transformation matrix for aligning the landmarks.
|
||||
np.ndarray: The 2x3 inverse transformation matrix for aligning the landmarks.
|
||||
|
||||
Raises:
|
||||
AssertionError: If the input landmark array does not have the shape (5, 2)
|
||||
or if image_size is not a multiple of 112 or 128.
|
||||
"""
|
||||
assert landmark.shape == (5, 2), "Landmark array must have shape (5, 2)."
|
||||
|
||||
# Handle both int and tuple inputs
|
||||
if isinstance(image_size, tuple):
|
||||
size = image_size[0] # Use width for ratio calculation
|
||||
else:
|
||||
size = image_size
|
||||
|
||||
assert size % 112 == 0 or size % 128 == 0, "Image size must be a multiple of 112 or 128."
|
||||
|
||||
if size % 112 == 0:
|
||||
ratio = float(size) / 112.0
|
||||
diff_x = 0.0
|
||||
else:
|
||||
ratio = float(size) / 128.0
|
||||
diff_x = 8.0 * ratio
|
||||
|
||||
# Adjust reference alignment based on ratio and diff_x
|
||||
alignment = reference_alignment * ratio
|
||||
alignment[:, 0] += diff_x
|
||||
|
||||
# Compute the transformation matrix
|
||||
transform = SimilarityTransform()
|
||||
transform.estimate(landmark, alignment)
|
||||
|
||||
matrix = transform.params[0:2, :]
|
||||
inverse_matrix = np.linalg.inv(transform.params)[0:2, :]
|
||||
|
||||
return matrix, inverse_matrix
|
||||
|
||||
|
||||
def face_alignment(
|
||||
image: np.ndarray,
|
||||
landmark: np.ndarray,
|
||||
image_size: Union[int, Tuple[int, int]] = 112,
|
||||
) -> Tuple[np.ndarray, np.ndarray]:
|
||||
"""
|
||||
Align the face in the input image based on the given facial landmarks.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): Input image as a NumPy array.
|
||||
landmark (np.ndarray): Array of shape (5, 2) representing the coordinates of the facial landmarks.
|
||||
image_size (Union[int, Tuple[int, int]], optional): The size of the aligned output image.
|
||||
Can be an integer (for square images) or a tuple (width, height). Default is 112.
|
||||
|
||||
Returns:
|
||||
np.ndarray: The aligned face as a NumPy array.
|
||||
np.ndarray: The 2x3 transformation matrix used for alignment.
|
||||
"""
|
||||
# Get the transformation matrix
|
||||
M, M_inv = estimate_norm(landmark, image_size)
|
||||
|
||||
# Handle both int and tuple for warpAffine output size
|
||||
if isinstance(image_size, int):
|
||||
output_size = (image_size, image_size)
|
||||
else:
|
||||
output_size = image_size
|
||||
|
||||
# Warp the input image to align the face
|
||||
warped = cv2.warpAffine(image, M, output_size, borderValue=0.0)
|
||||
|
||||
return warped, M_inv
|
||||
|
||||
|
||||
def compute_similarity(feat1: np.ndarray, feat2: np.ndarray, normalized: bool = False) -> np.float32:
|
||||
"""Computing Similarity between two faces.
|
||||
|
||||
Args:
|
||||
feat1 (np.ndarray): First embedding.
|
||||
feat2 (np.ndarray): Second embedding.
|
||||
normalized (bool): Set True if the embeddings are already L2 normalized.
|
||||
|
||||
Returns:
|
||||
np.float32: Cosine similarity.
|
||||
"""
|
||||
feat1 = feat1.ravel()
|
||||
feat2 = feat2.ravel()
|
||||
if normalized:
|
||||
return np.dot(feat1, feat2)
|
||||
else:
|
||||
return np.dot(feat1, feat2) / (np.linalg.norm(feat1) * np.linalg.norm(feat2) + 1e-5)
|
||||
|
||||
|
||||
def bbox_center_alignment(image, center, output_size, scale, rotation):
|
||||
"""
|
||||
Apply center-based alignment, scaling, and rotation to an image.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): Input image.
|
||||
center (Tuple[float, float]): Center point (e.g., face center from bbox).
|
||||
output_size (int): Desired output image size (square).
|
||||
scale (float): Scaling factor to zoom in/out.
|
||||
rotation (float): Rotation angle in degrees (clockwise).
|
||||
|
||||
Returns:
|
||||
cropped (np.ndarray): Aligned and cropped image.
|
||||
M (np.ndarray): 2x3 affine transform matrix used.
|
||||
"""
|
||||
|
||||
# Convert rotation from degrees to radians
|
||||
rot = float(rotation) * np.pi / 180.0
|
||||
|
||||
# Scale the image
|
||||
t1 = SimilarityTransform(scale=scale)
|
||||
|
||||
# Translate the center point to the origin (after scaling)
|
||||
cx = center[0] * scale
|
||||
cy = center[1] * scale
|
||||
t2 = SimilarityTransform(translation=(-1 * cx, -1 * cy))
|
||||
|
||||
# Apply rotation around origin (center of face)
|
||||
t3 = SimilarityTransform(rotation=rot)
|
||||
|
||||
# Translate origin to center of output image
|
||||
t4 = SimilarityTransform(translation=(output_size / 2, output_size / 2))
|
||||
|
||||
# Combine all transformations in order: scale → center shift → rotate → recentralize
|
||||
t = t1 + t2 + t3 + t4
|
||||
|
||||
# Extract 2x3 affine matrix
|
||||
M = t.params[0:2]
|
||||
|
||||
# Warp the image using OpenCV
|
||||
cropped = cv2.warpAffine(image, M, (output_size, output_size), borderValue=0.0)
|
||||
|
||||
return cropped, M
|
||||
|
||||
|
||||
def transform_points_2d(points: np.ndarray, transform: np.ndarray) -> np.ndarray:
|
||||
"""
|
||||
Apply a 2D affine transformation to an array of 2D points.
|
||||
|
||||
Args:
|
||||
points (np.ndarray): An (N, 2) array of 2D points.
|
||||
transform (np.ndarray): A (2, 3) affine transformation matrix.
|
||||
|
||||
Returns:
|
||||
np.ndarray: Transformed (N, 2) array of points.
|
||||
"""
|
||||
transformed = np.zeros_like(points, dtype=np.float32)
|
||||
for i in range(points.shape[0]):
|
||||
point = np.array([points[i, 0], points[i, 1], 1.0], dtype=np.float32)
|
||||
result = np.dot(transform, point)
|
||||
transformed[i] = result[:2]
|
||||
|
||||
return transformed
|
||||
28
uniface/landmark/__init__.py
Normal file
@@ -0,0 +1,28 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from .base import BaseLandmarker
|
||||
from .models import Landmark106
|
||||
|
||||
|
||||
def create_landmarker(method: str = "2d106det", **kwargs) -> BaseLandmarker:
|
||||
"""
|
||||
Factory function to create facial landmark predictors.
|
||||
|
||||
Args:
|
||||
method (str): Landmark prediction method. Options: '106'.
|
||||
**kwargs: Model-specific parameters.
|
||||
|
||||
Returns:
|
||||
Initialized landmarker instance.
|
||||
"""
|
||||
method = method.lower()
|
||||
if method == "2d106det":
|
||||
return Landmark106(**kwargs)
|
||||
else:
|
||||
available = ["2d106det"]
|
||||
raise ValueError(f"Unsupported method: '{method}'. Available: {available}")
|
||||
|
||||
|
||||
__all__ = ["create_landmarker", "Landmark106", "BaseLandmarker"]
|
||||
32
uniface/landmark/base.py
Normal file
@@ -0,0 +1,32 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
import numpy as np
|
||||
|
||||
|
||||
class BaseLandmarker(ABC):
|
||||
"""
|
||||
Abstract Base Class for all facial landmark models.
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def get_landmarks(self, image: np.ndarray, bbox: np.ndarray) -> np.ndarray:
|
||||
"""
|
||||
Predicts facial landmarks for a given face bounding box.
|
||||
|
||||
This method defines the standard interface for all landmark predictors.
|
||||
It takes a full image and a bounding box for a single face and returns
|
||||
the predicted keypoints for that face.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): The full source image in BGR format.
|
||||
bbox (np.ndarray): A bounding box of a face [x1, y1, x2, y2].
|
||||
|
||||
Returns:
|
||||
np.ndarray: An array of predicted landmark points with shape (N, 2),
|
||||
where N is the number of landmarks.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
212
uniface/landmark/models.py
Normal file
@@ -0,0 +1,212 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from typing import Tuple
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from uniface.constants import LandmarkWeights
|
||||
from uniface.face_utils import bbox_center_alignment, transform_points_2d
|
||||
from uniface.log import Logger
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.onnx_utils import create_onnx_session
|
||||
|
||||
from .base import BaseLandmarker
|
||||
|
||||
__all__ = ["Landmark"]
|
||||
|
||||
|
||||
class Landmark106(BaseLandmarker):
|
||||
"""Facial landmark model for predicting 106 facial keypoints.
|
||||
|
||||
This class implements the BaseLandmarker and provides an end-to-end
|
||||
pipeline for 106-point facial landmark detection. It handles model
|
||||
loading, preprocessing of a face crop based on a bounding box,
|
||||
inference, and post-processing to map landmarks back to the
|
||||
original image coordinates.
|
||||
|
||||
Args:
|
||||
model_name (LandmarkWeights): The enum specifying the landmark model to load.
|
||||
Defaults to `LandmarkWeights.DEFAULT`.
|
||||
input_size (Tuple[int, int]): The resolution (width, height) for the model's
|
||||
input. Defaults to (192, 192).
|
||||
|
||||
Example:
|
||||
>>> # Assume 'image' is a loaded image and 'bbox' is a face bounding box
|
||||
>>> # bbox = [x1, y1, x2, y2]
|
||||
>>>
|
||||
>>> landmarker = Landmark106()
|
||||
>>> landmarks = landmarker.get_landmarks(image, bbox)
|
||||
>>> print(landmarks.shape)
|
||||
(106, 2)
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
model_name: LandmarkWeights = LandmarkWeights.DEFAULT,
|
||||
input_size: Tuple[int, int] = (192, 192),
|
||||
) -> None:
|
||||
Logger.info(f"Initializing Facial Landmark with model={model_name}, input_size={input_size}")
|
||||
self.input_size = input_size
|
||||
self.input_std = 1.0
|
||||
self.input_mean = 0.0
|
||||
self.model_path = verify_model_weights(model_name)
|
||||
self._initialize_model()
|
||||
|
||||
def _initialize_model(self):
|
||||
"""
|
||||
Initialize the ONNX model from the stored model path.
|
||||
|
||||
Raises:
|
||||
RuntimeError: If the model fails to load or initialize.
|
||||
"""
|
||||
try:
|
||||
self.session = create_onnx_session(self.model_path)
|
||||
|
||||
# Get input configuration
|
||||
input_metadata = self.session.get_inputs()[0]
|
||||
input_shape = input_metadata.shape
|
||||
self.input_size = tuple(input_shape[2:4][::-1]) # Update input size from model
|
||||
|
||||
# Get input/output names
|
||||
self.input_names = [input.name for input in self.session.get_inputs()]
|
||||
self.output_names = [output.name for output in self.session.get_outputs()]
|
||||
|
||||
# Determine landmark dimensions from output shape
|
||||
output_shape = self.session.get_outputs()[0].shape
|
||||
self.lmk_dim = 2 # x,y coordinates
|
||||
self.lmk_num = output_shape[1] // self.lmk_dim # Number of landmarks
|
||||
|
||||
Logger.info(f"Model initialized with {self.lmk_num} landmarks")
|
||||
|
||||
except Exception as e:
|
||||
Logger.error(f"Failed to load landmark model from '{self.model_path}'", exc_info=True)
|
||||
raise RuntimeError(f"Failed to initialize landmark model: {e}") from e
|
||||
|
||||
def preprocess(self, image: np.ndarray, bbox: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
|
||||
"""Prepares a face crop for inference.
|
||||
|
||||
This method takes a face bounding box, performs a center alignment to
|
||||
warp the face into the model's required input size, and then creates
|
||||
a normalized blob ready for the ONNX session.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): The full source image in BGR format.
|
||||
bbox (np.ndarray): The bounding box of the face [x1, y1, x2, y2].
|
||||
|
||||
Returns:
|
||||
Tuple[np.ndarray, np.ndarray]: A tuple containing:
|
||||
- The preprocessed image blob ready for inference.
|
||||
- The affine transformation matrix used for alignment.
|
||||
"""
|
||||
width, height = bbox[2] - bbox[0], bbox[3] - bbox[1]
|
||||
center = ((bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2)
|
||||
scale = self.input_size[0] / (max(width, height) * 1.5)
|
||||
|
||||
aligned_face, transform_matrix = bbox_center_alignment(image, center, self.input_size[0], scale, 0.0)
|
||||
|
||||
face_blob = cv2.dnn.blobFromImage(
|
||||
aligned_face,
|
||||
1.0 / self.input_std,
|
||||
self.input_size,
|
||||
(self.input_mean, self.input_mean, self.input_mean),
|
||||
swapRB=True,
|
||||
)
|
||||
return face_blob, transform_matrix
|
||||
|
||||
def postprocess(self, predictions: np.ndarray, transform_matrix: np.ndarray) -> np.ndarray:
|
||||
"""Converts raw model predictions back to original image coordinates.
|
||||
|
||||
This method reshapes the model's flat output array into landmark points,
|
||||
denormalizes them to the model's input space, and then applies an
|
||||
inverse affine transformation to map them back to the original image space.
|
||||
|
||||
Args:
|
||||
predictions (np.ndarray): Raw landmark coordinates from the model output.
|
||||
transform_matrix (np.ndarray): The affine transformation matrix from preprocessing.
|
||||
|
||||
Returns:
|
||||
np.ndarray: An array of landmark points in the original image's coordinates.
|
||||
"""
|
||||
landmarks = predictions.reshape((-1, 2))
|
||||
landmarks[:, 0:2] += 1
|
||||
landmarks[:, 0:2] *= self.input_size[0] // 2
|
||||
|
||||
inverse_matrix = cv2.invertAffineTransform(transform_matrix)
|
||||
landmarks = transform_points_2d(landmarks, inverse_matrix)
|
||||
return landmarks
|
||||
|
||||
def get_landmarks(self, image: np.ndarray, bbox: np.ndarray) -> np.ndarray:
|
||||
"""Predicts facial landmarks for the given image and face bounding box.
|
||||
|
||||
This is the main public method that orchestrates the full pipeline of
|
||||
preprocessing, inference, and post-processing.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): The full source image in BGR format.
|
||||
bbox (np.ndarray): A bounding box of a face [x1, y1, x2, y2].
|
||||
|
||||
Returns:
|
||||
np.ndarray: An array of predicted landmark points with shape (106, 2).
|
||||
"""
|
||||
face_blob, transform_matrix = self.preprocess(image, bbox)
|
||||
raw_predictions = self.session.run(self.output_names, {self.input_names[0]: face_blob})[0][0]
|
||||
landmarks = self.postprocess(raw_predictions, transform_matrix)
|
||||
return landmarks
|
||||
|
||||
|
||||
# Testing code
|
||||
if __name__ == "__main__":
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.landmark import Landmark106
|
||||
|
||||
face_detector = RetinaFace()
|
||||
landmarker = Landmark106()
|
||||
|
||||
cap = cv2.VideoCapture(0)
|
||||
if not cap.isOpened():
|
||||
print("Webcam not available.")
|
||||
exit()
|
||||
|
||||
print("Press 'q' to quit.")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
print("Frame capture failed.")
|
||||
break
|
||||
|
||||
# 2. The detect method returns a list of dictionaries
|
||||
faces = face_detector.detect(frame)
|
||||
|
||||
if not faces:
|
||||
cv2.imshow("Facial Landmark Detection", frame)
|
||||
if cv2.waitKey(1) & 0xFF == ord("q"):
|
||||
break
|
||||
continue
|
||||
|
||||
# 3. Loop through the list of face dictionaries
|
||||
for face in faces:
|
||||
# Extract the bounding box
|
||||
bbox = face["bbox"]
|
||||
|
||||
# 4. Get landmarks for the current face using its bounding box
|
||||
landmarks = landmarker.get_landmarks(frame, bbox)
|
||||
|
||||
# --- Drawing Logic ---
|
||||
# Draw the landmarks
|
||||
for x, y in landmarks.astype(int):
|
||||
cv2.circle(frame, (x, y), 2, (0, 255, 0), -1)
|
||||
|
||||
# Draw the bounding box
|
||||
x1, y1, x2, y2 = map(int, bbox)
|
||||
cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)
|
||||
|
||||
cv2.imshow("Facial Landmark Detection", frame)
|
||||
if cv2.waitKey(1) & 0xFF == ord("q"):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
@@ -1,7 +1,25 @@
|
||||
import logging
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format="%(asctime)s - %(levelname)s - %(message)s"
|
||||
)
|
||||
Logger = logging.getLogger("retinaface")
|
||||
# Create logger for uniface
|
||||
Logger = logging.getLogger("uniface")
|
||||
Logger.setLevel(logging.WARNING) # Only show warnings/errors by default
|
||||
Logger.addHandler(logging.NullHandler())
|
||||
|
||||
|
||||
def enable_logging(level=logging.INFO):
|
||||
"""
|
||||
Enable verbose logging for uniface.
|
||||
|
||||
Args:
|
||||
level: Logging level (logging.DEBUG, logging.INFO, etc.)
|
||||
|
||||
Example:
|
||||
>>> from uniface import enable_logging
|
||||
>>> enable_logging() # Show INFO logs
|
||||
"""
|
||||
Logger.handlers.clear()
|
||||
handler = logging.StreamHandler()
|
||||
handler.setFormatter(logging.Formatter("%(asctime)s - %(levelname)s - %(message)s", datefmt="%Y-%m-%d %H:%M:%S"))
|
||||
Logger.addHandler(handler)
|
||||
Logger.setLevel(level)
|
||||
Logger.propagate = False
|
||||
|
||||
@@ -1,102 +1,115 @@
|
||||
# Copyright 2024 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
import os
|
||||
import hashlib
|
||||
import requests
|
||||
|
||||
from uniface.log import Logger
|
||||
import uniface.constants as const
|
||||
|
||||
|
||||
def verify_model_weights(model_name: str, root: str = '~/.uniface/models') -> str:
|
||||
"""
|
||||
Ensures model weights are available by downloading if missing and verifying integrity with a SHA-256 hash.
|
||||
|
||||
Checks if the specified model weights file exists in `root`. If missing, downloads from a predefined URL.
|
||||
The file is then verified using its SHA-256 hash. If verification fails, the corrupted file is deleted,
|
||||
and an error is raised.
|
||||
|
||||
Args:
|
||||
model_name (str): Name of the model weights to verify or download.
|
||||
root (str, optional): Directory to store the model weights. Defaults to '~/.uniface/models'.
|
||||
|
||||
Returns:
|
||||
str: Path to the verified model weights file.
|
||||
|
||||
Raises:
|
||||
ValueError: If the model is not found or if verification fails.
|
||||
ConnectionError: If downloading the file fails.
|
||||
|
||||
Examples:
|
||||
>>> # Download and verify 'retinaface_mnet025' weights
|
||||
>>> verify_model_weights('retinaface_mnet025')
|
||||
'/home/user/.uniface/models/retinaface_mnet025.onnx'
|
||||
|
||||
>>> # Use a custom directory
|
||||
>>> verify_model_weights('retinaface_r34', root='/custom/dir')
|
||||
'/custom/dir/retinaface_r34.onnx'
|
||||
"""
|
||||
|
||||
root = os.path.expanduser(root)
|
||||
os.makedirs(root, exist_ok=True)
|
||||
model_path = os.path.join(root, f'{model_name}.onnx')
|
||||
|
||||
if not os.path.exists(model_path):
|
||||
url = const.MODEL_URLS.get(model_name)
|
||||
if not url:
|
||||
Logger.error(f"No URL found for model '{model_name}'")
|
||||
raise ValueError(f"No URL found for model '{model_name}'")
|
||||
|
||||
Logger.info(f"Downloading '{model_name}' from {url}")
|
||||
download_file(url, model_path)
|
||||
Logger.info(f"Successfully '{model_name}' downloaded to {model_path}")
|
||||
|
||||
expected_hash = const.MODEL_SHA256.get(model_name)
|
||||
if expected_hash and not verify_file_hash(model_path, expected_hash):
|
||||
os.remove(model_path) # Remove corrupted file
|
||||
Logger.warning("Corrupted weight detected. Removing...")
|
||||
raise ValueError(f"Hash mismatch for '{model_name}'. The file may be corrupted; please try downloading again.")
|
||||
|
||||
return model_path
|
||||
|
||||
|
||||
def download_file(url: str, dest_path: str) -> None:
|
||||
"""Download a file from a URL in chunks and save it to the destination path."""
|
||||
try:
|
||||
response = requests.get(url, stream=True)
|
||||
response.raise_for_status()
|
||||
with open(dest_path, "wb") as file:
|
||||
for chunk in response.iter_content(chunk_size=const.CHUNK_SIZE):
|
||||
if chunk:
|
||||
file.write(chunk)
|
||||
except requests.RequestException as e:
|
||||
raise ConnectionError(f"Failed to download file from {url}. Error: {e}")
|
||||
|
||||
|
||||
def verify_file_hash(file_path: str, expected_hash: str) -> bool:
|
||||
"""Compute the SHA-256 hash of the file and compare it with the expected hash."""
|
||||
file_hash = hashlib.sha256()
|
||||
with open(file_path, "rb") as f:
|
||||
for chunk in iter(lambda: f.read(const.CHUNK_SIZE), b""):
|
||||
file_hash.update(chunk)
|
||||
actual_hash = file_hash.hexdigest()
|
||||
if actual_hash != expected_hash:
|
||||
Logger.warning(f"Expected hash: {expected_hash}, but got: {actual_hash}")
|
||||
return actual_hash == expected_hash
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
model_names = [
|
||||
'retinaface_mnet025',
|
||||
'retinaface_mnet050',
|
||||
'retinaface_mnet_v1',
|
||||
'retinaface_mnet_v2',
|
||||
'retinaface_r18',
|
||||
'retinaface_r34'
|
||||
]
|
||||
|
||||
# Download each model in the list
|
||||
for model_name in model_names:
|
||||
model_path = verify_model_weights(model_name)
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
import hashlib
|
||||
import os
|
||||
|
||||
import requests
|
||||
from tqdm import tqdm
|
||||
|
||||
import uniface.constants as const
|
||||
from uniface.log import Logger
|
||||
|
||||
__all__ = ["verify_model_weights"]
|
||||
|
||||
|
||||
def verify_model_weights(model_name: str, root: str = "~/.uniface/models") -> str:
|
||||
"""
|
||||
Ensure model weights are present, downloading and verifying them using SHA-256 if necessary.
|
||||
|
||||
Given a model identifier from an Enum class (e.g., `RetinaFaceWeights.MNET_V2`), this function checks if
|
||||
the corresponding `.onnx` weight file exists locally. If not, it downloads the file from a predefined URL.
|
||||
After download, the file’s integrity is verified using a SHA-256 hash. If verification fails, the file is deleted
|
||||
and an error is raised.
|
||||
|
||||
Args:
|
||||
model_name (Enum): Model weight identifier (e.g., `RetinaFaceWeights.MNET_V2`, `ArcFaceWeights.RESNET`, etc.).
|
||||
root (str, optional): Directory to store or locate the model weights. Defaults to '~/.uniface/models'.
|
||||
|
||||
Returns:
|
||||
str: Absolute path to the verified model weights file.
|
||||
|
||||
Raises:
|
||||
ValueError: If the model is unknown or SHA-256 verification fails.
|
||||
ConnectionError: If downloading the file fails.
|
||||
|
||||
Examples:
|
||||
>>> from uniface.models import RetinaFaceWeights, verify_model_weights
|
||||
>>> verify_model_weights(RetinaFaceWeights.MNET_V2)
|
||||
'/home/user/.uniface/models/retinaface_mnet_v2.onnx'
|
||||
|
||||
>>> verify_model_weights(RetinaFaceWeights.RESNET34, root='/custom/dir')
|
||||
'/custom/dir/retinaface_r34.onnx'
|
||||
"""
|
||||
|
||||
root = os.path.expanduser(root)
|
||||
os.makedirs(root, exist_ok=True)
|
||||
|
||||
# Keep model_name as enum for dictionary lookup
|
||||
url = const.MODEL_URLS.get(model_name)
|
||||
if not url:
|
||||
Logger.error(f"No URL found for model '{model_name}'")
|
||||
raise ValueError(f"No URL found for model '{model_name}'")
|
||||
|
||||
file_ext = os.path.splitext(url)[1]
|
||||
model_path = os.path.normpath(os.path.join(root, f"{model_name.value}{file_ext}"))
|
||||
|
||||
if not os.path.exists(model_path):
|
||||
Logger.info(f"Downloading model '{model_name}' from {url}")
|
||||
try:
|
||||
download_file(url, model_path)
|
||||
Logger.info(f"Successfully downloaded '{model_name}' to {model_path}")
|
||||
except Exception as e:
|
||||
Logger.error(f"Failed to download model '{model_name}': {e}")
|
||||
raise ConnectionError(f"Download failed for '{model_name}'") from e
|
||||
|
||||
expected_hash = const.MODEL_SHA256.get(model_name)
|
||||
if expected_hash and not verify_file_hash(model_path, expected_hash):
|
||||
os.remove(model_path) # Remove corrupted file
|
||||
Logger.warning("Corrupted weight detected. Removing...")
|
||||
raise ValueError(f"Hash mismatch for '{model_name}'. The file may be corrupted; please try downloading again.")
|
||||
|
||||
return model_path
|
||||
|
||||
|
||||
def download_file(url: str, dest_path: str) -> None:
|
||||
"""Download a file from a URL in chunks and save it to the destination path."""
|
||||
try:
|
||||
response = requests.get(url, stream=True)
|
||||
response.raise_for_status()
|
||||
with (
|
||||
open(dest_path, "wb") as file,
|
||||
tqdm(
|
||||
desc=f"Downloading {dest_path}",
|
||||
unit="B",
|
||||
unit_scale=True,
|
||||
unit_divisor=1024,
|
||||
) as progress,
|
||||
):
|
||||
for chunk in response.iter_content(chunk_size=const.CHUNK_SIZE):
|
||||
if chunk:
|
||||
file.write(chunk)
|
||||
progress.update(len(chunk))
|
||||
except requests.RequestException as e:
|
||||
raise ConnectionError(f"Failed to download file from {url}. Error: {e}") from e
|
||||
|
||||
|
||||
def verify_file_hash(file_path: str, expected_hash: str) -> bool:
|
||||
"""Compute the SHA-256 hash of the file and compare it with the expected hash."""
|
||||
file_hash = hashlib.sha256()
|
||||
with open(file_path, "rb") as f:
|
||||
for chunk in iter(lambda: f.read(const.CHUNK_SIZE), b""):
|
||||
file_hash.update(chunk)
|
||||
actual_hash = file_hash.hexdigest()
|
||||
if actual_hash != expected_hash:
|
||||
Logger.warning(f"Expected hash: {expected_hash}, but got: {actual_hash}")
|
||||
return actual_hash == expected_hash
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
model_names = [model.value for model in const.RetinaFaceWeights]
|
||||
|
||||
# Download each model in the list
|
||||
for model_name in model_names:
|
||||
model_path = verify_model_weights(model_name)
|
||||
|
||||
102
uniface/onnx_utils.py
Normal file
@@ -0,0 +1,102 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""
|
||||
Utilities for ONNX Runtime configuration and provider selection.
|
||||
"""
|
||||
|
||||
from typing import List
|
||||
|
||||
import onnxruntime as ort
|
||||
|
||||
from uniface.log import Logger
|
||||
|
||||
|
||||
def get_available_providers() -> List[str]:
|
||||
"""
|
||||
Get list of available ONNX Runtime execution providers for the current platform.
|
||||
|
||||
Automatically detects and prioritizes hardware acceleration:
|
||||
- CoreML on Apple Silicon (M1/M2/M3/M4)
|
||||
- CUDA on NVIDIA GPUs
|
||||
- CPU as fallback (always available)
|
||||
|
||||
Returns:
|
||||
List[str]: Ordered list of execution providers to use
|
||||
|
||||
Examples:
|
||||
>>> providers = get_available_providers()
|
||||
>>> # On M4 Mac: ['CoreMLExecutionProvider', 'CPUExecutionProvider']
|
||||
>>> # On Linux with CUDA: ['CUDAExecutionProvider', 'CPUExecutionProvider']
|
||||
>>> # On CPU-only: ['CPUExecutionProvider']
|
||||
"""
|
||||
available = ort.get_available_providers()
|
||||
providers = []
|
||||
|
||||
# Priority order: CoreML > CUDA > CPU
|
||||
if "CoreMLExecutionProvider" in available:
|
||||
providers.append("CoreMLExecutionProvider")
|
||||
Logger.info("CoreML acceleration enabled (Apple Silicon)")
|
||||
|
||||
if "CUDAExecutionProvider" in available:
|
||||
providers.append("CUDAExecutionProvider")
|
||||
Logger.info("CUDA acceleration enabled (NVIDIA GPU)")
|
||||
|
||||
# CPU is always available as fallback
|
||||
providers.append("CPUExecutionProvider")
|
||||
|
||||
if len(providers) == 1:
|
||||
Logger.info("Using CPU execution (no hardware acceleration detected)")
|
||||
|
||||
return providers
|
||||
|
||||
|
||||
def create_onnx_session(model_path: str, providers: List[str] = None) -> ort.InferenceSession:
|
||||
"""
|
||||
Create an ONNX Runtime inference session with optimal provider selection.
|
||||
|
||||
Args:
|
||||
model_path (str): Path to the ONNX model file
|
||||
providers (List[str], optional): List of providers to use.
|
||||
If None, automatically detects best available providers.
|
||||
|
||||
Returns:
|
||||
ort.InferenceSession: Configured ONNX Runtime session
|
||||
|
||||
Raises:
|
||||
RuntimeError: If session creation fails
|
||||
|
||||
Examples:
|
||||
>>> session = create_onnx_session("model.onnx")
|
||||
>>> # Automatically uses best available providers
|
||||
|
||||
>>> session = create_onnx_session("model.onnx", providers=["CPUExecutionProvider"])
|
||||
>>> # Force CPU-only execution
|
||||
"""
|
||||
if providers is None:
|
||||
providers = get_available_providers()
|
||||
|
||||
# Suppress ONNX Runtime warnings (e.g., CoreML partition warnings)
|
||||
# Log levels: 0=VERBOSE, 1=INFO, 2=WARNING, 3=ERROR, 4=FATAL
|
||||
sess_options = ort.SessionOptions()
|
||||
sess_options.log_severity_level = 3 # Only show ERROR and FATAL
|
||||
|
||||
try:
|
||||
session = ort.InferenceSession(model_path, sess_options=sess_options, providers=providers)
|
||||
active_provider = session.get_providers()[0]
|
||||
Logger.debug(f"Session created with provider: {active_provider}")
|
||||
|
||||
# Show user-friendly message about which provider is being used
|
||||
provider_names = {
|
||||
"CoreMLExecutionProvider": "CoreML (Apple Silicon)",
|
||||
"CUDAExecutionProvider": "CUDA (NVIDIA GPU)",
|
||||
"CPUExecutionProvider": "CPU",
|
||||
}
|
||||
provider_display = provider_names.get(active_provider, active_provider)
|
||||
print(f"Model loaded ({provider_display})")
|
||||
|
||||
return session
|
||||
except Exception as e:
|
||||
Logger.error(f"Failed to create ONNX session: {e}", exc_info=True)
|
||||
raise RuntimeError(f"Failed to initialize ONNX Runtime session: {e}") from e
|
||||
64
uniface/recognition/__init__.py
Normal file
@@ -0,0 +1,64 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
|
||||
from .base import BaseRecognizer
|
||||
from .models import ArcFace, MobileFace, SphereFace
|
||||
|
||||
|
||||
def create_recognizer(method: str = "arcface", **kwargs) -> BaseRecognizer:
|
||||
"""
|
||||
Factory function to create face recognizers.
|
||||
|
||||
This function initializes and returns a face recognizer instance based on the
|
||||
specified method. It acts as a high-level interface to the underlying
|
||||
model classes like ArcFace, MobileFace, etc.
|
||||
|
||||
Args:
|
||||
method (str): The recognition method to use.
|
||||
Options: 'arcface' (default), 'mobileface', 'sphereface'.
|
||||
**kwargs: Model-specific parameters passed to the recognizer's constructor.
|
||||
For example, `model_name` can be used to select a specific
|
||||
pre-trained weight from the available enums (e.g., `ArcFaceWeights.MNET`).
|
||||
|
||||
Returns:
|
||||
BaseRecognizer: An initialized recognizer instance ready for use.
|
||||
|
||||
Raises:
|
||||
ValueError: If the specified `method` is not supported.
|
||||
|
||||
Examples:
|
||||
>>> # Create the default ArcFace recognizer
|
||||
>>> recognizer = create_recognizer()
|
||||
|
||||
>>> # Create a specific MobileFace recognizer
|
||||
>>> from uniface.constants import MobileFaceWeights
|
||||
>>> recognizer = create_recognizer(
|
||||
... 'mobileface',
|
||||
... model_name=MobileFaceWeights.MNET_V2
|
||||
... )
|
||||
|
||||
>>> # Create a SphereFace recognizer
|
||||
>>> recognizer = create_recognizer('sphereface')
|
||||
"""
|
||||
method = method.lower()
|
||||
|
||||
if method == "arcface":
|
||||
return ArcFace(**kwargs)
|
||||
elif method == "mobileface":
|
||||
return MobileFace(**kwargs)
|
||||
elif method == "sphereface":
|
||||
return SphereFace(**kwargs)
|
||||
else:
|
||||
available = ["arcface", "mobileface", "sphereface"]
|
||||
raise ValueError(f"Unsupported method: '{method}'. Available: {available}")
|
||||
|
||||
|
||||
__all__ = [
|
||||
"create_recognizer",
|
||||
"ArcFace",
|
||||
"MobileFace",
|
||||
"SphereFace",
|
||||
"BaseRecognizer",
|
||||
]
|
||||
156
uniface/recognition/base.py
Normal file
@@ -0,0 +1,156 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass
|
||||
from typing import List, Tuple, Union
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from uniface.face_utils import face_alignment
|
||||
from uniface.log import Logger
|
||||
from uniface.onnx_utils import create_onnx_session
|
||||
|
||||
|
||||
@dataclass
|
||||
class PreprocessConfig:
|
||||
"""
|
||||
Configuration for preprocessing images before feeding them into the model.
|
||||
"""
|
||||
|
||||
input_mean: Union[float, List[float]] = 127.5
|
||||
input_std: Union[float, List[float]] = 127.5
|
||||
input_size: Tuple[int, int] = (112, 112)
|
||||
|
||||
|
||||
class BaseRecognizer(ABC):
|
||||
"""
|
||||
Abstract Base Class for all face recognition models.
|
||||
It provides the core functionality for preprocessing, inference, and embedding extraction.
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def __init__(self, model_path: str, preprocessing: PreprocessConfig) -> None:
|
||||
"""
|
||||
Initializes the model. Subclasses must call this.
|
||||
|
||||
Args:
|
||||
model_path (str): The direct path to the verified ONNX model.
|
||||
preprocessing (PreprocessConfig): The configuration for preprocessing.
|
||||
"""
|
||||
self.input_mean = preprocessing.input_mean
|
||||
self.input_std = preprocessing.input_std
|
||||
self.input_size = preprocessing.input_size
|
||||
|
||||
self.model_path = model_path
|
||||
self._initialize_model()
|
||||
|
||||
def _initialize_model(self) -> None:
|
||||
"""
|
||||
Loads the ONNX model and prepares it for inference.
|
||||
|
||||
Raises:
|
||||
RuntimeError: If the model fails to load or initialize.
|
||||
"""
|
||||
try:
|
||||
# Initialize model session with available providers
|
||||
self.session = create_onnx_session(self.model_path)
|
||||
|
||||
# Extract input configuration
|
||||
input_cfg = self.session.get_inputs()[0]
|
||||
self.input_name = input_cfg.name
|
||||
|
||||
# Verify input dimensions match our configuration
|
||||
input_shape = input_cfg.shape
|
||||
model_input_size = tuple(input_shape[2:4][::-1]) # (width, height)
|
||||
if model_input_size != self.input_size:
|
||||
Logger.warning(f"Model input size {model_input_size} differs from configured size {self.input_size}")
|
||||
|
||||
# Extract output configuration
|
||||
self.output_names = [output.name for output in self.session.get_outputs()]
|
||||
self.output_shape = self.session.get_outputs()[0].shape
|
||||
|
||||
assert len(self.output_names) == 1, "Expected only one output node."
|
||||
Logger.info(f"Successfully initialized face encoder from {self.model_path}")
|
||||
|
||||
except Exception as e:
|
||||
Logger.error(
|
||||
f"Failed to load face encoder model from '{self.model_path}'",
|
||||
exc_info=True,
|
||||
)
|
||||
raise RuntimeError(f"Failed to initialize model session for '{self.model_path}'") from e
|
||||
|
||||
def preprocess(self, face_img: np.ndarray) -> np.ndarray:
|
||||
"""
|
||||
Preprocess the image: resize, normalize, and convert it to a blob.
|
||||
|
||||
Args:
|
||||
face_img: Input image in BGR format.
|
||||
|
||||
Returns:
|
||||
Preprocessed image as a NumPy array ready for inference.
|
||||
"""
|
||||
resized_img = cv2.resize(face_img, self.input_size)
|
||||
|
||||
if isinstance(self.input_std, (list, tuple)):
|
||||
# Per-channel normalization
|
||||
rgb_img = cv2.cvtColor(resized_img, cv2.COLOR_BGR2RGB).astype(np.float32)
|
||||
normalized_img = (rgb_img - np.array(self.input_mean, dtype=np.float32)) / np.array(
|
||||
self.input_std, dtype=np.float32
|
||||
)
|
||||
|
||||
# Change to NCHW (batch, channels, height, width)
|
||||
blob = np.transpose(normalized_img, (2, 0, 1)) # CHW
|
||||
blob = np.expand_dims(blob, axis=0) # NCHW
|
||||
else:
|
||||
# Single-value normalization
|
||||
blob = cv2.dnn.blobFromImage(
|
||||
resized_img,
|
||||
scalefactor=1.0 / self.input_std,
|
||||
size=self.input_size,
|
||||
mean=(self.input_mean, self.input_mean, self.input_mean),
|
||||
swapRB=True, # Convert BGR to RGB
|
||||
)
|
||||
|
||||
return blob
|
||||
|
||||
def get_embedding(self, image: np.ndarray, landmarks: np.ndarray = None) -> np.ndarray:
|
||||
"""
|
||||
Extracts face embedding from an image.
|
||||
|
||||
Args:
|
||||
image: Input face image (BGR format). If already aligned (112x112), landmarks can be None.
|
||||
landmarks: Facial landmarks (5 points for alignment). Optional if image is already aligned.
|
||||
|
||||
Returns:
|
||||
Face embedding vector (typically 512-dimensional).
|
||||
"""
|
||||
# If landmarks are provided, align the face first
|
||||
if landmarks is not None:
|
||||
aligned_face, _ = face_alignment(image, landmarks, image_size=self.input_size)
|
||||
else:
|
||||
# Assume image is already aligned
|
||||
aligned_face = image
|
||||
|
||||
# Generate embedding from aligned face
|
||||
face_blob = self.preprocess(aligned_face)
|
||||
embedding = self.session.run(self.output_names, {self.input_name: face_blob})[0]
|
||||
|
||||
return embedding
|
||||
|
||||
def get_normalized_embedding(self, image: np.ndarray, landmarks: np.ndarray) -> np.ndarray:
|
||||
"""
|
||||
Extracts a l2 normalized face embedding vector from an image.
|
||||
|
||||
Args:
|
||||
image: Input face image (BGR format).
|
||||
landmarks: Facial landmarks (5 points for alignment).
|
||||
|
||||
Returns:
|
||||
Normalized face embedding vector (typically 512-dimensional).
|
||||
"""
|
||||
embedding = self.get_embedding(image, landmarks)
|
||||
norm = np.linalg.norm(embedding)
|
||||
return embedding / norm if norm > 0 else embedding
|
||||
103
uniface/recognition/models.py
Normal file
@@ -0,0 +1,103 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from typing import Optional
|
||||
|
||||
from uniface.constants import ArcFaceWeights, MobileFaceWeights, SphereFaceWeights
|
||||
from uniface.model_store import verify_model_weights
|
||||
|
||||
from .base import BaseRecognizer, PreprocessConfig
|
||||
|
||||
__all__ = ["ArcFace", "MobileFace", "SphereFace"]
|
||||
|
||||
|
||||
class ArcFace(BaseRecognizer):
|
||||
"""ArcFace model for robust face recognition.
|
||||
|
||||
This class provides a concrete implementation of the BaseRecognizer,
|
||||
pre-configured for ArcFace models. It handles the loading of specific
|
||||
ArcFace weights and sets up the appropriate default preprocessing.
|
||||
|
||||
Args:
|
||||
model_name (ArcFaceWeights): The specific ArcFace model variant to use.
|
||||
Defaults to `ArcFaceWeights.MNET`.
|
||||
preprocessing (Optional[PreprocessConfig]): An optional custom preprocessing
|
||||
configuration. If None, a default config for ArcFace is used.
|
||||
|
||||
Example:
|
||||
>>> from uniface.recognition import ArcFace
|
||||
>>> recognizer = ArcFace()
|
||||
>>> # embedding = recognizer.get_normalized_embedding(image, landmarks)
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
model_name: ArcFaceWeights = ArcFaceWeights.MNET,
|
||||
preprocessing: Optional[PreprocessConfig] = None,
|
||||
) -> None:
|
||||
if preprocessing is None:
|
||||
preprocessing = PreprocessConfig(input_mean=127.5, input_std=127.5, input_size=(112, 112))
|
||||
model_path = verify_model_weights(model_name)
|
||||
super().__init__(model_path=model_path, preprocessing=preprocessing)
|
||||
|
||||
|
||||
class MobileFace(BaseRecognizer):
|
||||
"""Lightweight MobileFaceNet model for fast face recognition.
|
||||
|
||||
This class provides a concrete implementation of the BaseRecognizer,
|
||||
pre-configured for MobileFaceNet models. It is optimized for speed,
|
||||
making it suitable for edge devices.
|
||||
|
||||
Args:
|
||||
model_name (MobileFaceWeights): The specific MobileFaceNet model variant to use.
|
||||
Defaults to `MobileFaceWeights.MNET_V2`.
|
||||
preprocessing (Optional[PreprocessConfig]): An optional custom preprocessing
|
||||
configuration. If None, a default config for MobileFaceNet is used.
|
||||
|
||||
Example:
|
||||
>>> from uniface.recognition import MobileFace
|
||||
>>> recognizer = MobileFace()
|
||||
>>> # embedding = recognizer.get_normalized_embedding(image, landmarks)
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
model_name: MobileFaceWeights = MobileFaceWeights.MNET_V2,
|
||||
preprocessing: Optional[PreprocessConfig] = None,
|
||||
) -> None:
|
||||
if preprocessing is None:
|
||||
preprocessing = PreprocessConfig(input_mean=127.5, input_std=127.5, input_size=(112, 112))
|
||||
model_path = verify_model_weights(model_name)
|
||||
super().__init__(model_path=model_path, preprocessing=preprocessing)
|
||||
|
||||
|
||||
class SphereFace(BaseRecognizer):
|
||||
"""SphereFace model using angular margin for face recognition.
|
||||
|
||||
This class provides a concrete implementation of the BaseRecognizer,
|
||||
pre-configured for SphereFace models, which were among the first to
|
||||
introduce angular margin loss functions.
|
||||
|
||||
Args:
|
||||
model_name (SphereFaceWeights): The specific SphereFace model variant to use.
|
||||
Defaults to `SphereFaceWeights.SPHERE20`.
|
||||
preprocessing (Optional[PreprocessConfig]): An optional custom preprocessing
|
||||
configuration. If None, a default config for SphereFace is used.
|
||||
|
||||
Example:
|
||||
>>> from uniface.recognition import SphereFace
|
||||
>>> recognizer = SphereFace()
|
||||
>>> # embedding = recognizer.get_normalized_embedding(image, landmarks)
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
model_name: SphereFaceWeights = SphereFaceWeights.SPHERE20,
|
||||
preprocessing: Optional[PreprocessConfig] = None,
|
||||
) -> None:
|
||||
if preprocessing is None:
|
||||
preprocessing = PreprocessConfig(input_mean=127.5, input_std=127.5, input_size=(112, 112))
|
||||
|
||||
model_path = verify_model_weights(model_name)
|
||||
super().__init__(model_path=model_path, preprocessing=preprocessing)
|
||||
@@ -1,256 +0,0 @@
|
||||
# Copyright 2024 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
import os
|
||||
import cv2
|
||||
import numpy as np
|
||||
import onnxruntime as ort
|
||||
|
||||
import torch
|
||||
from typing import Tuple, List, Optional, Literal
|
||||
|
||||
from uniface.log import Logger
|
||||
from uniface.model_store import verify_model_weights
|
||||
|
||||
from uniface.common import (
|
||||
nms,
|
||||
resize_image,
|
||||
decode_boxes,
|
||||
generate_anchors,
|
||||
decode_landmarks
|
||||
)
|
||||
|
||||
|
||||
class RetinaFace:
|
||||
"""
|
||||
A class for face detection using the RetinaFace model.
|
||||
|
||||
Args:
|
||||
model (str): Path or identifier of the model weights.
|
||||
conf_thresh (float): Confidence threshold for detections. Defaults to 0.5.
|
||||
nms_thresh (float): Non-maximum suppression threshold. Defaults to 0.4.
|
||||
pre_nms_topk (int): Maximum number of detections before NMS. Defaults to 5000.
|
||||
post_nms_topk (int): Maximum number of detections after NMS. Defaults to 750.
|
||||
dynamic_size (Optional[bool]): Whether to adjust anchor generation dynamically based on image size. Defaults to False.
|
||||
input_size (Optional[Tuple[int, int]]): Static input size for the model (width, height). Defaults to (640, 640).
|
||||
|
||||
Attributes:
|
||||
conf_thresh (float): Confidence threshold for filtering detections.
|
||||
nms_thresh (float): Threshold for NMS to remove duplicate detections.
|
||||
pre_nms_topk (int): Maximum detections to consider before applying NMS.
|
||||
post_nms_topk (int): Maximum detections retained after applying NMS.
|
||||
dynamic_size (bool): Indicates if input size and anchors are dynamically adjusted.
|
||||
input_size (Tuple[int, int]): The model's input image size.
|
||||
_model_path (str): Path to the model weights.
|
||||
_priors (torch.Tensor): Precomputed anchor boxes for static input size.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
model: str,
|
||||
conf_thresh: float = 0.5,
|
||||
nms_thresh: float = 0.4,
|
||||
pre_nms_topk: int = 5000,
|
||||
post_nms_topk: int = 750,
|
||||
dynamic_size: Optional[bool] = False,
|
||||
input_size: Optional[Tuple[int, int]] = (640, 640), # Default input size if dynamic_size=False
|
||||
) -> None:
|
||||
|
||||
self.conf_thresh = conf_thresh
|
||||
self.nms_thresh = nms_thresh
|
||||
self.pre_nms_topk = pre_nms_topk
|
||||
self.post_nms_topk = post_nms_topk
|
||||
self.dynamic_size = dynamic_size
|
||||
self.input_size = input_size
|
||||
|
||||
Logger.info(
|
||||
f"Initializing RetinaFace with model={model}, conf_thresh={conf_thresh}, nms_thresh={nms_thresh}, "
|
||||
f"pre_nms_topk={pre_nms_topk}, post_nms_topk={post_nms_topk}, dynamic_size={dynamic_size}, "
|
||||
f"input_size={input_size}"
|
||||
)
|
||||
|
||||
# Get path to model weights
|
||||
self._model_path = verify_model_weights(model)
|
||||
Logger.info(f"Verified model weights located at: {self._model_path}")
|
||||
|
||||
# Precompute anchors if using static size
|
||||
if not dynamic_size and input_size is not None:
|
||||
self._priors = generate_anchors(image_size=input_size)
|
||||
Logger.debug("Generated anchors for static input size.")
|
||||
|
||||
# Initialize model
|
||||
self._initialize_model(self._model_path)
|
||||
|
||||
def _initialize_model(self, model_path: str) -> None:
|
||||
"""
|
||||
Initializes an ONNX model session from the given path.
|
||||
|
||||
Args:
|
||||
model_path (str): The file path to the ONNX model.
|
||||
|
||||
Raises:
|
||||
RuntimeError: If the model fails to load, logs an error and raises an exception.
|
||||
"""
|
||||
try:
|
||||
self.session = ort.InferenceSession(model_path)
|
||||
self.input_name = self.session.get_inputs()[0].name
|
||||
Logger.info(f"Successfully initialized the model from {model_path}")
|
||||
except Exception as e:
|
||||
Logger.error(f"Failed to load model from '{model_path}': {e}")
|
||||
raise RuntimeError(f"Failed to initialize model session for '{model_path}'") from e
|
||||
|
||||
def preprocess(self, image: np.ndarray) -> np.ndarray:
|
||||
"""Preprocess input image for model inference.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): Input image.
|
||||
|
||||
Returns:
|
||||
np.ndarray: Preprocessed image tensor with shape (1, C, H, W)
|
||||
"""
|
||||
image = np.float32(image) - np.array([104, 117, 123], dtype=np.float32)
|
||||
image = image.transpose(2, 0, 1) # HWC to CHW
|
||||
image = np.expand_dims(image, axis=0) # Add batch dimension (1, C, H, W)
|
||||
return image
|
||||
|
||||
def inference(self, input_tensor: np.ndarray) -> List[np.ndarray]:
|
||||
"""Perform model inference on the preprocessed image tensor.
|
||||
|
||||
Args:
|
||||
input_tensor (np.ndarray): Preprocessed input tensor.
|
||||
|
||||
Returns:
|
||||
Tuple[np.ndarray, np.ndarray]: Raw model outputs.
|
||||
"""
|
||||
return self.session.run(None, {self.input_name: input_tensor})
|
||||
|
||||
def detect(
|
||||
self,
|
||||
image: np.ndarray,
|
||||
max_num: Optional[int] = 0,
|
||||
metric: Literal["default", "max"] = "default",
|
||||
center_weight: Optional[float] = 2.0
|
||||
) -> Tuple[np.ndarray, np.ndarray]:
|
||||
"""
|
||||
Perform face detection on an input image and return bounding boxes and landmarks.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): Input image as a NumPy array of shape (height, width, channels).
|
||||
max_num (int, optional): Maximum number of detections to return. Defaults to 1.
|
||||
metric (str, optional): Metric for ranking detections when `max_num` is specified.
|
||||
Options:
|
||||
- "default": Prioritize detections closer to the image center.
|
||||
- "max": Prioritize detections with larger bounding box areas.
|
||||
center_weight (float, optional): Weight for penalizing detections farther from the image center
|
||||
when using the "default" metric. Defaults to 2.0.
|
||||
|
||||
Returns:
|
||||
Tuple[np.ndarray, np.ndarray]: Detection results containing:
|
||||
- detections (np.ndarray): Array of detected bounding boxes with confidence scores.
|
||||
Shape: (num_detections, 5), where each row is [x_min, y_min, x_max, y_max, score].
|
||||
- landmarks (np.ndarray): Array of detected facial landmarks.
|
||||
Shape: (num_detections, 5, 2), where each row contains 5 landmark points (x, y).
|
||||
"""
|
||||
|
||||
if self.dynamic_size:
|
||||
height, width, _ = image.shape
|
||||
self._priors = generate_anchors(image_size=(height, width)) # generate anchors for each input image
|
||||
resize_factor = 1.0 # No resizing
|
||||
else:
|
||||
image, resize_factor = resize_image(image, target_shape=self.input_size)
|
||||
|
||||
height, width, _ = image.shape
|
||||
image_tensor = self.preprocess(image)
|
||||
|
||||
# ONNXRuntime inference
|
||||
outputs = self.inference(image_tensor)
|
||||
|
||||
# Postprocessing
|
||||
detections, landmarks = self.postprocess(outputs, resize_factor, shape=(width, height))
|
||||
|
||||
if max_num > 0 and detections.shape[0] > max_num:
|
||||
# Calculate area of detections
|
||||
areas = (detections[:, 2] - detections[:, 0]) * (detections[:, 3] - detections[:, 1])
|
||||
|
||||
# Calculate offsets from image center
|
||||
center = (height // 2, width // 2)
|
||||
offsets = np.vstack([
|
||||
(detections[:, 0] + detections[:, 2]) / 2 - center[1],
|
||||
(detections[:, 1] + detections[:, 3]) / 2 - center[0]
|
||||
])
|
||||
offset_dist_squared = np.sum(np.power(offsets, 2.0), axis=0)
|
||||
|
||||
# Calculate scores based on the chosen metric
|
||||
if metric == 'max':
|
||||
scores = areas
|
||||
else:
|
||||
scores = areas - offset_dist_squared * center_weight
|
||||
|
||||
# Sort by scores and select top `max_num`
|
||||
sorted_indices = np.argsort(scores)[::-1][:max_num]
|
||||
|
||||
detections = detections[sorted_indices]
|
||||
landmarks = landmarks[sorted_indices]
|
||||
|
||||
return detections, landmarks
|
||||
|
||||
def postprocess(self, outputs: List[np.ndarray], resize_factor: float, shape: Tuple[int, int]) -> Tuple[np.ndarray, np.ndarray]:
|
||||
"""
|
||||
Process the model outputs into final detection results.
|
||||
|
||||
Args:
|
||||
outputs (List[np.ndarray]): Raw outputs from the detection model.
|
||||
- outputs[0]: Location predictions (bounding box coordinates).
|
||||
- outputs[1]: Class confidence scores.
|
||||
- outputs[2]: Landmark predictions.
|
||||
resize_factor (float): Factor used to resize the input image during preprocessing.
|
||||
shape (Tuple[int, int]): Original shape of the image as (height, width).
|
||||
|
||||
Returns:
|
||||
Tuple[np.ndarray, np.ndarray]: Processed results containing:
|
||||
- detections (np.ndarray): Array of detected bounding boxes with confidence scores.
|
||||
Shape: (num_detections, 5), where each row is [x_min, y_min, x_max, y_max, score].
|
||||
- landmarks (np.ndarray): Array of detected facial landmarks.
|
||||
Shape: (num_detections, 5, 2), where each row contains 5 landmark points (x, y).
|
||||
"""
|
||||
loc, conf, landmarks = outputs[0].squeeze(0), outputs[1].squeeze(0), outputs[2].squeeze(0)
|
||||
|
||||
# Decode boxes and landmarks
|
||||
boxes = decode_boxes(torch.tensor(loc), self._priors).cpu().numpy()
|
||||
landmarks = decode_landmarks(torch.tensor(landmarks), self._priors).cpu().numpy()
|
||||
|
||||
boxes, landmarks = self._scale_detections(boxes, landmarks, resize_factor, shape=(shape[0], shape[1]))
|
||||
|
||||
# Extract confidence scores for the face class
|
||||
scores = conf[:, 1]
|
||||
mask = scores > self.conf_thresh
|
||||
|
||||
# Filter by confidence threshold
|
||||
boxes, landmarks, scores = boxes[mask], landmarks[mask], scores[mask]
|
||||
|
||||
# Sort by scores
|
||||
order = scores.argsort()[::-1][:self.pre_nms_topk]
|
||||
boxes, landmarks, scores = boxes[order], landmarks[order], scores[order]
|
||||
|
||||
# Apply NMS
|
||||
detections = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
|
||||
keep = nms(detections, self.nms_thresh)
|
||||
detections, landmarks = detections[keep], landmarks[keep]
|
||||
|
||||
# Keep top-k detections
|
||||
detections, landmarks = detections[:self.post_nms_topk], landmarks[:self.post_nms_topk]
|
||||
|
||||
landmarks = landmarks.reshape(-1, 5, 2).astype(np.int32)
|
||||
|
||||
return detections, landmarks
|
||||
|
||||
def _scale_detections(self, boxes: np.ndarray, landmarks: np.ndarray, resize_factor: float, shape: Tuple[int, int]) -> Tuple[np.ndarray, np.ndarray]:
|
||||
"""Scale bounding boxes and landmarks to the original image size."""
|
||||
bbox_scale = np.array([shape[0], shape[1]] * 2)
|
||||
boxes = boxes * bbox_scale / resize_factor
|
||||
|
||||
landmark_scale = np.array([shape[0], shape[1]] * 5)
|
||||
landmarks = landmarks * landmark_scale / resize_factor
|
||||
|
||||
return boxes, landmarks
|
||||
@@ -1,15 +0,0 @@
|
||||
# Copyright 2024 Yakhyokhuja Valikhujaev
|
||||
#
|
||||
# Licensed under the MIT License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# https://opensource.org/licenses/MIT
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
__version__ = "0.1.1"
|
||||
__author__ = "Yakhyokhuja Valikhujaev"
|
||||
@@ -1,38 +1,58 @@
|
||||
# Copyright 2024 Yakhyokhuja Valikhujaev
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from typing import List, Union
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
|
||||
def draw_detections(image, detections, vis_threshold=0.6):
|
||||
def draw_detections(
|
||||
image: np.ndarray,
|
||||
bboxes: Union[np.ndarray, List[List[float]]],
|
||||
scores: Union[np.ndarray, List[float]],
|
||||
landmarks: Union[np.ndarray, List[List[List[float]]]],
|
||||
vis_threshold: float = 0.6,
|
||||
):
|
||||
"""
|
||||
Draw bounding boxes and landmarks on the image.
|
||||
Draws bounding boxes, scores, and landmarks from separate lists onto an image.
|
||||
|
||||
Args:
|
||||
image (ndarray): Image to draw detections on.
|
||||
detections (tuple): (bounding boxes, landmarks) as NumPy arrays.
|
||||
vis_threshold (float): Confidence threshold for filtering detections.
|
||||
image (np.ndarray): The image to draw on.
|
||||
bboxes (list or np.ndarray): A list of bounding boxes, e.g., [[x1,y1,x2,y2], ...].
|
||||
scores (list or np.ndarray): A list of confidence scores.
|
||||
landmarks (list or np.ndarray): A list of landmark sets, e.g., [[[x,y],...],...].
|
||||
vis_threshold (float): Confidence threshold for filtering which detections to draw.
|
||||
"""
|
||||
|
||||
_colors = [(0, 0, 255), (0, 255, 255), (255, 0, 255), (0, 255, 0), (255, 0, 0)]
|
||||
|
||||
# Unpack detections
|
||||
boxes, landmarks = detections
|
||||
scores = boxes[:, 4]
|
||||
# Filter detections by score
|
||||
keep_indices = [i for i, score in enumerate(scores) if score >= vis_threshold]
|
||||
|
||||
# Filter detections by confidence threshold
|
||||
filtered = scores >= vis_threshold
|
||||
boxes = boxes[filtered, :4].astype(np.int32)
|
||||
landmarks = landmarks[filtered]
|
||||
scores = scores[filtered]
|
||||
# Draw the filtered detections
|
||||
for i in keep_indices:
|
||||
bbox = np.array(bboxes[i], dtype=np.int32)
|
||||
score = scores[i]
|
||||
landmark_set = np.array(landmarks[i], dtype=np.int32)
|
||||
|
||||
print(f"#faces: {len(scores)}")
|
||||
# Calculate adaptive thickness
|
||||
thickness = max(1, int(min(bbox[2] - bbox[0], bbox[3] - bbox[1]) / 100))
|
||||
|
||||
# Draw bounding boxes, scores, and landmarks
|
||||
for box, score, landmark in zip(boxes, scores, landmarks):
|
||||
cv2.rectangle(image, box[:2], box[2:], (0, 0, 255), 2)
|
||||
cv2.putText(image, f"{score:.2f}", (box[0], box[1] + 12), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
|
||||
for point, color in zip(landmark, _colors):
|
||||
cv2.circle(image, tuple(point), 2, color, -1)
|
||||
# Draw bounding box
|
||||
cv2.rectangle(image, tuple(bbox[:2]), tuple(bbox[2:]), (0, 0, 255), thickness)
|
||||
|
||||
# Draw score
|
||||
cv2.putText(
|
||||
image,
|
||||
f"{score:.2f}",
|
||||
(bbox[0], bbox[1] - 10),
|
||||
cv2.FONT_HERSHEY_SIMPLEX,
|
||||
0.5,
|
||||
(255, 255, 255),
|
||||
thickness,
|
||||
)
|
||||
|
||||
# Draw landmarks
|
||||
for j, point in enumerate(landmark_set):
|
||||
cv2.circle(image, tuple(point), thickness + 1, _colors[j], -1)
|
||||
|
||||