Compare commits
85 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
bb1d209f3b | ||
|
|
54b769c0f1 | ||
|
|
4d1921e531 | ||
|
|
da8a5cf35b | ||
|
|
3982d677a9 | ||
|
|
f4458f0550 | ||
|
|
637316f077 | ||
|
|
6b1d2a1ce6 | ||
|
|
a5e97ac484 | ||
|
|
0c93598007 | ||
|
|
779952e3f8 | ||
|
|
39b50b62bd | ||
|
|
db7532ecf1 | ||
|
|
4b8dc2c0f9 | ||
|
|
0a2a10e165 | ||
|
|
84cda5f56c | ||
|
|
0771a7959a | ||
|
|
15947eb605 | ||
|
|
1ccc4f6b77 | ||
|
|
189755a1a6 | ||
|
|
11363fe0a8 | ||
|
|
fe3e70a352 | ||
|
|
8e218321a4 | ||
|
|
2c78f39e5d | ||
|
|
df673c4a3f | ||
|
|
496de7a491 | ||
|
|
89a05e4689 | ||
|
|
d3c2d959d0 | ||
|
|
f3d28d5ef5 | ||
|
|
c9f8215e28 | ||
|
|
0aea17d14d | ||
|
|
3cf13f70d4 | ||
|
|
666438909d | ||
|
|
77f14a616a | ||
|
|
98f8acc51b | ||
|
|
30a177981d | ||
|
|
0417f7531f | ||
|
|
b15504dfc5 | ||
|
|
fb29a919b1 | ||
|
|
b35b1a3f7c | ||
|
|
5bd6bb1673 | ||
|
|
cf5d06729d | ||
|
|
29964df259 | ||
|
|
eef4a0624a | ||
|
|
6a7ba6fc0a | ||
|
|
282737e0e9 | ||
|
|
597c86f997 | ||
|
|
d0446827e9 | ||
|
|
5f88345830 | ||
|
|
ab56589f77 | ||
|
|
d05e609ddf | ||
|
|
cee2b692ad | ||
|
|
67bb13c082 | ||
|
|
f9b4ea492b | ||
|
|
08f79e7d47 | ||
|
|
3a0b0e21b1 | ||
|
|
85cf413cb8 | ||
|
|
d1830c7058 | ||
|
|
204b1d75e1 | ||
|
|
777333eb2d | ||
|
|
c2d52e305a | ||
|
|
cd3ff79c2e | ||
|
|
fbca77e050 | ||
|
|
8dd7f3f101 | ||
|
|
0ae5714f99 | ||
|
|
64c9c2f452 | ||
|
|
31f97da783 | ||
|
|
d586cffb3a | ||
|
|
4256407044 | ||
|
|
e54607292f | ||
|
|
56ac8af432 | ||
|
|
904ba2be83 | ||
|
|
ad661da2f3 | ||
|
|
084b1132ad | ||
|
|
f22e8f01fb | ||
|
|
da09d7497d | ||
|
|
7330b4fd6e | ||
|
|
fa179c6a7a | ||
|
|
9acc6e344c | ||
|
|
9819520d76 | ||
|
|
db544d1a29 | ||
|
|
12ccac11b7 | ||
|
|
a158e47f52 | ||
|
|
5b4148f824 | ||
|
|
6a69739e8e |
BIN
.github/logos/logo_preview.jpg
vendored
Normal file
|
After Width: | Height: | Size: 826 KiB |
BIN
.github/logos/logo_readme.png
vendored
Normal file
|
After Width: | Height: | Size: 563 KiB |
BIN
.github/logos/logo_web.webp
vendored
Normal file
|
After Width: | Height: | Size: 33 KiB |
64
.github/workflows/build.yml
vendored
@@ -1,64 +0,0 @@
|
||||
name: Build, Test, and Publish
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
tags:
|
||||
- "v*.*.*" # Trigger publish on version tags
|
||||
pull_request:
|
||||
branches:
|
||||
- main
|
||||
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: ["3.8", "3.9", "3.10"]
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
python -m pip install .[dev] || pip install pytest # Use extras_require if available
|
||||
|
||||
- name: Run Tests
|
||||
run: |
|
||||
pytest
|
||||
|
||||
publish:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build # Publish only if tests pass
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: "3.10" # Use a single Python version for publishing
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
python -m pip install build twine
|
||||
|
||||
- name: Build Package
|
||||
run: python -m build
|
||||
|
||||
- name: Publish to PyPI
|
||||
env:
|
||||
TWINE_USERNAME: __token__
|
||||
TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
|
||||
run: twine upload dist/*
|
||||
87
.github/workflows/ci.yml
vendored
Normal file
@@ -0,0 +1,87 @@
|
||||
name: CI
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
- develop
|
||||
pull_request:
|
||||
branches:
|
||||
- main
|
||||
- develop
|
||||
|
||||
jobs:
|
||||
test:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
python-version: ["3.10", "3.11", "3.12", "3.13"]
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
cache: 'pip'
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
python -m pip install .[dev]
|
||||
|
||||
- name: Check ONNX Runtime providers
|
||||
run: |
|
||||
python -c "import onnxruntime as ort; print('Available providers:', ort.get_available_providers())"
|
||||
|
||||
- name: Lint with ruff (if available)
|
||||
run: |
|
||||
pip install ruff || true
|
||||
ruff check . --exit-zero || true
|
||||
continue-on-error: true
|
||||
|
||||
- name: Run tests
|
||||
run: pytest -v --tb=short
|
||||
|
||||
- name: Test package imports
|
||||
run: |
|
||||
python -c "from uniface import RetinaFace, ArcFace, Landmark106, AgeGender; print('All imports successful')"
|
||||
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
needs: test
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.10"
|
||||
cache: 'pip'
|
||||
|
||||
- name: Install build tools
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
python -m pip install build
|
||||
|
||||
- name: Build package
|
||||
run: python -m build
|
||||
|
||||
- name: Check package
|
||||
run: |
|
||||
python -m pip install twine
|
||||
twine check dist/*
|
||||
|
||||
- name: Upload build artifacts
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: dist-python-${{ github.sha }}
|
||||
path: dist/
|
||||
retention-days: 7
|
||||
|
||||
108
.github/workflows/publish.yml
vendored
Normal file
@@ -0,0 +1,108 @@
|
||||
name: Publish to PyPI
|
||||
|
||||
on:
|
||||
push:
|
||||
tags:
|
||||
- "v*.*.*" # Trigger only on version tags like v0.1.9
|
||||
|
||||
jobs:
|
||||
validate:
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
version: ${{ steps.get_version.outputs.version }}
|
||||
tag_version: ${{ steps.get_version.outputs.tag_version }}
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Get version from tag and pyproject.toml
|
||||
id: get_version
|
||||
run: |
|
||||
TAG_VERSION=${GITHUB_REF#refs/tags/v}
|
||||
echo "tag_version=$TAG_VERSION" >> $GITHUB_OUTPUT
|
||||
|
||||
PYPROJECT_VERSION=$(grep -Po '(?<=^version = ")[^"]*' pyproject.toml)
|
||||
echo "version=$PYPROJECT_VERSION" >> $GITHUB_OUTPUT
|
||||
|
||||
echo "Tag version: v$TAG_VERSION"
|
||||
echo "pyproject.toml version: $PYPROJECT_VERSION"
|
||||
|
||||
- name: Verify version match
|
||||
run: |
|
||||
if [ "${{ steps.get_version.outputs.tag_version }}" != "${{ steps.get_version.outputs.version }}" ]; then
|
||||
echo "Error: Tag version (${{ steps.get_version.outputs.tag_version }}) does not match pyproject.toml version (${{ steps.get_version.outputs.version }})"
|
||||
exit 1
|
||||
fi
|
||||
echo "Version validation passed: ${{ steps.get_version.outputs.version }}"
|
||||
|
||||
test:
|
||||
runs-on: ubuntu-latest
|
||||
needs: validate
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
python-version: ["3.10", "3.11", "3.12", "3.13"]
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
cache: 'pip'
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
python -m pip install .[dev]
|
||||
|
||||
- name: Run tests
|
||||
run: pytest -v
|
||||
|
||||
publish:
|
||||
runs-on: ubuntu-latest
|
||||
needs: [validate, test]
|
||||
permissions:
|
||||
contents: write
|
||||
id-token: write
|
||||
environment:
|
||||
name: pypi
|
||||
url: https://pypi.org/project/uniface/
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.10"
|
||||
cache: 'pip'
|
||||
|
||||
- name: Install build tools
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
python -m pip install build twine
|
||||
|
||||
- name: Build package
|
||||
run: python -m build
|
||||
|
||||
- name: Check package
|
||||
run: twine check dist/*
|
||||
|
||||
- name: Publish to PyPI
|
||||
env:
|
||||
TWINE_USERNAME: __token__
|
||||
TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
|
||||
run: twine upload dist/*
|
||||
|
||||
- name: Create GitHub Release
|
||||
uses: softprops/action-gh-release@v1
|
||||
with:
|
||||
files: dist/*
|
||||
generate_release_notes: true
|
||||
|
||||
2
.gitignore
vendored
@@ -1,3 +1,5 @@
|
||||
tmp_*
|
||||
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
|
||||
63
CONTRIBUTING.md
Normal file
@@ -0,0 +1,63 @@
|
||||
# Contributing to UniFace
|
||||
|
||||
Thank you for considering contributing to UniFace! We welcome contributions of all kinds.
|
||||
|
||||
## How to Contribute
|
||||
|
||||
### Reporting Issues
|
||||
|
||||
- Use GitHub Issues to report bugs or suggest features
|
||||
- Include clear descriptions and reproducible examples
|
||||
- Check existing issues before creating new ones
|
||||
|
||||
### Pull Requests
|
||||
|
||||
1. Fork the repository
|
||||
2. Create a new branch for your feature
|
||||
3. Write clear, documented code with type hints
|
||||
4. Add tests for new functionality
|
||||
5. Ensure all tests pass
|
||||
6. Submit a pull request with a clear description
|
||||
|
||||
### Code Style
|
||||
|
||||
- Follow PEP8 guidelines
|
||||
- Use type hints (Python 3.10+)
|
||||
- Write docstrings for public APIs
|
||||
- Keep code simple and readable
|
||||
|
||||
## Development Setup
|
||||
|
||||
```bash
|
||||
git clone https://github.com/yakhyo/uniface.git
|
||||
cd uniface
|
||||
pip install -e ".[dev]"
|
||||
```
|
||||
|
||||
## Running Tests
|
||||
|
||||
```bash
|
||||
pytest tests/
|
||||
```
|
||||
|
||||
## Examples
|
||||
|
||||
Example notebooks demonstrating library usage:
|
||||
|
||||
| Example | Notebook |
|
||||
|---------|----------|
|
||||
| Face Detection | [face_detection.ipynb](examples/face_detection.ipynb) |
|
||||
| Face Alignment | [face_alignment.ipynb](examples/face_alignment.ipynb) |
|
||||
| Face Recognition | [face_analyzer.ipynb](examples/face_analyzer.ipynb) |
|
||||
| Face Verification | [face_verification.ipynb](examples/face_verification.ipynb) |
|
||||
| Face Search | [face_search.ipynb](examples/face_search.ipynb) |
|
||||
|
||||
## Questions?
|
||||
|
||||
Open an issue or start a discussion on GitHub.
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
457
MODELS.md
Normal file
@@ -0,0 +1,457 @@
|
||||
# UniFace Model Zoo
|
||||
|
||||
Complete guide to all available models, their performance characteristics, and selection criteria.
|
||||
|
||||
---
|
||||
|
||||
## Face Detection Models
|
||||
|
||||
### RetinaFace Family
|
||||
|
||||
RetinaFace models are trained on the WIDER FACE dataset and provide excellent accuracy-speed tradeoffs.
|
||||
|
||||
| Model Name | Params | Size | Easy | Medium | Hard | Use Case |
|
||||
| -------------- | ------ | ----- | ------ | ------ | ------ | ----------------------------- |
|
||||
| `MNET_025` | 0.4M | 1.7MB | 88.48% | 87.02% | 80.61% | Mobile/Edge devices |
|
||||
| `MNET_050` | 1.0M | 2.6MB | 89.42% | 87.97% | 82.40% | Mobile/Edge devices |
|
||||
| `MNET_V1` | 3.5M | 3.8MB | 90.59% | 89.14% | 84.13% | Balanced mobile |
|
||||
| `MNET_V2` ⭐ | 3.2M | 3.5MB | 91.70% | 91.03% | 86.60% | **Recommended default** |
|
||||
| `RESNET18` | 11.7M | 27MB | 92.50% | 91.02% | 86.63% | Server/High accuracy |
|
||||
| `RESNET34` | 24.8M | 56MB | 94.16% | 93.12% | 88.90% | Maximum accuracy |
|
||||
|
||||
**Accuracy**: WIDER FACE validation set (Easy/Medium/Hard subsets) - from [RetinaFace paper](https://arxiv.org/abs/1905.00641)
|
||||
**Speed**: Benchmark on your own hardware using `scripts/run_detection.py --iterations 100`
|
||||
|
||||
#### Usage
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace
|
||||
from uniface.constants import RetinaFaceWeights
|
||||
|
||||
# Default (recommended)
|
||||
detector = RetinaFace() # Uses MNET_V2
|
||||
|
||||
# Specific model
|
||||
detector = RetinaFace(
|
||||
model_name=RetinaFaceWeights.MNET_025, # Fastest
|
||||
conf_thresh=0.5,
|
||||
nms_thresh=0.4,
|
||||
input_size=(640, 640)
|
||||
)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### SCRFD Family
|
||||
|
||||
SCRFD (Sample and Computation Redistribution for Efficient Face Detection) models offer state-of-the-art speed-accuracy tradeoffs.
|
||||
|
||||
| Model Name | Params | Size | Easy | Medium | Hard | Use Case |
|
||||
| ---------------- | ------ | ----- | ------ | ------ | ------ | ------------------------------- |
|
||||
| `SCRFD_500M` | 0.6M | 2.5MB | 90.57% | 88.12% | 68.51% | Real-time applications |
|
||||
| `SCRFD_10G` ⭐ | 4.2M | 17MB | 95.16% | 93.87% | 83.05% | **High accuracy + speed** |
|
||||
|
||||
**Accuracy**: WIDER FACE validation set - from [SCRFD paper](https://arxiv.org/abs/2105.04714)
|
||||
**Speed**: Benchmark on your own hardware using `scripts/run_detection.py --iterations 100`
|
||||
|
||||
#### Usage
|
||||
|
||||
```python
|
||||
from uniface import SCRFD
|
||||
from uniface.constants import SCRFDWeights
|
||||
|
||||
# Fast real-time detection
|
||||
detector = SCRFD(
|
||||
model_name=SCRFDWeights.SCRFD_500M_KPS,
|
||||
conf_thresh=0.5,
|
||||
input_size=(640, 640)
|
||||
)
|
||||
|
||||
# High accuracy
|
||||
detector = SCRFD(
|
||||
model_name=SCRFDWeights.SCRFD_10G_KPS,
|
||||
conf_thresh=0.5
|
||||
)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### YOLOv5-Face Family
|
||||
|
||||
YOLOv5-Face models provide excellent detection accuracy with 5-point facial landmarks, optimized for real-time applications.
|
||||
|
||||
| Model Name | Size | Easy | Medium | Hard | Use Case |
|
||||
| -------------- | ---- | ------ | ------ | ------ | ------------------------------ |
|
||||
| `YOLOV5N` | 11MB | 93.61% | 91.52% | 80.53% | Lightweight/Mobile |
|
||||
| `YOLOV5S` ⭐ | 28MB | 94.33% | 92.61% | 83.15% | **Real-time + accuracy** |
|
||||
| `YOLOV5M` | 82MB | 95.30% | 93.76% | 85.28% | High accuracy |
|
||||
|
||||
**Accuracy**: WIDER FACE validation set - from [YOLOv5-Face paper](https://arxiv.org/abs/2105.12931)
|
||||
**Speed**: Benchmark on your own hardware using `scripts/run_detection.py --iterations 100`
|
||||
**Note**: Fixed input size of 640×640. Models exported to ONNX from [deepcam-cn/yolov5-face](https://github.com/deepcam-cn/yolov5-face)
|
||||
|
||||
#### Usage
|
||||
|
||||
```python
|
||||
from uniface import YOLOv5Face
|
||||
from uniface.constants import YOLOv5FaceWeights
|
||||
|
||||
# Lightweight/Mobile
|
||||
detector = YOLOv5Face(
|
||||
model_name=YOLOv5FaceWeights.YOLOV5N,
|
||||
conf_thresh=0.6,
|
||||
nms_thresh=0.5
|
||||
)
|
||||
|
||||
# Real-time detection (recommended)
|
||||
detector = YOLOv5Face(
|
||||
model_name=YOLOv5FaceWeights.YOLOV5S,
|
||||
conf_thresh=0.6,
|
||||
nms_thresh=0.5
|
||||
)
|
||||
|
||||
# High accuracy
|
||||
detector = YOLOv5Face(
|
||||
model_name=YOLOv5FaceWeights.YOLOV5M,
|
||||
conf_thresh=0.6
|
||||
)
|
||||
|
||||
# Detect faces with landmarks
|
||||
faces = detector.detect(image)
|
||||
for face in faces:
|
||||
bbox = face['bbox'] # [x1, y1, x2, y2]
|
||||
confidence = face['confidence']
|
||||
landmarks = face['landmarks'] # 5-point landmarks (5, 2)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Face Recognition Models
|
||||
|
||||
### ArcFace
|
||||
|
||||
State-of-the-art face recognition using additive angular margin loss.
|
||||
|
||||
| Model Name | Backbone | Params | Size | Use Case |
|
||||
| ----------- | --------- | ------ | ----- | -------------------------------- |
|
||||
| `MNET` ⭐ | MobileNet | 2.0M | 8MB | **Balanced (recommended)** |
|
||||
| `RESNET` | ResNet50 | 43.6M | 166MB | Maximum accuracy |
|
||||
|
||||
**Dataset**: Trained on MS1M-V2 (5.8M images, 85K identities)
|
||||
**Accuracy**: Benchmark on your own dataset or use standard face verification benchmarks
|
||||
|
||||
#### Usage
|
||||
|
||||
```python
|
||||
from uniface import ArcFace
|
||||
from uniface.constants import ArcFaceWeights
|
||||
|
||||
# Default (MobileNet backbone)
|
||||
recognizer = ArcFace()
|
||||
|
||||
# High accuracy (ResNet50 backbone)
|
||||
recognizer = ArcFace(model_name=ArcFaceWeights.RESNET)
|
||||
|
||||
# Extract embedding
|
||||
embedding = recognizer.get_normalized_embedding(image, landmarks)
|
||||
# Returns: (1, 512) normalized embedding vector
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### MobileFace
|
||||
|
||||
Lightweight face recognition optimized for mobile devices.
|
||||
|
||||
| Model Name | Backbone | Params | Size | LFW | CALFW | CPLFW | AgeDB-30 | Use Case |
|
||||
| ----------------- | ---------------- | ------ | ---- | ------ | ------ | ------ | -------- | --------------------- |
|
||||
| `MNET_025` | MobileNetV1 0.25 | 0.36M | 1MB | 98.76% | 92.02% | 82.37% | 90.02% | Ultra-lightweight |
|
||||
| `MNET_V2` ⭐ | MobileNetV2 | 2.29M | 4MB | 99.55% | 94.87% | 86.89% | 95.16% | **Mobile/Edge** |
|
||||
| `MNET_V3_SMALL` | MobileNetV3-S | 1.25M | 3MB | 99.30% | 93.77% | 85.29% | 92.79% | Mobile optimized |
|
||||
| `MNET_V3_LARGE` | MobileNetV3-L | 3.52M | 10MB | 99.53% | 94.56% | 86.79% | 95.13% | Balanced mobile |
|
||||
|
||||
**Dataset**: Trained on MS1M-V2 (5.8M images, 85K identities)
|
||||
**Accuracy**: Evaluated on LFW, CALFW, CPLFW, and AgeDB-30 benchmarks
|
||||
**Note**: These models are lightweight alternatives to ArcFace for resource-constrained environments
|
||||
|
||||
#### Usage
|
||||
|
||||
```python
|
||||
from uniface import MobileFace
|
||||
from uniface.constants import MobileFaceWeights
|
||||
|
||||
# Lightweight
|
||||
recognizer = MobileFace(model_name=MobileFaceWeights.MNET_V2)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### SphereFace
|
||||
|
||||
Face recognition using angular softmax loss.
|
||||
|
||||
| Model Name | Backbone | Params | Size | LFW | CALFW | CPLFW | AgeDB-30 | Use Case |
|
||||
| ------------ | -------- | ------ | ---- | ------ | ------ | ------ | -------- | ------------------- |
|
||||
| `SPHERE20` | Sphere20 | 24.5M | 50MB | 99.67% | 95.61% | 88.75% | 96.58% | Research/Comparison |
|
||||
| `SPHERE36` | Sphere36 | 34.6M | 92MB | 99.72% | 95.64% | 89.92% | 96.83% | Research/Comparison |
|
||||
|
||||
**Dataset**: Trained on MS1M-V2 (5.8M images, 85K identities)
|
||||
**Accuracy**: Evaluated on LFW, CALFW, CPLFW, and AgeDB-30 benchmarks
|
||||
**Note**: SphereFace uses angular softmax loss, an earlier approach before ArcFace. These models provide good accuracy with moderate resource requirements.
|
||||
|
||||
#### Usage
|
||||
|
||||
```python
|
||||
from uniface import SphereFace
|
||||
from uniface.constants import SphereFaceWeights
|
||||
|
||||
recognizer = SphereFace(model_name=SphereFaceWeights.SPHERE20)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Facial Landmark Models
|
||||
|
||||
### 106-Point Landmark Detection
|
||||
|
||||
High-precision facial landmark localization.
|
||||
|
||||
| Model Name | Points | Params | Size | Use Case |
|
||||
| ---------- | ------ | ------ | ---- | ------------------------ |
|
||||
| `2D106` | 106 | 3.7M | 14MB | Face alignment, analysis |
|
||||
|
||||
**Note**: Provides 106 facial keypoints for detailed face analysis and alignment
|
||||
|
||||
#### Usage
|
||||
|
||||
```python
|
||||
from uniface import Landmark106
|
||||
|
||||
landmarker = Landmark106()
|
||||
landmarks = landmarker.get_landmarks(image, bbox)
|
||||
# Returns: (106, 2) array of (x, y) coordinates
|
||||
```
|
||||
|
||||
**Landmark Groups:**
|
||||
|
||||
- Face contour: 0-32 (33 points)
|
||||
- Eyebrows: 33-50 (18 points)
|
||||
- Nose: 51-62 (12 points)
|
||||
- Eyes: 63-86 (24 points)
|
||||
- Mouth: 87-105 (19 points)
|
||||
|
||||
---
|
||||
|
||||
## Attribute Analysis Models
|
||||
|
||||
### Age & Gender Detection
|
||||
|
||||
| Model Name | Attributes | Params | Size | Use Case |
|
||||
| ----------- | ----------- | ------ | ---- | --------------- |
|
||||
| `DEFAULT` | Age, Gender | 2.1M | 8MB | General purpose |
|
||||
|
||||
**Dataset**: Trained on CelebA
|
||||
**Note**: Accuracy varies by demographic and image quality. Test on your specific use case.
|
||||
|
||||
#### Usage
|
||||
|
||||
```python
|
||||
from uniface import AgeGender
|
||||
|
||||
predictor = AgeGender()
|
||||
gender, age = predictor.predict(image, bbox)
|
||||
# Returns: (gender, age_in_years)
|
||||
# gender: 0 for Female, 1 for Male
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Emotion Detection
|
||||
|
||||
| Model Name | Classes | Params | Size | Use Case |
|
||||
| ------------- | ------- | ------ | ---- | --------------- |
|
||||
| `AFFECNET7` | 7 | 0.5M | 2MB | 7-class emotion |
|
||||
| `AFFECNET8` | 8 | 0.5M | 2MB | 8-class emotion |
|
||||
|
||||
**Classes (7)**: Neutral, Happy, Sad, Surprise, Fear, Disgust, Anger
|
||||
**Classes (8)**: Above + Contempt
|
||||
|
||||
**Dataset**: Trained on AffectNet
|
||||
**Note**: Emotion detection accuracy depends heavily on facial expression clarity and cultural context
|
||||
|
||||
#### Usage
|
||||
|
||||
```python
|
||||
from uniface import Emotion
|
||||
from uniface.constants import DDAMFNWeights
|
||||
|
||||
predictor = Emotion(model_name=DDAMFNWeights.AFFECNET7)
|
||||
emotion, confidence = predictor.predict(image, landmarks)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Gaze Estimation Models
|
||||
|
||||
### MobileGaze Family
|
||||
|
||||
Real-time gaze direction prediction models trained on Gaze360 dataset. Returns pitch (vertical) and yaw (horizontal) angles in radians.
|
||||
|
||||
| Model Name | Params | Size | MAE* | Use Case |
|
||||
| -------------- | ------ | ------- | ----- | ----------------------------- |
|
||||
| `RESNET18` | 11.7M | 43 MB | 12.84 | Balanced accuracy/speed |
|
||||
| `RESNET34` ⭐ | 24.8M | 81.6 MB | 11.33 | **Recommended default** |
|
||||
| `RESNET50` | 25.6M | 91.3 MB | 11.34 | High accuracy |
|
||||
| `MOBILENET_V2` | 3.5M | 9.59 MB | 13.07 | Mobile/Edge devices |
|
||||
| `MOBILEONE_S0` | 2.1M | 4.8 MB | 12.58 | Lightweight/Real-time |
|
||||
|
||||
*MAE (Mean Absolute Error) in degrees on Gaze360 test set - lower is better
|
||||
|
||||
**Dataset**: Trained on Gaze360 (indoor/outdoor scenes with diverse head poses)
|
||||
**Training**: 200 epochs with classification-based approach (binned angles)
|
||||
|
||||
#### Usage
|
||||
|
||||
```python
|
||||
from uniface import MobileGaze
|
||||
from uniface.constants import GazeWeights
|
||||
import numpy as np
|
||||
|
||||
# Default (recommended)
|
||||
gaze_estimator = MobileGaze() # Uses RESNET34
|
||||
|
||||
# Lightweight model
|
||||
gaze_estimator = MobileGaze(model_name=GazeWeights.MOBILEONE_S0)
|
||||
|
||||
# Estimate gaze from face crop
|
||||
pitch, yaw = gaze_estimator.estimate(face_crop)
|
||||
print(f"Pitch: {np.degrees(pitch):.1f}°, Yaw: {np.degrees(yaw):.1f}°")
|
||||
```
|
||||
|
||||
**Note**: Requires face crop as input. Use face detection first to obtain bounding boxes.
|
||||
|
||||
---
|
||||
|
||||
## Face Parsing Models
|
||||
|
||||
### BiSeNet Family
|
||||
|
||||
BiSeNet (Bilateral Segmentation Network) models for semantic face parsing. Segments face images into 19 facial component classes.
|
||||
|
||||
| Model Name | Params | Size | Classes | Use Case |
|
||||
| -------------- | ------ | ------- | ------- | ----------------------------- |
|
||||
| `RESNET18` ⭐ | 13.3M | 50.7 MB | 19 | **Recommended default** |
|
||||
| `RESNET34` | 24.1M | 89.2 MB | 19 | Higher accuracy |
|
||||
|
||||
**19 Facial Component Classes:**
|
||||
1. Background
|
||||
2. Skin
|
||||
3. Left Eyebrow
|
||||
4. Right Eyebrow
|
||||
5. Left Eye
|
||||
6. Right Eye
|
||||
7. Eye Glasses
|
||||
8. Left Ear
|
||||
9. Right Ear
|
||||
10. Ear Ring
|
||||
11. Nose
|
||||
12. Mouth
|
||||
13. Upper Lip
|
||||
14. Lower Lip
|
||||
15. Neck
|
||||
16. Neck Lace
|
||||
17. Cloth
|
||||
18. Hair
|
||||
19. Hat
|
||||
|
||||
**Dataset**: Trained on CelebAMask-HQ
|
||||
**Architecture**: BiSeNet with ResNet backbone
|
||||
**Input Size**: 512×512 (automatically resized)
|
||||
|
||||
#### Usage
|
||||
|
||||
```python
|
||||
from uniface.parsing import BiSeNet
|
||||
from uniface.constants import ParsingWeights
|
||||
from uniface.visualization import vis_parsing_maps
|
||||
import cv2
|
||||
|
||||
# Default (recommended)
|
||||
parser = BiSeNet() # Uses RESNET18
|
||||
|
||||
# Higher accuracy model
|
||||
parser = BiSeNet(model_name=ParsingWeights.RESNET34)
|
||||
|
||||
# Parse face image (already cropped)
|
||||
mask = parser.parse(face_image)
|
||||
|
||||
# Visualize with overlay
|
||||
face_rgb = cv2.cvtColor(face_image, cv2.COLOR_BGR2RGB)
|
||||
vis_result = vis_parsing_maps(face_rgb, mask, save_image=False)
|
||||
|
||||
# mask shape: (H, W) with values 0-18 representing classes
|
||||
print(f"Detected {len(np.unique(mask))} facial components")
|
||||
```
|
||||
|
||||
**Applications:**
|
||||
- Face makeup and beauty applications
|
||||
- Virtual try-on systems
|
||||
- Face editing and manipulation
|
||||
- Facial feature extraction
|
||||
- Portrait segmentation
|
||||
|
||||
**Note**: Input should be a cropped face image. For full pipeline, use face detection first to obtain face crops.
|
||||
|
||||
---
|
||||
|
||||
## Model Updates
|
||||
|
||||
Models are automatically downloaded and cached on first use. Cache location: `~/.uniface/models/`
|
||||
|
||||
### Manual Model Management
|
||||
|
||||
```python
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.constants import RetinaFaceWeights
|
||||
|
||||
# Download specific model
|
||||
model_path = verify_model_weights(
|
||||
RetinaFaceWeights.MNET_V2,
|
||||
root='./custom_cache'
|
||||
)
|
||||
|
||||
# Models are verified with SHA-256 checksums
|
||||
```
|
||||
|
||||
### Download All Models
|
||||
|
||||
```bash
|
||||
# Using the provided script
|
||||
python scripts/download_model.py
|
||||
|
||||
# Download specific model
|
||||
python scripts/download_model.py --model MNET_V2
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## References
|
||||
|
||||
### Model Training & Architectures
|
||||
|
||||
- **RetinaFace Training**: [yakhyo/retinaface-pytorch](https://github.com/yakhyo/retinaface-pytorch) - PyTorch implementation and training code
|
||||
- **YOLOv5-Face Original**: [deepcam-cn/yolov5-face](https://github.com/deepcam-cn/yolov5-face) - Original PyTorch implementation
|
||||
- **YOLOv5-Face ONNX**: [yakhyo/yolov5-face-onnx-inference](https://github.com/yakhyo/yolov5-face-onnx-inference) - ONNX inference implementation
|
||||
- **Face Recognition Training**: [yakhyo/face-recognition](https://github.com/yakhyo/face-recognition) - ArcFace, MobileFace, SphereFace training code
|
||||
- **Gaze Estimation Training**: [yakhyo/gaze-estimation](https://github.com/yakhyo/gaze-estimation) - MobileGaze training code and pretrained weights
|
||||
- **Face Parsing Training**: [yakhyo/face-parsing](https://github.com/yakhyo/face-parsing) - BiSeNet training code and pretrained weights
|
||||
- **InsightFace**: [deepinsight/insightface](https://github.com/deepinsight/insightface) - Model architectures and pretrained weights
|
||||
|
||||
### Papers
|
||||
|
||||
- **RetinaFace**: [Single-Shot Multi-Level Face Localisation in the Wild](https://arxiv.org/abs/1905.00641)
|
||||
- **SCRFD**: [Sample and Computation Redistribution for Efficient Face Detection](https://arxiv.org/abs/2105.04714)
|
||||
- **YOLOv5-Face**: [YOLO5Face: Why Reinventing a Face Detector](https://arxiv.org/abs/2105.12931)
|
||||
- **ArcFace**: [Additive Angular Margin Loss for Deep Face Recognition](https://arxiv.org/abs/1801.07698)
|
||||
- **SphereFace**: [Deep Hypersphere Embedding for Face Recognition](https://arxiv.org/abs/1704.08063)
|
||||
- **BiSeNet**: [Bilateral Segmentation Network for Real-time Semantic Segmentation](https://arxiv.org/abs/1808.00897)
|
||||
522
QUICKSTART.md
Normal file
@@ -0,0 +1,522 @@
|
||||
# UniFace Quick Start Guide
|
||||
|
||||
Get up and running with UniFace in 5 minutes! This guide covers the most common use cases.
|
||||
|
||||
---
|
||||
|
||||
## Installation
|
||||
|
||||
```bash
|
||||
# macOS (Apple Silicon) - automatically includes ARM64 optimizations
|
||||
pip install uniface
|
||||
|
||||
# Linux/Windows with NVIDIA GPU
|
||||
pip install uniface[gpu]
|
||||
|
||||
# CPU-only (all platforms)
|
||||
pip install uniface
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 1. Face Detection (30 seconds)
|
||||
|
||||
Detect faces in an image:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace
|
||||
|
||||
# Load image
|
||||
image = cv2.imread("photo.jpg")
|
||||
|
||||
# Initialize detector (models auto-download on first use)
|
||||
detector = RetinaFace()
|
||||
|
||||
# Detect faces
|
||||
faces = detector.detect(image)
|
||||
|
||||
# Print results
|
||||
for i, face in enumerate(faces):
|
||||
print(f"Face {i+1}:")
|
||||
print(f" Confidence: {face['confidence']:.2f}")
|
||||
print(f" BBox: {face['bbox']}")
|
||||
print(f" Landmarks: {len(face['landmarks'])} points")
|
||||
```
|
||||
|
||||
**Output:**
|
||||
|
||||
```
|
||||
Face 1:
|
||||
Confidence: 0.99
|
||||
BBox: [120.5, 85.3, 245.8, 210.6]
|
||||
Landmarks: 5 points
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 2. Visualize Detections (1 minute)
|
||||
|
||||
Draw bounding boxes and landmarks:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
# Detect faces
|
||||
detector = RetinaFace()
|
||||
image = cv2.imread("photo.jpg")
|
||||
faces = detector.detect(image)
|
||||
|
||||
# Extract visualization data
|
||||
bboxes = [f['bbox'] for f in faces]
|
||||
scores = [f['confidence'] for f in faces]
|
||||
landmarks = [f['landmarks'] for f in faces]
|
||||
|
||||
# Draw on image
|
||||
draw_detections(
|
||||
image=image,
|
||||
bboxes=bboxes,
|
||||
scores=scores,
|
||||
landmarks=landmarks,
|
||||
vis_threshold=0.6,
|
||||
)
|
||||
|
||||
# Save result
|
||||
cv2.imwrite("output.jpg", image)
|
||||
print("Saved output.jpg")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 3. Face Recognition (2 minutes)
|
||||
|
||||
Compare two faces:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
import numpy as np
|
||||
from uniface import RetinaFace, ArcFace
|
||||
|
||||
# Initialize models
|
||||
detector = RetinaFace()
|
||||
recognizer = ArcFace()
|
||||
|
||||
# Load two images
|
||||
image1 = cv2.imread("person1.jpg")
|
||||
image2 = cv2.imread("person2.jpg")
|
||||
|
||||
# Detect faces
|
||||
faces1 = detector.detect(image1)
|
||||
faces2 = detector.detect(image2)
|
||||
|
||||
if faces1 and faces2:
|
||||
# Extract embeddings
|
||||
emb1 = recognizer.get_normalized_embedding(image1, faces1[0]['landmarks'])
|
||||
emb2 = recognizer.get_normalized_embedding(image2, faces2[0]['landmarks'])
|
||||
|
||||
# Compute similarity (cosine similarity)
|
||||
similarity = np.dot(emb1, emb2.T)[0][0]
|
||||
|
||||
# Interpret result
|
||||
if similarity > 0.6:
|
||||
print(f"Same person (similarity: {similarity:.3f})")
|
||||
else:
|
||||
print(f"Different people (similarity: {similarity:.3f})")
|
||||
else:
|
||||
print("No faces detected")
|
||||
```
|
||||
|
||||
**Similarity thresholds:**
|
||||
|
||||
- `> 0.6`: Same person (high confidence)
|
||||
- `0.4 - 0.6`: Uncertain (manual review)
|
||||
- `< 0.4`: Different people
|
||||
|
||||
---
|
||||
|
||||
## 4. Webcam Demo (2 minutes)
|
||||
|
||||
Real-time face detection:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
detector = RetinaFace()
|
||||
cap = cv2.VideoCapture(0)
|
||||
|
||||
print("Press 'q' to quit")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
# Detect faces
|
||||
faces = detector.detect(frame)
|
||||
|
||||
# Draw results
|
||||
bboxes = [f['bbox'] for f in faces]
|
||||
scores = [f['confidence'] for f in faces]
|
||||
landmarks = [f['landmarks'] for f in faces]
|
||||
draw_detections(
|
||||
image=frame,
|
||||
bboxes=bboxes,
|
||||
scores=scores,
|
||||
landmarks=landmarks,
|
||||
)
|
||||
|
||||
# Show frame
|
||||
cv2.imshow("UniFace - Press 'q' to quit", frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 5. Age & Gender Detection (2 minutes)
|
||||
|
||||
Detect age and gender:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace, AgeGender
|
||||
|
||||
# Initialize models
|
||||
detector = RetinaFace()
|
||||
age_gender = AgeGender()
|
||||
|
||||
# Load image
|
||||
image = cv2.imread("photo.jpg")
|
||||
faces = detector.detect(image)
|
||||
|
||||
# Predict attributes
|
||||
for i, face in enumerate(faces):
|
||||
gender, age = age_gender.predict(image, face['bbox'])
|
||||
gender_str = 'Female' if gender == 0 else 'Male'
|
||||
print(f"Face {i+1}: {gender_str}, {age} years old")
|
||||
```
|
||||
|
||||
**Output:**
|
||||
|
||||
```
|
||||
Face 1: Male, 32 years old
|
||||
Face 2: Female, 28 years old
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 6. Facial Landmarks (2 minutes)
|
||||
|
||||
Detect 106 facial landmarks:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace, Landmark106
|
||||
|
||||
# Initialize models
|
||||
detector = RetinaFace()
|
||||
landmarker = Landmark106()
|
||||
|
||||
# Detect face and landmarks
|
||||
image = cv2.imread("photo.jpg")
|
||||
faces = detector.detect(image)
|
||||
|
||||
if faces:
|
||||
landmarks = landmarker.get_landmarks(image, faces[0]['bbox'])
|
||||
print(f"Detected {len(landmarks)} landmarks")
|
||||
|
||||
# Draw landmarks
|
||||
for x, y in landmarks.astype(int):
|
||||
cv2.circle(image, (x, y), 2, (0, 255, 0), -1)
|
||||
|
||||
cv2.imwrite("landmarks.jpg", image)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 7. Gaze Estimation (2 minutes)
|
||||
|
||||
Estimate where a person is looking:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
import numpy as np
|
||||
from uniface import RetinaFace, MobileGaze
|
||||
from uniface.visualization import draw_gaze
|
||||
|
||||
# Initialize models
|
||||
detector = RetinaFace()
|
||||
gaze_estimator = MobileGaze()
|
||||
|
||||
# Load image
|
||||
image = cv2.imread("photo.jpg")
|
||||
faces = detector.detect(image)
|
||||
|
||||
# Estimate gaze for each face
|
||||
for i, face in enumerate(faces):
|
||||
bbox = face['bbox']
|
||||
x1, y1, x2, y2 = map(int, bbox[:4])
|
||||
face_crop = image[y1:y2, x1:x2]
|
||||
|
||||
if face_crop.size > 0:
|
||||
pitch, yaw = gaze_estimator.estimate(face_crop)
|
||||
print(f"Face {i+1}: pitch={np.degrees(pitch):.1f}°, yaw={np.degrees(yaw):.1f}°")
|
||||
|
||||
# Draw gaze direction
|
||||
draw_gaze(image, bbox, pitch, yaw)
|
||||
|
||||
cv2.imwrite("gaze_output.jpg", image)
|
||||
```
|
||||
|
||||
**Output:**
|
||||
|
||||
```
|
||||
Face 1: pitch=5.2°, yaw=-12.3°
|
||||
Face 2: pitch=-8.1°, yaw=15.7°
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 8. Face Parsing (2 minutes)
|
||||
|
||||
Segment face into semantic components (skin, eyes, nose, mouth, hair, etc.):
|
||||
|
||||
```python
|
||||
import cv2
|
||||
import numpy as np
|
||||
from uniface.parsing import BiSeNet
|
||||
from uniface.visualization import vis_parsing_maps
|
||||
|
||||
# Initialize parser
|
||||
parser = BiSeNet() # Uses ResNet18 by default
|
||||
|
||||
# Load face image (already cropped)
|
||||
face_image = cv2.imread("face.jpg")
|
||||
|
||||
# Parse face into 19 components
|
||||
mask = parser.parse(face_image)
|
||||
|
||||
# Visualize with overlay
|
||||
face_rgb = cv2.cvtColor(face_image, cv2.COLOR_BGR2RGB)
|
||||
vis_result = vis_parsing_maps(face_rgb, mask, save_image=False)
|
||||
|
||||
# Convert back to BGR for saving
|
||||
vis_bgr = cv2.cvtColor(vis_result, cv2.COLOR_RGB2BGR)
|
||||
cv2.imwrite("parsed_face.jpg", vis_bgr)
|
||||
|
||||
print(f"Detected {len(np.unique(mask))} facial components")
|
||||
```
|
||||
|
||||
**Output:**
|
||||
|
||||
```
|
||||
Detected 12 facial components
|
||||
```
|
||||
|
||||
**19 Facial Component Classes:**
|
||||
- Background, Skin, Eyebrows (L/R), Eyes (L/R), Eye Glasses
|
||||
- Ears (L/R), Ear Ring, Nose, Mouth, Lips (Upper/Lower)
|
||||
- Neck, Neck Lace, Cloth, Hair, Hat
|
||||
|
||||
---
|
||||
|
||||
## 9. Batch Processing (3 minutes)
|
||||
|
||||
Process multiple images:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from pathlib import Path
|
||||
from uniface import RetinaFace
|
||||
|
||||
detector = RetinaFace()
|
||||
|
||||
# Process all images in a folder
|
||||
image_dir = Path("images/")
|
||||
output_dir = Path("output/")
|
||||
output_dir.mkdir(exist_ok=True)
|
||||
|
||||
for image_path in image_dir.glob("*.jpg"):
|
||||
print(f"Processing {image_path.name}...")
|
||||
|
||||
image = cv2.imread(str(image_path))
|
||||
faces = detector.detect(image)
|
||||
|
||||
print(f" Found {len(faces)} face(s)")
|
||||
|
||||
# Save results
|
||||
output_path = output_dir / image_path.name
|
||||
# ... draw and save ...
|
||||
|
||||
print("Done!")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 10. Model Selection
|
||||
|
||||
Choose the right model for your use case:
|
||||
|
||||
### Detection Models
|
||||
|
||||
```python
|
||||
from uniface.detection import RetinaFace, SCRFD, YOLOv5Face
|
||||
from uniface.constants import RetinaFaceWeights, SCRFDWeights, YOLOv5FaceWeights
|
||||
|
||||
# Fast detection (mobile/edge devices)
|
||||
detector = RetinaFace(
|
||||
model_name=RetinaFaceWeights.MNET_025,
|
||||
conf_thresh=0.7
|
||||
)
|
||||
|
||||
# Balanced (recommended)
|
||||
detector = RetinaFace(
|
||||
model_name=RetinaFaceWeights.MNET_V2
|
||||
)
|
||||
|
||||
# Real-time with high accuracy
|
||||
detector = YOLOv5Face(
|
||||
model_name=YOLOv5FaceWeights.YOLOV5S,
|
||||
conf_thresh=0.6,
|
||||
nms_thresh=0.5
|
||||
)
|
||||
|
||||
# High accuracy (server/GPU)
|
||||
detector = SCRFD(
|
||||
model_name=SCRFDWeights.SCRFD_10G_KPS,
|
||||
conf_thresh=0.5
|
||||
)
|
||||
```
|
||||
|
||||
### Recognition Models
|
||||
|
||||
```python
|
||||
from uniface import ArcFace, MobileFace, SphereFace
|
||||
from uniface.constants import MobileFaceWeights, SphereFaceWeights
|
||||
|
||||
# ArcFace (recommended for most use cases)
|
||||
recognizer = ArcFace() # Best accuracy
|
||||
|
||||
# MobileFace (lightweight for mobile/edge)
|
||||
recognizer = MobileFace(model_name=MobileFaceWeights.MNET_V2) # Fast, small size
|
||||
|
||||
# SphereFace (angular margin approach)
|
||||
recognizer = SphereFace(model_name=SphereFaceWeights.SPHERE20) # Alternative method
|
||||
```
|
||||
|
||||
### Gaze Estimation Models
|
||||
|
||||
```python
|
||||
from uniface import MobileGaze
|
||||
from uniface.constants import GazeWeights
|
||||
|
||||
# Default (recommended)
|
||||
gaze_estimator = MobileGaze() # Uses RESNET34
|
||||
|
||||
# Lightweight (mobile/edge devices)
|
||||
gaze_estimator = MobileGaze(model_name=GazeWeights.MOBILEONE_S0)
|
||||
|
||||
# High accuracy
|
||||
gaze_estimator = MobileGaze(model_name=GazeWeights.RESNET50)
|
||||
```
|
||||
|
||||
### Face Parsing Models
|
||||
|
||||
```python
|
||||
from uniface.parsing import BiSeNet
|
||||
from uniface.constants import ParsingWeights
|
||||
|
||||
# Default (recommended, 50.7 MB)
|
||||
parser = BiSeNet() # Uses RESNET18
|
||||
|
||||
# Higher accuracy (89.2 MB)
|
||||
parser = BiSeNet(model_name=ParsingWeights.RESNET34)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Common Issues
|
||||
|
||||
### 1. Models Not Downloading
|
||||
|
||||
```python
|
||||
# Manually download a model
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.constants import RetinaFaceWeights
|
||||
|
||||
model_path = verify_model_weights(RetinaFaceWeights.MNET_V2)
|
||||
print(f"Model downloaded to: {model_path}")
|
||||
```
|
||||
|
||||
### 2. Check Hardware Acceleration
|
||||
|
||||
```python
|
||||
import onnxruntime as ort
|
||||
print("Available providers:", ort.get_available_providers())
|
||||
|
||||
# macOS M-series should show: ['CoreMLExecutionProvider', ...]
|
||||
# NVIDIA GPU should show: ['CUDAExecutionProvider', ...]
|
||||
```
|
||||
|
||||
### 3. Slow Performance on Mac
|
||||
|
||||
The standard installation includes ARM64 optimizations for Apple Silicon. If performance is slow, verify you're using the ARM64 build of Python:
|
||||
|
||||
```bash
|
||||
python -c "import platform; print(platform.machine())"
|
||||
# Should show: arm64 (not x86_64)
|
||||
```
|
||||
|
||||
### 4. Import Errors
|
||||
|
||||
```python
|
||||
# Correct imports
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.recognition import ArcFace
|
||||
from uniface.landmark import Landmark106
|
||||
|
||||
# Wrong imports
|
||||
from uniface import retinaface # Module, not class
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Next Steps
|
||||
|
||||
### Jupyter Notebook Examples
|
||||
|
||||
Explore interactive examples for common tasks:
|
||||
|
||||
| Example | Description | Notebook |
|
||||
|---------|-------------|----------|
|
||||
| **Face Detection** | Detect faces and facial landmarks | [face_detection.ipynb](examples/face_detection.ipynb) |
|
||||
| **Face Alignment** | Align and crop faces for recognition | [face_alignment.ipynb](examples/face_alignment.ipynb) |
|
||||
| **Face Recognition** | Extract face embeddings and compare faces | [face_analyzer.ipynb](examples/face_analyzer.ipynb) |
|
||||
| **Face Verification** | Compare two faces to verify identity | [face_verification.ipynb](examples/face_verification.ipynb) |
|
||||
| **Face Search** | Find a person in a group photo | [face_search.ipynb](examples/face_search.ipynb) |
|
||||
| **Face Parsing** | Segment face into semantic components | [face_parsing.ipynb](examples/face_parsing.ipynb) |
|
||||
| **Gaze Estimation** | Estimate gaze direction | [gaze_estimation.ipynb](examples/gaze_estimation.ipynb) |
|
||||
|
||||
### Additional Resources
|
||||
|
||||
- **Model Benchmarks**: See [MODELS.md](MODELS.md) for performance comparisons
|
||||
- **Full Documentation**: Read [README.md](README.md) for complete API reference
|
||||
|
||||
---
|
||||
|
||||
## References
|
||||
|
||||
- **RetinaFace Training**: [yakhyo/retinaface-pytorch](https://github.com/yakhyo/retinaface-pytorch)
|
||||
- **YOLOv5-Face ONNX**: [yakhyo/yolov5-face-onnx-inference](https://github.com/yakhyo/yolov5-face-onnx-inference)
|
||||
- **Face Recognition Training**: [yakhyo/face-recognition](https://github.com/yakhyo/face-recognition)
|
||||
- **Gaze Estimation Training**: [yakhyo/gaze-estimation](https://github.com/yakhyo/gaze-estimation)
|
||||
- **Face Parsing Training**: [yakhyo/face-parsing](https://github.com/yakhyo/face-parsing)
|
||||
- **InsightFace**: [deepinsight/insightface](https://github.com/deepinsight/insightface)
|
||||
644
README.md
@@ -1,239 +1,575 @@
|
||||
# UniFace: All-in-One Face Analysis Library
|
||||
|
||||
<div align="center">
|
||||
|
||||
[](https://opensource.org/licenses/MIT)
|
||||

|
||||
[](https://pypi.org/project/uniface/)
|
||||
[](https://github.com/yakhyo/uniface/actions)
|
||||
[](https://www.python.org/)
|
||||
[](https://pypi.org/project/uniface/)
|
||||
[](https://github.com/yakhyo/uniface/actions)
|
||||
[](https://pepy.tech/project/uniface)
|
||||
[](https://www.python.org/dev/peps/pep-0008/)
|
||||
[](https://github.com/yakhyo/uniface/releases)
|
||||
[](https://deepwiki.com/yakhyo/uniface)
|
||||
|
||||
<div align="center">
|
||||
<img src=".github/logos/logo_web.webp" width=75%>
|
||||
</div>
|
||||
|
||||
**uniface** is a lightweight face detection library designed for high-performance face localization and landmark detection. The library supports ONNX models and provides utilities for bounding box visualization and landmark plotting. To train RetinaFace model, see https://github.com/yakhyo/retinaface-pytorch.
|
||||
**UniFace** is a lightweight, production-ready face analysis library built on ONNX Runtime. It provides high-performance face detection, recognition, landmark detection, face parsing, gaze estimation, and attribute analysis with hardware acceleration support across platforms.
|
||||
|
||||
---
|
||||
|
||||
## Features
|
||||
- [ ] Age and gender detection (Planned).
|
||||
- [ ] Face recognition (Planned).
|
||||
- [x] High-speed face detection using ONNX models (Added: 2024-11-20).
|
||||
- [x] Accurate facial landmark localization (e.g., eyes, nose, and mouth) (Added: 2024-11-20).
|
||||
- [x] Easy-to-use API for inference and visualization (Added: 2024-11-20).
|
||||
|
||||
- **High-Speed Face Detection**: ONNX-optimized RetinaFace, SCRFD, and YOLOv5-Face models
|
||||
- **Facial Landmark Detection**: Accurate 106-point landmark localization
|
||||
- **Face Recognition**: ArcFace, MobileFace, and SphereFace embeddings
|
||||
- **Face Parsing**: BiSeNet-based semantic segmentation with 19 facial component classes
|
||||
- **Gaze Estimation**: Real-time gaze direction prediction with MobileGaze
|
||||
- **Attribute Analysis**: Age, gender, and emotion detection
|
||||
- **Face Alignment**: Precise alignment for downstream tasks
|
||||
- **Hardware Acceleration**: ARM64 optimizations (Apple Silicon), CUDA (NVIDIA), CPU fallback
|
||||
- **Simple API**: Intuitive factory functions and clean interfaces
|
||||
- **Production-Ready**: Type hints, comprehensive logging, PEP8 compliant
|
||||
|
||||
---
|
||||
|
||||
## Installation
|
||||
|
||||
### Using pip
|
||||
### Quick Install (All Platforms)
|
||||
|
||||
```bash
|
||||
pip install uniface
|
||||
```
|
||||
|
||||
### Local installation using pip
|
||||
### Platform-Specific Installation
|
||||
|
||||
**Clone the repository**
|
||||
#### macOS (Apple Silicon - M1/M2/M3/M4)
|
||||
|
||||
For Apple Silicon Macs, the standard installation automatically includes optimized ARM64 support:
|
||||
|
||||
```bash
|
||||
pip install uniface
|
||||
```
|
||||
|
||||
The base `onnxruntime` package (included with uniface) has native Apple Silicon support with ARM64 optimizations built-in since version 1.13+.
|
||||
|
||||
#### Linux/Windows with NVIDIA GPU
|
||||
|
||||
For CUDA acceleration on NVIDIA GPUs:
|
||||
|
||||
```bash
|
||||
pip install uniface[gpu]
|
||||
```
|
||||
|
||||
**Requirements:**
|
||||
|
||||
- CUDA 11.x or 12.x
|
||||
- cuDNN 8.x
|
||||
- See [ONNX Runtime GPU requirements](https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html)
|
||||
|
||||
#### CPU-Only (All Platforms)
|
||||
|
||||
```bash
|
||||
pip install uniface
|
||||
```
|
||||
|
||||
### Install from Source
|
||||
|
||||
```bash
|
||||
git clone https://github.com/yakhyo/uniface.git
|
||||
cd uniface
|
||||
```
|
||||
|
||||
**Install using pip**
|
||||
|
||||
```bash
|
||||
pip install .
|
||||
pip install -e .
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Quick Start
|
||||
|
||||
### Initialize the Model
|
||||
### Face Detection
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace
|
||||
|
||||
# Initialize the RetinaFace model
|
||||
uniface_inference = RetinaFace(
|
||||
model="retinaface_mnet_v2", # Model name
|
||||
conf_thresh=0.5, # Confidence threshold
|
||||
pre_nms_topk=5000, # Pre-NMS Top-K detections
|
||||
nms_thresh=0.4, # NMS IoU threshold
|
||||
post_nms_topk=750 # Post-NMS Top-K detections
|
||||
# Initialize detector
|
||||
detector = RetinaFace()
|
||||
|
||||
# Load image
|
||||
image = cv2.imread("image.jpg")
|
||||
|
||||
# Detect faces
|
||||
faces = detector.detect(image)
|
||||
|
||||
# Process results
|
||||
for face in faces:
|
||||
bbox = face['bbox'] # [x1, y1, x2, y2]
|
||||
confidence = face['confidence']
|
||||
landmarks = face['landmarks'] # 5-point landmarks
|
||||
print(f"Face detected with confidence: {confidence:.2f}")
|
||||
```
|
||||
|
||||
### Face Recognition
|
||||
|
||||
```python
|
||||
from uniface import ArcFace, RetinaFace
|
||||
from uniface import compute_similarity
|
||||
|
||||
# Initialize models
|
||||
detector = RetinaFace()
|
||||
recognizer = ArcFace()
|
||||
|
||||
# Detect and extract embeddings
|
||||
faces1 = detector.detect(image1)
|
||||
faces2 = detector.detect(image2)
|
||||
|
||||
embedding1 = recognizer.get_normalized_embedding(image1, faces1[0]['landmarks'])
|
||||
embedding2 = recognizer.get_normalized_embedding(image2, faces2[0]['landmarks'])
|
||||
|
||||
# Compare faces
|
||||
similarity = compute_similarity(embedding1, embedding2)
|
||||
print(f"Similarity: {similarity:.4f}")
|
||||
```
|
||||
|
||||
### Facial Landmarks
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace, Landmark106
|
||||
|
||||
detector = RetinaFace()
|
||||
landmarker = Landmark106()
|
||||
|
||||
faces = detector.detect(image)
|
||||
landmarks = landmarker.get_landmarks(image, faces[0]['bbox'])
|
||||
# Returns 106 (x, y) landmark points
|
||||
```
|
||||
|
||||
### Age & Gender Detection
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace, AgeGender
|
||||
|
||||
detector = RetinaFace()
|
||||
age_gender = AgeGender()
|
||||
|
||||
faces = detector.detect(image)
|
||||
gender, age = age_gender.predict(image, faces[0]['bbox'])
|
||||
gender_str = 'Female' if gender == 0 else 'Male'
|
||||
print(f"{gender_str}, {age} years old")
|
||||
```
|
||||
|
||||
### Gaze Estimation
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace, MobileGaze
|
||||
from uniface.visualization import draw_gaze
|
||||
import numpy as np
|
||||
|
||||
detector = RetinaFace()
|
||||
gaze_estimator = MobileGaze()
|
||||
|
||||
faces = detector.detect(image)
|
||||
for face in faces:
|
||||
bbox = face['bbox']
|
||||
x1, y1, x2, y2 = map(int, bbox[:4])
|
||||
face_crop = image[y1:y2, x1:x2]
|
||||
|
||||
pitch, yaw = gaze_estimator.estimate(face_crop)
|
||||
print(f"Gaze: pitch={np.degrees(pitch):.1f}°, yaw={np.degrees(yaw):.1f}°")
|
||||
|
||||
# Visualize
|
||||
draw_gaze(image, bbox, pitch, yaw)
|
||||
```
|
||||
|
||||
### Face Parsing
|
||||
|
||||
```python
|
||||
from uniface.parsing import BiSeNet
|
||||
from uniface.visualization import vis_parsing_maps
|
||||
|
||||
# Initialize parser
|
||||
parser = BiSeNet() # Uses ResNet18 by default
|
||||
|
||||
# Parse face image (already cropped)
|
||||
mask = parser.parse(face_image)
|
||||
|
||||
# Visualize with overlay
|
||||
import cv2
|
||||
face_rgb = cv2.cvtColor(face_image, cv2.COLOR_BGR2RGB)
|
||||
vis_result = vis_parsing_maps(face_rgb, mask, save_image=False)
|
||||
|
||||
# mask contains 19 classes: skin, eyes, nose, mouth, hair, etc.
|
||||
print(f"Unique classes: {len(np.unique(mask))}")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Documentation
|
||||
|
||||
- [**QUICKSTART.md**](QUICKSTART.md) - 5-minute getting started guide
|
||||
- [**MODELS.md**](MODELS.md) - Model zoo, benchmarks, and selection guide
|
||||
- [**Examples**](examples/) - Jupyter notebooks with detailed examples
|
||||
|
||||
---
|
||||
|
||||
## API Overview
|
||||
|
||||
### Factory Functions (Recommended)
|
||||
|
||||
```python
|
||||
from uniface.detection import RetinaFace, SCRFD
|
||||
from uniface.recognition import ArcFace
|
||||
from uniface.landmark import Landmark106
|
||||
|
||||
from uniface.constants import SCRFDWeights
|
||||
|
||||
# Create detector with default settings
|
||||
detector = RetinaFace()
|
||||
|
||||
# Create with custom config
|
||||
detector = SCRFD(
|
||||
model_name=SCRFDWeights.SCRFD_10G_KPS, # SCRFDWeights.SCRFD_500M_KPS
|
||||
conf_thresh=0.4,
|
||||
input_size=(640, 640)
|
||||
)
|
||||
# Or with defaults settings: detector = SCRFD()
|
||||
|
||||
# Recognition and landmarks
|
||||
recognizer = ArcFace()
|
||||
landmarker = Landmark106()
|
||||
```
|
||||
|
||||
### Run Inference
|
||||
### Direct Model Instantiation
|
||||
|
||||
Inference on image:
|
||||
```python
|
||||
from uniface import RetinaFace, SCRFD, YOLOv5Face, ArcFace, MobileFace, SphereFace
|
||||
from uniface.constants import RetinaFaceWeights, YOLOv5FaceWeights
|
||||
|
||||
# Detection
|
||||
detector = RetinaFace(
|
||||
model_name=RetinaFaceWeights.MNET_V2,
|
||||
conf_thresh=0.5,
|
||||
nms_thresh=0.4
|
||||
)
|
||||
# Or detector = RetinaFace()
|
||||
|
||||
# YOLOv5-Face detection
|
||||
detector = YOLOv5Face(
|
||||
model_name=YOLOv5FaceWeights.YOLOV5S,
|
||||
conf_thresh=0.6,
|
||||
nms_thresh=0.5
|
||||
)
|
||||
# Or detector = YOLOv5Face
|
||||
|
||||
# Recognition
|
||||
recognizer = ArcFace() # Uses default weights
|
||||
recognizer = MobileFace() # Lightweight alternative
|
||||
recognizer = SphereFace() # Angular softmax alternative
|
||||
```
|
||||
|
||||
### High-Level Detection API
|
||||
|
||||
```python
|
||||
from uniface import detect_faces
|
||||
|
||||
# One-line face detection
|
||||
faces = detect_faces(image, method='retinaface', conf_thresh=0.8) # methods: retinaface, scrfd, yolov5face
|
||||
```
|
||||
|
||||
### Key Parameters (quick reference)
|
||||
|
||||
**Detection**
|
||||
|
||||
| Class | Key params (defaults) | Notes |
|
||||
| -------------- | ------------------------------------------------------------------------------------------------------------------------------------ | ---------------------------------------------- |
|
||||
| `RetinaFace` | `model_name=RetinaFaceWeights.MNET_V2`, `conf_thresh=0.5`, `nms_thresh=0.4`, `input_size=(640, 640)`, `dynamic_size=False` | Supports 5-point landmarks |
|
||||
| `SCRFD` | `model_name=SCRFDWeights.SCRFD_10G_KPS`, `conf_thresh=0.5`, `nms_thresh=0.4`, `input_size=(640, 640)` | Supports 5-point landmarks |
|
||||
| `YOLOv5Face` | `model_name=YOLOv5FaceWeights.YOLOV5S`, `conf_thresh=0.6`, `nms_thresh=0.5`, `input_size=640` (fixed) | Supports 5-point landmarks; models: YOLOV5N/S/M; `input_size` must be 640 |
|
||||
|
||||
**Recognition**
|
||||
|
||||
| Class | Key params (defaults) | Notes |
|
||||
| -------------- | ----------------------------------------- | ------------------------------------- |
|
||||
| `ArcFace` | `model_name=ArcFaceWeights.MNET` | Returns 512-dim normalized embeddings |
|
||||
| `MobileFace` | `model_name=MobileFaceWeights.MNET_V2` | Lightweight embeddings |
|
||||
| `SphereFace` | `model_name=SphereFaceWeights.SPHERE20` | Angular softmax variant |
|
||||
|
||||
**Landmark & Attributes**
|
||||
|
||||
| Class | Key params (defaults) | Notes |
|
||||
| --------------- | --------------------------------------------------------------------- | --------------------------------------- |
|
||||
| `Landmark106` | No required params | 106-point landmarks |
|
||||
| `AgeGender` | `model_name=AgeGenderWeights.DEFAULT`; `input_size` auto-detected | Requires bbox; ONNXRuntime |
|
||||
| `Emotion` | `model_weights=DDAMFNWeights.AFFECNET7`, `input_size=(112, 112)` | Requires 5-point landmarks; TorchScript |
|
||||
|
||||
**Gaze Estimation**
|
||||
|
||||
| Class | Key params (defaults) | Notes |
|
||||
| ------------- | ------------------------------------------ | ------------------------------------ |
|
||||
| `MobileGaze` | `model_name=GazeWeights.RESNET34` | Returns (pitch, yaw) angles in radians; trained on Gaze360 |
|
||||
|
||||
**Face Parsing**
|
||||
|
||||
| Class | Key params (defaults) | Notes |
|
||||
| ---------- | ---------------------------------------- | ------------------------------------ |
|
||||
| `BiSeNet` | `model_name=ParsingWeights.RESNET18`, `input_size=(512, 512)` | 19 facial component classes; BiSeNet architecture with ResNet backbone |
|
||||
|
||||
---
|
||||
|
||||
## Model Performance
|
||||
|
||||
### Face Detection (WIDER FACE Dataset)
|
||||
|
||||
| Model | Easy | Medium | Hard | Use Case |
| ------------------ | ------ | ------ | ------ | ---------------------- |
| retinaface_mnet025 | 88.48% | 87.02% | 80.61% | Mobile/Edge devices |
| retinaface_mnet_v2 | 91.70% | 91.03% | 86.60% | Balanced (recommended) |
| retinaface_r34     | 94.16% | 93.12% | 88.90% | High accuracy |
| scrfd_500m         | 90.57% | 88.12% | 68.51% | Real-time applications |
| scrfd_10g          | 95.16% | 93.87% | 83.05% | Best accuracy/speed |
| yolov5n_face       | 93.61% | 91.52% | 80.53% | Lightweight/Mobile |
| yolov5s_face       | 94.33% | 92.61% | 83.15% | Real-time + accuracy |
| yolov5m_face       | 95.30% | 93.76% | 85.28% | High accuracy |

_Accuracy values from original papers: [RetinaFace](https://arxiv.org/abs/1905.00641), [SCRFD](https://arxiv.org/abs/2105.04714), [YOLOv5-Face](https://arxiv.org/abs/2105.12931)_
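To run one of the models listed above, pass the corresponding weights enum when constructing the detector. A short sketch using enum members that appear elsewhere in this README and in `scripts/run_detection.py`:

```python
from uniface import RetinaFace, SCRFD
from uniface.detection import YOLOv5Face
from uniface.constants import RetinaFaceWeights, SCRFDWeights, YOLOv5FaceWeights

# retinaface_mnet_v2: balanced accuracy/speed (recommended)
detector = RetinaFace(model_name=RetinaFaceWeights.MNET_V2)

# scrfd_10g: best accuracy/speed trade-off
detector = SCRFD(model_name=SCRFDWeights.SCRFD_10G_KPS)

# yolov5m_face: high accuracy
detector = YOLOv5Face(model_name=YOLOv5FaceWeights.YOLOV5M)
```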
|
||||
|
||||
**Benchmark on your hardware:**

```bash
python scripts/run_detection.py --image assets/test.jpg --iterations 100
```
|
||||
|
||||
See [MODELS.md](MODELS.md) for detailed model information and selection guide.
|
||||
|
||||
<div align="center">
|
||||
<img src="assets/test_result.png">
|
||||
</div>
|
||||
|
||||
---
|
||||
|
||||
## Examples
|
||||
|
||||
### Jupyter Notebooks
|
||||
|
||||
Interactive examples covering common face analysis tasks:
|
||||
|
||||
| Example | Description | Notebook |
|---------|-------------|----------|
| **Face Detection** | Detect faces and facial landmarks | [face_detection.ipynb](examples/face_detection.ipynb) |
| **Face Alignment** | Align and crop faces for recognition | [face_alignment.ipynb](examples/face_alignment.ipynb) |
| **Face Recognition** | Extract face embeddings and compare faces | [face_analyzer.ipynb](examples/face_analyzer.ipynb) |
| **Face Verification** | Compare two faces to verify identity | [face_verification.ipynb](examples/face_verification.ipynb) |
| **Face Search** | Find a person in a group photo | [face_search.ipynb](examples/face_search.ipynb) |
| **Face Parsing** | Segment face into semantic components | [face_parsing.ipynb](examples/face_parsing.ipynb) |
| **Gaze Estimation** | Estimate gaze direction from face images | [gaze_estimation.ipynb](examples/gaze_estimation.ipynb) |
|
||||
|
||||
### Image and Webcam Face Detection

Inference on an image:

```python
import cv2

from uniface import RetinaFace
from uniface.visualization import draw_detections

detector = RetinaFace()

# Load an image
image_path = "assets/test.jpg"
original_image = cv2.imread(image_path)

# Perform inference
faces = detector.detect(original_image)

# Extract data for visualization
bboxes = [face['bbox'] for face in faces]
scores = [face['confidence'] for face in faces]
landmarks = [face['landmarks'] for face in faces]

# Visualize results
draw_detections(
    image=original_image,
    bboxes=bboxes,
    scores=scores,
    landmarks=landmarks,
    vis_threshold=0.6,
)

# Save the output image
output_path = "output.jpg"
cv2.imwrite(output_path, original_image)
print(f"Saved output image to {output_path}")
```

Inference on a webcam stream:

```python
import cv2

from uniface import RetinaFace
from uniface.visualization import draw_detections

# Initialize the detector and the webcam
detector = RetinaFace()
cap = cv2.VideoCapture(0)

if not cap.isOpened():
    print("Error: Unable to access the webcam.")
    exit()

while True:
    # Capture a frame from the webcam
    ret, frame = cap.read()
    if not ret:
        print("Error: Failed to read frame.")
        break

    # Perform inference
    faces = detector.detect(frame)

    # Extract data for visualization
    bboxes = [f['bbox'] for f in faces]
    scores = [f['confidence'] for f in faces]
    landmarks = [f['landmarks'] for f in faces]

    # Draw detections on the frame
    draw_detections(
        image=frame,
        bboxes=bboxes,
        scores=scores,
        landmarks=landmarks,
        vis_threshold=0.6,
    )

    # Display the output and exit if 'q' is pressed
    cv2.imshow("Face Detection", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release the webcam and close all OpenCV windows
cap.release()
cv2.destroyAllWindows()
```
|
||||
|
||||
### Face Search System
|
||||
|
||||
```python
import cv2
import numpy as np

from uniface import RetinaFace, ArcFace

detector = RetinaFace()
recognizer = ArcFace()

# Build face database (person_images maps person IDs to image paths)
database = {}
for person_id, image_path in person_images.items():
    image = cv2.imread(image_path)
    faces = detector.detect(image)
    if faces:
        embedding = recognizer.get_normalized_embedding(
            image, faces[0]['landmarks']
        )
        database[person_id] = embedding

# Search for a face
query_image = cv2.imread("query.jpg")
query_faces = detector.detect(query_image)
if query_faces:
    query_embedding = recognizer.get_normalized_embedding(
        query_image, query_faces[0]['landmarks']
    )

    # Find best match by cosine similarity (embeddings are L2-normalized)
    best_match = None
    best_similarity = -1

    for person_id, db_embedding in database.items():
        similarity = np.dot(query_embedding, db_embedding.T)[0][0]
        if similarity > best_similarity:
            best_similarity = similarity
            best_match = person_id

    print(f"Best match: {best_match} (similarity: {best_similarity:.4f})")
```
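The bundled scripts perform the same comparison with a small helper; a sketch following `scripts/run_face_search.py`:

```python
from uniface.face_utils import compute_similarity

# Equivalent to the np.dot comparison above for L2-normalized embeddings
similarity = compute_similarity(query_embedding, db_embedding)
```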
|
||||
|
||||
More examples in the [examples/](examples/) directory.
|
||||
|
||||
---
|
||||
|
||||
### Evaluation results of available models on WiderFace

| RetinaFace Models  | Easy       | Medium     | Hard       |
| ------------------ | ---------- | ---------- | ---------- |
| retinaface_mnet025 | 88.48%     | 87.02%     | 80.61%     |
| retinaface_mnet050 | 89.42%     | 87.97%     | 82.40%     |
| retinaface_mnet_v1 | 90.59%     | 89.14%     | 84.13%     |
| retinaface_mnet_v2 | 91.70%     | 91.03%     | 86.60%     |
| retinaface_r18     | 92.50%     | 91.02%     | 86.63%     |
| retinaface_r34     | **94.16%** | **93.12%** | **88.90%** |

---

## Advanced Configuration

### Custom ONNX Runtime Providers

```python
from uniface.onnx_utils import get_available_providers, create_onnx_session

# Check available providers
providers = get_available_providers()
print(f"Available: {providers}")

from uniface import RetinaFace

# Internally uses create_onnx_session(), which auto-selects the best available provider
# (install the optional `gpu` extra for onnxruntime-gpu; otherwise execution stays CPU-only)
detector = RetinaFace()
```

### Model Download and Caching

Models are automatically downloaded on first use and cached in `~/.uniface/models/`.

```python
from uniface.model_store import verify_model_weights
from uniface.constants import RetinaFaceWeights

# Manually download and verify a model
model_path = verify_model_weights(
    RetinaFaceWeights.MNET_V2,
    root='./custom_models'  # Custom cache directory
)
```

### Logging Configuration

```python
import logging

from uniface import Logger

# Set logging level
Logger.setLevel(logging.DEBUG)  # DEBUG, INFO, WARNING, ERROR

# Disable logging
Logger.setLevel(logging.CRITICAL)
```

---

## API Reference

### `RetinaFace` Class

#### Initialization

```python
RetinaFace(
    model: str,
    conf_thresh: float = 0.5,
    pre_nms_topk: int = 5000,
    nms_thresh: float = 0.4,
    post_nms_topk: int = 750
)
```

**Parameters**:

- `model` *(str)*: Name of the model to use. Supported models:
  - `retinaface_mnet025`, `retinaface_mnet050`, `retinaface_mnet_v1`, `retinaface_mnet_v2`
  - `retinaface_r18`, `retinaface_r34`
- `conf_thresh` *(float, default=0.5)*: Minimum confidence score for detections.
- `pre_nms_topk` *(int, default=5000)*: Max detections to keep before NMS.
- `nms_thresh` *(float, default=0.4)*: IoU threshold for Non-Maximum Suppression.
- `post_nms_topk` *(int, default=750)*: Max detections to keep after NMS.

---

### `detect` Method

```python
detect(
    image: np.ndarray,
    max_num: int = 0,
    metric: str = "default",
    center_weight: float = 2.0
) -> Tuple[np.ndarray, np.ndarray]
```

**Description**:
Detects faces in the given image and returns bounding boxes and landmarks.

**Parameters**:

- `image` *(np.ndarray)*: Input image in BGR format.
- `max_num` *(int, default=0)*: Maximum number of faces to return. `0` means return all.
- `metric` *(str, default="default")*: Metric for prioritizing detections:
  - `"default"`: Prioritize detections closer to the image center.
  - `"max"`: Prioritize larger bounding box areas.
- `center_weight` *(float, default=2.0)*: Weight for prioritizing center-aligned faces.

**Returns**:

- `bounding_boxes` *(np.ndarray)*: Array of detections as `[x_min, y_min, x_max, y_max, confidence]`.
- `landmarks` *(np.ndarray)*: Array of landmarks as `[(x1, y1), ..., (x5, y5)]`.
|
||||
|
||||
---
|
||||
|
||||
### Visualization Utilities

#### `draw_detections`

```python
draw_detections(
    image: np.ndarray,
    detections: Tuple[np.ndarray, np.ndarray],
    vis_threshold: float
) -> None
```

**Description**:
Draws bounding boxes and landmarks on the given image.

**Parameters**:

- `image` *(np.ndarray)*: The input image in BGR format.
- `detections` *(Tuple[np.ndarray, np.ndarray])*: A tuple of bounding boxes and landmarks.
- `vis_threshold` *(float)*: Minimum confidence score for visualization.

---

## Testing

```bash
# Run all tests
pytest

# Run with coverage
pytest --cov=uniface --cov-report=html

# Run specific test file
pytest tests/test_retinaface.py -v
```

---
## Development
|
||||
|
||||
### Setup Development Environment
|
||||
|
||||
```bash
git clone https://github.com/yakhyo/uniface.git
cd uniface

# Install in editable mode with dev dependencies
pip install -e ".[dev]"

# Run tests
pytest
```
|
||||
|
||||
### Code Formatting
|
||||
|
||||
This project uses [Ruff](https://docs.astral.sh/ruff/) for linting and formatting.
|
||||
|
||||
```bash
# Format code
ruff format .

# Check for linting errors
ruff check .

# Auto-fix linting errors
ruff check . --fix
```
|
||||
|
||||
Ruff configuration is in `pyproject.toml`. Key settings:

- Line length: 120
- Python target: 3.10+
- Import sorting: `uniface` as first-party
|
||||
|
||||
### Project Structure
|
||||
|
||||
```
uniface/
├── uniface/
│   ├── detection/       # Face detection models
│   ├── recognition/     # Face recognition models
│   ├── landmark/        # Landmark detection
│   ├── parsing/         # Face parsing
│   ├── gaze/            # Gaze estimation
│   ├── attribute/       # Age, gender, emotion
│   ├── onnx_utils.py    # ONNX Runtime utilities
│   ├── model_store.py   # Model download & caching
│   └── visualization.py # Drawing utilities
├── tests/               # Unit tests
├── examples/            # Example notebooks
└── scripts/             # Utility scripts
```
|
||||
|
||||
---
|
||||
|
||||
## References
|
||||
|
||||
- **RetinaFace Training**: [yakhyo/retinaface-pytorch](https://github.com/yakhyo/retinaface-pytorch) - PyTorch implementation and training code
|
||||
- **YOLOv5-Face ONNX**: [yakhyo/yolov5-face-onnx-inference](https://github.com/yakhyo/yolov5-face-onnx-inference) - ONNX inference implementation
|
||||
- **Face Recognition Training**: [yakhyo/face-recognition](https://github.com/yakhyo/face-recognition) - ArcFace, MobileFace, SphereFace training code
|
||||
- **Face Parsing Training**: [yakhyo/face-parsing](https://github.com/yakhyo/face-parsing) - BiSeNet face parsing training code and pretrained weights
|
||||
- **Gaze Estimation Training**: [yakhyo/gaze-estimation](https://github.com/yakhyo/gaze-estimation) - MobileGaze training code and pretrained weights
|
||||
- **InsightFace**: [deepinsight/insightface](https://github.com/deepinsight/insightface) - Model architectures and pretrained weights
|
||||
|
||||
## Contributing
|
||||
|
||||
We welcome contributions to enhance the library! Feel free to:
|
||||
|
||||
- Submit bug reports or feature requests.
|
||||
- Fork the repository and create a pull request.
|
||||
|
||||
---
|
||||
|
||||
## License
|
||||
|
||||
This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for details.
|
||||
|
||||
---
|
||||
|
||||
## Acknowledgments
|
||||
|
||||
- Based on the RetinaFace model for face detection ([https://github.com/yakhyo/retinaface-pytorch](https://github.com/yakhyo/retinaface-pytorch)).
|
||||
- Inspired by InsightFace and other face detection projects.
|
||||
|
||||
---
|
||||
Contributions are welcome! Please open an issue or submit a pull request on [GitHub](https://github.com/yakhyo/uniface).
|
||||
|
||||
BIN
assets/alignment_result.png
Normal file
|
After Width: | Height: | Size: 996 KiB |
BIN
assets/einstien.png
Normal file
|
After Width: | Height: | Size: 1.3 MiB |
BIN
assets/scientists.png
Normal file
|
After Width: | Height: | Size: 1.9 MiB |
BIN
assets/test_images/image0.jpg
Normal file
|
After Width: | Height: | Size: 26 KiB |
BIN
assets/test_images/image1.jpg
Normal file
|
After Width: | Height: | Size: 6.6 KiB |
BIN
assets/test_images/image2.jpg
Normal file
|
After Width: | Height: | Size: 9.1 KiB |
BIN
assets/test_images/image3.jpg
Normal file
|
After Width: | Height: | Size: 9.8 KiB |
BIN
assets/test_images/image4.jpg
Normal file
|
After Width: | Height: | Size: 11 KiB |
BIN
assets/test_result.png
Normal file
|
After Width: | Height: | Size: 1.1 MiB |
0
examples/.gitkeep
Normal file
239
examples/face_alignment.ipynb
Normal file
327
examples/face_analyzer.ipynb
Normal file
311
examples/face_detection.ipynb
Normal file
387
examples/face_parsing.ipynb
Normal file
368
examples/face_search.ipynb
Normal file
273
examples/face_verification.ipynb
Normal file
271
examples/gaze_estimation.ipynb
Normal file
102
pyproject.toml
Normal file
@@ -0,0 +1,102 @@
|
||||
[project]
|
||||
name = "uniface"
|
||||
version = "1.5.0"
|
||||
description = "UniFace: A Comprehensive Library for Face Detection, Recognition, Landmark Analysis, Face Parsing, Gaze Estimation, Age, and Gender Detection"
|
||||
readme = "README.md"
|
||||
license = { text = "MIT" }
|
||||
authors = [{ name = "Yakhyokhuja Valikhujaev", email = "yakhyo9696@gmail.com" }]
|
||||
maintainers = [
|
||||
{ name = "Yakhyokhuja Valikhujaev", email = "yakhyo9696@gmail.com" },
|
||||
]
|
||||
|
||||
requires-python = ">=3.10,<3.14"
|
||||
keywords = [
|
||||
"face-detection",
|
||||
"face-recognition",
|
||||
"facial-landmarks",
|
||||
"face-parsing",
|
||||
"face-segmentation",
|
||||
"gaze-estimation",
|
||||
"age-detection",
|
||||
"gender-detection",
|
||||
"computer-vision",
|
||||
"deep-learning",
|
||||
"onnx",
|
||||
"onnxruntime",
|
||||
"face-analysis",
|
||||
"bisenet",
|
||||
]
|
||||
|
||||
classifiers = [
|
||||
"Development Status :: 4 - Beta",
|
||||
"Intended Audience :: Developers",
|
||||
"Intended Audience :: Science/Research",
|
||||
"License :: OSI Approved :: MIT License",
|
||||
"Operating System :: OS Independent",
|
||||
"Programming Language :: Python :: 3",
|
||||
"Programming Language :: Python :: 3.10",
|
||||
"Programming Language :: Python :: 3.11",
|
||||
"Programming Language :: Python :: 3.12",
|
||||
"Programming Language :: Python :: 3.13",
|
||||
]
|
||||
|
||||
dependencies = [
|
||||
"numpy>=1.21.0",
|
||||
"opencv-python>=4.5.0",
|
||||
"onnx>=1.12.0",
|
||||
"onnxruntime>=1.16.0",
|
||||
"scikit-image>=0.19.0",
|
||||
"requests>=2.28.0",
|
||||
"tqdm>=4.64.0",
|
||||
]
|
||||
|
||||
[project.optional-dependencies]
|
||||
dev = ["pytest>=7.0.0", "ruff>=0.4.0"]
|
||||
gpu = ["onnxruntime-gpu>=1.16.0"]
|
||||
|
||||
[project.urls]
|
||||
Homepage = "https://github.com/yakhyo/uniface"
|
||||
Repository = "https://github.com/yakhyo/uniface"
|
||||
Documentation = "https://github.com/yakhyo/uniface/blob/main/README.md"
|
||||
"Quick Start" = "https://github.com/yakhyo/uniface/blob/main/QUICKSTART.md"
|
||||
"Model Zoo" = "https://github.com/yakhyo/uniface/blob/main/MODELS.md"
|
||||
|
||||
[build-system]
|
||||
requires = ["setuptools>=64", "wheel"]
|
||||
build-backend = "setuptools.build_meta"
|
||||
|
||||
[tool.setuptools]
|
||||
packages = { find = { where = ["."], include = ["uniface*"] } }
|
||||
|
||||
[tool.setuptools.package-data]
|
||||
uniface = ["py.typed"]
|
||||
|
||||
[tool.ruff]
|
||||
line-length = 120
|
||||
target-version = "py310"
|
||||
exclude = [
|
||||
".git",
|
||||
".ruff_cache",
|
||||
"__pycache__",
|
||||
"build",
|
||||
"dist",
|
||||
"*.egg-info",
|
||||
".venv",
|
||||
"venv",
|
||||
".pytest_cache",
|
||||
".mypy_cache",
|
||||
"*.ipynb",
|
||||
]
|
||||
|
||||
[tool.ruff.format]
|
||||
quote-style = "single"
|
||||
|
||||
|
||||
[tool.ruff.lint]
|
||||
select = ["E", "F", "I", "W"]
|
||||
|
||||
[tool.ruff.lint.flake8-quotes]
|
||||
docstring-quotes = "double"
|
||||
|
||||
[tool.ruff.lint.isort]
|
||||
known-first-party = ["uniface"]
|
||||
@@ -1,8 +1,8 @@
|
||||
pytest
|
||||
numpy
|
||||
opencv-python
|
||||
opencv-python-headless
|
||||
onnx
|
||||
onnxruntime
|
||||
requests
|
||||
torch
|
||||
numpy>=1.21.0
|
||||
opencv-python>=4.5.0
|
||||
onnx>=1.12.0
|
||||
onnxruntime>=1.16.0
|
||||
scikit-image>=0.19.0
|
||||
requests>=2.28.0
|
||||
pytest>=7.0.0
|
||||
tqdm>=4.64.0
|
||||
|
||||
79
scripts/README.md
Normal file
@@ -0,0 +1,79 @@
|
||||
# Scripts
|
||||
|
||||
Scripts for testing UniFace features.
|
||||
|
||||
## Available Scripts
|
||||
|
||||
| Script | Description |
|
||||
|--------|-------------|
|
||||
| `run_detection.py` | Face detection on image or webcam |
|
||||
| `run_age_gender.py` | Age and gender prediction |
|
||||
| `run_emotion.py` | Emotion detection (7 or 8 emotions) |
|
||||
| `run_gaze_estimation.py` | Gaze direction estimation |
|
||||
| `run_landmarks.py` | 106-point facial landmark detection |
|
||||
| `run_recognition.py` | Face embedding extraction and comparison |
|
||||
| `run_face_analyzer.py` | Complete face analysis (detection + recognition + attributes) |
|
||||
| `run_face_search.py` | Real-time face matching against reference |
|
||||
| `run_video_detection.py` | Face detection on video files |
|
||||
| `batch_process.py` | Batch process folder of images |
|
||||
| `download_model.py` | Download model weights |
|
||||
| `sha256_generate.py` | Generate SHA256 hash for model files |
|
||||
|
||||
## Usage Examples
|
||||
|
||||
```bash
|
||||
# Face detection
|
||||
python scripts/run_detection.py --image assets/test.jpg
|
||||
python scripts/run_detection.py --webcam
|
||||
|
||||
# Age and gender
|
||||
python scripts/run_age_gender.py --image assets/test.jpg
|
||||
python scripts/run_age_gender.py --webcam
|
||||
|
||||
# Emotion detection
|
||||
python scripts/run_emotion.py --image assets/test.jpg
|
||||
python scripts/run_emotion.py --webcam
|
||||
|
||||
# Gaze estimation
|
||||
python scripts/run_gaze_estimation.py --image assets/test.jpg
|
||||
python scripts/run_gaze_estimation.py --webcam
|
||||
|
||||
# Landmarks
|
||||
python scripts/run_landmarks.py --image assets/test.jpg
|
||||
python scripts/run_landmarks.py --webcam
|
||||
|
||||
# Face recognition (extract embedding)
|
||||
python scripts/run_recognition.py --image assets/test.jpg
|
||||
|
||||
# Face comparison
|
||||
python scripts/run_recognition.py --image1 face1.jpg --image2 face2.jpg
|
||||
|
||||
# Face search (match webcam against reference)
|
||||
python scripts/run_face_search.py --image reference.jpg
|
||||
|
||||
# Video processing
|
||||
python scripts/run_video_detection.py --input video.mp4 --output output.mp4
|
||||
|
||||
# Batch processing
|
||||
python scripts/batch_process.py --input images/ --output results/
|
||||
|
||||
# Download models
|
||||
python scripts/download_model.py --model-type retinaface
|
||||
python scripts/download_model.py # downloads all
|
||||
```
|
||||
|
||||
## Common Options
|
||||
|
||||
| Option | Description |
|
||||
|--------|-------------|
|
||||
| `--image` | Path to input image |
|
||||
| `--webcam` | Use webcam instead of image |
|
||||
| `--method` | Choose detector: `retinaface`, `scrfd`, `yolov5face` |
|
||||
| `--threshold` | Visualization confidence threshold (default: 0.25) |
|
||||
| `--save_dir` | Output directory (default: `outputs`) |
|
||||
|
||||
## Quick Test
|
||||
|
||||
```bash
|
||||
python scripts/run_detection.py --image assets/test.jpg
|
||||
```
|
||||
98
scripts/batch_process.py
Normal file
@@ -0,0 +1,98 @@
|
||||
# Batch face detection on a folder of images
|
||||
# Usage: python batch_process.py --input images/ --output results/
|
||||
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
from tqdm import tqdm
|
||||
|
||||
from uniface import SCRFD, RetinaFace
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
|
||||
def get_image_files(input_dir: Path, extensions: tuple) -> list:
|
||||
files = []
|
||||
for ext in extensions:
|
||||
files.extend(input_dir.glob(f'*.{ext}'))
|
||||
files.extend(input_dir.glob(f'*.{ext.upper()}'))
|
||||
return sorted(files)
|
||||
|
||||
|
||||
def process_image(detector, image_path: Path, output_path: Path, threshold: float) -> int:
|
||||
"""Process single image. Returns face count or -1 on error."""
|
||||
image = cv2.imread(str(image_path))
|
||||
if image is None:
|
||||
return -1
|
||||
|
||||
faces = detector.detect(image)
|
||||
|
||||
# unpack face data for visualization
|
||||
bboxes = [f['bbox'] for f in faces]
|
||||
scores = [f['confidence'] for f in faces]
|
||||
landmarks = [f['landmarks'] for f in faces]
|
||||
draw_detections(
|
||||
image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
|
||||
)
|
||||
|
||||
cv2.putText(
|
||||
image,
|
||||
f'Faces: {len(faces)}',
|
||||
(10, 30),
|
||||
cv2.FONT_HERSHEY_SIMPLEX,
|
||||
1,
|
||||
(0, 255, 0),
|
||||
2,
|
||||
)
|
||||
cv2.imwrite(str(output_path), image)
|
||||
|
||||
return len(faces)
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Batch process images with face detection')
|
||||
parser.add_argument('--input', type=str, required=True, help='Input directory')
|
||||
parser.add_argument('--output', type=str, required=True, help='Output directory')
|
||||
parser.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
|
||||
parser.add_argument('--threshold', type=float, default=0.6, help='Visualization threshold')
|
||||
parser.add_argument('--extensions', type=str, default='jpg,jpeg,png,bmp', help='Image extensions')
|
||||
args = parser.parse_args()
|
||||
|
||||
input_path = Path(args.input)
|
||||
output_path = Path(args.output)
|
||||
|
||||
if not input_path.exists():
|
||||
print(f"Error: Input directory '{args.input}' does not exist")
|
||||
return
|
||||
|
||||
output_path.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
extensions = tuple(ext.strip() for ext in args.extensions.split(','))
|
||||
image_files = get_image_files(input_path, extensions)
|
||||
|
||||
if not image_files:
|
||||
print(f'No images found with extensions {extensions}')
|
||||
return
|
||||
|
||||
print(f'Found {len(image_files)} images')
|
||||
|
||||
detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
|
||||
|
||||
success, errors, total_faces = 0, 0, 0
|
||||
|
||||
for img_path in tqdm(image_files, desc='Processing', unit='img'):
|
||||
out_path = output_path / f'{img_path.stem}_detected{img_path.suffix}'
|
||||
result = process_image(detector, img_path, out_path, args.threshold)
|
||||
|
||||
if result >= 0:
|
||||
success += 1
|
||||
total_faces += result
|
||||
else:
|
||||
errors += 1
|
||||
print(f'\nFailed: {img_path.name}')
|
||||
|
||||
print(f'\nDone! {success} processed, {errors} errors, {total_faces} faces total')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
60
scripts/download_model.py
Normal file
@@ -0,0 +1,60 @@
|
||||
import argparse
|
||||
|
||||
from uniface.constants import (
|
||||
AgeGenderWeights,
|
||||
ArcFaceWeights,
|
||||
DDAMFNWeights,
|
||||
LandmarkWeights,
|
||||
MobileFaceWeights,
|
||||
RetinaFaceWeights,
|
||||
SCRFDWeights,
|
||||
SphereFaceWeights,
|
||||
)
|
||||
from uniface.model_store import verify_model_weights
|
||||
|
||||
MODEL_TYPES = {
|
||||
'retinaface': RetinaFaceWeights,
|
||||
'sphereface': SphereFaceWeights,
|
||||
'mobileface': MobileFaceWeights,
|
||||
'arcface': ArcFaceWeights,
|
||||
'scrfd': SCRFDWeights,
|
||||
'ddamfn': DDAMFNWeights,
|
||||
'agegender': AgeGenderWeights,
|
||||
'landmark': LandmarkWeights,
|
||||
}
|
||||
|
||||
|
||||
def download_models(model_enum):
|
||||
for weight in model_enum:
|
||||
print(f'Downloading: {weight.value}')
|
||||
try:
|
||||
verify_model_weights(weight)
|
||||
print(f' Done: {weight.value}')
|
||||
except Exception as e:
|
||||
print(f' Failed: {e}')
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Download model weights')
|
||||
parser.add_argument(
|
||||
'--model-type',
|
||||
type=str,
|
||||
choices=list(MODEL_TYPES.keys()),
|
||||
help='Model type to download. If not specified, downloads all.',
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.model_type:
|
||||
print(f'Downloading {args.model_type} models...')
|
||||
download_models(MODEL_TYPES[args.model_type])
|
||||
else:
|
||||
print('Downloading all models...')
|
||||
for name, model_enum in MODEL_TYPES.items():
|
||||
print(f'\n{name}:')
|
||||
download_models(model_enum)
|
||||
|
||||
print('\nDone!')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
@@ -1,23 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Exit on errors
|
||||
set -e
|
||||
|
||||
cd "$(dirname "$0")"/..
|
||||
|
||||
echo "Deleting existing release-related files..."
|
||||
rm -rf dist/ build/ *.egg-info
|
||||
|
||||
pip install --upgrade pip
|
||||
pip install twine
|
||||
|
||||
echo "Creating a package for the current release (PyPI compatible)..."
|
||||
python3 setup.py sdist bdist_wheel
|
||||
|
||||
echo "Release package created successfully in the 'dist/' folder."
|
||||
|
||||
|
||||
echo "Uploading the package to PyPI..."
|
||||
twine upload dist/*
|
||||
|
||||
echo "Release uploaded successfully!"
|
||||
130
scripts/run_age_gender.py
Normal file
@@ -0,0 +1,130 @@
|
||||
# Age and gender prediction on detected faces
|
||||
# Usage: python run_age_gender.py --image path/to/image.jpg
|
||||
# python run_age_gender.py --webcam
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
|
||||
from uniface import SCRFD, AgeGender, RetinaFace
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
|
||||
def draw_age_gender_label(image, bbox, gender_id: int, age: int):
|
||||
"""Draw age/gender label above the bounding box."""
|
||||
x1, y1 = int(bbox[0]), int(bbox[1])
|
||||
gender_str = 'Female' if gender_id == 0 else 'Male'
|
||||
text = f'{gender_str}, {age}y'
|
||||
(tw, th), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
|
||||
cv2.rectangle(image, (x1, y1 - th - 10), (x1 + tw + 10, y1), (0, 255, 0), -1)
|
||||
cv2.putText(image, text, (x1 + 5, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 2)
|
||||
|
||||
|
||||
def process_image(
|
||||
detector,
|
||||
age_gender,
|
||||
image_path: str,
|
||||
save_dir: str = 'outputs',
|
||||
threshold: float = 0.6,
|
||||
):
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
print(f"Error: Failed to load image from '{image_path}'")
|
||||
return
|
||||
|
||||
faces = detector.detect(image)
|
||||
print(f'Detected {len(faces)} face(s)')
|
||||
|
||||
if not faces:
|
||||
return
|
||||
|
||||
bboxes = [f['bbox'] for f in faces]
|
||||
scores = [f['confidence'] for f in faces]
|
||||
landmarks = [f['landmarks'] for f in faces]
|
||||
draw_detections(
|
||||
image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
|
||||
)
|
||||
|
||||
for i, face in enumerate(faces):
|
||||
gender_id, age = age_gender.predict(image, face['bbox'])
|
||||
gender_str = 'Female' if gender_id == 0 else 'Male'
|
||||
print(f' Face {i + 1}: {gender_str}, {age} years old')
|
||||
draw_age_gender_label(image, face['bbox'], gender_id, age)
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{Path(image_path).stem}_age_gender.jpg')
|
||||
cv2.imwrite(output_path, image)
|
||||
print(f'Output saved: {output_path}')
|
||||
|
||||
|
||||
def run_webcam(detector, age_gender, threshold: float = 0.6):
|
||||
cap = cv2.VideoCapture(0) # 0 = default webcam
|
||||
if not cap.isOpened():
|
||||
print('Cannot open webcam')
|
||||
return
|
||||
|
||||
print("Press 'q' to quit")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
if not ret:
    break
frame = cv2.flip(frame, 1)  # mirror for natural interaction (only after a successful read)
|
||||
|
||||
faces = detector.detect(frame)
|
||||
|
||||
# unpack face data for visualization
|
||||
bboxes = [f['bbox'] for f in faces]
|
||||
scores = [f['confidence'] for f in faces]
|
||||
landmarks = [f['landmarks'] for f in faces]
|
||||
draw_detections(
|
||||
image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
|
||||
)
|
||||
|
||||
for face in faces:
|
||||
gender_id, age = age_gender.predict(frame, face['bbox']) # predict per face
|
||||
draw_age_gender_label(frame, face['bbox'], gender_id, age)
|
||||
|
||||
cv2.putText(
|
||||
frame,
|
||||
f'Faces: {len(faces)}',
|
||||
(10, 30),
|
||||
cv2.FONT_HERSHEY_SIMPLEX,
|
||||
1,
|
||||
(0, 255, 0),
|
||||
2,
|
||||
)
|
||||
cv2.imshow('Age & Gender Detection', frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Run age and gender detection')
|
||||
parser.add_argument('--image', type=str, help='Path to input image')
|
||||
parser.add_argument('--webcam', action='store_true', help='Use webcam')
|
||||
parser.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
|
||||
parser.add_argument('--threshold', type=float, default=0.6, help='Visualization threshold')
|
||||
parser.add_argument('--save_dir', type=str, default='outputs')
|
||||
args = parser.parse_args()
|
||||
|
||||
if not args.image and not args.webcam:
|
||||
parser.error('Either --image or --webcam must be specified')
|
||||
|
||||
detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
|
||||
age_gender = AgeGender()
|
||||
|
||||
if args.webcam:
|
||||
run_webcam(detector, age_gender, args.threshold)
|
||||
else:
|
||||
process_image(detector, age_gender, args.image, args.save_dir, args.threshold)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
110
scripts/run_detection.py
Normal file
@@ -0,0 +1,110 @@
|
||||
# Face detection on image or webcam
|
||||
# Usage: python run_detection.py --image path/to/image.jpg
|
||||
# python run_detection.py --webcam
|
||||
|
||||
import argparse
|
||||
import os
|
||||
|
||||
import cv2
|
||||
|
||||
from uniface.detection import SCRFD, RetinaFace, YOLOv5Face
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
|
||||
def process_image(detector, image_path: str, threshold: float = 0.6, save_dir: str = 'outputs'):
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
print(f"Error: Failed to load image from '{image_path}'")
|
||||
return
|
||||
|
||||
faces = detector.detect(image)
|
||||
|
||||
if faces:
|
||||
bboxes = [face['bbox'] for face in faces]
|
||||
scores = [face['confidence'] for face in faces]
|
||||
landmarks = [face['landmarks'] for face in faces]
|
||||
draw_detections(image, bboxes, scores, landmarks, vis_threshold=threshold)
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{os.path.splitext(os.path.basename(image_path))[0]}_out.jpg')
|
||||
cv2.imwrite(output_path, image)
|
||||
print(f'Output saved: {output_path}')
|
||||
|
||||
|
||||
def run_webcam(detector, threshold: float = 0.6):
|
||||
cap = cv2.VideoCapture(0) # 0 = default webcam
|
||||
if not cap.isOpened():
|
||||
print('Cannot open webcam')
|
||||
return
|
||||
|
||||
print("Press 'q' to quit")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
if not ret:
    break
frame = cv2.flip(frame, 1)  # mirror for natural interaction (only after a successful read)
|
||||
|
||||
faces = detector.detect(frame)
|
||||
|
||||
# unpack face data for visualization
|
||||
bboxes = [f['bbox'] for f in faces]
|
||||
scores = [f['confidence'] for f in faces]
|
||||
landmarks = [f['landmarks'] for f in faces]
|
||||
draw_detections(
|
||||
image=frame,
|
||||
bboxes=bboxes,
|
||||
scores=scores,
|
||||
landmarks=landmarks,
|
||||
vis_threshold=threshold,
|
||||
draw_score=True,
|
||||
fancy_bbox=True,
|
||||
)
|
||||
|
||||
cv2.putText(
|
||||
frame,
|
||||
f'Faces: {len(faces)}',
|
||||
(10, 30),
|
||||
cv2.FONT_HERSHEY_SIMPLEX,
|
||||
1,
|
||||
(0, 255, 0),
|
||||
2,
|
||||
)
|
||||
cv2.imshow('Face Detection', frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Run face detection')
|
||||
parser.add_argument('--image', type=str, help='Path to input image')
|
||||
parser.add_argument('--webcam', action='store_true', help='Use webcam')
|
||||
parser.add_argument('--method', type=str, default='retinaface', choices=['retinaface', 'scrfd', 'yolov5face'])
|
||||
parser.add_argument('--threshold', type=float, default=0.25, help='Visualization threshold')
|
||||
parser.add_argument('--save_dir', type=str, default='outputs')
|
||||
args = parser.parse_args()
|
||||
|
||||
if not args.image and not args.webcam:
|
||||
parser.error('Either --image or --webcam must be specified')
|
||||
|
||||
if args.method == 'retinaface':
|
||||
detector = RetinaFace()
|
||||
elif args.method == 'scrfd':
|
||||
detector = SCRFD()
|
||||
else:
|
||||
from uniface.constants import YOLOv5FaceWeights
|
||||
|
||||
detector = YOLOv5Face(model_name=YOLOv5FaceWeights.YOLOV5M)
|
||||
|
||||
if args.webcam:
|
||||
run_webcam(detector, args.threshold)
|
||||
else:
|
||||
process_image(detector, args.image, args.threshold, args.save_dir)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
126
scripts/run_emotion.py
Normal file
@@ -0,0 +1,126 @@
|
||||
# Emotion detection on detected faces
|
||||
# Usage: python run_emotion.py --image path/to/image.jpg
|
||||
# python run_emotion.py --webcam
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
|
||||
from uniface import SCRFD, Emotion, RetinaFace
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
|
||||
def draw_emotion_label(image, bbox, emotion: str, confidence: float):
|
||||
"""Draw emotion label above the bounding box."""
|
||||
x1, y1 = int(bbox[0]), int(bbox[1])
|
||||
text = f'{emotion} ({confidence:.2f})'
|
||||
(tw, th), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
|
||||
cv2.rectangle(image, (x1, y1 - th - 10), (x1 + tw + 10, y1), (255, 0, 0), -1)
|
||||
cv2.putText(image, text, (x1 + 5, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
|
||||
|
||||
|
||||
def process_image(
|
||||
detector,
|
||||
emotion_predictor,
|
||||
image_path: str,
|
||||
save_dir: str = 'outputs',
|
||||
threshold: float = 0.6,
|
||||
):
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
print(f"Error: Failed to load image from '{image_path}'")
|
||||
return
|
||||
|
||||
faces = detector.detect(image)
|
||||
print(f'Detected {len(faces)} face(s)')
|
||||
|
||||
if not faces:
|
||||
return
|
||||
|
||||
bboxes = [f['bbox'] for f in faces]
|
||||
scores = [f['confidence'] for f in faces]
|
||||
landmarks = [f['landmarks'] for f in faces]
|
||||
draw_detections(
|
||||
image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
|
||||
)
|
||||
|
||||
for i, face in enumerate(faces):
|
||||
emotion, confidence = emotion_predictor.predict(image, face['landmarks'])
|
||||
print(f' Face {i + 1}: {emotion} (confidence: {confidence:.3f})')
|
||||
draw_emotion_label(image, face['bbox'], emotion, confidence)
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{Path(image_path).stem}_emotion.jpg')
|
||||
cv2.imwrite(output_path, image)
|
||||
print(f'Output saved: {output_path}')
|
||||
|
||||
|
||||
def run_webcam(detector, emotion_predictor, threshold: float = 0.6):
|
||||
cap = cv2.VideoCapture(0) # 0 = default webcam
|
||||
if not cap.isOpened():
|
||||
print('Cannot open webcam')
|
||||
return
|
||||
|
||||
print("Press 'q' to quit")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
if not ret:
    break
frame = cv2.flip(frame, 1)  # mirror for natural interaction (only after a successful read)
|
||||
|
||||
faces = detector.detect(frame)
|
||||
|
||||
# unpack face data for visualization
|
||||
bboxes = [f['bbox'] for f in faces]
|
||||
scores = [f['confidence'] for f in faces]
|
||||
landmarks = [f['landmarks'] for f in faces]
|
||||
draw_detections(frame, bboxes, scores, landmarks, vis_threshold=threshold)
|
||||
|
||||
for face in faces:
|
||||
emotion, confidence = emotion_predictor.predict(frame, face['landmarks'])
|
||||
draw_emotion_label(frame, face['bbox'], emotion, confidence)
|
||||
|
||||
cv2.putText(
|
||||
frame,
|
||||
f'Faces: {len(faces)}',
|
||||
(10, 30),
|
||||
cv2.FONT_HERSHEY_SIMPLEX,
|
||||
1,
|
||||
(0, 255, 0),
|
||||
2,
|
||||
)
|
||||
cv2.imshow('Emotion Detection', frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Run emotion detection')
|
||||
parser.add_argument('--image', type=str, help='Path to input image')
|
||||
parser.add_argument('--webcam', action='store_true', help='Use webcam')
|
||||
parser.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
|
||||
parser.add_argument('--threshold', type=float, default=0.6, help='Visualization threshold')
|
||||
parser.add_argument('--save_dir', type=str, default='outputs')
|
||||
args = parser.parse_args()
|
||||
|
||||
if not args.image and not args.webcam:
|
||||
parser.error('Either --image or --webcam must be specified')
|
||||
|
||||
detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
|
||||
emotion_predictor = Emotion()
|
||||
|
||||
if args.webcam:
|
||||
run_webcam(detector, emotion_predictor, args.threshold)
|
||||
else:
|
||||
process_image(detector, emotion_predictor, args.image, args.save_dir, args.threshold)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
116
scripts/run_face_analyzer.py
Normal file
@@ -0,0 +1,116 @@
|
||||
# Face analysis using FaceAnalyzer
|
||||
# Usage: python run_face_analyzer.py --image path/to/image.jpg
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from uniface import AgeGender, ArcFace, FaceAnalyzer, RetinaFace
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
|
||||
def draw_face_info(image, face, face_id):
|
||||
"""Draw face ID and attributes above bounding box."""
|
||||
x1, y1, x2, y2 = map(int, face.bbox)
|
||||
lines = [f'ID: {face_id}', f'Conf: {face.confidence:.2f}']
|
||||
if face.age and face.sex:
|
||||
lines.append(f'{face.sex}, {face.age}y')
|
||||
|
||||
for i, line in enumerate(lines):
|
||||
y_pos = y1 - 10 - (len(lines) - 1 - i) * 25
|
||||
if y_pos < 20:
|
||||
y_pos = y2 + 20 + i * 25
|
||||
(tw, th), _ = cv2.getTextSize(line, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
|
||||
cv2.rectangle(image, (x1, y_pos - th - 5), (x1 + tw + 10, y_pos + 5), (0, 255, 0), -1)
|
||||
cv2.putText(image, line, (x1 + 5, y_pos), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 2)
|
||||
|
||||
|
||||
def process_image(analyzer, image_path: str, save_dir: str = 'outputs', show_similarity: bool = True):
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
print(f"Error: Failed to load image from '{image_path}'")
|
||||
return
|
||||
|
||||
faces = analyzer.analyze(image)
|
||||
print(f'Detected {len(faces)} face(s)')
|
||||
|
||||
if not faces:
|
||||
return
|
||||
|
||||
for i, face in enumerate(faces, 1):
|
||||
info = f' Face {i}: {face.sex}, {face.age}y' if face.age and face.sex else f' Face {i}'
|
||||
if face.embedding is not None:
|
||||
info += f' (embedding: {face.embedding.shape})'
|
||||
print(info)
|
||||
|
||||
if show_similarity and len(faces) >= 2:
|
||||
print('\nSimilarity Matrix:')
|
||||
n = len(faces)
|
||||
sim_matrix = np.zeros((n, n))
|
||||
|
||||
for i in range(n):
|
||||
for j in range(i, n):
|
||||
if i == j:
|
||||
sim_matrix[i][j] = 1.0
|
||||
else:
|
||||
sim = faces[i].compute_similarity(faces[j])
|
||||
sim_matrix[i][j] = sim
|
||||
sim_matrix[j][i] = sim
|
||||
|
||||
print(' ', end='')
|
||||
for i in range(n):
|
||||
print(f' F{i + 1:2d} ', end='')
|
||||
print('\n ' + '-' * (7 * n))
|
||||
|
||||
for i in range(n):
|
||||
print(f'F{i + 1:2d} | ', end='')
|
||||
for j in range(n):
|
||||
print(f'{sim_matrix[i][j]:6.3f} ', end='')
|
||||
print()
|
||||
|
||||
pairs = [(i, j, sim_matrix[i][j]) for i in range(n) for j in range(i + 1, n)]
|
||||
pairs.sort(key=lambda x: x[2], reverse=True)
|
||||
|
||||
print('\nTop matches (>0.4 = same person):')
|
||||
for i, j, sim in pairs[:3]:
|
||||
status = 'Same' if sim > 0.4 else 'Different'
|
||||
print(f' Face {i + 1} ↔ Face {j + 1}: {sim:.3f} ({status})')
|
||||
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
draw_detections(image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, fancy_bbox=True)
|
||||
|
||||
for i, face in enumerate(faces, 1):
|
||||
draw_face_info(image, face, i)
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{Path(image_path).stem}_analysis.jpg')
|
||||
cv2.imwrite(output_path, image)
|
||||
print(f'Output saved: {output_path}')
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Face analysis with detection, recognition, and attributes')
|
||||
parser.add_argument('--image', type=str, required=True, help='Path to input image')
|
||||
parser.add_argument('--save_dir', type=str, default='outputs', help='Output directory')
|
||||
parser.add_argument('--no-similarity', action='store_true', help='Skip similarity matrix computation')
|
||||
args = parser.parse_args()
|
||||
|
||||
if not os.path.exists(args.image):
|
||||
print(f'Error: Image not found: {args.image}')
|
||||
return
|
||||
|
||||
detector = RetinaFace()
|
||||
recognizer = ArcFace()
|
||||
age_gender = AgeGender()
|
||||
analyzer = FaceAnalyzer(detector, recognizer, age_gender)
|
||||
|
||||
process_image(analyzer, args.image, args.save_dir, show_similarity=not args.no_similarity)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
126
scripts/run_face_parsing.py
Normal file
@@ -0,0 +1,126 @@
|
||||
# Face parsing on detected faces
|
||||
# Usage: python run_face_parsing.py --image path/to/image.jpg
|
||||
# python run_face_parsing.py --webcam
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
|
||||
from uniface import RetinaFace
|
||||
from uniface.constants import ParsingWeights
|
||||
from uniface.parsing import BiSeNet
|
||||
from uniface.visualization import vis_parsing_maps
|
||||
|
||||
|
||||
def process_image(detector, parser, image_path: str, save_dir: str = 'outputs'):
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
print(f"Error: Failed to load image from '{image_path}'")
|
||||
return
|
||||
|
||||
faces = detector.detect(image)
|
||||
print(f'Detected {len(faces)} face(s)')
|
||||
|
||||
result_image = image.copy()
|
||||
|
||||
for i, face in enumerate(faces):
|
||||
bbox = face['bbox']
|
||||
x1, y1, x2, y2 = map(int, bbox[:4])
|
||||
face_crop = image[y1:y2, x1:x2]
|
||||
|
||||
if face_crop.size == 0:
|
||||
continue
|
||||
|
||||
# Parse the face
|
||||
mask = parser.parse(face_crop)
|
||||
print(f' Face {i + 1}: parsed with {len(set(mask.flatten()))} unique classes')
|
||||
|
||||
# Visualize the parsing result
|
||||
face_crop_rgb = cv2.cvtColor(face_crop, cv2.COLOR_BGR2RGB)
|
||||
vis_result = vis_parsing_maps(face_crop_rgb, mask, save_image=False)
|
||||
|
||||
# Place the visualization back on the original image
|
||||
result_image[y1:y2, x1:x2] = vis_result
|
||||
|
||||
# Draw bounding box
|
||||
cv2.rectangle(result_image, (x1, y1), (x2, y2), (0, 255, 0), 2)
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{Path(image_path).stem}_parsing.jpg')
|
||||
cv2.imwrite(output_path, result_image)
|
||||
print(f'Output saved: {output_path}')
|
||||
|
||||
|
||||
def run_webcam(detector, parser):
|
||||
cap = cv2.VideoCapture(0)
|
||||
if not cap.isOpened():
|
||||
print('Cannot open webcam')
|
||||
return
|
||||
|
||||
print("Press 'q' to quit")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
frame = cv2.flip(frame, 1)
|
||||
faces = detector.detect(frame)
|
||||
|
||||
for face in faces:
|
||||
bbox = face['bbox']
|
||||
x1, y1, x2, y2 = map(int, bbox[:4])
|
||||
face_crop = frame[y1:y2, x1:x2]
|
||||
|
||||
if face_crop.size == 0:
|
||||
continue
|
||||
|
||||
# Parse the face
|
||||
mask = parser.parse(face_crop)
|
||||
|
||||
# Visualize the parsing result
|
||||
face_crop_rgb = cv2.cvtColor(face_crop, cv2.COLOR_BGR2RGB)
|
||||
vis_result = vis_parsing_maps(face_crop_rgb, mask, save_image=False)
|
||||
|
||||
# Place the visualization back on the frame
|
||||
frame[y1:y2, x1:x2] = vis_result
|
||||
|
||||
# Draw bounding box
|
||||
cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
|
||||
|
||||
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
cv2.imshow('Face Parsing', frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
|
||||
parser_arg = argparse.ArgumentParser(description='Run face parsing')
|
||||
parser_arg.add_argument('--image', type=str, help='Path to input image')
|
||||
parser_arg.add_argument('--webcam', action='store_true', help='Use webcam')
|
||||
parser_arg.add_argument('--save_dir', type=str, default='outputs')
|
||||
parser_arg.add_argument(
|
||||
'--model', type=str, default=ParsingWeights.RESNET18, choices=[ParsingWeights.RESNET18, ParsingWeights.RESNET34]
|
||||
)
|
||||
args = parser_arg.parse_args()
|
||||
|
||||
if not args.image and not args.webcam:
|
||||
parser_arg.error('Either --image or --webcam must be specified')
|
||||
|
||||
detector = RetinaFace()
|
||||
parser = BiSeNet(model_name=args.model)  # honor the --model argument (default: RESNET18)
|
||||
|
||||
if args.webcam:
|
||||
run_webcam(detector, parser)
|
||||
else:
|
||||
process_image(detector, parser, args.image, args.save_dir)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
97
scripts/run_face_search.py
Normal file
@@ -0,0 +1,97 @@
|
||||
# Real-time face search: match webcam faces against a reference image
|
||||
# Usage: python run_face_search.py --image reference.jpg
|
||||
|
||||
import argparse
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from uniface.detection import SCRFD, RetinaFace
|
||||
from uniface.face_utils import compute_similarity
|
||||
from uniface.recognition import ArcFace, MobileFace, SphereFace
|
||||
|
||||
|
||||
def get_recognizer(name: str):
|
||||
if name == 'arcface':
|
||||
return ArcFace()
|
||||
elif name == 'mobileface':
|
||||
return MobileFace()
|
||||
else:
|
||||
return SphereFace()
|
||||
|
||||
|
||||
def extract_reference_embedding(detector, recognizer, image_path: str) -> np.ndarray:
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
raise RuntimeError(f'Failed to load image: {image_path}')
|
||||
|
||||
faces = detector.detect(image)
|
||||
if not faces:
|
||||
raise RuntimeError('No faces found in reference image.')
|
||||
|
||||
landmarks = faces[0]['landmarks']
|
||||
return recognizer.get_normalized_embedding(image, landmarks)
|
||||
|
||||
|
||||
def run_webcam(detector, recognizer, ref_embedding: np.ndarray, threshold: float = 0.4):
|
||||
cap = cv2.VideoCapture(0) # 0 = default webcam
|
||||
if not cap.isOpened():
|
||||
raise RuntimeError('Webcam could not be opened.')
|
||||
|
||||
print("Press 'q' to quit")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
if not ret:
    break
frame = cv2.flip(frame, 1)  # mirror for natural interaction (only after a successful read)
|
||||
|
||||
faces = detector.detect(frame)
|
||||
|
||||
for face in faces:
|
||||
bbox = face['bbox']
|
||||
landmarks = face['landmarks']
|
||||
x1, y1, x2, y2 = map(int, bbox)
|
||||
|
||||
embedding = recognizer.get_normalized_embedding(frame, landmarks)
|
||||
sim = compute_similarity(ref_embedding, embedding) # compare with reference
|
||||
|
||||
# green = match, red = unknown
|
||||
label = f'Match ({sim:.2f})' if sim > threshold else f'Unknown ({sim:.2f})'
|
||||
color = (0, 255, 0) if sim > threshold else (0, 0, 255)
|
||||
|
||||
cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
|
||||
cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)
|
||||
|
||||
cv2.imshow('Face Recognition', frame)
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Face search using a reference image')
|
||||
parser.add_argument('--image', type=str, required=True, help='Reference face image')
|
||||
parser.add_argument('--threshold', type=float, default=0.4, help='Match threshold')
|
||||
parser.add_argument('--detector', type=str, default='scrfd', choices=['retinaface', 'scrfd'])
|
||||
parser.add_argument(
|
||||
'--recognizer',
|
||||
type=str,
|
||||
default='arcface',
|
||||
choices=['arcface', 'mobileface', 'sphereface'],
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
|
||||
recognizer = get_recognizer(args.recognizer)
|
||||
|
||||
print(f'Loading reference: {args.image}')
|
||||
ref_embedding = extract_reference_embedding(detector, recognizer, args.image)
|
||||
|
||||
run_webcam(detector, recognizer, ref_embedding, args.threshold)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
104
scripts/run_gaze_estimation.py
Normal file
@@ -0,0 +1,104 @@
|
||||
# Gaze estimation on detected faces
|
||||
# Usage: python run_gaze_estimation.py --image path/to/image.jpg
|
||||
# python run_gaze_estimation.py --webcam
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from uniface import RetinaFace
|
||||
from uniface.gaze import MobileGaze
|
||||
from uniface.visualization import draw_gaze
|
||||
|
||||
|
||||
def process_image(detector, gaze_estimator, image_path: str, save_dir: str = 'outputs'):
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
print(f"Error: Failed to load image from '{image_path}'")
|
||||
return
|
||||
|
||||
faces = detector.detect(image)
|
||||
print(f'Detected {len(faces)} face(s)')
|
||||
|
||||
for i, face in enumerate(faces):
|
||||
bbox = face['bbox']
|
||||
x1, y1, x2, y2 = map(int, bbox[:4])
|
||||
face_crop = image[y1:y2, x1:x2]
|
||||
|
||||
if face_crop.size == 0:
|
||||
continue
|
||||
|
||||
pitch, yaw = gaze_estimator.estimate(face_crop)
|
||||
print(f' Face {i + 1}: pitch={np.degrees(pitch):.1f}°, yaw={np.degrees(yaw):.1f}°')
|
||||
|
||||
# Draw both bbox and gaze arrow with angle text
|
||||
draw_gaze(image, bbox, pitch, yaw, draw_angles=True)
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{Path(image_path).stem}_gaze.jpg')
|
||||
cv2.imwrite(output_path, image)
|
||||
print(f'Output saved: {output_path}')
|
||||
|
||||
|
||||
def run_webcam(detector, gaze_estimator):
|
||||
cap = cv2.VideoCapture(0)
|
||||
if not cap.isOpened():
|
||||
print('Cannot open webcam')
|
||||
return
|
||||
|
||||
print("Press 'q' to quit")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
frame = cv2.flip(frame, 1)
|
||||
faces = detector.detect(frame)
|
||||
|
||||
for face in faces:
|
||||
bbox = face['bbox']
|
||||
x1, y1, x2, y2 = map(int, bbox[:4])
|
||||
face_crop = frame[y1:y2, x1:x2]
|
||||
|
||||
if face_crop.size == 0:
|
||||
continue
|
||||
|
||||
pitch, yaw = gaze_estimator.estimate(face_crop)
|
||||
# Draw both bbox and gaze arrow
|
||||
draw_gaze(frame, bbox, pitch, yaw)
|
||||
|
||||
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
cv2.imshow('Gaze Estimation', frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Run gaze estimation')
|
||||
parser.add_argument('--image', type=str, help='Path to input image')
|
||||
parser.add_argument('--webcam', action='store_true', help='Use webcam')
|
||||
parser.add_argument('--save_dir', type=str, default='outputs')
|
||||
args = parser.parse_args()
|
||||
|
||||
if not args.image and not args.webcam:
|
||||
parser.error('Either --image or --webcam must be specified')
|
||||
|
||||
detector = RetinaFace()
|
||||
gaze_estimator = MobileGaze()
|
||||
|
||||
if args.webcam:
|
||||
run_webcam(detector, gaze_estimator)
|
||||
else:
|
||||
process_image(detector, gaze_estimator, args.image, args.save_dir)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
117
scripts/run_landmarks.py
Normal file
@@ -0,0 +1,117 @@
|
||||
# 106-point facial landmark detection
|
||||
# Usage: python run_landmarks.py --image path/to/image.jpg
|
||||
# python run_landmarks.py --webcam
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
|
||||
from uniface import SCRFD, Landmark106, RetinaFace
|
||||
|
||||
|
||||
def process_image(detector, landmarker, image_path: str, save_dir: str = 'outputs'):
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
print(f"Error: Failed to load image from '{image_path}'")
|
||||
return
|
||||
|
||||
faces = detector.detect(image)
|
||||
print(f'Detected {len(faces)} face(s)')
|
||||
|
||||
if not faces:
|
||||
return
|
||||
|
||||
for i, face in enumerate(faces):
|
||||
bbox = face['bbox']
|
||||
x1, y1, x2, y2 = map(int, bbox)
|
||||
cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
|
||||
|
||||
landmarks = landmarker.get_landmarks(image, bbox)
|
||||
print(f' Face {i + 1}: {len(landmarks)} landmarks')
|
||||
|
||||
for x, y in landmarks.astype(int):
|
||||
cv2.circle(image, (x, y), 1, (0, 255, 0), -1)
|
||||
|
||||
cv2.putText(
|
||||
image,
|
||||
f'Face {i + 1}',
|
||||
(x1, y1 - 10),
|
||||
cv2.FONT_HERSHEY_SIMPLEX,
|
||||
0.5,
|
||||
(0, 255, 0),
|
||||
2,
|
||||
)
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{Path(image_path).stem}_landmarks.jpg')
|
||||
cv2.imwrite(output_path, image)
|
||||
print(f'Output saved: {output_path}')
|
||||
|
||||
|
||||
def run_webcam(detector, landmarker):
|
||||
cap = cv2.VideoCapture(0) # 0 = default webcam
|
||||
if not cap.isOpened():
|
||||
print('Cannot open webcam')
|
||||
return
|
||||
|
||||
print("Press 'q' to quit")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
if not ret:
    break
frame = cv2.flip(frame, 1)  # mirror for natural interaction (only after a successful read)
|
||||
|
||||
faces = detector.detect(frame)
|
||||
|
||||
for face in faces:
|
||||
bbox = face['bbox']
|
||||
x1, y1, x2, y2 = map(int, bbox)
|
||||
cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
|
||||
|
||||
landmarks = landmarker.get_landmarks(frame, bbox) # 106 points
|
||||
for x, y in landmarks.astype(int):
|
||||
cv2.circle(frame, (x, y), 1, (0, 255, 0), -1)
|
||||
|
||||
cv2.putText(
|
||||
frame,
|
||||
f'Faces: {len(faces)}',
|
||||
(10, 30),
|
||||
cv2.FONT_HERSHEY_SIMPLEX,
|
||||
1,
|
||||
(0, 255, 0),
|
||||
2,
|
||||
)
|
||||
cv2.imshow('106-Point Landmarks', frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Run facial landmark detection')
|
||||
parser.add_argument('--image', type=str, help='Path to input image')
|
||||
parser.add_argument('--webcam', action='store_true', help='Use webcam')
|
||||
parser.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
|
||||
parser.add_argument('--save_dir', type=str, default='outputs')
|
||||
args = parser.parse_args()
|
||||
|
||||
if not args.image and not args.webcam:
|
||||
parser.error('Either --image or --webcam must be specified')
|
||||
|
||||
detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
|
||||
landmarker = Landmark106()
|
||||
|
||||
if args.webcam:
|
||||
run_webcam(detector, landmarker)
|
||||
else:
|
||||
process_image(detector, landmarker, args.image, args.save_dir)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
103
scripts/run_recognition.py
Normal file
@@ -0,0 +1,103 @@
|
||||
# Face recognition: extract embeddings or compare two faces
|
||||
# Usage: python run_recognition.py --image path/to/image.jpg
|
||||
# python run_recognition.py --image1 face1.jpg --image2 face2.jpg
|
||||
|
||||
import argparse
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from uniface.detection import SCRFD, RetinaFace
|
||||
from uniface.face_utils import compute_similarity
|
||||
from uniface.recognition import ArcFace, MobileFace, SphereFace
|
||||
|
||||
|
||||
def get_recognizer(name: str):
|
||||
if name == 'arcface':
|
||||
return ArcFace()
|
||||
elif name == 'mobileface':
|
||||
return MobileFace()
|
||||
else:
|
||||
return SphereFace()
|
||||
|
||||
|
||||
def run_inference(detector, recognizer, image_path: str):
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
print(f"Error: Failed to load image from '{image_path}'")
|
||||
return
|
||||
|
||||
faces = detector.detect(image)
|
||||
if not faces:
|
||||
print('No faces detected.')
|
||||
return
|
||||
|
||||
print(f'Detected {len(faces)} face(s). Extracting embedding for the first face...')
|
||||
|
||||
landmarks = faces[0]['landmarks'] # 5-point landmarks for alignment (already np.ndarray)
|
||||
embedding = recognizer.get_embedding(image, landmarks)
|
||||
norm_embedding = recognizer.get_normalized_embedding(image, landmarks) # L2 normalized
|
||||
|
||||
print(f' Embedding shape: {embedding.shape}')
|
||||
print(f' L2 norm (raw): {np.linalg.norm(embedding):.4f}')
|
||||
print(f' L2 norm (normalized): {np.linalg.norm(norm_embedding):.4f}')
|
||||
|
||||
|
||||
def compare_faces(detector, recognizer, image1_path: str, image2_path: str, threshold: float = 0.35):
|
||||
img1 = cv2.imread(image1_path)
|
||||
img2 = cv2.imread(image2_path)
|
||||
|
||||
if img1 is None or img2 is None:
|
||||
print('Error: Failed to load one or both images')
|
||||
return
|
||||
|
||||
faces1 = detector.detect(img1)
|
||||
faces2 = detector.detect(img2)
|
||||
|
||||
if not faces1 or not faces2:
|
||||
print('Error: No faces detected in one or both images')
|
||||
return
|
||||
|
||||
landmarks1 = faces1[0]['landmarks']
|
||||
landmarks2 = faces2[0]['landmarks']
|
||||
|
||||
embedding1 = recognizer.get_normalized_embedding(img1, landmarks1)
|
||||
embedding2 = recognizer.get_normalized_embedding(img2, landmarks2)
|
||||
|
||||
# cosine similarity for normalized embeddings
|
||||
similarity = compute_similarity(embedding1, embedding2, normalized=True)
|
||||
is_match = similarity > threshold
|
||||
|
||||
print(f'Similarity: {similarity:.4f}')
|
||||
print(f'Result: {"Same person" if is_match else "Different person"} (threshold: {threshold})')
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Face recognition and comparison')
|
||||
parser.add_argument('--image', type=str, help='Single image for embedding extraction')
|
||||
parser.add_argument('--image1', type=str, help='First image for comparison')
|
||||
parser.add_argument('--image2', type=str, help='Second image for comparison')
|
||||
parser.add_argument('--threshold', type=float, default=0.35, help='Similarity threshold')
|
||||
parser.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
|
||||
parser.add_argument(
|
||||
'--recognizer',
|
||||
type=str,
|
||||
default='arcface',
|
||||
choices=['arcface', 'mobileface', 'sphereface'],
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
|
||||
recognizer = get_recognizer(args.recognizer)
|
||||
|
||||
if args.image1 and args.image2:
|
||||
compare_faces(detector, recognizer, args.image1, args.image2, args.threshold)
|
||||
elif args.image:
|
||||
run_inference(detector, recognizer, args.image)
|
||||
else:
|
||||
print('Error: Provide --image or both --image1 and --image2')
|
||||
parser.print_help()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
109
scripts/run_video_detection.py
Normal file
@@ -0,0 +1,109 @@
|
||||
# Face detection on video files
|
||||
# Usage: python run_video_detection.py --input video.mp4 --output output.mp4
|
||||
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
from tqdm import tqdm
|
||||
|
||||
from uniface import SCRFD, RetinaFace
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
|
||||
def process_video(
|
||||
detector,
|
||||
input_path: str,
|
||||
output_path: str,
|
||||
threshold: float = 0.6,
|
||||
show_preview: bool = False,
|
||||
):
|
||||
cap = cv2.VideoCapture(input_path)
|
||||
if not cap.isOpened():
|
||||
print(f"Error: Cannot open video file '{input_path}'")
|
||||
return
|
||||
|
||||
# get video properties
|
||||
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
||||
fps = cap.get(cv2.CAP_PROP_FPS)
|
||||
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
||||
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
||||
|
||||
print(f'Input: {input_path} ({width}x{height}, {fps:.1f} fps, {total_frames} frames)')
|
||||
print(f'Output: {output_path}')
|
||||
|
||||
fourcc = cv2.VideoWriter_fourcc(*'mp4v') # codec for .mp4
|
||||
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
|
||||
|
||||
if not out.isOpened():
|
||||
print(f"Error: Cannot create output video '{output_path}'")
|
||||
cap.release()
|
||||
return
|
||||
|
||||
frame_count = 0
|
||||
total_faces = 0
|
||||
|
||||
for _ in tqdm(range(total_frames), desc='Processing', unit='frames'):
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
frame_count += 1
|
||||
faces = detector.detect(frame)
|
||||
total_faces += len(faces)
|
||||
|
||||
bboxes = [f['bbox'] for f in faces]
|
||||
scores = [f['confidence'] for f in faces]
|
||||
landmarks = [f['landmarks'] for f in faces]
|
||||
draw_detections(
|
||||
image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
|
||||
)
|
||||
|
||||
cv2.putText(
|
||||
frame,
|
||||
f'Faces: {len(faces)}',
|
||||
(10, 30),
|
||||
cv2.FONT_HERSHEY_SIMPLEX,
|
||||
1,
|
||||
(0, 255, 0),
|
||||
2,
|
||||
)
|
||||
out.write(frame)
|
||||
|
||||
if show_preview:
|
||||
cv2.imshow("Processing - Press 'q' to cancel", frame)
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
print('\nCancelled by user')
|
||||
break
|
||||
|
||||
cap.release()
|
||||
out.release()
|
||||
if show_preview:
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
avg_faces = total_faces / frame_count if frame_count > 0 else 0
|
||||
print(f'\nDone! {frame_count} frames, {total_faces} faces ({avg_faces:.1f} avg/frame)')
|
||||
print(f'Saved: {output_path}')
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Process video with face detection')
|
||||
parser.add_argument('--input', type=str, required=True, help='Input video path')
|
||||
parser.add_argument('--output', type=str, required=True, help='Output video path')
|
||||
parser.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
|
||||
parser.add_argument('--threshold', type=float, default=0.6, help='Visualization threshold')
|
||||
parser.add_argument('--preview', action='store_true', help='Show live preview')
|
||||
args = parser.parse_args()
|
||||
|
||||
if not Path(args.input).exists():
|
||||
print(f"Error: Input file '{args.input}' does not exist")
|
||||
return
|
||||
|
||||
Path(args.output).parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
|
||||
process_video(detector, args.input, args.output, args.threshold, args.preview)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
28
scripts/sha256_generate.py
Normal file
@@ -0,0 +1,28 @@
|
||||
import argparse
|
||||
import hashlib
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def compute_sha256(file_path: Path, chunk_size: int = 8192) -> str:
|
||||
sha256_hash = hashlib.sha256()
|
||||
with file_path.open('rb') as f:
|
||||
for chunk in iter(lambda: f.read(chunk_size), b''):
|
||||
sha256_hash.update(chunk)
|
||||
return sha256_hash.hexdigest()
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Compute SHA256 hash of a file')
|
||||
parser.add_argument('file', type=Path, help='Path to file')
|
||||
args = parser.parse_args()
|
||||
|
||||
if not args.file.exists() or not args.file.is_file():
|
||||
print(f'File does not exist: {args.file}')
|
||||
return
|
||||
|
||||
sha256 = compute_sha256(args.file)
|
||||
print(f"SHA256 hash for '{args.file.name}':\n{sha256}")
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
43
setup.py
@@ -1,43 +0,0 @@
|
||||
import os
|
||||
from setuptools import setup, find_packages
|
||||
|
||||
# Read the README file for the long description
|
||||
long_description = ""
|
||||
if os.path.exists("README.md"):
|
||||
with open("README.md", "r", encoding="utf-8") as f:
|
||||
long_description = f.read()
|
||||
|
||||
setup(
|
||||
name="uniface",
|
||||
version="0.1.1",
|
||||
packages=find_packages(),
|
||||
install_requires=[
|
||||
"numpy",
|
||||
"opencv-python",
|
||||
"onnx",
|
||||
"onnxruntime",
|
||||
"requests",
|
||||
"torch"
|
||||
],
|
||||
extras_require={
|
||||
"dev": ["pytest"],
|
||||
},
|
||||
description="UniFace: A Comprehensive Library for Face Detection, Recognition, Landmark Analysis, Age, and Gender Detection",
|
||||
long_description=long_description,
|
||||
long_description_content_type="text/markdown",
|
||||
author="Yakhyokhuja Valikhujaev",
|
||||
author_email="yakhyo9696@gmail.com",
|
||||
url="https://github.com/yakhyo/uniface",
|
||||
license="MIT",
|
||||
classifiers=[
|
||||
"Programming Language :: Python :: 3",
|
||||
"Programming Language :: Python :: 3.8",
|
||||
"Programming Language :: Python :: 3.9",
|
||||
"Programming Language :: Python :: 3.10",
|
||||
"License :: OSI Approved :: MIT License",
|
||||
"Operating System :: OS Independent",
|
||||
"Topic :: Software Development :: Libraries :: Python Modules",
|
||||
],
|
||||
keywords="face detection, face recognition, facial landmark, facial attribute, onnx, opencv, retinaface",
|
||||
python_requires=">=3.8",
|
||||
)
|
||||
57
test.py
@@ -1,57 +0,0 @@
|
||||
import os
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from uniface import RetinaFace, draw_detections
|
||||
|
||||
|
||||
def run_inference(image_path, save_image=False, vis_threshold=0.6):
|
||||
"""
|
||||
Perform inference on an image, draw detections, and optionally save the output image.
|
||||
|
||||
Args:
|
||||
image_path (str): Path to the input image.
|
||||
save_image (bool): Whether to save the output image with detections.
|
||||
vis_threshold (float): Confidence threshold for displaying detections.
|
||||
"""
|
||||
# Load the image
|
||||
original_image = cv2.imread(image_path)
|
||||
if original_image is None:
|
||||
print(f"Error: Could not read image from {image_path}")
|
||||
return
|
||||
|
||||
# Perform face detection
|
||||
boxes, landmarks = retinaface_inference.detect(original_image)
|
||||
|
||||
# Draw detections on the image
|
||||
draw_detections(original_image, (boxes, landmarks), vis_threshold)
|
||||
|
||||
# Save the output image if requested
|
||||
if save_image:
|
||||
im_name = os.path.splitext(os.path.basename(image_path))[0]
|
||||
save_name = f"{im_name}_out.jpg"
|
||||
cv2.imwrite(save_name, original_image)
|
||||
print(f"Image saved at '{save_name}'")
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
import time
|
||||
|
||||
# Initialize and run the ONNX inference
|
||||
retinaface_inference = RetinaFace(
|
||||
model="retinaface_mnet_v2",
|
||||
conf_thresh=0.5,
|
||||
pre_nms_topk=5000,
|
||||
nms_thresh=0.4,
|
||||
post_nms_topk=750,
|
||||
)
|
||||
|
||||
img_path = "assets/test.jpg"
|
||||
avg = 0
|
||||
for _ in range(50):
|
||||
st = time.time()
|
||||
run_inference(img_path, save_image=True, vis_threshold=0.6)
|
||||
d = time.time() - st
|
||||
print(d)
|
||||
avg += d
|
||||
print("avg", avg / 50)
|
||||
117
tests/test_age_gender.py
Normal file
@@ -0,0 +1,117 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from uniface.attribute import AgeGender
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def age_gender_model():
|
||||
return AgeGender()
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_image():
|
||||
return np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_bbox():
|
||||
return [100, 100, 300, 300]
|
||||
|
||||
|
||||
def test_model_initialization(age_gender_model):
|
||||
assert age_gender_model is not None, 'AgeGender model initialization failed.'
|
||||
|
||||
|
||||
def test_prediction_output_format(age_gender_model, mock_image, mock_bbox):
|
||||
gender_id, age = age_gender_model.predict(mock_image, mock_bbox)
|
||||
assert isinstance(gender_id, int), f'Gender ID should be int, got {type(gender_id)}'
|
||||
assert isinstance(age, int), f'Age should be int, got {type(age)}'
|
||||
|
||||
|
||||
def test_gender_values(age_gender_model, mock_image, mock_bbox):
|
||||
gender_id, age = age_gender_model.predict(mock_image, mock_bbox)
|
||||
assert gender_id in [0, 1], f'Gender ID should be 0 (Female) or 1 (Male), got {gender_id}'
|
||||
|
||||
|
||||
def test_age_range(age_gender_model, mock_image, mock_bbox):
|
||||
gender_id, age = age_gender_model.predict(mock_image, mock_bbox)
|
||||
assert 0 <= age <= 120, f'Age should be between 0 and 120, got {age}'
|
||||
|
||||
|
||||
def test_different_bbox_sizes(age_gender_model, mock_image):
|
||||
test_bboxes = [
|
||||
[50, 50, 150, 150],
|
||||
[100, 100, 300, 300],
|
||||
[50, 50, 400, 400],
|
||||
]
|
||||
|
||||
for bbox in test_bboxes:
|
||||
gender_id, age = age_gender_model.predict(mock_image, bbox)
|
||||
assert gender_id in [0, 1], f'Failed for bbox {bbox}'
|
||||
assert 0 <= age <= 120, f'Age out of range for bbox {bbox}'
|
||||
|
||||
|
||||
def test_different_image_sizes(age_gender_model, mock_bbox):
|
||||
test_sizes = [(480, 640, 3), (720, 1280, 3), (1080, 1920, 3)]
|
||||
|
||||
for size in test_sizes:
|
||||
mock_image = np.random.randint(0, 255, size, dtype=np.uint8)
|
||||
gender_id, age = age_gender_model.predict(mock_image, mock_bbox)
|
||||
assert gender_id in [0, 1], f'Failed for image size {size}'
|
||||
assert 0 <= age <= 120, f'Age out of range for image size {size}'
|
||||
|
||||
|
||||
def test_consistency(age_gender_model, mock_image, mock_bbox):
|
||||
gender_id1, age1 = age_gender_model.predict(mock_image, mock_bbox)
|
||||
gender_id2, age2 = age_gender_model.predict(mock_image, mock_bbox)
|
||||
|
||||
assert gender_id1 == gender_id2, 'Same input should produce same gender prediction'
|
||||
assert age1 == age2, 'Same input should produce same age prediction'
|
||||
|
||||
|
||||
def test_bbox_list_format(age_gender_model, mock_image):
|
||||
bbox_list = [100, 100, 300, 300]
|
||||
gender_id, age = age_gender_model.predict(mock_image, bbox_list)
|
||||
assert gender_id in [0, 1], 'Should work with bbox as list'
|
||||
assert 0 <= age <= 120, 'Age should be in valid range'
|
||||
|
||||
|
||||
def test_bbox_array_format(age_gender_model, mock_image):
|
||||
bbox_array = np.array([100, 100, 300, 300])
|
||||
gender_id, age = age_gender_model.predict(mock_image, bbox_array)
|
||||
assert gender_id in [0, 1], 'Should work with bbox as numpy array'
|
||||
assert 0 <= age <= 120, 'Age should be in valid range'
|
||||
|
||||
|
||||
def test_multiple_predictions(age_gender_model, mock_image):
|
||||
bboxes = [
|
||||
[50, 50, 150, 150],
|
||||
[200, 200, 350, 350],
|
||||
[400, 400, 550, 550],
|
||||
]
|
||||
|
||||
results = []
|
||||
for bbox in bboxes:
|
||||
gender_id, age = age_gender_model.predict(mock_image, bbox)
|
||||
results.append((gender_id, age))
|
||||
|
||||
assert len(results) == 3, 'Should have 3 predictions'
|
||||
for gender_id, age in results:
|
||||
assert gender_id in [0, 1]
|
||||
assert 0 <= age <= 120
|
||||
|
||||
|
||||
def test_age_is_positive(age_gender_model, mock_image, mock_bbox):
|
||||
for _ in range(5):
|
||||
gender_id, age = age_gender_model.predict(mock_image, mock_bbox)
|
||||
assert age >= 0, f'Age should be non-negative, got {age}'
|
||||
|
||||
|
||||
def test_output_format_for_visualization(age_gender_model, mock_image, mock_bbox):
|
||||
gender_id, age = age_gender_model.predict(mock_image, mock_bbox)
|
||||
gender_str = 'Female' if gender_id == 0 else 'Male'
|
||||
text = f'{gender_str}, {age}y'
|
||||
assert isinstance(text, str), 'Should be able to format as string'
|
||||
assert 'Male' in text or 'Female' in text, 'Text should contain gender'
|
||||
assert 'y' in text, "Text should contain 'y' for years"
|
||||
274
tests/test_factory.py
Normal file
@@ -0,0 +1,274 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from uniface import (
|
||||
create_detector,
|
||||
create_landmarker,
|
||||
create_recognizer,
|
||||
detect_faces,
|
||||
list_available_detectors,
|
||||
)
|
||||
from uniface.constants import RetinaFaceWeights, SCRFDWeights
|
||||
|
||||
|
||||
# create_detector tests
|
||||
def test_create_detector_retinaface():
|
||||
"""
|
||||
Test creating a RetinaFace detector using factory function.
|
||||
"""
|
||||
detector = create_detector('retinaface')
|
||||
assert detector is not None, 'Failed to create RetinaFace detector'
|
||||
|
||||
|
||||
def test_create_detector_scrfd():
|
||||
"""
|
||||
Test creating a SCRFD detector using factory function.
|
||||
"""
|
||||
detector = create_detector('scrfd')
|
||||
assert detector is not None, 'Failed to create SCRFD detector'
|
||||
|
||||
|
||||
def test_create_detector_with_config():
|
||||
"""
|
||||
Test creating detector with custom configuration.
|
||||
"""
|
||||
detector = create_detector(
|
||||
'retinaface',
|
||||
model_name=RetinaFaceWeights.MNET_V2,
|
||||
conf_thresh=0.8,
|
||||
nms_thresh=0.3,
|
||||
)
|
||||
assert detector is not None, 'Failed to create detector with custom config'
|
||||
|
||||
|
||||
def test_create_detector_invalid_method():
|
||||
"""
|
||||
Test that invalid detector method raises an error.
|
||||
"""
|
||||
with pytest.raises((ValueError, KeyError)):
|
||||
create_detector('invalid_method')
|
||||
|
||||
|
||||
def test_create_detector_scrfd_with_model():
|
||||
"""
|
||||
Test creating SCRFD detector with specific model.
|
||||
"""
|
||||
detector = create_detector('scrfd', model_name=SCRFDWeights.SCRFD_10G_KPS, conf_thresh=0.5)
|
||||
assert detector is not None, 'Failed to create SCRFD with specific model'
|
||||
|
||||
|
||||
# create_recognizer tests
|
||||
def test_create_recognizer_arcface():
|
||||
"""
|
||||
Test creating an ArcFace recognizer using factory function.
|
||||
"""
|
||||
recognizer = create_recognizer('arcface')
|
||||
assert recognizer is not None, 'Failed to create ArcFace recognizer'
|
||||
|
||||
|
||||
def test_create_recognizer_mobileface():
|
||||
"""
|
||||
Test creating a MobileFace recognizer using factory function.
|
||||
"""
|
||||
recognizer = create_recognizer('mobileface')
|
||||
assert recognizer is not None, 'Failed to create MobileFace recognizer'
|
||||
|
||||
|
||||
def test_create_recognizer_sphereface():
|
||||
"""
|
||||
Test creating a SphereFace recognizer using factory function.
|
||||
"""
|
||||
recognizer = create_recognizer('sphereface')
|
||||
assert recognizer is not None, 'Failed to create SphereFace recognizer'
|
||||
|
||||
|
||||
def test_create_recognizer_invalid_method():
|
||||
"""
|
||||
Test that invalid recognizer method raises an error.
|
||||
"""
|
||||
with pytest.raises((ValueError, KeyError)):
|
||||
create_recognizer('invalid_method')
|
||||
|
||||
|
||||
# create_landmarker tests
|
||||
def test_create_landmarker():
|
||||
"""
|
||||
Test creating a Landmark106 detector using factory function.
|
||||
"""
|
||||
landmarker = create_landmarker('2d106det')
|
||||
assert landmarker is not None, 'Failed to create Landmark106 detector'
|
||||
|
||||
|
||||
def test_create_landmarker_default():
|
||||
"""
|
||||
Test creating landmarker with default parameters.
|
||||
"""
|
||||
landmarker = create_landmarker()
|
||||
assert landmarker is not None, 'Failed to create default landmarker'
|
||||
|
||||
|
||||
def test_create_landmarker_invalid_method():
|
||||
"""
|
||||
Test that invalid landmarker method raises an error.
|
||||
"""
|
||||
with pytest.raises((ValueError, KeyError)):
|
||||
create_landmarker('invalid_method')
|
||||
|
||||
|
||||
# detect_faces tests
|
||||
def test_detect_faces_retinaface():
|
||||
"""
|
||||
Test high-level detect_faces function with RetinaFace.
|
||||
"""
|
||||
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
faces = detect_faces(mock_image, method='retinaface')
|
||||
|
||||
assert isinstance(faces, list), 'detect_faces should return a list'
|
||||
|
||||
|
||||
def test_detect_faces_scrfd():
|
||||
"""
|
||||
Test high-level detect_faces function with SCRFD.
|
||||
"""
|
||||
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
faces = detect_faces(mock_image, method='scrfd')
|
||||
|
||||
assert isinstance(faces, list), 'detect_faces should return a list'
|
||||
|
||||
|
||||
def test_detect_faces_with_threshold():
|
||||
"""
|
||||
Test detect_faces with custom confidence threshold.
|
||||
"""
|
||||
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
faces = detect_faces(mock_image, method='retinaface', conf_thresh=0.8)
|
||||
|
||||
assert isinstance(faces, list), 'detect_faces should return a list'
|
||||
|
||||
# All detections should respect threshold
|
||||
for face in faces:
|
||||
assert face['confidence'] >= 0.8, 'All detections should meet confidence threshold'
|
||||
|
||||
|
||||
def test_detect_faces_default_method():
|
||||
"""
|
||||
Test detect_faces with default method (should use retinaface).
|
||||
"""
|
||||
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
faces = detect_faces(mock_image) # No method specified
|
||||
|
||||
assert isinstance(faces, list), 'detect_faces should return a list with default method'
|
||||
|
||||
|
||||
def test_detect_faces_empty_image():
|
||||
"""
|
||||
Test detect_faces on a blank image.
|
||||
"""
|
||||
empty_image = np.zeros((640, 640, 3), dtype=np.uint8)
|
||||
faces = detect_faces(empty_image, method='retinaface')
|
||||
|
||||
assert isinstance(faces, list), 'Should return a list even for empty image'
|
||||
assert len(faces) == 0, 'Should detect no faces in blank image'
|
||||
|
||||
|
||||
# list_available_detectors tests
|
||||
def test_list_available_detectors():
|
||||
"""
|
||||
Test that list_available_detectors returns a dictionary.
|
||||
"""
|
||||
detectors = list_available_detectors()
|
||||
|
||||
assert isinstance(detectors, dict), 'Should return a dictionary of detectors'
|
||||
assert len(detectors) > 0, 'Should have at least one detector available'
|
||||
|
||||
|
||||
def test_list_available_detectors_contents():
|
||||
"""
|
||||
Test that list includes known detectors.
|
||||
"""
|
||||
detectors = list_available_detectors()
|
||||
|
||||
# Should include at least these detectors
|
||||
assert 'retinaface' in detectors, "Should include 'retinaface'"
|
||||
assert 'scrfd' in detectors, "Should include 'scrfd'"
|
||||
|
||||
|
||||
# Integration tests
|
||||
def test_detector_inference_from_factory():
|
||||
"""
|
||||
Test that detector created from factory can perform inference.
|
||||
"""
|
||||
detector = create_detector('retinaface')
|
||||
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
|
||||
faces = detector.detect(mock_image)
|
||||
assert isinstance(faces, list), 'Detector should return list of faces'
|
||||
|
||||
|
||||
def test_recognizer_inference_from_factory():
|
||||
"""
|
||||
Test that recognizer created from factory can perform inference.
|
||||
"""
|
||||
recognizer = create_recognizer('arcface')
|
||||
mock_image = np.random.randint(0, 255, (112, 112, 3), dtype=np.uint8)
|
||||
|
||||
embedding = recognizer.get_embedding(mock_image)
|
||||
assert embedding is not None, 'Recognizer should return embedding'
|
||||
assert embedding.shape[1] == 512, 'Should return 512-dimensional embedding'
|
||||
|
||||
|
||||
def test_landmarker_inference_from_factory():
|
||||
"""
|
||||
Test that landmarker created from factory can perform inference.
|
||||
"""
|
||||
landmarker = create_landmarker('2d106det')
|
||||
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
mock_bbox = [100, 100, 300, 300]
|
||||
|
||||
landmarks = landmarker.get_landmarks(mock_image, mock_bbox)
|
||||
assert landmarks is not None, 'Landmarker should return landmarks'
|
||||
assert landmarks.shape == (106, 2), 'Should return 106 landmarks'
|
||||
|
||||
|
||||
def test_multiple_detector_creation():
|
||||
"""
|
||||
Test that multiple detectors can be created independently.
|
||||
"""
|
||||
detector1 = create_detector('retinaface')
|
||||
detector2 = create_detector('scrfd')
|
||||
|
||||
assert detector1 is not None
|
||||
assert detector2 is not None
|
||||
assert detector1 is not detector2, 'Should create separate instances'
|
||||
|
||||
|
||||
def test_detector_with_different_configs():
|
||||
"""
|
||||
Test creating multiple detectors with different configurations.
|
||||
"""
|
||||
detector_high_thresh = create_detector('retinaface', conf_thresh=0.9)
|
||||
detector_low_thresh = create_detector('retinaface', conf_thresh=0.3)
|
||||
|
||||
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
|
||||
faces_high = detector_high_thresh.detect(mock_image)
|
||||
faces_low = detector_low_thresh.detect(mock_image)
|
||||
|
||||
# Both should work
|
||||
assert isinstance(faces_high, list)
|
||||
assert isinstance(faces_low, list)
|
||||
|
||||
|
||||
def test_factory_returns_correct_types():
|
||||
"""
|
||||
Test that factory functions return instances of the correct types.
|
||||
"""
|
||||
from uniface import ArcFace, Landmark106, RetinaFace
|
||||
|
||||
detector = create_detector('retinaface')
|
||||
recognizer = create_recognizer('arcface')
|
||||
landmarker = create_landmarker('2d106det')
|
||||
|
||||
assert isinstance(detector, RetinaFace), 'Should return RetinaFace instance'
|
||||
assert isinstance(recognizer, ArcFace), 'Should return ArcFace instance'
|
||||
assert isinstance(landmarker, Landmark106), 'Should return Landmark106 instance'
|
||||
107
tests/test_landmark.py
Normal file
@@ -0,0 +1,107 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from uniface.landmark import Landmark106
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def landmark_model():
|
||||
return Landmark106()
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_image():
|
||||
return np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_bbox():
|
||||
return [100, 100, 300, 300]
|
||||
|
||||
|
||||
def test_model_initialization(landmark_model):
|
||||
assert landmark_model is not None, 'Landmark106 model initialization failed.'
|
||||
|
||||
|
||||
def test_landmark_detection(landmark_model, mock_image, mock_bbox):
|
||||
landmarks = landmark_model.get_landmarks(mock_image, mock_bbox)
|
||||
assert landmarks.shape == (106, 2), f'Expected shape (106, 2), got {landmarks.shape}'
|
||||
|
||||
|
||||
def test_landmark_dtype(landmark_model, mock_image, mock_bbox):
|
||||
landmarks = landmark_model.get_landmarks(mock_image, mock_bbox)
|
||||
assert landmarks.dtype == np.float32, f'Expected float32, got {landmarks.dtype}'
|
||||
|
||||
|
||||
def test_landmark_coordinates_within_image(landmark_model, mock_image, mock_bbox):
|
||||
landmarks = landmark_model.get_landmarks(mock_image, mock_bbox)
|
||||
|
||||
x_coords = landmarks[:, 0]
|
||||
y_coords = landmarks[:, 1]
|
||||
|
||||
x1, y1, x2, y2 = mock_bbox
|
||||
margin = 50
|
||||
|
||||
x_in_bounds = np.sum((x_coords >= x1 - margin) & (x_coords <= x2 + margin))
|
||||
y_in_bounds = np.sum((y_coords >= y1 - margin) & (y_coords <= y2 + margin))
|
||||
|
||||
assert x_in_bounds >= 95, f'Only {x_in_bounds}/106 x-coordinates within bounds'
|
||||
assert y_in_bounds >= 95, f'Only {y_in_bounds}/106 y-coordinates within bounds'
|
||||
|
||||
|
||||
def test_different_bbox_sizes(landmark_model, mock_image):
|
||||
test_bboxes = [
|
||||
[50, 50, 150, 150],
|
||||
[100, 100, 300, 300],
|
||||
[50, 50, 400, 400],
|
||||
]
|
||||
|
||||
for bbox in test_bboxes:
|
||||
landmarks = landmark_model.get_landmarks(mock_image, bbox)
|
||||
assert landmarks.shape == (106, 2), f'Failed for bbox {bbox}'
|
||||
|
||||
|
||||
def test_landmark_array_format(landmark_model, mock_image, mock_bbox):
|
||||
landmarks = landmark_model.get_landmarks(mock_image, mock_bbox)
|
||||
landmarks_int = landmarks.astype(int)
|
||||
|
||||
assert landmarks_int.shape == (106, 2), 'Integer conversion should preserve shape'
|
||||
assert landmarks_int.dtype in [np.int32, np.int64], 'Should convert to integer type'
|
||||
|
||||
|
||||
def test_consistency(landmark_model, mock_image, mock_bbox):
|
||||
landmarks1 = landmark_model.get_landmarks(mock_image, mock_bbox)
|
||||
landmarks2 = landmark_model.get_landmarks(mock_image, mock_bbox)
|
||||
|
||||
assert np.allclose(landmarks1, landmarks2), 'Same input should produce same landmarks'
|
||||
|
||||
|
||||
def test_different_image_sizes(landmark_model, mock_bbox):
|
||||
test_sizes = [(480, 640, 3), (720, 1280, 3), (1080, 1920, 3)]
|
||||
|
||||
for size in test_sizes:
|
||||
mock_image = np.random.randint(0, 255, size, dtype=np.uint8)
|
||||
landmarks = landmark_model.get_landmarks(mock_image, mock_bbox)
|
||||
assert landmarks.shape == (106, 2), f'Failed for image size {size}'
|
||||
|
||||
|
||||
def test_bbox_list_format(landmark_model, mock_image):
|
||||
bbox_list = [100, 100, 300, 300]
|
||||
landmarks = landmark_model.get_landmarks(mock_image, bbox_list)
|
||||
assert landmarks.shape == (106, 2), 'Should work with bbox as list'
|
||||
|
||||
|
||||
def test_bbox_array_format(landmark_model, mock_image):
|
||||
bbox_array = np.array([100, 100, 300, 300])
|
||||
landmarks = landmark_model.get_landmarks(mock_image, bbox_array)
|
||||
assert landmarks.shape == (106, 2), 'Should work with bbox as numpy array'
|
||||
|
||||
|
||||
def test_landmark_distribution(landmark_model, mock_image, mock_bbox):
|
||||
landmarks = landmark_model.get_landmarks(mock_image, mock_bbox)
|
||||
|
||||
x_variance = np.var(landmarks[:, 0])
|
||||
y_variance = np.var(landmarks[:, 1])
|
||||
|
||||
assert x_variance > 0, 'Landmarks should have variation in x-coordinates'
|
||||
assert y_variance > 0, 'Landmarks should have variation in y-coordinates'
|
||||
118
tests/test_parsing.py
Normal file
@@ -0,0 +1,118 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from uniface.constants import ParsingWeights
|
||||
from uniface.parsing import BiSeNet, create_face_parser
|
||||
|
||||
|
||||
def test_bisenet_initialization():
|
||||
"""Test BiSeNet initialization."""
|
||||
parser = BiSeNet()
|
||||
assert parser is not None
|
||||
assert parser.input_size == (512, 512)
|
||||
|
||||
|
||||
def test_bisenet_with_different_models():
|
||||
"""Test BiSeNet with different model weights."""
|
||||
parser_resnet18 = BiSeNet(model_name=ParsingWeights.RESNET18)
|
||||
parser_resnet34 = BiSeNet(model_name=ParsingWeights.RESNET34)
|
||||
|
||||
assert parser_resnet18 is not None
|
||||
assert parser_resnet34 is not None
|
||||
|
||||
|
||||
def test_bisenet_preprocess():
|
||||
"""Test preprocessing."""
|
||||
parser = BiSeNet()
|
||||
|
||||
# Create a dummy face image
|
||||
face_image = np.random.randint(0, 255, (256, 256, 3), dtype=np.uint8)
|
||||
|
||||
# Preprocess
|
||||
preprocessed = parser.preprocess(face_image)
|
||||
|
||||
assert preprocessed.shape == (1, 3, 512, 512)
|
||||
assert preprocessed.dtype == np.float32
|
||||
|
||||
|
||||
def test_bisenet_postprocess():
|
||||
"""Test postprocessing."""
|
||||
parser = BiSeNet()
|
||||
|
||||
# Create dummy model output (batch_size=1, num_classes=19, H=512, W=512)
|
||||
dummy_output = np.random.randn(1, 19, 512, 512).astype(np.float32)
|
||||
|
||||
# Postprocess
|
||||
mask = parser.postprocess(dummy_output, original_size=(256, 256))
|
||||
|
||||
assert mask.shape == (256, 256)
|
||||
assert mask.dtype == np.uint8
|
||||
assert mask.min() >= 0
|
||||
assert mask.max() < 19 # 19 classes (0-18)
|
||||
|
||||
|
||||
def test_bisenet_parse():
|
||||
"""Test end-to-end parsing."""
|
||||
parser = BiSeNet()
|
||||
|
||||
# Create a dummy face image
|
||||
face_image = np.random.randint(0, 255, (256, 256, 3), dtype=np.uint8)
|
||||
|
||||
# Parse
|
||||
mask = parser.parse(face_image)
|
||||
|
||||
assert mask.shape == (256, 256)
|
||||
assert mask.dtype == np.uint8
|
||||
assert mask.min() >= 0
|
||||
assert mask.max() < 19
|
||||
|
||||
|
||||
def test_bisenet_callable():
|
||||
"""Test that BiSeNet is callable."""
|
||||
parser = BiSeNet()
|
||||
face_image = np.random.randint(0, 255, (256, 256, 3), dtype=np.uint8)
|
||||
|
||||
# Should work as callable
|
||||
mask = parser(face_image)
|
||||
|
||||
assert mask.shape == (256, 256)
|
||||
assert mask.dtype == np.uint8
|
||||
|
||||
|
||||
def test_create_face_parser_with_enum():
|
||||
"""Test factory function with enum."""
|
||||
parser = create_face_parser(ParsingWeights.RESNET18)
|
||||
assert parser is not None
|
||||
assert isinstance(parser, BiSeNet)
|
||||
|
||||
|
||||
def test_create_face_parser_with_string():
|
||||
"""Test factory function with string."""
|
||||
parser = create_face_parser('parsing_resnet18')
|
||||
assert parser is not None
|
||||
assert isinstance(parser, BiSeNet)
|
||||
|
||||
|
||||
def test_create_face_parser_invalid_model():
|
||||
"""Test factory function with invalid model name."""
|
||||
with pytest.raises(ValueError, match='Unknown face parsing model'):
|
||||
create_face_parser('invalid_model')
|
||||
|
||||
|
||||
def test_bisenet_different_input_sizes():
|
||||
"""Test parsing with different input image sizes."""
|
||||
parser = BiSeNet()
|
||||
|
||||
# Test with different sizes
|
||||
sizes = [(128, 128), (256, 256), (512, 512), (640, 480)]
|
||||
|
||||
for h, w in sizes:
|
||||
face_image = np.random.randint(0, 255, (h, w, 3), dtype=np.uint8)
|
||||
mask = parser.parse(face_image)
|
||||
|
||||
assert mask.shape == (h, w), f'Failed for size {h}x{w}'
|
||||
assert mask.dtype == np.uint8
|
||||
215
tests/test_recognition.py
Normal file
@@ -0,0 +1,215 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from uniface.recognition import ArcFace, MobileFace, SphereFace
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def arcface_model():
|
||||
"""
|
||||
Fixture to initialize the ArcFace model for testing.
|
||||
"""
|
||||
return ArcFace()
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mobileface_model():
|
||||
"""
|
||||
Fixture to initialize the MobileFace model for testing.
|
||||
"""
|
||||
return MobileFace()
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def sphereface_model():
|
||||
"""
|
||||
Fixture to initialize the SphereFace model for testing.
|
||||
"""
|
||||
return SphereFace()
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_aligned_face():
|
||||
"""
|
||||
Create a mock 112x112 aligned face image.
|
||||
"""
|
||||
return np.random.randint(0, 255, (112, 112, 3), dtype=np.uint8)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_landmarks():
|
||||
"""
|
||||
Create mock 5-point facial landmarks.
|
||||
"""
|
||||
return np.array(
|
||||
[
|
||||
[38.2946, 51.6963],
|
||||
[73.5318, 51.5014],
|
||||
[56.0252, 71.7366],
|
||||
[41.5493, 92.3655],
|
||||
[70.7299, 92.2041],
|
||||
],
|
||||
dtype=np.float32,
|
||||
)
|
||||
|
||||
|
||||
# ArcFace Tests
|
||||
def test_arcface_initialization(arcface_model):
|
||||
"""
|
||||
Test that the ArcFace model initializes correctly.
|
||||
"""
|
||||
assert arcface_model is not None, 'ArcFace model initialization failed.'
|
||||
|
||||
|
||||
def test_arcface_embedding_shape(arcface_model, mock_aligned_face):
|
||||
"""
|
||||
Test that ArcFace produces embeddings with the correct shape.
|
||||
"""
|
||||
embedding = arcface_model.get_embedding(mock_aligned_face)
|
||||
|
||||
# ArcFace typically produces 512-dimensional embeddings
|
||||
assert embedding.shape[1] == 512, f'Expected 512-dim embedding, got {embedding.shape[1]}'
|
||||
assert embedding.shape[0] == 1, 'Embedding should have batch dimension of 1'
|
||||
|
||||
|
||||
def test_arcface_normalized_embedding(arcface_model, mock_landmarks):
|
||||
"""
|
||||
Test that normalized embeddings have unit length.
|
||||
"""
|
||||
# Create a larger mock image for alignment
|
||||
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
|
||||
embedding = arcface_model.get_normalized_embedding(mock_image, mock_landmarks)
|
||||
|
||||
# Check that embedding is normalized (L2 norm ≈ 1.0)
|
||||
norm = np.linalg.norm(embedding)
|
||||
assert np.isclose(norm, 1.0, atol=1e-5), f'Normalized embedding should have norm 1.0, got {norm}'
|
||||
|
||||
|
||||
def test_arcface_embedding_dtype(arcface_model, mock_aligned_face):
|
||||
"""
|
||||
Test that embeddings have the correct data type.
|
||||
"""
|
||||
embedding = arcface_model.get_embedding(mock_aligned_face)
|
||||
assert embedding.dtype == np.float32, f'Expected float32, got {embedding.dtype}'
|
||||
|
||||
|
||||
def test_arcface_consistency(arcface_model, mock_aligned_face):
|
||||
"""
|
||||
Test that the same input produces the same embedding.
|
||||
"""
|
||||
embedding1 = arcface_model.get_embedding(mock_aligned_face)
|
||||
embedding2 = arcface_model.get_embedding(mock_aligned_face)
|
||||
|
||||
assert np.allclose(embedding1, embedding2), 'Same input should produce same embedding'
|
||||
|
||||
|
||||
# MobileFace Tests
|
||||
def test_mobileface_initialization(mobileface_model):
|
||||
"""
|
||||
Test that the MobileFace model initializes correctly.
|
||||
"""
|
||||
assert mobileface_model is not None, 'MobileFace model initialization failed.'
|
||||
|
||||
|
||||
def test_mobileface_embedding_shape(mobileface_model, mock_aligned_face):
|
||||
"""
|
||||
Test that MobileFace produces embeddings with the correct shape.
|
||||
"""
|
||||
embedding = mobileface_model.get_embedding(mock_aligned_face)
|
||||
|
||||
# MobileFace typically produces 512-dimensional embeddings
|
||||
assert embedding.shape[1] == 512, f'Expected 512-dim embedding, got {embedding.shape[1]}'
|
||||
assert embedding.shape[0] == 1, 'Embedding should have batch dimension of 1'
|
||||
|
||||
|
||||
def test_mobileface_normalized_embedding(mobileface_model, mock_landmarks):
|
||||
"""
|
||||
Test that MobileFace normalized embeddings have unit length.
|
||||
"""
|
||||
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
|
||||
embedding = mobileface_model.get_normalized_embedding(mock_image, mock_landmarks)
|
||||
|
||||
norm = np.linalg.norm(embedding)
|
||||
assert np.isclose(norm, 1.0, atol=1e-5), f'Normalized embedding should have norm 1.0, got {norm}'
|
||||
|
||||
|
||||
# SphereFace Tests
|
||||
def test_sphereface_initialization(sphereface_model):
|
||||
"""
|
||||
Test that the SphereFace model initializes correctly.
|
||||
"""
|
||||
assert sphereface_model is not None, 'SphereFace model initialization failed.'
|
||||
|
||||
|
||||
def test_sphereface_embedding_shape(sphereface_model, mock_aligned_face):
|
||||
"""
|
||||
Test that SphereFace produces embeddings with the correct shape.
|
||||
"""
|
||||
embedding = sphereface_model.get_embedding(mock_aligned_face)
|
||||
|
||||
# SphereFace typically produces 512-dimensional embeddings
|
||||
assert embedding.shape[1] == 512, f'Expected 512-dim embedding, got {embedding.shape[1]}'
|
||||
assert embedding.shape[0] == 1, 'Embedding should have batch dimension of 1'
|
||||
|
||||
|
||||
def test_sphereface_normalized_embedding(sphereface_model, mock_landmarks):
|
||||
"""
|
||||
Test that SphereFace normalized embeddings have unit length.
|
||||
"""
|
||||
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
|
||||
embedding = sphereface_model.get_normalized_embedding(mock_image, mock_landmarks)
|
||||
|
||||
norm = np.linalg.norm(embedding)
|
||||
assert np.isclose(norm, 1.0, atol=1e-5), f'Normalized embedding should have norm 1.0, got {norm}'
|
||||
|
||||
|
||||
# Cross-model comparison tests
|
||||
def test_different_models_different_embeddings(arcface_model, mobileface_model, mock_aligned_face):
|
||||
"""
|
||||
Test that different models produce different embeddings for the same input.
|
||||
"""
|
||||
arcface_emb = arcface_model.get_embedding(mock_aligned_face)
|
||||
mobileface_emb = mobileface_model.get_embedding(mock_aligned_face)
|
||||
|
||||
# Embeddings should be different (with high probability for random input)
|
||||
# We check that they're not identical
|
||||
assert not np.allclose(arcface_emb, mobileface_emb), 'Different models should produce different embeddings'
|
||||
|
||||
|
||||
def test_embedding_similarity_computation(arcface_model, mock_aligned_face):
|
||||
"""
|
||||
Test computing similarity between embeddings.
|
||||
"""
|
||||
# Get two embeddings
|
||||
emb1 = arcface_model.get_embedding(mock_aligned_face)
|
||||
|
||||
# Create a slightly different image
|
||||
mock_aligned_face2 = mock_aligned_face.copy()
|
||||
mock_aligned_face2[:10, :10] = 0 # Modify a small region
|
||||
emb2 = arcface_model.get_embedding(mock_aligned_face2)
|
||||
|
||||
# Compute cosine similarity
|
||||
from uniface import compute_similarity
|
||||
|
||||
similarity = compute_similarity(emb1, emb2)
|
||||
|
||||
# Similarity should be between -1 and 1
|
||||
assert -1.0 <= similarity <= 1.0, f'Similarity should be in [-1, 1], got {similarity}'
|
||||
|
||||
|
||||
def test_same_face_high_similarity(arcface_model, mock_aligned_face):
|
||||
"""
|
||||
Test that the same face produces high similarity.
|
||||
"""
|
||||
emb1 = arcface_model.get_embedding(mock_aligned_face)
|
||||
emb2 = arcface_model.get_embedding(mock_aligned_face)
|
||||
|
||||
from uniface import compute_similarity
|
||||
|
||||
similarity = compute_similarity(emb1, emb2)
|
||||
|
||||
# Same image should have similarity close to 1.0
|
||||
assert similarity > 0.99, f'Same face should have similarity > 0.99, got {similarity}'
|
||||
@@ -1,15 +1,14 @@
|
||||
import pytest
|
||||
import numpy as np
|
||||
from uniface import RetinaFace
|
||||
import pytest
|
||||
|
||||
from uniface.constants import RetinaFaceWeights
|
||||
from uniface.detection import RetinaFace
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def retinaface_model():
|
||||
"""
|
||||
Fixture to initialize the RetinaFace model for testing.
|
||||
"""
|
||||
return RetinaFace(
|
||||
model="retinaface_mnet_v2",
|
||||
model_name=RetinaFaceWeights.MNET_V2,
|
||||
conf_thresh=0.5,
|
||||
pre_nms_topk=5000,
|
||||
nms_thresh=0.4,
|
||||
@@ -18,61 +17,39 @@ def retinaface_model():
|
||||
|
||||
|
||||
def test_model_initialization(retinaface_model):
|
||||
"""
|
||||
Test that the RetinaFace model initializes correctly.
|
||||
"""
|
||||
assert retinaface_model is not None, "Model initialization failed."
|
||||
assert retinaface_model is not None, 'Model initialization failed.'
|
||||
|
||||
|
||||
def test_inference_on_640x640_image(retinaface_model):
|
||||
"""
|
||||
Test inference on a 640x640 BGR image.
|
||||
"""
|
||||
# Generate a mock 640x640 BGR image
|
||||
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
faces = retinaface_model.detect(mock_image)
|
||||
|
||||
# Run inference
|
||||
detections, landmarks = retinaface_model.detect(mock_image)
|
||||
assert isinstance(faces, list), 'Detections should be a list.'
|
||||
|
||||
# Check output types
|
||||
assert isinstance(detections, np.ndarray), "Detections should be a numpy array."
|
||||
assert isinstance(landmarks, np.ndarray), "Landmarks should be a numpy array."
|
||||
for face in faces:
|
||||
assert isinstance(face, dict), 'Each detection should be a dictionary.'
|
||||
assert 'bbox' in face, "Each detection should have a 'bbox' key."
|
||||
assert 'confidence' in face, "Each detection should have a 'confidence' key."
|
||||
assert 'landmarks' in face, "Each detection should have a 'landmarks' key."
|
||||
|
||||
# Check that detections have the expected shape
|
||||
if detections.size > 0: # If faces are detected
|
||||
assert detections.shape[1] == 5, "Each detection should have 5 values (x1, y1, x2, y2, score)."
|
||||
bbox = face['bbox']
|
||||
assert len(bbox) == 4, 'BBox should have 4 values (x1, y1, x2, y2).'
|
||||
|
||||
# Check landmarks shape
|
||||
if landmarks.size > 0:
|
||||
assert landmarks.shape[1:] == (5, 2), "Landmarks should have shape (N, 5, 2)."
|
||||
landmarks = face['landmarks']
|
||||
assert len(landmarks) == 5, 'Should have 5 landmark points.'
|
||||
assert all(len(pt) == 2 for pt in landmarks), 'Each landmark should be (x, y).'
|
||||
|
||||
|
||||
def test_confidence_threshold(retinaface_model):
|
||||
"""
|
||||
Test that detections respect the confidence threshold.
|
||||
"""
|
||||
# Generate a mock 640x640 BGR image
|
||||
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
faces = retinaface_model.detect(mock_image)
|
||||
|
||||
# Run inference
|
||||
detections, _ = retinaface_model.detect(mock_image)
|
||||
|
||||
# Ensure all detections have confidence scores above the threshold
|
||||
if detections.size > 0: # If faces are detected
|
||||
confidence_scores = detections[:, 4]
|
||||
assert (confidence_scores >= 0.5).all(), "Some detections have confidence below the threshold."
|
||||
for face in faces:
|
||||
confidence = face['confidence']
|
||||
assert confidence >= 0.5, f'Detection has confidence {confidence} below threshold 0.5'
|
||||
|
||||
|
||||
def test_no_faces_detected(retinaface_model):
|
||||
"""
|
||||
Test inference on an image without detectable faces.
|
||||
"""
|
||||
# Generate an empty (black) 640x640 image
|
||||
empty_image = np.zeros((640, 640, 3), dtype=np.uint8)
|
||||
|
||||
# Run inference
|
||||
detections, landmarks = retinaface_model.detect(empty_image)
|
||||
|
||||
# Ensure no detections or landmarks are found
|
||||
assert detections.size == 0, "Detections should be empty for a blank image."
|
||||
assert landmarks.size == 0, "Landmarks should be empty for a blank image."
|
||||
faces = retinaface_model.detect(empty_image)
|
||||
assert len(faces) == 0, 'Should detect no faces in a blank image.'
|
||||
|
||||
71
tests/test_scrfd.py
Normal file
@@ -0,0 +1,71 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from uniface.constants import SCRFDWeights
|
||||
from uniface.detection import SCRFD
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def scrfd_model():
|
||||
return SCRFD(
|
||||
model_name=SCRFDWeights.SCRFD_500M_KPS,
|
||||
conf_thresh=0.5,
|
||||
nms_thresh=0.4,
|
||||
)
|
||||
|
||||
|
||||
def test_model_initialization(scrfd_model):
|
||||
assert scrfd_model is not None, 'Model initialization failed.'
|
||||
|
||||
|
||||
def test_inference_on_640x640_image(scrfd_model):
|
||||
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
faces = scrfd_model.detect(mock_image)
|
||||
|
||||
assert isinstance(faces, list), 'Detections should be a list.'
|
||||
|
||||
for face in faces:
|
||||
assert isinstance(face, dict), 'Each detection should be a dictionary.'
|
||||
assert 'bbox' in face, "Each detection should have a 'bbox' key."
|
||||
assert 'confidence' in face, "Each detection should have a 'confidence' key."
|
||||
assert 'landmarks' in face, "Each detection should have a 'landmarks' key."
|
||||
|
||||
bbox = face['bbox']
|
||||
assert len(bbox) == 4, 'BBox should have 4 values (x1, y1, x2, y2).'
|
||||
|
||||
landmarks = face['landmarks']
|
||||
assert len(landmarks) == 5, 'Should have 5 landmark points.'
|
||||
assert all(len(pt) == 2 for pt in landmarks), 'Each landmark should be (x, y).'
|
||||
|
||||
|
||||
def test_confidence_threshold(scrfd_model):
|
||||
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
faces = scrfd_model.detect(mock_image)
|
||||
|
||||
for face in faces:
|
||||
confidence = face['confidence']
|
||||
assert confidence >= 0.5, f'Detection has confidence {confidence} below threshold 0.5'
|
||||
|
||||
|
||||
def test_no_faces_detected(scrfd_model):
|
||||
empty_image = np.zeros((640, 640, 3), dtype=np.uint8)
|
||||
faces = scrfd_model.detect(empty_image)
|
||||
assert len(faces) == 0, 'Should detect no faces in a blank image.'
|
||||
|
||||
|
||||
def test_different_input_sizes(scrfd_model):
|
||||
test_sizes = [(480, 640, 3), (720, 1280, 3), (1080, 1920, 3)]
|
||||
|
||||
for size in test_sizes:
|
||||
mock_image = np.random.randint(0, 255, size, dtype=np.uint8)
|
||||
faces = scrfd_model.detect(mock_image)
|
||||
assert isinstance(faces, list), f'Should return list for size {size}'
|
||||
|
||||
|
||||
def test_scrfd_10g_model():
|
||||
model = SCRFD(model_name=SCRFDWeights.SCRFD_10G_KPS, conf_thresh=0.5)
|
||||
assert model is not None, 'SCRFD 10G model initialization failed.'
|
||||
|
||||
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
faces = model.detect(mock_image)
|
||||
assert isinstance(faces, list), 'SCRFD 10G should return list of detections.'
|
||||
262
tests/test_utils.py
Normal file
@@ -0,0 +1,262 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from uniface import compute_similarity, face_alignment
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_image():
|
||||
"""
|
||||
Create a mock 640x640 BGR image.
|
||||
"""
|
||||
return np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_landmarks():
|
||||
"""
|
||||
Create mock 5-point facial landmarks.
|
||||
Standard positions for a face roughly centered at (112/2, 112/2).
|
||||
"""
|
||||
return np.array(
|
||||
[
|
||||
[38.2946, 51.6963], # Left eye
|
||||
[73.5318, 51.5014], # Right eye
|
||||
[56.0252, 71.7366], # Nose
|
||||
[41.5493, 92.3655], # Left mouth corner
|
||||
[70.7299, 92.2041], # Right mouth corner
|
||||
],
|
||||
dtype=np.float32,
|
||||
)
|
||||
|
||||
|
||||
# compute_similarity tests
|
||||
def test_compute_similarity_same_embedding():
|
||||
"""
|
||||
Test that similarity of an embedding with itself is 1.0.
|
||||
"""
|
||||
embedding = np.random.randn(1, 512).astype(np.float32)
|
||||
embedding = embedding / np.linalg.norm(embedding) # Normalize
|
||||
|
||||
similarity = compute_similarity(embedding, embedding)
|
||||
assert np.isclose(similarity, 1.0, atol=1e-5), f'Self-similarity should be 1.0, got {similarity}'
|
||||
|
||||
|
||||
def test_compute_similarity_range():
|
||||
"""
|
||||
Test that similarity is always in the range [-1, 1].
|
||||
"""
|
||||
# Test with multiple random embeddings
|
||||
for _ in range(10):
|
||||
emb1 = np.random.randn(1, 512).astype(np.float32)
|
||||
emb2 = np.random.randn(1, 512).astype(np.float32)
|
||||
|
||||
# Normalize
|
||||
emb1 = emb1 / np.linalg.norm(emb1)
|
||||
emb2 = emb2 / np.linalg.norm(emb2)
|
||||
|
||||
similarity = compute_similarity(emb1, emb2)
|
||||
assert -1.0 <= similarity <= 1.0, f'Similarity should be in [-1, 1], got {similarity}'
|
||||
|
||||
|
||||
def test_compute_similarity_orthogonal():
|
||||
"""
|
||||
Test that orthogonal embeddings have similarity close to 0.
|
||||
"""
|
||||
# Create orthogonal embeddings
|
||||
emb1 = np.zeros((1, 512), dtype=np.float32)
|
||||
emb1[0, 0] = 1.0 # [1, 0, 0, ..., 0]
|
||||
|
||||
emb2 = np.zeros((1, 512), dtype=np.float32)
|
||||
emb2[0, 1] = 1.0 # [0, 1, 0, ..., 0]
|
||||
|
||||
similarity = compute_similarity(emb1, emb2)
|
||||
assert np.isclose(similarity, 0.0, atol=1e-5), f'Orthogonal embeddings should have similarity 0.0, got {similarity}'
|
||||
|
||||
|
||||
def test_compute_similarity_opposite():
|
||||
"""
|
||||
Test that opposite embeddings have similarity close to -1.
|
||||
"""
|
||||
emb1 = np.ones((1, 512), dtype=np.float32)
|
||||
emb1 = emb1 / np.linalg.norm(emb1)
|
||||
|
||||
emb2 = -emb1 # Opposite direction
|
||||
|
||||
similarity = compute_similarity(emb1, emb2)
|
||||
assert np.isclose(similarity, -1.0, atol=1e-5), f'Opposite embeddings should have similarity -1.0, got {similarity}'
|
||||
|
||||
|
||||
def test_compute_similarity_symmetry():
|
||||
"""
|
||||
Test that similarity(A, B) == similarity(B, A).
|
||||
"""
|
||||
emb1 = np.random.randn(1, 512).astype(np.float32)
|
||||
emb2 = np.random.randn(1, 512).astype(np.float32)
|
||||
|
||||
# Normalize
|
||||
emb1 = emb1 / np.linalg.norm(emb1)
|
||||
emb2 = emb2 / np.linalg.norm(emb2)
|
||||
|
||||
sim_12 = compute_similarity(emb1, emb2)
|
||||
sim_21 = compute_similarity(emb2, emb1)
|
||||
|
||||
assert np.isclose(sim_12, sim_21), 'Similarity should be symmetric'
|
||||
|
||||
|
||||
def test_compute_similarity_dtype():
|
||||
"""
|
||||
Test that compute_similarity returns a float.
|
||||
"""
|
||||
emb1 = np.random.randn(1, 512).astype(np.float32)
|
||||
emb2 = np.random.randn(1, 512).astype(np.float32)
|
||||
|
||||
# Normalize
|
||||
emb1 = emb1 / np.linalg.norm(emb1)
|
||||
emb2 = emb2 / np.linalg.norm(emb2)
|
||||
|
||||
similarity = compute_similarity(emb1, emb2)
|
||||
assert isinstance(similarity, (float, np.floating)), f'Similarity should be float, got {type(similarity)}'
|
||||
|
||||
|
||||
# face_alignment tests
def test_face_alignment_output_shape(mock_image, mock_landmarks):
    """
    Test that face_alignment produces output with the correct shape.
    """
    aligned, _ = face_alignment(mock_image, mock_landmarks, image_size=(112, 112))

    assert aligned.shape == (112, 112, 3), f'Expected shape (112, 112, 3), got {aligned.shape}'


def test_face_alignment_dtype(mock_image, mock_landmarks):
    """
    Test that aligned face has the correct data type.
    """
    aligned, _ = face_alignment(mock_image, mock_landmarks, image_size=(112, 112))

    assert aligned.dtype == np.uint8, f'Expected uint8, got {aligned.dtype}'


def test_face_alignment_different_sizes(mock_image, mock_landmarks):
    """
    Test face alignment with different output sizes.
    """
    # Only test sizes that are multiples of 112 or 128 as required by the function
    test_sizes = [(112, 112), (128, 128), (224, 224)]

    for size in test_sizes:
        aligned, _ = face_alignment(mock_image, mock_landmarks, image_size=size)
        assert aligned.shape == (*size, 3), f'Failed for size {size}'


def test_face_alignment_consistency(mock_image, mock_landmarks):
    """
    Test that the same input produces the same aligned face.
    """
    aligned1, _ = face_alignment(mock_image, mock_landmarks, image_size=(112, 112))
    aligned2, _ = face_alignment(mock_image, mock_landmarks, image_size=(112, 112))

    assert np.allclose(aligned1, aligned2), 'Same input should produce same aligned face'


def test_face_alignment_landmarks_as_list(mock_image):
    """
    Test that landmarks can be passed as a list of lists (converted to array).
    """
    landmarks_list = [
        [38.2946, 51.6963],
        [73.5318, 51.5014],
        [56.0252, 71.7366],
        [41.5493, 92.3655],
        [70.7299, 92.2041],
    ]

    # Convert list to numpy array before passing to face_alignment
    landmarks_array = np.array(landmarks_list, dtype=np.float32)
    aligned, _ = face_alignment(mock_image, landmarks_array, image_size=(112, 112))
    assert aligned.shape == (112, 112, 3), 'Should work with landmarks as array'


def test_face_alignment_value_range(mock_image, mock_landmarks):
    """
    Test that aligned face pixel values are in valid range [0, 255].
    """
    aligned, _ = face_alignment(mock_image, mock_landmarks, image_size=(112, 112))

    assert np.all(aligned >= 0), 'Pixel values should be >= 0'
    assert np.all(aligned <= 255), 'Pixel values should be <= 255'


def test_face_alignment_not_all_zeros(mock_image, mock_landmarks):
    """
    Test that aligned face is not all zeros (actual transformation occurred).
    """
    aligned, _ = face_alignment(mock_image, mock_landmarks, image_size=(112, 112))

    # At least some pixels should be non-zero
    assert np.any(aligned > 0), 'Aligned face should have some non-zero pixels'


def test_face_alignment_from_different_positions(mock_image):
    """
    Test alignment with landmarks at different positions in the image.
    """
    # Landmarks at different positions
    positions = [
        np.array(
            [[100, 100], [150, 100], [125, 130], [110, 150], [140, 150]],
            dtype=np.float32,
        ),
        np.array(
            [[300, 200], [350, 200], [325, 230], [310, 250], [340, 250]],
            dtype=np.float32,
        ),
        np.array(
            [[500, 400], [550, 400], [525, 430], [510, 450], [540, 450]],
            dtype=np.float32,
        ),
    ]

    for landmarks in positions:
        aligned, _ = face_alignment(mock_image, landmarks, image_size=(112, 112))
        assert aligned.shape == (112, 112, 3), f'Failed for landmarks at {landmarks[0]}'


def test_face_alignment_landmark_count(mock_image):
    """
    Test that face_alignment works specifically with 5-point landmarks.
    """
    # Standard 5-point landmarks
    landmarks_5pt = np.array(
        [
            [38.2946, 51.6963],
            [73.5318, 51.5014],
            [56.0252, 71.7366],
            [41.5493, 92.3655],
            [70.7299, 92.2041],
        ],
        dtype=np.float32,
    )

    aligned, _ = face_alignment(mock_image, landmarks_5pt, image_size=(112, 112))
    assert aligned.shape == (112, 112, 3), 'Should work with 5-point landmarks'


def test_compute_similarity_with_recognition_embeddings():
    """
    Test compute_similarity with realistic embedding dimensions.
    """
    # Simulate ArcFace/MobileFace/SphereFace embeddings (512-dim)
    emb1 = np.random.randn(1, 512).astype(np.float32)
    emb2 = np.random.randn(1, 512).astype(np.float32)

    # Normalize (as done in get_normalized_embedding)
    emb1 = emb1 / np.linalg.norm(emb1)
    emb2 = emb2 / np.linalg.norm(emb2)

    similarity = compute_similarity(emb1, emb2)

    # Should be a valid similarity score
    assert -1.0 <= similarity <= 1.0
    assert isinstance(similarity, (float, np.floating))
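The face_alignment tests above depend on mock_image and mock_landmarks pytest fixtures defined elsewhere in the test suite. A minimal sketch of what such fixtures could look like, assuming a random 640x640 BGR test image and the standard 5-point reference landmarks (the repository's actual fixtures may differ):

import numpy as np
import pytest


@pytest.fixture
def mock_image():
    # Hypothetical fixture: a random 640x640 BGR image, uint8 like a cv2.imread result.
    rng = np.random.default_rng(seed=42)
    return rng.integers(0, 256, size=(640, 640, 3), dtype=np.uint8)


@pytest.fixture
def mock_landmarks():
    # Hypothetical fixture: the standard 5-point landmarks (eyes, nose, mouth corners).
    return np.array(
        [
            [38.2946, 51.6963],
            [73.5318, 51.5014],
            [56.0252, 71.7366],
            [41.5493, 92.3655],
            [70.7299, 92.2041],
        ],
        dtype=np.float32,
    )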
@@ -1,4 +1,4 @@
|
||||
# Copyright 2024 Yakhyokhuja Valikhujaev
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
#
|
||||
# Licensed under the MIT License.
|
||||
# You may obtain a copy of the License at
|
||||
@@ -11,18 +11,75 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
__license__ = 'MIT'
|
||||
__author__ = 'Yakhyokhuja Valikhujaev'
|
||||
__version__ = '1.5.0'
|
||||
|
||||
from uniface.retinaface import RetinaFace
|
||||
from uniface.log import Logger
|
||||
|
||||
from uniface.face_utils import compute_similarity, face_alignment
|
||||
from uniface.log import Logger, enable_logging
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.version import __version__, __author__
|
||||
from uniface.visualization import draw_detections
|
||||
from uniface.visualization import draw_detections, vis_parsing_maps
|
||||
|
||||
from .analyzer import FaceAnalyzer
|
||||
from .attribute import AgeGender
|
||||
from .face import Face
|
||||
|
||||
try:
|
||||
from .attribute import Emotion
|
||||
except ImportError:
|
||||
Emotion = None # PyTorch not installed
|
||||
from .detection import (
|
||||
SCRFD,
|
||||
RetinaFace,
|
||||
YOLOv5Face,
|
||||
create_detector,
|
||||
detect_faces,
|
||||
list_available_detectors,
|
||||
)
|
||||
from .gaze import MobileGaze, create_gaze_estimator
|
||||
from .landmark import Landmark106, create_landmarker
|
||||
from .parsing import BiSeNet, create_face_parser
|
||||
from .recognition import ArcFace, MobileFace, SphereFace, create_recognizer
|
||||
|
||||
__all__ = [
|
||||
"__version__",
|
||||
"__author__"
|
||||
"RetinaFace",
|
||||
"Logger",
|
||||
"verify_model_weights",
|
||||
"draw_detections"
|
||||
'__author__',
|
||||
'__license__',
|
||||
'__version__',
|
||||
# Core classes
|
||||
'Face',
|
||||
'FaceAnalyzer',
|
||||
# Factory functions
|
||||
'create_detector',
|
||||
'create_face_parser',
|
||||
'create_gaze_estimator',
|
||||
'create_landmarker',
|
||||
'create_recognizer',
|
||||
'detect_faces',
|
||||
'list_available_detectors',
|
||||
# Detection models
|
||||
'RetinaFace',
|
||||
'SCRFD',
|
||||
'YOLOv5Face',
|
||||
# Recognition models
|
||||
'ArcFace',
|
||||
'MobileFace',
|
||||
'SphereFace',
|
||||
# Landmark models
|
||||
'Landmark106',
|
||||
# Gaze models
|
||||
'MobileGaze',
|
||||
# Parsing models
|
||||
'BiSeNet',
|
||||
# Attribute models
|
||||
'AgeGender',
|
||||
'Emotion',
|
||||
# Utilities
|
||||
'compute_similarity',
|
||||
'draw_detections',
|
||||
'vis_parsing_maps',
|
||||
'face_alignment',
|
||||
'verify_model_weights',
|
||||
'Logger',
|
||||
'enable_logging',
|
||||
]
|
||||
|
||||
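The re-exported names above form the package's public surface. A short usage sketch of the factory-function path; create_recognizer is assumed here to accept a method string the way create_detector does, and the file name is a placeholder:

import cv2
from uniface import compute_similarity, create_detector, create_recognizer

detector = create_detector('retinaface')
# Assumption: create_recognizer takes a method string, mirroring create_detector.
recognizer = create_recognizer('arcface')

image = cv2.imread('face.jpg')  # placeholder path
faces = detector.detect(image)
if faces:
    emb = recognizer.get_normalized_embedding(image, faces[0]['landmarks'])
    print(compute_similarity(emb, emb))  # ~1.0 for an embedding compared with itself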
uniface/analyzer.py (new file, 83 lines)
@@ -0,0 +1,83 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from typing import List, Optional
|
||||
|
||||
import numpy as np
|
||||
|
||||
from uniface.attribute.age_gender import AgeGender
|
||||
from uniface.detection.base import BaseDetector
|
||||
from uniface.face import Face
|
||||
from uniface.log import Logger
|
||||
from uniface.recognition.base import BaseRecognizer
|
||||
|
||||
__all__ = ['FaceAnalyzer']
|
||||
|
||||
|
||||
class FaceAnalyzer:
|
||||
"""Unified face analyzer combining detection, recognition, and attributes."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
detector: BaseDetector,
|
||||
recognizer: Optional[BaseRecognizer] = None,
|
||||
age_gender: Optional[AgeGender] = None,
|
||||
) -> None:
|
||||
self.detector = detector
|
||||
self.recognizer = recognizer
|
||||
self.age_gender = age_gender
|
||||
|
||||
Logger.info(f'Initialized FaceAnalyzer with detector={detector.__class__.__name__}')
|
||||
if recognizer:
|
||||
Logger.info(f' - Recognition enabled: {recognizer.__class__.__name__}')
|
||||
if age_gender:
|
||||
Logger.info(f' - Age/Gender enabled: {age_gender.__class__.__name__}')
|
||||
|
||||
def analyze(self, image: np.ndarray) -> List[Face]:
|
||||
"""Analyze faces in an image."""
|
||||
detections = self.detector.detect(image)
|
||||
Logger.debug(f'Detected {len(detections)} face(s)')
|
||||
|
||||
faces = []
|
||||
for idx, detection in enumerate(detections):
|
||||
bbox = detection['bbox']
|
||||
confidence = detection['confidence']
|
||||
landmarks = detection['landmarks']
|
||||
|
||||
embedding = None
|
||||
if self.recognizer is not None:
|
||||
try:
|
||||
embedding = self.recognizer.get_normalized_embedding(image, landmarks)
|
||||
Logger.debug(f' Face {idx + 1}: Extracted embedding with shape {embedding.shape}')
|
||||
except Exception as e:
|
||||
Logger.warning(f' Face {idx + 1}: Failed to extract embedding: {e}')
|
||||
|
||||
age, gender = None, None
|
||||
if self.age_gender is not None:
|
||||
try:
|
||||
gender, age = self.age_gender.predict(image, bbox)
|
||||
Logger.debug(f' Face {idx + 1}: Age={age}, Gender={gender}')
|
||||
except Exception as e:
|
||||
Logger.warning(f' Face {idx + 1}: Failed to predict age/gender: {e}')
|
||||
|
||||
face = Face(
|
||||
bbox=bbox,
|
||||
confidence=confidence,
|
||||
landmarks=landmarks,
|
||||
embedding=embedding,
|
||||
age=age,
|
||||
gender=gender,
|
||||
)
|
||||
faces.append(face)
|
||||
|
||||
Logger.info(f'Analysis complete: {len(faces)} face(s) processed')
|
||||
return faces
|
||||
|
||||
def __repr__(self) -> str:
|
||||
parts = [f'FaceAnalyzer(detector={self.detector.__class__.__name__}']
|
||||
if self.recognizer:
|
||||
parts.append(f'recognizer={self.recognizer.__class__.__name__}')
|
||||
if self.age_gender:
|
||||
parts.append(f'age_gender={self.age_gender.__class__.__name__}')
|
||||
return ', '.join(parts) + ')'
|
||||
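A minimal sketch of wiring a FaceAnalyzer from the pieces above. It assumes create_recognizer accepts a method string like create_detector, and that Face exposes its constructor arguments (bbox, confidence, age, gender, ...) as attributes; neither detail is shown in this diff:

import cv2
from uniface import AgeGender, FaceAnalyzer, create_detector, create_recognizer

analyzer = FaceAnalyzer(
    detector=create_detector('retinaface'),
    recognizer=create_recognizer('arcface'),  # assumption: string-based factory, as for detectors
    age_gender=AgeGender(),
)

image = cv2.imread('group_photo.jpg')  # placeholder path
for face in analyzer.analyze(image):
    # Assumption: Face exposes its constructor arguments as attributes.
    print(face.bbox, face.confidence, face.age, face.gender)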
uniface/attribute/__init__.py (new file, 99 lines)
@@ -0,0 +1,99 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from typing import Any, Dict, List, Union
|
||||
|
||||
import numpy as np
|
||||
|
||||
from uniface.attribute.age_gender import AgeGender
|
||||
from uniface.attribute.base import Attribute
|
||||
from uniface.constants import AgeGenderWeights, DDAMFNWeights
|
||||
|
||||
# Emotion requires PyTorch - make it optional
|
||||
try:
|
||||
from uniface.attribute.emotion import Emotion
|
||||
|
||||
_EMOTION_AVAILABLE = True
|
||||
except ImportError:
|
||||
Emotion = None
|
||||
_EMOTION_AVAILABLE = False
|
||||
|
||||
# Public API for the attribute module
|
||||
__all__ = ['AgeGender', 'Emotion', 'create_attribute_predictor', 'predict_attributes']
|
||||
|
||||
# A mapping from model enums to their corresponding attribute classes
|
||||
_ATTRIBUTE_MODELS = {
|
||||
**{model: AgeGender for model in AgeGenderWeights},
|
||||
}
|
||||
|
||||
# Add Emotion models only if PyTorch is available
|
||||
if _EMOTION_AVAILABLE:
|
||||
_ATTRIBUTE_MODELS.update({model: Emotion for model in DDAMFNWeights})
|
||||
|
||||
|
||||
def create_attribute_predictor(model_name: Union[AgeGenderWeights, DDAMFNWeights], **kwargs: Any) -> Attribute:
|
||||
"""
|
||||
Factory function to create an attribute predictor instance.
|
||||
|
||||
This high-level API simplifies the creation of attribute models by
|
||||
dynamically selecting the correct class based on the provided model enum.
|
||||
|
||||
Args:
|
||||
model_name: The enum corresponding to the desired attribute model
|
||||
(e.g., AgeGenderWeights.DEFAULT or DDAMFNWeights.AFFECNET7).
|
||||
**kwargs: Additional keyword arguments to pass to the model's constructor.
|
||||
|
||||
Returns:
|
||||
An initialized instance of an Attribute predictor class (e.g., AgeGender).
|
||||
|
||||
Raises:
|
||||
ValueError: If the provided model_name is not a supported enum.
|
||||
"""
|
||||
model_class = _ATTRIBUTE_MODELS.get(model_name)
|
||||
|
||||
if model_class is None:
|
||||
raise ValueError(
|
||||
f'Unsupported attribute model: {model_name}. Please choose from AgeGenderWeights or DDAMFNWeights.'
|
||||
)
|
||||
|
||||
# Pass model_name to the constructor, as some classes might need it
|
||||
return model_class(model_name=model_name, **kwargs)
|
||||
|
||||
|
||||
def predict_attributes(
|
||||
image: np.ndarray, detections: List[Dict[str, np.ndarray]], predictor: Attribute
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
High-level API to predict attributes for multiple detected faces.
|
||||
|
||||
This function iterates through a list of face detections, runs the
|
||||
specified attribute predictor on each one, and appends the results back
|
||||
into the detection dictionary.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): The full input image in BGR format.
|
||||
detections (List[Dict]): A list of detection results, where each dict
|
||||
must contain a 'bbox' and optionally 'landmark'.
|
||||
predictor (Attribute): An initialized attribute predictor instance,
|
||||
created by `create_attribute_predictor`.
|
||||
|
||||
Returns:
|
||||
The list of detections, where each dictionary is updated with a new
|
||||
'attributes' key containing the prediction result.
|
||||
"""
|
||||
for face in detections:
|
||||
# Initialize attributes dict if it doesn't exist
|
||||
if 'attributes' not in face:
|
||||
face['attributes'] = {}
|
||||
|
||||
if isinstance(predictor, AgeGender):
|
||||
gender_id, age = predictor(image, face['bbox'])
|
||||
face['attributes']['gender_id'] = gender_id
|
||||
face['attributes']['age'] = age
|
||||
elif isinstance(predictor, Emotion):
|
||||
emotion, confidence = predictor(image, face['landmark'])
|
||||
face['attributes']['emotion'] = emotion
|
||||
face['attributes']['confidence'] = confidence
|
||||
|
||||
return detections
|
||||
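A sketch of the two helpers above used end to end, with detections coming from the high-level detect_faces function; the image path is a placeholder:

import cv2
from uniface import detect_faces
from uniface.attribute import create_attribute_predictor, predict_attributes
from uniface.constants import AgeGenderWeights

image = cv2.imread('face.jpg')  # placeholder path
detections = detect_faces(image, method='retinaface')

predictor = create_attribute_predictor(AgeGenderWeights.DEFAULT)
detections = predict_attributes(image, detections, predictor)

for det in detections:
    print(det['attributes'])  # e.g. {'gender_id': 1, 'age': 32}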
uniface/attribute/age_gender.py (new file, 211 lines)
@@ -0,0 +1,211 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from typing import List, Optional, Tuple, Union
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from uniface.attribute.base import Attribute
|
||||
from uniface.constants import AgeGenderWeights
|
||||
from uniface.face_utils import bbox_center_alignment
|
||||
from uniface.log import Logger
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.onnx_utils import create_onnx_session
|
||||
|
||||
__all__ = ['AgeGender']
|
||||
|
||||
|
||||
class AgeGender(Attribute):
|
||||
"""
|
||||
Age and gender prediction model using ONNX Runtime.
|
||||
|
||||
This class inherits from the base `Attribute` class and implements the
|
||||
functionality for predicting age (in years) and gender ID (0 for Female,
|
||||
1 for Male) from a face image. It requires a bounding box to locate the face.
|
||||
|
||||
Args:
|
||||
model_name (AgeGenderWeights): The enum specifying the model weights to load.
|
||||
Defaults to `AgeGenderWeights.DEFAULT`.
|
||||
input_size (Optional[Tuple[int, int]]): Input size (height, width).
|
||||
If None, automatically detected from model metadata. Defaults to None.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
model_name: AgeGenderWeights = AgeGenderWeights.DEFAULT,
|
||||
input_size: Optional[Tuple[int, int]] = None,
|
||||
) -> None:
|
||||
"""
|
||||
Initializes the AgeGender prediction model.
|
||||
|
||||
Args:
|
||||
model_name (AgeGenderWeights): The enum specifying the model weights to load.
|
||||
input_size (Optional[Tuple[int, int]]): Input size (height, width).
|
||||
If None, automatically detected from model metadata. Defaults to None.
|
||||
"""
|
||||
Logger.info(f'Initializing AgeGender with model={model_name.name}')
|
||||
self.model_path = verify_model_weights(model_name)
|
||||
self._user_input_size = input_size # Store user preference
|
||||
self._initialize_model()
|
||||
|
||||
def _initialize_model(self) -> None:
|
||||
"""
|
||||
Initializes the ONNX model and creates an inference session.
|
||||
"""
|
||||
try:
|
||||
self.session = create_onnx_session(self.model_path)
|
||||
# Get model input details from the loaded model
|
||||
input_meta = self.session.get_inputs()[0]
|
||||
self.input_name = input_meta.name
|
||||
|
||||
# Use user-provided size if given, otherwise auto-detect from model
|
||||
model_input_size = tuple(input_meta.shape[2:4]) # (height, width)
|
||||
if self._user_input_size is not None:
|
||||
self.input_size = self._user_input_size
|
||||
if self._user_input_size != model_input_size:
|
||||
Logger.warning(
|
||||
f'Using custom input_size {self.input_size}, '
|
||||
f'but model expects {model_input_size}. This may affect accuracy.'
|
||||
)
|
||||
else:
|
||||
self.input_size = model_input_size
|
||||
|
||||
self.output_names = [output.name for output in self.session.get_outputs()]
|
||||
Logger.info(f'Successfully initialized AgeGender model with input size {self.input_size}')
|
||||
except Exception as e:
|
||||
Logger.error(
|
||||
f"Failed to load AgeGender model from '{self.model_path}'",
|
||||
exc_info=True,
|
||||
)
|
||||
raise RuntimeError(f'Failed to initialize AgeGender model: {e}') from e
|
||||
|
||||
def preprocess(self, image: np.ndarray, bbox: Union[List, np.ndarray]) -> np.ndarray:
|
||||
"""
|
||||
Aligns the face based on the bounding box and preprocesses it for inference.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): The full input image in BGR format.
|
||||
bbox (Union[List, np.ndarray]): The face bounding box coordinates [x1, y1, x2, y2].
|
||||
|
||||
Returns:
|
||||
np.ndarray: The preprocessed image blob ready for inference.
|
||||
"""
|
||||
bbox = np.asarray(bbox)
|
||||
|
||||
width, height = bbox[2] - bbox[0], bbox[3] - bbox[1]
|
||||
center = ((bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2)
|
||||
scale = self.input_size[1] / (max(width, height) * 1.5)
|
||||
|
||||
# **Rotation parameter restored here**
|
||||
rotation = 0.0
|
||||
aligned_face, _ = bbox_center_alignment(image, center, self.input_size[1], scale, rotation)
|
||||
|
||||
blob = cv2.dnn.blobFromImage(
|
||||
aligned_face,
|
||||
scalefactor=1.0,
|
||||
size=self.input_size[::-1],
|
||||
mean=(0.0, 0.0, 0.0),
|
||||
swapRB=True,
|
||||
)
|
||||
return blob
|
||||
|
||||
def postprocess(self, prediction: np.ndarray) -> Tuple[int, int]:
|
||||
"""
|
||||
Processes the raw model output to extract gender and age.
|
||||
|
||||
Args:
|
||||
prediction (np.ndarray): The raw output from the model inference.
|
||||
|
||||
Returns:
|
||||
Tuple[int, int]: A tuple containing the predicted gender ID (0 for Female, 1 for Male)
|
||||
and age (in years).
|
||||
"""
|
||||
# First two values are gender logits
|
||||
gender_id = int(np.argmax(prediction[:2]))
|
||||
# Third value is normalized age, scaled by 100
|
||||
age = int(np.round(prediction[2] * 100))
|
||||
return gender_id, age
|
||||
|
||||
def predict(self, image: np.ndarray, bbox: Union[List, np.ndarray]) -> Tuple[int, int]:
|
||||
"""
|
||||
Predicts age and gender for a single face specified by a bounding box.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): The full input image in BGR format.
|
||||
bbox (Union[List, np.ndarray]): The face bounding box coordinates [x1, y1, x2, y2].
|
||||
|
||||
Returns:
|
||||
Tuple[int, int]: A tuple containing the predicted gender ID (0 for Female, 1 for Male) and age.
|
||||
"""
|
||||
face_blob = self.preprocess(image, bbox)
|
||||
prediction = self.session.run(self.output_names, {self.input_name: face_blob})[0][0]
|
||||
gender_id, age = self.postprocess(prediction)
|
||||
return gender_id, age
|
||||
|
||||
|
||||
# TODO: below is only for testing, remove it later
|
||||
if __name__ == '__main__':
|
||||
# To run this script, you need to have uniface.detection installed
|
||||
# or available in your path.
|
||||
from uniface.constants import RetinaFaceWeights
|
||||
from uniface.detection import create_detector
|
||||
|
||||
print('Initializing models for live inference...')
|
||||
# 1. Initialize the face detector
|
||||
# Using a smaller model for faster real-time performance
|
||||
detector = create_detector(model_name=RetinaFaceWeights.MNET_V2)
|
||||
|
||||
# 2. Initialize the attribute predictor
|
||||
age_gender_predictor = AgeGender()
|
||||
|
||||
# 3. Start webcam capture
|
||||
cap = cv2.VideoCapture(0)
|
||||
if not cap.isOpened():
|
||||
print('Error: Could not open webcam.')
|
||||
exit()
|
||||
|
||||
print("Starting webcam feed. Press 'q' to quit.")
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
print('Error: Failed to capture frame.')
|
||||
break
|
||||
|
||||
# Detect faces in the current frame
|
||||
detections = detector.detect(frame)
|
||||
|
||||
# For each detected face, predict age and gender
|
||||
for detection in detections:
|
||||
box = detection['bbox']
|
||||
x1, y1, x2, y2 = map(int, box)
|
||||
|
||||
# Predict attributes
|
||||
gender_id, age = age_gender_predictor.predict(frame, box)
|
||||
gender_str = 'Female' if gender_id == 0 else 'Male'
|
||||
|
||||
# Prepare text and draw on the frame
|
||||
label = f'{gender_str}, {age}'
|
||||
cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
|
||||
cv2.putText(
|
||||
frame,
|
||||
label,
|
||||
(x1, y1 - 10),
|
||||
cv2.FONT_HERSHEY_SIMPLEX,
|
||||
0.8,
|
||||
(0, 255, 0),
|
||||
2,
|
||||
)
|
||||
|
||||
# Display the resulting frame
|
||||
cv2.imshow("Age and Gender Inference (Press 'q' to quit)", frame)
|
||||
|
||||
# Break the loop if 'q' is pressed
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
# Release resources
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
print('Inference stopped.')
|
||||
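The output layout assumed by AgeGender.postprocess above (two gender logits followed by a normalized age) can be checked with a toy prediction vector; the numbers are made up for illustration:

import numpy as np

prediction = np.array([0.1, 0.9, 0.32], dtype=np.float32)  # hypothetical raw model output
gender_id = int(np.argmax(prediction[:2]))  # -> 1 (Male)
age = int(np.round(prediction[2] * 100))    # -> 32 years
print(gender_id, age)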
uniface/attribute/base.py (new file, 92 lines)
@@ -0,0 +1,92 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Any
|
||||
|
||||
import numpy as np
|
||||
|
||||
|
||||
class Attribute(ABC):
|
||||
"""
|
||||
Abstract base class for face attribute models.
|
||||
|
||||
This class defines the common interface that all attribute models
|
||||
(e.g., age-gender, emotion) must implement. It ensures a consistent API
|
||||
across different attribute prediction modules in the library, making them
|
||||
interchangeable and easy to use.
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def _initialize_model(self) -> None:
|
||||
"""
|
||||
Initializes the underlying model for inference.
|
||||
|
||||
This method should handle loading model weights, creating the
|
||||
inference session (e.g., ONNX Runtime, PyTorch), and any necessary
|
||||
warm-up procedures to prepare the model for prediction.
|
||||
"""
|
||||
raise NotImplementedError('Subclasses must implement the _initialize_model method.')
|
||||
|
||||
@abstractmethod
|
||||
def preprocess(self, image: np.ndarray, *args: Any) -> Any:
|
||||
"""
|
||||
Preprocesses the input data for the model.
|
||||
|
||||
This method should take a raw image and any other necessary data
|
||||
(like bounding boxes or landmarks) and convert it into the format
|
||||
expected by the model's inference engine (e.g., a blob or tensor).
|
||||
|
||||
Args:
|
||||
image (np.ndarray): The input image containing the face, typically
|
||||
in BGR format.
|
||||
*args: Additional arguments required for preprocessing, such as
|
||||
bounding boxes or facial landmarks.
|
||||
|
||||
Returns:
|
||||
The preprocessed data ready for model inference.
|
||||
"""
|
||||
raise NotImplementedError('Subclasses must implement the preprocess method.')
|
||||
|
||||
@abstractmethod
|
||||
def postprocess(self, prediction: Any) -> Any:
|
||||
"""
|
||||
Postprocesses the raw model output into a human-readable format.
|
||||
|
||||
This method takes the raw output from the model's inference and
|
||||
converts it into a meaningful result, such as an age value, a gender
|
||||
label, or an emotion category.
|
||||
|
||||
Args:
|
||||
prediction (Any): The raw output from the model's inference.
|
||||
|
||||
Returns:
|
||||
The final, processed attributes.
|
||||
"""
|
||||
raise NotImplementedError('Subclasses must implement the postprocess method.')
|
||||
|
||||
@abstractmethod
|
||||
def predict(self, image: np.ndarray, *args: Any) -> Any:
|
||||
"""
|
||||
Performs end-to-end attribute prediction on a given image.
|
||||
|
||||
This method orchestrates the full pipeline: it calls the preprocess,
|
||||
inference, and postprocess steps to return the final, user-friendly
|
||||
attribute prediction.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): The input image containing the face.
|
||||
*args: Additional data required for prediction, such as a bounding
|
||||
box or landmarks.
|
||||
|
||||
Returns:
|
||||
The final predicted attributes.
|
||||
"""
|
||||
raise NotImplementedError('Subclasses must implement the predict method.')
|
||||
|
||||
def __call__(self, *args, **kwargs) -> Any:
|
||||
"""
|
||||
Provides a convenient, callable shortcut for the `predict` method.
|
||||
"""
|
||||
return self.predict(*args, **kwargs)
|
||||
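The base class only fixes the interface; a toy subclass that satisfies all four abstract methods might look like this (it performs no real inference and exists purely to illustrate the contract):

import numpy as np

from uniface.attribute.base import Attribute


class ConstantAttribute(Attribute):
    """Toy predictor that always returns the same label; for illustration only."""

    def __init__(self) -> None:
        self._initialize_model()

    def _initialize_model(self) -> None:
        self.label = 'neutral'  # no real weights are loaded in this sketch

    def preprocess(self, image: np.ndarray, *args):
        return image.astype(np.float32) / 255.0

    def postprocess(self, prediction):
        return self.label

    def predict(self, image: np.ndarray, *args):
        blob = self.preprocess(image, *args)
        return self.postprocess(blob.mean())


print(ConstantAttribute()(np.zeros((112, 112, 3), dtype=np.uint8)))  # 'neutral'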
uniface/attribute/emotion.py (new file, 194 lines)
@@ -0,0 +1,194 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from typing import List, Tuple, Union
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
import torch
|
||||
|
||||
from uniface.attribute.base import Attribute
|
||||
from uniface.constants import DDAMFNWeights
|
||||
from uniface.face_utils import face_alignment
|
||||
from uniface.log import Logger
|
||||
from uniface.model_store import verify_model_weights
|
||||
|
||||
__all__ = ['Emotion']
|
||||
|
||||
|
||||
class Emotion(Attribute):
|
||||
"""
|
||||
Emotion recognition model using a TorchScript model.
|
||||
|
||||
This class inherits from the base `Attribute` class and implements the
|
||||
functionality for predicting one of several emotion categories from a face
|
||||
image. It requires 5-point facial landmarks for alignment.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
model_weights: DDAMFNWeights = DDAMFNWeights.AFFECNET7,
|
||||
input_size: Tuple[int, int] = (112, 112),
|
||||
) -> None:
|
||||
"""
|
||||
Initializes the emotion recognition model.
|
||||
|
||||
Args:
|
||||
model_weights (DDAMFNWeights): The enum for the model weights to load.
|
||||
input_size (Tuple[int, int]): The expected input size for the model.
|
||||
"""
|
||||
Logger.info(f'Initializing Emotion with model={model_weights.name}')
|
||||
|
||||
if torch.backends.mps.is_available():
|
||||
self.device = torch.device('mps')
|
||||
elif torch.cuda.is_available():
|
||||
self.device = torch.device('cuda')
|
||||
else:
|
||||
self.device = torch.device('cpu')
|
||||
|
||||
self.input_size = input_size
|
||||
self.model_path = verify_model_weights(model_weights)
|
||||
|
||||
# Define emotion labels based on the selected model
|
||||
self.emotion_labels = [
|
||||
'Neutral',
|
||||
'Happy',
|
||||
'Sad',
|
||||
'Surprise',
|
||||
'Fear',
|
||||
'Disgust',
|
||||
'Angry',
|
||||
]
|
||||
if model_weights == DDAMFNWeights.AFFECNET8:
|
||||
self.emotion_labels.append('Contempt')
|
||||
|
||||
self._initialize_model()
|
||||
|
||||
def _initialize_model(self) -> None:
|
||||
"""
|
||||
Loads and initializes the TorchScript model for inference.
|
||||
"""
|
||||
try:
|
||||
self.model = torch.jit.load(self.model_path, map_location=self.device)
|
||||
self.model.eval()
|
||||
# Warm-up with a dummy input for faster first inference
|
||||
dummy_input = torch.randn(1, 3, *self.input_size).to(self.device)
|
||||
with torch.no_grad():
|
||||
self.model(dummy_input)
|
||||
Logger.info(f'Successfully initialized Emotion model on {self.device}')
|
||||
except Exception as e:
|
||||
Logger.error(f"Failed to load Emotion model from '{self.model_path}'", exc_info=True)
|
||||
raise RuntimeError(f'Failed to initialize Emotion model: {e}') from e
|
||||
|
||||
def preprocess(self, image: np.ndarray, landmark: Union[List, np.ndarray]) -> torch.Tensor:
|
||||
"""
|
||||
Aligns the face using landmarks and preprocesses it into a tensor.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): The full input image in BGR format.
|
||||
landmark (Union[List, np.ndarray]): The 5-point facial landmarks.
|
||||
|
||||
Returns:
|
||||
torch.Tensor: The preprocessed image tensor ready for inference.
|
||||
"""
|
||||
landmark = np.asarray(landmark)
|
||||
|
||||
aligned_image, _ = face_alignment(image, landmark)
|
||||
|
||||
# Convert BGR to RGB, resize, normalize, and convert to a CHW tensor
|
||||
rgb_image = cv2.cvtColor(aligned_image, cv2.COLOR_BGR2RGB)
|
||||
resized_image = cv2.resize(rgb_image, self.input_size).astype(np.float32) / 255.0
|
||||
mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
|
||||
std = np.array([0.229, 0.224, 0.225], dtype=np.float32)
|
||||
normalized_image = (resized_image - mean) / std
|
||||
transposed_image = normalized_image.transpose((2, 0, 1))
|
||||
|
||||
return torch.from_numpy(transposed_image).unsqueeze(0).to(self.device)
|
||||
|
||||
def postprocess(self, prediction: torch.Tensor) -> Tuple[str, float]:
|
||||
"""
|
||||
Processes the raw model output to get the emotion label and confidence score.
|
||||
"""
|
||||
probabilities = torch.nn.functional.softmax(prediction, dim=1).squeeze().cpu().numpy()
|
||||
pred_index = np.argmax(probabilities)
|
||||
emotion_label = self.emotion_labels[pred_index]
|
||||
confidence = float(probabilities[pred_index])
|
||||
return emotion_label, confidence
|
||||
|
||||
def predict(self, image: np.ndarray, landmark: Union[List, np.ndarray]) -> Tuple[str, float]:
|
||||
"""
|
||||
Predicts the emotion from a single face specified by its landmarks.
|
||||
"""
|
||||
input_tensor = self.preprocess(image, landmark)
|
||||
with torch.no_grad():
|
||||
output = self.model(input_tensor)
|
||||
if isinstance(output, tuple):
|
||||
output = output[0]
|
||||
|
||||
return self.postprocess(output)
|
||||
|
||||
|
||||
# TODO: below is only for testing, remove it later
|
||||
if __name__ == '__main__':
|
||||
from uniface.constants import RetinaFaceWeights
|
||||
from uniface.detection import create_detector
|
||||
|
||||
print('Initializing models for live inference...')
|
||||
# 1. Initialize the face detector
|
||||
# Using a smaller model for faster real-time performance
|
||||
detector = create_detector(model_name=RetinaFaceWeights.MNET_V2)
|
||||
|
||||
# 2. Initialize the attribute predictor
|
||||
emotion_predictor = Emotion()
|
||||
|
||||
# 3. Start webcam capture
|
||||
cap = cv2.VideoCapture(0)
|
||||
if not cap.isOpened():
|
||||
print('Error: Could not open webcam.')
|
||||
exit()
|
||||
|
||||
print("Starting webcam feed. Press 'q' to quit.")
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
print('Error: Failed to capture frame.')
|
||||
break
|
||||
|
||||
# Detect faces in the current frame.
|
||||
# This method returns a list of dictionaries for each detected face.
|
||||
detections = detector.detect(frame)
|
||||
|
||||
# For each detected face, predict the emotion
|
||||
for detection in detections:
|
||||
box = detection['bbox']
|
||||
landmark = detection['landmarks']
|
||||
x1, y1, x2, y2 = map(int, box)
|
||||
|
||||
# Predict attributes using the landmark
|
||||
emotion, confidence = emotion_predictor.predict(frame, landmark)
|
||||
|
||||
# Prepare text and draw on the frame
|
||||
label = f'{emotion} ({confidence:.2f})'
|
||||
cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)
|
||||
cv2.putText(
|
||||
frame,
|
||||
label,
|
||||
(x1, y1 - 10),
|
||||
cv2.FONT_HERSHEY_SIMPLEX,
|
||||
0.8,
|
||||
(255, 0, 0),
|
||||
2,
|
||||
)
|
||||
|
||||
# Display the resulting frame
|
||||
cv2.imshow("Emotion Inference (Press 'q' to quit)", frame)
|
||||
|
||||
# Break the loop if 'q' is pressed
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
# Release resources
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
print('Inference stopped.')
|
||||
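Beyond the webcam demo above, Emotion can be used on a single image in the same way as AgeGender; a brief sketch with a placeholder file path (PyTorch must be installed for the import to succeed):

import cv2

from uniface.attribute import Emotion
from uniface.detection import create_detector

detector = create_detector('retinaface')
predictor = Emotion()

image = cv2.imread('portrait.jpg')  # placeholder path
for det in detector.detect(image):
    emotion, confidence = predictor(image, det['landmarks'])  # __call__ forwards to predict()
    print(emotion, round(confidence, 2))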
@@ -1,14 +1,23 @@
|
||||
# Copyright 2024 Yakhyokhuja Valikhujaev
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
import cv2
|
||||
import math
|
||||
import itertools
|
||||
import math
|
||||
from typing import List, Optional, Tuple
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
import torch
|
||||
from typing import Tuple, List
|
||||
__all__ = [
|
||||
'resize_image',
|
||||
'generate_anchors',
|
||||
'non_max_suppression',
|
||||
'decode_boxes',
|
||||
'decode_landmarks',
|
||||
'distance2bbox',
|
||||
'distance2kps',
|
||||
]
|
||||
|
||||
|
||||
def resize_image(frame, target_shape: Tuple[int, int] = (640, 640)) -> Tuple[np.ndarray, float]:
|
||||
@@ -44,28 +53,21 @@ def resize_image(frame, target_shape: Tuple[int, int] = (640, 640)) -> Tuple[np.
|
||||
return image, resize_factor
|
||||
|
||||
|
||||
def generate_anchors(image_size: Tuple[int, int] = (640, 640)) -> torch.Tensor:
|
||||
def generate_anchors(image_size: Tuple[int, int] = (640, 640)) -> np.ndarray:
|
||||
"""
|
||||
Generate anchor boxes for a given image size.
|
||||
Generate anchor boxes for a given image size (RetinaFace specific).
|
||||
|
||||
Args:
|
||||
image_size (Tuple[int, int]): Input image size (width, height). Defaults to (640, 640).
|
||||
|
||||
Returns:
|
||||
torch.Tensor: Anchor box coordinates as a tensor.
|
||||
np.ndarray: Anchor box coordinates as a NumPy array with shape (num_anchors, 4).
|
||||
"""
|
||||
image_size = image_size
|
||||
|
||||
steps = [8, 16, 32]
|
||||
min_sizes = [[16, 32], [64, 128], [256, 512]]
|
||||
|
||||
anchors = []
|
||||
feature_maps = [
|
||||
[
|
||||
math.ceil(image_size[0] / step),
|
||||
math.ceil(image_size[1] / step)
|
||||
] for step in steps
|
||||
]
|
||||
feature_maps = [[math.ceil(image_size[0] / step), math.ceil(image_size[1] / step)] for step in steps]
|
||||
|
||||
for k, (map_height, map_width) in enumerate(feature_maps):
|
||||
step = steps[k]
|
||||
@@ -79,20 +81,20 @@ def generate_anchors(image_size: Tuple[int, int] = (640, 640)) -> torch.Tensor:
|
||||
for cy, cx in itertools.product(dense_cy, dense_cx):
|
||||
anchors += [cx, cy, s_kx, s_ky]
|
||||
|
||||
output = torch.Tensor(anchors).view(-1, 4)
|
||||
output = np.array(anchors, dtype=np.float32).reshape(-1, 4)
|
||||
return output
|
||||
|
||||
|
||||
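For the default 640x640 input, the strides 8/16/32 give 80x80, 40x40 and 20x20 feature maps with two anchor sizes per cell, so generate_anchors returns 2 x (6400 + 1600 + 400) = 16,800 anchors. A quick check:

# generate_anchors as defined above in this module
anchors = generate_anchors((640, 640))
print(anchors.shape)  # (16800, 4)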
def nms(dets: List[np.ndarray], threshold: float):
|
||||
def non_max_suppression(dets: np.ndarray, threshold: float) -> List[int]:
|
||||
"""
|
||||
Apply Non-Maximum Suppression (NMS) to reduce overlapping bounding boxes based on a threshold.
|
||||
|
||||
Args:
|
||||
dets (numpy.ndarray): Array of detections with each row as [x1, y1, x2, y2, score].
|
||||
dets (np.ndarray): Array of detections with each row as [x1, y1, x2, y2, score].
|
||||
threshold (float): IoU threshold for suppression.
|
||||
|
||||
Returns:
|
||||
list: Indices of bounding boxes retained after suppression.
|
||||
List[int]: Indices of bounding boxes retained after suppression.
|
||||
"""
|
||||
x1 = dets[:, 0]
|
||||
y1 = dets[:, 1]
|
||||
@@ -123,56 +125,119 @@ def nms(dets: List[np.ndarray], threshold: float):
|
||||
return keep
|
||||
|
||||
|
||||
def decode_boxes(loc, priors, variances=[0.1, 0.2]) -> torch.Tensor:
|
||||
def decode_boxes(loc: np.ndarray, priors: np.ndarray, variances: Optional[List[float]] = None) -> np.ndarray:
|
||||
"""
|
||||
Decode locations from predictions using priors to undo
|
||||
the encoding done for offset regression at train time.
|
||||
the encoding done for offset regression at train time (RetinaFace specific).
|
||||
|
||||
Args:
|
||||
loc (tensor): Location predictions for loc layers, shape: [num_priors, 4]
|
||||
priors (tensor): Prior boxes in center-offset form, shape: [num_priors, 4]
|
||||
variances (list[float]): Variances of prior boxes
|
||||
loc (np.ndarray): Location predictions for loc layers, shape: [num_priors, 4]
|
||||
priors (np.ndarray): Prior boxes in center-offset form, shape: [num_priors, 4]
|
||||
variances (Optional[List[float]]): Variances of prior boxes. Defaults to [0.1, 0.2].
|
||||
|
||||
Returns:
|
||||
tensor: Decoded bounding box predictions
|
||||
np.ndarray: Decoded bounding box predictions with shape [num_priors, 4]
|
||||
"""
|
||||
if variances is None:
|
||||
variances = [0.1, 0.2]
|
||||
# Compute centers of predicted boxes
|
||||
cxcy = priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:]
|
||||
|
||||
# Compute widths and heights of predicted boxes
|
||||
wh = priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])
|
||||
wh = priors[:, 2:] * np.exp(loc[:, 2:] * variances[1])
|
||||
|
||||
# Convert center, size to corner coordinates
|
||||
boxes = torch.empty_like(loc)
|
||||
boxes = np.zeros_like(loc)
|
||||
boxes[:, :2] = cxcy - wh / 2 # xmin, ymin
|
||||
boxes[:, 2:] = cxcy + wh / 2 # xmax, ymax
|
||||
|
||||
return boxes
|
||||
|
||||
|
||||
def decode_landmarks(predictions, priors, variances=[0.1, 0.2]) -> torch.Tensor:
|
||||
def decode_landmarks(
|
||||
predictions: np.ndarray, priors: np.ndarray, variances: Optional[List[float]] = None
|
||||
) -> np.ndarray:
|
||||
"""
|
||||
Decode landmarks from predictions using prior boxes to reverse the encoding done during training.
|
||||
Decode landmark predictions using prior boxes (RetinaFace specific).
|
||||
|
||||
Args:
|
||||
predictions (tensor): Landmark predictions for localization layers.
|
||||
Shape: [num_priors, 10] where each prior contains 5 landmark (x, y) pairs.
|
||||
priors (tensor): Prior boxes in center-offset form.
|
||||
Shape: [num_priors, 4], where each prior has (cx, cy, width, height).
|
||||
variances (list[float]): Variances of the prior boxes to scale the decoded values.
|
||||
predictions (np.ndarray): Landmark predictions, shape: [num_priors, 10]
|
||||
priors (np.ndarray): Prior boxes, shape: [num_priors, 4]
|
||||
variances (Optional[List[float]]): Scaling factors for landmark offsets. Defaults to [0.1, 0.2].
|
||||
|
||||
Returns:
|
||||
landmarks (tensor): Decoded landmark predictions.
|
||||
Shape: [num_priors, 10] where each row contains the decoded (x, y) pairs for 5 landmarks.
|
||||
np.ndarray: Decoded landmarks, shape: [num_priors, 10]
|
||||
"""
|
||||
if variances is None:
|
||||
variances = [0.1, 0.2]
|
||||
|
||||
# Reshape predictions to [num_priors, 5, 2] to handle each pair (x, y) in a batch
|
||||
predictions = predictions.view(predictions.size(0), 5, 2)
|
||||
# Reshape predictions to [num_priors, 5, 2] to process landmark points
|
||||
predictions = predictions.reshape(predictions.shape[0], 5, 2)
|
||||
|
||||
# Perform the same operation on all landmark pairs at once
|
||||
landmarks = priors[:, :2].unsqueeze(1) + predictions * variances[0] * priors[:, 2:].unsqueeze(1)
|
||||
# Expand priors to match (num_priors, 5, 2)
|
||||
priors_xy = np.repeat(priors[:, :2][:, np.newaxis, :], 5, axis=1) # (num_priors, 5, 2)
|
||||
priors_wh = np.repeat(priors[:, 2:][:, np.newaxis, :], 5, axis=1) # (num_priors, 5, 2)
|
||||
|
||||
# Compute absolute landmark positions
|
||||
landmarks = priors_xy + predictions * variances[0] * priors_wh
|
||||
|
||||
# Flatten back to [num_priors, 10]
|
||||
landmarks = landmarks.view(landmarks.size(0), -1)
|
||||
landmarks = landmarks.reshape(landmarks.shape[0], -1)
|
||||
|
||||
return landmarks
|
||||
|
||||
|
||||
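A small NumPy check of the two decoders above: with a single prior and zero offsets, the decoded box is just the prior converted from center-size to corner form, and all five landmarks collapse to the prior center (values are illustrative):

import numpy as np

# decode_boxes / decode_landmarks as defined above in this module
priors = np.array([[0.5, 0.5, 0.2, 0.2]], dtype=np.float32)  # (cx, cy, w, h)
loc = np.zeros((1, 4), dtype=np.float32)                     # zero box offsets
lmk = np.zeros((1, 10), dtype=np.float32)                    # zero landmark offsets

print(decode_boxes(loc, priors))      # [[0.4 0.4 0.6 0.6]]
print(decode_landmarks(lmk, priors))  # [[0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5]]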
def distance2bbox(points: np.ndarray, distance: np.ndarray, max_shape: Optional[Tuple[int, int]] = None) -> np.ndarray:
|
||||
"""
|
||||
Decode distance prediction to bounding box (SCRFD specific).
|
||||
|
||||
Args:
|
||||
points (np.ndarray): Anchor points with shape (n, 2), [x, y].
|
||||
distance (np.ndarray): Distance from the given point to 4
|
||||
boundaries (left, top, right, bottom) with shape (n, 4).
|
||||
max_shape (Optional[Tuple[int, int]]): Shape of the image (height, width) for clipping.
|
||||
|
||||
Returns:
|
||||
np.ndarray: Decoded bounding boxes with shape (n, 4) as [x1, y1, x2, y2].
|
||||
"""
|
||||
x1 = points[:, 0] - distance[:, 0]
|
||||
y1 = points[:, 1] - distance[:, 1]
|
||||
x2 = points[:, 0] + distance[:, 2]
|
||||
y2 = points[:, 1] + distance[:, 3]
|
||||
|
||||
if max_shape is not None:
|
||||
x1 = np.clip(x1, 0, max_shape[1])
|
||||
y1 = np.clip(y1, 0, max_shape[0])
|
||||
x2 = np.clip(x2, 0, max_shape[1])
|
||||
y2 = np.clip(y2, 0, max_shape[0])
|
||||
else:
|
||||
x1 = np.maximum(x1, 0)
|
||||
y1 = np.maximum(y1, 0)
|
||||
x2 = np.maximum(x2, 0)
|
||||
y2 = np.maximum(y2, 0)
|
||||
|
||||
return np.stack([x1, y1, x2, y2], axis=-1)
|
||||
|
||||
|
||||
def distance2kps(points: np.ndarray, distance: np.ndarray, max_shape: Optional[Tuple[int, int]] = None) -> np.ndarray:
|
||||
"""
|
||||
Decode distance prediction to keypoints (SCRFD specific).
|
||||
|
||||
Args:
|
||||
points (np.ndarray): Anchor points with shape (n, 2), [x, y].
|
||||
distance (np.ndarray): Distance from the given point to keypoints with shape (n, 2k).
|
||||
max_shape (Optional[Tuple[int, int]]): Shape of the image (height, width) for clipping.
|
||||
|
||||
Returns:
|
||||
np.ndarray: Decoded keypoints with shape (n, 2k).
|
||||
"""
|
||||
preds = []
|
||||
for i in range(0, distance.shape[1], 2):
|
||||
px = points[:, i % 2] + distance[:, i]
|
||||
py = points[:, i % 2 + 1] + distance[:, i + 1]
|
||||
if max_shape is not None:
|
||||
px = np.clip(px, 0, max_shape[1])
|
||||
py = np.clip(py, 0, max_shape[0])
|
||||
preds.append(px)
|
||||
preds.append(py)
|
||||
return np.stack(preds, axis=-1)
|
||||
|
||||
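The SCRFD-style decoders follow the same pattern but work on anchor points and per-side distances; a toy example with one point and symmetric offsets:

import numpy as np

# distance2bbox / distance2kps as defined above in this module
points = np.array([[100.0, 100.0]], dtype=np.float32)
distance = np.array([[10.0, 10.0, 10.0, 10.0]], dtype=np.float32)  # left, top, right, bottom

print(distance2bbox(points, distance))  # [[ 90.  90. 110. 110.]]
print(distance2kps(points, distance))   # [[110. 110. 110. 110.]] -- keypoint offsets are added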
@@ -1,26 +1,210 @@
|
||||
# Copyright 2024 Yakhyokhuja Valikhujaev
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from enum import Enum
|
||||
from typing import Dict
|
||||
|
||||
|
||||
MODEL_URLS: Dict[str, str] = {
|
||||
'retinaface_mnet025': 'https://github.com/yakhyo/uniface/releases/download/v0.1.1/retinaface_mv1_0.25.onnx',
|
||||
'retinaface_mnet050': 'https://github.com/yakhyo/uniface/releases/download/v0.1.1/retinaface_mv1_0.50.onnx',
|
||||
'retinaface_mnet_v1': 'https://github.com/yakhyo/uniface/releases/download/v0.1.1/retinaface_mv1.onnx',
|
||||
'retinaface_mnet_v2': 'https://github.com/yakhyo/uniface/releases/download/v0.1.1/retinaface_mv2.onnx',
|
||||
'retinaface_r18': 'https://github.com/yakhyo/uniface/releases/download/v0.1.1/retinaface_r18.onnx',
|
||||
'retinaface_r34': 'https://github.com/yakhyo/uniface/releases/download/v0.1.1/retinaface_r34.onnx'
|
||||
# fmt: off
|
||||
class SphereFaceWeights(str, Enum):
|
||||
"""
|
||||
Trained on MS1M V2 dataset with 5.8 million images of 85k identities.
|
||||
https://github.com/yakhyo/face-recognition
|
||||
"""
|
||||
SPHERE20 = "sphere20"
|
||||
SPHERE36 = "sphere36"
|
||||
|
||||
class MobileFaceWeights(str, Enum):
|
||||
"""
|
||||
Trained on MS1M V2 dataset with 5.8 million images of 85k identities.
|
||||
https://github.com/yakhyo/face-recognition
|
||||
"""
|
||||
MNET_025 = "mobilenetv1_025"
|
||||
MNET_V2 = "mobilenetv2"
|
||||
MNET_V3_SMALL = "mobilenetv3_small"
|
||||
MNET_V3_LARGE = "mobilenetv3_large"
|
||||
|
||||
class ArcFaceWeights(str, Enum):
|
||||
"""
|
||||
Pretrained weights from ArcFace model (insightface).
|
||||
https://github.com/deepinsight/insightface
|
||||
"""
|
||||
MNET = "arcface_mnet"
|
||||
RESNET = "arcface_resnet"
|
||||
|
||||
class RetinaFaceWeights(str, Enum):
|
||||
"""
|
||||
Trained on WIDER FACE dataset.
|
||||
https://github.com/yakhyo/retinaface-pytorch
|
||||
"""
|
||||
MNET_025 = "retinaface_mnet025"
|
||||
MNET_050 = "retinaface_mnet050"
|
||||
MNET_V1 = "retinaface_mnet_v1"
|
||||
MNET_V2 = "retinaface_mnet_v2"
|
||||
RESNET18 = "retinaface_r18"
|
||||
RESNET34 = "retinaface_r34"
|
||||
|
||||
|
||||
class SCRFDWeights(str, Enum):
|
||||
"""
|
||||
Trained on WIDER FACE dataset.
|
||||
https://github.com/deepinsight/insightface
|
||||
"""
|
||||
SCRFD_10G_KPS = "scrfd_10g"
|
||||
SCRFD_500M_KPS = "scrfd_500m"
|
||||
|
||||
|
||||
class YOLOv5FaceWeights(str, Enum):
|
||||
"""
|
||||
Trained on WIDER FACE dataset.
|
||||
Original implementation: https://github.com/deepcam-cn/yolov5-face
|
||||
Exported to ONNX from: https://github.com/yakhyo/yolov5-face-onnx-inference
|
||||
|
||||
Model Performance (WIDER FACE):
|
||||
- YOLOV5N: 11MB, 93.61% Easy / 91.52% Medium / 80.53% Hard
|
||||
- YOLOV5S: 28MB, 94.33% Easy / 92.61% Medium / 83.15% Hard
|
||||
- YOLOV5M: 82MB, 95.30% Easy / 93.76% Medium / 85.28% Hard
|
||||
"""
|
||||
YOLOV5N = "yolov5n"
|
||||
YOLOV5S = "yolov5s"
|
||||
YOLOV5M = "yolov5m"
|
||||
|
||||
|
||||
class DDAMFNWeights(str, Enum):
|
||||
"""
|
||||
Trained on AffectNet dataset.
|
||||
https://github.com/SainingZhang/DDAMFN/tree/main/DDAMFN
|
||||
"""
|
||||
AFFECNET7 = "affecnet7"
|
||||
AFFECNET8 = "affecnet8"
|
||||
|
||||
|
||||
class AgeGenderWeights(str, Enum):
|
||||
"""
|
||||
Trained on CelebA dataset.
|
||||
https://github.com/deepinsight/insightface
|
||||
"""
|
||||
DEFAULT = "age_gender"
|
||||
|
||||
|
||||
class LandmarkWeights(str, Enum):
|
||||
"""
|
||||
MobileNet 0.5 from Insightface
|
||||
https://github.com/deepinsight/insightface/tree/master/alignment/coordinate_reg
|
||||
"""
|
||||
DEFAULT = "2d_106"
|
||||
|
||||
|
||||
class GazeWeights(str, Enum):
|
||||
"""
|
||||
MobileGaze: Real-Time Gaze Estimation models.
|
||||
Trained on Gaze360 dataset.
|
||||
https://github.com/yakhyo/gaze-estimation
|
||||
"""
|
||||
RESNET18 = "gaze_resnet18"
|
||||
RESNET34 = "gaze_resnet34"
|
||||
RESNET50 = "gaze_resnet50"
|
||||
MOBILENET_V2 = "gaze_mobilenetv2"
|
||||
MOBILEONE_S0 = "gaze_mobileone_s0"
|
||||
|
||||
|
||||
class ParsingWeights(str, Enum):
|
||||
"""
|
||||
Face Parsing: Semantic Segmentation of Facial Components.
|
||||
Trained on CelebAMask-HQ dataset.
|
||||
https://github.com/yakhyo/face-parsing
|
||||
"""
|
||||
RESNET18 = "parsing_resnet18"
|
||||
RESNET34 = "parsing_resnet34"
|
||||
|
||||
|
||||
MODEL_URLS: Dict[Enum, str] = {
|
||||
# RetinaFace
|
||||
RetinaFaceWeights.MNET_025: 'https://github.com/yakhyo/uniface/releases/download/weights/retinaface_mv1_0.25.onnx',
|
||||
RetinaFaceWeights.MNET_050: 'https://github.com/yakhyo/uniface/releases/download/weights/retinaface_mv1_0.50.onnx',
|
||||
RetinaFaceWeights.MNET_V1: 'https://github.com/yakhyo/uniface/releases/download/weights/retinaface_mv1.onnx',
|
||||
RetinaFaceWeights.MNET_V2: 'https://github.com/yakhyo/uniface/releases/download/weights/retinaface_mv2.onnx',
|
||||
RetinaFaceWeights.RESNET18: 'https://github.com/yakhyo/uniface/releases/download/weights/retinaface_r18.onnx',
|
||||
RetinaFaceWeights.RESNET34: 'https://github.com/yakhyo/uniface/releases/download/weights/retinaface_r34.onnx',
|
||||
# MobileFace
|
||||
MobileFaceWeights.MNET_025: 'https://github.com/yakhyo/uniface/releases/download/weights/mobilenetv1_0.25.onnx',
|
||||
MobileFaceWeights.MNET_V2: 'https://github.com/yakhyo/uniface/releases/download/weights/mobilenetv2.onnx',
|
||||
MobileFaceWeights.MNET_V3_SMALL: 'https://github.com/yakhyo/uniface/releases/download/weights/mobilenetv3_small.onnx',
|
||||
MobileFaceWeights.MNET_V3_LARGE: 'https://github.com/yakhyo/uniface/releases/download/weights/mobilenetv3_large.onnx',
|
||||
# SphereFace
|
||||
SphereFaceWeights.SPHERE20: 'https://github.com/yakhyo/uniface/releases/download/weights/sphere20.onnx',
|
||||
SphereFaceWeights.SPHERE36: 'https://github.com/yakhyo/uniface/releases/download/weights/sphere36.onnx',
|
||||
# ArcFace
|
||||
ArcFaceWeights.MNET: 'https://github.com/yakhyo/uniface/releases/download/weights/w600k_mbf.onnx',
|
||||
ArcFaceWeights.RESNET: 'https://github.com/yakhyo/uniface/releases/download/weights/w600k_r50.onnx',
|
||||
# SCRFD
|
||||
SCRFDWeights.SCRFD_10G_KPS: 'https://github.com/yakhyo/uniface/releases/download/weights/scrfd_10g_kps.onnx',
|
||||
SCRFDWeights.SCRFD_500M_KPS: 'https://github.com/yakhyo/uniface/releases/download/weights/scrfd_500m_kps.onnx',
|
||||
# YOLOv5-Face
|
||||
YOLOv5FaceWeights.YOLOV5N: 'https://github.com/yakhyo/yolov5-face-onnx-inference/releases/download/weights/yolov5n_face.onnx',
|
||||
YOLOv5FaceWeights.YOLOV5S: 'https://github.com/yakhyo/yolov5-face-onnx-inference/releases/download/weights/yolov5s_face.onnx',
|
||||
YOLOv5FaceWeights.YOLOV5M: 'https://github.com/yakhyo/yolov5-face-onnx-inference/releases/download/weights/yolov5m_face.onnx',
|
||||
# DDAFM
|
||||
DDAMFNWeights.AFFECNET7: 'https://github.com/yakhyo/uniface/releases/download/weights/affecnet7.script',
|
||||
DDAMFNWeights.AFFECNET8: 'https://github.com/yakhyo/uniface/releases/download/weights/affecnet8.script',
|
||||
# AgeGender
|
||||
AgeGenderWeights.DEFAULT: 'https://github.com/yakhyo/uniface/releases/download/weights/genderage.onnx',
|
||||
# Landmarks
|
||||
LandmarkWeights.DEFAULT: 'https://github.com/yakhyo/uniface/releases/download/weights/2d106det.onnx',
|
||||
# Gaze (MobileGaze)
|
||||
GazeWeights.RESNET18: 'https://github.com/yakhyo/gaze-estimation/releases/download/weights/resnet18_gaze.onnx',
|
||||
GazeWeights.RESNET34: 'https://github.com/yakhyo/gaze-estimation/releases/download/weights/resnet34_gaze.onnx',
|
||||
GazeWeights.RESNET50: 'https://github.com/yakhyo/gaze-estimation/releases/download/weights/resnet50_gaze.onnx',
|
||||
GazeWeights.MOBILENET_V2: 'https://github.com/yakhyo/gaze-estimation/releases/download/weights/mobilenetv2_gaze.onnx',
|
||||
GazeWeights.MOBILEONE_S0: 'https://github.com/yakhyo/gaze-estimation/releases/download/weights/mobileone_s0_gaze.onnx',
|
||||
# Parsing
|
||||
ParsingWeights.RESNET18: 'https://github.com/yakhyo/face-parsing/releases/download/weights/resnet18.onnx',
|
||||
ParsingWeights.RESNET34: 'https://github.com/yakhyo/face-parsing/releases/download/weights/resnet34.onnx',
|
||||
}
|
||||
|
||||
MODEL_SHA256: Dict[str, str] = {
|
||||
'retinaface_mnet025': 'b7a7acab55e104dce6f32cdfff929bd83946da5cd869b9e2e9bdffafd1b7e4a5',
|
||||
'retinaface_mnet050': 'd8977186f6037999af5b4113d42ba77a84a6ab0c996b17c713cc3d53b88bfc37',
|
||||
'retinaface_mnet_v1': '75c961aaf0aff03d13c074e9ec656e5510e174454dd4964a161aab4fe5f04153',
|
||||
'retinaface_mnet_v2': '3ca44c045651cabeed1193a1fae8946ad1f3a55da8fa74b341feab5a8319f757',
|
||||
'retinaface_r18': 'e8b5ddd7d2c3c8f7c942f9f10cec09d8e319f78f09725d3f709631de34fb649d',
|
||||
'retinaface_r34': 'bd0263dc2a465d32859555cb1741f2d98991eb0053696e8ee33fec583d30e630'
|
||||
MODEL_SHA256: Dict[Enum, str] = {
|
||||
# RetinaFace
|
||||
RetinaFaceWeights.MNET_025: 'b7a7acab55e104dce6f32cdfff929bd83946da5cd869b9e2e9bdffafd1b7e4a5',
|
||||
RetinaFaceWeights.MNET_050: 'd8977186f6037999af5b4113d42ba77a84a6ab0c996b17c713cc3d53b88bfc37',
|
||||
RetinaFaceWeights.MNET_V1: '75c961aaf0aff03d13c074e9ec656e5510e174454dd4964a161aab4fe5f04153',
|
||||
RetinaFaceWeights.MNET_V2: '3ca44c045651cabeed1193a1fae8946ad1f3a55da8fa74b341feab5a8319f757',
|
||||
RetinaFaceWeights.RESNET18: 'e8b5ddd7d2c3c8f7c942f9f10cec09d8e319f78f09725d3f709631de34fb649d',
|
||||
RetinaFaceWeights.RESNET34: 'bd0263dc2a465d32859555cb1741f2d98991eb0053696e8ee33fec583d30e630',
|
||||
# MobileFace
|
||||
MobileFaceWeights.MNET_025: 'eeda7d23d9c2b40cf77fa8da8e895b5697465192648852216074679657f8ee8b',
|
||||
MobileFaceWeights.MNET_V2: '38b148284dd48cc898d5d4453104252fbdcbacc105fe3f0b80e78954d9d20d89',
|
||||
MobileFaceWeights.MNET_V3_SMALL: 'd4acafa1039a82957aa8a9a1dac278a401c353a749c39df43de0e29cc1c127c3',
|
||||
MobileFaceWeights.MNET_V3_LARGE: '0e48f8e11f070211716d03e5c65a3db35a5e917cfb5bc30552358629775a142a',
|
||||
# SphereFace
|
||||
SphereFaceWeights.SPHERE20: 'c02878cf658eb1861f580b7e7144b0d27cc29c440bcaa6a99d466d2854f14c9d',
|
||||
SphereFaceWeights.SPHERE36: '13b3890cd5d7dec2b63f7c36fd7ce07403e5a0bbb701d9647c0289e6cbe7bb20',
|
||||
# ArcFace
|
||||
ArcFaceWeights.MNET: '9cc6e4a75f0e2bf0b1aed94578f144d15175f357bdc05e815e5c4a02b319eb4f',
|
||||
ArcFaceWeights.RESNET: '4c06341c33c2ca1f86781dab0e829f88ad5b64be9fba56e56bc9ebdefc619e43',
|
||||
# SCRFD
|
||||
SCRFDWeights.SCRFD_10G_KPS: '5838f7fe053675b1c7a08b633df49e7af5495cee0493c7dcf6697200b85b5b91',
|
||||
SCRFDWeights.SCRFD_500M_KPS: '5e4447f50245bbd7966bd6c0fa52938c61474a04ec7def48753668a9d8b4ea3a',
|
||||
# YOLOv5-Face
|
||||
YOLOv5FaceWeights.YOLOV5N: 'eb244a06e36999db732b317c2b30fa113cd6cfc1a397eaf738f2d6f33c01f640',
|
||||
YOLOv5FaceWeights.YOLOV5S: 'fc682801cd5880e1e296184a14aea0035486b5146ec1a1389d2e7149cb134bb2',
|
||||
YOLOv5FaceWeights.YOLOV5M: '04302ce27a15bde3e20945691b688e2dd018a10e92dd8932146bede6a49207b2',
|
||||
# DDAFM
|
||||
DDAMFNWeights.AFFECNET7: '10535bf8b6afe8e9d6ae26cea6c3add9a93036e9addb6adebfd4a972171d015d',
|
||||
DDAMFNWeights.AFFECNET8: '8c66963bc71db42796a14dfcbfcd181b268b65a3fc16e87147d6a3a3d7e0f487',
|
||||
# AgeGender
|
||||
AgeGenderWeights.DEFAULT: '4fde69b1c810857b88c64a335084f1c3fe8f01246c9a191b48c7bb756d6652fb',
|
||||
# Landmark
|
||||
LandmarkWeights.DEFAULT: 'f001b856447c413801ef5c42091ed0cd516fcd21f2d6b79635b1e733a7109dbf',
|
||||
# MobileGaze (trained on Gaze360)
|
||||
GazeWeights.RESNET18: '23d5d7e4f6f40dce8c35274ce9d08b45b9e22cbaaf5af73182f473229d713d31',
|
||||
GazeWeights.RESNET34: '4457ee5f7acd1a5ab02da4b61f02fc3a0b17adbf3844dd0ba3cd4288f2b5e1de',
|
||||
GazeWeights.RESNET50: 'e1eaf98f5ec7c89c6abe7cfe39f7be83e747163f98d1ff945c0603b3c521be22',
|
||||
GazeWeights.MOBILENET_V2: 'fdcdb84e3e6421b5a79e8f95139f249fc258d7f387eed5ddac2b80a9a15ce076',
|
||||
GazeWeights.MOBILEONE_S0: 'c0b5a4f4a0ffd24f76ab3c1452354bb2f60110899fd9a88b464c75bafec0fde8',
|
||||
# Face Parsing
|
||||
ParsingWeights.RESNET18: '0d9bd318e46987c3bdbfacae9e2c0f461cae1c6ac6ea6d43bbe541a91727e33f',
|
||||
ParsingWeights.RESNET34: '5b805bba7b5660ab7070b5a381dcf75e5b3e04199f1e9387232a77a00095102e',
|
||||
}
|
||||
|
||||
CHUNK_SIZE = 8192
|
||||
|
||||
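Each enum member above maps to a release URL and a SHA-256 digest; verify_model_weights in uniface.model_store resolves an enum to a local, checksum-verified file (its download and caching details are not shown in this diff). A lookup sketch:

from uniface.constants import MODEL_SHA256, MODEL_URLS, RetinaFaceWeights
from uniface.model_store import verify_model_weights

weights = RetinaFaceWeights.MNET_V2
print(MODEL_URLS[weights])    # release URL of the ONNX file
print(MODEL_SHA256[weights])  # expected checksum

model_path = verify_model_weights(weights)  # local path to the (downloaded) weights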
uniface/detection/__init__.py (new file, 168 lines)
@@ -0,0 +1,168 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
|
||||
from typing import Any, Dict, List
|
||||
|
||||
import numpy as np
|
||||
|
||||
from .base import BaseDetector
|
||||
from .retinaface import RetinaFace
|
||||
from .scrfd import SCRFD
|
||||
from .yolov5 import YOLOv5Face
|
||||
|
||||
# Global cache for detector instances
|
||||
_detector_cache: Dict[str, BaseDetector] = {}
|
||||
|
||||
|
||||
def detect_faces(image: np.ndarray, method: str = 'retinaface', **kwargs) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
High-level face detection function.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): Input image as numpy array.
|
||||
method (str): Detection method to use. Options: 'retinaface', 'scrfd', 'yolov5face'.
|
||||
**kwargs: Additional arguments passed to the detector.
|
||||
|
||||
Returns:
|
||||
List[Dict[str, Any]]: A list of dictionaries, where each dictionary represents a detected face and contains:
|
||||
- 'bbox' (List[float]): [x1, y1, x2, y2] bounding box coordinates.
|
||||
- 'confidence' (float): The confidence score of the detection.
|
||||
- 'landmarks' (List[List[float]]): 5-point facial landmarks.
|
||||
|
||||
Example:
|
||||
>>> from uniface import detect_faces
|
||||
>>> image = cv2.imread("your_image.jpg")
|
||||
>>> faces = detect_faces(image, method='retinaface', conf_thresh=0.8)
|
||||
>>> for face in faces:
|
||||
... print(f"Found face with confidence: {face['confidence']}")
|
||||
... print(f"BBox: {face['bbox']}")
|
||||
"""
|
||||
method_name = method.lower()
|
||||
|
||||
sorted_kwargs = sorted(kwargs.items())
|
||||
cache_key = f'{method_name}_{str(sorted_kwargs)}'
|
||||
|
||||
if cache_key not in _detector_cache:
|
||||
# Pass kwargs to create the correctly configured detector
|
||||
_detector_cache[cache_key] = create_detector(method, **kwargs)
|
||||
|
||||
detector = _detector_cache[cache_key]
|
||||
return detector.detect(image)
|
||||
|
||||
|
||||
def create_detector(method: str = 'retinaface', **kwargs) -> BaseDetector:
|
||||
"""
|
||||
Factory function to create face detectors.
|
||||
|
||||
Args:
|
||||
method (str): Detection method. Options:
|
||||
- 'retinaface': RetinaFace detector (default)
|
||||
- 'scrfd': SCRFD detector (fast and accurate)
|
||||
- 'yolov5face': YOLOv5-Face detector (accurate with landmarks)
|
||||
**kwargs: Detector-specific parameters
|
||||
|
||||
Returns:
|
||||
BaseDetector: Initialized detector instance
|
||||
|
||||
Raises:
|
||||
ValueError: If method is not supported
|
||||
|
||||
Examples:
|
||||
>>> # Basic usage
|
||||
>>> detector = create_detector('retinaface')
|
||||
|
||||
>>> # SCRFD detector with custom parameters
|
||||
>>> detector = create_detector(
|
||||
... 'scrfd',
|
||||
... model_name=SCRFDWeights.SCRFD_10G_KPS,
|
||||
... conf_thresh=0.8,
|
||||
... input_size=(640, 640)
|
||||
... )
|
||||
|
||||
>>> # RetinaFace detector
|
||||
>>> detector = create_detector(
|
||||
... 'retinaface',
|
||||
... model_name=RetinaFaceWeights.MNET_V2,
|
||||
... conf_thresh=0.8,
|
||||
... nms_thresh=0.4
|
||||
... )
|
||||
|
||||
>>> # YOLOv5-Face detector
|
||||
>>> detector = create_detector(
|
||||
... 'yolov5face',
|
||||
... model_name=YOLOv5FaceWeights.YOLOV5S,
|
||||
... conf_thresh=0.25,
|
||||
... nms_thresh=0.45
|
||||
... )
|
||||
"""
|
||||
method = method.lower()
|
||||
|
||||
if method == 'retinaface':
|
||||
return RetinaFace(**kwargs)
|
||||
|
||||
elif method == 'scrfd':
|
||||
return SCRFD(**kwargs)
|
||||
|
||||
elif method == 'yolov5face':
|
||||
return YOLOv5Face(**kwargs)
|
||||
|
||||
else:
|
||||
available_methods = ['retinaface', 'scrfd', 'yolov5face']
|
||||
raise ValueError(f"Unsupported detection method: '{method}'. Available methods: {available_methods}")
|
||||
|
||||
|
||||
def list_available_detectors() -> Dict[str, Dict[str, Any]]:
|
||||
"""
|
||||
List all available detection methods with their descriptions and parameters.
|
||||
|
||||
Returns:
|
||||
Dict[str, Dict[str, Any]]: Dictionary of detector information
|
||||
"""
|
||||
return {
|
||||
'retinaface': {
|
||||
'description': 'RetinaFace detector with high accuracy',
|
||||
'supports_landmarks': True,
|
||||
'paper': 'https://arxiv.org/abs/1905.00641',
|
||||
'default_params': {
|
||||
'model_name': 'mnet_v2',
|
||||
'conf_thresh': 0.5,
|
||||
'nms_thresh': 0.4,
|
||||
'input_size': (640, 640),
|
||||
},
|
||||
},
|
||||
'scrfd': {
|
||||
'description': 'SCRFD detector - fast and accurate with efficient architecture',
|
||||
'supports_landmarks': True,
|
||||
'paper': 'https://arxiv.org/abs/2105.04714',
|
||||
'default_params': {
|
||||
'model_name': 'scrfd_10g_kps',
|
||||
'conf_thresh': 0.5,
|
||||
'nms_thresh': 0.4,
|
||||
'input_size': (640, 640),
|
||||
},
|
||||
},
|
||||
'yolov5face': {
|
||||
'description': 'YOLOv5-Face detector - accurate face detection with landmarks',
|
||||
'supports_landmarks': True,
|
||||
'paper': 'https://arxiv.org/abs/2105.12931',
|
||||
'default_params': {
|
||||
'model_name': 'yolov5s_face',
|
||||
'conf_thresh': 0.25,
|
||||
'nms_thresh': 0.45,
|
||||
'input_size': 640,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
__all__ = [
|
||||
'detect_faces',
|
||||
'create_detector',
|
||||
'list_available_detectors',
|
||||
'SCRFD',
|
||||
'RetinaFace',
|
||||
'YOLOv5Face',
|
||||
'BaseDetector',
|
||||
]
|
||||
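Usage sketch for the factory and the per-configuration detector cache defined above (the image filename is illustrative, not part of the package):

import cv2
from uniface.detection import create_detector, detect_faces, list_available_detectors

image = cv2.imread('group.jpg')  # any BGR image loaded with OpenCV

# One-shot helper: detector instances are cached per (method, kwargs) pair,
# so repeated calls with the same arguments reuse the same ONNX session.
faces = detect_faces(image, method='scrfd', conf_thresh=0.6)

# Explicit instance when the detector should be kept around by the caller.
detector = create_detector('retinaface', conf_thresh=0.8)
faces = detector.detect(image, max_num=5, metric='max')

print(list_available_detectors().keys())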
104
uniface/detection/base.py
Normal file
@@ -0,0 +1,104 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Any, Dict, List
|
||||
|
||||
import numpy as np
|
||||
|
||||
|
||||
class BaseDetector(ABC):
|
||||
"""
|
||||
Abstract base class for all face detectors.
|
||||
|
||||
This class defines the interface that all face detectors must implement,
|
||||
ensuring consistency across different detection methods.
|
||||
"""
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
"""Initialize the detector with configuration parameters."""
|
||||
self.config = kwargs
|
||||
|
||||
@abstractmethod
|
||||
def detect(self, image: np.ndarray, **kwargs) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Detect faces in an image.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): Input image as numpy array with shape (H, W, C)
|
||||
**kwargs: Additional detection parameters
|
||||
|
||||
Returns:
|
||||
List[Dict[str, Any]]: List of detected faces, where each dictionary contains:
|
||||
- 'bbox' (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
|
||||
- 'confidence' (float): Detection confidence score (0.0 to 1.0)
|
||||
- 'landmarks' (np.ndarray): Facial landmarks with shape (5, 2) for 5-point landmarks
|
||||
or (68, 2) for 68-point landmarks. Empty array if not supported.
|
||||
|
||||
Example:
|
||||
>>> faces = detector.detect(image)
|
||||
>>> for face in faces:
|
||||
... bbox = face['bbox'] # np.ndarray with shape (4,)
|
||||
... confidence = face['confidence'] # float
|
||||
... landmarks = face['landmarks'] # np.ndarray with shape (5, 2)
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def preprocess(self, image: np.ndarray) -> np.ndarray:
|
||||
"""
|
||||
Preprocess input image for detection.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): Input image
|
||||
|
||||
Returns:
|
||||
np.ndarray: Preprocessed image tensor
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def postprocess(self, outputs, **kwargs) -> Any:
|
||||
"""
|
||||
Postprocess model outputs to get final detections.
|
||||
|
||||
Args:
|
||||
outputs: Raw model outputs
|
||||
**kwargs: Additional postprocessing parameters
|
||||
|
||||
Returns:
|
||||
Any: Processed outputs (implementation-specific format, typically tuple of arrays)
|
||||
"""
|
||||
pass
|
||||
|
||||
def __str__(self) -> str:
|
||||
"""String representation of the detector."""
|
||||
return f'{self.__class__.__name__}({self.config})'
|
||||
|
||||
def __repr__(self) -> str:
|
||||
"""Detailed string representation."""
|
||||
return self.__str__()
|
||||
|
||||
@property
|
||||
def supports_landmarks(self) -> bool:
|
||||
"""
|
||||
Whether this detector supports landmark detection.
|
||||
|
||||
Returns:
|
||||
bool: True if landmarks are supported, False otherwise
|
||||
"""
|
||||
return hasattr(self, '_supports_landmarks') and self._supports_landmarks
|
||||
|
||||
def get_info(self) -> Dict[str, Any]:
|
||||
"""
|
||||
Get detector information and configuration.
|
||||
|
||||
Returns:
|
||||
Dict[str, Any]: Detector information
|
||||
"""
|
||||
return {
|
||||
'name': self.__class__.__name__,
|
||||
'supports_landmarks': self.supports_landmarks,
|
||||
'config': self.config,
|
||||
}
|
||||
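A hypothetical minimal subclass, only to illustrate the interface that BaseDetector requires (DummyDetector and its behaviour are illustrative, not part of the package):

from typing import Any, Dict, List

import numpy as np

from uniface.detection.base import BaseDetector


class DummyDetector(BaseDetector):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self._supports_landmarks = False

    def preprocess(self, image: np.ndarray) -> np.ndarray:
        return image.astype(np.float32)[None]  # add a batch dimension

    def postprocess(self, outputs, **kwargs):
        return outputs  # nothing to decode in this toy example

    def detect(self, image: np.ndarray, **kwargs) -> List[Dict[str, Any]]:
        # Always "detects" a single face covering the whole image.
        h, w = image.shape[:2]
        return [{
            'bbox': np.array([0, 0, w, h], dtype=np.float32),
            'confidence': 1.0,
            'landmarks': np.empty((0, 2), dtype=np.float32),
        }]


print(DummyDetector(conf_thresh=0.5).get_info())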
383
uniface/detection/retinaface.py
Normal file
@@ -0,0 +1,383 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from typing import Any, Dict, List, Literal, Tuple
|
||||
|
||||
import numpy as np
|
||||
|
||||
from uniface.common import (
|
||||
decode_boxes,
|
||||
decode_landmarks,
|
||||
generate_anchors,
|
||||
non_max_suppression,
|
||||
resize_image,
|
||||
)
|
||||
from uniface.constants import RetinaFaceWeights
|
||||
from uniface.log import Logger
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.onnx_utils import create_onnx_session
|
||||
|
||||
from .base import BaseDetector
|
||||
|
||||
|
||||
class RetinaFace(BaseDetector):
|
||||
"""
|
||||
Face detector based on the RetinaFace architecture.
|
||||
|
||||
Title: "RetinaFace: Single-stage Dense Face Localisation in the Wild"
|
||||
Paper: https://arxiv.org/abs/1905.00641
|
||||
Code: https://github.com/yakhyo/retinaface-pytorch
|
||||
|
||||
Args:
|
||||
model_name (RetinaFaceWeights): Model weights to use. Defaults to `RetinaFaceWeights.MNET_V2`.
|
||||
conf_thresh (float): Confidence threshold for filtering detections. Defaults to 0.5.
|
||||
nms_thresh (float): Non-maximum suppression (NMS) IoU threshold. Defaults to 0.4.
|
||||
input_size (Tuple[int, int]): Fixed input size (width, height) if `dynamic_size=False`.
|
||||
Defaults to (640, 640).
|
||||
Note: Non-default sizes may cause slower inference and CoreML compatibility issues.
|
||||
**kwargs: Advanced options:
|
||||
pre_nms_topk (int): Number of top-scoring boxes considered before NMS. Defaults to 5000.
|
||||
post_nms_topk (int): Max number of detections kept after NMS. Defaults to 750.
|
||||
dynamic_size (bool): If True, generate anchors dynamically per input image. Defaults to False.
|
||||
|
||||
Attributes:
|
||||
model_name (RetinaFaceWeights): Selected model variant.
|
||||
conf_thresh (float): Threshold for confidence-based filtering.
|
||||
nms_thresh (float): IoU threshold used for NMS.
|
||||
pre_nms_topk (int): Limit on proposals before applying NMS.
|
||||
post_nms_topk (int): Limit on retained detections after NMS.
|
||||
dynamic_size (bool): Flag indicating dynamic or static input sizing.
|
||||
input_size (Tuple[int, int]): Static input size if `dynamic_size=False`.
|
||||
_model_path (str): Absolute path to the verified model weights.
|
||||
_priors (np.ndarray): Precomputed anchor boxes (if static size).
|
||||
_supports_landmarks (bool): Indicates landmark prediction support.
|
||||
|
||||
Raises:
|
||||
ValueError: If the model weights are invalid or not found.
|
||||
RuntimeError: If the ONNX model fails to load or initialize.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
*,
|
||||
model_name: RetinaFaceWeights = RetinaFaceWeights.MNET_V2,
|
||||
conf_thresh: float = 0.5,
|
||||
nms_thresh: float = 0.4,
|
||||
input_size: Tuple[int, int] = (640, 640),
|
||||
**kwargs: Any,
|
||||
) -> None:
|
||||
super().__init__(
|
||||
model_name=model_name,
|
||||
conf_thresh=conf_thresh,
|
||||
nms_thresh=nms_thresh,
|
||||
input_size=input_size,
|
||||
**kwargs,
|
||||
)
|
||||
self._supports_landmarks = True # RetinaFace supports landmarks
|
||||
|
||||
self.model_name = model_name
|
||||
self.conf_thresh = conf_thresh
|
||||
self.nms_thresh = nms_thresh
|
||||
self.input_size = input_size
|
||||
|
||||
# Advanced options from kwargs
|
||||
self.pre_nms_topk = kwargs.get('pre_nms_topk', 5000)
|
||||
self.post_nms_topk = kwargs.get('post_nms_topk', 750)
|
||||
self.dynamic_size = kwargs.get('dynamic_size', False)
|
||||
|
||||
Logger.info(
|
||||
f'Initializing RetinaFace with model={self.model_name}, conf_thresh={self.conf_thresh}, '
|
||||
f'nms_thresh={self.nms_thresh}, input_size={self.input_size}'
|
||||
)
|
||||
|
||||
# Get path to model weights
|
||||
self._model_path = verify_model_weights(self.model_name)
|
||||
Logger.info(f'Verified model weights located at: {self._model_path}')
|
||||
|
||||
# Precompute anchors if using static size
|
||||
if not self.dynamic_size and self.input_size is not None:
|
||||
self._priors = generate_anchors(image_size=self.input_size)
|
||||
Logger.debug('Generated anchors for static input size.')
|
||||
|
||||
# Initialize model
|
||||
self._initialize_model(self._model_path)
|
||||
|
||||
def _initialize_model(self, model_path: str) -> None:
|
||||
"""
|
||||
Initializes an ONNX model session from the given path.
|
||||
|
||||
Args:
|
||||
model_path (str): The file path to the ONNX model.
|
||||
|
||||
Raises:
|
||||
RuntimeError: If the model fails to load, logs an error and raises an exception.
|
||||
"""
|
||||
try:
|
||||
self.session = create_onnx_session(model_path)
|
||||
self.input_names = self.session.get_inputs()[0].name
|
||||
self.output_names = [x.name for x in self.session.get_outputs()]
|
||||
Logger.info(f'Successfully initialized the model from {model_path}')
|
||||
except Exception as e:
|
||||
Logger.error(f"Failed to load model from '{model_path}': {e}", exc_info=True)
|
||||
raise RuntimeError(f"Failed to initialize model session for '{model_path}'") from e
|
||||
|
||||
def preprocess(self, image: np.ndarray) -> np.ndarray:
|
||||
"""Preprocess input image for model inference.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): Input image.
|
||||
|
||||
Returns:
|
||||
np.ndarray: Preprocessed image tensor with shape (1, C, H, W)
|
||||
"""
|
||||
image = np.float32(image) - np.array([104, 117, 123], dtype=np.float32)
|
||||
image = image.transpose(2, 0, 1) # HWC to CHW
|
||||
image = np.expand_dims(image, axis=0) # Add batch dimension (1, C, H, W)
|
||||
return image
|
||||
|
||||
def inference(self, input_tensor: np.ndarray) -> List[np.ndarray]:
|
||||
"""Perform model inference on the preprocessed image tensor.
|
||||
|
||||
Args:
|
||||
input_tensor (np.ndarray): Preprocessed input tensor.
|
||||
|
||||
Returns:
|
||||
List[np.ndarray]: Raw model outputs.
|
||||
"""
|
||||
return self.session.run(self.output_names, {self.input_names: input_tensor})
|
||||
|
||||
def detect(
|
||||
self,
|
||||
image: np.ndarray,
|
||||
*,
|
||||
max_num: int = 0,
|
||||
metric: Literal['default', 'max'] = 'max',
|
||||
center_weight: float = 2.0,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Perform face detection on an input image and return bounding boxes and facial landmarks.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): Input image as a NumPy array of shape (H, W, C).
|
||||
max_num (int): Maximum number of detections to return. Use 0 to return all detections. Defaults to 0.
|
||||
metric (Literal["default", "max"]): Metric for ranking detections when `max_num` is limited.
|
||||
- "default": Prioritize detections closer to the image center.
|
||||
- "max": Prioritize detections with larger bounding box areas.
|
||||
center_weight (float): Weight for penalizing detections farther from the image center
|
||||
when using the "default" metric. Defaults to 2.0.
|
||||
|
||||
Returns:
|
||||
List[Dict[str, Any]]: List of face detection dictionaries, each containing:
|
||||
- 'bbox' (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
|
||||
- 'confidence' (float): Detection confidence score (0.0 to 1.0)
|
||||
- 'landmarks' (np.ndarray): 5-point facial landmarks with shape (5, 2)
|
||||
|
||||
Example:
|
||||
>>> faces = detector.detect(image)
|
||||
>>> for face in faces:
|
||||
... bbox = face['bbox'] # np.ndarray with shape (4,)
|
||||
... confidence = face['confidence'] # float
|
||||
... landmarks = face['landmarks'] # np.ndarray with shape (5, 2)
|
||||
... # Can pass landmarks directly to recognition
|
||||
... embedding = recognizer.get_normalized_embedding(image, landmarks)
|
||||
"""
|
||||
|
||||
original_height, original_width = image.shape[:2]
|
||||
|
||||
if self.dynamic_size:
|
||||
height, width, _ = image.shape
|
||||
self._priors = generate_anchors(image_size=(height, width)) # generate anchors for each input image
|
||||
resize_factor = 1.0 # No resizing
|
||||
else:
|
||||
image, resize_factor = resize_image(image, target_shape=self.input_size)
|
||||
|
||||
height, width, _ = image.shape
|
||||
image_tensor = self.preprocess(image)
|
||||
|
||||
# ONNXRuntime inference
|
||||
outputs = self.inference(image_tensor)
|
||||
|
||||
# Postprocessing
|
||||
detections, landmarks = self.postprocess(outputs, resize_factor, shape=(width, height))
|
||||
|
||||
if max_num > 0 and detections.shape[0] > max_num:
|
||||
# Calculate area of detections
|
||||
areas = (detections[:, 2] - detections[:, 0]) * (detections[:, 3] - detections[:, 1])
|
||||
|
||||
# Calculate offsets from image center
|
||||
center = (original_height // 2, original_width // 2)
|
||||
offsets = np.vstack(
|
||||
[
|
||||
(detections[:, 0] + detections[:, 2]) / 2 - center[1],
|
||||
(detections[:, 1] + detections[:, 3]) / 2 - center[0],
|
||||
]
|
||||
)
|
||||
offset_dist_squared = np.sum(np.power(offsets, 2.0), axis=0)
|
||||
|
||||
# Calculate scores based on the chosen metric
|
||||
if metric == 'max':
|
||||
scores = areas
|
||||
else:
|
||||
scores = areas - offset_dist_squared * center_weight
|
||||
|
||||
# Sort by scores and select top `max_num`
|
||||
sorted_indices = np.argsort(scores)[::-1][:max_num]
|
||||
|
||||
detections = detections[sorted_indices]
|
||||
landmarks = landmarks[sorted_indices]
|
||||
|
||||
faces = []
|
||||
for i in range(detections.shape[0]):
|
||||
face_dict = {
|
||||
'bbox': detections[i, :4],
|
||||
'confidence': float(detections[i, 4]),
|
||||
'landmarks': landmarks[i],
|
||||
}
|
||||
faces.append(face_dict)
|
||||
|
||||
return faces
|
||||
|
||||
def postprocess(
|
||||
self, outputs: List[np.ndarray], resize_factor: float, shape: Tuple[int, int]
|
||||
) -> Tuple[np.ndarray, np.ndarray]:
|
||||
"""
|
||||
Process the model outputs into final detection results.
|
||||
|
||||
Args:
|
||||
outputs (List[np.ndarray]): Raw outputs from the detection model.
|
||||
- outputs[0]: Location predictions (bounding box coordinates).
|
||||
- outputs[1]: Class confidence scores.
|
||||
- outputs[2]: Landmark predictions.
|
||||
resize_factor (float): Factor used to resize the input image during preprocessing.
|
||||
shape (Tuple[int, int]): Shape of the resized input image as (width, height).
|
||||
|
||||
Returns:
|
||||
Tuple[np.ndarray, np.ndarray]: Processed results containing:
|
||||
- detections (np.ndarray): Array of detected bounding boxes with confidence scores.
|
||||
Shape: (num_detections, 5), where each row is [x_min, y_min, x_max, y_max, score].
|
||||
- landmarks (np.ndarray): Array of detected facial landmarks.
|
||||
Shape: (num_detections, 5, 2), where each row contains 5 landmark points (x, y).
|
||||
"""
|
||||
loc, conf, landmarks = (
|
||||
outputs[0].squeeze(0),
|
||||
outputs[1].squeeze(0),
|
||||
outputs[2].squeeze(0),
|
||||
)
|
||||
|
||||
# Decode boxes and landmarks
|
||||
boxes = decode_boxes(loc, self._priors)
|
||||
landmarks = decode_landmarks(landmarks, self._priors)
|
||||
|
||||
boxes, landmarks = self._scale_detections(boxes, landmarks, resize_factor, shape=(shape[0], shape[1]))
|
||||
|
||||
# Extract confidence scores for the face class
|
||||
scores = conf[:, 1]
|
||||
mask = scores > self.conf_thresh
|
||||
|
||||
# Filter by confidence threshold
|
||||
boxes, landmarks, scores = boxes[mask], landmarks[mask], scores[mask]
|
||||
|
||||
# Sort by scores
|
||||
order = scores.argsort()[::-1][: self.pre_nms_topk]
|
||||
boxes, landmarks, scores = boxes[order], landmarks[order], scores[order]
|
||||
|
||||
# Apply NMS
|
||||
detections = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
|
||||
keep = non_max_suppression(detections, self.nms_thresh)
|
||||
detections, landmarks = detections[keep], landmarks[keep]
|
||||
|
||||
# Keep top-k detections
|
||||
detections, landmarks = (
|
||||
detections[: self.post_nms_topk],
|
||||
landmarks[: self.post_nms_topk],
|
||||
)
|
||||
|
||||
landmarks = landmarks.reshape(-1, 5, 2).astype(np.float32)
|
||||
|
||||
return detections, landmarks
|
||||
|
||||
def _scale_detections(
|
||||
self,
|
||||
boxes: np.ndarray,
|
||||
landmarks: np.ndarray,
|
||||
resize_factor: float,
|
||||
shape: Tuple[int, int],
|
||||
) -> Tuple[np.ndarray, np.ndarray]:
|
||||
# Scale bounding boxes and landmarks to the original image size.
|
||||
bbox_scale = np.array([shape[0], shape[1]] * 2)
|
||||
boxes = boxes * bbox_scale / resize_factor
|
||||
|
||||
landmark_scale = np.array([shape[0], shape[1]] * 5)
|
||||
landmarks = landmarks * landmark_scale / resize_factor
|
||||
|
||||
return boxes, landmarks
|
||||
|
||||
|
||||
# TODO: below is only for testing, remove it later
|
||||
def draw_bbox(frame, bbox, score, color=(0, 255, 0), thickness=2):
|
||||
x1, y1, x2, y2 = map(int, bbox) # Unpack 4 bbox values
|
||||
cv2.rectangle(frame, (x1, y1), (x2, y2), color, thickness)
|
||||
cv2.putText(frame, f'{score:.2f}', (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)
|
||||
|
||||
|
||||
def draw_keypoints(frame, points, color=(0, 0, 255), radius=2):
|
||||
for x, y in points.astype(np.int32):
|
||||
cv2.circle(frame, (int(x), int(y)), radius, color, -1)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
import cv2
|
||||
|
||||
detector = RetinaFace(model_name=RetinaFaceWeights.MNET_050)
|
||||
print(detector.get_info())
|
||||
cap = cv2.VideoCapture(0)
|
||||
|
||||
if not cap.isOpened():
|
||||
print('Failed to open webcam.')
|
||||
exit()
|
||||
|
||||
print("Webcam started. Press 'q' to exit.")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
print('Failed to read frame.')
|
||||
break
|
||||
|
||||
# Get face detections as list of dictionaries
|
||||
faces = detector.detect(frame)
|
||||
|
||||
# Process each detected face
|
||||
for face in faces:
|
||||
# Extract bbox and landmarks from dictionary
|
||||
bbox = face['bbox'] # [x1, y1, x2, y2]
|
||||
landmarks = face['landmarks'] # [[x1, y1], [x2, y2], ...]
|
||||
confidence = face['confidence']
|
||||
|
||||
# Pass bbox and confidence separately
|
||||
draw_bbox(frame, bbox, confidence)
|
||||
|
||||
# Convert landmarks to numpy array format if needed
|
||||
if landmarks is not None and len(landmarks) > 0:
|
||||
# Convert list of [x, y] pairs to numpy array
|
||||
points = np.array(landmarks, dtype=np.float32) # Shape: (5, 2)
|
||||
draw_keypoints(frame, points)
|
||||
|
||||
# Display face count
|
||||
cv2.putText(
|
||||
frame,
|
||||
f'Faces: {len(faces)}',
|
||||
(10, 30),
|
||||
cv2.FONT_HERSHEY_SIMPLEX,
|
||||
0.7,
|
||||
(255, 255, 255),
|
||||
2,
|
||||
)
|
||||
|
||||
cv2.imshow('FaceDetection', frame)
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
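A single-image sketch that mirrors the webcam demo above (the file names are illustrative):

import cv2
from uniface.constants import RetinaFaceWeights
from uniface.detection import RetinaFace

detector = RetinaFace(model_name=RetinaFaceWeights.MNET_V2, conf_thresh=0.7)
image = cv2.imread('group_photo.jpg')

# Keep at most 10 faces, preferring those near the image centre ('default' metric).
for face in detector.detect(image, max_num=10, metric='default'):
    x1, y1, x2, y2 = face['bbox'].astype(int)
    cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)

cv2.imwrite('group_photo_out.jpg', image)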
357
uniface/detection/scrfd.py
Normal file
@@ -0,0 +1,357 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from typing import Any, Dict, List, Literal, Tuple
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from uniface.common import distance2bbox, distance2kps, non_max_suppression, resize_image
|
||||
from uniface.constants import SCRFDWeights
|
||||
from uniface.log import Logger
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.onnx_utils import create_onnx_session
|
||||
|
||||
from .base import BaseDetector
|
||||
|
||||
__all__ = ['SCRFD']
|
||||
|
||||
|
||||
class SCRFD(BaseDetector):
|
||||
"""
|
||||
Face detector based on the SCRFD architecture.
|
||||
|
||||
Title: "Sample and Computation Redistribution for Efficient Face Detection"
|
||||
Paper: https://arxiv.org/abs/2105.04714
|
||||
Code: https://github.com/insightface/insightface
|
||||
|
||||
Args:
|
||||
model_name (SCRFDWeights): Predefined model enum (e.g., `SCRFD_10G_KPS`).
|
||||
Specifies the SCRFD variant to load. Defaults to SCRFD_10G_KPS.
|
||||
conf_thresh (float): Confidence threshold for filtering detections. Defaults to 0.5.
|
||||
nms_thresh (float): Non-Maximum Suppression threshold. Defaults to 0.4.
|
||||
input_size (Tuple[int, int]): Input image size (width, height).
|
||||
Defaults to (640, 640).
|
||||
Note: Non-default sizes may cause slower inference and CoreML compatibility issues.
|
||||
**kwargs: Reserved for future advanced options.
|
||||
|
||||
Attributes:
|
||||
model_name (SCRFDWeights): Selected model variant.
|
||||
conf_thresh (float): Threshold used to filter low-confidence detections.
|
||||
nms_thresh (float): Threshold used during NMS to suppress overlapping boxes.
|
||||
input_size (Tuple[int, int]): Image size to which inputs are resized before inference.
|
||||
_fmc (int): Number of feature map levels used in the model.
|
||||
_feat_stride_fpn (List[int]): Feature map strides corresponding to each detection level.
|
||||
_num_anchors (int): Number of anchors per feature location.
|
||||
_center_cache (Dict): Cached anchor centers for efficient forward passes.
|
||||
_model_path (str): Absolute path to the downloaded/verified model weights.
|
||||
|
||||
Raises:
|
||||
ValueError: If the model weights are invalid or not found.
|
||||
RuntimeError: If the ONNX model fails to load or initialize.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
*,
|
||||
model_name: SCRFDWeights = SCRFDWeights.SCRFD_10G_KPS,
|
||||
conf_thresh: float = 0.5,
|
||||
nms_thresh: float = 0.4,
|
||||
input_size: Tuple[int, int] = (640, 640),
|
||||
**kwargs: Any,
|
||||
) -> None:
|
||||
super().__init__(
|
||||
model_name=model_name,
|
||||
conf_thresh=conf_thresh,
|
||||
nms_thresh=nms_thresh,
|
||||
input_size=input_size,
|
||||
**kwargs,
|
||||
)
|
||||
self._supports_landmarks = True # SCRFD supports landmarks
|
||||
|
||||
self.model_name = model_name
|
||||
self.conf_thresh = conf_thresh
|
||||
self.nms_thresh = nms_thresh
|
||||
self.input_size = input_size
|
||||
|
||||
# ------- SCRFD model params ------
|
||||
self._fmc = 3
|
||||
self._feat_stride_fpn = [8, 16, 32]
|
||||
self._num_anchors = 2
|
||||
self._center_cache = {}
|
||||
# ---------------------------------
|
||||
|
||||
Logger.info(
|
||||
f'Initializing SCRFD with model={self.model_name}, conf_thresh={self.conf_thresh}, '
|
||||
f'nms_thresh={self.nms_thresh}, input_size={self.input_size}'
|
||||
)
|
||||
|
||||
# Get path to model weights
|
||||
self._model_path = verify_model_weights(self.model_name)
|
||||
Logger.info(f'Verified model weights located at: {self._model_path}')
|
||||
|
||||
# Initialize model
|
||||
self._initialize_model(self._model_path)
|
||||
|
||||
def _initialize_model(self, model_path: str) -> None:
|
||||
"""
|
||||
Initializes an ONNX model session from the given path.
|
||||
|
||||
Args:
|
||||
model_path (str): The file path to the ONNX model.
|
||||
|
||||
Raises:
|
||||
RuntimeError: If the model fails to load, logs an error and raises an exception.
|
||||
"""
|
||||
try:
|
||||
self.session = create_onnx_session(model_path)
|
||||
self.input_names = self.session.get_inputs()[0].name
|
||||
self.output_names = [x.name for x in self.session.get_outputs()]
|
||||
Logger.info(f'Successfully initialized the model from {model_path}')
|
||||
except Exception as e:
|
||||
Logger.error(f"Failed to load model from '{model_path}': {e}", exc_info=True)
|
||||
raise RuntimeError(f"Failed to initialize model session for '{model_path}'") from e
|
||||
|
||||
def preprocess(self, image: np.ndarray) -> np.ndarray:
|
||||
"""Preprocess image for inference.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): Input image
|
||||
|
||||
Returns:
|
||||
np.ndarray: Preprocessed image tensor with shape (1, C, H, W)
|
||||
"""
|
||||
image = image.astype(np.float32)
|
||||
image = (image - 127.5) / 127.5
|
||||
image = image.transpose(2, 0, 1) # HWC to CHW
|
||||
image = np.expand_dims(image, axis=0)
|
||||
|
||||
return image
|
||||
|
||||
def inference(self, input_tensor: np.ndarray) -> List[np.ndarray]:
|
||||
"""Perform model inference on the preprocessed image tensor.
|
||||
|
||||
Args:
|
||||
input_tensor (np.ndarray): Preprocessed input tensor.
|
||||
|
||||
Returns:
|
||||
List[np.ndarray]: Raw model outputs.
|
||||
"""
|
||||
return self.session.run(self.output_names, {self.input_names: input_tensor})
|
||||
|
||||
def postprocess(self, outputs: List[np.ndarray], image_size: Tuple[int, int]) -> Tuple[List[np.ndarray], List[np.ndarray], List[np.ndarray]]:
|
||||
scores_list = []
|
||||
bboxes_list = []
|
||||
kpss_list = []
|
||||
|
||||
|
||||
fmc = self._fmc
|
||||
for idx, stride in enumerate(self._feat_stride_fpn):
|
||||
scores = outputs[idx]
|
||||
bbox_preds = outputs[fmc + idx] * stride
|
||||
kps_preds = outputs[2 * fmc + idx] * stride
|
||||
|
||||
# Generate anchors
|
||||
fm_height = image_size[0] // stride
|
||||
fm_width = image_size[1] // stride
|
||||
cache_key = (fm_height, fm_width, stride)
|
||||
|
||||
if cache_key in self._center_cache:
|
||||
anchor_centers = self._center_cache[cache_key]
|
||||
else:
|
||||
y, x = np.mgrid[:fm_height, :fm_width]
|
||||
anchor_centers = np.stack((x, y), axis=-1).astype(np.float32)
|
||||
anchor_centers = (anchor_centers * stride).reshape(-1, 2)
|
||||
|
||||
if self._num_anchors > 1:
|
||||
anchor_centers = np.tile(anchor_centers[:, None, :], (1, self._num_anchors, 1)).reshape(-1, 2)
|
||||
|
||||
if len(self._center_cache) < 100:
|
||||
self._center_cache[cache_key] = anchor_centers
|
||||
|
||||
pos_indices = np.where(scores >= self.conf_thresh)[0]
|
||||
if len(pos_indices) == 0:
|
||||
continue
|
||||
|
||||
bboxes = distance2bbox(anchor_centers, bbox_preds)[pos_indices]
|
||||
scores_selected = scores[pos_indices]
|
||||
scores_list.append(scores_selected)
|
||||
bboxes_list.append(bboxes)
|
||||
|
||||
landmarks = distance2kps(anchor_centers, kps_preds)
|
||||
landmarks = landmarks.reshape((landmarks.shape[0], -1, 2))
|
||||
kpss_list.append(landmarks[pos_indices])
|
||||
|
||||
return scores_list, bboxes_list, kpss_list
|
||||
|
||||
def detect(
|
||||
self,
|
||||
image: np.ndarray,
|
||||
*,
|
||||
max_num: int = 0,
|
||||
metric: Literal['default', 'max'] = 'max',
|
||||
center_weight: float = 2.0,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Perform face detection on an input image and return bounding boxes and facial landmarks.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): Input image as a NumPy array of shape (H, W, C).
|
||||
max_num (int): Maximum number of detections to return. Use 0 to return all detections. Defaults to 0.
|
||||
metric (Literal["default", "max"]): Metric for ranking detections when `max_num` is limited.
|
||||
- "default": Prioritize detections closer to the image center.
|
||||
- "max": Prioritize detections with larger bounding box areas.
|
||||
center_weight (float): Weight for penalizing detections farther from the image center
|
||||
when using the "default" metric. Defaults to 2.0.
|
||||
|
||||
Returns:
|
||||
List[Dict[str, Any]]: List of face detection dictionaries, each containing:
|
||||
- 'bbox' (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
|
||||
- 'confidence' (float): Detection confidence score (0.0 to 1.0)
|
||||
- 'landmarks' (np.ndarray): 5-point facial landmarks with shape (5, 2)
|
||||
|
||||
Example:
|
||||
>>> faces = detector.detect(image)
|
||||
>>> for face in faces:
|
||||
... bbox = face['bbox'] # np.ndarray with shape (4,)
|
||||
... confidence = face['confidence'] # float
|
||||
... landmarks = face['landmarks'] # np.ndarray with shape (5, 2)
|
||||
... # Can pass landmarks directly to recognition
|
||||
... embedding = recognizer.get_normalized_embedding(image, landmarks)
|
||||
"""
|
||||
|
||||
original_height, original_width = image.shape[:2]
|
||||
|
||||
image, resize_factor = resize_image(image, target_shape=self.input_size)
|
||||
|
||||
image_tensor = self.preprocess(image)
|
||||
|
||||
# ONNXRuntime inference
|
||||
outputs = self.inference(image_tensor)
|
||||
|
||||
scores_list, bboxes_list, kpss_list = self.postprocess(outputs, image_size=image.shape[:2])
|
||||
|
||||
# Handle case when no faces are detected
|
||||
if not scores_list:
|
||||
return []
|
||||
|
||||
scores = np.vstack(scores_list)
|
||||
scores_ravel = scores.ravel()
|
||||
order = scores_ravel.argsort()[::-1]
|
||||
|
||||
bboxes = np.vstack(bboxes_list) / resize_factor
|
||||
landmarks = np.vstack(kpss_list) / resize_factor
|
||||
|
||||
pre_det = np.hstack((bboxes, scores)).astype(np.float32, copy=False)
|
||||
pre_det = pre_det[order, :]
|
||||
|
||||
keep = non_max_suppression(pre_det, threshold=self.nms_thresh)
|
||||
|
||||
detections = pre_det[keep, :]
|
||||
landmarks = landmarks[order, :, :]
|
||||
landmarks = landmarks[keep, :, :].astype(np.float32)
|
||||
|
||||
if 0 < max_num < detections.shape[0]:
|
||||
# Calculate area of detections
|
||||
area = (detections[:, 2] - detections[:, 0]) * (detections[:, 3] - detections[:, 1])
|
||||
|
||||
# Calculate offsets from image center
|
||||
center = (original_height // 2, original_width // 2)
|
||||
offsets = np.vstack(
|
||||
[
|
||||
(detections[:, 0] + detections[:, 2]) / 2 - center[1],
|
||||
(detections[:, 1] + detections[:, 3]) / 2 - center[0],
|
||||
]
|
||||
)
|
||||
|
||||
# Calculate scores based on the chosen metric
|
||||
offset_dist_squared = np.sum(np.power(offsets, 2.0), axis=0)
|
||||
if metric == 'max':
|
||||
values = area
|
||||
else:
|
||||
values = area - offset_dist_squared * center_weight
|
||||
|
||||
# Sort by scores and select top `max_num`
|
||||
sorted_indices = np.argsort(values)[::-1][:max_num]
|
||||
detections = detections[sorted_indices]
|
||||
landmarks = landmarks[sorted_indices]
|
||||
|
||||
faces = []
|
||||
for i in range(detections.shape[0]):
|
||||
face_dict = {
|
||||
'bbox': detections[i, :4],
|
||||
'confidence': float(detections[i, 4]),
|
||||
'landmarks': landmarks[i],
|
||||
}
|
||||
faces.append(face_dict)
|
||||
|
||||
return faces
|
||||
|
||||
|
||||
# TODO: below is only for testing, remove it later
|
||||
def draw_bbox(frame, bbox, score, color=(0, 255, 0), thickness=2):
|
||||
x1, y1, x2, y2 = map(int, bbox) # Unpack 4 bbox values
|
||||
cv2.rectangle(frame, (x1, y1), (x2, y2), color, thickness)
|
||||
cv2.putText(frame, f'{score:.2f}', (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)
|
||||
|
||||
|
||||
def draw_keypoints(frame, points, color=(0, 0, 255), radius=2):
|
||||
for x, y in points.astype(np.int32):
|
||||
cv2.circle(frame, (int(x), int(y)), radius, color, -1)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
detector = SCRFD(model_name=SCRFDWeights.SCRFD_500M_KPS)
|
||||
print(detector.get_info())
|
||||
cap = cv2.VideoCapture(0)
|
||||
|
||||
if not cap.isOpened():
|
||||
print('Failed to open webcam.')
|
||||
exit()
|
||||
|
||||
print("Webcam started. Press 'q' to exit.")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
print('Failed to read frame.')
|
||||
break
|
||||
|
||||
# Get face detections as list of dictionaries
|
||||
faces = detector.detect(frame)
|
||||
|
||||
# Process each detected face
|
||||
for face in faces:
|
||||
# Extract bbox and landmarks from dictionary
|
||||
bbox = face['bbox'] # [x1, y1, x2, y2]
|
||||
landmarks = face['landmarks'] # [[x1, y1], [x2, y2], ...]
|
||||
confidence = face['confidence']
|
||||
|
||||
# Pass bbox and confidence separately
|
||||
draw_bbox(frame, bbox, confidence)
|
||||
|
||||
# Convert landmarks to numpy array format if needed
|
||||
if landmarks is not None and len(landmarks) > 0:
|
||||
# Convert list of [x, y] pairs to numpy array
|
||||
points = np.array(landmarks, dtype=np.float32) # Shape: (5, 2)
|
||||
draw_keypoints(frame, points)
|
||||
|
||||
# Display face count
|
||||
cv2.putText(
|
||||
frame,
|
||||
f'Faces: {len(faces)}',
|
||||
(10, 30),
|
||||
cv2.FONT_HERSHEY_SIMPLEX,
|
||||
0.7,
|
||||
(255, 255, 255),
|
||||
2,
|
||||
)
|
||||
|
||||
cv2.imshow('FaceDetection', frame)
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
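A standalone sketch of the anchor-centre grid built in SCRFD.postprocess above, for one stride level (the feature-map size corresponds to a 640x640 input and is illustrative):

import numpy as np

stride, fm_h, fm_w, num_anchors = 8, 80, 80, 2

y, x = np.mgrid[:fm_h, :fm_w]
centers = np.stack((x, y), axis=-1).astype(np.float32)
centers = (centers * stride).reshape(-1, 2)  # grid indices -> pixel coordinates
centers = np.tile(centers[:, None, :], (1, num_anchors, 1)).reshape(-1, 2)

print(centers.shape)  # (fm_h * fm_w * num_anchors, 2) == (12800, 2)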
340
uniface/detection/yolov5.py
Normal file
@@ -0,0 +1,340 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from typing import Any, Dict, List, Literal, Tuple
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from uniface.common import non_max_suppression
|
||||
from uniface.constants import YOLOv5FaceWeights
|
||||
from uniface.log import Logger
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.onnx_utils import create_onnx_session
|
||||
|
||||
from .base import BaseDetector
|
||||
|
||||
__all__ = ['YOLOv5Face']
|
||||
|
||||
|
||||
class YOLOv5Face(BaseDetector):
|
||||
"""
|
||||
Face detector based on the YOLOv5-Face architecture.
|
||||
|
||||
Title: "YOLO5Face: Why Reinventing a Face Detector"
|
||||
Paper: https://arxiv.org/abs/2105.12931
|
||||
Code: https://github.com/yakhyo/yolov5-face-onnx-inference (ONNX inference implementation)
|
||||
|
||||
Args:
|
||||
model_name (YOLOv5FaceWeights): Predefined model enum (e.g., `YOLOV5S`).
|
||||
Specifies the YOLOv5-Face variant to load. Defaults to YOLOV5S.
|
||||
conf_thresh (float): Confidence threshold for filtering detections. Defaults to 0.6.
|
||||
nms_thresh (float): Non-Maximum Suppression threshold. Defaults to 0.5.
|
||||
input_size (int): Input image size. Defaults to 640.
|
||||
Note: ONNX model is fixed at 640. Changing this will cause inference errors.
|
||||
**kwargs: Advanced options:
|
||||
max_det (int): Maximum number of detections to return. Defaults to 750.
|
||||
|
||||
Attributes:
|
||||
model_name (YOLOv5FaceWeights): Selected model variant.
|
||||
conf_thresh (float): Threshold used to filter low-confidence detections.
|
||||
nms_thresh (float): Threshold used during NMS to suppress overlapping boxes.
|
||||
input_size (int): Image size to which inputs are resized before inference.
|
||||
max_det (int): Maximum number of detections to return.
|
||||
_model_path (str): Absolute path to the downloaded/verified model weights.
|
||||
|
||||
Raises:
|
||||
ValueError: If the model weights are invalid or not found.
|
||||
RuntimeError: If the ONNX model fails to load or initialize.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
*,
|
||||
model_name: YOLOv5FaceWeights = YOLOv5FaceWeights.YOLOV5S,
|
||||
conf_thresh: float = 0.6,
|
||||
nms_thresh: float = 0.5,
|
||||
input_size: int = 640,
|
||||
**kwargs: Any,
|
||||
) -> None:
|
||||
super().__init__(
|
||||
model_name=model_name,
|
||||
conf_thresh=conf_thresh,
|
||||
nms_thresh=nms_thresh,
|
||||
input_size=input_size,
|
||||
**kwargs,
|
||||
)
|
||||
self._supports_landmarks = True # YOLOv5-Face supports landmarks
|
||||
|
||||
# Validate input size
|
||||
if input_size != 640:
|
||||
raise ValueError(
|
||||
f'YOLOv5Face only supports input_size=640 (got {input_size}). The ONNX model has a fixed input shape.'
|
||||
)
|
||||
|
||||
self.model_name = model_name
|
||||
self.conf_thresh = conf_thresh
|
||||
self.nms_thresh = nms_thresh
|
||||
self.input_size = input_size
|
||||
|
||||
# Advanced options from kwargs
|
||||
self.max_det = kwargs.get('max_det', 750)
|
||||
|
||||
Logger.info(
|
||||
f'Initializing YOLOv5Face with model={self.model_name}, conf_thresh={self.conf_thresh}, '
|
||||
f'nms_thresh={self.nms_thresh}, input_size={self.input_size}'
|
||||
)
|
||||
|
||||
# Get path to model weights
|
||||
self._model_path = verify_model_weights(self.model_name)
|
||||
Logger.info(f'Verified model weights located at: {self._model_path}')
|
||||
|
||||
# Initialize model
|
||||
self._initialize_model(self._model_path)
|
||||
|
||||
def _initialize_model(self, model_path: str) -> None:
|
||||
"""
|
||||
Initializes an ONNX model session from the given path.
|
||||
|
||||
Args:
|
||||
model_path (str): The file path to the ONNX model.
|
||||
|
||||
Raises:
|
||||
RuntimeError: If the model fails to load, logs an error and raises an exception.
|
||||
"""
|
||||
try:
|
||||
self.session = create_onnx_session(model_path)
|
||||
self.input_names = self.session.get_inputs()[0].name
|
||||
self.output_names = [x.name for x in self.session.get_outputs()]
|
||||
Logger.info(f'Successfully initialized the model from {model_path}')
|
||||
except Exception as e:
|
||||
Logger.error(f"Failed to load model from '{model_path}': {e}", exc_info=True)
|
||||
raise RuntimeError(f"Failed to initialize model session for '{model_path}'") from e
|
||||
|
||||
def preprocess(self, image: np.ndarray) -> Tuple[np.ndarray, float, Tuple[int, int]]:
|
||||
"""
|
||||
Preprocess image for inference.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): Input image (BGR format)
|
||||
|
||||
Returns:
|
||||
Tuple[np.ndarray, float, Tuple[int, int]]: Preprocessed image, scale ratio, and padding
|
||||
"""
|
||||
# Get original image shape
|
||||
img_h, img_w = image.shape[:2]
|
||||
|
||||
# Calculate scale ratio
|
||||
scale = min(self.input_size / img_h, self.input_size / img_w)
|
||||
new_h, new_w = int(img_h * scale), int(img_w * scale)
|
||||
|
||||
# Resize image
|
||||
img_resized = cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_LINEAR)
|
||||
|
||||
# Create padded image
|
||||
img_padded = np.full((self.input_size, self.input_size, 3), 114, dtype=np.uint8)
|
||||
|
||||
# Calculate padding
|
||||
pad_h = (self.input_size - new_h) // 2
|
||||
pad_w = (self.input_size - new_w) // 2
|
||||
|
||||
# Place resized image in center
|
||||
img_padded[pad_h : pad_h + new_h, pad_w : pad_w + new_w] = img_resized
|
||||
|
||||
# Convert to RGB and normalize
|
||||
img_rgb = cv2.cvtColor(img_padded, cv2.COLOR_BGR2RGB)
|
||||
img_normalized = img_rgb.astype(np.float32) / 255.0
|
||||
|
||||
# Transpose to CHW format (HWC -> CHW) and add batch dimension
|
||||
img_transposed = np.transpose(img_normalized, (2, 0, 1))
|
||||
img_batch = np.expand_dims(img_transposed, axis=0)
|
||||
img_batch = np.ascontiguousarray(img_batch)
|
||||
|
||||
return img_batch, scale, (pad_w, pad_h)
|
||||
|
||||
def inference(self, input_tensor: np.ndarray) -> List[np.ndarray]:
|
||||
"""Perform model inference on the preprocessed image tensor.
|
||||
|
||||
Args:
|
||||
input_tensor (np.ndarray): Preprocessed input tensor.
|
||||
|
||||
Returns:
|
||||
List[np.ndarray]: Raw model outputs.
|
||||
"""
|
||||
return self.session.run(self.output_names, {self.input_names: input_tensor})
|
||||
|
||||
def postprocess(
|
||||
self,
|
||||
predictions: np.ndarray,
|
||||
scale: float,
|
||||
padding: Tuple[int, int],
|
||||
) -> Tuple[np.ndarray, np.ndarray]:
|
||||
"""
|
||||
Postprocess model predictions.
|
||||
|
||||
Args:
|
||||
predictions (np.ndarray): Raw model output
|
||||
scale (float): Scale ratio used in preprocessing
|
||||
padding (Tuple[int, int]): Padding used in preprocessing
|
||||
|
||||
Returns:
|
||||
Tuple[np.ndarray, np.ndarray]: Filtered detections and landmarks
|
||||
- detections: [x1, y1, x2, y2, conf]
|
||||
- landmarks: [5, 2] for each detection
|
||||
"""
|
||||
# predictions shape: (1, 25200, 16)
|
||||
# 16 = [x, y, w, h, obj_conf, 10 landmark coords (5 points * 2), cls_conf]
|
||||
|
||||
predictions = predictions[0] # Remove batch dimension
|
||||
|
||||
# Filter by confidence
|
||||
mask = predictions[:, 4] >= self.conf_thresh
|
||||
predictions = predictions[mask]
|
||||
|
||||
if len(predictions) == 0:
|
||||
return np.array([]), np.array([])
|
||||
|
||||
# Convert from xywh to xyxy
|
||||
boxes = self._xywh2xyxy(predictions[:, :4])
|
||||
|
||||
# Get confidence scores
|
||||
scores = predictions[:, 4]
|
||||
|
||||
# Get landmarks (5 points, 10 coordinates)
|
||||
landmarks = predictions[:, 5:15].copy()
|
||||
|
||||
# Apply NMS
|
||||
detections_for_nms = np.hstack((boxes, scores[:, None])).astype(np.float32, copy=False)
|
||||
keep = non_max_suppression(detections_for_nms, self.nms_thresh)
|
||||
|
||||
if len(keep) == 0:
|
||||
return np.array([]), np.array([])
|
||||
|
||||
# Filter detections and limit to max_det
|
||||
keep = keep[: self.max_det]
|
||||
boxes = boxes[keep]
|
||||
scores = scores[keep]
|
||||
landmarks = landmarks[keep]
|
||||
|
||||
# Scale back to original image coordinates
|
||||
pad_w, pad_h = padding
|
||||
boxes[:, [0, 2]] = (boxes[:, [0, 2]] - pad_w) / scale
|
||||
boxes[:, [1, 3]] = (boxes[:, [1, 3]] - pad_h) / scale
|
||||
|
||||
# Scale landmarks
|
||||
for i in range(5):
|
||||
landmarks[:, i * 2] = (landmarks[:, i * 2] - pad_w) / scale
|
||||
landmarks[:, i * 2 + 1] = (landmarks[:, i * 2 + 1] - pad_h) / scale
|
||||
|
||||
# Reshape landmarks to (N, 5, 2)
|
||||
landmarks = landmarks.reshape(-1, 5, 2)
|
||||
|
||||
# Combine results
|
||||
detections = np.concatenate([boxes, scores[:, None]], axis=1)
|
||||
|
||||
return detections, landmarks
|
||||
|
||||
def _xywh2xyxy(self, x: np.ndarray) -> np.ndarray:
|
||||
"""
|
||||
Convert bounding box format from xywh to xyxy.
|
||||
|
||||
Args:
|
||||
x (np.ndarray): Boxes in [x, y, w, h] format
|
||||
|
||||
Returns:
|
||||
np.ndarray: Boxes in [x1, y1, x2, y2] format
|
||||
"""
|
||||
y = np.copy(x)
|
||||
y[..., 0] = x[..., 0] - x[..., 2] / 2 # x1
|
||||
y[..., 1] = x[..., 1] - x[..., 3] / 2 # y1
|
||||
y[..., 2] = x[..., 0] + x[..., 2] / 2 # x2
|
||||
y[..., 3] = x[..., 1] + x[..., 3] / 2 # y2
|
||||
return y
|
||||
|
||||
def detect(
|
||||
self,
|
||||
image: np.ndarray,
|
||||
*,
|
||||
max_num: int = 0,
|
||||
metric: Literal['default', 'max'] = 'max',
|
||||
center_weight: float = 2.0,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Perform face detection on an input image and return bounding boxes and facial landmarks.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): Input image as a NumPy array of shape (H, W, C).
|
||||
max_num (int): Maximum number of detections to return. Use 0 to return all detections. Defaults to 0.
|
||||
metric (Literal["default", "max"]): Metric for ranking detections when `max_num` is limited.
|
||||
- "default": Prioritize detections closer to the image center.
|
||||
- "max": Prioritize detections with larger bounding box areas.
|
||||
center_weight (float): Weight for penalizing detections farther from the image center
|
||||
when using the "default" metric. Defaults to 2.0.
|
||||
|
||||
Returns:
|
||||
List[Dict[str, Any]]: List of face detection dictionaries, each containing:
|
||||
- 'bbox' (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
|
||||
- 'confidence' (float): Detection confidence score (0.0 to 1.0)
|
||||
- 'landmarks' (np.ndarray): 5-point facial landmarks with shape (5, 2)
|
||||
|
||||
Example:
|
||||
>>> faces = detector.detect(image)
|
||||
>>> for face in faces:
|
||||
... bbox = face['bbox'] # np.ndarray with shape (4,)
|
||||
... confidence = face['confidence'] # float
|
||||
... landmarks = face['landmarks'] # np.ndarray with shape (5, 2)
|
||||
... # Can pass landmarks directly to recognition
|
||||
... embedding = recognizer.get_normalized_embedding(image, landmarks)
|
||||
"""
|
||||
|
||||
original_height, original_width = image.shape[:2]
|
||||
|
||||
# Preprocess
|
||||
image_tensor, scale, padding = self.preprocess(image)
|
||||
|
||||
# ONNXRuntime inference
|
||||
outputs = self.inference(image_tensor)
|
||||
|
||||
# Postprocess
|
||||
detections, landmarks = self.postprocess(outputs[0], scale, padding)
|
||||
|
||||
# Handle case when no faces are detected
|
||||
if len(detections) == 0:
|
||||
return []
|
||||
|
||||
if 0 < max_num < detections.shape[0]:
|
||||
# Calculate area of detections
|
||||
area = (detections[:, 2] - detections[:, 0]) * (detections[:, 3] - detections[:, 1])
|
||||
|
||||
# Calculate offsets from image center
|
||||
center = (original_height // 2, original_width // 2)
|
||||
offsets = np.vstack(
|
||||
[
|
||||
(detections[:, 0] + detections[:, 2]) / 2 - center[1],
|
||||
(detections[:, 1] + detections[:, 3]) / 2 - center[0],
|
||||
]
|
||||
)
|
||||
|
||||
# Calculate scores based on the chosen metric
|
||||
offset_dist_squared = np.sum(np.power(offsets, 2.0), axis=0)
|
||||
if metric == 'max':
|
||||
values = area
|
||||
else:
|
||||
values = area - offset_dist_squared * center_weight
|
||||
|
||||
# Sort by scores and select top `max_num`
|
||||
sorted_indices = np.argsort(values)[::-1][:max_num]
|
||||
detections = detections[sorted_indices]
|
||||
landmarks = landmarks[sorted_indices]
|
||||
|
||||
faces = []
|
||||
for i in range(detections.shape[0]):
|
||||
face_dict = {
|
||||
'bbox': detections[i, :4],
|
||||
'confidence': float(detections[i, 4]),
|
||||
'landmarks': landmarks[i],
|
||||
}
|
||||
faces.append(face_dict)
|
||||
|
||||
return faces
|
||||
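A letterbox round-trip sketch matching the preprocess/postprocess pair above (the image dimensions are illustrative; the ONNX input size is fixed at 640):

img_h, img_w, input_size = 720, 1280, 640

scale = min(input_size / img_h, input_size / img_w)                  # 0.5
new_h, new_w = int(img_h * scale), int(img_w * scale)                # 360, 640
pad_h, pad_w = (input_size - new_h) // 2, (input_size - new_w) // 2  # 140, 0

# A coordinate predicted in padded 640x640 space maps back to the original image:
x_model, y_model = 100.0, 200.0
x_orig = (x_model - pad_w) / scale   # 200.0
y_orig = (y_model - pad_h) / scale   # 120.0
print(x_orig, y_orig)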
66
uniface/face.py
Normal file
@@ -0,0 +1,66 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from dataclasses import asdict, dataclass
|
||||
from typing import Optional
|
||||
|
||||
import numpy as np
|
||||
|
||||
from uniface.face_utils import compute_similarity
|
||||
|
||||
__all__ = ['Face']
|
||||
|
||||
|
||||
@dataclass
|
||||
class Face:
|
||||
"""
|
||||
Detected face with analysis results.
|
||||
"""
|
||||
|
||||
# Required attributes
|
||||
bbox: np.ndarray
|
||||
confidence: float
|
||||
landmarks: np.ndarray
|
||||
|
||||
# Optional attributes
|
||||
embedding: Optional[np.ndarray] = None
|
||||
age: Optional[int] = None
|
||||
gender: Optional[int] = None # 0 or 1
|
||||
|
||||
def compute_similarity(self, other: 'Face') -> float:
|
||||
"""Compute cosine similarity with another face."""
|
||||
if self.embedding is None or other.embedding is None:
|
||||
raise ValueError('Both faces must have embeddings for similarity computation')
|
||||
return float(compute_similarity(self.embedding, other.embedding))
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
"""Convert to dictionary."""
|
||||
return asdict(self)
|
||||
|
||||
@property
|
||||
def sex(self) -> Optional[str]:
|
||||
"""Get gender as a string label (Female or Male)."""
|
||||
if self.gender is None:
|
||||
return None
|
||||
return 'Female' if self.gender == 0 else 'Male'
|
||||
|
||||
@property
|
||||
def bbox_xyxy(self) -> np.ndarray:
|
||||
"""Get bounding box coordinates in (x1, y1, x2, y2) format."""
|
||||
return self.bbox.copy()
|
||||
|
||||
@property
|
||||
def bbox_xywh(self) -> np.ndarray:
|
||||
"""Get bounding box coordinates in (x1, y1, w, h) format."""
|
||||
return np.array([self.bbox[0], self.bbox[1], self.bbox[2] - self.bbox[0], self.bbox[3] - self.bbox[1]])
|
||||
|
||||
def __repr__(self) -> str:
|
||||
parts = [f'Face(confidence={self.confidence:.3f}']
|
||||
if self.age is not None:
|
||||
parts.append(f'age={self.age}')
|
||||
if self.gender is not None:
|
||||
parts.append(f'sex={self.sex}')
|
||||
if self.embedding is not None:
|
||||
parts.append(f'embedding_dim={self.embedding.shape[0]}')
|
||||
return ', '.join(parts) + ')'
|
||||
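A minimal sketch of the Face container (the embeddings are random vectors, used only to exercise the API):

import numpy as np

from uniface.face import Face

rng = np.random.default_rng(0)
a = Face(
    bbox=np.array([10, 20, 110, 140], dtype=np.float32),
    confidence=0.97,
    landmarks=np.zeros((5, 2), dtype=np.float32),
    embedding=rng.normal(size=512).astype(np.float32),
    age=31,
    gender=1,
)
b = Face(
    bbox=np.array([200, 40, 300, 160], dtype=np.float32),
    confidence=0.91,
    landmarks=np.zeros((5, 2), dtype=np.float32),
    embedding=rng.normal(size=512).astype(np.float32),
)

print(a)                        # Face(confidence=0.970, age=31, sex=Male, embedding_dim=512)
print(a.bbox_xywh)              # [ 10.  20. 100. 120.]
print(a.compute_similarity(b))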
194
uniface/face_utils.py
Normal file
@@ -0,0 +1,194 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from typing import Tuple, Union
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
from skimage.transform import SimilarityTransform
|
||||
|
||||
__all__ = [
|
||||
'face_alignment',
|
||||
'compute_similarity',
|
||||
'bbox_center_alignment',
|
||||
'transform_points_2d',
|
||||
]
|
||||
|
||||
|
||||
# Reference alignment for facial landmarks (ArcFace)
|
||||
reference_alignment: np.ndarray = np.array(
|
||||
[
|
||||
[38.2946, 51.6963],
|
||||
[73.5318, 51.5014],
|
||||
[56.0252, 71.7366],
|
||||
[41.5493, 92.3655],
|
||||
[70.7299, 92.2041],
|
||||
],
|
||||
dtype=np.float32,
|
||||
)
|
||||
|
||||
|
||||
def estimate_norm(landmark: np.ndarray, image_size: Union[int, Tuple[int, int]] = 112) -> Tuple[np.ndarray, np.ndarray]:
|
||||
"""
|
||||
Estimate the normalization transformation matrix for facial landmarks.
|
||||
|
||||
Args:
|
||||
landmark (np.ndarray): Array of shape (5, 2) representing the coordinates of the facial landmarks.
|
||||
image_size (Union[int, Tuple[int, int]], optional): The size of the output image.
|
||||
Can be an integer (for square images) or a tuple (width, height). Default is 112.
|
||||
|
||||
Returns:
|
||||
np.ndarray: The 2x3 transformation matrix for aligning the landmarks.
|
||||
np.ndarray: The 2x3 inverse transformation matrix for aligning the landmarks.
|
||||
|
||||
Raises:
|
||||
AssertionError: If the input landmark array does not have the shape (5, 2)
|
||||
or if image_size is not a multiple of 112 or 128.
|
||||
"""
|
||||
assert landmark.shape == (5, 2), 'Landmark array must have shape (5, 2).'
|
||||
|
||||
# Handle both int and tuple inputs
|
||||
if isinstance(image_size, tuple):
|
||||
size = image_size[0] # Use width for ratio calculation
|
||||
else:
|
||||
size = image_size
|
||||
|
||||
assert size % 112 == 0 or size % 128 == 0, 'Image size must be a multiple of 112 or 128.'
|
||||
|
||||
if size % 112 == 0:
|
||||
ratio = float(size) / 112.0
|
||||
diff_x = 0.0
|
||||
else:
|
||||
ratio = float(size) / 128.0
|
||||
diff_x = 8.0 * ratio
|
||||
|
||||
# Adjust reference alignment based on ratio and diff_x
|
||||
alignment = reference_alignment * ratio
|
||||
alignment[:, 0] += diff_x
|
||||
|
||||
# Compute the transformation matrix
|
||||
transform = SimilarityTransform()
|
||||
transform.estimate(landmark, alignment)
|
||||
|
||||
matrix = transform.params[0:2, :]
|
||||
inverse_matrix = np.linalg.inv(transform.params)[0:2, :]
|
||||
|
||||
return matrix, inverse_matrix
|
||||
|
||||
|
||||
def face_alignment(
|
||||
image: np.ndarray,
|
||||
landmark: np.ndarray,
|
||||
image_size: Union[int, Tuple[int, int]] = 112,
|
||||
) -> Tuple[np.ndarray, np.ndarray]:
|
||||
"""
|
||||
Align the face in the input image based on the given facial landmarks.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): Input image as a NumPy array.
|
||||
landmark (np.ndarray): Array of shape (5, 2) representing the coordinates of the facial landmarks.
|
||||
image_size (Union[int, Tuple[int, int]], optional): The size of the aligned output image.
|
||||
Can be an integer (for square images) or a tuple (width, height). Default is 112.
|
||||
|
||||
Returns:
|
||||
np.ndarray: The aligned face as a NumPy array.
|
||||
np.ndarray: The 2x3 transformation matrix used for alignment.
|
||||
"""
|
||||
# Get the transformation matrix
|
||||
M, M_inv = estimate_norm(landmark, image_size)
|
||||
|
||||
# Handle both int and tuple for warpAffine output size
|
||||
if isinstance(image_size, int):
|
||||
output_size = (image_size, image_size)
|
||||
else:
|
||||
output_size = image_size
|
||||
|
||||
# Warp the input image to align the face
|
||||
warped = cv2.warpAffine(image, M, output_size, borderValue=0.0)
|
||||
|
||||
return warped, M_inv
|
||||
|
||||
|
||||
def compute_similarity(feat1: np.ndarray, feat2: np.ndarray, normalized: bool = False) -> np.float32:
|
||||
"""Computing Similarity between two faces.
|
||||
|
||||
Args:
|
||||
feat1 (np.ndarray): First embedding.
|
||||
feat2 (np.ndarray): Second embedding.
|
||||
normalized (bool): Set True if the embeddings are already L2 normalized.
|
||||
|
||||
Returns:
|
||||
np.float32: Cosine similarity.
|
||||
"""
|
||||
feat1 = feat1.ravel()
|
||||
feat2 = feat2.ravel()
|
||||
if normalized:
|
||||
return np.dot(feat1, feat2)
|
||||
else:
|
||||
return np.dot(feat1, feat2) / (np.linalg.norm(feat1) * np.linalg.norm(feat2) + 1e-5)
|
||||
|
||||
|
||||
def bbox_center_alignment(image: np.ndarray, center: Tuple[float, float], output_size: int, scale: float, rotation: float) -> Tuple[np.ndarray, np.ndarray]:
|
||||
"""
|
||||
Apply center-based alignment, scaling, and rotation to an image.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): Input image.
|
||||
center (Tuple[float, float]): Center point (e.g., face center from bbox).
|
||||
output_size (int): Desired output image size (square).
|
||||
scale (float): Scaling factor to zoom in/out.
|
||||
rotation (float): Rotation angle in degrees (clockwise).
|
||||
|
||||
Returns:
|
||||
cropped (np.ndarray): Aligned and cropped image.
|
||||
M (np.ndarray): 2x3 affine transform matrix used.
|
||||
"""
|
||||
|
||||
# Convert rotation from degrees to radians
|
||||
rot = float(rotation) * np.pi / 180.0
|
||||
|
||||
# Scale the image
|
||||
t1 = SimilarityTransform(scale=scale)
|
||||
|
||||
# Translate the center point to the origin (after scaling)
|
||||
cx = center[0] * scale
|
||||
cy = center[1] * scale
|
||||
t2 = SimilarityTransform(translation=(-1 * cx, -1 * cy))
|
||||
|
||||
# Apply rotation around origin (center of face)
|
||||
t3 = SimilarityTransform(rotation=rot)
|
||||
|
||||
# Translate origin to center of output image
|
||||
t4 = SimilarityTransform(translation=(output_size / 2, output_size / 2))
|
||||
|
||||
# Combine all transformations in order: scale → center shift → rotate → recentralize
|
||||
t = t1 + t2 + t3 + t4
|
||||
|
||||
# Extract 2x3 affine matrix
|
||||
M = t.params[0:2]
|
||||
|
||||
# Warp the image using OpenCV
|
||||
cropped = cv2.warpAffine(image, M, (output_size, output_size), borderValue=0.0)
|
||||
|
||||
return cropped, M
|
||||
|
||||
|
||||
def transform_points_2d(points: np.ndarray, transform: np.ndarray) -> np.ndarray:
|
||||
"""
|
||||
Apply a 2D affine transformation to an array of 2D points.
|
||||
|
||||
Args:
|
||||
points (np.ndarray): An (N, 2) array of 2D points.
|
||||
transform (np.ndarray): A (2, 3) affine transformation matrix.
|
||||
|
||||
Returns:
|
||||
np.ndarray: Transformed (N, 2) array of points.
|
||||
"""
|
||||
transformed = np.zeros_like(points, dtype=np.float32)
|
||||
for i in range(points.shape[0]):
|
||||
point = np.array([points[i, 0], points[i, 1], 1.0], dtype=np.float32)
|
||||
result = np.dot(transform, point)
|
||||
transformed[i] = result[:2]
|
||||
|
||||
return transformed
|
||||
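A sketch tying detector landmarks to the alignment helpers above (the image path is illustrative):

import cv2
import numpy as np

from uniface.detection import detect_faces
from uniface.face_utils import face_alignment, transform_points_2d

image = cv2.imread('portrait.jpg')
faces = detect_faces(image, method='retinaface')

if faces:
    aligned, M_inv = face_alignment(image, faces[0]['landmarks'], image_size=112)
    cv2.imwrite('aligned_112.jpg', aligned)
    # M_inv maps points from the 112x112 crop back into the original image,
    # e.g. the crop centre expressed in original-image coordinates:
    center_in_original = transform_points_2d(np.array([[56.0, 56.0]], dtype=np.float32), M_inv)
    print(center_in_original)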
58
uniface/gaze/__init__.py
Normal file
@@ -0,0 +1,58 @@
# Copyright 2025 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

from .base import BaseGazeEstimator
from .models import MobileGaze


def create_gaze_estimator(method: str = 'mobilegaze', **kwargs) -> BaseGazeEstimator:
    """
    Factory function to create gaze estimators.

    This function initializes and returns a gaze estimator instance based on the
    specified method. It acts as a high-level interface to the underlying
    model classes.

    Args:
        method (str): The gaze estimation method to use.
            Options: 'mobilegaze' (default).
        **kwargs: Model-specific parameters passed to the estimator's constructor.
            For example, `model_name` can be used to select a specific
            backbone from `GazeWeights` enum (RESNET18, RESNET34, RESNET50,
            MOBILENET_V2, MOBILEONE_S0).

    Returns:
        BaseGazeEstimator: An initialized gaze estimator instance ready for use.

    Raises:
        ValueError: If the specified `method` is not supported.

    Examples:
        >>> # Create the default MobileGaze estimator (ResNet18 backbone)
        >>> estimator = create_gaze_estimator()

        >>> # Create with MobileNetV2 backbone
        >>> from uniface.constants import GazeWeights
        >>> estimator = create_gaze_estimator(
        ...     'mobilegaze',
        ...     model_name=GazeWeights.MOBILENET_V2
        ... )

        >>> # Use the estimator
        >>> pitch, yaw = estimator.estimate(face_crop)
    """
    method = method.lower()

    if method in ('mobilegaze', 'mobile_gaze', 'gaze'):
        return MobileGaze(**kwargs)
    else:
        available = ['mobilegaze']
        raise ValueError(f"Unsupported gaze estimation method: '{method}'. Available: {available}")


__all__ = [
    'create_gaze_estimator',
    'MobileGaze',
    'BaseGazeEstimator',
]
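Once `create_gaze_estimator` returns pitch and yaw in radians, a quick sanity check is to project the angles back onto the image and draw an arrow from the face centre. The sketch below is illustrative only, not library code; it assumes the pitch-up / yaw-right convention documented for `BaseGazeEstimator` further down, and the signs may need flipping for a particular camera setup.

import cv2
import numpy as np

def draw_gaze_arrow(frame, face_center, pitch, yaw, length=100):
    """Project (pitch, yaw) in radians to a 2D arrow (illustrative convention)."""
    cx, cy = face_center
    dx = length * np.cos(pitch) * np.sin(yaw)   # yaw > 0 -> arrow points right
    dy = -length * np.sin(pitch)                # pitch > 0 (looking up) -> arrow points up
    cv2.arrowedLine(frame, (int(cx), int(cy)), (int(cx + dx), int(cy + dy)), (0, 0, 255), 2)
    return frame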
108
uniface/gaze/base.py
Normal file
@@ -0,0 +1,108 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Tuple
|
||||
|
||||
import numpy as np
|
||||
|
||||
|
||||
class BaseGazeEstimator(ABC):
|
||||
"""
|
||||
Abstract base class for all gaze estimation models.
|
||||
|
||||
This class defines the common interface that all gaze estimators must implement,
|
||||
ensuring consistency across different gaze estimation methods. Gaze estimation
|
||||
predicts the direction a person is looking based on their face image.
|
||||
|
||||
The gaze direction is represented as pitch and yaw angles in radians:
|
||||
- Pitch: Vertical angle (positive = looking up, negative = looking down)
|
||||
- Yaw: Horizontal angle (positive = looking right, negative = looking left)
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def _initialize_model(self) -> None:
|
||||
"""
|
||||
Initialize the underlying model for inference.
|
||||
|
||||
This method should handle loading model weights, creating the
|
||||
inference session (e.g., ONNX Runtime), and any necessary
|
||||
setup procedures to prepare the model for prediction.
|
||||
|
||||
Raises:
|
||||
RuntimeError: If the model fails to load or initialize.
|
||||
"""
|
||||
raise NotImplementedError('Subclasses must implement the _initialize_model method.')
|
||||
|
||||
@abstractmethod
|
||||
def preprocess(self, face_image: np.ndarray) -> np.ndarray:
|
||||
"""
|
||||
Preprocess the input face image for model inference.
|
||||
|
||||
This method should take a raw face crop and convert it into the format
|
||||
expected by the model's inference engine (e.g., normalized tensor).
|
||||
|
||||
Args:
|
||||
face_image (np.ndarray): A cropped face image in BGR format with
|
||||
shape (H, W, C).
|
||||
|
||||
Returns:
|
||||
np.ndarray: The preprocessed image tensor ready for inference,
|
||||
typically with shape (1, C, H, W).
|
||||
"""
|
||||
raise NotImplementedError('Subclasses must implement the preprocess method.')
|
||||
|
||||
@abstractmethod
|
||||
def postprocess(self, outputs: Tuple[np.ndarray, np.ndarray]) -> Tuple[float, float]:
|
||||
"""
|
||||
Postprocess raw model outputs into gaze angles.
|
||||
|
||||
This method takes the raw output from the model's inference and
|
||||
converts it into pitch and yaw angles in radians.
|
||||
|
||||
Args:
|
||||
outputs: Raw outputs from the model inference. The format depends
|
||||
on the specific model architecture.
|
||||
|
||||
Returns:
|
||||
Tuple[float, float]: A tuple of (pitch, yaw) angles in radians.
|
||||
"""
|
||||
raise NotImplementedError('Subclasses must implement the postprocess method.')
|
||||
|
||||
@abstractmethod
|
||||
def estimate(self, face_image: np.ndarray) -> Tuple[float, float]:
|
||||
"""
|
||||
Perform end-to-end gaze estimation on a face image.
|
||||
|
||||
This method orchestrates the full pipeline: preprocessing the input,
|
||||
running inference, and postprocessing to return the gaze direction.
|
||||
|
||||
Args:
|
||||
face_image (np.ndarray): A cropped face image in BGR format.
|
||||
The face should be roughly centered and
|
||||
well-framed within the image.
|
||||
|
||||
Returns:
|
||||
Tuple[float, float]: A tuple of (pitch, yaw) angles in radians:
|
||||
- pitch: Vertical gaze angle (positive = up, negative = down)
|
||||
- yaw: Horizontal gaze angle (positive = right, negative = left)
|
||||
|
||||
Example:
|
||||
>>> estimator = create_gaze_estimator()
|
||||
>>> pitch, yaw = estimator.estimate(face_crop)
|
||||
>>> print(f"Looking: pitch={np.degrees(pitch):.1f}°, yaw={np.degrees(yaw):.1f}°")
|
||||
"""
|
||||
raise NotImplementedError('Subclasses must implement the estimate method.')
|
||||
|
||||
def __call__(self, face_image: np.ndarray) -> Tuple[float, float]:
|
||||
"""
|
||||
Provides a convenient, callable shortcut for the `estimate` method.
|
||||
|
||||
Args:
|
||||
face_image (np.ndarray): A cropped face image in BGR format.
|
||||
|
||||
Returns:
|
||||
Tuple[float, float]: A tuple of (pitch, yaw) angles in radians.
|
||||
"""
|
||||
return self.estimate(face_image)
|
||||
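For reference, implementing a new estimator only requires filling in the four abstract methods. The sketch below wires a hypothetical two-output ONNX model (assumed to emit pitch and yaw in radians directly) into the interface; it is illustrative and not part of the library:

from typing import Tuple

import cv2
import numpy as np

from uniface.gaze import BaseGazeEstimator
from uniface.onnx_utils import create_onnx_session


class DummyGazeEstimator(BaseGazeEstimator):
    """Illustrative only: wires an arbitrary two-output ONNX model into the interface."""

    def __init__(self, model_path: str) -> None:
        self.model_path = model_path
        self._initialize_model()

    def _initialize_model(self) -> None:
        self.session = create_onnx_session(self.model_path)

    def preprocess(self, face_image: np.ndarray) -> np.ndarray:
        face = cv2.resize(face_image, (448, 448)).astype(np.float32) / 255.0
        return np.transpose(face, (2, 0, 1))[None]           # (1, 3, H, W)

    def postprocess(self, outputs: Tuple[np.ndarray, np.ndarray]) -> Tuple[float, float]:
        pitch, yaw = outputs                                  # assume the model emits radians directly
        return float(pitch), float(yaw)

    def estimate(self, face_image: np.ndarray) -> Tuple[float, float]:
        tensor = self.preprocess(face_image)
        out = self.session.run(None, {self.session.get_inputs()[0].name: tensor})
        return self.postprocess((out[0].ravel()[0], out[1].ravel()[0]))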
187
uniface/gaze/models.py
Normal file
@@ -0,0 +1,187 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from typing import Tuple
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from uniface.constants import GazeWeights
|
||||
from uniface.log import Logger
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.onnx_utils import create_onnx_session
|
||||
|
||||
from .base import BaseGazeEstimator
|
||||
|
||||
__all__ = ['MobileGaze']
|
||||
|
||||
|
||||
class MobileGaze(BaseGazeEstimator):
|
||||
"""
|
||||
MobileGaze: Real-Time Gaze Estimation with ONNX Runtime.
|
||||
|
||||
MobileGaze is a gaze estimation model that predicts gaze direction from a single
|
||||
face image. It supports multiple backbone architectures including ResNet 18/34/50,
|
||||
MobileNetV2, and MobileOne S0. The model uses a classification approach with binned
|
||||
angles, which are then decoded to continuous pitch and yaw values.
|
||||
|
||||
The model outputs gaze direction as pitch (vertical) and yaw (horizontal) angles
|
||||
in radians.
|
||||
|
||||
Reference:
|
||||
https://github.com/yakhyo/gaze-estimation
|
||||
|
||||
Args:
|
||||
model_name (GazeWeights): The enum specifying the gaze model backbone to load.
|
||||
Options: RESNET18, RESNET34, RESNET50, MOBILENET_V2, MOBILEONE_S0.
|
||||
Defaults to `GazeWeights.RESNET18`.
|
||||
input_size (Tuple[int, int]): The resolution (width, height) for the model's
|
||||
input. Defaults to (448, 448).
|
||||
|
||||
Attributes:
|
||||
input_size (Tuple[int, int]): Model input dimensions.
|
||||
input_mean (list): Per-channel mean values for normalization (ImageNet).
|
||||
input_std (list): Per-channel std values for normalization (ImageNet).
|
||||
|
||||
Example:
|
||||
>>> from uniface.gaze import MobileGaze
|
||||
>>> from uniface import RetinaFace
|
||||
>>>
|
||||
>>> detector = RetinaFace()
|
||||
>>> gaze_estimator = MobileGaze()
|
||||
>>>
|
||||
>>> # Detect faces and estimate gaze for each
|
||||
>>> faces = detector.detect(image)
|
||||
>>> for face in faces:
|
||||
... bbox = face['bbox']
|
||||
... x1, y1, x2, y2 = map(int, bbox[:4])
|
||||
... face_crop = image[y1:y2, x1:x2]
|
||||
... pitch, yaw = gaze_estimator.estimate(face_crop)
|
||||
... print(f"Gaze: pitch={np.degrees(pitch):.1f}°, yaw={np.degrees(yaw):.1f}°")
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
model_name: GazeWeights = GazeWeights.RESNET34,
|
||||
input_size: Tuple[int, int] = (448, 448),
|
||||
) -> None:
|
||||
Logger.info(f'Initializing MobileGaze with model={model_name}, input_size={input_size}')
|
||||
|
||||
self.input_size = input_size
|
||||
self.input_mean = [0.485, 0.456, 0.406]
|
||||
self.input_std = [0.229, 0.224, 0.225]
|
||||
|
||||
# Model specific parameters for bin-based classification (Gaze360 config)
|
||||
self._bins = 90
|
||||
self._binwidth = 4
|
||||
self._angle_offset = 180
|
||||
self._idx_tensor = np.arange(self._bins, dtype=np.float32)
|
||||
|
||||
self.model_path = verify_model_weights(model_name)
|
||||
self._initialize_model()
|
||||
|
||||
def _initialize_model(self) -> None:
|
||||
"""
|
||||
Initialize the ONNX model from the stored model path.
|
||||
|
||||
Raises:
|
||||
RuntimeError: If the model fails to load or initialize.
|
||||
"""
|
||||
try:
|
||||
self.session = create_onnx_session(self.model_path)
|
||||
|
||||
# Get input configuration
|
||||
input_cfg = self.session.get_inputs()[0]
|
||||
input_shape = input_cfg.shape
|
||||
self.input_name = input_cfg.name
|
||||
self.input_size = tuple(input_shape[2:4][::-1]) # Update from model
|
||||
|
||||
# Get output configuration
|
||||
outputs = self.session.get_outputs()
|
||||
self.output_names = [output.name for output in outputs]
|
||||
|
||||
if len(self.output_names) != 2:
|
||||
raise ValueError(f'Expected 2 output nodes (pitch, yaw), got {len(self.output_names)}')
|
||||
|
||||
Logger.info(f'MobileGaze initialized with input size {self.input_size}')
|
||||
|
||||
except Exception as e:
|
||||
Logger.error(f"Failed to load gaze model from '{self.model_path}'", exc_info=True)
|
||||
raise RuntimeError(f'Failed to initialize gaze model: {e}') from e
|
||||
|
||||
def preprocess(self, face_image: np.ndarray) -> np.ndarray:
|
||||
"""
|
||||
Preprocess a face crop for gaze estimation.
|
||||
|
||||
Args:
|
||||
face_image (np.ndarray): A cropped face image in BGR format.
|
||||
|
||||
Returns:
|
||||
np.ndarray: Preprocessed image tensor with shape (1, 3, H, W).
|
||||
"""
|
||||
# Convert BGR to RGB
|
||||
image = cv2.cvtColor(face_image, cv2.COLOR_BGR2RGB)
|
||||
|
||||
# Resize to model input size
|
||||
image = cv2.resize(image, self.input_size)
|
||||
|
||||
# Normalize to [0, 1] and apply normalization
|
||||
image = image.astype(np.float32) / 255.0
|
||||
mean = np.array(self.input_mean, dtype=np.float32)
|
||||
std = np.array(self.input_std, dtype=np.float32)
|
||||
image = (image - mean) / std
|
||||
|
||||
# HWC -> CHW -> NCHW
|
||||
image = np.transpose(image, (2, 0, 1))
|
||||
image = np.expand_dims(image, axis=0).astype(np.float32)
|
||||
|
||||
return image
|
||||
|
||||
def _softmax(self, x: np.ndarray) -> np.ndarray:
|
||||
"""Apply softmax along axis 1."""
|
||||
e_x = np.exp(x - np.max(x, axis=1, keepdims=True))
|
||||
return e_x / e_x.sum(axis=1, keepdims=True)
|
||||
|
||||
def postprocess(self, outputs: Tuple[np.ndarray, np.ndarray]) -> Tuple[np.ndarray, np.ndarray]:
|
||||
"""
|
||||
Postprocess raw model outputs into gaze angles.
|
||||
|
||||
This method takes the raw output from the model's inference and
|
||||
converts it into pitch and yaw angles in radians.
|
||||
|
||||
Args:
|
||||
outputs: Raw outputs from the model inference. The format depends
|
||||
on the specific model architecture.
|
||||
|
||||
Returns:
|
||||
Tuple[np.ndarray, np.ndarray]: A tuple of (pitch, yaw) angles in radians.
|
||||
"""
|
||||
pitch_logits, yaw_logits = outputs
|
||||
|
||||
# Convert logits to probabilities
|
||||
pitch_probs = self._softmax(pitch_logits)
|
||||
yaw_probs = self._softmax(yaw_logits)
|
||||
|
||||
# Compute expected bin index (soft-argmax)
|
||||
pitch_deg = np.sum(pitch_probs * self._idx_tensor, axis=1) * self._binwidth - self._angle_offset
|
||||
yaw_deg = np.sum(yaw_probs * self._idx_tensor, axis=1) * self._binwidth - self._angle_offset
|
||||
|
||||
# Convert degrees to radians
|
||||
pitch = np.radians(pitch_deg[0])
|
||||
yaw = np.radians(yaw_deg[0])
|
||||
|
||||
return pitch, yaw
|
||||
|
||||
def estimate(self, face_image: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
|
||||
"""
|
||||
Perform end-to-end gaze estimation on a face image.
|
||||
|
||||
This method orchestrates the full pipeline: preprocessing the input,
|
||||
running inference, and postprocessing to return the gaze direction.
|
||||
"""
|
||||
input_tensor = self.preprocess(face_image)
|
||||
outputs = self.session.run(self.output_names, {self.input_name: input_tensor})
|
||||
pitch, yaw = self.postprocess((outputs[0], outputs[1]))
|
||||
|
||||
return pitch, yaw
|
||||
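The bin decoding above is easiest to follow with a small worked example. With 90 bins of 4 degrees and an offset of 180 degrees, probability mass centred on bin index 46 decodes to 46 * 4 - 180 = 4 degrees, roughly 0.07 rad. The snippet below is an illustrative standalone reproduction of that arithmetic on synthetic logits:

import numpy as np

bins, binwidth, offset = 90, 4, 180
idx = np.arange(bins, dtype=np.float32)

logits = np.zeros((1, bins), dtype=np.float32)
logits[0, 46] = 10.0                       # model very confident about bin 46

probs = np.exp(logits - logits.max(axis=1, keepdims=True))
probs /= probs.sum(axis=1, keepdims=True)

deg = np.sum(probs * idx, axis=1) * binwidth - offset
print(deg[0], np.radians(deg[0]))          # ~4.0 degrees, ~0.07 rad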
28
uniface/landmark/__init__.py
Normal file
@@ -0,0 +1,28 @@
# Copyright 2025 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

from .base import BaseLandmarker
from .models import Landmark106


def create_landmarker(method: str = '2d106det', **kwargs) -> BaseLandmarker:
    """
    Factory function to create facial landmark predictors.

    Args:
        method (str): Landmark prediction method. Options: '2d106det'.
        **kwargs: Model-specific parameters.

    Returns:
        Initialized landmarker instance.
    """
    method = method.lower()
    if method == '2d106det':
        return Landmark106(**kwargs)
    else:
        available = ['2d106det']
        raise ValueError(f"Unsupported method: '{method}'. Available: {available}")


__all__ = ['create_landmarker', 'Landmark106', 'BaseLandmarker']
32
uniface/landmark/base.py
Normal file
@@ -0,0 +1,32 @@
# Copyright 2025 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

from abc import ABC, abstractmethod

import numpy as np


class BaseLandmarker(ABC):
    """
    Abstract Base Class for all facial landmark models.
    """

    @abstractmethod
    def get_landmarks(self, image: np.ndarray, bbox: np.ndarray) -> np.ndarray:
        """
        Predicts facial landmarks for a given face bounding box.

        This method defines the standard interface for all landmark predictors.
        It takes a full image and a bounding box for a single face and returns
        the predicted keypoints for that face.

        Args:
            image (np.ndarray): The full source image in BGR format.
            bbox (np.ndarray): A bounding box of a face [x1, y1, x2, y2].

        Returns:
            np.ndarray: An array of predicted landmark points with shape (N, 2),
                where N is the number of landmarks.
        """
        raise NotImplementedError
212
uniface/landmark/models.py
Normal file
@@ -0,0 +1,212 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from typing import Tuple
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from uniface.constants import LandmarkWeights
|
||||
from uniface.face_utils import bbox_center_alignment, transform_points_2d
|
||||
from uniface.log import Logger
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.onnx_utils import create_onnx_session
|
||||
|
||||
from .base import BaseLandmarker
|
||||
|
||||
__all__ = ['Landmark106']
|
||||
|
||||
|
||||
class Landmark106(BaseLandmarker):
|
||||
"""Facial landmark model for predicting 106 facial keypoints.
|
||||
|
||||
This class implements the BaseLandmarker and provides an end-to-end
|
||||
pipeline for 106-point facial landmark detection. It handles model
|
||||
loading, preprocessing of a face crop based on a bounding box,
|
||||
inference, and post-processing to map landmarks back to the
|
||||
original image coordinates.
|
||||
|
||||
Args:
|
||||
model_name (LandmarkWeights): The enum specifying the landmark model to load.
|
||||
Defaults to `LandmarkWeights.DEFAULT`.
|
||||
input_size (Tuple[int, int]): The resolution (width, height) for the model's
|
||||
input. Defaults to (192, 192).
|
||||
|
||||
Example:
|
||||
>>> # Assume 'image' is a loaded image and 'bbox' is a face bounding box
|
||||
>>> # bbox = [x1, y1, x2, y2]
|
||||
>>>
|
||||
>>> landmarker = Landmark106()
|
||||
>>> landmarks = landmarker.get_landmarks(image, bbox)
|
||||
>>> print(landmarks.shape)
|
||||
(106, 2)
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
model_name: LandmarkWeights = LandmarkWeights.DEFAULT,
|
||||
input_size: Tuple[int, int] = (192, 192),
|
||||
) -> None:
|
||||
Logger.info(f'Initializing Facial Landmark with model={model_name}, input_size={input_size}')
|
||||
self.input_size = input_size
|
||||
self.input_std = 1.0
|
||||
self.input_mean = 0.0
|
||||
self.model_path = verify_model_weights(model_name)
|
||||
self._initialize_model()
|
||||
|
||||
def _initialize_model(self):
|
||||
"""
|
||||
Initialize the ONNX model from the stored model path.
|
||||
|
||||
Raises:
|
||||
RuntimeError: If the model fails to load or initialize.
|
||||
"""
|
||||
try:
|
||||
self.session = create_onnx_session(self.model_path)
|
||||
|
||||
# Get input configuration
|
||||
input_metadata = self.session.get_inputs()[0]
|
||||
input_shape = input_metadata.shape
|
||||
self.input_size = tuple(input_shape[2:4][::-1]) # Update input size from model
|
||||
|
||||
# Get input/output names
|
||||
self.input_names = [input.name for input in self.session.get_inputs()]
|
||||
self.output_names = [output.name for output in self.session.get_outputs()]
|
||||
|
||||
# Determine landmark dimensions from output shape
|
||||
output_shape = self.session.get_outputs()[0].shape
|
||||
self.lmk_dim = 2 # x,y coordinates
|
||||
self.lmk_num = output_shape[1] // self.lmk_dim # Number of landmarks
|
||||
|
||||
Logger.info(f'Model initialized with {self.lmk_num} landmarks')
|
||||
|
||||
except Exception as e:
|
||||
Logger.error(f"Failed to load landmark model from '{self.model_path}'", exc_info=True)
|
||||
raise RuntimeError(f'Failed to initialize landmark model: {e}') from e
|
||||
|
||||
def preprocess(self, image: np.ndarray, bbox: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
|
||||
"""Prepares a face crop for inference.
|
||||
|
||||
This method takes a face bounding box, performs a center alignment to
|
||||
warp the face into the model's required input size, and then creates
|
||||
a normalized blob ready for the ONNX session.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): The full source image in BGR format.
|
||||
bbox (np.ndarray): The bounding box of the face [x1, y1, x2, y2].
|
||||
|
||||
Returns:
|
||||
Tuple[np.ndarray, np.ndarray]: A tuple containing:
|
||||
- The preprocessed image blob ready for inference.
|
||||
- The affine transformation matrix used for alignment.
|
||||
"""
|
||||
width, height = bbox[2] - bbox[0], bbox[3] - bbox[1]
|
||||
center = ((bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2)
|
||||
scale = self.input_size[0] / (max(width, height) * 1.5)
|
||||
|
||||
aligned_face, transform_matrix = bbox_center_alignment(image, center, self.input_size[0], scale, 0.0)
|
||||
|
||||
face_blob = cv2.dnn.blobFromImage(
|
||||
aligned_face,
|
||||
1.0 / self.input_std,
|
||||
self.input_size,
|
||||
(self.input_mean, self.input_mean, self.input_mean),
|
||||
swapRB=True,
|
||||
)
|
||||
return face_blob, transform_matrix
|
||||
|
||||
def postprocess(self, predictions: np.ndarray, transform_matrix: np.ndarray) -> np.ndarray:
|
||||
"""Converts raw model predictions back to original image coordinates.
|
||||
|
||||
This method reshapes the model's flat output array into landmark points,
|
||||
denormalizes them to the model's input space, and then applies an
|
||||
inverse affine transformation to map them back to the original image space.
|
||||
|
||||
Args:
|
||||
predictions (np.ndarray): Raw landmark coordinates from the model output.
|
||||
transform_matrix (np.ndarray): The affine transformation matrix from preprocessing.
|
||||
|
||||
Returns:
|
||||
np.ndarray: An array of landmark points in the original image's coordinates.
|
||||
"""
|
||||
landmarks = predictions.reshape((-1, 2))
|
||||
landmarks[:, 0:2] += 1
|
||||
landmarks[:, 0:2] *= self.input_size[0] // 2
|
||||
|
||||
inverse_matrix = cv2.invertAffineTransform(transform_matrix)
|
||||
landmarks = transform_points_2d(landmarks, inverse_matrix)
|
||||
return landmarks
|
||||
|
||||
def get_landmarks(self, image: np.ndarray, bbox: np.ndarray) -> np.ndarray:
|
||||
"""Predicts facial landmarks for the given image and face bounding box.
|
||||
|
||||
This is the main public method that orchestrates the full pipeline of
|
||||
preprocessing, inference, and post-processing.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): The full source image in BGR format.
|
||||
bbox (np.ndarray): A bounding box of a face [x1, y1, x2, y2].
|
||||
|
||||
Returns:
|
||||
np.ndarray: An array of predicted landmark points with shape (106, 2).
|
||||
"""
|
||||
face_blob, transform_matrix = self.preprocess(image, bbox)
|
||||
raw_predictions = self.session.run(self.output_names, {self.input_names[0]: face_blob})[0][0]
|
||||
landmarks = self.postprocess(raw_predictions, transform_matrix)
|
||||
return landmarks
|
||||
|
||||
|
||||
# Testing code
|
||||
if __name__ == '__main__':
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.landmark import Landmark106
|
||||
|
||||
face_detector = RetinaFace()
|
||||
landmarker = Landmark106()
|
||||
|
||||
cap = cv2.VideoCapture(0)
|
||||
if not cap.isOpened():
|
||||
print('Webcam not available.')
|
||||
exit()
|
||||
|
||||
print("Press 'q' to quit.")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
print('Frame capture failed.')
|
||||
break
|
||||
|
||||
# 2. The detect method returns a list of dictionaries
|
||||
faces = face_detector.detect(frame)
|
||||
|
||||
if not faces:
|
||||
cv2.imshow('Facial Landmark Detection', frame)
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
continue
|
||||
|
||||
# 3. Loop through the list of face dictionaries
|
||||
for face in faces:
|
||||
# Extract the bounding box
|
||||
bbox = face['bbox']
|
||||
|
||||
# 4. Get landmarks for the current face using its bounding box
|
||||
landmarks = landmarker.get_landmarks(frame, bbox)
|
||||
|
||||
# --- Drawing Logic ---
|
||||
# Draw the landmarks
|
||||
for x, y in landmarks.astype(int):
|
||||
cv2.circle(frame, (x, y), 2, (0, 255, 0), -1)
|
||||
|
||||
# Draw the bounding box
|
||||
x1, y1, x2, y2 = map(int, bbox)
|
||||
cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)
|
||||
|
||||
cv2.imshow('Facial Landmark Detection', frame)
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
@@ -1,7 +1,25 @@
|
||||
import logging
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format="%(asctime)s - %(levelname)s - %(message)s"
|
||||
)
|
||||
Logger = logging.getLogger("retinaface")
|
||||
# Create logger for uniface
|
||||
Logger = logging.getLogger('uniface')
|
||||
Logger.setLevel(logging.WARNING) # Only show warnings/errors by default
|
||||
Logger.addHandler(logging.NullHandler())
|
||||
|
||||
|
||||
def enable_logging(level=logging.INFO):
|
||||
"""
|
||||
Enable verbose logging for uniface.
|
||||
|
||||
Args:
|
||||
level: Logging level (logging.DEBUG, logging.INFO, etc.)
|
||||
|
||||
Example:
|
||||
>>> from uniface import enable_logging
|
||||
>>> enable_logging() # Show INFO logs
|
||||
"""
|
||||
Logger.handlers.clear()
|
||||
handler = logging.StreamHandler()
|
||||
handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s', datefmt='%Y-%m-%d %H:%M:%S'))
|
||||
Logger.addHandler(handler)
|
||||
Logger.setLevel(level)
|
||||
Logger.propagate = False
|
||||
|
||||
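Usage note for the logging setup above: the 'uniface' logger stays at WARNING with a NullHandler by default, so download and provider messages are hidden. A minimal sketch for enabling them during debugging and silencing them again:

import logging
from uniface import enable_logging

enable_logging(logging.DEBUG)                              # show download/provider messages
# ... run detection / recognition ...
logging.getLogger('uniface').setLevel(logging.WARNING)     # back to quiet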
@@ -1,102 +1,115 @@
|
||||
# Copyright 2024 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
import os
|
||||
import hashlib
|
||||
import requests
|
||||
|
||||
from uniface.log import Logger
|
||||
import uniface.constants as const
|
||||
|
||||
|
||||
def verify_model_weights(model_name: str, root: str = '~/.uniface/models') -> str:
|
||||
"""
|
||||
Ensures model weights are available by downloading if missing and verifying integrity with a SHA-256 hash.
|
||||
|
||||
Checks if the specified model weights file exists in `root`. If missing, downloads from a predefined URL.
|
||||
The file is then verified using its SHA-256 hash. If verification fails, the corrupted file is deleted,
|
||||
and an error is raised.
|
||||
|
||||
Args:
|
||||
model_name (str): Name of the model weights to verify or download.
|
||||
root (str, optional): Directory to store the model weights. Defaults to '~/.uniface/models'.
|
||||
|
||||
Returns:
|
||||
str: Path to the verified model weights file.
|
||||
|
||||
Raises:
|
||||
ValueError: If the model is not found or if verification fails.
|
||||
ConnectionError: If downloading the file fails.
|
||||
|
||||
Examples:
|
||||
>>> # Download and verify 'retinaface_mnet025' weights
|
||||
>>> verify_model_weights('retinaface_mnet025')
|
||||
'/home/user/.uniface/models/retinaface_mnet025.onnx'
|
||||
|
||||
>>> # Use a custom directory
|
||||
>>> verify_model_weights('retinaface_r34', root='/custom/dir')
|
||||
'/custom/dir/retinaface_r34.onnx'
|
||||
"""
|
||||
|
||||
root = os.path.expanduser(root)
|
||||
os.makedirs(root, exist_ok=True)
|
||||
model_path = os.path.join(root, f'{model_name}.onnx')
|
||||
|
||||
if not os.path.exists(model_path):
|
||||
url = const.MODEL_URLS.get(model_name)
|
||||
if not url:
|
||||
Logger.error(f"No URL found for model '{model_name}'")
|
||||
raise ValueError(f"No URL found for model '{model_name}'")
|
||||
|
||||
Logger.info(f"Downloading '{model_name}' from {url}")
|
||||
download_file(url, model_path)
|
||||
Logger.info(f"Successfully '{model_name}' downloaded to {model_path}")
|
||||
|
||||
expected_hash = const.MODEL_SHA256.get(model_name)
|
||||
if expected_hash and not verify_file_hash(model_path, expected_hash):
|
||||
os.remove(model_path) # Remove corrupted file
|
||||
Logger.warning("Corrupted weight detected. Removing...")
|
||||
raise ValueError(f"Hash mismatch for '{model_name}'. The file may be corrupted; please try downloading again.")
|
||||
|
||||
return model_path
|
||||
|
||||
|
||||
def download_file(url: str, dest_path: str) -> None:
|
||||
"""Download a file from a URL in chunks and save it to the destination path."""
|
||||
try:
|
||||
response = requests.get(url, stream=True)
|
||||
response.raise_for_status()
|
||||
with open(dest_path, "wb") as file:
|
||||
for chunk in response.iter_content(chunk_size=const.CHUNK_SIZE):
|
||||
if chunk:
|
||||
file.write(chunk)
|
||||
except requests.RequestException as e:
|
||||
raise ConnectionError(f"Failed to download file from {url}. Error: {e}")
|
||||
|
||||
|
||||
def verify_file_hash(file_path: str, expected_hash: str) -> bool:
|
||||
"""Compute the SHA-256 hash of the file and compare it with the expected hash."""
|
||||
file_hash = hashlib.sha256()
|
||||
with open(file_path, "rb") as f:
|
||||
for chunk in iter(lambda: f.read(const.CHUNK_SIZE), b""):
|
||||
file_hash.update(chunk)
|
||||
actual_hash = file_hash.hexdigest()
|
||||
if actual_hash != expected_hash:
|
||||
Logger.warning(f"Expected hash: {expected_hash}, but got: {actual_hash}")
|
||||
return actual_hash == expected_hash
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
model_names = [
|
||||
'retinaface_mnet025',
|
||||
'retinaface_mnet050',
|
||||
'retinaface_mnet_v1',
|
||||
'retinaface_mnet_v2',
|
||||
'retinaface_r18',
|
||||
'retinaface_r34'
|
||||
]
|
||||
|
||||
# Download each model in the list
|
||||
for model_name in model_names:
|
||||
model_path = verify_model_weights(model_name)
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
import hashlib
|
||||
import os
|
||||
|
||||
import requests
|
||||
from tqdm import tqdm
|
||||
|
||||
import uniface.constants as const
|
||||
from uniface.log import Logger
|
||||
|
||||
__all__ = ['verify_model_weights']
|
||||
|
||||
|
||||
def verify_model_weights(model_name: str, root: str = '~/.uniface/models') -> str:
|
||||
"""
|
||||
Ensure model weights are present, downloading and verifying them using SHA-256 if necessary.
|
||||
|
||||
Given a model identifier from an Enum class (e.g., `RetinaFaceWeights.MNET_V2`), this function checks if
|
||||
the corresponding `.onnx` weight file exists locally. If not, it downloads the file from a predefined URL.
|
||||
After download, the file’s integrity is verified using a SHA-256 hash. If verification fails, the file is deleted
|
||||
and an error is raised.
|
||||
|
||||
Args:
|
||||
model_name (Enum): Model weight identifier (e.g., `RetinaFaceWeights.MNET_V2`, `ArcFaceWeights.RESNET`, etc.).
|
||||
root (str, optional): Directory to store or locate the model weights. Defaults to '~/.uniface/models'.
|
||||
|
||||
Returns:
|
||||
str: Absolute path to the verified model weights file.
|
||||
|
||||
Raises:
|
||||
ValueError: If the model is unknown or SHA-256 verification fails.
|
||||
ConnectionError: If downloading the file fails.
|
||||
|
||||
Examples:
|
||||
>>> from uniface.models import RetinaFaceWeights, verify_model_weights
|
||||
>>> verify_model_weights(RetinaFaceWeights.MNET_V2)
|
||||
'/home/user/.uniface/models/retinaface_mnet_v2.onnx'
|
||||
|
||||
>>> verify_model_weights(RetinaFaceWeights.RESNET34, root='/custom/dir')
|
||||
'/custom/dir/retinaface_r34.onnx'
|
||||
"""
|
||||
|
||||
root = os.path.expanduser(root)
|
||||
os.makedirs(root, exist_ok=True)
|
||||
|
||||
# Keep model_name as enum for dictionary lookup
|
||||
url = const.MODEL_URLS.get(model_name)
|
||||
if not url:
|
||||
Logger.error(f"No URL found for model '{model_name}'")
|
||||
raise ValueError(f"No URL found for model '{model_name}'")
|
||||
|
||||
file_ext = os.path.splitext(url)[1]
|
||||
model_path = os.path.normpath(os.path.join(root, f'{model_name.value}{file_ext}'))
|
||||
|
||||
if not os.path.exists(model_path):
|
||||
Logger.info(f"Downloading model '{model_name}' from {url}")
|
||||
try:
|
||||
download_file(url, model_path)
|
||||
Logger.info(f"Successfully downloaded '{model_name}' to {model_path}")
|
||||
except Exception as e:
|
||||
Logger.error(f"Failed to download model '{model_name}': {e}")
|
||||
raise ConnectionError(f"Download failed for '{model_name}'") from e
|
||||
|
||||
expected_hash = const.MODEL_SHA256.get(model_name)
|
||||
if expected_hash and not verify_file_hash(model_path, expected_hash):
|
||||
os.remove(model_path) # Remove corrupted file
|
||||
Logger.warning('Corrupted weight detected. Removing...')
|
||||
raise ValueError(f"Hash mismatch for '{model_name}'. The file may be corrupted; please try downloading again.")
|
||||
|
||||
return model_path
|
||||
|
||||
|
||||
def download_file(url: str, dest_path: str) -> None:
|
||||
"""Download a file from a URL in chunks and save it to the destination path."""
|
||||
try:
|
||||
response = requests.get(url, stream=True)
|
||||
response.raise_for_status()
|
||||
with (
|
||||
open(dest_path, 'wb') as file,
|
||||
tqdm(
|
||||
desc=f'Downloading {dest_path}',
|
||||
unit='B',
|
||||
unit_scale=True,
|
||||
unit_divisor=1024,
|
||||
) as progress,
|
||||
):
|
||||
for chunk in response.iter_content(chunk_size=const.CHUNK_SIZE):
|
||||
if chunk:
|
||||
file.write(chunk)
|
||||
progress.update(len(chunk))
|
||||
except requests.RequestException as e:
|
||||
raise ConnectionError(f'Failed to download file from {url}. Error: {e}') from e
|
||||
|
||||
|
||||
def verify_file_hash(file_path: str, expected_hash: str) -> bool:
|
||||
"""Compute the SHA-256 hash of the file and compare it with the expected hash."""
|
||||
file_hash = hashlib.sha256()
|
||||
with open(file_path, 'rb') as f:
|
||||
for chunk in iter(lambda: f.read(const.CHUNK_SIZE), b''):
|
||||
file_hash.update(chunk)
|
||||
actual_hash = file_hash.hexdigest()
|
||||
if actual_hash != expected_hash:
|
||||
Logger.warning(f'Expected hash: {expected_hash}, but got: {actual_hash}')
|
||||
return actual_hash == expected_hash
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
model_names = [model.value for model in const.RetinaFaceWeights]
|
||||
|
||||
# Download each model in the list
|
||||
for model_name in model_names:
|
||||
model_path = verify_model_weights(model_name)
|
||||
|
||||
99
uniface/onnx_utils.py
Normal file
@@ -0,0 +1,99 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from typing import List
|
||||
|
||||
import onnxruntime as ort
|
||||
|
||||
from uniface.log import Logger
|
||||
|
||||
|
||||
def get_available_providers() -> List[str]:
|
||||
"""
|
||||
Get list of available ONNX Runtime execution providers for the current platform.
|
||||
|
||||
Automatically detects and prioritizes hardware acceleration:
|
||||
- CoreML on Apple Silicon (M1/M2/M3/M4)
|
||||
- CUDA on NVIDIA GPUs
|
||||
- CPU as fallback (always available)
|
||||
|
||||
Returns:
|
||||
List[str]: Ordered list of execution providers to use
|
||||
|
||||
Examples:
|
||||
>>> providers = get_available_providers()
|
||||
>>> # On M4 Mac: ['CoreMLExecutionProvider', 'CPUExecutionProvider']
|
||||
>>> # On Linux with CUDA: ['CUDAExecutionProvider', 'CPUExecutionProvider']
|
||||
>>> # On CPU-only: ['CPUExecutionProvider']
|
||||
"""
|
||||
available = ort.get_available_providers()
|
||||
providers = []
|
||||
|
||||
# Priority order: CoreML > CUDA > CPU
|
||||
if 'CoreMLExecutionProvider' in available:
|
||||
providers.append('CoreMLExecutionProvider')
|
||||
Logger.info('CoreML acceleration enabled (Apple Silicon)')
|
||||
|
||||
if 'CUDAExecutionProvider' in available:
|
||||
providers.append('CUDAExecutionProvider')
|
||||
Logger.info('CUDA acceleration enabled (NVIDIA GPU)')
|
||||
|
||||
# CPU is always available as fallback
|
||||
providers.append('CPUExecutionProvider')
|
||||
|
||||
if len(providers) == 1:
|
||||
Logger.info('Using CPU execution (no hardware acceleration detected)')
|
||||
|
||||
return providers
|
||||
|
||||
|
||||
def create_onnx_session(model_path: str, providers: List[str] = None) -> ort.InferenceSession:
|
||||
"""
|
||||
Create an ONNX Runtime inference session with optimal provider selection.
|
||||
|
||||
Args:
|
||||
model_path (str): Path to the ONNX model file
|
||||
providers (List[str], optional): List of providers to use.
|
||||
If None, automatically detects best available providers.
|
||||
|
||||
Returns:
|
||||
ort.InferenceSession: Configured ONNX Runtime session
|
||||
|
||||
Raises:
|
||||
RuntimeError: If session creation fails
|
||||
|
||||
Examples:
|
||||
>>> session = create_onnx_session("model.onnx")
|
||||
>>> # Automatically uses best available providers
|
||||
|
||||
>>> session = create_onnx_session("model.onnx", providers=["CPUExecutionProvider"])
|
||||
>>> # Force CPU-only execution
|
||||
"""
|
||||
if providers is None:
|
||||
providers = get_available_providers()
|
||||
|
||||
# Suppress ONNX Runtime warnings (e.g., CoreML partition warnings)
|
||||
# Log levels: 0=VERBOSE, 1=INFO, 2=WARNING, 3=ERROR, 4=FATAL
|
||||
sess_options = ort.SessionOptions()
|
||||
sess_options.log_severity_level = 3 # Only show ERROR and FATAL
|
||||
|
||||
try:
|
||||
session = ort.InferenceSession(model_path, sess_options=sess_options, providers=providers)
|
||||
active_provider = session.get_providers()[0]
|
||||
Logger.debug(f'Session created with provider: {active_provider}')
|
||||
|
||||
# Show user-friendly message about which provider is being used
|
||||
provider_names = {
|
||||
'CoreMLExecutionProvider': 'CoreML (Apple Silicon)',
|
||||
'CUDAExecutionProvider': 'CUDA (NVIDIA GPU)',
|
||||
'CPUExecutionProvider': 'CPU',
|
||||
}
|
||||
provider_display = provider_names.get(active_provider, active_provider)
|
||||
Logger.debug(f'Model loaded with provider: {active_provider}')
|
||||
print(f'✓ Model loaded ({provider_display})')
|
||||
|
||||
return session
|
||||
except Exception as e:
|
||||
Logger.error(f'Failed to create ONNX session: {e}', exc_info=True)
|
||||
raise RuntimeError(f'Failed to initialize ONNX Runtime session: {e}') from e
|
||||
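When it is unclear whether hardware acceleration is actually in use, the helpers above can be queried directly. A small sketch, with 'model.onnx' standing in for any real model path:

import onnxruntime as ort
from uniface.onnx_utils import create_onnx_session, get_available_providers

print(ort.get_available_providers())        # everything the runtime build supports
print(get_available_providers())            # what uniface will pick, in priority order

session = create_onnx_session('model.onnx', providers=['CPUExecutionProvider'])
print(session.get_providers())              # confirm the forced CPU-only execution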
61
uniface/parsing/__init__.py
Normal file
@@ -0,0 +1,61 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from typing import Union
|
||||
|
||||
from uniface.constants import ParsingWeights
|
||||
|
||||
from .base import BaseFaceParser
|
||||
from .bisenet import BiSeNet
|
||||
|
||||
__all__ = ['BaseFaceParser', 'BiSeNet', 'create_face_parser']
|
||||
|
||||
|
||||
def create_face_parser(
|
||||
model_name: Union[str, ParsingWeights] = ParsingWeights.RESNET18,
|
||||
) -> BaseFaceParser:
|
||||
"""
|
||||
Factory function to create a face parsing model instance.
|
||||
|
||||
This function provides a convenient way to instantiate face parsing models
|
||||
without directly importing the specific model classes. It supports both
|
||||
string-based and enum-based model selection.
|
||||
|
||||
Args:
|
||||
model_name (Union[str, ParsingWeights]): The face parsing model to create.
|
||||
Can be either a string or a ParsingWeights enum value.
|
||||
Available options:
|
||||
- 'parsing_resnet18' or ParsingWeights.RESNET18 (default)
|
||||
- 'parsing_resnet34' or ParsingWeights.RESNET34
|
||||
|
||||
Returns:
|
||||
BaseFaceParser: An instance of the requested face parsing model.
|
||||
|
||||
Raises:
|
||||
ValueError: If the model_name is not recognized.
|
||||
|
||||
Examples:
|
||||
>>> # Using enum
|
||||
>>> from uniface.parsing import create_face_parser
|
||||
>>> from uniface.constants import ParsingWeights
|
||||
>>> parser = create_face_parser(ParsingWeights.RESNET18)
|
||||
>>>
|
||||
>>> # Using string
|
||||
>>> parser = create_face_parser('parsing_resnet18')
|
||||
>>>
|
||||
>>> # Parse a face image
|
||||
>>> mask = parser.parse(face_crop)
|
||||
"""
|
||||
# Convert string to enum if necessary
|
||||
if isinstance(model_name, str):
|
||||
try:
|
||||
model_name = ParsingWeights(model_name)
|
||||
except ValueError as e:
|
||||
valid_models = [e.value for e in ParsingWeights]
|
||||
raise ValueError(
|
||||
f"Unknown face parsing model: '{model_name}'. Valid options are: {', '.join(valid_models)}"
|
||||
) from e
|
||||
|
||||
# All parsing models use the same BiSeNet class
|
||||
return BiSeNet(model_name=model_name)
|
||||
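A mask of raw class indices is hard to inspect visually, so a common next step is to map each class to a colour and blend the result over the face crop. A minimal sketch follows; the random palette is arbitrary and `face_crop` is a placeholder, not the palette or variable names used by the reference repository.

import cv2
import numpy as np
from uniface.parsing import create_face_parser

parser = create_face_parser('parsing_resnet18')
mask = parser.parse(face_crop)                                  # (H, W) uint8 class indices

rng = np.random.default_rng(0)
palette = rng.integers(0, 256, size=(19, 3), dtype=np.uint8)    # arbitrary colour per class
palette[0] = 0                                                  # keep background black

color_mask = palette[mask]                                      # (H, W, 3), same size as the crop
overlay = cv2.addWeighted(face_crop, 0.6, color_mask, 0.4, 0.0)
cv2.imwrite('parsing_overlay.jpg', overlay)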
106
uniface/parsing/base.py
Normal file
@@ -0,0 +1,106 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Tuple
|
||||
|
||||
import numpy as np
|
||||
|
||||
|
||||
class BaseFaceParser(ABC):
|
||||
"""
|
||||
Abstract base class for all face parsing models.
|
||||
|
||||
This class defines the common interface that all face parsing models must implement,
|
||||
ensuring consistency across different parsing methods. Face parsing segments a face
|
||||
image into semantic regions such as skin, eyes, nose, mouth, hair, etc.
|
||||
|
||||
The output is a segmentation mask where each pixel is assigned a class label
|
||||
representing a facial component.
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def _initialize_model(self) -> None:
|
||||
"""
|
||||
Initialize the underlying model for inference.
|
||||
|
||||
This method should handle loading model weights, creating the
|
||||
inference session (e.g., ONNX Runtime), and any necessary
|
||||
setup procedures to prepare the model for prediction.
|
||||
|
||||
Raises:
|
||||
RuntimeError: If the model fails to load or initialize.
|
||||
"""
|
||||
raise NotImplementedError('Subclasses must implement the _initialize_model method.')
|
||||
|
||||
@abstractmethod
|
||||
def preprocess(self, face_image: np.ndarray) -> np.ndarray:
|
||||
"""
|
||||
Preprocess the input face image for model inference.
|
||||
|
||||
This method should take a raw face crop and convert it into the format
|
||||
expected by the model's inference engine (e.g., normalized tensor).
|
||||
|
||||
Args:
|
||||
face_image (np.ndarray): A face image in BGR format with
|
||||
shape (H, W, C).
|
||||
|
||||
Returns:
|
||||
np.ndarray: The preprocessed image tensor ready for inference,
|
||||
typically with shape (1, C, H, W).
|
||||
"""
|
||||
raise NotImplementedError('Subclasses must implement the preprocess method.')
|
||||
|
||||
@abstractmethod
|
||||
def postprocess(self, outputs: np.ndarray, original_size: Tuple[int, int]) -> np.ndarray:
|
||||
"""
|
||||
Postprocess raw model outputs into a segmentation mask.
|
||||
|
||||
This method takes the raw output from the model's inference and
|
||||
converts it into a segmentation mask at the original image size.
|
||||
|
||||
Args:
|
||||
outputs (np.ndarray): Raw outputs from the model inference.
|
||||
original_size (Tuple[int, int]): Original image size (width, height).
|
||||
|
||||
Returns:
|
||||
np.ndarray: Segmentation mask with the same size as the original image.
|
||||
"""
|
||||
raise NotImplementedError('Subclasses must implement the postprocess method.')
|
||||
|
||||
@abstractmethod
|
||||
def parse(self, face_image: np.ndarray) -> np.ndarray:
|
||||
"""
|
||||
Perform end-to-end face parsing on a face image.
|
||||
|
||||
This method orchestrates the full pipeline: preprocessing the input,
|
||||
running inference, and postprocessing to return the segmentation mask.
|
||||
|
||||
Args:
|
||||
face_image (np.ndarray): A face image in BGR format.
|
||||
The face should be roughly centered and
|
||||
well-framed within the image.
|
||||
|
||||
Returns:
|
||||
np.ndarray: Segmentation mask with the same size as input image,
|
||||
where each pixel value represents a facial component class.
|
||||
|
||||
Example:
|
||||
>>> parser = create_face_parser()
|
||||
>>> mask = parser.parse(face_crop)
|
||||
>>> print(f"Mask shape: {mask.shape}, unique classes: {np.unique(mask)}")
|
||||
"""
|
||||
raise NotImplementedError('Subclasses must implement the parse method.')
|
||||
|
||||
def __call__(self, face_image: np.ndarray) -> np.ndarray:
|
||||
"""
|
||||
Provides a convenient, callable shortcut for the `parse` method.
|
||||
|
||||
Args:
|
||||
face_image (np.ndarray): A face image in BGR format.
|
||||
|
||||
Returns:
|
||||
np.ndarray: Segmentation mask with the same size as input image.
|
||||
"""
|
||||
return self.parse(face_image)
|
||||
166
uniface/parsing/bisenet.py
Normal file
@@ -0,0 +1,166 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from typing import Tuple
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from uniface.constants import ParsingWeights
|
||||
from uniface.log import Logger
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.onnx_utils import create_onnx_session
|
||||
|
||||
from .base import BaseFaceParser
|
||||
|
||||
__all__ = ['BiSeNet']
|
||||
|
||||
|
||||
class BiSeNet(BaseFaceParser):
|
||||
"""
|
||||
BiSeNet: Bilateral Segmentation Network for Face Parsing with ONNX Runtime.
|
||||
|
||||
BiSeNet is a semantic segmentation model that segments a face image into
|
||||
different facial components such as skin, eyes, nose, mouth, hair, etc. The model
|
||||
uses a BiSeNet architecture with ResNet backbone and outputs a segmentation mask
|
||||
where each pixel is assigned a class label.
|
||||
|
||||
The model supports 19 facial component classes including:
|
||||
- Background, skin, eyebrows, eyes, nose, mouth, lips, ears, hair, etc.
|
||||
|
||||
Reference:
|
||||
https://github.com/yakhyo/face-parsing
|
||||
|
||||
Args:
|
||||
model_name (ParsingWeights): The enum specifying the parsing model to load.
|
||||
Options: RESNET18, RESNET34.
|
||||
Defaults to `ParsingWeights.RESNET18`.
|
||||
input_size (Tuple[int, int]): The resolution (width, height) for the model's
|
||||
input. Defaults to (512, 512).
|
||||
|
||||
Attributes:
|
||||
input_size (Tuple[int, int]): Model input dimensions.
|
||||
input_mean (np.ndarray): Per-channel mean values for normalization (ImageNet).
|
||||
input_std (np.ndarray): Per-channel std values for normalization (ImageNet).
|
||||
|
||||
Example:
|
||||
>>> from uniface.parsing import BiSeNet
|
||||
>>> from uniface import RetinaFace
|
||||
>>>
|
||||
>>> detector = RetinaFace()
|
||||
>>> parser = BiSeNet()
|
||||
>>>
|
||||
>>> # Detect faces and parse each face
|
||||
>>> faces = detector.detect(image)
|
||||
>>> for face in faces:
|
||||
... bbox = face['bbox']
|
||||
... x1, y1, x2, y2 = map(int, bbox[:4])
|
||||
... face_crop = image[y1:y2, x1:x2]
|
||||
... mask = parser.parse(face_crop)
|
||||
... print(f"Mask shape: {mask.shape}, unique classes: {np.unique(mask)}")
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
model_name: ParsingWeights = ParsingWeights.RESNET18,
|
||||
input_size: Tuple[int, int] = (512, 512),
|
||||
) -> None:
|
||||
Logger.info(f'Initializing BiSeNet with model={model_name}, input_size={input_size}')
|
||||
|
||||
self.input_size = input_size
|
||||
self.input_mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
|
||||
self.input_std = np.array([0.229, 0.224, 0.225], dtype=np.float32)
|
||||
|
||||
self.model_path = verify_model_weights(model_name)
|
||||
self._initialize_model()
|
||||
|
||||
def _initialize_model(self) -> None:
|
||||
"""
|
||||
Initialize the ONNX model from the stored model path.
|
||||
|
||||
Raises:
|
||||
RuntimeError: If the model fails to load or initialize.
|
||||
"""
|
||||
try:
|
||||
self.session = create_onnx_session(self.model_path)
|
||||
|
||||
# Get input configuration
|
||||
input_cfg = self.session.get_inputs()[0]
|
||||
input_shape = input_cfg.shape
|
||||
self.input_name = input_cfg.name
|
||||
self.input_size = tuple(input_shape[2:4][::-1]) # Update from model
|
||||
|
||||
# Get output configuration
|
||||
outputs = self.session.get_outputs()
|
||||
self.output_names = [output.name for output in outputs]
|
||||
|
||||
Logger.info(f'BiSeNet initialized with input size {self.input_size}')
|
||||
|
||||
except Exception as e:
|
||||
Logger.error(f"Failed to load parsing model from '{self.model_path}'", exc_info=True)
|
||||
raise RuntimeError(f'Failed to initialize parsing model: {e}') from e
|
||||
|
||||
def preprocess(self, face_image: np.ndarray) -> np.ndarray:
|
||||
"""
|
||||
Preprocess a face image for parsing.
|
||||
|
||||
Args:
|
||||
face_image (np.ndarray): A face image in BGR format.
|
||||
|
||||
Returns:
|
||||
np.ndarray: Preprocessed image tensor with shape (1, 3, H, W).
|
||||
"""
|
||||
# Convert BGR to RGB
|
||||
image = cv2.cvtColor(face_image, cv2.COLOR_BGR2RGB)
|
||||
|
||||
# Resize to model input size
|
||||
image = cv2.resize(image, self.input_size, interpolation=cv2.INTER_LINEAR)
|
||||
|
||||
# Normalize to [0, 1] and apply normalization
|
||||
image = image.astype(np.float32) / 255.0
|
||||
image = (image - self.input_mean) / self.input_std
|
||||
|
||||
# HWC -> CHW -> NCHW
|
||||
image = np.transpose(image, (2, 0, 1))
|
||||
image = np.expand_dims(image, axis=0).astype(np.float32)
|
||||
|
||||
return image
|
||||
|
||||
def postprocess(self, outputs: np.ndarray, original_size: Tuple[int, int]) -> np.ndarray:
|
||||
"""
|
||||
Postprocess model output to segmentation mask.
|
||||
|
||||
Args:
|
||||
outputs (np.ndarray): Raw model output.
|
||||
original_size (Tuple[int, int]): Original image size (width, height).
|
||||
|
||||
Returns:
|
||||
np.ndarray: Segmentation mask resized to original dimensions.
|
||||
"""
|
||||
# Get the class with highest probability for each pixel
|
||||
predicted_mask = outputs.squeeze(0).argmax(0).astype(np.uint8)
|
||||
|
||||
# Resize back to original size
|
||||
restored_mask = cv2.resize(predicted_mask, original_size, interpolation=cv2.INTER_NEAREST)
|
||||
|
||||
return restored_mask
|
||||
|
||||
def parse(self, face_image: np.ndarray) -> np.ndarray:
|
||||
"""
|
||||
Perform end-to-end face parsing on a face image.
|
||||
|
||||
This method orchestrates the full pipeline: preprocessing the input,
|
||||
running inference, and postprocessing to return the segmentation mask.
|
||||
|
||||
Args:
|
||||
face_image (np.ndarray): A face image in BGR format.
|
||||
|
||||
Returns:
|
||||
np.ndarray: Segmentation mask with the same size as input image.
|
||||
"""
|
||||
original_size = (face_image.shape[1], face_image.shape[0]) # (width, height)
|
||||
input_tensor = self.preprocess(face_image)
|
||||
outputs = self.session.run(self.output_names, {self.input_name: input_tensor})
|
||||
|
||||
return self.postprocess(outputs[0], original_size)
|
||||
0
uniface/py.typed
Normal file
64
uniface/recognition/__init__.py
Normal file
@@ -0,0 +1,64 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
|
||||
from .base import BaseRecognizer
|
||||
from .models import ArcFace, MobileFace, SphereFace
|
||||
|
||||
|
||||
def create_recognizer(method: str = 'arcface', **kwargs) -> BaseRecognizer:
|
||||
"""
|
||||
Factory function to create face recognizers.
|
||||
|
||||
This function initializes and returns a face recognizer instance based on the
|
||||
specified method. It acts as a high-level interface to the underlying
|
||||
model classes like ArcFace, MobileFace, etc.
|
||||
|
||||
Args:
|
||||
method (str): The recognition method to use.
|
||||
Options: 'arcface' (default), 'mobileface', 'sphereface'.
|
||||
**kwargs: Model-specific parameters passed to the recognizer's constructor.
|
||||
For example, `model_name` can be used to select a specific
|
||||
pre-trained weight from the available enums (e.g., `ArcFaceWeights.MNET`).
|
||||
|
||||
Returns:
|
||||
BaseRecognizer: An initialized recognizer instance ready for use.
|
||||
|
||||
Raises:
|
||||
ValueError: If the specified `method` is not supported.
|
||||
|
||||
Examples:
|
||||
>>> # Create the default ArcFace recognizer
|
||||
>>> recognizer = create_recognizer()
|
||||
|
||||
>>> # Create a specific MobileFace recognizer
|
||||
>>> from uniface.constants import MobileFaceWeights
|
||||
>>> recognizer = create_recognizer(
|
||||
... 'mobileface',
|
||||
... model_name=MobileFaceWeights.MNET_V2
|
||||
... )
|
||||
|
||||
>>> # Create a SphereFace recognizer
|
||||
>>> recognizer = create_recognizer('sphereface')
|
||||
"""
|
||||
method = method.lower()
|
||||
|
||||
if method == 'arcface':
|
||||
return ArcFace(**kwargs)
|
||||
elif method == 'mobileface':
|
||||
return MobileFace(**kwargs)
|
||||
elif method == 'sphereface':
|
||||
return SphereFace(**kwargs)
|
||||
else:
|
||||
available = ['arcface', 'mobileface', 'sphereface']
|
||||
raise ValueError(f"Unsupported method: '{method}'. Available: {available}")
|
||||
|
||||
|
||||
__all__ = [
|
||||
'create_recognizer',
|
||||
'ArcFace',
|
||||
'MobileFace',
|
||||
'SphereFace',
|
||||
'BaseRecognizer',
|
||||
]
|
||||
156
uniface/recognition/base.py
Normal file
@@ -0,0 +1,156 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass
|
||||
from typing import List, Tuple, Union
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from uniface.face_utils import face_alignment
|
||||
from uniface.log import Logger
|
||||
from uniface.onnx_utils import create_onnx_session
|
||||
|
||||
|
||||
@dataclass
|
||||
class PreprocessConfig:
|
||||
"""
|
||||
Configuration for preprocessing images before feeding them into the model.
|
||||
"""
|
||||
|
||||
input_mean: Union[float, List[float]] = 127.5
|
||||
input_std: Union[float, List[float]] = 127.5
|
||||
input_size: Tuple[int, int] = (112, 112)
|
||||
|
||||
|
||||
class BaseRecognizer(ABC):
|
||||
"""
|
||||
Abstract Base Class for all face recognition models.
|
||||
It provides the core functionality for preprocessing, inference, and embedding extraction.
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def __init__(self, model_path: str, preprocessing: PreprocessConfig) -> None:
|
||||
"""
|
||||
Initializes the model. Subclasses must call this.
|
||||
|
||||
Args:
|
||||
model_path (str): The direct path to the verified ONNX model.
|
||||
preprocessing (PreprocessConfig): The configuration for preprocessing.
|
||||
"""
|
||||
self.input_mean = preprocessing.input_mean
|
||||
self.input_std = preprocessing.input_std
|
||||
self.input_size = preprocessing.input_size
|
||||
|
||||
self.model_path = model_path
|
||||
self._initialize_model()
|
||||
|
||||
def _initialize_model(self) -> None:
|
||||
"""
|
||||
Loads the ONNX model and prepares it for inference.
|
||||
|
||||
Raises:
|
||||
RuntimeError: If the model fails to load or initialize.
|
||||
"""
|
||||
try:
|
||||
# Initialize model session with available providers
|
||||
self.session = create_onnx_session(self.model_path)
|
||||
|
||||
# Extract input configuration
|
||||
input_cfg = self.session.get_inputs()[0]
|
||||
self.input_name = input_cfg.name
|
||||
|
||||
# Verify input dimensions match our configuration
|
||||
input_shape = input_cfg.shape
|
||||
model_input_size = tuple(input_shape[2:4][::-1]) # (width, height)
|
||||
if model_input_size != self.input_size:
|
||||
Logger.warning(f'Model input size {model_input_size} differs from configured size {self.input_size}')
|
||||
|
||||
# Extract output configuration
|
||||
self.output_names = [output.name for output in self.session.get_outputs()]
|
||||
self.output_shape = self.session.get_outputs()[0].shape
|
||||
|
||||
assert len(self.output_names) == 1, 'Expected only one output node.'
|
||||
Logger.info(f'Successfully initialized face encoder from {self.model_path}')
|
||||
|
||||
except Exception as e:
|
||||
Logger.error(
|
||||
f"Failed to load face encoder model from '{self.model_path}'",
|
||||
exc_info=True,
|
||||
)
|
||||
raise RuntimeError(f"Failed to initialize model session for '{self.model_path}'") from e
|
||||
|
||||
def preprocess(self, face_img: np.ndarray) -> np.ndarray:
|
||||
"""
|
||||
Preprocess the image: resize, normalize, and convert it to a blob.
|
||||
|
||||
Args:
|
||||
face_img: Input image in BGR format.
|
||||
|
||||
Returns:
|
||||
Preprocessed image as a NumPy array ready for inference.
|
||||
"""
|
||||
resized_img = cv2.resize(face_img, self.input_size)
|
||||
|
||||
if isinstance(self.input_std, (list, tuple)):
|
||||
# Per-channel normalization
|
||||
rgb_img = cv2.cvtColor(resized_img, cv2.COLOR_BGR2RGB).astype(np.float32)
|
||||
normalized_img = (rgb_img - np.array(self.input_mean, dtype=np.float32)) / np.array(
|
||||
self.input_std, dtype=np.float32
|
||||
)
|
||||
|
||||
# Change to NCHW (batch, channels, height, width)
|
||||
blob = np.transpose(normalized_img, (2, 0, 1)) # CHW
|
||||
blob = np.expand_dims(blob, axis=0) # NCHW
|
||||
else:
|
||||
# Single-value normalization
|
||||
blob = cv2.dnn.blobFromImage(
|
||||
resized_img,
|
||||
scalefactor=1.0 / self.input_std,
|
||||
size=self.input_size,
|
||||
mean=(self.input_mean, self.input_mean, self.input_mean),
|
||||
swapRB=True, # Convert BGR to RGB
|
||||
)
|
||||
|
||||
return blob
|
||||
|
||||
def get_embedding(self, image: np.ndarray, landmarks: np.ndarray = None) -> np.ndarray:
|
||||
"""
|
||||
Extracts face embedding from an image.
|
||||
|
||||
Args:
|
||||
image: Input face image (BGR format). If already aligned (112x112), landmarks can be None.
|
||||
landmarks: Facial landmarks (5 points for alignment). Optional if image is already aligned.
|
||||
|
||||
Returns:
|
||||
Face embedding vector (typically 512-dimensional).
|
||||
"""
|
||||
# If landmarks are provided, align the face first
|
||||
if landmarks is not None:
|
||||
aligned_face, _ = face_alignment(image, landmarks, image_size=self.input_size)
|
||||
else:
|
||||
# Assume image is already aligned
|
||||
aligned_face = image
|
||||
|
||||
# Generate embedding from aligned face
|
||||
face_blob = self.preprocess(aligned_face)
|
||||
embedding = self.session.run(self.output_names, {self.input_name: face_blob})[0]
|
||||
|
||||
return embedding
|
||||
|
||||
def get_normalized_embedding(self, image: np.ndarray, landmarks: np.ndarray) -> np.ndarray:
|
||||
"""
|
||||
Extracts an L2-normalized face embedding vector from an image.
|
||||
|
||||
Args:
|
||||
image: Input face image (BGR format).
|
||||
landmarks: Facial landmarks (5 points for alignment).
|
||||
|
||||
Returns:
|
||||
Normalized face embedding vector (typically 512-dimensional).
|
||||
"""
|
||||
embedding = self.get_embedding(image, landmarks)
|
||||
norm = np.linalg.norm(embedding)
|
||||
return embedding / norm if norm > 0 else embedding
|
||||
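A minimal usage sketch (illustrative, assuming `recognizer` is one of the concrete models defined below and that the images and 5-point landmarks come from a face detector):

import numpy as np
emb_a = recognizer.get_normalized_embedding(img_a, lmk_a)
emb_b = recognizer.get_normalized_embedding(img_b, lmk_b)
similarity = float(np.dot(emb_a.ravel(), emb_b.ravel()))  # cosine similarity of unit vectors
same_person = similarity > 0.35  # threshold is model/dataset dependent (illustrative value)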
103
uniface/recognition/models.py
Normal file
@@ -0,0 +1,103 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from typing import Optional
|
||||
|
||||
from uniface.constants import ArcFaceWeights, MobileFaceWeights, SphereFaceWeights
|
||||
from uniface.model_store import verify_model_weights
|
||||
|
||||
from .base import BaseRecognizer, PreprocessConfig
|
||||
|
||||
__all__ = ['ArcFace', 'MobileFace', 'SphereFace']
|
||||
|
||||
|
||||
class ArcFace(BaseRecognizer):
|
||||
"""ArcFace model for robust face recognition.
|
||||
|
||||
This class provides a concrete implementation of the BaseRecognizer,
|
||||
pre-configured for ArcFace models. It handles the loading of specific
|
||||
ArcFace weights and sets up the appropriate default preprocessing.
|
||||
|
||||
Args:
|
||||
model_name (ArcFaceWeights): The specific ArcFace model variant to use.
|
||||
Defaults to `ArcFaceWeights.MNET`.
|
||||
preprocessing (Optional[PreprocessConfig]): An optional custom preprocessing
|
||||
configuration. If None, a default config for ArcFace is used.
|
||||
|
||||
Example:
|
||||
>>> from uniface.recognition import ArcFace
|
||||
>>> recognizer = ArcFace()
|
||||
>>> # embedding = recognizer.get_normalized_embedding(image, landmarks)
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
model_name: ArcFaceWeights = ArcFaceWeights.MNET,
|
||||
preprocessing: Optional[PreprocessConfig] = None,
|
||||
) -> None:
|
||||
if preprocessing is None:
|
||||
preprocessing = PreprocessConfig(input_mean=127.5, input_std=127.5, input_size=(112, 112))
|
||||
model_path = verify_model_weights(model_name)
|
||||
super().__init__(model_path=model_path, preprocessing=preprocessing)
|
||||
|
||||
|
||||
class MobileFace(BaseRecognizer):
|
||||
"""Lightweight MobileFaceNet model for fast face recognition.
|
||||
|
||||
This class provides a concrete implementation of the BaseRecognizer,
|
||||
pre-configured for MobileFaceNet models. It is optimized for speed,
|
||||
making it suitable for edge devices.
|
||||
|
||||
Args:
|
||||
model_name (MobileFaceWeights): The specific MobileFaceNet model variant to use.
|
||||
Defaults to `MobileFaceWeights.MNET_V2`.
|
||||
preprocessing (Optional[PreprocessConfig]): An optional custom preprocessing
|
||||
configuration. If None, a default config for MobileFaceNet is used.
|
||||
|
||||
Example:
|
||||
>>> from uniface.recognition import MobileFace
|
||||
>>> recognizer = MobileFace()
|
||||
>>> # embedding = recognizer.get_normalized_embedding(image, landmarks)
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
model_name: MobileFaceWeights = MobileFaceWeights.MNET_V2,
|
||||
preprocessing: Optional[PreprocessConfig] = None,
|
||||
) -> None:
|
||||
if preprocessing is None:
|
||||
preprocessing = PreprocessConfig(input_mean=127.5, input_std=127.5, input_size=(112, 112))
|
||||
model_path = verify_model_weights(model_name)
|
||||
super().__init__(model_path=model_path, preprocessing=preprocessing)
|
||||
|
||||
|
||||
class SphereFace(BaseRecognizer):
|
||||
"""SphereFace model using angular margin for face recognition.
|
||||
|
||||
This class provides a concrete implementation of the BaseRecognizer,
|
||||
pre-configured for SphereFace models, which were among the first to
|
||||
introduce angular margin loss functions.
|
||||
|
||||
Args:
|
||||
model_name (SphereFaceWeights): The specific SphereFace model variant to use.
|
||||
Defaults to `SphereFaceWeights.SPHERE20`.
|
||||
preprocessing (Optional[PreprocessConfig]): An optional custom preprocessing
|
||||
configuration. If None, a default config for SphereFace is used.
|
||||
|
||||
Example:
|
||||
>>> from uniface.recognition import SphereFace
|
||||
>>> recognizer = SphereFace()
|
||||
>>> # embedding = recognizer.get_normalized_embedding(image, landmarks)
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
model_name: SphereFaceWeights = SphereFaceWeights.SPHERE20,
|
||||
preprocessing: Optional[PreprocessConfig] = None,
|
||||
) -> None:
|
||||
if preprocessing is None:
|
||||
preprocessing = PreprocessConfig(input_mean=127.5, input_std=127.5, input_size=(112, 112))
|
||||
|
||||
model_path = verify_model_weights(model_name)
|
||||
super().__init__(model_path=model_path, preprocessing=preprocessing)
|
||||
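A short sketch showing how the default preprocessing can be overridden; the import path for PreprocessConfig (defined in uniface/recognition/base.py) is an assumption here:

from uniface.recognition import ArcFace
from uniface.recognition.base import PreprocessConfig  # import path assumed
custom_cfg = PreprocessConfig(input_mean=127.5, input_std=128.0, input_size=(112, 112))
recognizer = ArcFace(preprocessing=custom_cfg)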
@@ -1,256 +0,0 @@
|
||||
# Copyright 2024 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
import os
|
||||
import cv2
|
||||
import numpy as np
|
||||
import onnxruntime as ort
|
||||
|
||||
import torch
|
||||
from typing import Tuple, List, Optional, Literal
|
||||
|
||||
from uniface.log import Logger
|
||||
from uniface.model_store import verify_model_weights
|
||||
|
||||
from uniface.common import (
|
||||
nms,
|
||||
resize_image,
|
||||
decode_boxes,
|
||||
generate_anchors,
|
||||
decode_landmarks
|
||||
)
|
||||
|
||||
|
||||
class RetinaFace:
|
||||
"""
|
||||
A class for face detection using the RetinaFace model.
|
||||
|
||||
Args:
|
||||
model (str): Path or identifier of the model weights.
|
||||
conf_thresh (float): Confidence threshold for detections. Defaults to 0.5.
|
||||
nms_thresh (float): Non-maximum suppression threshold. Defaults to 0.4.
|
||||
pre_nms_topk (int): Maximum number of detections before NMS. Defaults to 5000.
|
||||
post_nms_topk (int): Maximum number of detections after NMS. Defaults to 750.
|
||||
dynamic_size (Optional[bool]): Whether to adjust anchor generation dynamically based on image size. Defaults to False.
|
||||
input_size (Optional[Tuple[int, int]]): Static input size for the model (width, height). Defaults to (640, 640).
|
||||
|
||||
Attributes:
|
||||
conf_thresh (float): Confidence threshold for filtering detections.
|
||||
nms_thresh (float): Threshold for NMS to remove duplicate detections.
|
||||
pre_nms_topk (int): Maximum detections to consider before applying NMS.
|
||||
post_nms_topk (int): Maximum detections retained after applying NMS.
|
||||
dynamic_size (bool): Indicates if input size and anchors are dynamically adjusted.
|
||||
input_size (Tuple[int, int]): The model's input image size.
|
||||
_model_path (str): Path to the model weights.
|
||||
_priors (torch.Tensor): Precomputed anchor boxes for static input size.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
model: str,
|
||||
conf_thresh: float = 0.5,
|
||||
nms_thresh: float = 0.4,
|
||||
pre_nms_topk: int = 5000,
|
||||
post_nms_topk: int = 750,
|
||||
dynamic_size: Optional[bool] = False,
|
||||
input_size: Optional[Tuple[int, int]] = (640, 640), # Default input size if dynamic_size=False
|
||||
) -> None:
|
||||
|
||||
self.conf_thresh = conf_thresh
|
||||
self.nms_thresh = nms_thresh
|
||||
self.pre_nms_topk = pre_nms_topk
|
||||
self.post_nms_topk = post_nms_topk
|
||||
self.dynamic_size = dynamic_size
|
||||
self.input_size = input_size
|
||||
|
||||
Logger.info(
|
||||
f"Initializing RetinaFace with model={model}, conf_thresh={conf_thresh}, nms_thresh={nms_thresh}, "
|
||||
f"pre_nms_topk={pre_nms_topk}, post_nms_topk={post_nms_topk}, dynamic_size={dynamic_size}, "
|
||||
f"input_size={input_size}"
|
||||
)
|
||||
|
||||
# Get path to model weights
|
||||
self._model_path = verify_model_weights(model)
|
||||
Logger.info(f"Verified model weights located at: {self._model_path}")
|
||||
|
||||
# Precompute anchors if using static size
|
||||
if not dynamic_size and input_size is not None:
|
||||
self._priors = generate_anchors(image_size=input_size)
|
||||
Logger.debug("Generated anchors for static input size.")
|
||||
|
||||
# Initialize model
|
||||
self._initialize_model(self._model_path)
|
||||
|
||||
def _initialize_model(self, model_path: str) -> None:
|
||||
"""
|
||||
Initializes an ONNX model session from the given path.
|
||||
|
||||
Args:
|
||||
model_path (str): The file path to the ONNX model.
|
||||
|
||||
Raises:
|
||||
RuntimeError: If the model fails to load, logs an error and raises an exception.
|
||||
"""
|
||||
try:
|
||||
self.session = ort.InferenceSession(model_path)
|
||||
self.input_name = self.session.get_inputs()[0].name
|
||||
Logger.info(f"Successfully initialized the model from {model_path}")
|
||||
except Exception as e:
|
||||
Logger.error(f"Failed to load model from '{model_path}': {e}")
|
||||
raise RuntimeError(f"Failed to initialize model session for '{model_path}'") from e
|
||||
|
||||
def preprocess(self, image: np.ndarray) -> np.ndarray:
|
||||
"""Preprocess input image for model inference.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): Input image.
|
||||
|
||||
Returns:
|
||||
np.ndarray: Preprocessed image tensor with shape (1, C, H, W)
|
||||
"""
|
||||
image = np.float32(image) - np.array([104, 117, 123], dtype=np.float32)
|
||||
image = image.transpose(2, 0, 1) # HWC to CHW
|
||||
image = np.expand_dims(image, axis=0) # Add batch dimension (1, C, H, W)
|
||||
return image
|
||||
|
||||
def inference(self, input_tensor: np.ndarray) -> List[np.ndarray]:
|
||||
"""Perform model inference on the preprocessed image tensor.
|
||||
|
||||
Args:
|
||||
input_tensor (np.ndarray): Preprocessed input tensor.
|
||||
|
||||
Returns:
|
||||
List[np.ndarray]: Raw model outputs.
|
||||
"""
|
||||
return self.session.run(None, {self.input_name: input_tensor})
|
||||
|
||||
def detect(
|
||||
self,
|
||||
image: np.ndarray,
|
||||
max_num: Optional[int] = 0,
|
||||
metric: Literal["default", "max"] = "default",
|
||||
center_weight: Optional[float] = 2.0
|
||||
) -> Tuple[np.ndarray, np.ndarray]:
|
||||
"""
|
||||
Perform face detection on an input image and return bounding boxes and landmarks.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): Input image as a NumPy array of shape (height, width, channels).
|
||||
max_num (int, optional): Maximum number of detections to return. Defaults to 0 (return all detections).
|
||||
metric (str, optional): Metric for ranking detections when `max_num` is specified.
|
||||
Options:
|
||||
- "default": Prioritize detections closer to the image center.
|
||||
- "max": Prioritize detections with larger bounding box areas.
|
||||
center_weight (float, optional): Weight for penalizing detections farther from the image center
|
||||
when using the "default" metric. Defaults to 2.0.
|
||||
|
||||
Returns:
|
||||
Tuple[np.ndarray, np.ndarray]: Detection results containing:
|
||||
- detections (np.ndarray): Array of detected bounding boxes with confidence scores.
|
||||
Shape: (num_detections, 5), where each row is [x_min, y_min, x_max, y_max, score].
|
||||
- landmarks (np.ndarray): Array of detected facial landmarks.
|
||||
Shape: (num_detections, 5, 2), where each row contains 5 landmark points (x, y).
|
||||
"""
|
||||
|
||||
if self.dynamic_size:
|
||||
height, width, _ = image.shape
|
||||
self._priors = generate_anchors(image_size=(height, width)) # generate anchors for each input image
|
||||
resize_factor = 1.0 # No resizing
|
||||
else:
|
||||
image, resize_factor = resize_image(image, target_shape=self.input_size)
|
||||
|
||||
height, width, _ = image.shape
|
||||
image_tensor = self.preprocess(image)
|
||||
|
||||
# ONNXRuntime inference
|
||||
outputs = self.inference(image_tensor)
|
||||
|
||||
# Postprocessing
|
||||
detections, landmarks = self.postprocess(outputs, resize_factor, shape=(width, height))
|
||||
|
||||
if max_num > 0 and detections.shape[0] > max_num:
|
||||
# Calculate area of detections
|
||||
areas = (detections[:, 2] - detections[:, 0]) * (detections[:, 3] - detections[:, 1])
|
||||
|
||||
# Calculate offsets from image center
|
||||
center = (height // 2, width // 2)
|
||||
offsets = np.vstack([
|
||||
(detections[:, 0] + detections[:, 2]) / 2 - center[1],
|
||||
(detections[:, 1] + detections[:, 3]) / 2 - center[0]
|
||||
])
|
||||
offset_dist_squared = np.sum(np.power(offsets, 2.0), axis=0)
|
||||
|
||||
# Calculate scores based on the chosen metric
|
||||
if metric == 'max':
|
||||
scores = areas
|
||||
else:
|
||||
scores = areas - offset_dist_squared * center_weight
|
||||
|
||||
# Sort by scores and select top `max_num`
|
||||
sorted_indices = np.argsort(scores)[::-1][:max_num]
|
||||
|
||||
detections = detections[sorted_indices]
|
||||
landmarks = landmarks[sorted_indices]
|
||||
|
||||
return detections, landmarks
|
||||
|
||||
def postprocess(self, outputs: List[np.ndarray], resize_factor: float, shape: Tuple[int, int]) -> Tuple[np.ndarray, np.ndarray]:
|
||||
"""
|
||||
Process the model outputs into final detection results.
|
||||
|
||||
Args:
|
||||
outputs (List[np.ndarray]): Raw outputs from the detection model.
|
||||
- outputs[0]: Location predictions (bounding box coordinates).
|
||||
- outputs[1]: Class confidence scores.
|
||||
- outputs[2]: Landmark predictions.
|
||||
resize_factor (float): Factor used to resize the input image during preprocessing.
|
||||
shape (Tuple[int, int]): Size of the resized input image as (width, height).
|
||||
|
||||
Returns:
|
||||
Tuple[np.ndarray, np.ndarray]: Processed results containing:
|
||||
- detections (np.ndarray): Array of detected bounding boxes with confidence scores.
|
||||
Shape: (num_detections, 5), where each row is [x_min, y_min, x_max, y_max, score].
|
||||
- landmarks (np.ndarray): Array of detected facial landmarks.
|
||||
Shape: (num_detections, 5, 2), where each row contains 5 landmark points (x, y).
|
||||
"""
|
||||
loc, conf, landmarks = outputs[0].squeeze(0), outputs[1].squeeze(0), outputs[2].squeeze(0)
|
||||
|
||||
# Decode boxes and landmarks
|
||||
boxes = decode_boxes(torch.tensor(loc), self._priors).cpu().numpy()
|
||||
landmarks = decode_landmarks(torch.tensor(landmarks), self._priors).cpu().numpy()
|
||||
|
||||
boxes, landmarks = self._scale_detections(boxes, landmarks, resize_factor, shape=(shape[0], shape[1]))
|
||||
|
||||
# Extract confidence scores for the face class
|
||||
scores = conf[:, 1]
|
||||
mask = scores > self.conf_thresh
|
||||
|
||||
# Filter by confidence threshold
|
||||
boxes, landmarks, scores = boxes[mask], landmarks[mask], scores[mask]
|
||||
|
||||
# Sort by scores
|
||||
order = scores.argsort()[::-1][:self.pre_nms_topk]
|
||||
boxes, landmarks, scores = boxes[order], landmarks[order], scores[order]
|
||||
|
||||
# Apply NMS
|
||||
detections = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
|
||||
keep = nms(detections, self.nms_thresh)
|
||||
detections, landmarks = detections[keep], landmarks[keep]
|
||||
|
||||
# Keep top-k detections
|
||||
detections, landmarks = detections[:self.post_nms_topk], landmarks[:self.post_nms_topk]
|
||||
|
||||
landmarks = landmarks.reshape(-1, 5, 2).astype(np.int32)
|
||||
|
||||
return detections, landmarks
|
||||
|
||||
def _scale_detections(self, boxes: np.ndarray, landmarks: np.ndarray, resize_factor: float, shape: Tuple[int, int]) -> Tuple[np.ndarray, np.ndarray]:
|
||||
"""Scale bounding boxes and landmarks to the original image size."""
|
||||
bbox_scale = np.array([shape[0], shape[1]] * 2)
|
||||
boxes = boxes * bbox_scale / resize_factor
|
||||
|
||||
landmark_scale = np.array([shape[0], shape[1]] * 5)
|
||||
landmarks = landmarks * landmark_scale / resize_factor
|
||||
|
||||
return boxes, landmarks
|
||||
@@ -1,15 +0,0 @@
|
||||
# Copyright 2024 Yakhyokhuja Valikhujaev
|
||||
#
|
||||
# Licensed under the MIT License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# https://opensource.org/licenses/MIT
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
__version__ = "0.1.1"
|
||||
__author__ = "Yakhyokhuja Valikhujaev"
|
||||
@@ -1,38 +1,330 @@
|
||||
# Copyright 2024 Yakhyokhuja Valikhujaev
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from typing import List, Tuple, Union
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
# Face parsing component names (19 classes)
|
||||
FACE_PARSING_LABELS = [
|
||||
'background',
|
||||
'skin',
|
||||
'l_brow',
|
||||
'r_brow',
|
||||
'l_eye',
|
||||
'r_eye',
|
||||
'eye_g',
|
||||
'l_ear',
|
||||
'r_ear',
|
||||
'ear_r',
|
||||
'nose',
|
||||
'mouth',
|
||||
'u_lip',
|
||||
'l_lip',
|
||||
'neck',
|
||||
'neck_l',
|
||||
'cloth',
|
||||
'hair',
|
||||
'hat',
|
||||
]
|
||||
|
||||
def draw_detections(image, detections, vis_threshold=0.6):
|
||||
# Color palette for face parsing visualization
|
||||
FACE_PARSING_COLORS = [
|
||||
[0, 0, 0],
|
||||
[255, 85, 0],
|
||||
[255, 170, 0],
|
||||
[255, 0, 85],
|
||||
[255, 0, 170],
|
||||
[0, 255, 0],
|
||||
[85, 255, 0],
|
||||
[170, 255, 0],
|
||||
[0, 255, 85],
|
||||
[0, 255, 170],
|
||||
[0, 0, 255],
|
||||
[85, 0, 255],
|
||||
[170, 0, 255],
|
||||
[0, 85, 255],
|
||||
[0, 170, 255],
|
||||
[255, 255, 0],
|
||||
[255, 255, 85],
|
||||
[255, 255, 170],
|
||||
[255, 0, 255],
|
||||
]
|
||||
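# Note (illustrative): both tables above have 19 entries and are aligned by class index,
# so a name-based lookup can be built with:
#     CLASS_COLOR = dict(zip(FACE_PARSING_LABELS, FACE_PARSING_COLORS))
#     CLASS_COLOR['hair']  # -> [255, 255, 170]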
|
||||
|
||||
def draw_detections(
|
||||
*,
|
||||
image: np.ndarray,
|
||||
bboxes: Union[List[np.ndarray], List[List[float]]],
|
||||
scores: Union[np.ndarray, List[float]],
|
||||
landmarks: Union[List[np.ndarray], List[List[List[float]]]],
|
||||
vis_threshold: float = 0.6,
|
||||
draw_score: bool = False,
|
||||
fancy_bbox: bool = True,
|
||||
):
|
||||
"""
|
||||
Draw bounding boxes and landmarks on the image.
|
||||
Draws bounding boxes, landmarks, and optional scores on an image.
|
||||
|
||||
Args:
|
||||
image (ndarray): Image to draw detections on.
|
||||
detections (tuple): (bounding boxes, landmarks) as NumPy arrays.
|
||||
vis_threshold (float): Confidence threshold for filtering detections.
|
||||
image: Input image to draw on.
|
||||
bboxes: List of bounding boxes [x1, y1, x2, y2].
|
||||
scores: List of confidence scores.
|
||||
landmarks: List of landmark sets with shape (5, 2).
|
||||
vis_threshold: Confidence threshold for filtering. Defaults to 0.6.
|
||||
draw_score: Whether to draw confidence scores. Defaults to False.
fancy_bbox: Whether to draw corner-style bounding boxes. Defaults to True.
|
||||
"""
|
||||
colors = [(0, 0, 255), (0, 255, 255), (255, 0, 255), (0, 255, 0), (255, 0, 0)]
|
||||
|
||||
_colors = [(0, 0, 255), (0, 255, 255), (255, 0, 255), (0, 255, 0), (255, 0, 0)]
|
||||
|
||||
# Unpack detections
|
||||
boxes, landmarks = detections
|
||||
scores = boxes[:, 4]
|
||||
# Calculate line thickness based on image size
|
||||
line_thickness = max(round(sum(image.shape[:2]) / 2 * 0.003), 2)
|
||||
|
||||
# Filter detections by confidence threshold
|
||||
filtered = scores >= vis_threshold
|
||||
boxes = boxes[filtered, :4].astype(np.int32)
|
||||
landmarks = landmarks[filtered]
|
||||
scores = scores[filtered]
|
||||
keep_indices = [i for i, score in enumerate(scores) if score >= vis_threshold]
|
||||
|
||||
print(f"#faces: {len(scores)}")
|
||||
for i in keep_indices:
|
||||
bbox = np.array(bboxes[i], dtype=np.int32)
|
||||
score = scores[i]
|
||||
landmark_set = np.array(landmarks[i], dtype=np.int32)
|
||||
|
||||
# Draw bounding boxes, scores, and landmarks
|
||||
for box, score, landmark in zip(boxes, scores, landmarks):
|
||||
cv2.rectangle(image, box[:2], box[2:], (0, 0, 255), 2)
|
||||
cv2.putText(image, f"{score:.2f}", (box[0], box[1] + 12), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
|
||||
for point, color in zip(landmark, _colors):
|
||||
cv2.circle(image, tuple(point), 2, color, -1)
|
||||
# Calculate dynamic font scale based on bbox height
|
||||
bbox_h = bbox[3] - bbox[1]
|
||||
font_scale = max(0.4, min(0.7, bbox_h / 200))
|
||||
font_thickness = 2
|
||||
|
||||
# Draw bounding box
|
||||
if fancy_bbox:
|
||||
draw_fancy_bbox(image, bbox, color=(0, 255, 0), thickness=line_thickness, proportion=0.2)
|
||||
else:
|
||||
cv2.rectangle(image, tuple(bbox[:2]), tuple(bbox[2:]), (0, 255, 0), line_thickness)
|
||||
|
||||
# Draw confidence score with background
|
||||
if draw_score:
|
||||
text = f'{score:.2f}'
|
||||
(text_width, text_height), baseline = cv2.getTextSize(
|
||||
text, cv2.FONT_HERSHEY_SIMPLEX, font_scale, font_thickness
|
||||
)
|
||||
|
||||
# Draw background rectangle
|
||||
cv2.rectangle(
|
||||
image,
|
||||
(bbox[0], bbox[1] - text_height - baseline - 10),
|
||||
(bbox[0] + text_width + 10, bbox[1]),
|
||||
(0, 255, 0),
|
||||
-1,
|
||||
)
|
||||
|
||||
# Draw text
|
||||
cv2.putText(
|
||||
image,
|
||||
text,
|
||||
(bbox[0] + 5, bbox[1] - 5),
|
||||
cv2.FONT_HERSHEY_SIMPLEX,
|
||||
font_scale,
|
||||
(0, 0, 0),
|
||||
font_thickness,
|
||||
)
|
||||
|
||||
# Draw landmarks
|
||||
for j, point in enumerate(landmark_set):
|
||||
cv2.circle(image, tuple(point), line_thickness + 1, colors[j], -1)
|
||||
|
||||
|
||||
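# Usage sketch (illustrative): assumes a detector that returns boxes of shape (N, 5)
# with the confidence score in column 4 and landmarks of shape (N, 5, 2).
#     boxes, landmarks = detector.detect(frame)
#     draw_detections(
#         image=frame,
#         bboxes=boxes[:, :4],
#         scores=boxes[:, 4],
#         landmarks=landmarks,
#         vis_threshold=0.6,
#         draw_score=True,
#     )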
def draw_fancy_bbox(
|
||||
image: np.ndarray,
|
||||
bbox: np.ndarray,
|
||||
color: Tuple[int, int, int] = (0, 255, 0),
|
||||
thickness: int = 3,
|
||||
proportion: float = 0.2,
|
||||
):
|
||||
"""
|
||||
Draws a bounding box with fancy corners on an image.
|
||||
|
||||
Args:
|
||||
image: Input image to draw on.
|
||||
bbox: Bounding box coordinates [x1, y1, x2, y2].
|
||||
color: Color of the bounding box. Defaults to green.
|
||||
thickness: Thickness of the bounding box lines. Defaults to 3.
|
||||
proportion: Proportion of the corner length to the width/height of the bounding box. Defaults to 0.2.
|
||||
"""
|
||||
x1, y1, x2, y2 = map(int, bbox)
|
||||
width = x2 - x1
|
||||
height = y2 - y1
|
||||
|
||||
corner_length = int(proportion * min(width, height))
|
||||
|
||||
# Draw the rectangle
|
||||
cv2.rectangle(image, (x1, y1), (x2, y2), color, 1)
|
||||
|
||||
# Top-left corner
|
||||
cv2.line(image, (x1, y1), (x1 + corner_length, y1), color, thickness)
|
||||
cv2.line(image, (x1, y1), (x1, y1 + corner_length), color, thickness)
|
||||
|
||||
# Top-right corner
|
||||
cv2.line(image, (x2, y1), (x2 - corner_length, y1), color, thickness)
|
||||
cv2.line(image, (x2, y1), (x2, y1 + corner_length), color, thickness)
|
||||
|
||||
# Bottom-left corner
|
||||
cv2.line(image, (x1, y2), (x1, y2 - corner_length), color, thickness)
|
||||
cv2.line(image, (x1, y2), (x1 + corner_length, y2), color, thickness)
|
||||
|
||||
# Bottom-right corner
|
||||
cv2.line(image, (x2, y2), (x2, y2 - corner_length), color, thickness)
|
||||
cv2.line(image, (x2, y2), (x2 - corner_length, y2), color, thickness)
|
||||
|
||||
|
||||
def draw_gaze(
|
||||
image: np.ndarray,
|
||||
bbox: np.ndarray,
|
||||
pitch: np.ndarray,
|
||||
yaw: np.ndarray,
|
||||
*,
|
||||
draw_bbox: bool = True,
|
||||
fancy_bbox: bool = True,
|
||||
draw_angles: bool = True,
|
||||
):
|
||||
"""
|
||||
Draws gaze direction with optional bounding box on an image.
|
||||
|
||||
Args:
|
||||
image: Input image to draw on (modified in-place).
|
||||
bbox: Face bounding box [x1, y1, x2, y2].
|
||||
pitch: Vertical gaze angle in radians.
|
||||
yaw: Horizontal gaze angle in radians.
|
||||
draw_bbox: Whether to draw the bounding box. Defaults to True.
|
||||
fancy_bbox: Use fancy corner-style bbox. Defaults to True.
|
||||
draw_angles: Whether to display pitch/yaw values as text. Defaults to True.
|
||||
"""
|
||||
x_min, y_min, x_max, y_max = map(int, bbox[:4])
|
||||
|
||||
# Calculate dynamic line thickness based on image size (same as draw_detections)
|
||||
line_thickness = max(round(sum(image.shape[:2]) / 2 * 0.003), 2)
|
||||
|
||||
# Calculate dynamic font scale based on bbox height (same as draw_detections)
|
||||
bbox_h = y_max - y_min
|
||||
font_scale = max(0.4, min(0.7, bbox_h / 200))
|
||||
font_thickness = 2
|
||||
|
||||
# Draw bounding box if requested
|
||||
if draw_bbox:
|
||||
if fancy_bbox:
|
||||
draw_fancy_bbox(image, bbox, color=(0, 255, 0), thickness=line_thickness)
|
||||
else:
|
||||
cv2.rectangle(image, (x_min, y_min), (x_max, y_max), (0, 255, 0), line_thickness)
|
||||
|
||||
# Calculate center of the bounding box
|
||||
x_center = (x_min + x_max) // 2
|
||||
y_center = (y_min + y_max) // 2
|
||||
|
||||
# Calculate the direction of the gaze
|
||||
length = x_max - x_min
|
||||
dx = int(-length * np.sin(pitch) * np.cos(yaw))
|
||||
dy = int(-length * np.sin(yaw))
|
||||
|
||||
point1 = (x_center, y_center)
|
||||
point2 = (x_center + dx, y_center + dy)
|
||||
|
||||
# Calculate dynamic center point radius based on line thickness
|
||||
center_radius = max(line_thickness + 1, 4)
|
||||
|
||||
# Draw gaze direction
|
||||
cv2.circle(image, (x_center, y_center), radius=center_radius, color=(0, 0, 255), thickness=-1)
|
||||
cv2.arrowedLine(
|
||||
image,
|
||||
point1,
|
||||
point2,
|
||||
color=(0, 0, 255),
|
||||
thickness=line_thickness,
|
||||
line_type=cv2.LINE_AA,
|
||||
tipLength=0.25,
|
||||
)
|
||||
|
||||
# Draw angle values
|
||||
if draw_angles:
|
||||
text = f'P:{np.degrees(pitch):.0f}deg Y:{np.degrees(yaw):.0f}deg'
|
||||
(text_width, text_height), baseline = cv2.getTextSize(
|
||||
text, cv2.FONT_HERSHEY_SIMPLEX, font_scale, font_thickness
|
||||
)
|
||||
|
||||
# Draw background rectangle for text
|
||||
cv2.rectangle(
|
||||
image,
|
||||
(x_min, y_min - text_height - baseline - 10),
|
||||
(x_min + text_width + 10, y_min),
|
||||
(0, 0, 255),
|
||||
-1,
|
||||
)
|
||||
|
||||
# Draw text
|
||||
cv2.putText(
|
||||
image,
|
||||
text,
|
||||
(x_min + 5, y_min - 5),
|
||||
cv2.FONT_HERSHEY_SIMPLEX,
|
||||
font_scale,
|
||||
(255, 255, 255),
|
||||
font_thickness,
|
||||
)
|
||||
|
||||
|
||||
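# Usage sketch (illustrative): pitch and yaw are expected in radians, so convert first
# if an estimator returns degrees.
#     pitch_rad, yaw_rad = np.radians(pitch_deg), np.radians(yaw_deg)
#     draw_gaze(frame, bbox, pitch_rad, yaw_rad, draw_bbox=True, draw_angles=True)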
def vis_parsing_maps(
|
||||
image: np.ndarray,
|
||||
segmentation_mask: np.ndarray,
|
||||
*,
|
||||
save_image: bool = False,
|
||||
save_path: str = 'result.png',
|
||||
) -> np.ndarray:
|
||||
"""
|
||||
Visualizes a face parsing segmentation mask by overlaying colored regions on the image.
|
||||
|
||||
Args:
|
||||
image: Input face image in RGB format with shape (H, W, 3).
|
||||
segmentation_mask: Segmentation mask with shape (H, W) where each pixel
|
||||
value represents a facial component class (0-18).
|
||||
save_image: Whether to save the visualization to disk. Defaults to False.
|
||||
save_path: Path to save the visualization if save_image is True.
|
||||
|
||||
Returns:
|
||||
np.ndarray: Blended image with segmentation overlay in BGR format.
|
||||
|
||||
Example:
|
||||
>>> import cv2
|
||||
>>> from uniface.parsing import BiSeNet
|
||||
>>> from uniface.visualization import vis_parsing_maps
|
||||
>>>
|
||||
>>> parser = BiSeNet()
|
||||
>>> face_image = cv2.imread('face.jpg')
|
||||
>>> mask = parser.parse(face_image)
|
||||
>>>
|
||||
>>> # Visualize
|
||||
>>> face_rgb = cv2.cvtColor(face_image, cv2.COLOR_BGR2RGB)
|
||||
>>> result = vis_parsing_maps(face_rgb, mask)
|
||||
>>> cv2.imwrite('parsed_face.jpg', result)
|
||||
"""
|
||||
# Create numpy arrays for image and segmentation mask
|
||||
image = np.array(image).copy().astype(np.uint8)
|
||||
segmentation_mask = segmentation_mask.copy().astype(np.uint8)
|
||||
|
||||
# Create a color mask
|
||||
segmentation_mask_color = np.zeros((segmentation_mask.shape[0], segmentation_mask.shape[1], 3))
|
||||
|
||||
num_classes = np.max(segmentation_mask)
|
||||
|
||||
for class_index in range(1, num_classes + 1):
|
||||
class_pixels = np.where(segmentation_mask == class_index)
|
||||
segmentation_mask_color[class_pixels[0], class_pixels[1], :] = FACE_PARSING_COLORS[class_index]
|
||||
|
||||
segmentation_mask_color = segmentation_mask_color.astype(np.uint8)
|
||||
|
||||
# Convert image to BGR format for blending
|
||||
bgr_image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
|
||||
|
||||
# Blend the image with the segmentation mask
|
||||
blended_image = cv2.addWeighted(bgr_image, 0.6, segmentation_mask_color, 0.4, 0)
|
||||
|
||||
# Save the result if required
|
||||
if save_image:
|
||||
cv2.imwrite(save_path, blended_image, [int(cv2.IMWRITE_JPEG_QUALITY), 100])
|
||||
|
||||
return blended_image