mirror of
https://github.com/yakhyo/uniface.git
synced 2026-05-19 15:41:29 +00:00
Compare commits
38 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
3b6d0a35a9 | ||
|
|
0bd808bcef | ||
|
|
9edf8b6b3d | ||
|
|
efb40f2e91 | ||
|
|
376e7bc488 | ||
|
|
cbcd89b167 | ||
|
|
50226041c9 | ||
|
|
64ad0d2f53 | ||
|
|
7c98a60d26 | ||
|
|
d97a3b2cb2 | ||
|
|
2200ba063c | ||
|
|
9bcbfa65c2 | ||
|
|
96306a0910 | ||
|
|
3389aa3e4c | ||
|
|
b282e6ccc1 | ||
|
|
d085c6a822 | ||
|
|
13b518e96d | ||
|
|
1b877bc9fc | ||
|
|
bb1d209f3b | ||
|
|
54b769c0f1 | ||
|
|
4d1921e531 | ||
|
|
da8a5cf35b | ||
|
|
3982d677a9 | ||
|
|
f4458f0550 | ||
|
|
637316f077 | ||
|
|
6b1d2a1ce6 | ||
|
|
a5e97ac484 | ||
|
|
0c93598007 | ||
|
|
779952e3f8 | ||
|
|
39b50b62bd | ||
|
|
db7532ecf1 | ||
|
|
4b8dc2c0f9 | ||
|
|
0a2a10e165 | ||
|
|
84cda5f56c | ||
|
|
0771a7959a | ||
|
|
15947eb605 | ||
|
|
1ccc4f6b77 | ||
|
|
189755a1a6 |
BIN
.github/logos/gaze_crop.png
vendored
Normal file
BIN
.github/logos/gaze_crop.png
vendored
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 716 KiB |
BIN
.github/logos/gaze_org.png
vendored
Normal file
BIN
.github/logos/gaze_org.png
vendored
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 673 KiB |
38
.github/workflows/ci.yml
vendored
38
.github/workflows/ci.yml
vendored
@@ -10,14 +10,31 @@ on:
|
||||
- main
|
||||
- develop
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
test:
|
||||
lint:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 5
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.11'
|
||||
- uses: pre-commit/action@v3.0.1
|
||||
|
||||
test:
|
||||
runs-on: ${{ matrix.os }}
|
||||
timeout-minutes: 15
|
||||
needs: lint
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
python-version: ["3.10", "3.11", "3.12", "3.13"]
|
||||
os: [ubuntu-latest, macos-latest, windows-latest]
|
||||
python-version: ["3.11", "3.13"]
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
@@ -27,7 +44,7 @@ jobs:
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
cache: 'pip'
|
||||
cache: "pip"
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
@@ -38,21 +55,15 @@ jobs:
|
||||
run: |
|
||||
python -c "import onnxruntime as ort; print('Available providers:', ort.get_available_providers())"
|
||||
|
||||
- name: Lint with ruff (if available)
|
||||
run: |
|
||||
pip install ruff || true
|
||||
ruff check . --exit-zero || true
|
||||
continue-on-error: true
|
||||
|
||||
- name: Run tests
|
||||
run: pytest -v --tb=short
|
||||
|
||||
- name: Test package imports
|
||||
run: |
|
||||
python -c "from uniface import RetinaFace, ArcFace, Landmark106, AgeGender; print('All imports successful')"
|
||||
run: python -c "import uniface; print(f'uniface {uniface.__version__} loaded with {len(uniface.__all__)} exports')"
|
||||
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 10
|
||||
needs: test
|
||||
|
||||
steps:
|
||||
@@ -62,8 +73,8 @@ jobs:
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.10"
|
||||
cache: 'pip'
|
||||
python-version: "3.11"
|
||||
cache: "pip"
|
||||
|
||||
- name: Install build tools
|
||||
run: |
|
||||
@@ -84,4 +95,3 @@ jobs:
|
||||
name: dist-python-${{ github.sha }}
|
||||
path: dist/
|
||||
retention-days: 7
|
||||
|
||||
|
||||
38
.github/workflows/docs.yml
vendored
Normal file
38
.github/workflows/docs.yml
vendored
Normal file
@@ -0,0 +1,38 @@
|
||||
name: Deploy docs
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
workflow_dispatch:
|
||||
|
||||
permissions:
|
||||
contents: write
|
||||
|
||||
jobs:
|
||||
deploy:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0 # Fetch full history for git-committers and git-revision-date plugins
|
||||
|
||||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.11"
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install mkdocs-material pymdown-extensions mkdocs-git-committers-plugin-2 mkdocs-git-revision-date-localized-plugin
|
||||
|
||||
- name: Build docs
|
||||
env:
|
||||
MKDOCS_GIT_COMMITTERS_APIKEY: ${{ secrets.MKDOCS_GIT_COMMITTERS_APIKEY }}
|
||||
run: mkdocs build --strict
|
||||
|
||||
- name: Deploy to GitHub Pages
|
||||
uses: peaceiris/actions-gh-pages@v4
|
||||
with:
|
||||
github_token: ${{ secrets.GITHUB_TOKEN }}
|
||||
publish_dir: ./site
|
||||
destination_dir: docs
|
||||
17
.github/workflows/publish.yml
vendored
17
.github/workflows/publish.yml
vendored
@@ -5,9 +5,14 @@ on:
|
||||
tags:
|
||||
- "v*.*.*" # Trigger only on version tags like v0.1.9
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
validate:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 5
|
||||
outputs:
|
||||
version: ${{ steps.get_version.outputs.version }}
|
||||
tag_version: ${{ steps.get_version.outputs.tag_version }}
|
||||
@@ -16,13 +21,18 @@ jobs:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.11"
|
||||
|
||||
- name: Get version from tag and pyproject.toml
|
||||
id: get_version
|
||||
run: |
|
||||
TAG_VERSION=${GITHUB_REF#refs/tags/v}
|
||||
echo "tag_version=$TAG_VERSION" >> $GITHUB_OUTPUT
|
||||
|
||||
PYPROJECT_VERSION=$(grep -Po '(?<=^version = ")[^"]*' pyproject.toml)
|
||||
PYPROJECT_VERSION=$(python -c "import tomllib; print(tomllib.load(open('pyproject.toml','rb'))['project']['version'])")
|
||||
echo "version=$PYPROJECT_VERSION" >> $GITHUB_OUTPUT
|
||||
|
||||
echo "Tag version: v$TAG_VERSION"
|
||||
@@ -38,12 +48,13 @@ jobs:
|
||||
|
||||
test:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 15
|
||||
needs: validate
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
python-version: ["3.10", "3.11", "3.12", "3.13"]
|
||||
python-version: ["3.11", "3.13"]
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
@@ -65,6 +76,7 @@ jobs:
|
||||
|
||||
publish:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 10
|
||||
needs: [validate, test]
|
||||
permissions:
|
||||
contents: write
|
||||
@@ -105,4 +117,3 @@ jobs:
|
||||
with:
|
||||
files: dist/*
|
||||
generate_release_notes: true
|
||||
|
||||
|
||||
41
.pre-commit-config.yaml
Normal file
41
.pre-commit-config.yaml
Normal file
@@ -0,0 +1,41 @@
|
||||
# Pre-commit configuration for UniFace
|
||||
# See https://pre-commit.com for more information
|
||||
# See https://pre-commit.com/hooks.html for more hooks
|
||||
|
||||
repos:
|
||||
# General file checks
|
||||
- repo: https://github.com/pre-commit/pre-commit-hooks
|
||||
rev: v6.0.0
|
||||
hooks:
|
||||
- id: trailing-whitespace
|
||||
- id: end-of-file-fixer
|
||||
- id: check-yaml
|
||||
exclude: ^mkdocs.yml$
|
||||
- id: check-toml
|
||||
- id: check-added-large-files
|
||||
args: ['--maxkb=1000']
|
||||
- id: check-merge-conflict
|
||||
- id: debug-statements
|
||||
- id: check-ast
|
||||
|
||||
# Ruff - Fast Python linter and formatter
|
||||
- repo: https://github.com/astral-sh/ruff-pre-commit
|
||||
rev: v0.14.10
|
||||
hooks:
|
||||
- id: ruff
|
||||
args: [--fix, --unsafe-fixes, --exit-non-zero-on-fix]
|
||||
- id: ruff-format
|
||||
|
||||
# Security checks
|
||||
- repo: https://github.com/PyCQA/bandit
|
||||
rev: 1.9.2
|
||||
hooks:
|
||||
- id: bandit
|
||||
args: [-c, pyproject.toml]
|
||||
additional_dependencies: ['bandit[toml]']
|
||||
exclude: ^tests/
|
||||
|
||||
# Configuration
|
||||
ci:
|
||||
autofix_commit_msg: 'style: auto-fix by pre-commit hooks'
|
||||
autoupdate_commit_msg: 'chore: update pre-commit hooks'
|
||||
190
CONTRIBUTING.md
Normal file
190
CONTRIBUTING.md
Normal file
@@ -0,0 +1,190 @@
|
||||
# Contributing to UniFace
|
||||
|
||||
Thank you for considering contributing to UniFace! We welcome contributions of all kinds.
|
||||
|
||||
## How to Contribute
|
||||
|
||||
### Reporting Issues
|
||||
|
||||
- Use GitHub Issues to report bugs or suggest features
|
||||
- Include clear descriptions and reproducible examples
|
||||
- Check existing issues before creating new ones
|
||||
|
||||
### Pull Requests
|
||||
|
||||
1. Fork the repository
|
||||
2. Create a new branch for your feature
|
||||
3. Write clear, documented code with type hints
|
||||
4. Add tests for new functionality
|
||||
5. Ensure all tests pass and pre-commit hooks are satisfied
|
||||
6. Submit a pull request with a clear description
|
||||
|
||||
## Development Setup
|
||||
|
||||
```bash
|
||||
git clone https://github.com/yakhyo/uniface.git
|
||||
cd uniface
|
||||
pip install -e ".[dev]"
|
||||
```
|
||||
|
||||
### Setting Up Pre-commit Hooks
|
||||
|
||||
We use [pre-commit](https://pre-commit.com/) to ensure code quality and consistency. Install and configure it:
|
||||
|
||||
```bash
|
||||
# Install pre-commit
|
||||
pip install pre-commit
|
||||
|
||||
# Install the git hooks
|
||||
pre-commit install
|
||||
|
||||
# (Optional) Run against all files
|
||||
pre-commit run --all-files
|
||||
```
|
||||
|
||||
Once installed, pre-commit will automatically run on every commit to check:
|
||||
|
||||
- Code formatting and linting (Ruff)
|
||||
- Security issues (Bandit)
|
||||
- General file hygiene (trailing whitespace, YAML/TOML validity, etc.)
|
||||
|
||||
**Note:** All PRs are automatically checked by CI. The merge button will only be available after all checks pass.
|
||||
|
||||
## Code Style
|
||||
|
||||
This project uses [Ruff](https://docs.astral.sh/ruff/) for linting and formatting, following modern Python best practices. Pre-commit handles all formatting automatically.
|
||||
|
||||
### Style Guidelines
|
||||
|
||||
#### General Rules
|
||||
|
||||
- **Line length:** 120 characters maximum
|
||||
- **Python version:** 3.11+ (use modern syntax)
|
||||
- **Quote style:** Single quotes for strings, double quotes for docstrings
|
||||
|
||||
#### Type Hints
|
||||
|
||||
Use modern Python 3.11+ type hints (PEP 585 and PEP 604):
|
||||
|
||||
```python
|
||||
# Preferred (modern)
|
||||
def process(items: list[str], config: dict[str, int] | None = None) -> tuple[int, str]:
|
||||
...
|
||||
|
||||
# Avoid (legacy)
|
||||
from typing import List, Dict, Optional, Tuple
|
||||
def process(items: List[str], config: Optional[Dict[str, int]] = None) -> Tuple[int, str]:
|
||||
...
|
||||
```
|
||||
|
||||
#### Docstrings
|
||||
|
||||
Use [Google-style docstrings](https://google.github.io/styleguide/pyguide.html#38-comments-and-docstrings) for all public APIs:
|
||||
|
||||
```python
|
||||
def detect_faces(image: np.ndarray, threshold: float = 0.5) -> list[Face]:
|
||||
"""Detect faces in an image.
|
||||
|
||||
Args:
|
||||
image: Input image as a numpy array with shape (H, W, C) in BGR format.
|
||||
threshold: Confidence threshold for filtering detections. Defaults to 0.5.
|
||||
|
||||
Returns:
|
||||
List of Face objects containing bounding boxes, confidence scores,
|
||||
and facial landmarks.
|
||||
|
||||
Raises:
|
||||
ValueError: If the input image has invalid dimensions.
|
||||
|
||||
Example:
|
||||
>>> from uniface import detect_faces
|
||||
>>> faces = detect_faces(image, threshold=0.8)
|
||||
>>> print(f"Found {len(faces)} faces")
|
||||
"""
|
||||
```
|
||||
|
||||
#### Import Order
|
||||
|
||||
Imports are automatically sorted by Ruff in the following order:
|
||||
|
||||
1. **Future** imports (`from __future__ import annotations`)
|
||||
2. **Standard library** (`os`, `sys`, `typing`, etc.)
|
||||
3. **Third-party** (`numpy`, `cv2`, `onnxruntime`, etc.)
|
||||
4. **First-party** (`uniface.*`)
|
||||
5. **Local** (relative imports like `.base`, `.models`)
|
||||
|
||||
```python
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from typing import Any
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from uniface.constants import RetinaFaceWeights
|
||||
from uniface.log import Logger
|
||||
|
||||
from .base import BaseDetector
|
||||
```
|
||||
|
||||
#### Code Comments
|
||||
|
||||
- Add comments for complex logic, magic numbers, and non-obvious behavior
|
||||
- Avoid comments that merely restate the code
|
||||
- Use `# TODO:` with issue links for planned improvements
|
||||
|
||||
```python
|
||||
# RetinaFace FPN strides and corresponding anchor sizes per level
|
||||
steps = [8, 16, 32]
|
||||
min_sizes = [[16, 32], [64, 128], [256, 512]]
|
||||
|
||||
# Add small epsilon to prevent division by zero
|
||||
similarity = np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b) + 1e-5)
|
||||
```
|
||||
|
||||
## Running Tests
|
||||
|
||||
```bash
|
||||
# Run all tests
|
||||
pytest tests/
|
||||
|
||||
# Run with verbose output
|
||||
pytest tests/ -v
|
||||
|
||||
# Run specific test file
|
||||
pytest tests/test_factory.py
|
||||
|
||||
# Run with coverage
|
||||
pytest tests/ --cov=uniface --cov-report=html
|
||||
```
|
||||
|
||||
## Adding New Features
|
||||
|
||||
When adding a new model or feature:
|
||||
|
||||
1. **Create the model class** in the appropriate submodule (e.g., `uniface/detection/`)
|
||||
2. **Add weight constants** to `uniface/constants.py` with URLs and SHA256 hashes
|
||||
3. **Export in `__init__.py`** files at both module and package levels
|
||||
4. **Write tests** in `tests/` directory
|
||||
5. **Add example usage** in `tools/` or update existing notebooks
|
||||
6. **Update documentation** if needed
|
||||
|
||||
## Examples
|
||||
|
||||
Example notebooks demonstrating library usage:
|
||||
|
||||
| Example | Notebook |
|
||||
|---------|----------|
|
||||
| Face Detection | [01_face_detection.ipynb](examples/01_face_detection.ipynb) |
|
||||
| Face Alignment | [02_face_alignment.ipynb](examples/02_face_alignment.ipynb) |
|
||||
| Face Verification | [03_face_verification.ipynb](examples/03_face_verification.ipynb) |
|
||||
| Face Search | [04_face_search.ipynb](examples/04_face_search.ipynb) |
|
||||
| Face Analyzer | [05_face_analyzer.ipynb](examples/05_face_analyzer.ipynb) |
|
||||
| Face Parsing | [06_face_parsing.ipynb](examples/06_face_parsing.ipynb) |
|
||||
| Face Anonymization | [07_face_anonymization.ipynb](examples/07_face_anonymization.ipynb) |
|
||||
| Gaze Estimation | [08_gaze_estimation.ipynb](examples/08_gaze_estimation.ipynb) |
|
||||
|
||||
## Questions?
|
||||
|
||||
Open an issue or start a discussion on GitHub.
|
||||
399
MODELS.md
399
MODELS.md
@@ -1,399 +0,0 @@
|
||||
# UniFace Model Zoo
|
||||
|
||||
Complete guide to all available models, their performance characteristics, and selection criteria.
|
||||
|
||||
---
|
||||
|
||||
## Face Detection Models
|
||||
|
||||
### RetinaFace Family
|
||||
|
||||
RetinaFace models are trained on the WIDER FACE dataset and provide excellent accuracy-speed tradeoffs.
|
||||
|
||||
| Model Name | Params | Size | Easy | Medium | Hard | Use Case |
|
||||
|---------------------|--------|--------|--------|--------|--------|----------------------------|
|
||||
| `MNET_025` | 0.4M | 1.7MB | 88.48% | 87.02% | 80.61% | Mobile/Edge devices |
|
||||
| `MNET_050` | 1.0M | 2.6MB | 89.42% | 87.97% | 82.40% | Mobile/Edge devices |
|
||||
| `MNET_V1` | 3.5M | 3.8MB | 90.59% | 89.14% | 84.13% | Balanced mobile |
|
||||
| `MNET_V2` ⭐ | 3.2M | 3.5MB | 91.70% | 91.03% | 86.60% | **Recommended default** |
|
||||
| `RESNET18` | 11.7M | 27MB | 92.50% | 91.02% | 86.63% | Server/High accuracy |
|
||||
| `RESNET34` | 24.8M | 56MB | 94.16% | 93.12% | 88.90% | Maximum accuracy |
|
||||
|
||||
**Accuracy**: WIDER FACE validation set (Easy/Medium/Hard subsets) - from [RetinaFace paper](https://arxiv.org/abs/1905.00641)
|
||||
**Speed**: Benchmark on your own hardware using `scripts/run_detection.py --iterations 100`
|
||||
|
||||
#### Usage
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace
|
||||
from uniface.constants import RetinaFaceWeights
|
||||
|
||||
# Default (recommended)
|
||||
detector = RetinaFace() # Uses MNET_V2
|
||||
|
||||
# Specific model
|
||||
detector = RetinaFace(
|
||||
model_name=RetinaFaceWeights.MNET_025, # Fastest
|
||||
conf_thresh=0.5,
|
||||
nms_thresh=0.4,
|
||||
input_size=(640, 640)
|
||||
)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### SCRFD Family
|
||||
|
||||
SCRFD (Sample and Computation Redistribution for Efficient Face Detection) models offer state-of-the-art speed-accuracy tradeoffs.
|
||||
|
||||
| Model Name | Params | Size | Easy | Medium | Hard | Use Case |
|
||||
|-----------------|--------|-------|--------|--------|--------|----------------------------|
|
||||
| `SCRFD_500M` | 0.6M | 2.5MB | 90.57% | 88.12% | 68.51% | Real-time applications |
|
||||
| `SCRFD_10G` ⭐ | 4.2M | 17MB | 95.16% | 93.87% | 83.05% | **High accuracy + speed** |
|
||||
|
||||
**Accuracy**: WIDER FACE validation set - from [SCRFD paper](https://arxiv.org/abs/2105.04714)
|
||||
**Speed**: Benchmark on your own hardware using `scripts/run_detection.py --iterations 100`
|
||||
|
||||
#### Usage
|
||||
|
||||
```python
|
||||
from uniface import SCRFD
|
||||
from uniface.constants import SCRFDWeights
|
||||
|
||||
# Fast real-time detection
|
||||
detector = SCRFD(
|
||||
model_name=SCRFDWeights.SCRFD_500M_KPS,
|
||||
conf_thresh=0.5,
|
||||
input_size=(640, 640)
|
||||
)
|
||||
|
||||
# High accuracy
|
||||
detector = SCRFD(
|
||||
model_name=SCRFDWeights.SCRFD_10G_KPS,
|
||||
conf_thresh=0.5
|
||||
)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Face Recognition Models
|
||||
|
||||
### ArcFace
|
||||
|
||||
State-of-the-art face recognition using additive angular margin loss.
|
||||
|
||||
| Model Name | Backbone | Params | Size | Use Case |
|
||||
|-------------|-------------|--------|-------|----------------------------|
|
||||
| `MNET` ⭐ | MobileNet | 2.0M | 8MB | **Balanced (recommended)** |
|
||||
| `RESNET` | ResNet50 | 43.6M | 166MB | Maximum accuracy |
|
||||
|
||||
**Dataset**: Trained on MS1M-V2 (5.8M images, 85K identities)
|
||||
**Accuracy**: Benchmark on your own dataset or use standard face verification benchmarks
|
||||
|
||||
#### Usage
|
||||
|
||||
```python
|
||||
from uniface import ArcFace
|
||||
from uniface.constants import ArcFaceWeights
|
||||
|
||||
# Default (MobileNet backbone)
|
||||
recognizer = ArcFace()
|
||||
|
||||
# High accuracy (ResNet50 backbone)
|
||||
recognizer = ArcFace(model_name=ArcFaceWeights.RESNET)
|
||||
|
||||
# Extract embedding
|
||||
embedding = recognizer.get_normalized_embedding(image, landmarks)
|
||||
# Returns: (1, 512) normalized embedding vector
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### MobileFace
|
||||
|
||||
Lightweight face recognition optimized for mobile devices.
|
||||
|
||||
| Model Name | Backbone | Params | Size | LFW | CALFW | CPLFW | AgeDB-30 | Use Case |
|
||||
|-----------------|-----------------|--------|------|-------|-------|-------|----------|--------------------|
|
||||
| `MNET_025` | MobileNetV1 0.25| 0.36M | 1MB | 98.76%| 92.02%| 82.37%| 90.02% | Ultra-lightweight |
|
||||
| `MNET_V2` ⭐ | MobileNetV2 | 2.29M | 4MB | 99.55%| 94.87%| 86.89%| 95.16% | **Mobile/Edge** |
|
||||
| `MNET_V3_SMALL` | MobileNetV3-S | 1.25M | 3MB | 99.30%| 93.77%| 85.29%| 92.79% | Mobile optimized |
|
||||
| `MNET_V3_LARGE` | MobileNetV3-L | 3.52M | 10MB | 99.53%| 94.56%| 86.79%| 95.13% | Balanced mobile |
|
||||
|
||||
**Dataset**: Trained on MS1M-V2 (5.8M images, 85K identities)
|
||||
**Accuracy**: Evaluated on LFW, CALFW, CPLFW, and AgeDB-30 benchmarks
|
||||
**Note**: These models are lightweight alternatives to ArcFace for resource-constrained environments
|
||||
|
||||
#### Usage
|
||||
|
||||
```python
|
||||
from uniface import MobileFace
|
||||
from uniface.constants import MobileFaceWeights
|
||||
|
||||
# Lightweight
|
||||
recognizer = MobileFace(model_name=MobileFaceWeights.MNET_V2)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### SphereFace
|
||||
|
||||
Face recognition using angular softmax loss.
|
||||
|
||||
| Model Name | Backbone | Params | Size | LFW | CALFW | CPLFW | AgeDB-30 | Use Case |
|
||||
|-------------|----------|--------|------|-------|-------|-------|----------|----------------------|
|
||||
| `SPHERE20` | Sphere20 | 24.5M | 50MB | 99.67%| 95.61%| 88.75%| 96.58% | Research/Comparison |
|
||||
| `SPHERE36` | Sphere36 | 34.6M | 92MB | 99.72%| 95.64%| 89.92%| 96.83% | Research/Comparison |
|
||||
|
||||
**Dataset**: Trained on MS1M-V2 (5.8M images, 85K identities)
|
||||
**Accuracy**: Evaluated on LFW, CALFW, CPLFW, and AgeDB-30 benchmarks
|
||||
**Note**: SphereFace uses angular softmax loss, an approach that predates ArcFace. These models provide good accuracy with moderate resource requirements.
|
||||
|
||||
#### Usage
|
||||
|
||||
```python
|
||||
from uniface import SphereFace
|
||||
from uniface.constants import SphereFaceWeights
|
||||
|
||||
recognizer = SphereFace(model_name=SphereFaceWeights.SPHERE20)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Facial Landmark Models
|
||||
|
||||
### 106-Point Landmark Detection
|
||||
|
||||
High-precision facial landmark localization.
|
||||
|
||||
| Model Name | Points | Params | Size | Use Case |
|
||||
|------------|--------|--------|------|-----------------------------|
|
||||
| `2D106` | 106 | 3.7M | 14MB | Face alignment, analysis |
|
||||
|
||||
**Note**: Provides 106 facial keypoints for detailed face analysis and alignment
|
||||
|
||||
#### Usage
|
||||
|
||||
```python
|
||||
from uniface import Landmark106
|
||||
|
||||
landmarker = Landmark106()
|
||||
landmarks = landmarker.get_landmarks(image, bbox)
|
||||
# Returns: (106, 2) array of (x, y) coordinates
|
||||
```
|
||||
|
||||
**Landmark Groups:**
|
||||
- Face contour: 0-32 (33 points)
|
||||
- Eyebrows: 33-50 (18 points)
|
||||
- Nose: 51-62 (12 points)
|
||||
- Eyes: 63-86 (24 points)
|
||||
- Mouth: 87-105 (19 points)
|
||||
|
||||
---
|
||||
|
||||
## Attribute Analysis Models
|
||||
|
||||
### Age & Gender Detection
|
||||
|
||||
| Model Name | Attributes | Params | Size | Use Case |
|
||||
|------------|-------------|--------|------|-------------------|
|
||||
| `DEFAULT` | Age, Gender | 2.1M | 8MB | General purpose |
|
||||
|
||||
**Dataset**: Trained on CelebA
|
||||
**Note**: Accuracy varies by demographic and image quality. Test on your specific use case.
|
||||
|
||||
#### Usage
|
||||
|
||||
```python
|
||||
from uniface import AgeGender
|
||||
|
||||
predictor = AgeGender()
|
||||
gender, age = predictor.predict(image, bbox)
|
||||
# Returns: ("Male"/"Female", age_in_years)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Emotion Detection
|
||||
|
||||
| Model Name | Classes | Params | Size | Use Case |
|
||||
|--------------|---------|--------|------|-----------------------|
|
||||
| `AFFECNET7` | 7 | 0.5M | 2MB | 7-class emotion |
|
||||
| `AFFECNET8` | 8 | 0.5M | 2MB | 8-class emotion |
|
||||
|
||||
**Classes (7)**: Neutral, Happy, Sad, Surprise, Fear, Disgust, Anger
|
||||
**Classes (8)**: Above + Contempt
|
||||
|
||||
**Dataset**: Trained on AffectNet
|
||||
**Note**: Emotion detection accuracy depends heavily on facial expression clarity and cultural context
|
||||
|
||||
#### Usage
|
||||
|
||||
```python
|
||||
from uniface import Emotion
|
||||
from uniface.constants import DDAMFNWeights
|
||||
|
||||
predictor = Emotion(model_name=DDAMFNWeights.AFFECNET7)
|
||||
emotion, confidence = predictor.predict(image, landmarks)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Model Selection Guide
|
||||
|
||||
### By Use Case
|
||||
|
||||
#### Mobile/Edge Devices
|
||||
- **Detection**: `RetinaFace(MNET_025)` or `SCRFD(SCRFD_500M)`
|
||||
- **Recognition**: `MobileFace(MNET_V2)`
|
||||
- **Priority**: Speed, small model size
|
||||
|
||||
#### Real-Time Applications (Webcam, Video)
|
||||
- **Detection**: `RetinaFace(MNET_V2)` or `SCRFD(SCRFD_500M)`
|
||||
- **Recognition**: `ArcFace(MNET)`
|
||||
- **Priority**: Speed-accuracy balance
|
||||
|
||||
#### High-Accuracy Applications (Security, Verification)
|
||||
- **Detection**: `SCRFD(SCRFD_10G)` or `RetinaFace(RESNET34)`
|
||||
- **Recognition**: `ArcFace(RESNET)`
|
||||
- **Priority**: Maximum accuracy
|
||||
|
||||
#### Server/Cloud Deployment
|
||||
- **Detection**: `SCRFD(SCRFD_10G)`
|
||||
- **Recognition**: `ArcFace(RESNET)`
|
||||
- **Priority**: Accuracy, batch processing
|
||||
|
||||
---
|
||||
|
||||
### By Hardware
|
||||
|
||||
#### Apple Silicon (M1/M2/M3/M4)
|
||||
**Recommended**: All models work well with ARM64 optimizations (automatically included)
|
||||
|
||||
```bash
|
||||
pip install uniface
|
||||
```
|
||||
|
||||
**Recommended models**:
|
||||
- **Fast**: `SCRFD(SCRFD_500M)` - Lightweight, real-time capable
|
||||
- **Balanced**: `RetinaFace(MNET_V2)` - Good accuracy/speed tradeoff
|
||||
- **Accurate**: `SCRFD(SCRFD_10G)` - High accuracy
|
||||
|
||||
**Benchmark on your Mac**: `python scripts/run_detection.py --iterations 100`
|
||||
|
||||
#### NVIDIA GPU (CUDA)
|
||||
**Recommended**: Larger models for maximum throughput
|
||||
|
||||
```bash
|
||||
pip install "uniface[gpu]"
|
||||
```
|
||||
|
||||
**Recommended models**:
|
||||
- **Fast**: `SCRFD(SCRFD_500M)` - Maximum throughput
|
||||
- **Balanced**: `SCRFD(SCRFD_10G)` - Best overall
|
||||
- **Accurate**: `RetinaFace(RESNET34)` - Highest accuracy
|
||||
|
||||
#### CPU Only
|
||||
**Recommended**: Lightweight models
|
||||
|
||||
**Recommended models**:
|
||||
- **Fast**: `RetinaFace(MNET_025)` - Smallest, fastest
|
||||
- **Balanced**: `RetinaFace(MNET_V2)` - Recommended default
|
||||
- **Accurate**: `SCRFD(SCRFD_10G)` - Best accuracy on CPU
|
||||
|
||||
**Note**: FPS values vary significantly based on image size, number of faces, and hardware. Always benchmark on your specific setup.
|
||||
|
||||
---
|
||||
|
||||
## Benchmark Details
|
||||
|
||||
### How to Benchmark
|
||||
|
||||
Run benchmarks on your own hardware:
|
||||
|
||||
```bash
|
||||
# Detection speed
|
||||
python scripts/run_detection.py --image assets/test.jpg --iterations 100
|
||||
|
||||
# Compare models
|
||||
python scripts/run_detection.py --image assets/test.jpg --method retinaface --iterations 100
|
||||
python scripts/run_detection.py --image assets/test.jpg --method scrfd --iterations 100
|
||||
```
|
||||
|
||||
### Accuracy Metrics Explained
|
||||
|
||||
- **WIDER FACE**: Standard face detection benchmark with three difficulty levels
|
||||
- **Easy**: Large faces (>50px), clear backgrounds
|
||||
- **Medium**: Medium-sized faces (30-50px), moderate occlusion
|
||||
- **Hard**: Small faces (<30px), heavy occlusion, blur
|
||||
|
||||
*Accuracy values are from the original papers - see references below*
|
||||
|
||||
- **Model Size**: ONNX model file size (affects download time and memory)
|
||||
- **Params**: Number of model parameters (affects inference speed)
|
||||
|
||||
### Important Notes
|
||||
|
||||
1. **Speed varies by**:
|
||||
- Image resolution
|
||||
- Number of faces in image
|
||||
- Hardware (CPU/GPU/CoreML)
|
||||
- Batch size
|
||||
- Operating system
|
||||
|
||||
2. **Accuracy varies by**:
|
||||
- Image quality
|
||||
- Lighting conditions
|
||||
- Face pose and occlusion
|
||||
- Demographic factors
|
||||
|
||||
3. **Always benchmark on your specific use case** before choosing a model
|
||||
|
||||
---
|
||||
|
||||
## Model Updates
|
||||
|
||||
Models are automatically downloaded and cached on first use. Cache location: `~/.uniface/models/`
|
||||
|
||||
### Manual Model Management
|
||||
|
||||
```python
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.constants import RetinaFaceWeights
|
||||
|
||||
# Download specific model
|
||||
model_path = verify_model_weights(
|
||||
RetinaFaceWeights.MNET_V2,
|
||||
root='./custom_cache'
|
||||
)
|
||||
|
||||
# Models are verified with SHA-256 checksums
|
||||
```
|
||||
|
||||
### Download All Models
|
||||
|
||||
```bash
|
||||
# Using the provided script
|
||||
python scripts/download_model.py
|
||||
|
||||
# Download specific model
|
||||
python scripts/download_model.py --model MNET_V2
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## References
|
||||
|
||||
### Model Training & Architectures
|
||||
|
||||
- **RetinaFace Training**: [yakhyo/retinaface-pytorch](https://github.com/yakhyo/retinaface-pytorch) - PyTorch implementation and training code
|
||||
- **Face Recognition Training**: [yakhyo/face-recognition](https://github.com/yakhyo/face-recognition) - ArcFace, MobileFace, SphereFace training code
|
||||
- **InsightFace**: [deepinsight/insightface](https://github.com/deepinsight/insightface) - Model architectures and pretrained weights
|
||||
|
||||
### Papers
|
||||
|
||||
- **RetinaFace**: [Single-Shot Multi-Level Face Localisation in the Wild](https://arxiv.org/abs/1905.00641)
|
||||
- **SCRFD**: [Sample and Computation Redistribution for Efficient Face Detection](https://arxiv.org/abs/2105.04714)
|
||||
- **ArcFace**: [Additive Angular Margin Loss for Deep Face Recognition](https://arxiv.org/abs/1801.07698)
|
||||
- **SphereFace**: [Deep Hypersphere Embedding for Face Recognition](https://arxiv.org/abs/1704.08063)
|
||||
|
||||
372
QUICKSTART.md
372
QUICKSTART.md
@@ -1,372 +0,0 @@
|
||||
# UniFace Quick Start Guide
|
||||
|
||||
Get up and running with UniFace in 5 minutes! This guide covers the most common use cases.
|
||||
|
||||
---
|
||||
|
||||
## Installation
|
||||
|
||||
```bash
|
||||
# macOS (Apple Silicon) - automatically includes ARM64 optimizations
|
||||
pip install uniface
|
||||
|
||||
# Linux/Windows with NVIDIA GPU
|
||||
pip install "uniface[gpu]"
|
||||
|
||||
# CPU-only (all platforms)
|
||||
pip install uniface
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 1. Face Detection (30 seconds)
|
||||
|
||||
Detect faces in an image:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace
|
||||
|
||||
# Load image
|
||||
image = cv2.imread("photo.jpg")
|
||||
|
||||
# Initialize detector (models auto-download on first use)
|
||||
detector = RetinaFace()
|
||||
|
||||
# Detect faces
|
||||
faces = detector.detect(image)
|
||||
|
||||
# Print results
|
||||
for i, face in enumerate(faces):
|
||||
print(f"Face {i+1}:")
|
||||
print(f" Confidence: {face['confidence']:.2f}")
|
||||
print(f" BBox: {face['bbox']}")
|
||||
print(f" Landmarks: {len(face['landmarks'])} points")
|
||||
```
|
||||
|
||||
**Output:**
|
||||
```
|
||||
Face 1:
|
||||
Confidence: 0.99
|
||||
BBox: [120.5, 85.3, 245.8, 210.6]
|
||||
Landmarks: 5 points
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 2. Visualize Detections (1 minute)
|
||||
|
||||
Draw bounding boxes and landmarks:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
# Detect faces
|
||||
detector = RetinaFace()
|
||||
image = cv2.imread("photo.jpg")
|
||||
faces = detector.detect(image)
|
||||
|
||||
# Extract visualization data
|
||||
bboxes = [f['bbox'] for f in faces]
|
||||
scores = [f['confidence'] for f in faces]
|
||||
landmarks = [f['landmarks'] for f in faces]
|
||||
|
||||
# Draw on image
|
||||
draw_detections(image, bboxes, scores, landmarks, vis_threshold=0.6)
|
||||
|
||||
# Save result
|
||||
cv2.imwrite("output.jpg", image)
|
||||
print("Saved output.jpg")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 3. Face Recognition (2 minutes)
|
||||
|
||||
Compare two faces:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
import numpy as np
|
||||
from uniface import RetinaFace, ArcFace
|
||||
|
||||
# Initialize models
|
||||
detector = RetinaFace()
|
||||
recognizer = ArcFace()
|
||||
|
||||
# Load two images
|
||||
image1 = cv2.imread("person1.jpg")
|
||||
image2 = cv2.imread("person2.jpg")
|
||||
|
||||
# Detect faces
|
||||
faces1 = detector.detect(image1)
|
||||
faces2 = detector.detect(image2)
|
||||
|
||||
if faces1 and faces2:
|
||||
# Extract embeddings
|
||||
emb1 = recognizer.get_normalized_embedding(image1, faces1[0]['landmarks'])
|
||||
emb2 = recognizer.get_normalized_embedding(image2, faces2[0]['landmarks'])
|
||||
|
||||
# Compute similarity (cosine similarity)
|
||||
similarity = np.dot(emb1, emb2.T)[0][0]
|
||||
|
||||
# Interpret result
|
||||
if similarity > 0.6:
|
||||
print(f"Same person (similarity: {similarity:.3f})")
|
||||
else:
|
||||
print(f"Different people (similarity: {similarity:.3f})")
|
||||
else:
|
||||
print("No faces detected")
|
||||
```
|
||||
|
||||
**Similarity thresholds:**
|
||||
- `> 0.6`: Same person (high confidence)
|
||||
- `0.4 - 0.6`: Uncertain (manual review)
|
||||
- `< 0.4`: Different people
|
||||
|
||||
---
|
||||
|
||||
## 4. Webcam Demo (2 minutes)
|
||||
|
||||
Real-time face detection:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
detector = RetinaFace()
|
||||
cap = cv2.VideoCapture(0)
|
||||
|
||||
print("Press 'q' to quit")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
# Detect faces
|
||||
faces = detector.detect(frame)
|
||||
|
||||
# Draw results
|
||||
bboxes = [f['bbox'] for f in faces]
|
||||
scores = [f['confidence'] for f in faces]
|
||||
landmarks = [f['landmarks'] for f in faces]
|
||||
draw_detections(frame, bboxes, scores, landmarks)
|
||||
|
||||
# Show frame
|
||||
cv2.imshow("UniFace - Press 'q' to quit", frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 5. Age & Gender Detection (2 minutes)
|
||||
|
||||
Detect age and gender:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace, AgeGender
|
||||
|
||||
# Initialize models
|
||||
detector = RetinaFace()
|
||||
age_gender = AgeGender()
|
||||
|
||||
# Load image
|
||||
image = cv2.imread("photo.jpg")
|
||||
faces = detector.detect(image)
|
||||
|
||||
# Predict attributes
|
||||
for i, face in enumerate(faces):
|
||||
gender, age = age_gender.predict(image, face['bbox'])
|
||||
print(f"Face {i+1}: {gender}, {age} years old")
|
||||
```
|
||||
|
||||
**Output:**
|
||||
```
|
||||
Face 1: Male, 32 years old
|
||||
Face 2: Female, 28 years old
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 6. Facial Landmarks (2 minutes)
|
||||
|
||||
Detect 106 facial landmarks:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace, Landmark106
|
||||
|
||||
# Initialize models
|
||||
detector = RetinaFace()
|
||||
landmarker = Landmark106()
|
||||
|
||||
# Detect face and landmarks
|
||||
image = cv2.imread("photo.jpg")
|
||||
faces = detector.detect(image)
|
||||
|
||||
if faces:
|
||||
landmarks = landmarker.get_landmarks(image, faces[0]['bbox'])
|
||||
print(f"Detected {len(landmarks)} landmarks")
|
||||
|
||||
# Draw landmarks
|
||||
for x, y in landmarks.astype(int):
|
||||
cv2.circle(image, (x, y), 2, (0, 255, 0), -1)
|
||||
|
||||
cv2.imwrite("landmarks.jpg", image)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 7. Batch Processing (3 minutes)
|
||||
|
||||
Process multiple images:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from pathlib import Path
|
||||
from uniface import RetinaFace
|
||||
|
||||
detector = RetinaFace()
|
||||
|
||||
# Process all images in a folder
|
||||
image_dir = Path("images/")
|
||||
output_dir = Path("output/")
|
||||
output_dir.mkdir(exist_ok=True)
|
||||
|
||||
for image_path in image_dir.glob("*.jpg"):
|
||||
print(f"Processing {image_path.name}...")
|
||||
|
||||
image = cv2.imread(str(image_path))
|
||||
faces = detector.detect(image)
|
||||
|
||||
print(f" Found {len(faces)} face(s)")
|
||||
|
||||
# Save results
|
||||
output_path = output_dir / image_path.name
|
||||
# ... draw and save ...
|
||||
|
||||
print("Done!")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 8. Model Selection
|
||||
|
||||
Choose the right model for your use case:
|
||||
|
||||
### Detection Models
|
||||
|
||||
```python
|
||||
from uniface.detection import RetinaFace, SCRFD
|
||||
from uniface.constants import RetinaFaceWeights, SCRFDWeights
|
||||
|
||||
# Fast detection (mobile/edge devices)
|
||||
detector = RetinaFace(
|
||||
model_name=RetinaFaceWeights.MNET_025,
|
||||
conf_thresh=0.7
|
||||
)
|
||||
|
||||
# Balanced (recommended)
|
||||
detector = RetinaFace(
|
||||
model_name=RetinaFaceWeights.MNET_V2
|
||||
)
|
||||
|
||||
# High accuracy (server/GPU)
|
||||
detector = SCRFD(
|
||||
model_name=SCRFDWeights.SCRFD_10G_KPS,
|
||||
conf_thresh=0.5
|
||||
)
|
||||
```
|
||||
|
||||
### Recognition Models
|
||||
|
||||
```python
|
||||
from uniface import ArcFace, MobileFace, SphereFace
|
||||
from uniface.constants import MobileFaceWeights, SphereFaceWeights
|
||||
|
||||
# ArcFace (recommended for most use cases)
|
||||
recognizer = ArcFace() # Best accuracy
|
||||
|
||||
# MobileFace (lightweight for mobile/edge)
|
||||
recognizer = MobileFace(model_name=MobileFaceWeights.MNET_V2) # Fast, small size
|
||||
|
||||
# SphereFace (angular margin approach)
|
||||
recognizer = SphereFace(model_name=SphereFaceWeights.SPHERE20) # Alternative method
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Common Issues
|
||||
|
||||
### 1. Models Not Downloading
|
||||
|
||||
```python
|
||||
# Manually download a model
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.constants import RetinaFaceWeights
|
||||
|
||||
model_path = verify_model_weights(RetinaFaceWeights.MNET_V2)
|
||||
print(f"Model downloaded to: {model_path}")
|
||||
```
|
||||
|
||||
### 2. Check Hardware Acceleration
|
||||
|
||||
```python
|
||||
import onnxruntime as ort
|
||||
print("Available providers:", ort.get_available_providers())
|
||||
|
||||
# macOS M-series should show: ['CoreMLExecutionProvider', ...]
|
||||
# NVIDIA GPU should show: ['CUDAExecutionProvider', ...]
|
||||
```
|
||||
|
||||
### 3. Slow Performance on Mac
|
||||
|
||||
The standard installation includes ARM64 optimizations for Apple Silicon. If performance is slow, verify you're using the ARM64 build of Python:
|
||||
|
||||
```bash
|
||||
python -c "import platform; print(platform.machine())"
|
||||
# Should show: arm64 (not x86_64)
|
||||
```
|
||||
|
||||
### 4. Import Errors
|
||||
|
||||
```python
|
||||
# Correct imports
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.recognition import ArcFace
|
||||
from uniface.landmark import Landmark106
|
||||
|
||||
# Wrong imports
|
||||
from uniface import retinaface # Module, not class
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Next Steps
|
||||
|
||||
- **Detailed Examples**: Check the [examples/](examples/) folder for Jupyter notebooks
|
||||
- **Model Benchmarks**: See [MODELS.md](MODELS.md) for performance comparisons
|
||||
- **Full Documentation**: Read [README.md](README.md) for the complete API reference
|
||||
|
||||
---
|
||||
|
||||
## References
|
||||
|
||||
- **RetinaFace Training**: [yakhyo/retinaface-pytorch](https://github.com/yakhyo/retinaface-pytorch)
|
||||
- **Face Recognition Training**: [yakhyo/face-recognition](https://github.com/yakhyo/face-recognition)
|
||||
- **InsightFace**: [deepinsight/insightface](https://github.com/deepinsight/insightface)
|
||||
|
||||
---
|
||||
|
||||
Happy coding! 🚀
|
||||
|
||||
448
README.md
448
README.md
@@ -1,441 +1,125 @@
|
||||
# UniFace: All-in-One Face Analysis Library
|
||||
|
||||
[](https://opensource.org/licenses/MIT)
|
||||

|
||||
[](https://pypi.org/project/uniface/)
|
||||
[](https://github.com/yakhyo/uniface/actions)
|
||||
[](https://pepy.tech/project/uniface)
|
||||
|
||||
<div align="center">
|
||||
<img src=".github/logos/logo_web.webp" width=75%>
|
||||
|
||||
[](https://pypi.org/project/uniface/)
|
||||
[](https://www.python.org/)
|
||||
[](https://opensource.org/licenses/MIT)
|
||||
[](https://github.com/yakhyo/uniface/actions)
|
||||
[](https://pepy.tech/project/uniface)
|
||||
[](https://yakhyo.github.io/uniface/)
|
||||
|
||||
</div>
|
||||
|
||||
**UniFace** is a lightweight, production-ready face analysis library built on ONNX Runtime. It provides high-performance face detection, recognition, landmark detection, and attribute analysis with hardware acceleration support across platforms.
|
||||
<div align="center">
|
||||
<img src=".github/logos/logo_web.webp" width=80%>
|
||||
</div>
|
||||
|
||||
**UniFace** is a lightweight, production-ready face analysis library built on ONNX Runtime. It provides high-performance face detection, recognition, landmark detection, face parsing, gaze estimation, and attribute analysis with hardware acceleration support across platforms.
|
||||
|
||||
> 💬 **Have questions?** [Chat with this codebase on DeepWiki](https://deepwiki.com/yakhyo/uniface) - AI-powered docs that let you ask anything about UniFace.
|
||||
|
||||
---
|
||||
|
||||
## Features
|
||||
|
||||
- **High-Speed Face Detection**: ONNX-optimized RetinaFace and SCRFD models
|
||||
- **Facial Landmark Detection**: Accurate 106-point landmark localization
|
||||
- **Face Recognition**: ArcFace, MobileFace, and SphereFace embeddings
|
||||
- **Attribute Analysis**: Age, gender, and emotion detection
|
||||
- **Face Alignment**: Precise alignment for downstream tasks
|
||||
- **Hardware Acceleration**: ARM64 optimizations (Apple Silicon), CUDA (NVIDIA), CPU fallback
|
||||
- **Simple API**: Intuitive factory functions and clean interfaces
|
||||
- **Production-Ready**: Type hints, comprehensive logging, PEP8 compliant
|
||||
- **Face Detection** — RetinaFace, SCRFD, and YOLOv5-Face with 5-point landmarks
|
||||
- **Face Recognition** — ArcFace, MobileFace, and SphereFace embeddings
|
||||
- **Facial Landmarks** — 106-point landmark localization
|
||||
- **Face Parsing** — BiSeNet semantic segmentation (19 classes)
|
||||
- **Gaze Estimation** — Real-time gaze direction with MobileGaze
|
||||
- **Attribute Analysis** — Age, gender, race (FairFace), and emotion
|
||||
- **Anti-Spoofing** — Face liveness detection with MiniFASNet
|
||||
- **Face Anonymization** — 5 blur methods for privacy protection
|
||||
- **Hardware Acceleration** — ARM64 (Apple Silicon), CUDA (NVIDIA), CPU
|
||||
|
||||
---
|
||||
|
||||
## Installation
|
||||
|
||||
### Quick Install (All Platforms)
|
||||
|
||||
```bash
|
||||
# Standard installation
|
||||
pip install uniface
|
||||
```
|
||||
|
||||
### Platform-Specific Installation
|
||||
|
||||
#### macOS (Apple Silicon - M1/M2/M3/M4)
|
||||
|
||||
For Apple Silicon Macs, the standard installation automatically includes optimized ARM64 support:
|
||||
|
||||
```bash
|
||||
pip install uniface
|
||||
```
|
||||
|
||||
The base `onnxruntime` package (included with uniface) has had native Apple Silicon support with built-in ARM64 optimizations since version 1.13.
|
||||
|
||||
#### Linux/Windows with NVIDIA GPU
|
||||
|
||||
For CUDA acceleration on NVIDIA GPUs:
|
||||
|
||||
```bash
|
||||
# GPU support (CUDA)
|
||||
pip install uniface[gpu]
|
||||
```
|
||||
|
||||
**Requirements:**
|
||||
- CUDA 11.x or 12.x
|
||||
- cuDNN 8.x
|
||||
- See [ONNX Runtime GPU requirements](https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html)
|
||||
|
||||
#### CPU-Only (All Platforms)
|
||||
|
||||
```bash
|
||||
pip install uniface
|
||||
```
|
||||
|
||||
### Install from Source
|
||||
|
||||
```bash
|
||||
# From source
|
||||
git clone https://github.com/yakhyo/uniface.git
|
||||
cd uniface
|
||||
pip install -e .
|
||||
cd uniface && pip install -e .
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Quick Start
|
||||
|
||||
### Face Detection
|
||||
## Quick Example
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace
|
||||
|
||||
# Initialize detector
|
||||
# Initialize detector (models auto-download on first use)
|
||||
detector = RetinaFace()
|
||||
|
||||
# Load image
|
||||
image = cv2.imread("image.jpg")
|
||||
|
||||
# Detect faces
|
||||
image = cv2.imread("photo.jpg")
|
||||
faces = detector.detect(image)
|
||||
|
||||
# Process results
|
||||
for face in faces:
|
||||
bbox = face['bbox'] # [x1, y1, x2, y2]
|
||||
confidence = face['confidence']
|
||||
landmarks = face['landmarks'] # 5-point landmarks
|
||||
print(f"Face detected with confidence: {confidence:.2f}")
|
||||
print(f"Confidence: {face.confidence:.2f}")
|
||||
print(f"BBox: {face.bbox}")
|
||||
print(f"Landmarks: {face.landmarks.shape}")
|
||||
```
|
||||
|
||||
### Face Recognition
|
||||
|
||||
```python
|
||||
from uniface import ArcFace, RetinaFace
|
||||
from uniface import compute_similarity
|
||||
|
||||
# Initialize models
|
||||
detector = RetinaFace()
|
||||
recognizer = ArcFace()
|
||||
|
||||
# Detect and extract embeddings
|
||||
faces1 = detector.detect(image1)
|
||||
faces2 = detector.detect(image2)
|
||||
|
||||
embedding1 = recognizer.get_normalized_embedding(image1, faces1[0]['landmarks'])
|
||||
embedding2 = recognizer.get_normalized_embedding(image2, faces2[0]['landmarks'])
|
||||
|
||||
# Compare faces
|
||||
similarity = compute_similarity(embedding1, embedding2)
|
||||
print(f"Similarity: {similarity:.4f}")
|
||||
```
|
||||
|
||||
### Facial Landmarks
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace, Landmark106
|
||||
|
||||
detector = RetinaFace()
|
||||
landmarker = Landmark106()
|
||||
|
||||
faces = detector.detect(image)
|
||||
landmarks = landmarker.get_landmarks(image, faces[0]['bbox'])
|
||||
# Returns 106 (x, y) landmark points
|
||||
```
|
||||
|
||||
### Age & Gender Detection
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace, AgeGender
|
||||
|
||||
detector = RetinaFace()
|
||||
age_gender = AgeGender()
|
||||
|
||||
faces = detector.detect(image)
|
||||
gender, age = age_gender.predict(image, faces[0]['bbox'])
|
||||
print(f"{gender}, {age} years old")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Documentation
|
||||
|
||||
- [**QUICKSTART.md**](QUICKSTART.md) - 5-minute getting started guide
|
||||
- [**MODELS.md**](MODELS.md) - Model zoo, benchmarks, and selection guide
|
||||
- [**Examples**](examples/) - Jupyter notebooks with detailed examples
|
||||
|
||||
---
|
||||
|
||||
## API Overview
|
||||
|
||||
### Factory Functions (Recommended)
|
||||
|
||||
```python
|
||||
from uniface.detection import RetinaFace, SCRFD
|
||||
from uniface.recognition import ArcFace
|
||||
from uniface.landmark import Landmark106
|
||||
|
||||
# Create detector with default settings
|
||||
detector = RetinaFace()
|
||||
|
||||
# Create with custom config
|
||||
detector = SCRFD(
|
||||
model_name='scrfd_10g_kps',
|
||||
conf_thresh=0.8,
|
||||
input_size=(640, 640)
|
||||
)
|
||||
|
||||
# Recognition and landmarks
|
||||
recognizer = ArcFace()
|
||||
landmarker = Landmark106()
|
||||
```
|
||||
|
||||
### Direct Model Instantiation
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace, SCRFD, ArcFace, MobileFace, SphereFace
|
||||
from uniface.constants import RetinaFaceWeights
|
||||
|
||||
# Detection
|
||||
detector = RetinaFace(
|
||||
model_name=RetinaFaceWeights.MNET_V2,
|
||||
conf_thresh=0.5,
|
||||
nms_thresh=0.4
|
||||
)
|
||||
|
||||
# Recognition
|
||||
recognizer = ArcFace() # Uses default weights
|
||||
recognizer = MobileFace() # Lightweight alternative
|
||||
recognizer = SphereFace() # Angular softmax alternative
|
||||
```
|
||||
|
||||
### High-Level Detection API
|
||||
|
||||
```python
|
||||
from uniface import detect_faces
|
||||
|
||||
# One-line face detection
|
||||
faces = detect_faces(image, method='retinaface', conf_thresh=0.8)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Model Performance
|
||||
|
||||
### Face Detection (WIDER FACE Dataset)
|
||||
|
||||
| Model | Easy | Medium | Hard | Use Case |
|
||||
|--------------------|--------|--------|--------|-------------------------|
|
||||
| retinaface_mnet025 | 88.48% | 87.02% | 80.61% | Mobile/Edge devices |
|
||||
| retinaface_mnet_v2 | 91.70% | 91.03% | 86.60% | Balanced (recommended) |
|
||||
| retinaface_r34 | 94.16% | 93.12% | 88.90% | High accuracy |
|
||||
| scrfd_500m | 90.57% | 88.12% | 68.51% | Real-time applications |
|
||||
| scrfd_10g | 95.16% | 93.87% | 83.05% | Best accuracy/speed |
|
||||
|
||||
*Accuracy values from original papers: [RetinaFace](https://arxiv.org/abs/1905.00641), [SCRFD](https://arxiv.org/abs/2105.04714)*
|
||||
|
||||
**Benchmark on your hardware:**
|
||||
```bash
|
||||
python scripts/run_detection.py --image assets/test.jpg --iterations 100
|
||||
```
|
||||
|
||||
See [MODELS.md](MODELS.md) for detailed model information and selection guide.
|
||||
|
||||
<div align="center">
|
||||
<img src="assets/test_result.png">
|
||||
</div>
|
||||
|
||||
---
|
||||
|
||||
## Examples
|
||||
## Documentation
|
||||
|
||||
### Webcam Face Detection
|
||||
📚 **Full documentation**: [yakhyo.github.io/uniface](https://yakhyo.github.io/uniface/)
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace
|
||||
from uniface.visualization import draw_detections
|
||||
| Resource | Description |
|
||||
|----------|-------------|
|
||||
| [Quickstart](https://yakhyo.github.io/uniface/quickstart/) | Get up and running in 5 minutes |
|
||||
| [Model Zoo](https://yakhyo.github.io/uniface/models/) | All models, benchmarks, and selection guide |
|
||||
| [API Reference](https://yakhyo.github.io/uniface/modules/detection/) | Detailed module documentation |
|
||||
| [Tutorials](https://yakhyo.github.io/uniface/recipes/image-pipeline/) | Step-by-step workflow examples |
|
||||
| [Guides](https://yakhyo.github.io/uniface/concepts/overview/) | Architecture and design principles |
|
||||
|
||||
detector = RetinaFace()
|
||||
cap = cv2.VideoCapture(0)
|
||||
### Jupyter Notebooks
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
faces = detector.detect(frame)
|
||||
|
||||
# Extract data for visualization
|
||||
bboxes = [f['bbox'] for f in faces]
|
||||
scores = [f['confidence'] for f in faces]
|
||||
landmarks = [f['landmarks'] for f in faces]
|
||||
|
||||
draw_detections(frame, bboxes, scores, landmarks, vis_threshold=0.6)
|
||||
|
||||
cv2.imshow("Face Detection", frame)
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
```
|
||||
|
||||
### Face Search System
|
||||
|
||||
```python
|
||||
import numpy as np
|
||||
from uniface import RetinaFace, ArcFace
|
||||
|
||||
detector = RetinaFace()
|
||||
recognizer = ArcFace()
|
||||
|
||||
# Build face database
|
||||
database = {}
|
||||
for person_id, image_path in person_images.items():
|
||||
image = cv2.imread(image_path)
|
||||
faces = detector.detect(image)
|
||||
if faces:
|
||||
embedding = recognizer.get_normalized_embedding(
|
||||
image, faces[0]['landmarks']
|
||||
)
|
||||
database[person_id] = embedding
|
||||
|
||||
# Search for a face
|
||||
query_image = cv2.imread("query.jpg")
|
||||
query_faces = detector.detect(query_image)
|
||||
if query_faces:
|
||||
query_embedding = recognizer.get_normalized_embedding(
|
||||
query_image, query_faces[0]['landmarks']
|
||||
)
|
||||
|
||||
# Find best match
|
||||
best_match = None
|
||||
best_similarity = -1
|
||||
|
||||
for person_id, db_embedding in database.items():
|
||||
similarity = np.dot(query_embedding, db_embedding.T)[0][0]
|
||||
if similarity > best_similarity:
|
||||
best_similarity = similarity
|
||||
best_match = person_id
|
||||
|
||||
print(f"Best match: {best_match} (similarity: {best_similarity:.4f})")
|
||||
```
|
||||
|
||||
More examples in the [examples/](examples/) directory.
|
||||
|
||||
---
|
||||
|
||||
## Advanced Configuration
|
||||
|
||||
### Custom ONNX Runtime Providers
|
||||
|
||||
```python
|
||||
from uniface.onnx_utils import get_available_providers, create_onnx_session
|
||||
|
||||
# Check available providers
|
||||
providers = get_available_providers()
|
||||
print(f"Available: {providers}")
|
||||
|
||||
# Create a detector (the execution provider is selected automatically)
|
||||
from uniface import RetinaFace
|
||||
detector = RetinaFace()
|
||||
# Internally uses create_onnx_session() which auto-selects best provider
|
||||
```
|
||||
|
||||
### Model Download and Caching
|
||||
|
||||
Models are automatically downloaded on first use and cached in `~/.uniface/models/`.
|
||||
|
||||
```python
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.constants import RetinaFaceWeights
|
||||
|
||||
# Manually download and verify a model
|
||||
model_path = verify_model_weights(
|
||||
RetinaFaceWeights.MNET_V2,
|
||||
root='./custom_models' # Custom cache directory
|
||||
)
|
||||
```
|
||||
|
||||
### Logging Configuration
|
||||
|
||||
```python
|
||||
from uniface import Logger
|
||||
import logging
|
||||
|
||||
# Set logging level
|
||||
Logger.setLevel(logging.DEBUG) # DEBUG, INFO, WARNING, ERROR
|
||||
|
||||
# Disable logging
|
||||
Logger.setLevel(logging.CRITICAL)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Testing
|
||||
|
||||
```bash
|
||||
# Run all tests
|
||||
pytest
|
||||
|
||||
# Run with coverage
|
||||
pytest --cov=uniface --cov-report=html
|
||||
|
||||
# Run specific test file
|
||||
pytest tests/test_retinaface.py -v
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Development
|
||||
|
||||
### Setup Development Environment
|
||||
|
||||
```bash
|
||||
git clone https://github.com/yakhyo/uniface.git
|
||||
cd uniface
|
||||
|
||||
# Install in editable mode with dev dependencies
|
||||
pip install -e ".[dev]"
|
||||
|
||||
# Run tests
|
||||
pytest
|
||||
|
||||
# Format code
|
||||
black uniface/
|
||||
isort uniface/
|
||||
```
|
||||
|
||||
### Project Structure
|
||||
|
||||
```
|
||||
uniface/
|
||||
├── uniface/
|
||||
│ ├── detection/ # Face detection models
|
||||
│ ├── recognition/ # Face recognition models
|
||||
│ ├── landmark/ # Landmark detection
|
||||
│ ├── attribute/ # Age, gender, emotion
|
||||
│ ├── onnx_utils.py # ONNX Runtime utilities
|
||||
│ ├── model_store.py # Model download & caching
|
||||
│ └── visualization.py # Drawing utilities
|
||||
├── tests/ # Unit tests
|
||||
├── examples/ # Example notebooks
|
||||
└── scripts/ # Utility scripts
|
||||
```
|
||||
| Example | Colab | Description |
|
||||
|---------|:-----:|-------------|
|
||||
| [01_face_detection.ipynb](examples/01_face_detection.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/01_face_detection.ipynb) | Face detection and landmarks |
|
||||
| [02_face_alignment.ipynb](examples/02_face_alignment.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/02_face_alignment.ipynb) | Face alignment for recognition |
|
||||
| [03_face_verification.ipynb](examples/03_face_verification.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/03_face_verification.ipynb) | Compare faces for identity |
|
||||
| [04_face_search.ipynb](examples/04_face_search.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/04_face_search.ipynb) | Find a person in group photos |
|
||||
| [05_face_analyzer.ipynb](examples/05_face_analyzer.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/05_face_analyzer.ipynb) | All-in-one analysis |
|
||||
| [06_face_parsing.ipynb](examples/06_face_parsing.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/06_face_parsing.ipynb) | Semantic face segmentation |
|
||||
| [07_face_anonymization.ipynb](examples/07_face_anonymization.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/07_face_anonymization.ipynb) | Privacy-preserving blur |
|
||||
| [08_gaze_estimation.ipynb](examples/08_gaze_estimation.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/08_gaze_estimation.ipynb) | Gaze direction estimation |
|
||||
|
||||
---
|
||||
|
||||
## References
|
||||
|
||||
### Model Training & Architectures
|
||||
|
||||
- **RetinaFace Training**: [yakhyo/retinaface-pytorch](https://github.com/yakhyo/retinaface-pytorch) - PyTorch implementation and training code
|
||||
- **Face Recognition Training**: [yakhyo/face-recognition](https://github.com/yakhyo/face-recognition) - ArcFace, MobileFace, SphereFace training code
|
||||
- **InsightFace**: [deepinsight/insightface](https://github.com/deepinsight/insightface) - Model architectures and pretrained weights
|
||||
|
||||
### Papers
|
||||
|
||||
- **RetinaFace**: [Single-Shot Multi-Level Face Localisation in the Wild](https://arxiv.org/abs/1905.00641)
|
||||
- **SCRFD**: [Sample and Computation Redistribution for Efficient Face Detection](https://arxiv.org/abs/2105.04714)
|
||||
- **ArcFace**: [Additive Angular Margin Loss for Deep Face Recognition](https://arxiv.org/abs/1801.07698)
|
||||
- [yakhyo/retinaface-pytorch](https://github.com/yakhyo/retinaface-pytorch) — RetinaFace training
|
||||
- [yakhyo/yolov5-face-onnx-inference](https://github.com/yakhyo/yolov5-face-onnx-inference) — YOLOv5-Face ONNX
|
||||
- [yakhyo/face-recognition](https://github.com/yakhyo/face-recognition) — ArcFace, MobileFace, SphereFace
|
||||
- [yakhyo/face-parsing](https://github.com/yakhyo/face-parsing) — BiSeNet face parsing
|
||||
- [yakhyo/gaze-estimation](https://github.com/yakhyo/gaze-estimation) — MobileGaze training
|
||||
- [yakhyo/face-anti-spoofing](https://github.com/yakhyo/face-anti-spoofing) — MiniFASNet inference
|
||||
- [yakhyo/fairface-onnx](https://github.com/yakhyo/fairface-onnx) — FairFace attributes
|
||||
- [deepinsight/insightface](https://github.com/deepinsight/insightface) — Model architectures
|
||||
|
||||
---
|
||||
|
||||
## Contributing
|
||||
|
||||
Contributions are welcome! Please open an issue or submit a pull request on [GitHub](https://github.com/yakhyo/uniface).
|
||||
Contributions are welcome! Please see [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines.
|
||||
|
||||
## License
|
||||
|
||||
This project is licensed under the [MIT License](LICENSE).
|
||||
|
||||
BIN
assets/einstien.png
Normal file
BIN
assets/einstien.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 1.3 MiB |
BIN
assets/scientists.png
Normal file
BIN
assets/scientists.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 1.9 MiB |
BIN
docs/assets/logo.png
Normal file
BIN
docs/assets/logo.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 33 KiB |
BIN
docs/assets/logo.webp
Normal file
BIN
docs/assets/logo.webp
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 33 KiB |
191
docs/concepts/coordinate-systems.md
Normal file
191
docs/concepts/coordinate-systems.md
Normal file
@@ -0,0 +1,191 @@
|
||||
# Coordinate Systems
|
||||
|
||||
This page explains the coordinate formats used in UniFace.
|
||||
|
||||
---
|
||||
|
||||
## Image Coordinates
|
||||
|
||||
All coordinates use **pixel-based, top-left origin**:
|
||||
|
||||
```
|
||||
(0, 0) ────────────────► x (width)
|
||||
│
|
||||
│ Image
|
||||
│
|
||||
▼
|
||||
y (height)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Bounding Box Format
|
||||
|
||||
Bounding boxes use `[x1, y1, x2, y2]` format (top-left and bottom-right corners):
|
||||
|
||||
```
|
||||
(x1, y1) ─────────────────┐
|
||||
│ │
|
||||
│ Face │
|
||||
│ │
|
||||
└─────────────────────┘ (x2, y2)
|
||||
```
|
||||
|
||||
### Accessing Coordinates
|
||||
|
||||
```python
|
||||
face = faces[0]
|
||||
|
||||
# Direct access
|
||||
x1, y1, x2, y2 = face.bbox
|
||||
|
||||
# As properties
|
||||
bbox_xyxy = face.bbox_xyxy # [x1, y1, x2, y2]
|
||||
bbox_xywh = face.bbox_xywh # [x1, y1, width, height]
|
||||
```
|
||||
|
||||
### Conversion
|
||||
|
||||
```python
|
||||
import numpy as np
|
||||
|
||||
# xyxy → xywh
|
||||
def xyxy_to_xywh(bbox):
|
||||
x1, y1, x2, y2 = bbox
|
||||
return np.array([x1, y1, x2 - x1, y2 - y1])
|
||||
|
||||
# xywh → xyxy
|
||||
def xywh_to_xyxy(bbox):
|
||||
x, y, w, h = bbox
|
||||
return np.array([x, y, x + w, y + h])
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Landmarks
|
||||
|
||||
### 5-Point Landmarks (Detection)
|
||||
|
||||
Returned by all detection models:
|
||||
|
||||
```python
|
||||
landmarks = face.landmarks # Shape: (5, 2)
|
||||
```
|
||||
|
||||
| Index | Point |
|
||||
|-------|-------|
|
||||
| 0 | Left Eye |
|
||||
| 1 | Right Eye |
|
||||
| 2 | Nose Tip |
|
||||
| 3 | Left Mouth Corner |
|
||||
| 4 | Right Mouth Corner |
|
||||
|
||||
```
|
||||
0 ● ● 1
|
||||
|
||||
● 2
|
||||
|
||||
3 ● ● 4
|
||||
```
|
||||
|
||||
### 106-Point Landmarks
|
||||
|
||||
Returned by `Landmark106`:
|
||||
|
||||
```python
|
||||
from uniface import Landmark106
|
||||
|
||||
landmarker = Landmark106()
|
||||
landmarks = landmarker.get_landmarks(image, face.bbox)
|
||||
# Shape: (106, 2)
|
||||
```
|
||||
|
||||
**Landmark Groups:**
|
||||
|
||||
| Range | Group | Points |
|
||||
|-------|-------|--------|
|
||||
| 0-32 | Face Contour | 33 |
|
||||
| 33-50 | Eyebrows | 18 |
|
||||
| 51-62 | Nose | 12 |
|
||||
| 63-86 | Eyes | 24 |
|
||||
| 87-105 | Mouth | 19 |
|
||||
|
||||
---
|
||||
|
||||
## Face Crop
|
||||
|
||||
To crop a face from an image:
|
||||
|
||||
```python
|
||||
def crop_face(image, bbox, margin=0):
|
||||
"""Crop face with optional margin."""
|
||||
h, w = image.shape[:2]
|
||||
x1, y1, x2, y2 = map(int, bbox)
|
||||
|
||||
# Add margin
|
||||
if margin > 0:
|
||||
bw, bh = x2 - x1, y2 - y1
|
||||
x1 = max(0, x1 - int(bw * margin))
|
||||
y1 = max(0, y1 - int(bh * margin))
|
||||
x2 = min(w, x2 + int(bw * margin))
|
||||
y2 = min(h, y2 + int(bh * margin))
|
||||
|
||||
return image[y1:y2, x1:x2]
|
||||
|
||||
# Usage
|
||||
face_crop = crop_face(image, face.bbox, margin=0.1)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Gaze Angles
|
||||
|
||||
Gaze estimation returns pitch and yaw in **radians**:
|
||||
|
||||
```python
|
||||
result = gaze_estimator.estimate(face_crop)
|
||||
|
||||
# Angles in radians
|
||||
pitch = result.pitch # Vertical: + = up, - = down
|
||||
yaw = result.yaw # Horizontal: + = right, - = left
|
||||
|
||||
# Convert to degrees
|
||||
import numpy as np
|
||||
pitch_deg = np.degrees(pitch)
|
||||
yaw_deg = np.degrees(yaw)
|
||||
```
|
||||
|
||||
**Angle Reference:**
|
||||
|
||||
```
|
||||
pitch = +90° (up)
|
||||
│
|
||||
│
|
||||
yaw = -90° ────┼──── yaw = +90°
|
||||
(left) │ (right)
|
||||
│
|
||||
pitch = -90° (down)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Face Alignment
|
||||
|
||||
Face alignment uses 5-point landmarks to normalize face orientation:
|
||||
|
||||
```python
|
||||
from uniface import face_alignment
|
||||
|
||||
# Align face to standard template
|
||||
aligned_face = face_alignment(image, face.landmarks)
|
||||
# Output: 112x112 aligned face image
|
||||
```
|
||||
|
||||
The alignment transforms faces to a canonical pose for better recognition accuracy.
|
||||
|
||||
---
|
||||
|
||||
## Next Steps
|
||||
|
||||
- [Inputs & Outputs](inputs-outputs.md) - Data types reference
|
||||
- [Recognition Module](../modules/recognition.md) - Face recognition details
|
||||
204
docs/concepts/execution-providers.md
Normal file
204
docs/concepts/execution-providers.md
Normal file
@@ -0,0 +1,204 @@
|
||||
# Execution Providers
|
||||
|
||||
UniFace uses ONNX Runtime for model inference, which supports multiple hardware acceleration backends.
|
||||
|
||||
---
|
||||
|
||||
## Automatic Provider Selection
|
||||
|
||||
UniFace automatically selects the optimal execution provider based on available hardware:
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace
|
||||
|
||||
# Automatically uses best available provider
|
||||
detector = RetinaFace()
|
||||
```
|
||||
|
||||
**Priority order:**
|
||||
|
||||
1. **CUDAExecutionProvider** - NVIDIA GPU
|
||||
2. **CoreMLExecutionProvider** - Apple Silicon
|
||||
3. **CPUExecutionProvider** - Fallback
|
||||
|
||||
---
|
||||
|
||||
## Check Available Providers
|
||||
|
||||
```python
|
||||
import onnxruntime as ort
|
||||
|
||||
providers = ort.get_available_providers()
|
||||
print("Available providers:", providers)
|
||||
```
|
||||
|
||||
**Example outputs:**
|
||||
|
||||
=== "macOS (Apple Silicon)"
|
||||
|
||||
```
|
||||
['CoreMLExecutionProvider', 'CPUExecutionProvider']
|
||||
```
|
||||
|
||||
=== "Linux (NVIDIA GPU)"
|
||||
|
||||
```
|
||||
['CUDAExecutionProvider', 'CPUExecutionProvider']
|
||||
```
|
||||
|
||||
=== "Windows (CPU)"
|
||||
|
||||
```
|
||||
['CPUExecutionProvider']
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Platform-Specific Setup
|
||||
|
||||
### Apple Silicon (M1/M2/M3/M4)
|
||||
|
||||
No additional setup required. ARM64 optimizations are built into `onnxruntime`:
|
||||
|
||||
```bash
|
||||
pip install uniface
|
||||
```
|
||||
|
||||
Verify ARM64:
|
||||
|
||||
```bash
|
||||
python -c "import platform; print(platform.machine())"
|
||||
# Should show: arm64
|
||||
```
|
||||
|
||||
!!! tip "Performance"
|
||||
Apple Silicon Macs use CoreML acceleration automatically, providing excellent performance for face analysis tasks.
|
||||
|
||||
---
|
||||
|
||||
### NVIDIA GPU (CUDA)
|
||||
|
||||
Install with GPU support:
|
||||
|
||||
```bash
|
||||
pip install uniface[gpu]
|
||||
```
|
||||
|
||||
**Requirements:**
|
||||
|
||||
- CUDA 11.x or 12.x
|
||||
- cuDNN 8.x
|
||||
- Compatible NVIDIA driver
|
||||
|
||||
Verify CUDA:
|
||||
|
||||
```python
|
||||
import onnxruntime as ort
|
||||
|
||||
if 'CUDAExecutionProvider' in ort.get_available_providers():
|
||||
print("CUDA is available!")
|
||||
else:
|
||||
print("CUDA not available, using CPU")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### CPU Fallback
|
||||
|
||||
CPU execution is always available:
|
||||
|
||||
```bash
|
||||
pip install uniface
|
||||
```
|
||||
|
||||
Works on all platforms without additional configuration.
|
||||
|
||||
---
|
||||
|
||||
## Internal API
|
||||
|
||||
For advanced use cases, you can access the provider utilities:
|
||||
|
||||
```python
|
||||
from uniface.onnx_utils import get_available_providers, create_onnx_session
|
||||
|
||||
# Check available providers
|
||||
providers = get_available_providers()
|
||||
print(f"Available: {providers}")
|
||||
|
||||
# Models use create_onnx_session() internally
|
||||
# which auto-selects the best provider
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Performance Tips
|
||||
|
||||
### 1. Use GPU When Available
|
||||
|
||||
For batch processing or real-time applications, GPU acceleration provides significant speedups:
|
||||
|
||||
```bash
|
||||
pip install uniface[gpu]
|
||||
```
|
||||
|
||||
### 2. Optimize Input Size
|
||||
|
||||
Smaller input sizes are faster but may reduce accuracy:
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace
|
||||
|
||||
# Faster, lower accuracy
|
||||
detector = RetinaFace(input_size=(320, 320))
|
||||
|
||||
# Balanced (default)
|
||||
detector = RetinaFace(input_size=(640, 640))
|
||||
```
|
||||
|
||||
### 3. Batch Processing
|
||||
|
||||
When processing many images, create the detector once and reuse it for every image:
|
||||
|
||||
```python
|
||||
# Reuse the same detector for every image (detect() takes one image per call)
|
||||
for image_path in image_paths:
|
||||
image = cv2.imread(image_path)
|
||||
faces = detector.detect(image)
|
||||
# ...
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### CUDA Not Detected
|
||||
|
||||
1. Verify CUDA installation:
|
||||
```bash
|
||||
nvidia-smi
|
||||
```
|
||||
|
||||
2. Check CUDA version compatibility with ONNX Runtime
|
||||
|
||||
3. Reinstall with GPU support:
|
||||
```bash
|
||||
pip uninstall onnxruntime onnxruntime-gpu
|
||||
pip install uniface[gpu]
|
||||
```
|
||||
|
||||
### Slow Performance on Mac
|
||||
|
||||
Verify you're using ARM64 Python (not Rosetta):
|
||||
|
||||
```bash
|
||||
python -c "import platform; print(platform.machine())"
|
||||
# Should show: arm64 (not x86_64)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Next Steps
|
||||
|
||||
- [Model Cache & Offline](model-cache-offline.md) - Model management
|
||||
- [Thresholds & Calibration](thresholds-calibration.md) - Tuning parameters
|
||||
218
docs/concepts/inputs-outputs.md
Normal file
218
docs/concepts/inputs-outputs.md
Normal file
@@ -0,0 +1,218 @@
|
||||
# Inputs & Outputs
|
||||
|
||||
This page describes the data types used throughout UniFace.
|
||||
|
||||
---
|
||||
|
||||
## Input: Images
|
||||
|
||||
All models accept NumPy arrays in **BGR format** (OpenCV default):
|
||||
|
||||
```python
|
||||
import cv2
|
||||
|
||||
# Load image (BGR format)
|
||||
image = cv2.imread("photo.jpg")
|
||||
print(f"Shape: {image.shape}") # (H, W, 3)
|
||||
print(f"Dtype: {image.dtype}") # uint8
|
||||
```
|
||||
|
||||
!!! warning "Color Format"
|
||||
UniFace expects **BGR** format (OpenCV default). If using PIL or other libraries, convert first:
|
||||
|
||||
```python
|
||||
from PIL import Image
|
||||
import numpy as np
|
||||
|
||||
pil_image = Image.open("photo.jpg").convert("RGB")  # force 3-channel RGB (handles grayscale/RGBA/CMYK)
|
||||
bgr_image = np.array(pil_image)[:, :, ::-1] # RGB → BGR
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Output: Face Dataclass
|
||||
|
||||
Detection returns a list of `Face` objects:
|
||||
|
||||
```python
|
||||
from dataclasses import dataclass
|
||||
import numpy as np
|
||||
|
||||
@dataclass
|
||||
class Face:
|
||||
# Required (from detection)
|
||||
bbox: np.ndarray # [x1, y1, x2, y2]
|
||||
confidence: float # 0.0 to 1.0
|
||||
landmarks: np.ndarray # (5, 2) or (106, 2)
|
||||
|
||||
# Optional (enriched by analyzers)
|
||||
embedding: np.ndarray | None = None
|
||||
gender: int | None = None # 0=Female, 1=Male
|
||||
age: int | None = None # Years
|
||||
age_group: str | None = None # "20-29", etc.
|
||||
race: str | None = None # "East Asian", etc.
|
||||
emotion: str | None = None # "Happy", etc.
|
||||
emotion_confidence: float | None = None
|
||||
```
|
||||
|
||||
### Properties
|
||||
|
||||
```python
|
||||
face = faces[0]
|
||||
|
||||
# Bounding box formats
|
||||
face.bbox_xyxy # [x1, y1, x2, y2] - same as bbox
|
||||
face.bbox_xywh # [x1, y1, width, height]
|
||||
|
||||
# Gender as string
|
||||
face.sex # "Female" or "Male" (None if not predicted)
|
||||
```
|
||||
|
||||
### Methods
|
||||
|
||||
```python
|
||||
# Compute similarity with another face
|
||||
similarity = face1.compute_similarity(face2)
|
||||
|
||||
# Convert to dictionary
|
||||
face_dict = face.to_dict()
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Result Types
|
||||
|
||||
### GazeResult
|
||||
|
||||
```python
|
||||
from dataclasses import dataclass
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class GazeResult:
|
||||
pitch: float # Vertical angle (radians), + = up
|
||||
yaw: float # Horizontal angle (radians), + = right
|
||||
```
|
||||
|
||||
**Usage:**
|
||||
|
||||
```python
|
||||
import numpy as np
|
||||
|
||||
result = gaze_estimator.estimate(face_crop)
|
||||
print(f"Pitch: {np.degrees(result.pitch):.1f}°")
|
||||
print(f"Yaw: {np.degrees(result.yaw):.1f}°")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### SpoofingResult
|
||||
|
||||
```python
|
||||
@dataclass(frozen=True)
|
||||
class SpoofingResult:
|
||||
is_real: bool # True = real, False = fake
|
||||
confidence: float # 0.0 to 1.0
|
||||
```
|
||||
|
||||
**Usage:**
|
||||
|
||||
```python
|
||||
result = spoofer.predict(image, face.bbox)
|
||||
label = "Real" if result.is_real else "Fake"
|
||||
print(f"{label}: {result.confidence:.1%}")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### AttributeResult
|
||||
|
||||
```python
|
||||
@dataclass(frozen=True)
|
||||
class AttributeResult:
|
||||
gender: int # 0=Female, 1=Male
|
||||
age: int | None # Years (AgeGender model)
|
||||
age_group: str | None # "20-29" (FairFace model)
|
||||
race: str | None # Race label (FairFace model)
|
||||
|
||||
@property
|
||||
def sex(self) -> str:
|
||||
return "Female" if self.gender == 0 else "Male"
|
||||
```
|
||||
|
||||
**Usage:**
|
||||
|
||||
```python
|
||||
# AgeGender model
|
||||
result = age_gender.predict(image, face.bbox)
|
||||
print(f"{result.sex}, {result.age} years old")
|
||||
|
||||
# FairFace model
|
||||
result = fairface.predict(image, face.bbox)
|
||||
print(f"{result.sex}, {result.age_group}, {result.race}")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### EmotionResult
|
||||
|
||||
```python
|
||||
@dataclass(frozen=True)
|
||||
class EmotionResult:
|
||||
emotion: str # "Happy", "Sad", etc.
|
||||
confidence: float # 0.0 to 1.0
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Embeddings
|
||||
|
||||
Face recognition models return normalized 512-dimensional embeddings:
|
||||
|
||||
```python
|
||||
embedding = recognizer.get_normalized_embedding(image, landmarks)
|
||||
print(f"Shape: {embedding.shape}") # (1, 512)
|
||||
print(f"Norm: {np.linalg.norm(embedding):.4f}") # ~1.0
|
||||
```
|
||||
|
||||
### Similarity Computation
|
||||
|
||||
```python
|
||||
from uniface import compute_similarity
|
||||
|
||||
similarity = compute_similarity(embedding1, embedding2)
|
||||
# Returns: float between -1 and 1 (cosine similarity)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Parsing Masks
|
||||
|
||||
Face parsing returns a segmentation mask:
|
||||
|
||||
```python
|
||||
mask = parser.parse(face_image)
|
||||
print(f"Shape: {mask.shape}") # (H, W)
|
||||
print(f"Classes: {np.unique(mask)}") # [0, 1, 2, ...]
|
||||
```
|
||||
|
||||
**19 Classes:**
|
||||
|
||||
| ID | Class | ID | Class |
|
||||
|----|-------|----|-------|
|
||||
| 0 | Background | 10 | Earring |
|
||||
| 1 | Skin | 11 | Nose |
|
||||
| 2 | Left Eyebrow | 12 | Mouth |
|
||||
| 3 | Right Eyebrow | 13 | Upper Lip |
|
||||
| 4 | Left Eye | 14 | Lower Lip |
|
||||
| 5 | Right Eye | 15 | Neck |
|
||||
| 6 | Eyeglasses | 16 | Necklace |
|
||||
| 7 | Left Ear | 17 | Cloth |
|
||||
| 8 | Right Ear | 18 | Hair |
|
||||
| 9 | Hat | | |
|
||||
|
||||
---
|
||||
|
||||
## Next Steps
|
||||
|
||||
- [Coordinate Systems](coordinate-systems.md) - Bbox and landmark formats
|
||||
- [Thresholds & Calibration](thresholds-calibration.md) - Tuning confidence thresholds
|
||||
218
docs/concepts/model-cache-offline.md
Normal file
218
docs/concepts/model-cache-offline.md
Normal file
@@ -0,0 +1,218 @@
|
||||
# Model Cache & Offline Use
|
||||
|
||||
UniFace automatically downloads and caches models. This page explains how model management works.
|
||||
|
||||
---
|
||||
|
||||
## Automatic Download
|
||||
|
||||
Models are downloaded on first use:
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace
|
||||
|
||||
# First run: downloads model to cache
|
||||
detector = RetinaFace() # ~3.5 MB download
|
||||
|
||||
# Subsequent runs: loads from cache
|
||||
detector = RetinaFace() # Instant
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Cache Location
|
||||
|
||||
Default cache directory:
|
||||
|
||||
```
|
||||
~/.uniface/models/
|
||||
```
|
||||
|
||||
**Example structure:**
|
||||
|
||||
```
|
||||
~/.uniface/models/
|
||||
├── retinaface_mv2.onnx
|
||||
├── w600k_mbf.onnx
|
||||
├── 2d106det.onnx
|
||||
├── gaze_resnet34.onnx
|
||||
├── parsing_resnet18.onnx
|
||||
└── ...
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Custom Cache Directory
|
||||
|
||||
Specify a custom cache location:
|
||||
|
||||
```python
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.constants import RetinaFaceWeights
|
||||
|
||||
# Download to custom directory
|
||||
model_path = verify_model_weights(
|
||||
RetinaFaceWeights.MNET_V2,
|
||||
root='./my_models'
|
||||
)
|
||||
print(f"Model at: {model_path}")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Pre-Download Models
|
||||
|
||||
Download models before deployment:
|
||||
|
||||
```python
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.constants import (
|
||||
RetinaFaceWeights,
|
||||
ArcFaceWeights,
|
||||
AgeGenderWeights,
|
||||
)
|
||||
|
||||
# Download all needed models
|
||||
models = [
|
||||
RetinaFaceWeights.MNET_V2,
|
||||
ArcFaceWeights.MNET,
|
||||
AgeGenderWeights.DEFAULT,
|
||||
]
|
||||
|
||||
for model in models:
|
||||
path = verify_model_weights(model)
|
||||
print(f"Downloaded: {path}")
|
||||
```
|
||||
|
||||
Or use the CLI tool:
|
||||
|
||||
```bash
|
||||
python tools/download_model.py
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Offline Use
|
||||
|
||||
For air-gapped or offline environments:
|
||||
|
||||
### 1. Pre-download models
|
||||
|
||||
On a connected machine:
|
||||
|
||||
```python
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.constants import RetinaFaceWeights
|
||||
|
||||
path = verify_model_weights(RetinaFaceWeights.MNET_V2)
|
||||
print(f"Copy from: {path}")
|
||||
```
|
||||
|
||||
### 2. Copy to target machine
|
||||
|
||||
```bash
|
||||
# Copy the entire cache directory
|
||||
scp -r ~/.uniface/models/ user@offline-machine:~/.uniface/models/
|
||||
```
|
||||
|
||||
### 3. Use normally
|
||||
|
||||
```python
|
||||
# Models load from local cache
|
||||
from uniface import RetinaFace
|
||||
detector = RetinaFace() # No network required
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Model Verification
|
||||
|
||||
Models are verified with SHA-256 checksums:
|
||||
|
||||
```python
|
||||
from uniface.constants import MODEL_SHA256, RetinaFaceWeights
|
||||
|
||||
# Check expected checksum
|
||||
expected = MODEL_SHA256[RetinaFaceWeights.MNET_V2]
|
||||
print(f"Expected SHA256: {expected}")
|
||||
```
|
||||
|
||||
If a model fails verification, it's re-downloaded automatically.
|
||||
|
||||
---
|
||||
|
||||
## Available Models
|
||||
|
||||
### Detection Models
|
||||
|
||||
| Model | Size | Download |
|
||||
|-------|------|----------|
|
||||
| RetinaFace MNET_025 | 1.7 MB | ✅ |
|
||||
| RetinaFace MNET_V2 | 3.5 MB | ✅ |
|
||||
| RetinaFace RESNET34 | 56 MB | ✅ |
|
||||
| SCRFD 500M | 2.5 MB | ✅ |
|
||||
| SCRFD 10G | 17 MB | ✅ |
|
||||
| YOLOv5n-Face | 11 MB | ✅ |
|
||||
| YOLOv5s-Face | 28 MB | ✅ |
|
||||
| YOLOv5m-Face | 82 MB | ✅ |
|
||||
|
||||
### Recognition Models
|
||||
|
||||
| Model | Size | Download |
|
||||
|-------|------|----------|
|
||||
| ArcFace MNET | 8 MB | ✅ |
|
||||
| ArcFace RESNET | 166 MB | ✅ |
|
||||
| MobileFace MNET_V2 | 4 MB | ✅ |
|
||||
| SphereFace SPHERE20 | 50 MB | ✅ |
|
||||
|
||||
### Other Models
|
||||
|
||||
| Model | Size | Download |
|
||||
|-------|------|----------|
|
||||
| Landmark106 | 14 MB | ✅ |
|
||||
| AgeGender | 8 MB | ✅ |
|
||||
| FairFace | 44 MB | ✅ |
|
||||
| Gaze ResNet34 | 82 MB | ✅ |
|
||||
| BiSeNet ResNet18 | 51 MB | ✅ |
|
||||
| MiniFASNet V2 | 1.2 MB | ✅ |
|
||||
|
||||
---
|
||||
|
||||
## Clear Cache
|
||||
|
||||
Remove cached models:
|
||||
|
||||
```bash
|
||||
# Remove all cached models
|
||||
rm -rf ~/.uniface/models/
|
||||
|
||||
# Remove specific model
|
||||
rm ~/.uniface/models/retinaface_mv2.onnx
|
||||
```
|
||||
|
||||
Models will be re-downloaded on next use.
|
||||
|
||||
---
|
||||
|
||||
## Environment Variables
|
||||
|
||||
Set custom cache location via environment variable:
|
||||
|
||||
```bash
|
||||
export UNIFACE_CACHE_DIR=/path/to/custom/cache
|
||||
```
|
||||
|
||||
```python
|
||||
import os
|
||||
os.environ['UNIFACE_CACHE_DIR'] = '/path/to/custom/cache'
|
||||
|
||||
from uniface import RetinaFace
|
||||
detector = RetinaFace() # Uses custom cache
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Next Steps
|
||||
|
||||
- [Thresholds & Calibration](thresholds-calibration.md) - Tune model parameters
|
||||
- [Detection Module](../modules/detection.md) - Detection model details
|
||||
196
docs/concepts/overview.md
Normal file
196
docs/concepts/overview.md
Normal file
@@ -0,0 +1,196 @@
|
||||
# Overview
|
||||
|
||||
UniFace is designed as a modular, production-ready face analysis library. This page explains the architecture and design principles.
|
||||
|
||||
---
|
||||
|
||||
## Architecture
|
||||
|
||||
UniFace follows a modular architecture where each face analysis task is handled by a dedicated module:
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph Input
|
||||
IMG[Image/Frame]
|
||||
end
|
||||
|
||||
subgraph Detection
|
||||
DET[RetinaFace / SCRFD / YOLOv5Face]
|
||||
end
|
||||
|
||||
subgraph Analysis
|
||||
REC[Recognition]
|
||||
LMK[Landmarks]
|
||||
ATTR[Attributes]
|
||||
GAZE[Gaze]
|
||||
PARSE[Parsing]
|
||||
SPOOF[Anti-Spoofing]
|
||||
PRIV[Privacy]
|
||||
end
|
||||
|
||||
subgraph Output
|
||||
FACE[Face Objects]
|
||||
end
|
||||
|
||||
IMG --> DET
|
||||
DET --> REC
|
||||
DET --> LMK
|
||||
DET --> ATTR
|
||||
DET --> GAZE
|
||||
DET --> PARSE
|
||||
DET --> SPOOF
|
||||
DET --> PRIV
|
||||
REC --> FACE
|
||||
LMK --> FACE
|
||||
ATTR --> FACE
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Design Principles
|
||||
|
||||
### 1. ONNX-First
|
||||
|
||||
All models use ONNX Runtime for inference:
|
||||
|
||||
- **Cross-platform**: Same models work on macOS, Linux, Windows
|
||||
- **Hardware acceleration**: Automatic selection of optimal provider
|
||||
- **Production-ready**: No Python-only dependencies for inference
|
||||
|
||||
### 2. Minimal Dependencies
|
||||
|
||||
Core dependencies are kept minimal:
|
||||
|
||||
```
|
||||
numpy # Array operations
|
||||
opencv-python # Image processing
|
||||
onnxruntime # Model inference
|
||||
requests # Model download
|
||||
tqdm # Progress bars
|
||||
```
|
||||
|
||||
### 3. Simple API
|
||||
|
||||
Factory functions and direct instantiation:
|
||||
|
||||
```python
|
||||
# Factory function
|
||||
detector = create_detector('retinaface')
|
||||
|
||||
# Direct instantiation (recommended)
|
||||
from uniface import RetinaFace
|
||||
detector = RetinaFace()
|
||||
```
|
||||
|
||||
### 4. Type Safety
|
||||
|
||||
Full type hints throughout:
|
||||
|
||||
```python
|
||||
def detect(self, image: np.ndarray) -> list[Face]:
|
||||
...
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Module Structure
|
||||
|
||||
```
|
||||
uniface/
|
||||
├── detection/ # Face detection (RetinaFace, SCRFD, YOLOv5Face)
|
||||
├── recognition/ # Face recognition (ArcFace, MobileFace, SphereFace)
|
||||
├── landmark/ # 106-point landmarks
|
||||
├── attribute/ # Age, gender, emotion, race
|
||||
├── parsing/ # Face semantic segmentation
|
||||
├── gaze/ # Gaze estimation
|
||||
├── spoofing/ # Anti-spoofing
|
||||
├── privacy/ # Face anonymization
|
||||
├── types.py # Dataclasses (Face, GazeResult, etc.)
|
||||
├── constants.py # Model weights and URLs
|
||||
├── model_store.py # Model download and caching
|
||||
├── onnx_utils.py # ONNX Runtime utilities
|
||||
└── visualization.py # Drawing utilities
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Workflow
|
||||
|
||||
A typical face analysis workflow:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace, ArcFace, AgeGender
|
||||
|
||||
# 1. Initialize models
|
||||
detector = RetinaFace()
|
||||
recognizer = ArcFace()
|
||||
age_gender = AgeGender()
|
||||
|
||||
# 2. Load image
|
||||
image = cv2.imread("photo.jpg")
|
||||
|
||||
# 3. Detect faces
|
||||
faces = detector.detect(image)
|
||||
|
||||
# 4. Analyze each face
|
||||
for face in faces:
|
||||
# Recognition embedding
|
||||
embedding = recognizer.get_normalized_embedding(image, face.landmarks)
|
||||
|
||||
# Attributes
|
||||
attrs = age_gender.predict(image, face.bbox)
|
||||
|
||||
print(f"Face: {attrs.sex}, {attrs.age} years")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## FaceAnalyzer
|
||||
|
||||
For convenience, `FaceAnalyzer` combines multiple modules:
|
||||
|
||||
```python
|
||||
from uniface import FaceAnalyzer
|
||||
|
||||
analyzer = FaceAnalyzer(
|
||||
detect=True,
|
||||
recognize=True,
|
||||
attributes=True
|
||||
)
|
||||
|
||||
faces = analyzer.analyze(image)
|
||||
for face in faces:
|
||||
print(f"Age: {face.age}, Gender: {face.sex}")
|
||||
print(f"Embedding: {face.embedding.shape}")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Model Lifecycle
|
||||
|
||||
1. **First use**: Model is downloaded from GitHub releases
|
||||
2. **Cached**: Stored in `~/.uniface/models/`
|
||||
3. **Verified**: SHA-256 checksum validation
|
||||
4. **Loaded**: ONNX Runtime session created
|
||||
5. **Inference**: Hardware-accelerated execution
|
||||
|
||||
```python
|
||||
# Models auto-download on first use
|
||||
detector = RetinaFace() # Downloads if not cached
|
||||
|
||||
# Or manually pre-download
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.constants import RetinaFaceWeights
|
||||
|
||||
path = verify_model_weights(RetinaFaceWeights.MNET_V2)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Next Steps
|
||||
|
||||
- [Inputs & Outputs](inputs-outputs.md) - Understand data types
|
||||
- [Execution Providers](execution-providers.md) - Hardware acceleration
|
||||
- [Detection Module](../modules/detection.md) - Start with face detection
|
||||
- [Image Pipeline Recipe](../recipes/image-pipeline.md) - Complete workflow
|
||||
234
docs/concepts/thresholds-calibration.md
Normal file
234
docs/concepts/thresholds-calibration.md
Normal file
@@ -0,0 +1,234 @@
|
||||
# Thresholds & Calibration
|
||||
|
||||
This page explains how to tune detection and recognition thresholds for your use case.
|
||||
|
||||
---
|
||||
|
||||
## Detection Thresholds
|
||||
|
||||
### Confidence Threshold
|
||||
|
||||
Controls minimum confidence for face detection:
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace
|
||||
|
||||
# Default (balanced)
|
||||
detector = RetinaFace(confidence_threshold=0.5)
|
||||
|
||||
# High precision (fewer false positives)
|
||||
detector = RetinaFace(confidence_threshold=0.8)
|
||||
|
||||
# High recall (catch more faces)
|
||||
detector = RetinaFace(confidence_threshold=0.3)
|
||||
```
|
||||
|
||||
**Guidelines:**
|
||||
|
||||
| Threshold | Use Case |
|
||||
|-----------|----------|
|
||||
| 0.3 - 0.4 | Maximum recall (research, analysis) |
|
||||
| 0.5 - 0.6 | Balanced (default, general use) |
|
||||
| 0.7 - 0.9 | High precision (production, security) |
|
||||
|
||||
---
|
||||
|
||||
### NMS Threshold
|
||||
|
||||
Non-Maximum Suppression removes overlapping detections:
|
||||
|
||||
```python
|
||||
# Default
|
||||
detector = RetinaFace(nms_threshold=0.4)
|
||||
|
||||
# Stricter (fewer overlapping boxes)
|
||||
detector = RetinaFace(nms_threshold=0.3)
|
||||
|
||||
# Looser (for crowded scenes)
|
||||
detector = RetinaFace(nms_threshold=0.5)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Input Size
|
||||
|
||||
Affects detection accuracy and speed:
|
||||
|
||||
```python
|
||||
# Faster, lower accuracy
|
||||
detector = RetinaFace(input_size=(320, 320))
|
||||
|
||||
# Balanced (default)
|
||||
detector = RetinaFace(input_size=(640, 640))
|
||||
|
||||
# Higher accuracy, slower
|
||||
detector = RetinaFace(input_size=(1280, 1280))
|
||||
```
|
||||
|
||||
!!! tip "Dynamic Size"
|
||||
For RetinaFace, enable dynamic input for variable image sizes:
|
||||
```python
|
||||
detector = RetinaFace(dynamic_size=True)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Recognition Thresholds
|
||||
|
||||
### Similarity Threshold
|
||||
|
||||
For identity verification (same person check):
|
||||
|
||||
```python
|
||||
import numpy as np
|
||||
from uniface import compute_similarity
|
||||
|
||||
similarity = compute_similarity(embedding1, embedding2)
|
||||
|
||||
# Threshold interpretation
|
||||
if similarity > 0.6:
|
||||
print("Same person (high confidence)")
|
||||
elif similarity > 0.4:
|
||||
print("Uncertain (manual review)")
|
||||
else:
|
||||
print("Different people")
|
||||
```
|
||||
|
||||
**Recommended thresholds:**
|
||||
|
||||
| Threshold | Decision | False Accept Rate |
|
||||
|-----------|----------|-------------------|
|
||||
| 0.4 | Low security | Higher FAR |
|
||||
| 0.5 | Balanced | Moderate FAR |
|
||||
| 0.6 | High security | Lower FAR |
|
||||
| 0.7 | Very strict | Very low FAR |
|
||||
|
||||
---
|
||||
|
||||
### Calibration for Your Dataset
|
||||
|
||||
Test on your data to find optimal thresholds:
|
||||
|
||||
```python
|
||||
import numpy as np
|
||||
|
||||
def calibrate_threshold(same_pairs, diff_pairs, recognizer, detector):
|
||||
"""Find optimal threshold for your dataset."""
|
||||
same_scores = []
|
||||
diff_scores = []
|
||||
|
||||
# Compute similarities for same-person pairs
|
||||
for img1_path, img2_path in same_pairs:
|
||||
img1 = cv2.imread(img1_path)
|
||||
img2 = cv2.imread(img2_path)
|
||||
|
||||
faces1 = detector.detect(img1)
|
||||
faces2 = detector.detect(img2)
|
||||
|
||||
if faces1 and faces2:
|
||||
emb1 = recognizer.get_normalized_embedding(img1, faces1[0].landmarks)
|
||||
emb2 = recognizer.get_normalized_embedding(img2, faces2[0].landmarks)
|
||||
same_scores.append(np.dot(emb1, emb2.T)[0][0])
|
||||
|
||||
# Compute similarities for different-person pairs
|
||||
for img1_path, img2_path in diff_pairs:
|
||||
# ... same detect + embed steps as above, producing `similarity` for this pair
|
||||
diff_scores.append(similarity)
|
||||
|
||||
# Find optimal threshold
|
||||
thresholds = np.arange(0.3, 0.8, 0.05)
|
||||
best_threshold = 0.5
|
||||
best_accuracy = 0
|
||||
|
||||
for thresh in thresholds:
|
||||
tp = sum(1 for s in same_scores if s >= thresh)
|
||||
tn = sum(1 for s in diff_scores if s < thresh)
|
||||
accuracy = (tp + tn) / (len(same_scores) + len(diff_scores))
|
||||
|
||||
if accuracy > best_accuracy:
|
||||
best_accuracy = accuracy
|
||||
best_threshold = thresh
|
||||
|
||||
return best_threshold, best_accuracy
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Anti-Spoofing Thresholds
|
||||
|
||||
The MiniFASNet model returns a confidence score:
|
||||
|
||||
```python
|
||||
from uniface.spoofing import MiniFASNet
|
||||
|
||||
spoofer = MiniFASNet()
|
||||
result = spoofer.predict(image, face.bbox)
|
||||
|
||||
# Default threshold (0.5)
|
||||
if result.is_real: # confidence > 0.5
|
||||
print("Real face")
|
||||
|
||||
# Custom threshold for high security
|
||||
SPOOF_THRESHOLD = 0.7
|
||||
if result.is_real and result.confidence > SPOOF_THRESHOLD:
|
||||
print("Real face (high confidence)")
|
||||
else:
|
||||
print("Potentially fake")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Attribute Model Confidence
|
||||
|
||||
### Emotion
|
||||
|
||||
```python
|
||||
result = emotion_predictor.predict(image, landmarks)
|
||||
|
||||
# Filter low-confidence predictions
|
||||
if result.confidence > 0.6:
|
||||
print(f"Emotion: {result.emotion}")
|
||||
else:
|
||||
print("Uncertain emotion")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Visualization Threshold
|
||||
|
||||
For drawing detections, filter by confidence:
|
||||
|
||||
```python
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
# Only draw high-confidence detections
|
||||
bboxes = [f.bbox for f in faces if f.confidence > 0.7]
|
||||
scores = [f.confidence for f in faces if f.confidence > 0.7]
|
||||
landmarks = [f.landmarks for f in faces if f.confidence > 0.7]
|
||||
|
||||
draw_detections(
|
||||
image=image,
|
||||
bboxes=bboxes,
|
||||
scores=scores,
|
||||
landmarks=landmarks,
|
||||
vis_threshold=0.6 # Additional visualization filter
|
||||
)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Summary
|
||||
|
||||
| Parameter | Default | Range | Lower = | Higher = |
|
||||
|-----------|---------|-------|---------|----------|
|
||||
| `confidence_threshold` | 0.5 | 0.1-0.9 | More detections | Fewer false positives |
|
||||
| `nms_threshold` | 0.4 | 0.1-0.7 | Fewer overlaps | More overlapping boxes |
|
||||
| Similarity threshold | 0.6 | 0.3-0.8 | More matches (FAR↑) | Fewer matches (FRR↑) |
|
||||
| Spoof confidence | 0.5 | 0.3-0.9 | More "real" | Stricter liveness |
|
||||
|
||||
---
|
||||
|
||||
## Next Steps
|
||||
|
||||
- [Detection Module](../modules/detection.md) - Detection model options
|
||||
- [Recognition Module](../modules/recognition.md) - Recognition model options
|
||||
72
docs/contributing.md
Normal file
72
docs/contributing.md
Normal file
@@ -0,0 +1,72 @@
|
||||
# Contributing
|
||||
|
||||
Thank you for contributing to UniFace!
|
||||
|
||||
---
|
||||
|
||||
## Quick Start
|
||||
|
||||
```bash
|
||||
# Clone
|
||||
git clone https://github.com/yakhyo/uniface.git
|
||||
cd uniface
|
||||
|
||||
# Install dev dependencies
|
||||
pip install -e ".[dev]"
|
||||
|
||||
# Run tests
|
||||
pytest
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Code Style
|
||||
|
||||
We use [Ruff](https://docs.astral.sh/ruff/) for formatting and linting:
|
||||
|
||||
```bash
|
||||
ruff format .
|
||||
ruff check . --fix
|
||||
```
|
||||
|
||||
**Guidelines:**
|
||||
|
||||
- Line length: 120
|
||||
- Python 3.11+ type hints
|
||||
- Google-style docstrings
|
||||
|
||||
---
|
||||
|
||||
## Pre-commit Hooks
|
||||
|
||||
```bash
|
||||
pip install pre-commit
|
||||
pre-commit install
|
||||
pre-commit run --all-files
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Pull Request Process
|
||||
|
||||
1. Fork the repository
|
||||
2. Create a feature branch
|
||||
3. Write tests for new features
|
||||
4. Ensure tests pass
|
||||
5. Submit PR with clear description
|
||||
|
||||
---
|
||||
|
||||
## Adding New Models
|
||||
|
||||
1. Create model class in appropriate submodule
|
||||
2. Add weight constants to `uniface/constants.py`
|
||||
3. Export in `__init__.py` files
|
||||
4. Write tests in `tests/`
|
||||
5. Add example in `tools/` or notebooks
|
||||
|
||||
---
|
||||
|
||||
## Questions?
|
||||
|
||||
Open an issue on [GitHub](https://github.com/yakhyo/uniface/issues).
|
||||
133
docs/index.md
Normal file
133
docs/index.md
Normal file
@@ -0,0 +1,133 @@
|
||||
---
|
||||
hide:
|
||||
- toc
|
||||
- navigation
|
||||
- edit
|
||||
template: home.html
|
||||
---
|
||||
|
||||
<div class="hero" markdown>
|
||||
|
||||
# UniFace { .hero-title }
|
||||
|
||||
<p class="hero-subtitle">A lightweight, production-ready face analysis library built on ONNX Runtime</p>
|
||||
|
||||
[![PyPI version](https://img.shields.io/pypi/v/uniface.svg)](https://pypi.org/project/uniface/)
|
||||
[![Python 3.11+](https://img.shields.io/badge/python-3.11%2B-blue.svg)](https://www.python.org/)
|
||||
[![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)
|
||||
[![Downloads](https://static.pepy.tech/badge/uniface)](https://pepy.tech/project/uniface)
|
||||
|
||||
[Get Started](quickstart.md){ .md-button .md-button--primary }
|
||||
[View on GitHub](https://github.com/yakhyo/uniface){ .md-button }
|
||||
|
||||
</div>
|
||||
|
||||
<div class="feature-grid" markdown>
|
||||
|
||||
<div class="feature-card" markdown>
|
||||
### :material-face-recognition: Face Detection
|
||||
ONNX-optimized RetinaFace, SCRFD, and YOLOv5-Face models with 5-point landmarks.
|
||||
</div>
|
||||
|
||||
<div class="feature-card" markdown>
|
||||
### :material-account-check: Face Recognition
|
||||
ArcFace, MobileFace, and SphereFace embeddings for identity verification.
|
||||
</div>
|
||||
|
||||
<div class="feature-card" markdown>
|
||||
### :material-map-marker: Landmarks
|
||||
Accurate 106-point facial landmark localization for detailed face analysis.
|
||||
</div>
|
||||
|
||||
<div class="feature-card" markdown>
|
||||
### :material-account-details: Attributes
|
||||
Age, gender, race (FairFace), and emotion detection from faces.
|
||||
</div>
|
||||
|
||||
<div class="feature-card" markdown>
|
||||
### :material-face-man-shimmer: Face Parsing
|
||||
BiSeNet semantic segmentation with 19 facial component classes.
|
||||
</div>
|
||||
|
||||
<div class="feature-card" markdown>
|
||||
### :material-eye: Gaze Estimation
|
||||
Real-time gaze direction prediction with MobileGaze models.
|
||||
</div>
|
||||
|
||||
<div class="feature-card" markdown>
|
||||
### :material-shield-check: Anti-Spoofing
|
||||
Face liveness detection with MiniFASNet to prevent fraud.
|
||||
</div>
|
||||
|
||||
<div class="feature-card" markdown>
|
||||
### :material-blur: Privacy
|
||||
Face anonymization with 5 blur methods for privacy protection.
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
---
|
||||
|
||||
## Installation
|
||||
|
||||
=== "Standard"
|
||||
|
||||
```bash
|
||||
pip install uniface
|
||||
```
|
||||
|
||||
=== "GPU (CUDA)"
|
||||
|
||||
```bash
|
||||
pip install "uniface[gpu]"
|
||||
```
|
||||
|
||||
=== "From Source"
|
||||
|
||||
```bash
|
||||
git clone https://github.com/yakhyo/uniface.git
|
||||
cd uniface
|
||||
pip install -e .
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Next Steps
|
||||
|
||||
<div class="next-steps-grid" markdown>
|
||||
|
||||
<div class="feature-card" markdown>
|
||||
### :material-rocket-launch: Quickstart
|
||||
Get up and running in 5 minutes with common use cases.
|
||||
|
||||
[Quickstart Guide →](quickstart.md)
|
||||
</div>
|
||||
|
||||
<div class="feature-card" markdown>
|
||||
### :material-school: Tutorials
|
||||
Step-by-step examples for common workflows.
|
||||
|
||||
[View Tutorials →](recipes/image-pipeline.md)
|
||||
</div>
|
||||
|
||||
<div class="feature-card" markdown>
|
||||
### :material-api: API Reference
|
||||
Explore individual modules and their APIs.
|
||||
|
||||
[Browse API →](modules/detection.md)
|
||||
</div>
|
||||
|
||||
<div class="feature-card" markdown>
|
||||
### :material-book-open-variant: Guides
|
||||
Learn about the architecture and design principles.
|
||||
|
||||
[Read Guides →](concepts/overview.md)
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
---
|
||||
|
||||
## License
|
||||
|
||||
UniFace is released under the [MIT License](https://opensource.org/licenses/MIT).
|
||||
174
docs/installation.md
Normal file
174
docs/installation.md
Normal file
@@ -0,0 +1,174 @@
|
||||
# Installation
|
||||
|
||||
This guide covers all installation options for UniFace.
|
||||
|
||||
---
|
||||
|
||||
## Requirements
|
||||
|
||||
- **Python**: 3.11 or higher
|
||||
- **Operating Systems**: macOS, Linux, Windows
|
||||
|
||||
---
|
||||
|
||||
## Quick Install
|
||||
|
||||
The simplest way to install UniFace:
|
||||
|
||||
```bash
|
||||
pip install uniface
|
||||
```
|
||||
|
||||
This installs the CPU version with all core dependencies.
|
||||
|
||||
---
|
||||
|
||||
## Platform-Specific Installation
|
||||
|
||||
### macOS (Apple Silicon - M1/M2/M3/M4)
|
||||
|
||||
For Apple Silicon Macs, the standard installation automatically includes ARM64 optimizations:
|
||||
|
||||
```bash
|
||||
pip install uniface
|
||||
```
|
||||
|
||||
!!! tip "Native Performance"
|
||||
The base `onnxruntime` package has included native Apple Silicon (ARM64) support since version 1.13. No additional configuration is needed.
|
||||
|
||||
Verify ARM64 installation:
|
||||
|
||||
```bash
|
||||
python -c "import platform; print(platform.machine())"
|
||||
# Should show: arm64
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Linux/Windows with NVIDIA GPU
|
||||
|
||||
For CUDA acceleration on NVIDIA GPUs:
|
||||
|
||||
```bash
|
||||
pip install "uniface[gpu]"
|
||||
```
|
||||
|
||||
**Requirements:**
|
||||
|
||||
- CUDA 11.x or 12.x
|
||||
- cuDNN 8.x or 9.x (depending on your ONNX Runtime and CUDA versions)
|
||||
|
||||
!!! info "CUDA Compatibility"
|
||||
See [ONNX Runtime GPU requirements](https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html) for detailed compatibility matrix.
|
||||
|
||||
Verify GPU installation:
|
||||
|
||||
```python
|
||||
import onnxruntime as ort
|
||||
print("Available providers:", ort.get_available_providers())
|
||||
# Should include: 'CUDAExecutionProvider'
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### CPU-Only (All Platforms)
|
||||
|
||||
```bash
|
||||
pip install uniface
|
||||
```
|
||||
|
||||
Works on all platforms with automatic CPU fallback.
|
||||
|
||||
---
|
||||
|
||||
## Install from Source
|
||||
|
||||
For development or the latest features:
|
||||
|
||||
```bash
|
||||
git clone https://github.com/yakhyo/uniface.git
|
||||
cd uniface
|
||||
pip install -e .
|
||||
```
|
||||
|
||||
With development dependencies:
|
||||
|
||||
```bash
|
||||
pip install -e ".[dev]"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Dependencies
|
||||
|
||||
UniFace has minimal dependencies:
|
||||
|
||||
| Package | Purpose |
|
||||
|---------|---------|
|
||||
| `numpy` | Array operations |
|
||||
| `opencv-python` | Image processing |
|
||||
| `onnxruntime` | Model inference |
|
||||
| `requests` | Model download |
|
||||
| `tqdm` | Progress bars |
|
||||
|
||||
---
|
||||
|
||||
## Verify Installation
|
||||
|
||||
Test your installation:
|
||||
|
||||
```python
|
||||
import uniface
|
||||
print(f"UniFace version: {uniface.__version__}")
|
||||
|
||||
# Check available ONNX providers
|
||||
import onnxruntime as ort
|
||||
print(f"Available providers: {ort.get_available_providers()}")
|
||||
|
||||
# Quick test
|
||||
from uniface import RetinaFace
|
||||
detector = RetinaFace()
|
||||
print("Installation successful!")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Import Errors
|
||||
|
||||
If you encounter import errors, ensure you're using Python 3.11+:
|
||||
|
||||
```bash
|
||||
python --version
|
||||
# Should show: Python 3.11.x or higher
|
||||
```
|
||||
|
||||
### Model Download Issues
|
||||
|
||||
Models are automatically downloaded on first use. If downloads fail:
|
||||
|
||||
```python
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.constants import RetinaFaceWeights
|
||||
|
||||
# Manually download a model
|
||||
model_path = verify_model_weights(RetinaFaceWeights.MNET_V2)
|
||||
print(f"Model downloaded to: {model_path}")
|
||||
```
|
||||
|
||||
### Performance Issues on Mac
|
||||
|
||||
Verify you're using the ARM64 build (not x86_64 via Rosetta):
|
||||
|
||||
```bash
|
||||
python -c "import platform; print(platform.machine())"
|
||||
# Should show: arm64 (not x86_64)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Next Steps
|
||||
|
||||
- [Quickstart Guide](quickstart.md) - Get started in 5 minutes
|
||||
- [Execution Providers](concepts/execution-providers.md) - Hardware acceleration setup
|
||||
22
docs/license-attribution.md
Normal file
22
docs/license-attribution.md
Normal file
@@ -0,0 +1,22 @@
|
||||
# Licenses & Attribution
|
||||
|
||||
## UniFace License
|
||||
|
||||
UniFace is released under the [MIT License](https://opensource.org/licenses/MIT).
|
||||
|
||||
---
|
||||
|
||||
## Model Credits
|
||||
|
||||
| Model | Source | License |
|
||||
|-------|--------|---------|
|
||||
| RetinaFace | [yakhyo/retinaface-pytorch](https://github.com/yakhyo/retinaface-pytorch) | MIT |
|
||||
| SCRFD | [InsightFace](https://github.com/deepinsight/insightface) | MIT |
|
||||
| YOLOv5-Face | [yakhyo/yolov5-face-onnx-inference](https://github.com/yakhyo/yolov5-face-onnx-inference) | GPL-3.0 |
|
||||
| ArcFace | [InsightFace](https://github.com/deepinsight/insightface) | MIT |
|
||||
| MobileFace | [yakhyo/face-recognition](https://github.com/yakhyo/face-recognition) | MIT |
|
||||
| SphereFace | [yakhyo/face-recognition](https://github.com/yakhyo/face-recognition) | MIT |
|
||||
| BiSeNet | [yakhyo/face-parsing](https://github.com/yakhyo/face-parsing) | MIT |
|
||||
| MobileGaze | [yakhyo/gaze-estimation](https://github.com/yakhyo/gaze-estimation) | MIT |
|
||||
| MiniFASNet | [yakhyo/face-anti-spoofing](https://github.com/yakhyo/face-anti-spoofing) | Apache-2.0 |
|
||||
| FairFace | [yakhyo/fairface-onnx](https://github.com/yakhyo/fairface-onnx) | CC BY 4.0 |
|
||||
317
docs/models.md
Normal file
317
docs/models.md
Normal file
@@ -0,0 +1,317 @@
|
||||
# Model Zoo
|
||||
|
||||
Complete guide to all available models, their performance characteristics, and selection criteria.
|
||||
|
||||
---
|
||||
|
||||
## Face Detection Models
|
||||
|
||||
### RetinaFace Family
|
||||
|
||||
RetinaFace models are trained on the WIDER FACE dataset and provide excellent accuracy-speed tradeoffs.
|
||||
|
||||
| Model Name | Params | Size | Easy | Medium | Hard | Use Case |
|
||||
| -------------- | ------ | ----- | ------ | ------ | ------ | ----------------------------- |
|
||||
| `MNET_025` | 0.4M | 1.7MB | 88.48% | 87.02% | 80.61% | Mobile/Edge devices |
|
||||
| `MNET_050` | 1.0M | 2.6MB | 89.42% | 87.97% | 82.40% | Mobile/Edge devices |
|
||||
| `MNET_V1` | 3.5M | 3.8MB | 90.59% | 89.14% | 84.13% | Balanced mobile |
|
||||
| `MNET_V2` :material-check-circle: | 3.2M | 3.5MB | 91.70% | 91.03% | 86.60% | **Default** |
|
||||
| `RESNET18` | 11.7M | 27MB | 92.50% | 91.02% | 86.63% | Server/High accuracy |
|
||||
| `RESNET34` | 24.8M | 56MB | 94.16% | 93.12% | 88.90% | Maximum accuracy |
|
||||
|
||||
!!! info "Accuracy & Benchmarks"
|
||||
**Accuracy**: WIDER FACE validation set (Easy/Medium/Hard subsets) - from [RetinaFace paper](https://arxiv.org/abs/1905.00641)
|
||||
|
||||
**Speed**: Benchmark on your own hardware using `python tools/detection.py --source <image> --iterations 100`
|
||||
|
||||
---
|
||||
|
||||
### SCRFD Family
|
||||
|
||||
SCRFD (Sample and Computation Redistribution for Efficient Face Detection) models offer state-of-the-art speed-accuracy tradeoffs.
|
||||
|
||||
| Model Name | Params | Size | Easy | Medium | Hard | Use Case |
|
||||
| ---------------- | ------ | ----- | ------ | ------ | ------ | ------------------------------- |
|
||||
| `SCRFD_500M` | 0.6M | 2.5MB | 90.57% | 88.12% | 68.51% | Real-time applications |
|
||||
| `SCRFD_10G` :material-check-circle: | 4.2M | 17MB | 95.16% | 93.87% | 83.05% | **High accuracy + speed** |
|
||||
|
||||
!!! info "Accuracy & Benchmarks"
|
||||
**Accuracy**: WIDER FACE validation set - from [SCRFD paper](https://arxiv.org/abs/2105.04714)
|
||||
|
||||
**Speed**: Benchmark on your own hardware using `python tools/detection.py --source <image> --iterations 100`
|
||||
|
||||
---
|
||||
|
||||
### YOLOv5-Face Family
|
||||
|
||||
YOLOv5-Face models provide excellent detection accuracy with 5-point facial landmarks, optimized for real-time applications.
|
||||
|
||||
| Model Name | Size | Easy | Medium | Hard | Use Case |
|
||||
| -------------- | ---- | ------ | ------ | ------ | ------------------------------ |
|
||||
| `YOLOV5N` | 11MB | 93.61% | 91.52% | 80.53% | Lightweight/Mobile |
|
||||
| `YOLOV5S` :material-check-circle: | 28MB | 94.33% | 92.61% | 83.15% | **Real-time + accuracy** |
|
||||
| `YOLOV5M` | 82MB | 95.30% | 93.76% | 85.28% | High accuracy |
|
||||
|
||||
!!! info "Accuracy & Benchmarks"
|
||||
**Accuracy**: WIDER FACE validation set - from [YOLOv5-Face paper](https://arxiv.org/abs/2105.12931)
|
||||
|
||||
**Speed**: Benchmark on your own hardware using `python tools/detection.py --source <image> --iterations 100`
|
||||
|
||||
!!! note "Fixed Input Size"
|
||||
All YOLOv5-Face models use a fixed input size of 640×640. The models are exported to ONNX from [deepcam-cn/yolov5-face](https://github.com/deepcam-cn/yolov5-face).
|
||||
|
||||
---
|
||||
|
||||
## Face Recognition Models
|
||||
|
||||
### ArcFace
|
||||
|
||||
State-of-the-art face recognition using additive angular margin loss.
|
||||
|
||||
| Model Name | Backbone | Params | Size | Use Case |
|
||||
| ----------- | --------- | ------ | ----- | -------------------------------- |
|
||||
| `MNET` :material-check-circle: | MobileNet | 2.0M | 8MB | **Balanced (recommended)** |
|
||||
| `RESNET` | ResNet50 | 43.6M | 166MB | Maximum accuracy |
|
||||
|
||||
!!! info "Training Data"
|
||||
**Dataset**: Trained on MS1M-V2 (5.8M images, 85K identities)
|
||||
|
||||
**Accuracy**: Benchmark on your own dataset or use standard face verification benchmarks
|
||||
|
||||
---
|
||||
|
||||
### MobileFace
|
||||
|
||||
Lightweight face recognition optimized for mobile devices.
|
||||
|
||||
| Model Name | Backbone | Params | Size | LFW | CALFW | CPLFW | AgeDB-30 | Use Case |
|
||||
| ----------------- | ---------------- | ------ | ---- | ------ | ------ | ------ | -------- | --------------------- |
|
||||
| `MNET_025` | MobileNetV1 0.25 | 0.36M | 1MB | 98.76% | 92.02% | 82.37% | 90.02% | Ultra-lightweight |
|
||||
| `MNET_V2` :material-check-circle: | MobileNetV2 | 2.29M | 4MB | 99.55% | 94.87% | 86.89% | 95.16% | **Mobile/Edge** |
|
||||
| `MNET_V3_SMALL` | MobileNetV3-S | 1.25M | 3MB | 99.30% | 93.77% | 85.29% | 92.79% | Mobile optimized |
|
||||
| `MNET_V3_LARGE` | MobileNetV3-L | 3.52M | 10MB | 99.53% | 94.56% | 86.79% | 95.13% | Balanced mobile |
|
||||
|
||||
!!! info "Training Data"
|
||||
**Dataset**: Trained on MS1M-V2 (5.8M images, 85K identities)
|
||||
|
||||
**Accuracy**: Evaluated on LFW, CALFW, CPLFW, and AgeDB-30 benchmarks
|
||||
|
||||
!!! tip "Use Case"
|
||||
These models are lightweight alternatives to ArcFace for resource-constrained environments.
|
||||
|
||||
---
|
||||
|
||||
### SphereFace
|
||||
|
||||
Face recognition using angular softmax loss.
|
||||
|
||||
| Model Name | Backbone | Params | Size | LFW | CALFW | CPLFW | AgeDB-30 | Use Case |
|
||||
| ------------ | -------- | ------ | ---- | ------ | ------ | ------ | -------- | ------------------- |
|
||||
| `SPHERE20` | Sphere20 | 24.5M | 50MB | 99.67% | 95.61% | 88.75% | 96.58% | Research/Comparison |
|
||||
| `SPHERE36` | Sphere36 | 34.6M | 92MB | 99.72% | 95.64% | 89.92% | 96.83% | Research/Comparison |
|
||||
|
||||
!!! info "Training Data"
|
||||
**Dataset**: Trained on MS1M-V2 (5.8M images, 85K identities)
|
||||
|
||||
**Accuracy**: Evaluated on LFW, CALFW, CPLFW, and AgeDB-30 benchmarks
|
||||
|
||||
!!! note "Architecture"
|
||||
SphereFace uses angular softmax loss, an approach that predates ArcFace. These models provide good accuracy with moderate resource requirements.
|
||||
|
||||
---
|
||||
|
||||
## Facial Landmark Models
|
||||
|
||||
### 106-Point Landmark Detection
|
||||
|
||||
High-precision facial landmark localization.
|
||||
|
||||
| Model Name | Points | Params | Size | Use Case |
|
||||
| ---------- | ------ | ------ | ---- | ------------------------ |
|
||||
| `2D106` | 106 | 3.7M | 14MB | Face alignment, analysis |
|
||||
|
||||
**Landmark Groups:**
|
||||
|
||||
| Group | Points | Count |
|
||||
|-------|--------|-------|
|
||||
| Face contour | 0-32 | 33 points |
|
||||
| Eyebrows | 33-50 | 18 points |
|
||||
| Nose | 51-62 | 12 points |
|
||||
| Eyes | 63-86 | 24 points |
|
||||
| Mouth | 87-105 | 19 points |
|
||||
|
||||
---
|
||||
|
||||
## Attribute Analysis Models
|
||||
|
||||
### Age & Gender Detection
|
||||
|
||||
| Model Name | Attributes | Params | Size | Use Case |
|
||||
| ----------- | ----------- | ------ | ---- | --------------- |
|
||||
| `AgeGender` | Age, Gender | 2.1M | 8MB | General purpose |
|
||||
|
||||
!!! info "Training Data"
|
||||
**Dataset**: Trained on CelebA
|
||||
|
||||
!!! warning "Accuracy Note"
|
||||
Accuracy varies by demographic and image quality. Test on your specific use case.
|
||||
|
||||
---
|
||||
|
||||
### FairFace Attributes
|
||||
|
||||
| Model Name | Attributes | Params | Size | Use Case |
|
||||
| ----------- | --------------------- | ------ | ----- | --------------------------- |
|
||||
| `FairFace` | Race, Gender, Age Group | - | 44MB | Balanced demographic prediction |
|
||||
|
||||
!!! info "Training Data"
|
||||
**Dataset**: Trained on FairFace dataset with balanced demographics
|
||||
|
||||
!!! tip "Equitable Predictions"
|
||||
FairFace provides more equitable predictions across different racial and gender groups.
|
||||
|
||||
**Race Categories (7):** White, Black, Latino Hispanic, East Asian, Southeast Asian, Indian, Middle Eastern
|
||||
|
||||
**Age Groups (9):** 0-2, 3-9, 10-19, 20-29, 30-39, 40-49, 50-59, 60-69, 70+
|
||||
|
||||
---
|
||||
|
||||
### Emotion Detection
|
||||
|
||||
| Model Name | Classes | Params | Size | Use Case |
|
||||
| ------------- | ------- | ------ | ---- | --------------- |
|
||||
| `AFFECNET7` | 7 | 0.5M | 2MB | 7-class emotion |
|
||||
| `AFFECNET8` | 8 | 0.5M | 2MB | 8-class emotion |
|
||||
|
||||
**Classes (7)**: Neutral, Happy, Sad, Surprise, Fear, Disgust, Anger
|
||||
|
||||
**Classes (8)**: Above + Contempt
|
||||
|
||||
!!! info "Training Data"
|
||||
**Dataset**: Trained on AffectNet
|
||||
|
||||
!!! note "Accuracy Note"
|
||||
Emotion detection accuracy depends heavily on facial expression clarity and cultural context.
|
||||
|
||||
---
|
||||
|
||||
## Gaze Estimation Models
|
||||
|
||||
### MobileGaze Family
|
||||
|
||||
Real-time gaze direction prediction models trained on Gaze360 dataset. Returns pitch (vertical) and yaw (horizontal) angles in radians.
|
||||
|
||||
| Model Name | Params | Size | MAE* | Use Case |
|
||||
| -------------- | ------ | ------- | ----- | ----------------------------- |
|
||||
| `RESNET18` | 11.7M | 43 MB | 12.84 | Balanced accuracy/speed |
|
||||
| `RESNET34` :material-check-circle: | 24.8M | 81.6 MB | 11.33 | **Default** |
|
||||
| `RESNET50` | 25.6M | 91.3 MB | 11.34 | High accuracy |
|
||||
| `MOBILENET_V2` | 3.5M | 9.59 MB | 13.07 | Mobile/Edge devices |
|
||||
| `MOBILEONE_S0` | 2.1M | 4.8 MB | 12.58 | Lightweight/Real-time |
|
||||
|
||||
*MAE (Mean Absolute Error) in degrees on Gaze360 test set - lower is better
|
||||
|
||||
!!! info "Training Data"
|
||||
**Dataset**: Trained on Gaze360 (indoor/outdoor scenes with diverse head poses)
|
||||
|
||||
**Training**: 200 epochs with classification-based approach (binned angles)
|
||||
|
||||
!!! note "Input Requirements"
|
||||
Requires face crop as input. Use face detection first to obtain bounding boxes.
|
||||
|
||||
---
|
||||
|
||||
## Face Parsing Models
|
||||
|
||||
### BiSeNet Family
|
||||
|
||||
BiSeNet (Bilateral Segmentation Network) models for semantic face parsing. Segments face images into 19 facial component classes.
|
||||
|
||||
| Model Name | Params | Size | Classes | Use Case |
|
||||
| -------------- | ------ | ------- | ------- | ----------------------------- |
|
||||
| `RESNET18` :material-check-circle: | 13.3M | 50.7 MB | 19 | **Default** |
|
||||
| `RESNET34` | 24.1M | 89.2 MB | 19 | Higher accuracy |
|
||||
|
||||
!!! info "Training Data"
|
||||
**Dataset**: Trained on CelebAMask-HQ
|
||||
|
||||
**Architecture**: BiSeNet with ResNet backbone
|
||||
|
||||
**Input Size**: 512×512 (automatically resized)
|
||||
|
||||
**19 Facial Component Classes:**
|
||||
|
||||
| # | Class | # | Class | # | Class |
|
||||
|---|-------|---|-------|---|-------|
|
||||
| 1 | Background | 8 | Left Ear | 15 | Neck |
|
||||
| 2 | Skin | 9 | Right Ear | 16 | Necklace |
|
||||
| 3 | Left Eyebrow | 10 | Earring | 17 | Cloth |
|
||||
| 4 | Right Eyebrow | 11 | Nose | 18 | Hair |
|
||||
| 5 | Left Eye | 12 | Mouth | 19 | Hat |
|
||||
| 6 | Right Eye | 13 | Upper Lip | | |
|
||||
| 7 | Eyeglasses | 14 | Lower Lip | | |
|
||||
|
||||
**Applications:**
|
||||
|
||||
- Face makeup and beauty applications
|
||||
- Virtual try-on systems
|
||||
- Face editing and manipulation
|
||||
- Facial feature extraction
|
||||
- Portrait segmentation
|
||||
|
||||
!!! note "Input Requirements"
|
||||
Input should be a cropped face image. For full pipeline, use face detection first to obtain face crops.
|
||||
|
||||
---
|
||||
|
||||
## Anti-Spoofing Models
|
||||
|
||||
### MiniFASNet Family
|
||||
|
||||
Lightweight face anti-spoofing models for liveness detection. They detect whether a face is real (live) or fake (photo, video replay, mask).
|
||||
|
||||
| Model Name | Size | Scale | Use Case |
|
||||
| ---------- | ------ | ----- | ----------------------------- |
|
||||
| `V1SE` | 1.2 MB | 4.0 | Squeeze-and-excitation variant |
|
||||
| `V2` :material-check-circle: | 1.2 MB | 2.7 | **Default** |
|
||||
|
||||
!!! info "Output Format"
|
||||
**Output**: Returns `SpoofingResult(is_real, confidence)`, where `is_real` is `True` for a real (live) face and `False` for a spoofed one.
|
||||
|
||||
!!! note "Input Requirements"
|
||||
Requires face bounding box from a detector. Use with RetinaFace, SCRFD, or YOLOv5Face.
|
||||
|
||||
---
|
||||
|
||||
## Model Management
|
||||
|
||||
Models are automatically downloaded and cached on first use.
|
||||
|
||||
- **Cache location**: `~/.uniface/models/`
|
||||
- **Verification**: Models are verified with SHA-256 checksums
|
||||
- **Manual download**: Use `python tools/download_model.py` to pre-download models
|
||||
|
||||
---
|
||||
|
||||
## References
|
||||
|
||||
### Model Training & Architectures
|
||||
|
||||
- **RetinaFace Training**: [yakhyo/retinaface-pytorch](https://github.com/yakhyo/retinaface-pytorch) - PyTorch implementation and training code
|
||||
- **YOLOv5-Face Original**: [deepcam-cn/yolov5-face](https://github.com/deepcam-cn/yolov5-face) - Original PyTorch implementation
|
||||
- **YOLOv5-Face ONNX**: [yakhyo/yolov5-face-onnx-inference](https://github.com/yakhyo/yolov5-face-onnx-inference) - ONNX inference implementation
|
||||
- **Face Recognition Training**: [yakhyo/face-recognition](https://github.com/yakhyo/face-recognition) - ArcFace, MobileFace, SphereFace training code
|
||||
- **Gaze Estimation Training**: [yakhyo/gaze-estimation](https://github.com/yakhyo/gaze-estimation) - MobileGaze training code and pretrained weights
|
||||
- **Face Parsing Training**: [yakhyo/face-parsing](https://github.com/yakhyo/face-parsing) - BiSeNet training code and pretrained weights
|
||||
- **Face Anti-Spoofing**: [yakhyo/face-anti-spoofing](https://github.com/yakhyo/face-anti-spoofing) - MiniFASNet ONNX inference (weights from [minivision-ai/Silent-Face-Anti-Spoofing](https://github.com/minivision-ai/Silent-Face-Anti-Spoofing))
|
||||
- **FairFace**: [yakhyo/fairface-onnx](https://github.com/yakhyo/fairface-onnx) - FairFace ONNX inference for race, gender, age prediction
|
||||
- **InsightFace**: [deepinsight/insightface](https://github.com/deepinsight/insightface) - Model architectures and pretrained weights
|
||||
|
||||
### Papers
|
||||
|
||||
- **RetinaFace**: [Single-Shot Multi-Level Face Localisation in the Wild](https://arxiv.org/abs/1905.00641)
|
||||
- **SCRFD**: [Sample and Computation Redistribution for Efficient Face Detection](https://arxiv.org/abs/2105.04714)
|
||||
- **YOLOv5-Face**: [YOLO5Face: Why Reinventing a Face Detector](https://arxiv.org/abs/2105.12931)
|
||||
- **ArcFace**: [Additive Angular Margin Loss for Deep Face Recognition](https://arxiv.org/abs/1801.07698)
|
||||
- **SphereFace**: [Deep Hypersphere Embedding for Face Recognition](https://arxiv.org/abs/1704.08063)
|
||||
- **BiSeNet**: [Bilateral Segmentation Network for Real-time Semantic Segmentation](https://arxiv.org/abs/1808.00897)
|
||||
279
docs/modules/attributes.md
Normal file
279
docs/modules/attributes.md
Normal file
@@ -0,0 +1,279 @@
|
||||
# Attributes
|
||||
|
||||
Facial attribute analysis for age, gender, race, and emotion detection.
|
||||
|
||||
---
|
||||
|
||||
## Available Models
|
||||
|
||||
| Model | Attributes | Size | Notes |
|
||||
|-------|------------|------|-------|
|
||||
| **AgeGender** | Age, Gender | 8 MB | Exact age prediction |
|
||||
| **FairFace** | Gender, Age Group, Race | 44 MB | Balanced demographics |
|
||||
| **Emotion** | 7-8 emotions | 2 MB | Requires PyTorch |
|
||||
|
||||
---
|
||||
|
||||
## AgeGender
|
||||
|
||||
Predicts exact age and binary gender.
|
||||
|
||||
### Basic Usage
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace, AgeGender
|
||||
|
||||
detector = RetinaFace()
|
||||
age_gender = AgeGender()
|
||||
|
||||
faces = detector.detect(image)
|
||||
|
||||
for face in faces:
|
||||
result = age_gender.predict(image, face.bbox)
|
||||
print(f"Gender: {result.sex}") # "Female" or "Male"
|
||||
print(f"Age: {result.age} years")
|
||||
```
|
||||
|
||||
### Output
|
||||
|
||||
```python
|
||||
# AttributeResult fields
|
||||
result.gender # 0=Female, 1=Male
|
||||
result.sex # "Female" or "Male" (property)
|
||||
result.age # int, age in years
|
||||
result.age_group # None (not provided by this model)
|
||||
result.race # None (not provided by this model)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## FairFace
|
||||
|
||||
Predicts gender, age group, and race with balanced demographics.
|
||||
|
||||
### Basic Usage
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace, FairFace
|
||||
|
||||
detector = RetinaFace()
|
||||
fairface = FairFace()
|
||||
|
||||
faces = detector.detect(image)
|
||||
|
||||
for face in faces:
|
||||
result = fairface.predict(image, face.bbox)
|
||||
print(f"Gender: {result.sex}")
|
||||
print(f"Age Group: {result.age_group}")
|
||||
print(f"Race: {result.race}")
|
||||
```
|
||||
|
||||
### Output
|
||||
|
||||
```python
|
||||
# AttributeResult fields
|
||||
result.gender # 0=Female, 1=Male
|
||||
result.sex # "Female" or "Male"
|
||||
result.age # None (not provided by this model)
|
||||
result.age_group # "20-29", "30-39", etc.
|
||||
result.race # Race/ethnicity label
|
||||
```
|
||||
|
||||
### Race Categories
|
||||
|
||||
| Label |
|
||||
|-------|
|
||||
| White |
|
||||
| Black |
|
||||
| Latino Hispanic |
|
||||
| East Asian |
|
||||
| Southeast Asian |
|
||||
| Indian |
|
||||
| Middle Eastern |
|
||||
|
||||
### Age Groups
|
||||
|
||||
| Group |
|
||||
|-------|
|
||||
| 0-2 |
|
||||
| 3-9 |
|
||||
| 10-19 |
|
||||
| 20-29 |
|
||||
| 30-39 |
|
||||
| 40-49 |
|
||||
| 50-59 |
|
||||
| 60-69 |
|
||||
| 70+ |
|
||||
|
||||
---
|
||||
|
||||
## Emotion
|
||||
|
||||
Predicts facial emotions. Requires PyTorch.
|
||||
|
||||
!!! warning "Optional Dependency"
|
||||
Emotion detection requires PyTorch. Install with:
|
||||
```bash
|
||||
pip install torch
|
||||
```
|
||||
|
||||
### Basic Usage
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace
|
||||
from uniface.attribute import Emotion
|
||||
from uniface.constants import DDAMFNWeights
|
||||
|
||||
detector = RetinaFace()
|
||||
emotion = Emotion(model_name=DDAMFNWeights.AFFECNET7)
|
||||
|
||||
faces = detector.detect(image)
|
||||
|
||||
for face in faces:
|
||||
result = emotion.predict(image, face.landmarks)
|
||||
print(f"Emotion: {result.emotion}")
|
||||
print(f"Confidence: {result.confidence:.2%}")
|
||||
```
|
||||
|
||||
### Emotion Classes
|
||||
|
||||
=== "7-Class (AFFECNET7)"
|
||||
|
||||
| Label |
|
||||
|-------|
|
||||
| Neutral |
|
||||
| Happy |
|
||||
| Sad |
|
||||
| Surprise |
|
||||
| Fear |
|
||||
| Disgust |
|
||||
| Anger |
|
||||
|
||||
=== "8-Class (AFFECNET8)"
|
||||
|
||||
| Label |
|
||||
|-------|
|
||||
| Neutral |
|
||||
| Happy |
|
||||
| Sad |
|
||||
| Surprise |
|
||||
| Fear |
|
||||
| Disgust |
|
||||
| Anger |
|
||||
| Contempt |
|
||||
|
||||
### Model Variants
|
||||
|
||||
```python
|
||||
from uniface.attribute import Emotion
|
||||
from uniface.constants import DDAMFNWeights
|
||||
|
||||
# 7-class emotion
|
||||
emotion = Emotion(model_name=DDAMFNWeights.AFFECNET7)
|
||||
|
||||
# 8-class emotion
|
||||
emotion = Emotion(model_name=DDAMFNWeights.AFFECNET8)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Combining Models
|
||||
|
||||
### Full Attribute Analysis
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace, AgeGender, FairFace
|
||||
|
||||
detector = RetinaFace()
|
||||
age_gender = AgeGender()
|
||||
fairface = FairFace()
|
||||
|
||||
faces = detector.detect(image)
|
||||
|
||||
for face in faces:
|
||||
# Get exact age from AgeGender
|
||||
ag_result = age_gender.predict(image, face.bbox)
|
||||
|
||||
# Get race from FairFace
|
||||
ff_result = fairface.predict(image, face.bbox)
|
||||
|
||||
print(f"Gender: {ag_result.sex}")
|
||||
print(f"Exact Age: {ag_result.age}")
|
||||
print(f"Age Group: {ff_result.age_group}")
|
||||
print(f"Race: {ff_result.race}")
|
||||
```
|
||||
|
||||
### Using FaceAnalyzer
|
||||
|
||||
```python
|
||||
from uniface import FaceAnalyzer
|
||||
|
||||
analyzer = FaceAnalyzer(
|
||||
detect=True,
|
||||
recognize=False,
|
||||
attributes=True # Uses AgeGender
|
||||
)
|
||||
|
||||
faces = analyzer.analyze(image)
|
||||
|
||||
for face in faces:
|
||||
print(f"Age: {face.age}, Gender: {face.sex}")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Visualization
|
||||
|
||||
```python
|
||||
import cv2
|
||||
|
||||
def draw_attributes(image, face, result):
|
||||
"""Draw attributes on image."""
|
||||
x1, y1, x2, y2 = map(int, face.bbox)
|
||||
|
||||
# Draw bounding box
|
||||
cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
|
||||
|
||||
# Build label
|
||||
label = f"{result.sex}"
|
||||
if result.age is not None:
|
||||
label += f", {result.age}y"
|
||||
if result.age_group:
|
||||
label += f", {result.age_group}"
|
||||
if result.race:
|
||||
label += f", {result.race}"
|
||||
|
||||
# Draw label
|
||||
cv2.putText(
|
||||
image, label, (x1, y1 - 10),
|
||||
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2
|
||||
)
|
||||
|
||||
return image
|
||||
|
||||
# Usage
|
||||
for face in faces:
|
||||
result = age_gender.predict(image, face.bbox)
|
||||
image = draw_attributes(image, face, result)
|
||||
|
||||
cv2.imwrite("attributes.jpg", image)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Accuracy Notes
|
||||
|
||||
!!! note "Model Limitations"
|
||||
- **AgeGender**: Trained on CelebA; accuracy varies by demographic
|
||||
- **FairFace**: Trained for balanced demographics; better cross-racial accuracy
|
||||
- **Emotion**: Accuracy depends on facial expression clarity
|
||||
|
||||
Always test on your specific use case and consider cultural context.
|
||||
|
||||
---
|
||||
|
||||
## Next Steps
|
||||
|
||||
- [Parsing](parsing.md) - Face semantic segmentation
|
||||
- [Gaze](gaze.md) - Gaze estimation
|
||||
- [Image Pipeline Recipe](../recipes/image-pipeline.md) - Complete workflow
|
||||
252
docs/modules/detection.md
Normal file
252
docs/modules/detection.md
Normal file
@@ -0,0 +1,252 @@
|
||||
# Detection
|
||||
|
||||
Face detection is the first step in any face analysis pipeline. UniFace provides three detection models.
|
||||
|
||||
---
|
||||
|
||||
## Available Models
|
||||
|
||||
| Model | Backbone | Size | WIDER FACE (Easy/Medium/Hard) | Best For |
|
||||
|-------|----------|------|-------------------------------|----------|
|
||||
| **RetinaFace** | MobileNet V2 | 3.5 MB | 91.7% / 91.0% / 86.6% | Balanced (recommended) |
|
||||
| **SCRFD** | SCRFD-10G | 17 MB | 95.2% / 93.9% / 83.1% | High accuracy |
|
||||
| **YOLOv5-Face** | YOLOv5s | 28 MB | 94.3% / 92.6% / 83.2% | Real-time |
|
||||
|
||||
---
|
||||
|
||||
## RetinaFace
|
||||
|
||||
The recommended detector for most use cases.
|
||||
|
||||
### Basic Usage
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace
|
||||
|
||||
detector = RetinaFace()
|
||||
faces = detector.detect(image)
|
||||
|
||||
for face in faces:
|
||||
print(f"Confidence: {face.confidence:.2f}")
|
||||
print(f"BBox: {face.bbox}")
|
||||
print(f"Landmarks: {face.landmarks.shape}") # (5, 2)
|
||||
```
|
||||
|
||||
### Model Variants
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace
|
||||
from uniface.constants import RetinaFaceWeights
|
||||
|
||||
# Lightweight (mobile/edge)
|
||||
detector = RetinaFace(model_name=RetinaFaceWeights.MNET_025)
|
||||
|
||||
# Balanced (default)
|
||||
detector = RetinaFace(model_name=RetinaFaceWeights.MNET_V2)
|
||||
|
||||
# High accuracy
|
||||
detector = RetinaFace(model_name=RetinaFaceWeights.RESNET34)
|
||||
```
|
||||
|
||||
| Variant | Params | Size | Easy | Medium | Hard |
|
||||
|---------|--------|------|------|--------|------|
|
||||
| MNET_025 | 0.4M | 1.7 MB | 88.5% | 87.0% | 80.6% |
|
||||
| MNET_050 | 1.0M | 2.6 MB | 89.4% | 88.0% | 82.4% |
|
||||
| MNET_V1 | 3.5M | 3.8 MB | 90.6% | 89.1% | 84.1% |
|
||||
| **MNET_V2** :material-check-circle: | 3.2M | 3.5 MB | 91.7% | 91.0% | 86.6% |
|
||||
| RESNET18 | 11.7M | 27 MB | 92.5% | 91.0% | 86.6% |
|
||||
| RESNET34 | 24.8M | 56 MB | 94.2% | 93.1% | 88.9% |
|
||||
|
||||
### Configuration
|
||||
|
||||
```python
|
||||
detector = RetinaFace(
|
||||
model_name=RetinaFaceWeights.MNET_V2,
|
||||
confidence_threshold=0.5, # Min confidence
|
||||
nms_threshold=0.4, # NMS IoU threshold
|
||||
input_size=(640, 640), # Input resolution
|
||||
dynamic_size=False # Enable dynamic input size
|
||||
)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## SCRFD
|
||||
|
||||
State-of-the-art detection with excellent accuracy-speed tradeoff.
|
||||
|
||||
### Basic Usage
|
||||
|
||||
```python
|
||||
from uniface import SCRFD
|
||||
|
||||
detector = SCRFD()
|
||||
faces = detector.detect(image)
|
||||
```
|
||||
|
||||
### Model Variants
|
||||
|
||||
```python
|
||||
from uniface import SCRFD
|
||||
from uniface.constants import SCRFDWeights
|
||||
|
||||
# Real-time (lightweight)
|
||||
detector = SCRFD(model_name=SCRFDWeights.SCRFD_500M_KPS)
|
||||
|
||||
# High accuracy (default)
|
||||
detector = SCRFD(model_name=SCRFDWeights.SCRFD_10G_KPS)
|
||||
```
|
||||
|
||||
| Variant | Params | Size | Easy | Medium | Hard |
|
||||
|---------|--------|------|------|--------|------|
|
||||
| SCRFD_500M_KPS | 0.6M | 2.5 MB | 90.6% | 88.1% | 68.5% |
|
||||
| **SCRFD_10G_KPS** :material-check-circle: | 4.2M | 17 MB | 95.2% | 93.9% | 83.1% |
|
||||
|
||||
### Configuration
|
||||
|
||||
```python
|
||||
detector = SCRFD(
|
||||
model_name=SCRFDWeights.SCRFD_10G_KPS,
|
||||
confidence_threshold=0.5,
|
||||
nms_threshold=0.4,
|
||||
input_size=(640, 640)
|
||||
)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## YOLOv5-Face
|
||||
|
||||
YOLO-based detection optimized for faces.
|
||||
|
||||
### Basic Usage
|
||||
|
||||
```python
|
||||
from uniface import YOLOv5Face
|
||||
|
||||
detector = YOLOv5Face()
|
||||
faces = detector.detect(image)
|
||||
```
|
||||
|
||||
### Model Variants
|
||||
|
||||
```python
|
||||
from uniface import YOLOv5Face
|
||||
from uniface.constants import YOLOv5FaceWeights
|
||||
|
||||
# Lightweight
|
||||
detector = YOLOv5Face(model_name=YOLOv5FaceWeights.YOLOV5N)
|
||||
|
||||
# Balanced (default)
|
||||
detector = YOLOv5Face(model_name=YOLOv5FaceWeights.YOLOV5S)
|
||||
|
||||
# High accuracy
|
||||
detector = YOLOv5Face(model_name=YOLOv5FaceWeights.YOLOV5M)
|
||||
```
|
||||
|
||||
| Variant | Size | Easy | Medium | Hard |
|
||||
|---------|------|------|--------|------|
|
||||
| YOLOV5N | 11 MB | 93.6% | 91.5% | 80.5% |
|
||||
| **YOLOV5S** :material-check-circle: | 28 MB | 94.3% | 92.6% | 83.2% |
|
||||
| YOLOV5M | 82 MB | 95.3% | 93.8% | 85.3% |
|
||||
|
||||
!!! note "Fixed Input Size"
|
||||
YOLOv5-Face uses a fixed input size of 640×640.
|
||||
|
||||
### Configuration
|
||||
|
||||
```python
|
||||
detector = YOLOv5Face(
|
||||
model_name=YOLOv5FaceWeights.YOLOV5S,
|
||||
confidence_threshold=0.6,
|
||||
nms_threshold=0.5
|
||||
)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Factory Function
|
||||
|
||||
Create detectors dynamically:
|
||||
|
||||
```python
|
||||
from uniface import create_detector
|
||||
|
||||
detector = create_detector('retinaface')
|
||||
# or
|
||||
detector = create_detector('scrfd')
|
||||
# or
|
||||
detector = create_detector('yolov5face')
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## High-Level API
|
||||
|
||||
One-line detection:
|
||||
|
||||
```python
|
||||
from uniface import detect_faces
|
||||
|
||||
faces = detect_faces(
|
||||
image,
|
||||
method='retinaface',
|
||||
confidence_threshold=0.5
|
||||
)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Output Format
|
||||
|
||||
All detectors return `list[Face]`:
|
||||
|
||||
```python
|
||||
for face in faces:
|
||||
# Bounding box [x1, y1, x2, y2]
|
||||
bbox = face.bbox
|
||||
|
||||
# Detection confidence (0-1)
|
||||
confidence = face.confidence
|
||||
|
||||
# 5-point landmarks (5, 2)
|
||||
landmarks = face.landmarks
|
||||
# [left_eye, right_eye, nose, left_mouth, right_mouth]
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Visualization
|
||||
|
||||
```python
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
draw_detections(
|
||||
image=image,
|
||||
bboxes=[f.bbox for f in faces],
|
||||
scores=[f.confidence for f in faces],
|
||||
landmarks=[f.landmarks for f in faces],
|
||||
vis_threshold=0.6
|
||||
)
|
||||
|
||||
cv2.imwrite("result.jpg", image)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Performance Comparison
|
||||
|
||||
Benchmark on your hardware:
|
||||
|
||||
```bash
|
||||
python tools/detection.py --source image.jpg --iterations 100
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## See Also
|
||||
|
||||
- [Recognition Module](recognition.md) - Extract embeddings from detected faces
|
||||
- [Landmarks Module](landmarks.md) - Get 106-point landmarks
|
||||
- [Image Pipeline Recipe](../recipes/image-pipeline.md) - Complete detection workflow
|
||||
- [Concepts: Thresholds](../concepts/thresholds-calibration.md) - Tuning detection parameters
|
||||
270
docs/modules/gaze.md
Normal file
270
docs/modules/gaze.md
Normal file
@@ -0,0 +1,270 @@
|
||||
# Gaze Estimation
|
||||
|
||||
Gaze estimation predicts where a person is looking (pitch and yaw angles).
|
||||
|
||||
---
|
||||
|
||||
## Available Models
|
||||
|
||||
| Model | Backbone | Size | MAE* | Best For |
|
||||
|-------|----------|------|------|----------|
|
||||
| ResNet18 | ResNet18 | 43 MB | 12.84° | Balanced |
|
||||
| **ResNet34** :material-check-circle: | ResNet34 | 82 MB | 11.33° | Recommended |
|
||||
| ResNet50 | ResNet50 | 91 MB | 11.34° | High accuracy |
|
||||
| MobileNetV2 | MobileNetV2 | 9.6 MB | 13.07° | Mobile |
|
||||
| MobileOne-S0 | MobileOne | 4.8 MB | 12.58° | Lightweight |
|
||||
|
||||
*MAE = Mean Absolute Error in degrees on the Gaze360 test set (lower is better)
|
||||
|
||||
---
|
||||
|
||||
## Basic Usage
|
||||
|
||||
```python
|
||||
import cv2
|
||||
import numpy as np
|
||||
from uniface import RetinaFace, MobileGaze
|
||||
|
||||
detector = RetinaFace()
|
||||
gaze_estimator = MobileGaze()
|
||||
|
||||
image = cv2.imread("photo.jpg")
|
||||
faces = detector.detect(image)
|
||||
|
||||
for face in faces:
|
||||
# Crop face
|
||||
x1, y1, x2, y2 = map(int, face.bbox)
|
||||
face_crop = image[y1:y2, x1:x2]
|
||||
|
||||
if face_crop.size > 0:
|
||||
# Estimate gaze
|
||||
result = gaze_estimator.estimate(face_crop)
|
||||
|
||||
# Convert to degrees
|
||||
pitch_deg = np.degrees(result.pitch)
|
||||
yaw_deg = np.degrees(result.yaw)
|
||||
|
||||
print(f"Pitch: {pitch_deg:.1f}°, Yaw: {yaw_deg:.1f}°")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Model Variants
|
||||
|
||||
```python
|
||||
from uniface import MobileGaze
|
||||
from uniface.constants import GazeWeights
|
||||
|
||||
# Default (ResNet34, recommended)
|
||||
gaze = MobileGaze()
|
||||
|
||||
# Lightweight for mobile/edge
|
||||
gaze = MobileGaze(model_name=GazeWeights.MOBILEONE_S0)
|
||||
|
||||
# Higher accuracy
|
||||
gaze = MobileGaze(model_name=GazeWeights.RESNET50)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Output Format
|
||||
|
||||
```python
|
||||
result = gaze_estimator.estimate(face_crop)
|
||||
|
||||
# GazeResult dataclass
|
||||
result.pitch # Vertical angle in radians
|
||||
result.yaw # Horizontal angle in radians
|
||||
```
|
||||
|
||||
### Angle Convention
|
||||
|
||||
```
|
||||
pitch = +90° (looking up)
|
||||
│
|
||||
│
|
||||
yaw = -90° ────┼──── yaw = +90°
|
||||
(looking left) │ (looking right)
|
||||
│
|
||||
pitch = -90° (looking down)
|
||||
```
|
||||
|
||||
- **Pitch**: Vertical gaze angle
|
||||
- Positive = looking up
|
||||
- Negative = looking down
|
||||
|
||||
- **Yaw**: Horizontal gaze angle
|
||||
- Positive = looking right
|
||||
- Negative = looking left
|
||||
|
||||
---
|
||||
|
||||
## Visualization
|
||||
|
||||
```python
|
||||
from uniface.visualization import draw_gaze
|
||||
|
||||
# Detect faces
|
||||
faces = detector.detect(image)
|
||||
|
||||
for face in faces:
|
||||
x1, y1, x2, y2 = map(int, face.bbox)
|
||||
face_crop = image[y1:y2, x1:x2]
|
||||
|
||||
if face_crop.size > 0:
|
||||
result = gaze_estimator.estimate(face_crop)
|
||||
|
||||
# Draw gaze arrow on image
|
||||
draw_gaze(image, face.bbox, result.pitch, result.yaw)
|
||||
|
||||
cv2.imwrite("gaze_output.jpg", image)
|
||||
```
|
||||
|
||||
### Custom Visualization
|
||||
|
||||
```python
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
def draw_gaze_custom(image, bbox, pitch, yaw, length=100, color=(0, 255, 0)):
|
||||
"""Draw custom gaze arrow."""
|
||||
x1, y1, x2, y2 = map(int, bbox)
|
||||
|
||||
# Face center
|
||||
cx = (x1 + x2) // 2
|
||||
cy = (y1 + y2) // 2
|
||||
|
||||
# Calculate endpoint
|
||||
dx = -length * np.sin(yaw) * np.cos(pitch)
|
||||
dy = -length * np.sin(pitch)
|
||||
|
||||
# Draw arrow
|
||||
end_x = int(cx + dx)
|
||||
end_y = int(cy + dy)
|
||||
|
||||
cv2.arrowedLine(image, (cx, cy), (end_x, end_y), color, 2, tipLength=0.3)
|
||||
|
||||
return image
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Real-Time Gaze Tracking
|
||||
|
||||
```python
|
||||
import cv2
|
||||
import numpy as np
|
||||
from uniface import RetinaFace, MobileGaze
|
||||
from uniface.visualization import draw_gaze
|
||||
|
||||
detector = RetinaFace()
|
||||
gaze_estimator = MobileGaze()
|
||||
|
||||
cap = cv2.VideoCapture(0)
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
faces = detector.detect(frame)
|
||||
|
||||
for face in faces:
|
||||
x1, y1, x2, y2 = map(int, face.bbox)
|
||||
face_crop = frame[y1:y2, x1:x2]
|
||||
|
||||
if face_crop.size > 0:
|
||||
result = gaze_estimator.estimate(face_crop)
|
||||
|
||||
# Draw bounding box
|
||||
cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
|
||||
|
||||
# Draw gaze
|
||||
draw_gaze(frame, face.bbox, result.pitch, result.yaw)
|
||||
|
||||
# Display angles
|
||||
pitch_deg = np.degrees(result.pitch)
|
||||
yaw_deg = np.degrees(result.yaw)
|
||||
label = f"P:{pitch_deg:.0f} Y:{yaw_deg:.0f}"
|
||||
cv2.putText(frame, label, (x1, y1 - 10),
|
||||
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
|
||||
|
||||
cv2.imshow("Gaze Estimation", frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Use Cases
|
||||
|
||||
### Attention Detection
|
||||
|
||||
```python
|
||||
def is_looking_at_camera(result, threshold=15):
|
||||
"""Check if person is looking at camera."""
|
||||
pitch_deg = abs(np.degrees(result.pitch))
|
||||
yaw_deg = abs(np.degrees(result.yaw))
|
||||
|
||||
return pitch_deg < threshold and yaw_deg < threshold
|
||||
|
||||
# Usage
|
||||
result = gaze_estimator.estimate(face_crop)
|
||||
if is_looking_at_camera(result):
|
||||
print("Looking at camera")
|
||||
else:
|
||||
print("Looking away")
|
||||
```
|
||||
|
||||
### Gaze Direction Classification
|
||||
|
||||
```python
|
||||
def classify_gaze_direction(result, threshold=20):
|
||||
"""Classify gaze into directions."""
|
||||
pitch_deg = np.degrees(result.pitch)
|
||||
yaw_deg = np.degrees(result.yaw)
|
||||
|
||||
directions = []
|
||||
|
||||
if pitch_deg > threshold:
|
||||
directions.append("up")
|
||||
elif pitch_deg < -threshold:
|
||||
directions.append("down")
|
||||
|
||||
if yaw_deg > threshold:
|
||||
directions.append("right")
|
||||
elif yaw_deg < -threshold:
|
||||
directions.append("left")
|
||||
|
||||
if not directions:
|
||||
return "center"
|
||||
|
||||
return " ".join(directions)
|
||||
|
||||
# Usage
|
||||
result = gaze_estimator.estimate(face_crop)
|
||||
direction = classify_gaze_direction(result)
|
||||
print(f"Looking: {direction}")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Factory Function
|
||||
|
||||
```python
|
||||
from uniface import create_gaze_estimator
|
||||
|
||||
gaze = create_gaze_estimator() # Returns MobileGaze
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Next Steps
|
||||
|
||||
- [Anti-Spoofing](spoofing.md) - Face liveness detection
|
||||
- [Privacy](privacy.md) - Face anonymization
|
||||
- [Video Recipe](../recipes/video-webcam.md) - Real-time processing
|
||||
251
docs/modules/landmarks.md
Normal file
251
docs/modules/landmarks.md
Normal file
@@ -0,0 +1,251 @@
|
||||
# Landmarks
|
||||
|
||||
Facial landmark detection provides precise localization of facial features.
|
||||
|
||||
---
|
||||
|
||||
## Available Models
|
||||
|
||||
| Model | Points | Size | Use Case |
|
||||
|-------|--------|------|----------|
|
||||
| **Landmark106** | 106 | 14 MB | Detailed face analysis |
|
||||
|
||||
!!! info "5-Point Landmarks"
|
||||
Basic 5-point landmarks are included with all detection models (RetinaFace, SCRFD, YOLOv5-Face).
|
||||
|
||||
---
|
||||
|
||||
## 106-Point Landmarks
|
||||
|
||||
### Basic Usage
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace, Landmark106
|
||||
|
||||
detector = RetinaFace()
|
||||
landmarker = Landmark106()
|
||||
|
||||
# Detect face
|
||||
faces = detector.detect(image)
|
||||
|
||||
# Get detailed landmarks
|
||||
if faces:
|
||||
landmarks = landmarker.get_landmarks(image, faces[0].bbox)
|
||||
print(f"Landmarks shape: {landmarks.shape}") # (106, 2)
|
||||
```
|
||||
|
||||
### Landmark Groups
|
||||
|
||||
| Range | Group | Points |
|
||||
|-------|-------|--------|
|
||||
| 0-32 | Face Contour | 33 |
|
||||
| 33-50 | Eyebrows | 18 |
|
||||
| 51-62 | Nose | 12 |
|
||||
| 63-86 | Eyes | 24 |
|
||||
| 87-105 | Mouth | 19 |
|
||||
|
||||
### Extract Specific Features
|
||||
|
||||
```python
|
||||
landmarks = landmarker.get_landmarks(image, face.bbox)
|
||||
|
||||
# Face contour
|
||||
contour = landmarks[0:33]
|
||||
|
||||
# Left eyebrow
|
||||
left_eyebrow = landmarks[33:42]
|
||||
|
||||
# Right eyebrow
|
||||
right_eyebrow = landmarks[42:51]
|
||||
|
||||
# Nose
|
||||
nose = landmarks[51:63]
|
||||
|
||||
# Left eye
|
||||
left_eye = landmarks[63:72]
|
||||
|
||||
# Right eye
|
||||
right_eye = landmarks[76:84]
|
||||
|
||||
# Mouth
|
||||
mouth = landmarks[87:106]
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 5-Point Landmarks (Detection)
|
||||
|
||||
All detection models provide 5-point landmarks:
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace
|
||||
|
||||
detector = RetinaFace()
|
||||
faces = detector.detect(image)
|
||||
|
||||
if faces:
|
||||
landmarks_5 = faces[0].landmarks
|
||||
print(f"Shape: {landmarks_5.shape}") # (5, 2)
|
||||
|
||||
left_eye = landmarks_5[0]
|
||||
right_eye = landmarks_5[1]
|
||||
nose = landmarks_5[2]
|
||||
left_mouth = landmarks_5[3]
|
||||
right_mouth = landmarks_5[4]
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Visualization
|
||||
|
||||
### Draw 106 Landmarks
|
||||
|
||||
```python
|
||||
import cv2
|
||||
|
||||
def draw_landmarks(image, landmarks, color=(0, 255, 0), radius=2):
|
||||
"""Draw landmarks on image."""
|
||||
for x, y in landmarks.astype(int):
|
||||
cv2.circle(image, (x, y), radius, color, -1)
|
||||
return image
|
||||
|
||||
# Usage
|
||||
landmarks = landmarker.get_landmarks(image, face.bbox)
|
||||
image_with_landmarks = draw_landmarks(image.copy(), landmarks)
|
||||
cv2.imwrite("landmarks.jpg", image_with_landmarks)
|
||||
```
|
||||
|
||||
### Draw with Connections
|
||||
|
||||
```python
|
||||
def draw_landmarks_with_connections(image, landmarks):
|
||||
"""Draw landmarks with facial feature connections."""
|
||||
landmarks = landmarks.astype(int)
|
||||
|
||||
# Face contour (0-32)
|
||||
for i in range(32):
|
||||
cv2.line(image, tuple(landmarks[i]), tuple(landmarks[i+1]), (255, 255, 0), 1)
|
||||
|
||||
# Left eyebrow (33-41)
|
||||
for i in range(33, 41):
|
||||
cv2.line(image, tuple(landmarks[i]), tuple(landmarks[i+1]), (0, 255, 0), 1)
|
||||
|
||||
# Right eyebrow (42-50)
|
||||
for i in range(42, 50):
|
||||
cv2.line(image, tuple(landmarks[i]), tuple(landmarks[i+1]), (0, 255, 0), 1)
|
||||
|
||||
# Nose (51-62)
|
||||
for i in range(51, 62):
|
||||
cv2.line(image, tuple(landmarks[i]), tuple(landmarks[i+1]), (0, 0, 255), 1)
|
||||
|
||||
# Draw points
|
||||
for x, y in landmarks:
|
||||
cv2.circle(image, (x, y), 2, (0, 255, 255), -1)
|
||||
|
||||
return image
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Use Cases
|
||||
|
||||
### Face Alignment
|
||||
|
||||
```python
|
||||
from uniface import face_alignment
|
||||
|
||||
# Align face using 5-point landmarks
|
||||
aligned = face_alignment(image, faces[0].landmarks)
|
||||
# Returns: 112x112 aligned face
|
||||
```
|
||||
|
||||
### Eye Aspect Ratio (Blink Detection)
|
||||
|
||||
```python
|
||||
import numpy as np
|
||||
|
||||
def eye_aspect_ratio(eye_landmarks):
|
||||
"""Calculate eye aspect ratio for blink detection."""
|
||||
# Vertical distances
|
||||
v1 = np.linalg.norm(eye_landmarks[1] - eye_landmarks[5])
|
||||
v2 = np.linalg.norm(eye_landmarks[2] - eye_landmarks[4])
|
||||
|
||||
# Horizontal distance
|
||||
h = np.linalg.norm(eye_landmarks[0] - eye_landmarks[3])
|
||||
|
||||
ear = (v1 + v2) / (2.0 * h)
|
||||
return ear
|
||||
|
||||
# Usage with 106-point landmarks
|
||||
left_eye = landmarks[63:72] # Approximate eye points
|
||||
ear = eye_aspect_ratio(left_eye)
|
||||
|
||||
if ear < 0.2:
|
||||
print("Eye closed (blink detected)")
|
||||
```
|
||||
|
||||
### Head Pose Estimation
|
||||
|
||||
```python
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
def estimate_head_pose(landmarks, image_shape):
|
||||
"""Estimate head pose from facial landmarks."""
|
||||
# 3D model points (generic face model)
|
||||
model_points = np.array([
|
||||
(0.0, 0.0, 0.0), # Nose tip
|
||||
(0.0, -330.0, -65.0), # Chin
|
||||
(-225.0, 170.0, -135.0), # Left eye corner
|
||||
(225.0, 170.0, -135.0), # Right eye corner
|
||||
(-150.0, -150.0, -125.0), # Left mouth corner
|
||||
(150.0, -150.0, -125.0) # Right mouth corner
|
||||
], dtype=np.float64)
|
||||
|
||||
# 2D image points (from 106 landmarks)
|
||||
image_points = np.array([
|
||||
landmarks[51], # Nose tip
|
||||
landmarks[16], # Chin
|
||||
landmarks[63], # Left eye corner
|
||||
landmarks[76], # Right eye corner
|
||||
landmarks[87], # Left mouth corner
|
||||
landmarks[93] # Right mouth corner
|
||||
], dtype=np.float64)
|
||||
|
||||
# Camera matrix
|
||||
h, w = image_shape[:2]
|
||||
focal_length = w
|
||||
center = (w / 2, h / 2)
|
||||
camera_matrix = np.array([
|
||||
[focal_length, 0, center[0]],
|
||||
[0, focal_length, center[1]],
|
||||
[0, 0, 1]
|
||||
], dtype=np.float64)
|
||||
|
||||
# Solve PnP
|
||||
dist_coeffs = np.zeros((4, 1))
|
||||
success, rotation_vector, translation_vector = cv2.solvePnP(
|
||||
model_points, image_points, camera_matrix, dist_coeffs
|
||||
)
|
||||
|
||||
return rotation_vector, translation_vector
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Factory Function
|
||||
|
||||
```python
|
||||
from uniface import create_landmarker
|
||||
|
||||
landmarker = create_landmarker() # Returns Landmark106
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## See Also
|
||||
|
||||
- [Detection Module](detection.md) - Face detection with 5-point landmarks
|
||||
- [Attributes Module](attributes.md) - Age, gender, emotion
|
||||
- [Gaze Module](gaze.md) - Gaze estimation
|
||||
- [Concepts: Coordinate Systems](../concepts/coordinate-systems.md) - Landmark formats
|
||||
265
docs/modules/parsing.md
Normal file
265
docs/modules/parsing.md
Normal file
@@ -0,0 +1,265 @@
|
||||
# Parsing
|
||||
|
||||
Face parsing segments faces into semantic components (skin, eyes, nose, mouth, hair, etc.).
|
||||
|
||||
---
|
||||
|
||||
## Available Models
|
||||
|
||||
| Model | Backbone | Size | Classes | Best For |
|
||||
|-------|----------|------|---------|----------|
|
||||
| **BiSeNet ResNet18** :material-check-circle: | ResNet18 | 51 MB | 19 | Balanced (recommended) |
|
||||
| **BiSeNet ResNet34** | ResNet34 | 89 MB | 19 | Higher accuracy |
|
||||
|
||||
---
|
||||
|
||||
## Basic Usage
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface.parsing import BiSeNet
|
||||
from uniface.visualization import vis_parsing_maps
|
||||
|
||||
# Initialize parser
|
||||
parser = BiSeNet()
|
||||
|
||||
# Load face image (cropped)
|
||||
face_image = cv2.imread("face.jpg")
|
||||
|
||||
# Parse face
|
||||
mask = parser.parse(face_image)
|
||||
print(f"Mask shape: {mask.shape}") # (H, W)
|
||||
|
||||
# Visualize
|
||||
face_rgb = cv2.cvtColor(face_image, cv2.COLOR_BGR2RGB)
|
||||
vis_result = vis_parsing_maps(face_rgb, mask, save_image=False)
|
||||
|
||||
# Save result
|
||||
vis_bgr = cv2.cvtColor(vis_result, cv2.COLOR_RGB2BGR)
|
||||
cv2.imwrite("parsed.jpg", vis_bgr)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 19 Facial Component Classes
|
||||
|
||||
| ID | Class | ID | Class |
|
||||
|----|-------|----|-------|
|
||||
| 0 | Background | 10 | Earring |
|
||||
| 1 | Skin | 11 | Nose |
|
||||
| 2 | Left Eyebrow | 12 | Mouth |
|
||||
| 3 | Right Eyebrow | 13 | Upper Lip |
|
||||
| 4 | Left Eye | 14 | Lower Lip |
|
||||
| 5 | Right Eye | 15 | Neck |
|
||||
| 6 | Eye Glasses | 16 | Necklace |
|
||||
| 7 | Left Ear | 17 | Cloth |
|
||||
| 8 | Right Ear | 18 | Hair |
|
||||
| 9 | Hat | | |
|
||||
|
||||
---
|
||||
|
||||
## Model Variants
|
||||
|
||||
```python
|
||||
from uniface.parsing import BiSeNet
|
||||
from uniface.constants import ParsingWeights
|
||||
|
||||
# Default (ResNet18)
|
||||
parser = BiSeNet()
|
||||
|
||||
# Higher accuracy (ResNet34)
|
||||
parser = BiSeNet(model_name=ParsingWeights.RESNET34)
|
||||
```
|
||||
|
||||
| Variant | Params | Size | Notes |
|
||||
|---------|--------|------|-------|
|
||||
| **RESNET18** :material-check-circle: | 13.3M | 51 MB | Recommended |
|
||||
| RESNET34 | 24.1M | 89 MB | Higher accuracy |
|
||||
|
||||
---
|
||||
|
||||
## Full Pipeline
|
||||
|
||||
### With Face Detection
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace
|
||||
from uniface.parsing import BiSeNet
|
||||
from uniface.visualization import vis_parsing_maps
|
||||
|
||||
detector = RetinaFace()
|
||||
parser = BiSeNet()
|
||||
|
||||
image = cv2.imread("photo.jpg")
|
||||
faces = detector.detect(image)
|
||||
|
||||
for i, face in enumerate(faces):
|
||||
# Crop face
|
||||
x1, y1, x2, y2 = map(int, face.bbox)
|
||||
face_crop = image[y1:y2, x1:x2]
|
||||
|
||||
# Parse
|
||||
mask = parser.parse(face_crop)
|
||||
|
||||
# Visualize
|
||||
face_rgb = cv2.cvtColor(face_crop, cv2.COLOR_BGR2RGB)
|
||||
vis_result = vis_parsing_maps(face_rgb, mask, save_image=False)
|
||||
|
||||
# Save
|
||||
vis_bgr = cv2.cvtColor(vis_result, cv2.COLOR_RGB2BGR)
|
||||
cv2.imwrite(f"face_{i}_parsed.jpg", vis_bgr)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Extract Specific Components
|
||||
|
||||
### Get Single Component Mask
|
||||
|
||||
```python
|
||||
import numpy as np
|
||||
|
||||
# Parse face
|
||||
mask = parser.parse(face_image)
|
||||
|
||||
# Extract specific component
|
||||
SKIN = 1
|
||||
HAIR = 18
|
||||
LEFT_EYE = 4
|
||||
RIGHT_EYE = 5
|
||||
|
||||
# Binary mask for skin
|
||||
skin_mask = (mask == SKIN).astype(np.uint8) * 255
|
||||
|
||||
# Binary mask for hair
|
||||
hair_mask = (mask == HAIR).astype(np.uint8) * 255
|
||||
|
||||
# Binary mask for eyes
|
||||
eyes_mask = ((mask == LEFT_EYE) | (mask == RIGHT_EYE)).astype(np.uint8) * 255
|
||||
```
|
||||
|
||||
### Count Pixels per Component
|
||||
|
||||
```python
|
||||
import numpy as np
|
||||
|
||||
mask = parser.parse(face_image)
|
||||
|
||||
component_names = {
|
||||
0: 'Background', 1: 'Skin', 2: 'L-Eyebrow', 3: 'R-Eyebrow',
|
||||
4: 'L-Eye', 5: 'R-Eye', 6: 'Glasses', 7: 'L-Ear', 8: 'R-Ear',
|
||||
9: 'Hat', 10: 'Earring', 11: 'Nose', 12: 'Mouth',
|
||||
13: 'U-Lip', 14: 'L-Lip', 15: 'Neck', 16: 'Necklace',
|
||||
17: 'Cloth', 18: 'Hair'
|
||||
}
|
||||
|
||||
for class_id in np.unique(mask):
|
||||
pixel_count = np.sum(mask == class_id)
|
||||
name = component_names.get(class_id, f'Class {class_id}')
|
||||
print(f"{name}: {pixel_count} pixels")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Applications
|
||||
|
||||
### Face Makeup
|
||||
|
||||
Apply virtual makeup using component masks:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
def apply_lip_color(image, mask, color=(180, 50, 50)):
|
||||
"""Apply lip color using parsing mask."""
|
||||
result = image.copy()
|
||||
|
||||
# Get lip mask (upper + lower lip)
|
||||
lip_mask = ((mask == 13) | (mask == 14)).astype(np.uint8)
|
||||
|
||||
# Create color overlay
|
||||
overlay = np.zeros_like(image)
|
||||
overlay[:] = color
|
||||
|
||||
# Blend with original
|
||||
lip_region = cv2.bitwise_and(overlay, overlay, mask=lip_mask)
|
||||
non_lip = cv2.bitwise_and(result, result, mask=1 - lip_mask)
|
||||
|
||||
# Combine with alpha blending
|
||||
alpha = 0.4
|
||||
blended = cv2.addWeighted(result, 1 - alpha, overlay, alpha, 0)
|
||||
result[lip_mask == 1] = blended[lip_mask == 1]
|
||||
|
||||
return result.astype(np.uint8)
|
||||
```
|
||||
|
||||
### Background Replacement
|
||||
|
||||
```python
|
||||
def replace_background(image, mask, background):
|
||||
"""Replace background using parsing mask."""
|
||||
# Create foreground mask (everything except background)
|
||||
foreground_mask = (mask != 0).astype(np.uint8)
|
||||
|
||||
# Resize background to match image
|
||||
background = cv2.resize(background, (image.shape[1], image.shape[0]))
|
||||
|
||||
# Combine
|
||||
result = image.copy()
|
||||
result[foreground_mask == 0] = background[foreground_mask == 0]
|
||||
|
||||
return result
|
||||
```
|
||||
|
||||
### Hair Segmentation
|
||||
|
||||
```python
|
||||
def get_hair_mask(mask):
|
||||
"""Extract clean hair mask."""
|
||||
hair_mask = (mask == 18).astype(np.uint8) * 255
|
||||
|
||||
# Clean up with morphological operations
|
||||
kernel = np.ones((5, 5), np.uint8)
|
||||
hair_mask = cv2.morphologyEx(hair_mask, cv2.MORPH_CLOSE, kernel)
|
||||
hair_mask = cv2.morphologyEx(hair_mask, cv2.MORPH_OPEN, kernel)
|
||||
|
||||
return hair_mask
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Visualization Options
|
||||
|
||||
```python
|
||||
from uniface.visualization import vis_parsing_maps
|
||||
|
||||
# Default visualization
|
||||
vis_result = vis_parsing_maps(face_rgb, mask)
|
||||
|
||||
# With different parameters
|
||||
vis_result = vis_parsing_maps(
|
||||
face_rgb,
|
||||
mask,
|
||||
save_image=False, # Don't save to file
|
||||
)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Factory Function
|
||||
|
||||
```python
|
||||
from uniface import create_face_parser
|
||||
|
||||
parser = create_face_parser() # Returns BiSeNet
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Next Steps
|
||||
|
||||
- [Gaze](gaze.md) - Gaze estimation
|
||||
- [Privacy](privacy.md) - Face anonymization
|
||||
- [Detection](detection.md) - Face detection
|
||||
277
docs/modules/privacy.md
Normal file
277
docs/modules/privacy.md
Normal file
@@ -0,0 +1,277 @@
|
||||
# Privacy
|
||||
|
||||
Face anonymization protects privacy by blurring or obscuring faces in images and videos.
|
||||
|
||||
---
|
||||
|
||||
## Available Methods
|
||||
|
||||
| Method | Description | Use Case |
|
||||
|--------|-------------|----------|
|
||||
| **pixelate** | Blocky pixelation | News media standard |
|
||||
| **gaussian** | Smooth blur | Natural appearance |
|
||||
| **blackout** | Solid color fill | Maximum privacy |
|
||||
| **elliptical** | Oval-shaped blur | Natural face shape |
|
||||
| **median** | Edge-preserving blur | Artistic effect |
|
||||
|
||||
---
|
||||
|
||||
## Quick Start
|
||||
|
||||
### One-Line Anonymization
|
||||
|
||||
```python
|
||||
from uniface.privacy import anonymize_faces
|
||||
import cv2
|
||||
|
||||
image = cv2.imread("group_photo.jpg")
|
||||
anonymized = anonymize_faces(image, method='pixelate')
|
||||
cv2.imwrite("anonymized.jpg", anonymized)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## BlurFace Class
|
||||
|
||||
For more control, use the `BlurFace` class:
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace
|
||||
from uniface.privacy import BlurFace
|
||||
import cv2
|
||||
|
||||
detector = RetinaFace()
|
||||
blurrer = BlurFace(method='gaussian', blur_strength=5.0)
|
||||
|
||||
image = cv2.imread("photo.jpg")
|
||||
faces = detector.detect(image)
|
||||
anonymized = blurrer.anonymize(image, faces)
|
||||
|
||||
cv2.imwrite("anonymized.jpg", anonymized)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Blur Methods
|
||||
|
||||
### Pixelate
|
||||
|
||||
Blocky pixelation effect (common in news media):
|
||||
|
||||
```python
|
||||
blurrer = BlurFace(method='pixelate', pixel_blocks=10)
|
||||
```
|
||||
|
||||
| Parameter | Default | Description |
|
||||
|-----------|---------|-------------|
|
||||
| `pixel_blocks` | 10 | Number of blocks (lower = more pixelated) |
|
||||
|
||||
### Gaussian
|
||||
|
||||
Smooth, natural-looking blur:
|
||||
|
||||
```python
|
||||
blurrer = BlurFace(method='gaussian', blur_strength=3.0)
|
||||
```
|
||||
|
||||
| Parameter | Default | Description |
|
||||
|-----------|---------|-------------|
|
||||
| `blur_strength` | 3.0 | Blur intensity (higher = more blur) |
|
||||
|
||||
### Blackout
|
||||
|
||||
Solid color fill for maximum privacy:
|
||||
|
||||
```python
|
||||
blurrer = BlurFace(method='blackout', color=(0, 0, 0))
|
||||
```
|
||||
|
||||
| Parameter | Default | Description |
|
||||
|-----------|---------|-------------|
|
||||
| `color` | (0, 0, 0) | Fill color (BGR format) |
|
||||
|
||||
### Elliptical
|
||||
|
||||
Oval-shaped blur matching natural face shape:
|
||||
|
||||
```python
|
||||
blurrer = BlurFace(method='elliptical', blur_strength=3.0, margin=20)
|
||||
```
|
||||
|
||||
| Parameter | Default | Description |
|
||||
|-----------|---------|-------------|
|
||||
| `blur_strength` | 3.0 | Blur intensity |
|
||||
| `margin` | 20 | Margin around face |
|
||||
|
||||
### Median
|
||||
|
||||
Edge-preserving blur with artistic effect:
|
||||
|
||||
```python
|
||||
blurrer = BlurFace(method='median', blur_strength=3.0)
|
||||
```
|
||||
|
||||
| Parameter | Default | Description |
|
||||
|-----------|---------|-------------|
|
||||
| `blur_strength` | 3.0 | Blur intensity |
|
||||
|
||||
---
|
||||
|
||||
## In-Place Processing
|
||||
|
||||
Modify image directly (faster, saves memory):
|
||||
|
||||
```python
|
||||
blurrer = BlurFace(method='pixelate')
|
||||
|
||||
# In-place modification
|
||||
result = blurrer.anonymize(image, faces, inplace=True)
|
||||
# 'image' and 'result' point to the same array
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Real-Time Anonymization
|
||||
|
||||
### Webcam
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace
|
||||
from uniface.privacy import BlurFace
|
||||
|
||||
detector = RetinaFace()
|
||||
blurrer = BlurFace(method='pixelate')
|
||||
|
||||
cap = cv2.VideoCapture(0)
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
faces = detector.detect(frame)
|
||||
frame = blurrer.anonymize(frame, faces, inplace=True)
|
||||
|
||||
cv2.imshow('Anonymized', frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
```
|
||||
|
||||
### Video File
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace
|
||||
from uniface.privacy import BlurFace
|
||||
|
||||
detector = RetinaFace()
|
||||
blurrer = BlurFace(method='gaussian')
|
||||
|
||||
cap = cv2.VideoCapture("input_video.mp4")
|
||||
fps = cap.get(cv2.CAP_PROP_FPS)
|
||||
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
||||
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
||||
|
||||
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
|
||||
out = cv2.VideoWriter('output_video.mp4', fourcc, fps, (width, height))
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
faces = detector.detect(frame)
|
||||
frame = blurrer.anonymize(frame, faces, inplace=True)
|
||||
out.write(frame)
|
||||
|
||||
cap.release()
|
||||
out.release()
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Selective Anonymization
|
||||
|
||||
### Exclude Specific Faces
|
||||
|
||||
```python
|
||||
def anonymize_except(image, all_faces, exclude_embeddings, recognizer, blurrer, threshold=0.6):
|
||||
"""Anonymize all faces except those matching exclude_embeddings."""
|
||||
faces_to_blur = []
|
||||
|
||||
for face in all_faces:
|
||||
# Get embedding
|
||||
embedding = recognizer.get_normalized_embedding(image, face.landmarks)
|
||||
|
||||
# Check if should be excluded
|
||||
should_exclude = False
|
||||
for ref_emb in exclude_embeddings:
|
||||
similarity = np.dot(embedding, ref_emb.T)[0][0]
|
||||
if similarity > threshold:
|
||||
should_exclude = True
|
||||
break
|
||||
|
||||
if not should_exclude:
|
||||
faces_to_blur.append(face)
|
||||
|
||||
# Blur remaining faces
|
||||
return blurrer.anonymize(image, faces_to_blur)
|
||||
```
|
||||
|
||||
### Confidence-Based
|
||||
|
||||
```python
|
||||
def anonymize_low_confidence(image, faces, blurrer, confidence_threshold=0.8):
|
||||
"""Anonymize faces below confidence threshold."""
|
||||
faces_to_blur = [f for f in faces if f.confidence < confidence_threshold]
|
||||
return blurrer.anonymize(image, faces_to_blur)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Comparison
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace
|
||||
from uniface.privacy import BlurFace
|
||||
|
||||
detector = RetinaFace()
|
||||
image = cv2.imread("photo.jpg")
|
||||
faces = detector.detect(image)
|
||||
|
||||
methods = ['pixelate', 'gaussian', 'blackout', 'elliptical', 'median']
|
||||
|
||||
for method in methods:
|
||||
blurrer = BlurFace(method=method)
|
||||
result = blurrer.anonymize(image.copy(), faces)
|
||||
cv2.imwrite(f"anonymized_{method}.jpg", result)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Command-Line Tool
|
||||
|
||||
```bash
|
||||
# Anonymize image with pixelation
|
||||
python tools/face_anonymize.py --source photo.jpg
|
||||
|
||||
# Real-time webcam
|
||||
python tools/face_anonymize.py --source 0 --method gaussian
|
||||
|
||||
# Custom blur strength
|
||||
python tools/face_anonymize.py --source photo.jpg --method gaussian --blur-strength 5.0
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Next Steps
|
||||
|
||||
- [Anonymize Stream Recipe](../recipes/anonymize-stream.md) - Video pipeline
|
||||
- [Detection](detection.md) - Face detection options
|
||||
- [Batch Processing Recipe](../recipes/batch-processing.md) - Process multiple files
|
||||
240
docs/modules/recognition.md
Normal file
240
docs/modules/recognition.md
Normal file
@@ -0,0 +1,240 @@
|
||||
# Recognition
|
||||
|
||||
Face recognition extracts embeddings for identity verification and face search.
|
||||
|
||||
---
|
||||
|
||||
## Available Models
|
||||
|
||||
| Model | Backbone | Size | Embedding Dim | Best For |
|
||||
|-------|----------|------|---------------|----------|
|
||||
| **ArcFace** | MobileNet/ResNet | 8-166 MB | 512 | General use (recommended) |
|
||||
| **MobileFace** | MobileNet V2/V3 | 1-10 MB | 512 | Mobile/Edge |
|
||||
| **SphereFace** | Sphere20/36 | 50-92 MB | 512 | Research |
|
||||
|
||||
---
|
||||
|
||||
## ArcFace
|
||||
|
||||
State-of-the-art recognition using additive angular margin loss.
|
||||
|
||||
### Basic Usage
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace, ArcFace
|
||||
|
||||
detector = RetinaFace()
|
||||
recognizer = ArcFace()
|
||||
|
||||
# Detect face
|
||||
faces = detector.detect(image)
|
||||
|
||||
# Extract embedding
|
||||
if faces:
|
||||
embedding = recognizer.get_normalized_embedding(image, faces[0].landmarks)
|
||||
print(f"Embedding shape: {embedding.shape}") # (1, 512)
|
||||
```
|
||||
|
||||
### Model Variants
|
||||
|
||||
```python
|
||||
from uniface import ArcFace
|
||||
from uniface.constants import ArcFaceWeights
|
||||
|
||||
# Lightweight (default)
|
||||
recognizer = ArcFace(model_name=ArcFaceWeights.MNET)
|
||||
|
||||
# High accuracy
|
||||
recognizer = ArcFace(model_name=ArcFaceWeights.RESNET)
|
||||
```
|
||||
|
||||
| Variant | Backbone | Size | Use Case |
|
||||
|---------|----------|------|----------|
|
||||
| **MNET** :material-check-circle: | MobileNet | 8 MB | Balanced (recommended) |
|
||||
| RESNET | ResNet50 | 166 MB | Maximum accuracy |
|
||||
|
||||
---
|
||||
|
||||
## MobileFace
|
||||
|
||||
Lightweight recognition for resource-constrained environments.
|
||||
|
||||
### Basic Usage
|
||||
|
||||
```python
|
||||
from uniface import MobileFace
|
||||
|
||||
recognizer = MobileFace()
|
||||
embedding = recognizer.get_normalized_embedding(image, landmarks)
|
||||
```
|
||||
|
||||
### Model Variants
|
||||
|
||||
```python
|
||||
from uniface import MobileFace
|
||||
from uniface.constants import MobileFaceWeights
|
||||
|
||||
# Ultra-lightweight
|
||||
recognizer = MobileFace(model_name=MobileFaceWeights.MNET_025)
|
||||
|
||||
# Balanced (default)
|
||||
recognizer = MobileFace(model_name=MobileFaceWeights.MNET_V2)
|
||||
|
||||
# Higher accuracy
|
||||
recognizer = MobileFace(model_name=MobileFaceWeights.MNET_V3_LARGE)
|
||||
```
|
||||
|
||||
| Variant | Params | Size | LFW | Use Case |
|
||||
|---------|--------|------|-----|----------|
|
||||
| MNET_025 | 0.36M | 1 MB | 98.8% | Ultra-lightweight |
|
||||
| **MNET_V2** :material-check-circle: | 2.29M | 4 MB | 99.6% | Mobile/Edge |
|
||||
| MNET_V3_SMALL | 1.25M | 3 MB | 99.3% | Mobile optimized |
|
||||
| MNET_V3_LARGE | 3.52M | 10 MB | 99.5% | Balanced mobile |
|
||||
|
||||
---
|
||||
|
||||
## SphereFace
|
||||
|
||||
Recognition using angular softmax loss (A-Softmax).
|
||||
|
||||
### Basic Usage
|
||||
|
||||
```python
|
||||
from uniface import SphereFace
|
||||
from uniface.constants import SphereFaceWeights
|
||||
|
||||
recognizer = SphereFace(model_name=SphereFaceWeights.SPHERE20)
|
||||
embedding = recognizer.get_normalized_embedding(image, landmarks)
|
||||
```
|
||||
|
||||
| Variant | Params | Size | LFW | Use Case |
|
||||
|---------|--------|------|-----|----------|
|
||||
| SPHERE20 | 24.5M | 50 MB | 99.7% | Research |
|
||||
| SPHERE36 | 34.6M | 92 MB | 99.7% | Research |
|
||||
|
||||
---
|
||||
|
||||
## Face Comparison
|
||||
|
||||
### Compute Similarity
|
||||
|
||||
```python
|
||||
from uniface import compute_similarity
|
||||
import numpy as np
|
||||
|
||||
# Extract embeddings
|
||||
emb1 = recognizer.get_normalized_embedding(image1, landmarks1)
|
||||
emb2 = recognizer.get_normalized_embedding(image2, landmarks2)
|
||||
|
||||
# Method 1: Using utility function
|
||||
similarity = compute_similarity(emb1, emb2)
|
||||
|
||||
# Method 2: Direct computation
|
||||
similarity = np.dot(emb1, emb2.T)[0][0]
|
||||
|
||||
print(f"Similarity: {similarity:.4f}")
|
||||
```
|
||||
|
||||
### Threshold Guidelines
|
||||
|
||||
| Threshold | Decision | Use Case |
|
||||
|-----------|----------|----------|
|
||||
| > 0.7 | Very high confidence | Security-critical |
|
||||
| > 0.6 | Same person | General verification |
|
||||
| 0.4 - 0.6 | Uncertain | Manual review needed |
|
||||
| < 0.4 | Different people | Rejection |
|
||||
|
||||
---
|
||||
|
||||
## Face Alignment
|
||||
|
||||
Recognition models require aligned faces. UniFace handles this internally:
|
||||
|
||||
```python
|
||||
# Alignment is done automatically
|
||||
embedding = recognizer.get_normalized_embedding(image, landmarks)
|
||||
|
||||
# Or manually align
|
||||
from uniface import face_alignment
|
||||
|
||||
aligned_face = face_alignment(image, landmarks)
|
||||
# Returns: 112x112 aligned face image
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Building a Face Database
|
||||
|
||||
```python
|
||||
import cv2
import numpy as np
|
||||
from uniface import RetinaFace, ArcFace
|
||||
|
||||
detector = RetinaFace()
|
||||
recognizer = ArcFace()
|
||||
|
||||
# Build database
|
||||
database = {}
|
||||
for person_id, image_path in person_images.items():
|
||||
image = cv2.imread(image_path)
|
||||
faces = detector.detect(image)
|
||||
|
||||
if faces:
|
||||
embedding = recognizer.get_normalized_embedding(image, faces[0].landmarks)
|
||||
database[person_id] = embedding
|
||||
|
||||
# Save for later use
|
||||
np.savez('face_database.npz', **database)
|
||||
|
||||
# Load database
|
||||
data = np.load('face_database.npz')
|
||||
database = {key: data[key] for key in data.files}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Face Search
|
||||
|
||||
Find a person in a database:
|
||||
|
||||
```python
|
||||
def search_face(query_embedding, database, threshold=0.6):
|
||||
"""Find best match in database."""
|
||||
best_match = None
|
||||
best_similarity = -1
|
||||
|
||||
for person_id, db_embedding in database.items():
|
||||
similarity = np.dot(query_embedding, db_embedding.T)[0][0]
|
||||
|
||||
if similarity > best_similarity and similarity > threshold:
|
||||
best_similarity = similarity
|
||||
best_match = person_id
|
||||
|
||||
return best_match, best_similarity
|
||||
|
||||
# Usage
|
||||
query_embedding = recognizer.get_normalized_embedding(query_image, landmarks)
|
||||
match, similarity = search_face(query_embedding, database)
|
||||
|
||||
if match:
|
||||
print(f"Found: {match} (similarity: {similarity:.4f})")
|
||||
else:
|
||||
print("No match found")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Factory Function
|
||||
|
||||
```python
|
||||
from uniface import create_recognizer
|
||||
|
||||
recognizer = create_recognizer('arcface')
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## See Also
|
||||
|
||||
- [Detection Module](detection.md) - Detect faces first
|
||||
- [Face Search Recipe](../recipes/face-search.md) - Complete search system
|
||||
- [Thresholds](../concepts/thresholds-calibration.md) - Calibration guide
|
||||
266
docs/modules/spoofing.md
Normal file
266
docs/modules/spoofing.md
Normal file
@@ -0,0 +1,266 @@
|
||||
# Anti-Spoofing
|
||||
|
||||
Face anti-spoofing detects whether a face is real (live) or fake (photo, video replay, mask).
|
||||
|
||||
---
|
||||
|
||||
## Available Models
|
||||
|
||||
| Model | Size | Notes |
|
||||
|-------|------|-------|
|
||||
| MiniFASNet V1SE | 1.2 MB | Squeeze-and-Excitation variant |
|
||||
| **MiniFASNet V2** :material-check-circle: | 1.2 MB | Improved version (recommended) |
|
||||
|
||||
---
|
||||
|
||||
## Basic Usage
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace
|
||||
from uniface.spoofing import MiniFASNet
|
||||
|
||||
detector = RetinaFace()
|
||||
spoofer = MiniFASNet()
|
||||
|
||||
image = cv2.imread("photo.jpg")
|
||||
faces = detector.detect(image)
|
||||
|
||||
for face in faces:
|
||||
result = spoofer.predict(image, face.bbox)
|
||||
|
||||
label = "Real" if result.is_real else "Fake"
|
||||
print(f"{label}: {result.confidence:.1%}")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Output Format
|
||||
|
||||
```python
|
||||
result = spoofer.predict(image, face.bbox)
|
||||
|
||||
# SpoofingResult dataclass
|
||||
result.is_real # True = real, False = fake
|
||||
result.confidence # 0.0 to 1.0
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Model Variants
|
||||
|
||||
```python
|
||||
from uniface.spoofing import MiniFASNet
|
||||
from uniface.constants import MiniFASNetWeights
|
||||
|
||||
# Default (V2, recommended)
|
||||
spoofer = MiniFASNet()
|
||||
|
||||
# V1SE variant
|
||||
spoofer = MiniFASNet(model_name=MiniFASNetWeights.V1SE)
|
||||
```
|
||||
|
||||
| Variant | Size | Scale Factor |
|
||||
|---------|------|--------------|
|
||||
| V1SE | 1.2 MB | 4.0 |
|
||||
| **V2** :material-check-circle: | 1.2 MB | 2.7 |
|
||||
|
||||
---
|
||||
|
||||
## Confidence Thresholds
|
||||
|
||||
The default threshold is 0.5. Adjust for your use case:
|
||||
|
||||
```python
|
||||
result = spoofer.predict(image, face.bbox)
|
||||
|
||||
# High security (fewer false accepts)
|
||||
HIGH_THRESHOLD = 0.7
|
||||
if result.confidence > HIGH_THRESHOLD:
|
||||
print("Real (high confidence)")
|
||||
else:
|
||||
print("Suspicious")
|
||||
|
||||
# Balanced
|
||||
if result.is_real: # Uses default 0.5 threshold
|
||||
print("Real")
|
||||
else:
|
||||
print("Fake")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Visualization
|
||||
|
||||
```python
|
||||
import cv2
|
||||
|
||||
def draw_spoofing_result(image, face, result):
|
||||
"""Draw spoofing result on image."""
|
||||
x1, y1, x2, y2 = map(int, face.bbox)
|
||||
|
||||
# Color based on result
|
||||
color = (0, 255, 0) if result.is_real else (0, 0, 255)
|
||||
label = "Real" if result.is_real else "Fake"
|
||||
|
||||
# Draw bounding box
|
||||
cv2.rectangle(image, (x1, y1), (x2, y2), color, 2)
|
||||
|
||||
# Draw label
|
||||
text = f"{label}: {result.confidence:.1%}"
|
||||
cv2.putText(image, text, (x1, y1 - 10),
|
||||
cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)
|
||||
|
||||
return image
|
||||
|
||||
# Usage
|
||||
for face in faces:
|
||||
result = spoofer.predict(image, face.bbox)
|
||||
image = draw_spoofing_result(image, face, result)
|
||||
|
||||
cv2.imwrite("spoofing_result.jpg", image)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Real-Time Liveness Detection
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace
|
||||
from uniface.spoofing import MiniFASNet
|
||||
|
||||
detector = RetinaFace()
|
||||
spoofer = MiniFASNet()
|
||||
|
||||
cap = cv2.VideoCapture(0)
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
faces = detector.detect(frame)
|
||||
|
||||
for face in faces:
|
||||
result = spoofer.predict(frame, face.bbox)
|
||||
|
||||
# Draw result
|
||||
x1, y1, x2, y2 = map(int, face.bbox)
|
||||
color = (0, 255, 0) if result.is_real else (0, 0, 255)
|
||||
label = f"{'Real' if result.is_real else 'Fake'}: {result.confidence:.0%}"
|
||||
|
||||
cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
|
||||
cv2.putText(frame, label, (x1, y1 - 10),
|
||||
cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)
|
||||
|
||||
cv2.imshow("Liveness Detection", frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Use Cases
|
||||
|
||||
### Access Control
|
||||
|
||||
```python
|
||||
def verify_liveness(image, face, spoofer, threshold=0.6):
|
||||
"""Verify face is real for access control."""
|
||||
result = spoofer.predict(image, face.bbox)
|
||||
|
||||
if result.is_real and result.confidence > threshold:
|
||||
return True, result.confidence
|
||||
return False, result.confidence
|
||||
|
||||
# Usage
|
||||
is_live, confidence = verify_liveness(image, face, spoofer)
|
||||
if is_live:
|
||||
print(f"Access granted (confidence: {confidence:.1%})")
|
||||
else:
|
||||
print(f"Access denied - possible spoof attempt")
|
||||
```
|
||||
|
||||
### Multi-Frame Verification
|
||||
|
||||
For higher security, verify across multiple frames:
|
||||
|
||||
```python
|
||||
def verify_liveness_multiframe(frames, detector, spoofer, min_real=3):
|
||||
"""Verify liveness across multiple frames."""
|
||||
real_count = 0
|
||||
|
||||
for frame in frames:
|
||||
faces = detector.detect(frame)
|
||||
if not faces:
|
||||
continue
|
||||
|
||||
result = spoofer.predict(frame, faces[0].bbox)
|
||||
if result.is_real:
|
||||
real_count += 1
|
||||
|
||||
return real_count >= min_real
|
||||
|
||||
# Collect frames and verify
|
||||
frames = []
|
||||
for _ in range(5):
|
||||
ret, frame = cap.read()
|
||||
if ret:
|
||||
frames.append(frame)
|
||||
|
||||
is_verified = verify_liveness_multiframe(frames, detector, spoofer)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Attack Types Detected
|
||||
|
||||
MiniFASNet can detect various spoof attacks:
|
||||
|
||||
| Attack Type | Detection |
|
||||
|-------------|-----------|
|
||||
| Printed photos | ✅ |
|
||||
| Screen replay | ✅ |
|
||||
| Video replay | ✅ |
|
||||
| Paper masks | ✅ |
|
||||
| 3D masks | Limited |
|
||||
|
||||
!!! warning "Limitations"
|
||||
- High-quality 3D masks may not be detected
|
||||
- Performance varies with lighting and image quality
|
||||
- Always combine with other verification methods for high-security applications
|
||||
|
||||
---
|
||||
|
||||
## Command-Line Tool
|
||||
|
||||
```bash
|
||||
# Image
|
||||
python tools/spoofing.py --source photo.jpg
|
||||
|
||||
# Webcam
|
||||
python tools/spoofing.py --source 0
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Factory Function
|
||||
|
||||
```python
|
||||
from uniface import create_spoofer
|
||||
|
||||
spoofer = create_spoofer() # Returns MiniFASNet
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Next Steps
|
||||
|
||||
- [Privacy](privacy.md) - Face anonymization
|
||||
- [Detection](detection.md) - Face detection
|
||||
- [Recognition](recognition.md) - Face recognition
|
||||
57
docs/notebooks.md
Normal file
57
docs/notebooks.md
Normal file
@@ -0,0 +1,57 @@
|
||||
# Interactive Notebooks
|
||||
|
||||
Run UniFace examples directly in your browser with Google Colab, or download and run locally with Jupyter.
|
||||
|
||||
---
|
||||
|
||||
## Available Notebooks
|
||||
|
||||
| Notebook | Colab | Description |
|
||||
|----------|:-----:|-------------|
|
||||
| [Face Detection](https://github.com/yakhyo/uniface/blob/main/examples/01_face_detection.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/01_face_detection.ipynb) | Detect faces and 5-point landmarks |
|
||||
| [Face Alignment](https://github.com/yakhyo/uniface/blob/main/examples/02_face_alignment.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/02_face_alignment.ipynb) | Align faces for recognition |
|
||||
| [Face Verification](https://github.com/yakhyo/uniface/blob/main/examples/03_face_verification.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/03_face_verification.ipynb) | Compare faces for identity |
|
||||
| [Face Search](https://github.com/yakhyo/uniface/blob/main/examples/04_face_search.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/04_face_search.ipynb) | Find a person in group photos |
|
||||
| [Face Analyzer](https://github.com/yakhyo/uniface/blob/main/examples/05_face_analyzer.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/05_face_analyzer.ipynb) | All-in-one face analysis |
|
||||
| [Face Parsing](https://github.com/yakhyo/uniface/blob/main/examples/06_face_parsing.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/06_face_parsing.ipynb) | Semantic face segmentation |
|
||||
| [Face Anonymization](https://github.com/yakhyo/uniface/blob/main/examples/07_face_anonymization.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/07_face_anonymization.ipynb) | Privacy-preserving blur |
|
||||
| [Gaze Estimation](https://github.com/yakhyo/uniface/blob/main/examples/08_gaze_estimation.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/08_gaze_estimation.ipynb) | Gaze direction estimation |
|
||||
|
||||
---
|
||||
|
||||
## Running Locally
|
||||
|
||||
Download and run notebooks on your machine:
|
||||
|
||||
```bash
|
||||
# Clone the repository
|
||||
git clone https://github.com/yakhyo/uniface.git
|
||||
cd uniface
|
||||
|
||||
# Install dependencies
|
||||
pip install uniface jupyter
|
||||
|
||||
# Launch Jupyter
|
||||
jupyter notebook examples/
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Running on Google Colab
|
||||
|
||||
Click any **"Open in Colab"** badge above. The notebooks automatically:
|
||||
|
||||
1. Install UniFace via pip
|
||||
2. Clone the repository to access test images
|
||||
3. Set up the correct working directory
|
||||
|
||||
!!! tip "GPU Acceleration"
|
||||
In Colab, go to **Runtime → Change runtime type → GPU** for faster inference.
|
||||
|
||||
---
|
||||
|
||||
## Next Steps
|
||||
|
||||
- [Quickstart](quickstart.md) - Code snippets for common use cases
|
||||
- [Tutorials](recipes/image-pipeline.md) - Step-by-step workflow guides
|
||||
- [API Reference](modules/detection.md) - Detailed module documentation
|
||||
5
docs/overrides/home.html
Normal file
5
docs/overrides/home.html
Normal file
@@ -0,0 +1,5 @@
|
||||
{% extends "main.html" %}
|
||||
|
||||
{% block source %}
|
||||
<!-- Hide edit/view source on home page -->
|
||||
{% endblock %}
|
||||
426
docs/quickstart.md
Normal file
426
docs/quickstart.md
Normal file
@@ -0,0 +1,426 @@
|
||||
# Quickstart
|
||||
|
||||
Get up and running with UniFace in 5 minutes. This guide covers the most common use cases.
|
||||
|
||||
---
|
||||
|
||||
## Face Detection
|
||||
|
||||
Detect faces in an image:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace
|
||||
|
||||
# Load image
|
||||
image = cv2.imread("photo.jpg")
|
||||
|
||||
# Initialize detector (models auto-download on first use)
|
||||
detector = RetinaFace()
|
||||
|
||||
# Detect faces
|
||||
faces = detector.detect(image)
|
||||
|
||||
# Print results
|
||||
for i, face in enumerate(faces):
|
||||
print(f"Face {i+1}:")
|
||||
print(f" Confidence: {face.confidence:.2f}")
|
||||
print(f" BBox: {face.bbox}")
|
||||
print(f" Landmarks: {len(face.landmarks)} points")
|
||||
```
|
||||
|
||||
**Output:**
|
||||
|
||||
```
|
||||
Face 1:
|
||||
Confidence: 0.99
|
||||
BBox: [120.5, 85.3, 245.8, 210.6]
|
||||
Landmarks: 5 points
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Visualize Detections
|
||||
|
||||
Draw bounding boxes and landmarks:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
# Detect faces
|
||||
detector = RetinaFace()
|
||||
image = cv2.imread("photo.jpg")
|
||||
faces = detector.detect(image)
|
||||
|
||||
# Extract visualization data
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
|
||||
# Draw on image
|
||||
draw_detections(
|
||||
image=image,
|
||||
bboxes=bboxes,
|
||||
scores=scores,
|
||||
landmarks=landmarks,
|
||||
vis_threshold=0.6,
|
||||
)
|
||||
|
||||
# Save result
|
||||
cv2.imwrite("output.jpg", image)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Face Recognition
|
||||
|
||||
Compare two faces:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
import numpy as np
|
||||
from uniface import RetinaFace, ArcFace
|
||||
|
||||
# Initialize models
|
||||
detector = RetinaFace()
|
||||
recognizer = ArcFace()
|
||||
|
||||
# Load two images
|
||||
image1 = cv2.imread("person1.jpg")
|
||||
image2 = cv2.imread("person2.jpg")
|
||||
|
||||
# Detect faces
|
||||
faces1 = detector.detect(image1)
|
||||
faces2 = detector.detect(image2)
|
||||
|
||||
if faces1 and faces2:
|
||||
# Extract embeddings
|
||||
emb1 = recognizer.get_normalized_embedding(image1, faces1[0].landmarks)
|
||||
emb2 = recognizer.get_normalized_embedding(image2, faces2[0].landmarks)
|
||||
|
||||
# Compute similarity (cosine similarity)
|
||||
similarity = np.dot(emb1, emb2.T)[0][0]
|
||||
|
||||
# Interpret result
|
||||
if similarity > 0.6:
|
||||
print(f"Same person (similarity: {similarity:.3f})")
|
||||
else:
|
||||
print(f"Different people (similarity: {similarity:.3f})")
|
||||
```
|
||||
|
||||
!!! tip "Similarity Thresholds"
|
||||
- `> 0.6`: Same person (high confidence)
|
||||
- `0.4 - 0.6`: Uncertain (manual review)
|
||||
- `< 0.4`: Different people
|
||||
|
||||
---
|
||||
|
||||
## Age & Gender Detection
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace, AgeGender
|
||||
|
||||
# Initialize models
|
||||
detector = RetinaFace()
|
||||
age_gender = AgeGender()
|
||||
|
||||
# Load image
|
||||
image = cv2.imread("photo.jpg")
|
||||
faces = detector.detect(image)
|
||||
|
||||
# Predict attributes
|
||||
for i, face in enumerate(faces):
|
||||
result = age_gender.predict(image, face.bbox)
|
||||
print(f"Face {i+1}: {result.sex}, {result.age} years old")
|
||||
```
|
||||
|
||||
**Output:**
|
||||
|
||||
```
|
||||
Face 1: Male, 32 years old
|
||||
Face 2: Female, 28 years old
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## FairFace Attributes
|
||||
|
||||
Detect race, gender, and age group:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace, FairFace
|
||||
|
||||
detector = RetinaFace()
|
||||
fairface = FairFace()
|
||||
|
||||
image = cv2.imread("photo.jpg")
|
||||
faces = detector.detect(image)
|
||||
|
||||
for i, face in enumerate(faces):
|
||||
result = fairface.predict(image, face.bbox)
|
||||
print(f"Face {i+1}: {result.sex}, {result.age_group}, {result.race}")
|
||||
```
|
||||
|
||||
**Output:**
|
||||
|
||||
```
|
||||
Face 1: Male, 30-39, East Asian
|
||||
Face 2: Female, 20-29, White
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Facial Landmarks (106 Points)
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace, Landmark106
|
||||
|
||||
detector = RetinaFace()
|
||||
landmarker = Landmark106()
|
||||
|
||||
image = cv2.imread("photo.jpg")
|
||||
faces = detector.detect(image)
|
||||
|
||||
if faces:
|
||||
landmarks = landmarker.get_landmarks(image, faces[0].bbox)
|
||||
print(f"Detected {len(landmarks)} landmarks")
|
||||
|
||||
# Draw landmarks
|
||||
for x, y in landmarks.astype(int):
|
||||
cv2.circle(image, (x, y), 2, (0, 255, 0), -1)
|
||||
|
||||
cv2.imwrite("landmarks.jpg", image)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Gaze Estimation
|
||||
|
||||
```python
|
||||
import cv2
|
||||
import numpy as np
|
||||
from uniface import RetinaFace, MobileGaze
|
||||
from uniface.visualization import draw_gaze
|
||||
|
||||
detector = RetinaFace()
|
||||
gaze_estimator = MobileGaze()
|
||||
|
||||
image = cv2.imread("photo.jpg")
|
||||
faces = detector.detect(image)
|
||||
|
||||
for i, face in enumerate(faces):
|
||||
x1, y1, x2, y2 = map(int, face.bbox[:4])
|
||||
face_crop = image[y1:y2, x1:x2]
|
||||
|
||||
if face_crop.size > 0:
|
||||
result = gaze_estimator.estimate(face_crop)
|
||||
print(f"Face {i+1}: pitch={np.degrees(result.pitch):.1f}°, yaw={np.degrees(result.yaw):.1f}°")
|
||||
|
||||
# Draw gaze direction
|
||||
draw_gaze(image, face.bbox, result.pitch, result.yaw)
|
||||
|
||||
cv2.imwrite("gaze_output.jpg", image)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Face Parsing
|
||||
|
||||
Segment face into semantic components:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
import numpy as np
|
||||
from uniface.parsing import BiSeNet
|
||||
from uniface.visualization import vis_parsing_maps
|
||||
|
||||
parser = BiSeNet()
|
||||
|
||||
# Load face image (already cropped)
|
||||
face_image = cv2.imread("face.jpg")
|
||||
|
||||
# Parse face into 19 components
|
||||
mask = parser.parse(face_image)
|
||||
|
||||
# Visualize with overlay
|
||||
face_rgb = cv2.cvtColor(face_image, cv2.COLOR_BGR2RGB)
|
||||
vis_result = vis_parsing_maps(face_rgb, mask, save_image=False)
|
||||
|
||||
print(f"Detected {len(np.unique(mask))} facial components")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Face Anonymization
|
||||
|
||||
Blur faces for privacy protection:
|
||||
|
||||
```python
|
||||
from uniface.privacy import anonymize_faces
|
||||
import cv2
|
||||
|
||||
# One-liner: automatic detection and blurring
|
||||
image = cv2.imread("group_photo.jpg")
|
||||
anonymized = anonymize_faces(image, method='pixelate')
|
||||
cv2.imwrite("anonymized.jpg", anonymized)
|
||||
```
|
||||
|
||||
**Manual control:**
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace
|
||||
from uniface.privacy import BlurFace
|
||||
|
||||
detector = RetinaFace()
|
||||
blurrer = BlurFace(method='gaussian', blur_strength=5.0)
|
||||
|
||||
faces = detector.detect(image)
|
||||
anonymized = blurrer.anonymize(image, faces)
|
||||
```
|
||||
|
||||
**Available methods:**
|
||||
|
||||
| Method | Description |
|
||||
|--------|-------------|
|
||||
| `pixelate` | Blocky effect (news media standard) |
|
||||
| `gaussian` | Smooth, natural blur |
|
||||
| `blackout` | Solid color boxes (maximum privacy) |
|
||||
| `elliptical` | Soft oval blur (natural face shape) |
|
||||
| `median` | Edge-preserving blur |
|
||||
|
||||
---
|
||||
|
||||
## Face Anti-Spoofing
|
||||
|
||||
Detect real vs. fake faces:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace
|
||||
from uniface.spoofing import MiniFASNet
|
||||
|
||||
detector = RetinaFace()
|
||||
spoofer = MiniFASNet()
|
||||
|
||||
image = cv2.imread("photo.jpg")
|
||||
faces = detector.detect(image)
|
||||
|
||||
for i, face in enumerate(faces):
|
||||
result = spoofer.predict(image, face.bbox)
|
||||
label = 'Real' if result.is_real else 'Fake'
|
||||
print(f"Face {i+1}: {label} ({result.confidence:.1%})")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Webcam Demo
|
||||
|
||||
Real-time face detection:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
detector = RetinaFace()
|
||||
cap = cv2.VideoCapture(0)
|
||||
|
||||
print("Press 'q' to quit")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
faces = detector.detect(frame)
|
||||
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
draw_detections(image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks)
|
||||
|
||||
cv2.imshow("UniFace - Press 'q' to quit", frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Model Selection
|
||||
|
||||
For detailed model comparisons, benchmarks, and selection guidance, see the [Model Zoo](models.md).
|
||||
|
||||
**Quick recommendations:**
|
||||
|
||||
| Task | Recommended Model | Alternative |
|
||||
|------|-------------------|-------------|
|
||||
| Detection (balanced) | `RetinaFace` (MNET_V2) | `YOLOv5Face` (YOLOV5S) |
|
||||
| Detection (speed) | `RetinaFace` (MNET_025) | `SCRFD` (SCRFD_500M) |
|
||||
| Detection (accuracy) | `SCRFD` (SCRFD_10G) | `RetinaFace` (RESNET34) |
|
||||
| Recognition | `ArcFace` (MNET) | `MobileFace` (MNET_V2) |
|
||||
| Gaze | `MobileGaze` (RESNET34) | `MobileGaze` (MOBILEONE_S0) |
|
||||
| Parsing | `BiSeNet` (RESNET18) | `BiSeNet` (RESNET34) |
|
||||
|
||||
---
|
||||
|
||||
## Common Issues
|
||||
|
||||
### Models Not Downloading
|
||||
|
||||
```python
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.constants import RetinaFaceWeights
|
||||
|
||||
# Manually download a model
|
||||
model_path = verify_model_weights(RetinaFaceWeights.MNET_V2)
|
||||
print(f"Model downloaded to: {model_path}")
|
||||
```
|
||||
|
||||
### Check Hardware Acceleration
|
||||
|
||||
```python
|
||||
import onnxruntime as ort
|
||||
print("Available providers:", ort.get_available_providers())
|
||||
|
||||
# macOS M-series should show: ['CoreMLExecutionProvider', ...]
|
||||
# NVIDIA GPU should show: ['CUDAExecutionProvider', ...]
|
||||
```
|
||||
|
||||
### Slow Performance on Mac
|
||||
|
||||
Verify you're using the ARM64 build of Python:
|
||||
|
||||
```bash
|
||||
python -c "import platform; print(platform.machine())"
|
||||
# Should show: arm64 (not x86_64)
|
||||
```
|
||||
|
||||
### Import Errors
|
||||
|
||||
```python
|
||||
# Correct imports
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.recognition import ArcFace
|
||||
from uniface.landmark import Landmark106
|
||||
|
||||
# Also works (re-exported at package level)
|
||||
from uniface import RetinaFace, ArcFace, Landmark106
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Next Steps
|
||||
|
||||
- [Model Zoo](models.md) - All models, benchmarks, and selection guide
|
||||
- [API Reference](modules/detection.md) - Explore individual modules and their APIs
|
||||
- [Tutorials](recipes/image-pipeline.md) - Step-by-step examples for common workflows
|
||||
- [Guides](concepts/overview.md) - Learn about the architecture and design principles
|
||||
99
docs/recipes/anonymize-stream.md
Normal file
99
docs/recipes/anonymize-stream.md
Normal file
@@ -0,0 +1,99 @@
|
||||
# Anonymize Stream
|
||||
|
||||
Blur faces in real-time video streams for privacy protection.
|
||||
|
||||
!!! note "Work in Progress"
|
||||
This page contains example code patterns. Test thoroughly before using in production.
|
||||
|
||||
---
|
||||
|
||||
## Webcam Anonymization
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace
|
||||
from uniface.privacy import BlurFace
|
||||
|
||||
detector = RetinaFace()
|
||||
blurrer = BlurFace(method='pixelate')
|
||||
cap = cv2.VideoCapture(0)
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
faces = detector.detect(frame)
|
||||
frame = blurrer.anonymize(frame, faces, inplace=True)
|
||||
|
||||
cv2.imshow('Anonymized', frame)
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Video File Anonymization
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace
|
||||
from uniface.privacy import BlurFace
|
||||
|
||||
detector = RetinaFace()
|
||||
blurrer = BlurFace(method='gaussian')
|
||||
|
||||
cap = cv2.VideoCapture("input.mp4")
|
||||
fps = cap.get(cv2.CAP_PROP_FPS)
|
||||
w, h = int(cap.get(3)), int(cap.get(4))
|
||||
|
||||
out = cv2.VideoWriter('output.mp4', cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
|
||||
|
||||
while cap.read()[0]:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
faces = detector.detect(frame)
|
||||
blurrer.anonymize(frame, faces, inplace=True)
|
||||
out.write(frame)
|
||||
|
||||
cap.release()
|
||||
out.release()
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## One-Liner for Images
|
||||
|
||||
```python
|
||||
from uniface.privacy import anonymize_faces
|
||||
import cv2
|
||||
|
||||
image = cv2.imread("photo.jpg")
|
||||
result = anonymize_faces(image, method='pixelate')
|
||||
cv2.imwrite("anonymized.jpg", result)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Available Blur Methods
|
||||
|
||||
| Method | Usage |
|
||||
|--------|-------|
|
||||
| Pixelate | `BlurFace(method='pixelate', pixel_blocks=10)` |
|
||||
| Gaussian | `BlurFace(method='gaussian', blur_strength=3.0)` |
|
||||
| Blackout | `BlurFace(method='blackout', color=(0,0,0))` |
|
||||
| Elliptical | `BlurFace(method='elliptical', margin=20)` |
|
||||
| Median | `BlurFace(method='median', blur_strength=3.0)` |
|
||||
|
||||
---
|
||||
|
||||
## See Also
|
||||
|
||||
- [Privacy Module](../modules/privacy.md) - Privacy protection details
|
||||
- [Video & Webcam](video-webcam.md) - Real-time processing
|
||||
- [Detection Module](../modules/detection.md) - Face detection
|
||||
83
docs/recipes/batch-processing.md
Normal file
83
docs/recipes/batch-processing.md
Normal file
@@ -0,0 +1,83 @@
|
||||
# Batch Processing
|
||||
|
||||
Process multiple images efficiently.
|
||||
|
||||
!!! note "Work in Progress"
|
||||
This page contains example code patterns. Test thoroughly before using in production.
|
||||
|
||||
---
|
||||
|
||||
## Basic Batch Processing
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from pathlib import Path
|
||||
from uniface import RetinaFace
|
||||
|
||||
detector = RetinaFace()
|
||||
|
||||
def process_directory(input_dir, output_dir):
|
||||
"""Process all images in a directory."""
|
||||
input_path = Path(input_dir)
|
||||
output_path = Path(output_dir)
|
||||
output_path.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
for image_path in input_path.glob("*.jpg"):
|
||||
print(f"Processing {image_path.name}...")
|
||||
|
||||
image = cv2.imread(str(image_path))
|
||||
faces = detector.detect(image)
|
||||
|
||||
print(f" Found {len(faces)} face(s)")
|
||||
|
||||
# Process and save results
|
||||
# ... your code here ...
|
||||
|
||||
# Usage
|
||||
process_directory("input_images/", "output_images/")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## With Progress Bar
|
||||
|
||||
```python
|
||||
from tqdm import tqdm
|
||||
|
||||
for image_path in tqdm(image_files, desc="Processing"):
|
||||
# ... process image ...
|
||||
pass
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Extract Embeddings
|
||||
|
||||
```python
|
||||
import cv2
import numpy as np
from pathlib import Path
from uniface import RetinaFace, ArcFace
|
||||
|
||||
detector = RetinaFace()
|
||||
recognizer = ArcFace()
|
||||
|
||||
embeddings = {}
|
||||
for image_path in Path("faces/").glob("*.jpg"):
|
||||
image = cv2.imread(str(image_path))
|
||||
faces = detector.detect(image)
|
||||
|
||||
if faces:
|
||||
embedding = recognizer.get_normalized_embedding(image, faces[0].landmarks)
|
||||
embeddings[image_path.stem] = embedding
|
||||
|
||||
# Save embeddings
|
||||
np.savez("embeddings.npz", **embeddings)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## See Also
|
||||
|
||||
- [Video & Webcam](video-webcam.md) - Real-time processing
|
||||
- [Face Search](face-search.md) - Search through embeddings
|
||||
- [Image Pipeline](image-pipeline.md) - Full analysis pipeline
|
||||
- [Detection Module](../modules/detection.md) - Detection options
|
||||
114
docs/recipes/custom-models.md
Normal file
114
docs/recipes/custom-models.md
Normal file
@@ -0,0 +1,114 @@
|
||||
# Custom Models
|
||||
|
||||
Add your own ONNX models to UniFace.
|
||||
|
||||
!!! note "Work in Progress"
|
||||
This page contains example code patterns for advanced users. Test thoroughly before using in production.
|
||||
|
||||
---
|
||||
|
||||
## Overview
|
||||
|
||||
UniFace is designed to be extensible. You can add custom ONNX models by:
|
||||
|
||||
1. Creating a class that inherits from the appropriate base class
|
||||
2. Implementing required methods
|
||||
3. Using the ONNX Runtime utilities provided by UniFace
|
||||
|
||||
---
|
||||
|
||||
## Add Custom Detection Model
|
||||
|
||||
```python
|
||||
from uniface.detection.base import BaseDetector
|
||||
from uniface.onnx_utils import create_onnx_session
|
||||
from uniface.types import Face
|
||||
import numpy as np
|
||||
|
||||
class MyDetector(BaseDetector):
|
||||
def __init__(self, model_path: str, confidence_threshold: float = 0.5):
|
||||
self.session = create_onnx_session(model_path)
|
||||
self.threshold = confidence_threshold
|
||||
|
||||
def detect(self, image: np.ndarray) -> list[Face]:
|
||||
# 1. Preprocess image
|
||||
input_tensor = self._preprocess(image)
|
||||
|
||||
# 2. Run inference
|
||||
outputs = self.session.run(None, {'input': input_tensor})
|
||||
|
||||
# 3. Postprocess outputs to Face objects
|
||||
faces = self._postprocess(outputs, image.shape)
|
||||
return faces
|
||||
|
||||
def _preprocess(self, image):
|
||||
# Your preprocessing logic
|
||||
# e.g., resize, normalize, transpose
|
||||
pass
|
||||
|
||||
def _postprocess(self, outputs, shape):
|
||||
# Your postprocessing logic
|
||||
# e.g., decode boxes, apply NMS, create Face objects
|
||||
pass
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Add Custom Recognition Model
|
||||
|
||||
```python
|
||||
from uniface.recognition.base import BaseRecognizer
|
||||
from uniface.onnx_utils import create_onnx_session
|
||||
from uniface import face_alignment
|
||||
import numpy as np
|
||||
|
||||
class MyRecognizer(BaseRecognizer):
|
||||
def __init__(self, model_path: str):
|
||||
self.session = create_onnx_session(model_path)
|
||||
|
||||
def get_normalized_embedding(
|
||||
self,
|
||||
image: np.ndarray,
|
||||
landmarks: np.ndarray
|
||||
) -> np.ndarray:
|
||||
# 1. Align face
|
||||
aligned = face_alignment(image, landmarks)
|
||||
|
||||
# 2. Preprocess
|
||||
input_tensor = self._preprocess(aligned)
|
||||
|
||||
# 3. Run inference
|
||||
embedding = self.session.run(None, {'input': input_tensor})[0]
|
||||
|
||||
# 4. Normalize
|
||||
embedding = embedding / np.linalg.norm(embedding)
|
||||
return embedding
|
||||
|
||||
def _preprocess(self, image):
|
||||
# Your preprocessing logic
|
||||
pass
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Usage
|
||||
|
||||
```python
|
||||
from my_module import MyDetector, MyRecognizer
|
||||
|
||||
# Use custom models
|
||||
detector = MyDetector("path/to/detection_model.onnx")
|
||||
recognizer = MyRecognizer("path/to/recognition_model.onnx")
|
||||
|
||||
# Use like built-in models
|
||||
faces = detector.detect(image)
|
||||
embedding = recognizer.get_normalized_embedding(image, faces[0].landmarks)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## See Also
|
||||
|
||||
- [Detection Module](../modules/detection.md) - Built-in detection models
|
||||
- [Recognition Module](../modules/recognition.md) - Built-in recognition models
|
||||
- [Concepts: Overview](../concepts/overview.md) - Architecture overview
|
||||
178
docs/recipes/face-search.md
Normal file
178
docs/recipes/face-search.md
Normal file
@@ -0,0 +1,178 @@
|
||||
# Face Search
|
||||
|
||||
Build a face search system for finding people in images.
|
||||
|
||||
!!! note "Work in Progress"
|
||||
This page contains example code patterns. Test thoroughly before using in production.
|
||||
|
||||
---
|
||||
|
||||
## Basic Face Database
|
||||
|
||||
```python
|
||||
import numpy as np
|
||||
import cv2
|
||||
from pathlib import Path
|
||||
from uniface import RetinaFace, ArcFace
|
||||
|
||||
class FaceDatabase:
|
||||
def __init__(self):
|
||||
self.detector = RetinaFace()
|
||||
self.recognizer = ArcFace()
|
||||
self.embeddings = {}
|
||||
|
||||
def add_face(self, person_id, image):
|
||||
"""Add a face to the database."""
|
||||
faces = self.detector.detect(image)
|
||||
if not faces:
|
||||
raise ValueError(f"No face found for {person_id}")
|
||||
|
||||
face = max(faces, key=lambda f: f.confidence)
|
||||
embedding = self.recognizer.get_normalized_embedding(image, face.landmarks)
|
||||
self.embeddings[person_id] = embedding
|
||||
return True
|
||||
|
||||
def search(self, image, threshold=0.6):
|
||||
"""Search for faces in an image."""
|
||||
faces = self.detector.detect(image)
|
||||
results = []
|
||||
|
||||
for face in faces:
|
||||
embedding = self.recognizer.get_normalized_embedding(image, face.landmarks)
|
||||
|
||||
best_match = None
|
||||
best_similarity = -1
|
||||
|
||||
for person_id, db_embedding in self.embeddings.items():
|
||||
similarity = np.dot(embedding, db_embedding.T)[0][0]
|
||||
if similarity > best_similarity:
|
||||
best_similarity = similarity
|
||||
best_match = person_id
|
||||
|
||||
results.append({
|
||||
'bbox': face.bbox,
|
||||
'match': best_match if best_similarity >= threshold else None,
|
||||
'similarity': best_similarity
|
||||
})
|
||||
|
||||
return results
|
||||
|
||||
def save(self, path):
|
||||
"""Save database to file."""
|
||||
np.savez(path, embeddings=dict(self.embeddings))
|
||||
|
||||
def load(self, path):
|
||||
"""Load database from file."""
|
||||
data = np.load(path, allow_pickle=True)  # needed because save() stores a dict object; unpickling can run arbitrary code, so only load trusted files
|
||||
self.embeddings = data['embeddings'].item()
|
||||
|
||||
# Usage
|
||||
db = FaceDatabase()
|
||||
|
||||
# Add faces
|
||||
for image_path in Path("known_faces/").glob("*.jpg"):
|
||||
person_id = image_path.stem
|
||||
image = cv2.imread(str(image_path))
|
||||
try:
|
||||
db.add_face(person_id, image)
|
||||
print(f"Added: {person_id}")
|
||||
except ValueError as e:
|
||||
print(f"Skipped: {e}")
|
||||
|
||||
# Save database
|
||||
db.save("face_database.npz")
|
||||
|
||||
# Search
|
||||
query_image = cv2.imread("group_photo.jpg")
|
||||
results = db.search(query_image)
|
||||
|
||||
for r in results:
|
||||
if r['match']:
|
||||
print(f"Found: {r['match']} (similarity: {r['similarity']:.3f})")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Visualization
|
||||
|
||||
```python
|
||||
import cv2
|
||||
|
||||
def visualize_search_results(image, results):
|
||||
"""Draw search results on image."""
|
||||
for r in results:
|
||||
x1, y1, x2, y2 = map(int, r['bbox'])
|
||||
|
||||
if r['match']:
|
||||
color = (0, 255, 0) # Green for match
|
||||
label = f"{r['match']} ({r['similarity']:.2f})"
|
||||
else:
|
||||
color = (0, 0, 255) # Red for unknown
|
||||
label = f"Unknown ({r['similarity']:.2f})"
|
||||
|
||||
cv2.rectangle(image, (x1, y1), (x2, y2), color, 2)
|
||||
cv2.putText(image, label, (x1, y1 - 10),
|
||||
cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)
|
||||
|
||||
return image
|
||||
|
||||
# Usage
|
||||
results = db.search(image)
|
||||
annotated = visualize_search_results(image.copy(), results)
|
||||
cv2.imwrite("search_result.jpg", annotated)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Real-Time Search
|
||||
|
||||
```python
|
||||
import cv2
|
||||
|
||||
def realtime_search(db):
|
||||
"""Real-time face search from webcam."""
|
||||
cap = cv2.VideoCapture(0)
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
results = db.search(frame, threshold=0.5)
|
||||
|
||||
for r in results:
|
||||
x1, y1, x2, y2 = map(int, r['bbox'])
|
||||
|
||||
if r['match']:
|
||||
color = (0, 255, 0)
|
||||
label = r['match']
|
||||
else:
|
||||
color = (0, 0, 255)
|
||||
label = "Unknown"
|
||||
|
||||
cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
|
||||
cv2.putText(frame, label, (x1, y1 - 10),
|
||||
cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)
|
||||
|
||||
cv2.imshow("Face Search", frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
# Usage
|
||||
db = FaceDatabase()
|
||||
db.load("face_database.npz")
|
||||
realtime_search(db)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## See Also
|
||||
|
||||
- [Recognition Module](../modules/recognition.md) - Face recognition details
|
||||
- [Batch Processing](batch-processing.md) - Process multiple files
|
||||
- [Video & Webcam](video-webcam.md) - Real-time processing
|
||||
- [Concepts: Thresholds](../concepts/thresholds-calibration.md) - Tuning similarity thresholds
|
||||
281
docs/recipes/image-pipeline.md
Normal file
281
docs/recipes/image-pipeline.md
Normal file
@@ -0,0 +1,281 @@
|
||||
# Image Pipeline
|
||||
|
||||
A complete pipeline for processing images with detection, recognition, and attribute analysis.
|
||||
|
||||
---
|
||||
|
||||
## Basic Pipeline
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace, ArcFace, AgeGender
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
# Initialize models
|
||||
detector = RetinaFace()
|
||||
recognizer = ArcFace()
|
||||
age_gender = AgeGender()
|
||||
|
||||
def process_image(image_path):
|
||||
"""Process a single image through the full pipeline."""
|
||||
# Load image
|
||||
image = cv2.imread(image_path)
|
||||
|
||||
# Step 1: Detect faces
|
||||
faces = detector.detect(image)
|
||||
print(f"Found {len(faces)} face(s)")
|
||||
|
||||
results = []
|
||||
|
||||
for i, face in enumerate(faces):
|
||||
# Step 2: Extract embedding
|
||||
embedding = recognizer.get_normalized_embedding(image, face.landmarks)
|
||||
|
||||
# Step 3: Predict attributes
|
||||
attrs = age_gender.predict(image, face.bbox)
|
||||
|
||||
results.append({
|
||||
'face_id': i,
|
||||
'bbox': face.bbox,
|
||||
'confidence': face.confidence,
|
||||
'embedding': embedding,
|
||||
'gender': attrs.sex,
|
||||
'age': attrs.age
|
||||
})
|
||||
|
||||
print(f" Face {i+1}: {attrs.sex}, {attrs.age} years old")
|
||||
|
||||
# Visualize
|
||||
draw_detections(
|
||||
image=image,
|
||||
bboxes=[f.bbox for f in faces],
|
||||
scores=[f.confidence for f in faces],
|
||||
landmarks=[f.landmarks for f in faces]
|
||||
)
|
||||
|
||||
return image, results
|
||||
|
||||
# Usage
|
||||
result_image, results = process_image("photo.jpg")
|
||||
cv2.imwrite("result.jpg", result_image)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Using FaceAnalyzer
|
||||
|
||||
For convenience, use the built-in `FaceAnalyzer`:
|
||||
|
||||
```python
|
||||
from uniface import FaceAnalyzer
|
||||
import cv2
|
||||
|
||||
# Initialize with desired modules
|
||||
analyzer = FaceAnalyzer(
|
||||
detect=True,
|
||||
recognize=True,
|
||||
attributes=True
|
||||
)
|
||||
|
||||
# Process image
|
||||
image = cv2.imread("photo.jpg")
|
||||
faces = analyzer.analyze(image)
|
||||
|
||||
# Access enriched Face objects
|
||||
for face in faces:
|
||||
print(f"Confidence: {face.confidence:.2f}")
|
||||
print(f"Embedding: {face.embedding.shape}")
|
||||
print(f"Age: {face.age}, Gender: {face.sex}")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Full Analysis Pipeline
|
||||
|
||||
Complete pipeline with all modules:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
import numpy as np
|
||||
from uniface import (
|
||||
RetinaFace, ArcFace, AgeGender, FairFace,
|
||||
Landmark106, MobileGaze
|
||||
)
|
||||
from uniface.parsing import BiSeNet
|
||||
from uniface.spoofing import MiniFASNet
|
||||
from uniface.visualization import draw_detections, draw_gaze
|
||||
|
||||
class FaceAnalysisPipeline:
|
||||
def __init__(self):
|
||||
# Initialize all models
|
||||
self.detector = RetinaFace()
|
||||
self.recognizer = ArcFace()
|
||||
self.age_gender = AgeGender()
|
||||
self.fairface = FairFace()
|
||||
self.landmarker = Landmark106()
|
||||
self.gaze = MobileGaze()
|
||||
self.parser = BiSeNet()
|
||||
self.spoofer = MiniFASNet()
|
||||
|
||||
def analyze(self, image):
|
||||
"""Run full analysis pipeline."""
|
||||
faces = self.detector.detect(image)
|
||||
results = []
|
||||
|
||||
for face in faces:
|
||||
result = {
|
||||
'bbox': face.bbox,
|
||||
'confidence': face.confidence,
|
||||
'landmarks_5': face.landmarks
|
||||
}
|
||||
|
||||
# Recognition embedding
|
||||
result['embedding'] = self.recognizer.get_normalized_embedding(
|
||||
image, face.landmarks
|
||||
)
|
||||
|
||||
# Attributes
|
||||
ag_result = self.age_gender.predict(image, face.bbox)
|
||||
result['age'] = ag_result.age
|
||||
result['gender'] = ag_result.sex
|
||||
|
||||
# FairFace attributes
|
||||
ff_result = self.fairface.predict(image, face.bbox)
|
||||
result['age_group'] = ff_result.age_group
|
||||
result['race'] = ff_result.race
|
||||
|
||||
# 106-point landmarks
|
||||
result['landmarks_106'] = self.landmarker.get_landmarks(
|
||||
image, face.bbox
|
||||
)
|
||||
|
||||
# Gaze estimation
|
||||
x1, y1, x2, y2 = map(int, face.bbox)
|
||||
face_crop = image[y1:y2, x1:x2]
|
||||
if face_crop.size > 0:
|
||||
gaze_result = self.gaze.estimate(face_crop)
|
||||
result['gaze_pitch'] = gaze_result.pitch
|
||||
result['gaze_yaw'] = gaze_result.yaw
|
||||
|
||||
# Face parsing
|
||||
if face_crop.size > 0:
|
||||
result['parsing_mask'] = self.parser.parse(face_crop)
|
||||
|
||||
# Anti-spoofing
|
||||
spoof_result = self.spoofer.predict(image, face.bbox)
|
||||
result['is_real'] = spoof_result.is_real
|
||||
result['spoof_confidence'] = spoof_result.confidence
|
||||
|
||||
results.append(result)
|
||||
|
||||
return results
|
||||
|
||||
# Usage
|
||||
pipeline = FaceAnalysisPipeline()
|
||||
results = pipeline.analyze(cv2.imread("photo.jpg"))
|
||||
|
||||
for i, r in enumerate(results):
|
||||
print(f"\nFace {i+1}:")
|
||||
print(f" Gender: {r['gender']}, Age: {r['age']}")
|
||||
print(f" Race: {r['race']}, Age Group: {r['age_group']}")
|
||||
print(f" Gaze: pitch={np.degrees(r['gaze_pitch']):.1f}°")
|
||||
print(f" Real: {r['is_real']} ({r['spoof_confidence']:.1%})")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Visualization Pipeline
|
||||
|
||||
```python
|
||||
import cv2
|
||||
import numpy as np
|
||||
from uniface import RetinaFace, AgeGender, MobileGaze
|
||||
from uniface.visualization import draw_detections, draw_gaze
|
||||
|
||||
def visualize_analysis(image_path, output_path):
|
||||
"""Create annotated visualization of face analysis."""
|
||||
detector = RetinaFace()
|
||||
age_gender = AgeGender()
|
||||
gaze = MobileGaze()
|
||||
|
||||
image = cv2.imread(image_path)
|
||||
faces = detector.detect(image)
|
||||
|
||||
for face in faces:
|
||||
x1, y1, x2, y2 = map(int, face.bbox)
|
||||
|
||||
# Draw bounding box
|
||||
cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
|
||||
|
||||
# Age and gender
|
||||
attrs = age_gender.predict(image, face.bbox)
|
||||
label = f"{attrs.sex}, {attrs.age}y"
|
||||
cv2.putText(image, label, (x1, y1 - 10),
|
||||
cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
|
||||
|
||||
# Gaze
|
||||
face_crop = image[y1:y2, x1:x2]
|
||||
if face_crop.size > 0:
|
||||
gaze_result = gaze.estimate(face_crop)
|
||||
draw_gaze(image, face.bbox, gaze_result.pitch, gaze_result.yaw)
|
||||
|
||||
# Confidence
|
||||
conf_label = f"{face.confidence:.0%}"
|
||||
cv2.putText(image, conf_label, (x1, y2 + 20),
|
||||
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
|
||||
|
||||
cv2.imwrite(output_path, image)
|
||||
print(f"Saved to {output_path}")
|
||||
|
||||
# Usage
|
||||
visualize_analysis("input.jpg", "output.jpg")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## JSON Output
|
||||
|
||||
Export results to JSON:
|
||||
|
||||
```python
|
||||
import json
|
||||
import numpy as np
|
||||
|
||||
def results_to_json(results):
    """Convert analysis results to JSON-serializable format.

    Args:
        results: Iterable of per-face dicts. Each must contain 'bbox'
            (an object with ``.tolist()``) and 'confidence'. The keys
            'age', 'gender', 'race', 'is_real', 'gaze_pitch' and
            'gaze_yaw' are optional.

    Returns:
        A list with one dict per face. Missing optional values become
        ``None``; gaze angles are converted from radians to degrees.
    """
    output = []

    for r in results:
        age = r.get('age')
        item = {
            'bbox': r['bbox'].tolist(),
            'confidence': float(r['confidence']),
            # Compare against None explicitly so a predicted age of 0 is kept.
            'age': int(age) if age is not None else None,
            'gender': r.get('gender'),
            'race': r.get('race'),
            'is_real': r.get('is_real'),
            'gaze': {
                'pitch_deg': float(np.degrees(r['gaze_pitch'])) if 'gaze_pitch' in r else None,
                'yaw_deg': float(np.degrees(r['gaze_yaw'])) if 'gaze_yaw' in r else None
            }
        }
        output.append(item)

    return output
|
||||
|
||||
# Usage
|
||||
results = pipeline.analyze(image)
|
||||
json_data = results_to_json(results)
|
||||
|
||||
with open('results.json', 'w') as f:
|
||||
json.dump(json_data, f, indent=2)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Next Steps
|
||||
|
||||
- [Batch Processing](batch-processing.md) - Process multiple images
|
||||
- [Video & Webcam](video-webcam.md) - Real-time processing
|
||||
- [Face Search](face-search.md) - Build a search system
|
||||
- [Detection Module](../modules/detection.md) - Detection options
|
||||
- [Recognition Module](../modules/recognition.md) - Recognition details
|
||||
125
docs/recipes/video-webcam.md
Normal file
125
docs/recipes/video-webcam.md
Normal file
@@ -0,0 +1,125 @@
|
||||
# Video & Webcam
|
||||
|
||||
Real-time face analysis for video streams.
|
||||
|
||||
!!! note "Work in Progress"
|
||||
This page contains example code patterns. Test thoroughly before using in production.
|
||||
|
||||
---
|
||||
|
||||
## Webcam Detection
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
detector = RetinaFace()
|
||||
cap = cv2.VideoCapture(0)
|
||||
|
||||
print("Press 'q' to quit")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
faces = detector.detect(frame)
|
||||
|
||||
draw_detections(
|
||||
image=frame,
|
||||
bboxes=[f.bbox for f in faces],
|
||||
scores=[f.confidence for f in faces],
|
||||
landmarks=[f.landmarks for f in faces]
|
||||
)
|
||||
|
||||
cv2.imshow("Face Detection", frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Video File Processing
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace
|
||||
|
||||
def process_video(input_path, output_path):
|
||||
"""Process a video file."""
|
||||
detector = RetinaFace()
|
||||
cap = cv2.VideoCapture(input_path)
|
||||
|
||||
# Get video properties
|
||||
fps = cap.get(cv2.CAP_PROP_FPS)
|
||||
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
||||
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
||||
|
||||
# Setup output
|
||||
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
|
||||
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
|
||||
|
||||
while cap.read()[0]:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
faces = detector.detect(frame)
|
||||
# ... process and draw ...
|
||||
|
||||
out.write(frame)
|
||||
|
||||
cap.release()
|
||||
out.release()
|
||||
|
||||
# Usage
|
||||
process_video("input.mp4", "output.mp4")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Performance Tips
|
||||
|
||||
### Skip Frames
|
||||
|
||||
```python
|
||||
PROCESS_EVERY_N = 3 # Process every 3rd frame
|
||||
frame_count = 0
|
||||
last_faces = []
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if frame_count % PROCESS_EVERY_N == 0:
|
||||
last_faces = detector.detect(frame)
|
||||
frame_count += 1
|
||||
# Draw last_faces...
|
||||
```
|
||||
|
||||
### FPS Counter
|
||||
|
||||
```python
|
||||
import time
|
||||
|
||||
prev_time = time.time()
|
||||
while True:
|
||||
curr_time = time.time()
|
||||
fps = 1 / (curr_time - prev_time)
|
||||
prev_time = curr_time
|
||||
|
||||
cv2.putText(frame, f"FPS: {fps:.1f}", (10, 30),
|
||||
cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## See Also
|
||||
|
||||
- [Anonymize Stream](anonymize-stream.md) - Privacy protection in video
|
||||
- [Batch Processing](batch-processing.md) - Process multiple files
|
||||
- [Detection Module](../modules/detection.md) - Detection options
|
||||
- [Gaze Module](../modules/gaze.md) - Gaze tracking
|
||||
225
docs/stylesheets/extra.css
Normal file
225
docs/stylesheets/extra.css
Normal file
@@ -0,0 +1,225 @@
|
||||
/* UniFace Documentation - Custom Styles */
|
||||
|
||||
/* ===== Hero Section ===== */
|
||||
|
||||
.md-content .hero {
|
||||
text-align: center;
|
||||
padding: 3rem 1rem 2rem;
|
||||
margin: 0 auto;
|
||||
max-width: 900px;
|
||||
}
|
||||
|
||||
.hero-title {
|
||||
font-size: 3.5rem !important;
|
||||
font-weight: 800 !important;
|
||||
margin-bottom: 0.5rem !important;
|
||||
background: linear-gradient(135deg, var(--md-primary-fg-color) 0%, #7c4dff 100%);
|
||||
-webkit-background-clip: text;
|
||||
-webkit-text-fill-color: transparent;
|
||||
background-clip: text;
|
||||
}
|
||||
|
||||
.hero-tagline {
|
||||
font-size: 1.5rem;
|
||||
color: var(--md-default-fg-color);
|
||||
margin-bottom: 0.5rem !important;
|
||||
font-weight: 500;
|
||||
}
|
||||
|
||||
.hero-subtitle {
|
||||
font-size: 1rem;
|
||||
color: var(--md-default-fg-color--light);
|
||||
margin-bottom: 1.5rem !important;
|
||||
font-weight: 400;
|
||||
letter-spacing: 0.5px;
|
||||
}
|
||||
|
||||
.hero .md-button {
|
||||
margin: 0.5rem 0.25rem;
|
||||
padding: 0.7rem 1.5rem;
|
||||
font-weight: 600;
|
||||
border-radius: 8px;
|
||||
transition: all 0.2s ease;
|
||||
}
|
||||
|
||||
.hero .md-button--primary {
|
||||
background: linear-gradient(135deg, var(--md-primary-fg-color) 0%, #5c6bc0 100%);
|
||||
border: none;
|
||||
box-shadow: 0 4px 14px rgba(63, 81, 181, 0.4);
|
||||
}
|
||||
|
||||
.hero .md-button--primary:hover {
|
||||
transform: translateY(-2px);
|
||||
box-shadow: 0 6px 20px rgba(63, 81, 181, 0.5);
|
||||
}
|
||||
|
||||
.hero .md-button:not(.md-button--primary) {
|
||||
border: 2px solid var(--md-primary-fg-color);
|
||||
background: transparent;
|
||||
color: var(--md-primary-fg-color);
|
||||
}
|
||||
|
||||
.hero .md-button:not(.md-button--primary):hover {
|
||||
background: var(--md-primary-fg-color);
|
||||
border-color: var(--md-primary-fg-color);
|
||||
color: white;
|
||||
transform: translateY(-2px);
|
||||
}
|
||||
|
||||
/* Badge styling in hero */
|
||||
.hero p a img {
|
||||
margin: 0 3px;
|
||||
height: 24px !important;
|
||||
}
|
||||
|
||||
/* ===== Feature Grid ===== */
|
||||
.feature-grid {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(4, 1fr);
|
||||
gap: 1.25rem;
|
||||
margin: 2rem 0;
|
||||
}
|
||||
|
||||
.feature-card {
|
||||
padding: 1.5rem;
|
||||
border-radius: 12px;
|
||||
background: var(--md-code-bg-color);
|
||||
border: 1px solid var(--md-default-fg-color--lightest);
|
||||
transition: all 0.3s ease;
|
||||
position: relative;
|
||||
overflow: hidden;
|
||||
}
|
||||
|
||||
.feature-card::before {
|
||||
content: '';
|
||||
position: absolute;
|
||||
top: 0;
|
||||
left: 0;
|
||||
right: 0;
|
||||
height: 3px;
|
||||
background: linear-gradient(90deg, var(--md-primary-fg-color), #7c4dff);
|
||||
opacity: 0;
|
||||
transition: opacity 0.3s ease;
|
||||
}
|
||||
|
||||
.feature-card:hover {
|
||||
transform: translateY(-4px);
|
||||
box-shadow: 0 12px 24px rgba(0, 0, 0, 0.1);
|
||||
border-color: var(--md-primary-fg-color--light);
|
||||
}
|
||||
|
||||
.feature-card:hover::before {
|
||||
opacity: 1;
|
||||
}
|
||||
|
||||
.feature-card h3 {
|
||||
margin-top: 0 !important;
|
||||
margin-bottom: 0.75rem !important;
|
||||
font-size: 1rem !important;
|
||||
font-weight: 600;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.5rem;
|
||||
}
|
||||
|
||||
.feature-card p {
|
||||
margin: 0;
|
||||
font-size: 0.875rem;
|
||||
color: var(--md-default-fg-color--light);
|
||||
line-height: 1.5;
|
||||
}
|
||||
|
||||
.feature-card a {
|
||||
display: inline-block;
|
||||
margin-top: 0.75rem;
|
||||
font-weight: 500;
|
||||
font-size: 0.875rem;
|
||||
}
|
||||
|
||||
/* ===== Next Steps Grid (2 columns) ===== */
|
||||
.next-steps-grid {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(2, 1fr);
|
||||
gap: 1.25rem;
|
||||
margin: 2rem 0;
|
||||
}
|
||||
|
||||
.next-steps-grid .feature-card {
|
||||
padding: 2rem;
|
||||
}
|
||||
|
||||
.next-steps-grid .feature-card h3 {
|
||||
font-size: 1.1rem !important;
|
||||
}
|
||||
|
||||
/* ===== Dark Mode Adjustments ===== */
|
||||
[data-md-color-scheme="slate"] .hero-title {
|
||||
background: linear-gradient(135deg, #7c4dff 0%, #b388ff 100%);
|
||||
-webkit-background-clip: text;
|
||||
-webkit-text-fill-color: transparent;
|
||||
background-clip: text;
|
||||
}
|
||||
|
||||
[data-md-color-scheme="slate"] .feature-card:hover {
|
||||
box-shadow: 0 12px 24px rgba(0, 0, 0, 0.3);
|
||||
}
|
||||
|
||||
[data-md-color-scheme="slate"] .hero .md-button--primary {
|
||||
background: linear-gradient(135deg, #7c4dff 0%, #b388ff 100%);
|
||||
box-shadow: 0 4px 14px rgba(124, 77, 255, 0.4);
|
||||
}
|
||||
|
||||
[data-md-color-scheme="slate"] .hero .md-button--primary:hover {
|
||||
box-shadow: 0 6px 20px rgba(124, 77, 255, 0.5);
|
||||
}
|
||||
|
||||
[data-md-color-scheme="slate"] .hero .md-button:not(.md-button--primary) {
|
||||
border: 2px solid rgba(255, 255, 255, 0.3);
|
||||
background: rgba(255, 255, 255, 0.05);
|
||||
color: rgba(255, 255, 255, 0.9);
|
||||
}
|
||||
|
||||
[data-md-color-scheme="slate"] .hero .md-button:not(.md-button--primary):hover {
|
||||
background: rgba(255, 255, 255, 0.1);
|
||||
border-color: rgba(255, 255, 255, 0.5);
|
||||
color: white;
|
||||
transform: translateY(-2px);
|
||||
}
|
||||
|
||||
/* ===== Responsive Design ===== */
|
||||
@media (max-width: 1200px) {
|
||||
.feature-grid {
|
||||
grid-template-columns: repeat(2, 1fr);
|
||||
}
|
||||
}
|
||||
|
||||
@media (max-width: 768px) {
|
||||
.hero-title {
|
||||
font-size: 2.5rem !important;
|
||||
}
|
||||
|
||||
.hero-subtitle {
|
||||
font-size: 1.1rem;
|
||||
}
|
||||
|
||||
.feature-grid,
|
||||
.next-steps-grid {
|
||||
grid-template-columns: 1fr;
|
||||
}
|
||||
|
||||
.hero .md-button {
|
||||
display: block;
|
||||
margin: 0.5rem auto;
|
||||
max-width: 200px;
|
||||
}
|
||||
}
|
||||
|
||||
@media (max-width: 480px) {
|
||||
.hero-title {
|
||||
font-size: 2rem !important;
|
||||
}
|
||||
|
||||
.feature-card {
|
||||
padding: 1.25rem;
|
||||
}
|
||||
}
|
||||
311
examples/01_face_detection.ipynb
Normal file
311
examples/01_face_detection.ipynb
Normal file
File diff suppressed because one or more lines are too long
239
examples/02_face_alignment.ipynb
Normal file
239
examples/02_face_alignment.ipynb
Normal file
File diff suppressed because one or more lines are too long
271
examples/03_face_verification.ipynb
Normal file
271
examples/03_face_verification.ipynb
Normal file
File diff suppressed because one or more lines are too long
375
examples/04_face_search.ipynb
Normal file
375
examples/04_face_search.ipynb
Normal file
File diff suppressed because one or more lines are too long
324
examples/05_face_analyzer.ipynb
Normal file
324
examples/05_face_analyzer.ipynb
Normal file
File diff suppressed because one or more lines are too long
394
examples/06_face_parsing.ipynb
Normal file
394
examples/06_face_parsing.ipynb
Normal file
File diff suppressed because one or more lines are too long
324
examples/07_face_anonymization.ipynb
Normal file
324
examples/07_face_anonymization.ipynb
Normal file
File diff suppressed because one or more lines are too long
268
examples/08_gaze_estimation.ipynb
Normal file
268
examples/08_gaze_estimation.ipynb
Normal file
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
164
mkdocs.yml
Normal file
164
mkdocs.yml
Normal file
@@ -0,0 +1,164 @@
|
||||
site_name: UniFace
|
||||
site_description: All-in-One Face Analysis Library with ONNX Runtime
|
||||
site_author: Yakhyokhuja Valikhujaev
|
||||
site_url: https://yakhyo.github.io/uniface
|
||||
|
||||
repo_name: yakhyo/uniface
|
||||
repo_url: https://github.com/yakhyo/uniface
|
||||
edit_uri: edit/main/docs/
|
||||
|
||||
copyright: Copyright © 2025 Yakhyokhuja Valikhujaev
|
||||
|
||||
theme:
|
||||
name: material
|
||||
custom_dir: docs/overrides
|
||||
palette:
|
||||
- media: "(prefers-color-scheme)"
|
||||
toggle:
|
||||
icon: material/link
|
||||
name: Switch to light mode
|
||||
- media: "(prefers-color-scheme: light)"
|
||||
scheme: default
|
||||
primary: indigo
|
||||
accent: indigo
|
||||
toggle:
|
||||
icon: material/toggle-switch
|
||||
name: Switch to dark mode
|
||||
- media: "(prefers-color-scheme: dark)"
|
||||
scheme: slate
|
||||
primary: black
|
||||
accent: indigo
|
||||
toggle:
|
||||
icon: material/toggle-switch-off-outline
|
||||
name: Switch to system preference
|
||||
font:
|
||||
text: Roboto
|
||||
code: Roboto Mono
|
||||
features:
|
||||
- navigation.tabs
|
||||
- navigation.top
|
||||
- navigation.footer
|
||||
- navigation.indexes
|
||||
- navigation.instant
|
||||
- navigation.tracking
|
||||
- search.suggest
|
||||
- search.highlight
|
||||
- content.code.copy
|
||||
- content.code.annotate
|
||||
- content.action.edit
|
||||
- content.action.view
|
||||
- content.tabs.link
|
||||
- toc.follow
|
||||
|
||||
icon:
|
||||
logo: material/book-open-page-variant
|
||||
repo: fontawesome/brands/git-alt
|
||||
admonition:
|
||||
note: octicons/tag-16
|
||||
abstract: octicons/checklist-16
|
||||
info: octicons/info-16
|
||||
tip: octicons/squirrel-16
|
||||
success: octicons/check-16
|
||||
question: octicons/question-16
|
||||
warning: octicons/alert-16
|
||||
failure: octicons/x-circle-16
|
||||
danger: octicons/zap-16
|
||||
bug: octicons/bug-16
|
||||
example: octicons/beaker-16
|
||||
quote: octicons/quote-16
|
||||
|
||||
extra:
|
||||
social:
|
||||
- icon: fontawesome/brands/github
|
||||
link: https://github.com/yakhyo
|
||||
- icon: fontawesome/brands/python
|
||||
link: https://pypi.org/project/uniface/
|
||||
- icon: fontawesome/brands/x-twitter
|
||||
link: https://x.com/y_valikhujaev
|
||||
analytics:
|
||||
provider: google
|
||||
# NOTE(review): "G-XXXXXXXXXX" looks like a placeholder measurement ID - confirm and replace it (or drop the analytics block) before publishing.
property: G-XXXXXXXXXX
|
||||
|
||||
extra_css:
|
||||
- stylesheets/extra.css
|
||||
|
||||
markdown_extensions:
|
||||
- admonition
|
||||
- footnotes
|
||||
- attr_list
|
||||
- md_in_html
|
||||
- def_list
|
||||
- tables
|
||||
- toc:
|
||||
permalink: false
|
||||
toc_depth: 3
|
||||
- pymdownx.superfences:
|
||||
custom_fences:
|
||||
- name: mermaid
|
||||
class: mermaid
|
||||
format: !!python/name:pymdownx.superfences.fence_code_format
|
||||
- pymdownx.details
|
||||
- pymdownx.highlight:
|
||||
anchor_linenums: true
|
||||
line_spans: __span
|
||||
pygments_lang_class: true
|
||||
- pymdownx.inlinehilite
|
||||
- pymdownx.snippets
|
||||
- pymdownx.tabbed:
|
||||
alternate_style: true
|
||||
- pymdownx.emoji:
|
||||
emoji_index: !!python/name:material.extensions.emoji.twemoji
|
||||
emoji_generator: !!python/name:material.extensions.emoji.to_svg
|
||||
- pymdownx.tasklist:
|
||||
custom_checkbox: true
|
||||
- pymdownx.keys
|
||||
- pymdownx.mark
|
||||
- pymdownx.critic
|
||||
- pymdownx.caret
|
||||
- pymdownx.tilde
|
||||
|
||||
plugins:
|
||||
- search
|
||||
- git-committers:
|
||||
repository: yakhyo/uniface
|
||||
branch: main
|
||||
token: !ENV MKDOCS_GIT_COMMITTERS_APIKEY
|
||||
- git-revision-date-localized:
|
||||
enable_creation_date: true
|
||||
type: timeago
|
||||
|
||||
nav:
|
||||
- Home: index.md
|
||||
- Getting Started:
|
||||
- Installation: installation.md
|
||||
- Quickstart: quickstart.md
|
||||
- Notebooks: notebooks.md
|
||||
- Model Zoo: models.md
|
||||
- Tutorials:
|
||||
- Image Pipeline: recipes/image-pipeline.md
|
||||
- Video & Webcam: recipes/video-webcam.md
|
||||
- Face Search: recipes/face-search.md
|
||||
- Batch Processing: recipes/batch-processing.md
|
||||
- Anonymize Stream: recipes/anonymize-stream.md
|
||||
- Custom Models: recipes/custom-models.md
|
||||
- API Reference:
|
||||
- Detection: modules/detection.md
|
||||
- Recognition: modules/recognition.md
|
||||
- Landmarks: modules/landmarks.md
|
||||
- Attributes: modules/attributes.md
|
||||
- Parsing: modules/parsing.md
|
||||
- Gaze: modules/gaze.md
|
||||
- Anti-Spoofing: modules/spoofing.md
|
||||
- Privacy: modules/privacy.md
|
||||
- Guides:
|
||||
- Overview: concepts/overview.md
|
||||
- Inputs & Outputs: concepts/inputs-outputs.md
|
||||
- Coordinate Systems: concepts/coordinate-systems.md
|
||||
- Execution Providers: concepts/execution-providers.md
|
||||
- Model Cache: concepts/model-cache-offline.md
|
||||
- Thresholds: concepts/thresholds-calibration.md
|
||||
- Resources:
|
||||
- Contributing: contributing.md
|
||||
- License: license-attribution.md
|
||||
- Releases: https://github.com/yakhyo/uniface/releases
|
||||
- Discussions: https://github.com/yakhyo/uniface/discussions
|
||||
130
pyproject.toml
130
pyproject.toml
@@ -1,12 +1,43 @@
|
||||
[project]
|
||||
name = "uniface"
|
||||
version = "1.1.0"
|
||||
description = "UniFace: A Comprehensive Library for Face Detection, Recognition, Landmark Analysis, Age, and Gender Detection"
|
||||
version = "2.0.2"
|
||||
description = "UniFace: A Comprehensive Library for Face Detection, Recognition, Landmark Analysis, Face Parsing, Gaze Estimation, Age, and Gender Detection"
|
||||
readme = "README.md"
|
||||
license = { text = "MIT" }
|
||||
authors = [
|
||||
{ name = "Yakhyokhuja Valikhujaev", email = "yakhyo9696@gmail.com" }
|
||||
license = "MIT"
|
||||
authors = [{ name = "Yakhyokhuja Valikhujaev", email = "yakhyo9696@gmail.com" }]
|
||||
maintainers = [
|
||||
{ name = "Yakhyokhuja Valikhujaev", email = "yakhyo9696@gmail.com" },
|
||||
]
|
||||
|
||||
requires-python = ">=3.11,<3.14"
|
||||
keywords = [
|
||||
"face-detection",
|
||||
"face-recognition",
|
||||
"facial-landmarks",
|
||||
"face-parsing",
|
||||
"face-segmentation",
|
||||
"gaze-estimation",
|
||||
"age-detection",
|
||||
"gender-detection",
|
||||
"computer-vision",
|
||||
"deep-learning",
|
||||
"onnx",
|
||||
"onnxruntime",
|
||||
"face-analysis",
|
||||
"bisenet",
|
||||
]
|
||||
|
||||
classifiers = [
|
||||
"Development Status :: 4 - Beta",
|
||||
"Intended Audience :: Developers",
|
||||
"Intended Audience :: Science/Research",
|
||||
"Operating System :: OS Independent",
|
||||
"Programming Language :: Python :: 3",
|
||||
"Programming Language :: Python :: 3.11",
|
||||
"Programming Language :: Python :: 3.12",
|
||||
"Programming Language :: Python :: 3.13",
|
||||
]
|
||||
|
||||
dependencies = [
|
||||
"numpy>=1.21.0",
|
||||
"opencv-python>=4.5.0",
|
||||
@@ -14,24 +45,103 @@ dependencies = [
|
||||
"onnxruntime>=1.16.0",
|
||||
"scikit-image>=0.19.0",
|
||||
"requests>=2.28.0",
|
||||
"tqdm>=4.64.0"
|
||||
"tqdm>=4.64.0",
|
||||
]
|
||||
requires-python = ">=3.10"
|
||||
|
||||
[project.optional-dependencies]
|
||||
dev = ["pytest>=7.0.0"]
|
||||
dev = ["pytest>=7.0.0", "ruff>=0.4.0"]
|
||||
gpu = ["onnxruntime-gpu>=1.16.0"]
|
||||
|
||||
[project.urls]
|
||||
Homepage = "https://github.com/yakhyo/uniface"
|
||||
Repository = "https://github.com/yakhyo/uniface"
|
||||
Documentation = "https://github.com/yakhyo/uniface/blob/main/README.md"
|
||||
"Quick Start" = "https://github.com/yakhyo/uniface/blob/main/QUICKSTART.md"
|
||||
"Model Zoo" = "https://github.com/yakhyo/uniface/blob/main/MODELS.md"
|
||||
|
||||
[build-system]
|
||||
requires = ["setuptools>=64", "wheel"]
|
||||
build-backend = "setuptools.build_meta"
|
||||
|
||||
[tool.setuptools]
|
||||
packages = { find = {} }
|
||||
packages = { find = { where = ["."], include = ["uniface*"] } }
|
||||
|
||||
[tool.setuptools.package-data]
|
||||
"uniface" = ["*.txt", "*.md"]
|
||||
uniface = ["py.typed"]
|
||||
|
||||
[tool.ruff]
|
||||
line-length = 120
|
||||
target-version = "py311"
|
||||
exclude = [
|
||||
".git",
|
||||
".ruff_cache",
|
||||
"__pycache__",
|
||||
"build",
|
||||
"dist",
|
||||
"*.egg-info",
|
||||
".venv",
|
||||
"venv",
|
||||
".pytest_cache",
|
||||
".mypy_cache",
|
||||
"*.ipynb",
|
||||
]
|
||||
|
||||
[tool.ruff.format]
|
||||
quote-style = "single"
|
||||
docstring-code-format = true
|
||||
|
||||
[tool.ruff.lint]
|
||||
select = [
|
||||
"E", # pycodestyle errors
|
||||
"F", # pyflakes
|
||||
"I", # isort
|
||||
"W", # pycodestyle warnings
|
||||
"UP", # pyupgrade (modern Python syntax)
|
||||
"B", # flake8-bugbear
|
||||
"C4", # flake8-comprehensions
|
||||
"SIM", # flake8-simplify
|
||||
"RUF", # Ruff-specific rules
|
||||
]
|
||||
ignore = [
|
||||
"E501", # Line too long (handled by formatter)
|
||||
"B008", # Function call in default argument (common in FastAPI/Click)
|
||||
"SIM108", # Use ternary operator (can reduce readability)
|
||||
"RUF022", # Allow logical grouping in __all__ instead of alphabetical sorting
|
||||
]
|
||||
|
||||
[tool.ruff.lint.flake8-quotes]
|
||||
docstring-quotes = "double"
|
||||
|
||||
[tool.ruff.lint.isort]
|
||||
force-single-line = false
|
||||
force-sort-within-sections = true
|
||||
known-first-party = ["uniface"]
|
||||
section-order = [
|
||||
"future",
|
||||
"standard-library",
|
||||
"third-party",
|
||||
"first-party",
|
||||
"local-folder",
|
||||
]
|
||||
|
||||
[tool.ruff.lint.pydocstyle]
|
||||
convention = "google"
|
||||
|
||||
[tool.mypy]
|
||||
python_version = "3.11"
|
||||
warn_return_any = false
|
||||
warn_unused_ignores = true
|
||||
ignore_missing_imports = true
|
||||
exclude = ["tests/", "scripts/", "examples/"]
|
||||
# Disable strict return type checking for numpy operations
|
||||
disable_error_code = ["no-any-return"]
|
||||
|
||||
[tool.bandit]
|
||||
exclude_dirs = ["tests", "scripts", "examples"]
|
||||
skips = ["B101", "B614"] # B101: assert, B614: torch.jit.load (models are SHA256 verified)
|
||||
|
||||
[tool.pytest.ini_options]
|
||||
testpaths = ["tests"]
|
||||
python_files = ["test_*.py"]
|
||||
python_functions = ["test_*"]
|
||||
addopts = "-v --tb=short"
|
||||
|
||||
@@ -1,97 +0,0 @@
|
||||
# Scripts
|
||||
|
||||
Collection of example scripts demonstrating UniFace functionality.
|
||||
|
||||
## Available Scripts
|
||||
|
||||
- `run_detection.py` - Face detection on images
|
||||
- `run_age_gender.py` - Age and gender prediction
|
||||
- `run_landmarks.py` - Facial landmark detection
|
||||
- `run_recognition.py` - Face recognition and embeddings
|
||||
- `run_face_search.py` - Face search and matching
|
||||
- `run_video_detection.py` - Video processing with face detection
|
||||
- `batch_process.py` - Batch processing of image folders
|
||||
- `download_model.py` - Download and manage models
|
||||
|
||||
## Quick Start
|
||||
|
||||
```bash
|
||||
# Face detection
|
||||
python scripts/run_detection.py --image assets/test.jpg
|
||||
|
||||
# Age and gender detection
|
||||
python scripts/run_age_gender.py --image assets/test.jpg
|
||||
|
||||
# Webcam demo
|
||||
python scripts/run_age_gender.py --webcam
|
||||
|
||||
# Batch processing
|
||||
python scripts/batch_process.py --input images/ --output results/
|
||||
```
|
||||
|
||||
## Import Examples
|
||||
|
||||
The scripts use direct class imports for better developer experience:
|
||||
|
||||
```python
|
||||
# Face Detection
|
||||
from uniface.detection import RetinaFace, SCRFD
|
||||
|
||||
detector = RetinaFace() # or SCRFD()
|
||||
faces = detector.detect(image)
|
||||
|
||||
# Face Recognition
|
||||
from uniface.recognition import ArcFace, MobileFace, SphereFace
|
||||
|
||||
recognizer = ArcFace() # or MobileFace(), SphereFace()
|
||||
embedding = recognizer.get_embedding(image, landmarks)
|
||||
|
||||
# Age & Gender
|
||||
from uniface.attribute import AgeGender
|
||||
|
||||
age_gender = AgeGender()
|
||||
gender, age = age_gender.predict(image, bbox)
|
||||
|
||||
# Landmarks
|
||||
from uniface.landmark import Landmark106
|
||||
|
||||
landmarker = Landmark106()
|
||||
landmarks = landmarker.get_landmarks(image, bbox)
|
||||
```
|
||||
|
||||
## Available Classes
|
||||
|
||||
**Detection:**
|
||||
- `RetinaFace` - High accuracy face detection
|
||||
- `SCRFD` - Fast face detection
|
||||
|
||||
**Recognition:**
|
||||
- `ArcFace` - High accuracy face recognition
|
||||
- `MobileFace` - Lightweight face recognition
|
||||
- `SphereFace` - Alternative face recognition
|
||||
|
||||
**Attributes:**
|
||||
- `AgeGender` - Age and gender prediction
|
||||
|
||||
**Landmarks:**
|
||||
- `Landmark106` - 106-point facial landmarks
|
||||
|
||||
## Common Options
|
||||
|
||||
Most scripts support:
|
||||
- `--help` - Show usage information
|
||||
- `--verbose` - Enable detailed logging
|
||||
- `--detector` - Choose detector (retinaface, scrfd)
|
||||
- `--threshold` - Set confidence threshold
|
||||
|
||||
## Testing
|
||||
|
||||
Run basic functionality test:
|
||||
```bash
|
||||
python scripts/run_detection.py --image assets/test.jpg
|
||||
```
|
||||
|
||||
For comprehensive testing, see the main project tests:
|
||||
```bash
|
||||
pytest tests/
|
||||
```
|
||||
@@ -1,157 +0,0 @@
|
||||
"""Batch Image Processing Script"""
|
||||
|
||||
import os
|
||||
import cv2
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
from tqdm import tqdm
|
||||
|
||||
from uniface import RetinaFace, SCRFD
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
|
||||
def get_image_files(input_dir: Path, extensions: tuple) -> list:
    """Return the image files directly inside ``input_dir``, sorted by path.

    Args:
        input_dir: Directory to scan (non-recursive).
        extensions: Extensions to accept, with or without a leading dot
            (e.g. ``("jpg", ".png")``). Matching is case-insensitive.

    Returns:
        Sorted list of ``Path`` objects with each file listed exactly once.
        An empty list is returned when ``input_dir`` is not a directory.
    """
    if not input_dir.is_dir():
        return []

    # Normalise once so "jpg", ".jpg" and "JPG" all describe the same suffix;
    # blank entries are dropped instead of turning into a match-all pattern.
    suffixes = tuple(
        {f".{ext.strip().lstrip('.').lower()}" for ext in extensions if ext.strip().lstrip('.')}
    )

    # One directory scan with a case-insensitive suffix test replaces the two
    # globs per extension, which returned duplicates (upper-case extension or
    # case-insensitive filesystem) and missed mixed-case names such as "x.Jpg".
    return sorted(
        entry
        for entry in input_dir.iterdir()
        if entry.is_file() and entry.name.lower().endswith(suffixes)
    )
|
||||
|
||||
|
||||
def process_single_image(detector, image_path: Path, output_dir: Path,
                         vis_threshold: float, skip_existing: bool) -> dict:
    """Detect faces in one image and save an annotated copy.

    The annotated image is written to ``output_dir`` as
    ``<stem>_detected<suffix>``.

    Args:
        detector: Object whose ``detect(image)`` returns a list of dicts with
            ``'bbox'``, ``'confidence'`` and ``'landmarks'`` entries.
        image_path: Image to process.
        output_dir: Directory that receives the annotated image.
        vis_threshold: Threshold forwarded to ``draw_detections``.
        skip_existing: When true, do nothing if the output file already exists.

    Returns:
        ``{"status": "success", "faces": n}``, ``{"status": "skipped",
        "faces": 0}`` or ``{"status": "error", "error": message}``.
    """
    output_path = output_dir / f"{image_path.stem}_detected{image_path.suffix}"

    # Skip if already processed
    if skip_existing and output_path.exists():
        return {"status": "skipped", "faces": 0}

    # Load image
    image = cv2.imread(str(image_path))
    if image is None:
        return {"status": "error", "error": "Failed to load image"}

    # Detect faces; a failure on one image must not abort the whole batch.
    try:
        faces = detector.detect(image)
    except Exception as e:
        return {"status": "error", "error": str(e)}

    # Draw detections
    bboxes = [f['bbox'] for f in faces]
    scores = [f['confidence'] for f in faces]
    landmarks = [f['landmarks'] for f in faces]
    draw_detections(image, bboxes, scores, landmarks, vis_threshold=vis_threshold)

    # Add face count
    cv2.putText(image, f"Faces: {len(faces)}", (10, 30),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

    # Save result. cv2.imwrite signals failure through its return value (or a
    # cv2.error for an unsupported extension); report both as an error instead
    # of counting an unsaved image as a success.
    try:
        written = cv2.imwrite(str(output_path), image)
    except cv2.error as e:
        return {"status": "error", "error": str(e)}
    if not written:
        return {"status": "error", "error": f"Failed to write output image: {output_path}"}

    return {"status": "success", "faces": len(faces)}
|
||||
|
||||
|
||||
def batch_process(detector, input_dir: str, output_dir: str, extensions: tuple,
                  vis_threshold: float, skip_existing: bool):
    """Run face detection over every matching image in ``input_dir``.

    Creates ``output_dir`` if needed, processes each image with
    ``process_single_image`` behind a progress bar, and prints a summary of
    successes, skips, errors and detected faces.

    Args:
        detector: Face detector forwarded to ``process_single_image``.
        input_dir: Directory containing the source images.
        output_dir: Directory that receives the annotated images.
        extensions: Image extensions forwarded to ``get_image_files``.
        vis_threshold: Visualization threshold forwarded per image.
        skip_existing: Skip images whose output file already exists.
    """
    source_dir = Path(input_dir)
    target_dir = Path(output_dir)
    target_dir.mkdir(parents=True, exist_ok=True)

    candidates = get_image_files(source_dir, extensions)
    if not candidates:
        print(f"No image files found in '{input_dir}' with extensions {extensions}")
        return

    print(f"Input: {input_dir}")
    print(f"Output: {output_dir}")
    print(f"Found {len(candidates)} images\n")

    tally = {"success": 0, "skipped": 0, "error": 0, "total_faces": 0}

    with tqdm(candidates, desc="Processing images", unit="img") as progress:
        for current in progress:
            outcome = process_single_image(
                detector, current, target_dir, vis_threshold, skip_existing
            )
            status = outcome["status"]

            if status == "skipped":
                tally["skipped"] += 1
                continue

            if status != "success":
                tally["error"] += 1
                print(f"\nError processing {current.name}: {outcome.get('error', 'Unknown error')}")
                continue

            tally["success"] += 1
            tally["total_faces"] += outcome["faces"]
            progress.set_postfix({"faces": outcome["faces"]})

    # Summary
    succeeded = tally["success"]
    print("\nBatch processing complete!")
    print(f" Total images: {len(candidates)}")
    print(f" Successfully processed: {succeeded}")
    print(f" Skipped: {tally['skipped']}")
    print(f" Errors: {tally['error']}")
    print(f" Total faces detected: {tally['total_faces']}")
    if succeeded > 0:
        print(f" Average faces per image: {tally['total_faces']/succeeded:.2f}")
    print(f"\nResults saved to: {output_dir}")
|
||||
|
||||
|
||||
def main():
    """CLI entry point: annotate every image in a folder with detected faces.

    Parses the command line, validates the input directory and extension list,
    builds the requested detector and hands off to ``batch_process``.
    """
    parser = argparse.ArgumentParser(description="Batch process images with face detection")
    parser.add_argument("--input", type=str, required=True,
                        help="Input directory containing images")
    parser.add_argument("--output", type=str, required=True,
                        help="Output directory for processed images")
    parser.add_argument("--detector", type=str, default="retinaface",
                        choices=['retinaface', 'scrfd'], help="Face detector to use")
    parser.add_argument("--threshold", type=float, default=0.6,
                        help="Confidence threshold for visualization")
    parser.add_argument("--extensions", type=str, default="jpg,jpeg,png,bmp",
                        help="Comma-separated list of image extensions")
    parser.add_argument("--skip_existing", action="store_true",
                        help="Skip files that already exist in output directory")
    parser.add_argument("--verbose", action="store_true", help="Enable verbose logging")

    args = parser.parse_args()

    # The input must be a directory; a plain exists() check also let a regular
    # file through, which only failed later after the detector was loaded.
    if not Path(args.input).is_dir():
        print(f"Error: Input directory '{args.input}' does not exist or is not a directory")
        return

    # Parse extensions, dropping blanks such as the one produced by "jpg,,png"
    # or a trailing comma.
    extensions = tuple(ext.strip() for ext in args.extensions.split(',') if ext.strip())
    if not extensions:
        print("Error: --extensions must list at least one extension")
        return

    if args.verbose:
        from uniface import enable_logging
        enable_logging()

    # Initialize detector
    print(f"Initializing detector: {args.detector}")
    if args.detector == 'retinaface':
        detector = RetinaFace()
    else:
        detector = SCRFD()
    print("Detector initialized\n")

    # Process batch
    batch_process(detector, args.input, args.output, extensions,
                  args.threshold, args.skip_existing)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -1,77 +0,0 @@
|
||||
import argparse
|
||||
from uniface.constants import (
|
||||
RetinaFaceWeights, SphereFaceWeights, MobileFaceWeights, ArcFaceWeights,
|
||||
SCRFDWeights, DDAMFNWeights, AgeGenderWeights, LandmarkWeights
|
||||
)
|
||||
from uniface.model_store import verify_model_weights
|
||||
|
||||
|
||||
# All available model types
|
||||
ALL_MODEL_TYPES = {
|
||||
'retinaface': RetinaFaceWeights,
|
||||
'sphereface': SphereFaceWeights,
|
||||
'mobileface': MobileFaceWeights,
|
||||
'arcface': ArcFaceWeights,
|
||||
'scrfd': SCRFDWeights,
|
||||
'ddamfn': DDAMFNWeights,
|
||||
'agegender': AgeGenderWeights,
|
||||
'landmark': LandmarkWeights,
|
||||
}
|
||||
|
||||
|
||||
def _download_weights(weights):
    """Download and verify each weight in ``weights``, reporting failures without stopping."""
    for weight in weights:
        print(f"Downloading: {weight.value}")
        try:
            verify_model_weights(weight)
            print(f"Downloaded: {weight.value}")
        except Exception as e:
            print(f"Failed to download {weight.value}: {e}")


def main():
    """CLI entry point: download and verify model weights.

    * ``--model NAME`` alone downloads one RetinaFace model (historical default).
    * ``--model-type TYPE --model NAME`` downloads one model of that type;
      previously ``--model`` was silently ignored in this combination.
    * ``--model-type TYPE`` downloads every model of that type.
    * No arguments downloads every model of every type.
    """
    parser = argparse.ArgumentParser(description="Download and verify model weights.")
    parser.add_argument(
        "--model-type",
        type=str,
        choices=list(ALL_MODEL_TYPES.keys()),
        help="Model type to download (e.g. retinaface, arcface). If not specified, all models will be downloaded.",
    )
    parser.add_argument(
        "--model",
        type=str,
        help="Specific model to download (e.g. MNET_V2). For RetinaFace backward compatibility.",
    )
    args = parser.parse_args()

    if args.model:
        # Without --model-type, keep the backward-compatible RetinaFace default.
        model_type = args.model_type or 'retinaface'
        model_enum = ALL_MODEL_TYPES[model_type]
        label = "RetinaFace" if model_type == 'retinaface' else model_type

        # Only the name lookup is guarded: a KeyError raised while downloading
        # must not be misreported as an invalid model name.
        # NOTE(review): assumes every weights class supports lookup by member
        # name the way RetinaFaceWeights does - confirm.
        try:
            weight = model_enum[args.model]
        except KeyError:
            print(f"Invalid {label} model: {args.model}")
            print(f"Available models: {[m.name for m in model_enum]}")
            return

        print(f"Downloading {label} model: {weight.value}")
        verify_model_weights(weight)
        print("Model downloaded successfully.")
        return

    if args.model_type:
        # Download all models from specific type
        print(f"Downloading all {args.model_type} models...")
        _download_weights(ALL_MODEL_TYPES[args.model_type])
    else:
        # Download all models from all types
        print("Downloading all models...")
        for model_type, model_enum in ALL_MODEL_TYPES.items():
            print(f"\nDownloading {model_type} models...")
            _download_weights(model_enum)

    print("\nDownload process completed.")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -1,163 +0,0 @@
|
||||
"""Age and Gender Detection Demo Script"""
|
||||
|
||||
import os
|
||||
import cv2
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
|
||||
from uniface import RetinaFace, SCRFD, AgeGender
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
|
||||
def process_image(detector, age_gender, image_path: str, save_dir: str = "outputs", vis_threshold: float = 0.6):
    """Annotate one image with face detections plus age/gender labels and save it.

    Args:
        detector: Object whose ``detect(image)`` returns a list of dicts with
            ``'bbox'``, ``'confidence'`` and ``'landmarks'`` entries.
        age_gender: Object whose ``predict(image, bbox)`` returns ``(gender, age)``.
        image_path: Path of the image to read.
        save_dir: Directory for the ``<stem>_age_gender.jpg`` output (created if missing).
        vis_threshold: Threshold forwarded to ``draw_detections``.
    """
    canvas = cv2.imread(image_path)
    if canvas is None:
        print(f"Error: Failed to load image from '{image_path}'")
        return

    print(f"Processing: {image_path}")

    detections = detector.detect(canvas)
    print(f" Detected {len(detections)} face(s)")
    if not detections:
        print(" No faces detected")
        return

    draw_detections(
        canvas,
        [det['bbox'] for det in detections],
        [det['confidence'] for det in detections],
        [det['landmarks'] for det in detections],
        vis_threshold=vis_threshold,
    )

    font = cv2.FONT_HERSHEY_SIMPLEX
    for number, det in enumerate(detections, start=1):
        box = det['bbox']
        gender, age = age_gender.predict(canvas, box)
        print(f" Face {number}: {gender}, {age} years old")

        # Label sits just above the box's top-left corner on a filled green strip.
        left, top = int(box[0]), int(box[1])
        caption = f"{gender}, {age}y"
        (caption_w, caption_h), _ = cv2.getTextSize(caption, font, 0.6, 2)
        strip_top_left = (left, top - caption_h - 10)
        strip_bottom_right = (left + caption_w + 10, top)
        cv2.rectangle(canvas, strip_top_left, strip_bottom_right, (0, 255, 0), -1)
        cv2.putText(canvas, caption, (left + 5, top - 5), font, 0.6, (0, 0, 0), 2)

    os.makedirs(save_dir, exist_ok=True)
    destination = os.path.join(save_dir, f"{Path(image_path).stem}_age_gender.jpg")
    cv2.imwrite(destination, canvas)
    print(f"Output saved: {destination}")
|
||||
|
||||
|
||||
def run_webcam(detector, age_gender, vis_threshold: float = 0.6):
    """Show a live webcam view annotated with detections and age/gender labels.

    Reads frames from capture device 0 until a read fails, ``q`` is pressed or
    the user interrupts with Ctrl+C, then releases the device, closes the
    windows and prints how many frames were processed.

    Args:
        detector: Object whose ``detect(frame)`` returns a list of dicts with
            ``'bbox'``, ``'confidence'`` and ``'landmarks'`` entries.
        age_gender: Object whose ``predict(frame, bbox)`` returns ``(gender, age)``.
        vis_threshold: Threshold forwarded to ``draw_detections``.
    """
    camera = cv2.VideoCapture(0)
    if not camera.isOpened():
        print("Cannot open webcam")
        return

    print("Webcam opened")
    print("Press 'q' to quit\n")

    font = cv2.FONT_HERSHEY_SIMPLEX
    processed = 0

    try:
        while True:
            grabbed, frame = camera.read()
            if not grabbed:
                break
            processed += 1

            detections = detector.detect(frame)
            draw_detections(
                frame,
                [det['bbox'] for det in detections],
                [det['confidence'] for det in detections],
                [det['landmarks'] for det in detections],
                vis_threshold=vis_threshold,
            )

            for det in detections:
                box = det['bbox']
                gender, age = age_gender.predict(frame, box)

                # Label sits just above the box's top-left corner on a green strip.
                left, top = int(box[0]), int(box[1])
                caption = f"{gender}, {age}y"
                (caption_w, caption_h), _ = cv2.getTextSize(caption, font, 0.6, 2)
                cv2.rectangle(frame, (left, top - caption_h - 10),
                              (left + caption_w + 10, top), (0, 255, 0), -1)
                cv2.putText(frame, caption, (left + 5, top - 5), font, 0.6, (0, 0, 0), 2)

            # On-screen info: face counter at the top, quit hint at the bottom.
            cv2.putText(frame, f"Faces: {len(detections)}", (10, 30), font, 1, (0, 255, 0), 2)
            cv2.putText(frame, "Press 'q' to quit", (10, frame.shape[0] - 10),
                        font, 0.5, (255, 255, 255), 1)

            cv2.imshow("Age & Gender Detection", frame)

            pressed = cv2.waitKey(1) & 0xFF
            if pressed == ord('q'):
                break
    except KeyboardInterrupt:
        print("\nInterrupted")
    finally:
        camera.release()
        cv2.destroyAllWindows()
        print(f"\nProcessed {processed} frames")
|
||||
|
||||
|
||||
def main():
    """CLI entry point for the age and gender demo.

    Requires ``--image`` or ``--webcam``; when both are given the webcam mode
    is used. Builds the selected detector and the age/gender model, then runs
    either ``run_webcam`` or ``process_image``.
    """
    parser = argparse.ArgumentParser(description="Run age and gender detection")
    parser.add_argument("--image", type=str, help="Path to input image")
    parser.add_argument("--webcam", action="store_true", help="Use webcam instead of image")
    parser.add_argument(
        "--detector",
        type=str,
        default="retinaface",
        choices=['retinaface', 'scrfd'],
        help="Face detector to use",
    )
    parser.add_argument(
        "--threshold",
        type=float,
        default=0.6,
        help="Confidence threshold for visualization",
    )
    parser.add_argument(
        "--save_dir",
        type=str,
        default="outputs",
        help="Directory to save output images",
    )
    parser.add_argument("--verbose", action="store_true", help="Enable verbose logging")

    options = parser.parse_args()

    # At least one input source is mandatory.
    if not (options.image or options.webcam):
        parser.error("Either --image or --webcam must be specified")

    if options.verbose:
        from uniface import enable_logging
        enable_logging()

    print(f"Initializing detector: {options.detector}")
    detector = RetinaFace() if options.detector == 'retinaface' else SCRFD()

    print("Initializing age/gender model...")
    age_gender = AgeGender()
    print("Models initialized\n")

    if options.webcam:
        run_webcam(detector, age_gender, options.threshold)
        return
    process_image(detector, age_gender, options.image, options.save_dir, options.threshold)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -1,80 +0,0 @@
|
||||
import os
|
||||
import cv2
|
||||
import time
|
||||
import argparse
|
||||
import numpy as np
|
||||
|
||||
from uniface.detection import RetinaFace, SCRFD
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
|
||||
def run_inference(detector, image_path: str, vis_threshold: float = 0.6, save_dir: str = "outputs"):
    """Detect faces in one image, draw them and save the result.

    Args:
        detector: Object whose ``detect(image)`` returns a list of dicts with
            ``'bbox'``, ``'confidence'`` and ``'landmarks'`` entries.
        image_path: Path of the image to read.
        vis_threshold: Threshold forwarded to ``draw_detections``.
        save_dir: Directory for the ``<name>_out.jpg`` output (created if missing).
    """
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Failed to load image from '{image_path}'")
        return

    # 1. Get the list of face dictionaries from the detector
    faces = detector.detect(image)

    if faces:
        # 2. Unpack the data into separate lists
        bboxes = [face['bbox'] for face in faces]
        scores = [face['confidence'] for face in faces]
        landmarks = [face['landmarks'] for face in faces]

        # 3. Pass the unpacked lists to the drawing function. Use the caller's
        #    threshold: the previous hard-coded 0.6 made --threshold a no-op.
        draw_detections(image, bboxes, scores, landmarks, vis_threshold=vis_threshold)

    os.makedirs(save_dir, exist_ok=True)
    output_path = os.path.join(save_dir, f"{os.path.splitext(os.path.basename(image_path))[0]}_out.jpg")
    cv2.imwrite(output_path, image)
    print(f"Output saved at: {output_path}")
|
||||
|
||||
|
||||
def main():
    """CLI entry point: run face detection on one image, optionally repeated for timing.

    Each iteration calls ``run_inference`` (image load, detection, drawing and
    save) and prints its wall-clock time. With more than one iteration the
    average is reported over all runs except the first, which is treated as
    warm-up.
    """
    parser = argparse.ArgumentParser(description="Run face detection on an image.")
    parser.add_argument("--image", type=str, required=True, help="Path to the input image")
    parser.add_argument(
        "--method",
        type=str,
        default="retinaface",
        choices=['retinaface', 'scrfd'],
        help="Detection method to use."
    )
    parser.add_argument("--threshold", type=float, default=0.6, help="Visualization confidence threshold")
    parser.add_argument("--iterations", type=int, default=1, help="Number of inference runs for benchmarking")
    parser.add_argument("--save_dir", type=str, default="outputs", help="Directory to save output images")
    parser.add_argument("--verbose", action="store_true", help="Enable verbose logging")

    args = parser.parse_args()

    if args.verbose:
        from uniface import enable_logging
        enable_logging()

    print(f"Initializing detector: {args.method}")
    if args.method == 'retinaface':
        detector = RetinaFace()
    else:
        detector = SCRFD()

    timings = []
    for i in range(args.iterations):
        # perf_counter is monotonic, so the measurement is unaffected by
        # system clock adjustments.
        start = time.perf_counter()
        run_inference(detector, args.image, args.threshold, args.save_dir)
        elapsed = time.perf_counter() - start
        print(f"[{i + 1}/{args.iterations}] Inference time: {elapsed:.4f} seconds")
        timings.append(elapsed)

    if args.iterations > 1:
        # Exclude the first run from the average, as the original comments
        # intended; the old `i >= 0` guard was always true and excluded nothing.
        measured = timings[1:]
        print(
            f"\nAverage inference time over {len(measured)} runs (first run excluded as warm-up): "
            f"{sum(measured) / len(measured):.4f} seconds")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -1,110 +0,0 @@
|
||||
import argparse
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from uniface.detection import RetinaFace, SCRFD
|
||||
from uniface.face_utils import compute_similarity
|
||||
from uniface.recognition import ArcFace, MobileFace, SphereFace
|
||||
|
||||
|
||||
def extract_reference_embedding(detector, recognizer, image_path: str) -> np.ndarray:
    """Return the normalized embedding of the first face found in a reference image.

    Args:
        detector: Object whose ``detect(image)`` returns a sequence of face
            dictionaries carrying a ``"landmarks"`` entry.
        recognizer: Object exposing ``get_normalized_embedding(image, landmarks)``.
        image_path: Path of the reference image on disk.

    Returns:
        Whatever ``recognizer.get_normalized_embedding`` produces for the first
        detected face.

    Raises:
        RuntimeError: If the image cannot be read or contains no detectable face.
    """
    reference = cv2.imread(image_path)
    if reference is None:
        raise RuntimeError(f"Failed to load image: {image_path}")

    detections = detector.detect(reference)
    if not detections:
        raise RuntimeError("No faces found in reference image.")

    # Only the first detection is used as the reference identity.
    first_landmarks = np.array(detections[0]["landmarks"])

    # The normalized embedding is used so later similarity scores are comparable.
    return recognizer.get_normalized_embedding(reference, first_landmarks)
|
||||
|
||||
|
||||
def run_video(detector, recognizer, ref_embedding: np.ndarray, threshold: float = 0.4):
    """Match every face seen by the default webcam against a reference embedding.

    Each frame is run through ``detector``; every detected face is embedded
    with ``recognizer`` and compared to ``ref_embedding``. Faces whose
    similarity exceeds ``threshold`` are drawn in green as a match, the others
    in red as unknown. The loop ends when the stream stops or 'q' is pressed.

    Args:
        detector: Object whose ``detect(frame)`` returns face dictionaries with
            ``"bbox"`` and ``"landmarks"`` entries.
        recognizer: Object exposing ``get_normalized_embedding(frame, landmarks)``.
        ref_embedding: Embedding of the person to look for.
        threshold: Similarity above which a face is labelled a match.

    Raises:
        RuntimeError: If the webcam cannot be opened.
    """
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        cap.release()
        raise RuntimeError("Webcam could not be opened.")
    print("Webcam started. Press 'q' to quit.")

    # try/finally guarantees the camera and windows are released even when
    # detection or recognition raises in the middle of the loop.
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            faces = detector.detect(frame)

            # Pass 1: compute every similarity on the untouched frame, so the
            # box/label drawn for one face never leaks into the crop of another.
            scored = []
            for face in faces:
                bbox = face["bbox"]
                landmarks = np.array(face["landmarks"])
                embedding = recognizer.get_normalized_embedding(frame, landmarks)
                sim = compute_similarity(ref_embedding, embedding)
                scored.append((bbox, sim))

            # Pass 2: draw the results.
            for bbox, sim in scored:
                x1, y1, x2, y2 = map(int, bbox)
                is_match = sim > threshold
                label = f"Match ({sim:.2f})" if is_match else f"Unknown ({sim:.2f})"
                color = (0, 255, 0) if is_match else (0, 0, 255)
                cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
                cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)

            cv2.imshow("Face Recognition", frame)
            if cv2.waitKey(1) & 0xFF == ord("q"):
                break
    finally:
        cap.release()
        cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
    """Command-line entry point for webcam face recognition against a reference image.

    Parses the arguments, builds the requested detector and recognizer,
    extracts the reference embedding from ``--image`` and then starts the
    webcam matching loop.
    """
    cli = argparse.ArgumentParser(description="Face recognition using a reference image.")
    cli.add_argument("--image", type=str, required=True, help="Path to the reference face image.")
    cli.add_argument(
        "--detector",
        type=str,
        default="scrfd",
        choices=["retinaface", "scrfd"],
        help="Face detection method.",
    )
    cli.add_argument(
        "--recognizer", type=str, default="arcface", choices=["arcface", "mobileface", "sphereface"],
        help="Face recognition method.",
    )
    cli.add_argument("--verbose", action="store_true", help="Enable verbose logging")
    opts = cli.parse_args()

    if opts.verbose:
        from uniface import enable_logging

        enable_logging()

    print("Initializing models...")
    # Anything other than 'retinaface' falls back to SCRFD.
    detector_cls = RetinaFace if opts.detector == 'retinaface' else SCRFD
    detector = detector_cls()

    # Anything other than the two listed names falls back to SphereFace.
    recognizer_cls = {'arcface': ArcFace, 'mobileface': MobileFace}.get(opts.recognizer, SphereFace)
    recognizer = recognizer_cls()

    print("Extracting reference embedding...")
    reference = extract_reference_embedding(detector, recognizer, opts.image)

    run_video(detector, recognizer, reference)


if __name__ == "__main__":
    main()
|
||||
@@ -1,149 +0,0 @@
|
||||
"""Facial Landmark Detection Demo Script"""
|
||||
|
||||
import os
|
||||
import cv2
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
|
||||
from uniface import RetinaFace, SCRFD, Landmark106
|
||||
|
||||
|
||||
def process_image(detector, landmarker, image_path: str, save_dir: str = "outputs"):
    """Detect faces in an image, draw their boxes and landmarks, and save the result.

    Landmarks for all faces are computed before anything is drawn, so the
    landmark model always sees the original pixels rather than boxes, dots or
    labels painted for a previously processed face.

    Args:
        detector: Object whose ``detect(image)`` returns face dictionaries with
            a ``'bbox'`` entry.
        landmarker: Object exposing ``get_landmarks(image, bbox)`` that returns
            an array of (x, y) points.
        image_path: Path of the image to process.
        save_dir: Directory (created if missing) receiving
            ``<image stem>_landmarks.jpg``.

    Returns:
        None. Prints an error and returns early if the image cannot be read or
        no face is detected.
    """
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Failed to load image from '{image_path}'")
        return

    print(f"Processing: {image_path}")

    # Detect faces
    faces = detector.detect(image)
    print(f" Detected {len(faces)} face(s)")

    if not faces:
        print(" No faces detected")
        return

    # Pass 1: run the landmark model on the still-unannotated image.
    annotated = []
    for i, face in enumerate(faces):
        bbox = face['bbox']
        landmarks = landmarker.get_landmarks(image, bbox)
        print(f" Face {i+1}: Extracted {len(landmarks)} landmarks")
        annotated.append((bbox, landmarks))

    # Pass 2: draw boxes, landmarks and per-face labels.
    for i, (bbox, landmarks) in enumerate(annotated):
        x1, y1, x2, y2 = map(int, bbox)
        cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)

        for x, y in landmarks.astype(int):
            # Plain ints keep the drawing call independent of numpy scalar types.
            cv2.circle(image, (int(x), int(y)), 1, (0, 255, 0), -1)

        cv2.putText(image, f"Face {i+1}", (x1, y1 - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    # Add total count
    cv2.putText(image, f"Faces: {len(faces)}", (10, 30),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

    # Save result
    os.makedirs(save_dir, exist_ok=True)
    output_path = os.path.join(save_dir, f"{Path(image_path).stem}_landmarks.jpg")
    cv2.imwrite(output_path, image)
    print(f"Output saved: {output_path}")
|
||||
|
||||
|
||||
def run_webcam(detector, landmarker):
    """Show live 106-point landmarks for every face seen by the default webcam.

    For each frame, landmarks of all faces are computed first and only then
    drawn, so the landmark model never receives pixels already covered by the
    box or dots of another face. The loop stops when the stream ends, 'q' is
    pressed, or the user interrupts with Ctrl+C; the camera and windows are
    always released.

    Args:
        detector: Object whose ``detect(frame)`` returns face dictionaries with
            a ``'bbox'`` entry.
        landmarker: Object exposing ``get_landmarks(frame, bbox)`` that returns
            an array of (x, y) points.
    """
    cap = cv2.VideoCapture(0)

    if not cap.isOpened():
        print("Cannot open webcam")
        return

    print("Webcam opened")
    print("Press 'q' to quit\n")

    frame_count = 0

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            frame_count += 1

            # Detect faces
            faces = detector.detect(frame)

            # Pass 1: landmark inference on the clean frame.
            annotated = []
            for face in faces:
                bbox = face['bbox']
                annotated.append((bbox, landmarker.get_landmarks(frame, bbox)))

            # Pass 2: draw boxes and landmarks.
            for bbox, landmarks in annotated:
                x1, y1, x2, y2 = map(int, bbox)
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                for x, y in landmarks.astype(int):
                    # Plain ints keep the drawing call independent of numpy scalar types.
                    cv2.circle(frame, (int(x), int(y)), 1, (0, 255, 0), -1)

            # Add info
            cv2.putText(frame, f"Faces: {len(faces)}", (10, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            cv2.putText(frame, "Press 'q' to quit", (10, frame.shape[0] - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)

            cv2.imshow("106-Point Landmarks", frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

    except KeyboardInterrupt:
        print("\nInterrupted")
    finally:
        cap.release()
        cv2.destroyAllWindows()
        print(f"\nProcessed {frame_count} frames")
|
||||
|
||||
|
||||
def main():
    """Command-line entry point for the facial landmark demo.

    Requires either ``--image`` or ``--webcam``. Builds the chosen face
    detector plus a ``Landmark106`` model, then either runs the webcam loop or
    processes the single image.
    """
    cli = argparse.ArgumentParser(description="Run facial landmark detection")
    cli.add_argument("--image", type=str, help="Path to input image")
    cli.add_argument("--webcam", action="store_true", help="Use webcam instead of image")
    cli.add_argument(
        "--detector",
        type=str,
        default="retinaface",
        choices=['retinaface', 'scrfd'],
        help="Face detector to use",
    )
    cli.add_argument(
        "--save_dir",
        type=str,
        default="outputs",
        help="Directory to save output images",
    )
    cli.add_argument("--verbose", action="store_true", help="Enable verbose logging")

    opts = cli.parse_args()

    # At least one input source is mandatory; argparse exits with usage otherwise.
    if not (opts.image or opts.webcam):
        cli.error("Either --image or --webcam must be specified")

    if opts.verbose:
        from uniface import enable_logging

        enable_logging()

    # Model set-up: anything other than 'retinaface' falls back to SCRFD.
    print(f"Initializing detector: {opts.detector}")
    detector_cls = RetinaFace if opts.detector == 'retinaface' else SCRFD
    detector = detector_cls()

    print("Initializing landmark detector...")
    landmarker = Landmark106()
    print("Models initialized\n")

    # The webcam takes precedence when both inputs are given.
    if not opts.webcam:
        process_image(detector, landmarker, opts.image, opts.save_dir)
    else:
        run_webcam(detector, landmarker)


if __name__ == "__main__":
    main()
|
||||
@@ -1,128 +0,0 @@
|
||||
import cv2
|
||||
import argparse
|
||||
import numpy as np
|
||||
|
||||
from uniface.detection import RetinaFace, SCRFD
|
||||
from uniface.recognition import ArcFace, MobileFace, SphereFace
|
||||
from uniface.face_utils import compute_similarity
|
||||
|
||||
|
||||
def run_inference(detector, recognizer, image_path: str):
    """Print embedding statistics for the first face detected in an image.

    Args:
        detector: Object whose ``detect(image)`` returns face dictionaries with
            a ``'landmarks'`` entry.
        recognizer: Object exposing ``get_embedding`` and
            ``get_normalized_embedding``, both taking ``(image, landmarks)``.
        image_path: Path of the image to analyse.

    Returns:
        None. Prints a message and returns early if the image cannot be read
        or no face is found.
    """
    picture = cv2.imread(image_path)
    if picture is None:
        print(f"Error: Failed to load image from '{image_path}'")
        return

    detections = detector.detect(picture)
    if not detections:
        print("No faces detected.")
        return

    print(f"Detected {len(detections)} face(s). Extracting embeddings for the first face...")

    # Only the first detection is analysed; its landmarks are passed as an array.
    points = np.array(detections[0]['landmarks'])

    # Raw and normalized embeddings are both computed so their norms can be compared.
    raw = recognizer.get_embedding(picture, points)
    unit = recognizer.get_normalized_embedding(picture, points)

    raw_norm = np.linalg.norm(raw)
    unit_norm = np.linalg.norm(unit)
    print(f" - Embedding shape: {raw.shape}")
    print(f" - L2 norm of unnormalized embedding: {raw_norm:.4f}")
    print(f" - L2 norm of normalized embedding: {unit_norm:.4f}")
|
||||
|
||||
|
||||
def compare_faces(detector, recognizer, image1_path: str, image2_path: str, threshold: float = 0.35):
    """Compare the first face of two images and print whether they match.

    Args:
        detector: Object whose ``detect(image)`` returns face dictionaries with
            a ``'landmarks'`` entry.
        recognizer: Object exposing ``get_normalized_embedding(image, landmarks)``.
        image1_path: Path of the first image.
        image2_path: Path of the second image.
        threshold: Similarity above which the two faces are reported as the
            same person.

    Returns:
        None. Prints an error and returns early if either image fails to load
        or either image has no detectable face.
    """
    first_img = cv2.imread(image1_path)
    second_img = cv2.imread(image2_path)
    if first_img is None or second_img is None:
        print("Error: Failed to load images")
        return

    first_faces = detector.detect(first_img)
    second_faces = detector.detect(second_img)
    if not (first_faces and second_faces):
        print("Error: No faces detected in one or both images")
        return

    # Only the first detection of each image takes part in the comparison.
    first_points = np.array(first_faces[0]['landmarks'])
    second_points = np.array(second_faces[0]['landmarks'])

    first_vec = recognizer.get_normalized_embedding(first_img, first_points)
    second_vec = recognizer.get_normalized_embedding(second_img, second_points)

    # normalized=True because both vectors come from get_normalized_embedding.
    similarity = compute_similarity(first_vec, second_vec, normalized=True)
    verdict = 'Same person' if similarity > threshold else 'Different person'

    print(f"Similarity: {similarity:.4f}")
    print(f"Result: {verdict}")
    print(f"Threshold: {threshold}")
|
||||
|
||||
|
||||
def main():
    """Command-line entry point for embedding extraction and face comparison.

    With ``--image1`` and ``--image2`` the two images are compared; otherwise
    with ``--image`` the embedding statistics of that image are printed. If
    neither form is supplied an error and the help text are printed.
    """
    cli = argparse.ArgumentParser(description="Face recognition and comparison.")
    cli.add_argument("--image", type=str, help="Path to single image for embedding extraction.")
    cli.add_argument("--image1", type=str, help="Path to first image for comparison.")
    cli.add_argument("--image2", type=str, help="Path to second image for comparison.")
    cli.add_argument("--threshold", type=float, default=0.35, help="Similarity threshold for face matching.")
    cli.add_argument(
        "--detector", type=str, default="retinaface", choices=['retinaface', 'scrfd'],
        help="Face detection method to use."
    )
    cli.add_argument(
        "--recognizer", type=str, default="arcface", choices=['arcface', 'mobileface', 'sphereface'],
        help="Face recognition method to use."
    )
    cli.add_argument("--verbose", action="store_true", help="Enable verbose logging")

    opts = cli.parse_args()

    if opts.verbose:
        from uniface import enable_logging

        enable_logging()

    # Anything other than 'retinaface' falls back to SCRFD.
    print(f"Initializing detector: {opts.detector}")
    detector_cls = RetinaFace if opts.detector == 'retinaface' else SCRFD
    detector = detector_cls()

    # Anything other than the two listed names falls back to SphereFace.
    print(f"Initializing recognizer: {opts.recognizer}")
    recognizer_cls = {'arcface': ArcFace, 'mobileface': MobileFace}.get(opts.recognizer, SphereFace)
    recognizer = recognizer_cls()

    comparing = bool(opts.image1 and opts.image2)
    if comparing:
        # Comparison mode wins when both pair arguments are present.
        print(f"Comparing faces: {opts.image1} vs {opts.image2}")
        compare_faces(detector, recognizer, opts.image1, opts.image2, opts.threshold)
        return

    if opts.image:
        # Single-image embedding extraction mode.
        run_inference(detector, recognizer, opts.image)
        return

    print("Error: Provide either --image for single image processing or --image1 and --image2 for comparison")
    cli.print_help()


if __name__ == "__main__":
    main()
|
||||
@@ -1,142 +0,0 @@
|
||||
"""Video Face Detection Script"""
|
||||
|
||||
import cv2
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
from tqdm import tqdm
|
||||
|
||||
from uniface import RetinaFace, SCRFD
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
|
||||
def process_video(detector, input_path: str, output_path: str, vis_threshold: float = 0.6,
                  fps: int = None, show_preview: bool = False):
    """Run face detection on every frame of a video and write an annotated copy.

    Args:
        detector: Object whose ``detect(frame)`` returns face dictionaries with
            ``'bbox'``, ``'confidence'`` and ``'landmarks'`` entries.
        input_path: Path of the video to read.
        output_path: Path of the annotated video to write (``mp4v`` codec).
        vis_threshold: Confidence threshold forwarded to ``draw_detections``.
        fps: Output frame rate; ``None`` reuses the frame rate reported for
            the input.
        show_preview: When true, show each annotated frame in a window and
            allow cancelling with 'q'.

    Returns:
        None. Prints an error and returns early if the input cannot be opened
        or the output writer cannot be created.
    """
    # Open input video
    cap = cv2.VideoCapture(input_path)
    if not cap.isOpened():
        print(f"Error: Cannot open video file '{input_path}'")
        return

    # Get video properties
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    source_fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    output_fps = fps if fps is not None else source_fps
    if not output_fps or output_fps <= 0:
        # The reported rate may be 0 when the container does not expose it;
        # a writer created with a non-positive rate cannot produce a usable file.
        fallback_fps = 30.0
        print(f"Warning: invalid FPS ({output_fps}); falling back to {fallback_fps:.2f}")
        output_fps = fallback_fps

    print(f"📹 Input: {input_path}")
    print(f" Resolution: {width}x{height}")
    print(f" FPS: {source_fps:.2f}")
    print(f" Total frames: {total_frames}")
    print(f"\n📹 Output: {output_path}")
    print(f" FPS: {output_fps:.2f}\n")

    # Initialize video writer
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, output_fps, (width, height))

    if not out.isOpened():
        print(f"Error: Cannot create output video '{output_path}'")
        cap.release()
        return

    # Process frames
    frame_count = 0
    total_faces = 0

    # An unknown frame count (<= 0) is passed as None so tqdm shows a plain counter.
    progress_total = total_frames if total_frames > 0 else None

    try:
        with tqdm(total=progress_total, desc="Processing", unit="frames") as pbar:
            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                frame_count += 1

                # Detect faces
                faces = detector.detect(frame)
                total_faces += len(faces)

                # Draw detections
                bboxes = [f['bbox'] for f in faces]
                scores = [f['confidence'] for f in faces]
                landmarks = [f['landmarks'] for f in faces]
                draw_detections(frame, bboxes, scores, landmarks, vis_threshold=vis_threshold)

                # Add frame info
                cv2.putText(frame, f"Faces: {len(faces)}", (10, 30),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

                # Write frame
                out.write(frame)

                # Show preview if requested
                if show_preview:
                    cv2.imshow("Processing Video - Press 'q' to cancel", frame)
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        print("\nProcessing cancelled by user")
                        break

                pbar.update(1)

    except KeyboardInterrupt:
        print("\nProcessing interrupted")
    finally:
        # Always flush the writer and free the capture, even on errors.
        cap.release()
        out.release()
        if show_preview:
            cv2.destroyAllWindows()

    # Summary
    print("\nProcessing complete!")
    print(f" Processed: {frame_count} frames")
    print(f" Total faces detected: {total_faces}")
    if frame_count > 0:
        # Skipped entirely when nothing was read, instead of printing a blank line.
        print(f" Average faces per frame: {total_faces/frame_count:.2f}")
    print(f" Output saved: {output_path}")
|
||||
|
||||
|
||||
def main():
    """Command-line entry point for annotating a video with face detections.

    Verifies that the input file exists, creates the output directory when
    one is given, builds the requested detector and hands everything to
    ``process_video``.
    """
    cli = argparse.ArgumentParser(description="Process video with face detection")
    cli.add_argument("--input", type=str, required=True, help="Path to input video")
    cli.add_argument("--output", type=str, required=True, help="Path to output video")
    cli.add_argument(
        "--detector",
        type=str,
        default="retinaface",
        choices=['retinaface', 'scrfd'],
        help="Face detector to use",
    )
    cli.add_argument(
        "--threshold",
        type=float,
        default=0.6,
        help="Confidence threshold for visualization",
    )
    cli.add_argument(
        "--fps",
        type=int,
        default=None,
        help="Output FPS (default: same as input)",
    )
    cli.add_argument(
        "--preview",
        action="store_true",
        help="Show live preview during processing",
    )
    cli.add_argument("--verbose", action="store_true", help="Enable verbose logging")

    opts = cli.parse_args()

    # Bail out early when there is nothing to read.
    source = Path(opts.input)
    if not source.exists():
        print(f"Error: Input file '{opts.input}' does not exist")
        return

    # A bare file name has '.' as parent, which needs no creation.
    target_dir = Path(opts.output).parent
    if target_dir != Path('.'):
        target_dir.mkdir(parents=True, exist_ok=True)

    if opts.verbose:
        from uniface import enable_logging

        enable_logging()

    # Anything other than 'retinaface' falls back to SCRFD.
    print(f"Initializing detector: {opts.detector}")
    detector_cls = RetinaFace if opts.detector == 'retinaface' else SCRFD
    detector = detector_cls()
    print("Detector initialized\n")

    process_video(detector, opts.input, opts.output, opts.threshold, opts.fps, opts.preview)


if __name__ == "__main__":
    main()
|
||||
@@ -1,35 +0,0 @@
|
||||
import argparse
|
||||
import hashlib
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def compute_sha256(file_path: Path, chunk_size: int = 8192) -> str:
    """Return the hexadecimal SHA-256 digest of a file.

    The file is read in pieces of ``chunk_size`` bytes, so it is never held
    in memory in full.

    Args:
        file_path: File to hash.
        chunk_size: Number of bytes requested per read.

    Returns:
        The digest as a lowercase hexadecimal string.
    """
    digest = hashlib.sha256()
    with file_path.open("rb") as stream:
        while True:
            piece = stream.read(chunk_size)
            if piece == b"":
                # An empty read marks the end of the file.
                break
            digest.update(piece)
    return digest.hexdigest()
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: print the SHA-256 hash of a model weight file.

    Takes one positional ``file`` argument. If the path does not exist or is
    not a regular file, a message is printed and nothing is hashed.
    """
    parser = argparse.ArgumentParser(
        description="Compute SHA256 hash of a model weight file."
    )
    parser.add_argument(
        "file",
        type=Path,
        help="Path to the model weight file (.onnx, .pth, etc)."
    )

    args = parser.parse_args()

    if not args.file.exists() or not args.file.is_file():
        print(f"File does not exist: {args.file}")
        return

    sha256 = compute_sha256(args.file)
    # The original message began with a stray backtick; removed here.
    print(f"SHA256 hash for '{args.file.name}':\n{sha256}")


if __name__ == "__main__":
    main()
|
||||
@@ -1,7 +1,15 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Tests for AgeGender attribute predictor."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from uniface.attribute import AgeGender
|
||||
from uniface.attribute import AgeGender, AttributeResult
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
@@ -20,23 +28,26 @@ def mock_bbox():
|
||||
|
||||
|
||||
def test_model_initialization(age_gender_model):
|
||||
assert age_gender_model is not None, "AgeGender model initialization failed."
|
||||
assert age_gender_model is not None, 'AgeGender model initialization failed.'
|
||||
|
||||
|
||||
def test_prediction_output_format(age_gender_model, mock_image, mock_bbox):
|
||||
gender, age = age_gender_model.predict(mock_image, mock_bbox)
|
||||
assert isinstance(gender, str), f"Gender should be string, got {type(gender)}"
|
||||
assert isinstance(age, int), f"Age should be int, got {type(age)}"
|
||||
result = age_gender_model.predict(mock_image, mock_bbox)
|
||||
assert isinstance(result, AttributeResult), f'Result should be AttributeResult, got {type(result)}'
|
||||
assert isinstance(result.gender, int), f'Gender should be int, got {type(result.gender)}'
|
||||
assert isinstance(result.age, int), f'Age should be int, got {type(result.age)}'
|
||||
assert isinstance(result.sex, str), f'Sex should be str, got {type(result.sex)}'
|
||||
|
||||
|
||||
def test_gender_values(age_gender_model, mock_image, mock_bbox):
|
||||
gender, age = age_gender_model.predict(mock_image, mock_bbox)
|
||||
assert gender in ['Male', 'Female'], f"Gender should be 'Male' or 'Female', got '{gender}'"
|
||||
result = age_gender_model.predict(mock_image, mock_bbox)
|
||||
assert result.gender in [0, 1], f'Gender should be 0 (Female) or 1 (Male), got {result.gender}'
|
||||
assert result.sex in ['Female', 'Male'], f'Sex should be Female or Male, got {result.sex}'
|
||||
|
||||
|
||||
def test_age_range(age_gender_model, mock_image, mock_bbox):
|
||||
gender, age = age_gender_model.predict(mock_image, mock_bbox)
|
||||
assert 0 <= age <= 120, f"Age should be between 0 and 120, got {age}"
|
||||
result = age_gender_model.predict(mock_image, mock_bbox)
|
||||
assert 0 <= result.age <= 120, f'Age should be between 0 and 120, got {result.age}'
|
||||
|
||||
|
||||
def test_different_bbox_sizes(age_gender_model, mock_image):
|
||||
@@ -47,9 +58,9 @@ def test_different_bbox_sizes(age_gender_model, mock_image):
|
||||
]
|
||||
|
||||
for bbox in test_bboxes:
|
||||
gender, age = age_gender_model.predict(mock_image, bbox)
|
||||
assert gender in ['Male', 'Female'], f"Failed for bbox {bbox}"
|
||||
assert 0 <= age <= 120, f"Age out of range for bbox {bbox}"
|
||||
result = age_gender_model.predict(mock_image, bbox)
|
||||
assert result.gender in [0, 1], f'Failed for bbox {bbox}'
|
||||
assert 0 <= result.age <= 120, f'Age out of range for bbox {bbox}'
|
||||
|
||||
|
||||
def test_different_image_sizes(age_gender_model, mock_bbox):
|
||||
@@ -57,31 +68,31 @@ def test_different_image_sizes(age_gender_model, mock_bbox):
|
||||
|
||||
for size in test_sizes:
|
||||
mock_image = np.random.randint(0, 255, size, dtype=np.uint8)
|
||||
gender, age = age_gender_model.predict(mock_image, mock_bbox)
|
||||
assert gender in ['Male', 'Female'], f"Failed for image size {size}"
|
||||
assert 0 <= age <= 120, f"Age out of range for image size {size}"
|
||||
result = age_gender_model.predict(mock_image, mock_bbox)
|
||||
assert result.gender in [0, 1], f'Failed for image size {size}'
|
||||
assert 0 <= result.age <= 120, f'Age out of range for image size {size}'
|
||||
|
||||
|
||||
def test_consistency(age_gender_model, mock_image, mock_bbox):
|
||||
gender1, age1 = age_gender_model.predict(mock_image, mock_bbox)
|
||||
gender2, age2 = age_gender_model.predict(mock_image, mock_bbox)
|
||||
result1 = age_gender_model.predict(mock_image, mock_bbox)
|
||||
result2 = age_gender_model.predict(mock_image, mock_bbox)
|
||||
|
||||
assert gender1 == gender2, "Same input should produce same gender prediction"
|
||||
assert age1 == age2, "Same input should produce same age prediction"
|
||||
assert result1.gender == result2.gender, 'Same input should produce same gender prediction'
|
||||
assert result1.age == result2.age, 'Same input should produce same age prediction'
|
||||
|
||||
|
||||
def test_bbox_list_format(age_gender_model, mock_image):
|
||||
bbox_list = [100, 100, 300, 300]
|
||||
gender, age = age_gender_model.predict(mock_image, bbox_list)
|
||||
assert gender in ['Male', 'Female'], "Should work with bbox as list"
|
||||
assert 0 <= age <= 120, "Age should be in valid range"
|
||||
result = age_gender_model.predict(mock_image, bbox_list)
|
||||
assert result.gender in [0, 1], 'Should work with bbox as list'
|
||||
assert 0 <= result.age <= 120, 'Age should be in valid range'
|
||||
|
||||
|
||||
def test_bbox_array_format(age_gender_model, mock_image):
|
||||
bbox_array = np.array([100, 100, 300, 300])
|
||||
gender, age = age_gender_model.predict(mock_image, bbox_array)
|
||||
assert gender in ['Male', 'Female'], "Should work with bbox as numpy array"
|
||||
assert 0 <= age <= 120, "Age should be in valid range"
|
||||
result = age_gender_model.predict(mock_image, bbox_array)
|
||||
assert result.gender in [0, 1], 'Should work with bbox as numpy array'
|
||||
assert 0 <= result.age <= 120, 'Age should be in valid range'
|
||||
|
||||
|
||||
def test_multiple_predictions(age_gender_model, mock_image):
|
||||
@@ -93,24 +104,37 @@ def test_multiple_predictions(age_gender_model, mock_image):
|
||||
|
||||
results = []
|
||||
for bbox in bboxes:
|
||||
gender, age = age_gender_model.predict(mock_image, bbox)
|
||||
results.append((gender, age))
|
||||
result = age_gender_model.predict(mock_image, bbox)
|
||||
results.append(result)
|
||||
|
||||
assert len(results) == 3, "Should have 3 predictions"
|
||||
for gender, age in results:
|
||||
assert gender in ['Male', 'Female']
|
||||
assert 0 <= age <= 120
|
||||
assert len(results) == 3, 'Should have 3 predictions'
|
||||
for result in results:
|
||||
assert result.gender in [0, 1]
|
||||
assert 0 <= result.age <= 120
|
||||
|
||||
|
||||
def test_age_is_positive(age_gender_model, mock_image, mock_bbox):
|
||||
for _ in range(5):
|
||||
gender, age = age_gender_model.predict(mock_image, mock_bbox)
|
||||
assert age >= 0, f"Age should be non-negative, got {age}"
|
||||
result = age_gender_model.predict(mock_image, mock_bbox)
|
||||
assert result.age >= 0, f'Age should be non-negative, got {result.age}'
|
||||
|
||||
|
||||
def test_output_format_for_visualization(age_gender_model, mock_image, mock_bbox):
|
||||
gender, age = age_gender_model.predict(mock_image, mock_bbox)
|
||||
text = f"{gender}, {age}y"
|
||||
assert isinstance(text, str), "Should be able to format as string"
|
||||
assert "Male" in text or "Female" in text, "Text should contain gender"
|
||||
assert "y" in text, "Text should contain 'y' for years"
|
||||
result = age_gender_model.predict(mock_image, mock_bbox)
|
||||
text = f'{result.sex}, {result.age}y'
|
||||
assert isinstance(text, str), 'Should be able to format as string'
|
||||
assert 'Male' in text or 'Female' in text, 'Text should contain gender'
|
||||
assert 'y' in text, "Text should contain 'y' for years"
|
||||
|
||||
|
||||
def test_attribute_result_fields(age_gender_model, mock_image, mock_bbox):
|
||||
"""Test that AttributeResult has correct fields for AgeGender model."""
|
||||
result = age_gender_model.predict(mock_image, mock_bbox)
|
||||
|
||||
# AgeGender should set gender and age
|
||||
assert result.gender is not None
|
||||
assert result.age is not None
|
||||
|
||||
# AgeGender should NOT set race and age_group (FairFace only)
|
||||
assert result.race is None
|
||||
assert result.age_group is None
|
||||
|
||||
@@ -1,3 +1,11 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Tests for factory functions (create_detector, create_recognizer, etc.)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
@@ -17,7 +25,7 @@ def test_create_detector_retinaface():
|
||||
Test creating a RetinaFace detector using factory function.
|
||||
"""
|
||||
detector = create_detector('retinaface')
|
||||
assert detector is not None, "Failed to create RetinaFace detector"
|
||||
assert detector is not None, 'Failed to create RetinaFace detector'
|
||||
|
||||
|
||||
def test_create_detector_scrfd():
|
||||
@@ -25,7 +33,7 @@ def test_create_detector_scrfd():
|
||||
Test creating a SCRFD detector using factory function.
|
||||
"""
|
||||
detector = create_detector('scrfd')
|
||||
assert detector is not None, "Failed to create SCRFD detector"
|
||||
assert detector is not None, 'Failed to create SCRFD detector'
|
||||
|
||||
|
||||
def test_create_detector_with_config():
|
||||
@@ -35,10 +43,10 @@ def test_create_detector_with_config():
|
||||
detector = create_detector(
|
||||
'retinaface',
|
||||
model_name=RetinaFaceWeights.MNET_V2,
|
||||
conf_thresh=0.8,
|
||||
nms_thresh=0.3
|
||||
confidence_threshold=0.8,
|
||||
nms_threshold=0.3,
|
||||
)
|
||||
assert detector is not None, "Failed to create detector with custom config"
|
||||
assert detector is not None, 'Failed to create detector with custom config'
|
||||
|
||||
|
||||
def test_create_detector_invalid_method():
|
||||
@@ -53,12 +61,8 @@ def test_create_detector_scrfd_with_model():
|
||||
"""
|
||||
Test creating SCRFD detector with specific model.
|
||||
"""
|
||||
detector = create_detector(
|
||||
'scrfd',
|
||||
model_name=SCRFDWeights.SCRFD_10G_KPS,
|
||||
conf_thresh=0.5
|
||||
)
|
||||
assert detector is not None, "Failed to create SCRFD with specific model"
|
||||
detector = create_detector('scrfd', model_name=SCRFDWeights.SCRFD_10G_KPS, confidence_threshold=0.5)
|
||||
assert detector is not None, 'Failed to create SCRFD with specific model'
|
||||
|
||||
|
||||
# create_recognizer tests
|
||||
@@ -67,7 +71,7 @@ def test_create_recognizer_arcface():
|
||||
Test creating an ArcFace recognizer using factory function.
|
||||
"""
|
||||
recognizer = create_recognizer('arcface')
|
||||
assert recognizer is not None, "Failed to create ArcFace recognizer"
|
||||
assert recognizer is not None, 'Failed to create ArcFace recognizer'
|
||||
|
||||
|
||||
def test_create_recognizer_mobileface():
|
||||
@@ -75,7 +79,7 @@ def test_create_recognizer_mobileface():
|
||||
Test creating a MobileFace recognizer using factory function.
|
||||
"""
|
||||
recognizer = create_recognizer('mobileface')
|
||||
assert recognizer is not None, "Failed to create MobileFace recognizer"
|
||||
assert recognizer is not None, 'Failed to create MobileFace recognizer'
|
||||
|
||||
|
||||
def test_create_recognizer_sphereface():
|
||||
@@ -83,7 +87,7 @@ def test_create_recognizer_sphereface():
|
||||
Test creating a SphereFace recognizer using factory function.
|
||||
"""
|
||||
recognizer = create_recognizer('sphereface')
|
||||
assert recognizer is not None, "Failed to create SphereFace recognizer"
|
||||
assert recognizer is not None, 'Failed to create SphereFace recognizer'
|
||||
|
||||
|
||||
def test_create_recognizer_invalid_method():
|
||||
@@ -100,7 +104,7 @@ def test_create_landmarker():
|
||||
Test creating a Landmark106 detector using factory function.
|
||||
"""
|
||||
landmarker = create_landmarker('2d106det')
|
||||
assert landmarker is not None, "Failed to create Landmark106 detector"
|
||||
assert landmarker is not None, 'Failed to create Landmark106 detector'
|
||||
|
||||
|
||||
def test_create_landmarker_default():
|
||||
@@ -108,7 +112,7 @@ def test_create_landmarker_default():
|
||||
Test creating landmarker with default parameters.
|
||||
"""
|
||||
landmarker = create_landmarker()
|
||||
assert landmarker is not None, "Failed to create default landmarker"
|
||||
assert landmarker is not None, 'Failed to create default landmarker'
|
||||
|
||||
|
||||
def test_create_landmarker_invalid_method():
|
||||
@@ -127,7 +131,7 @@ def test_detect_faces_retinaface():
|
||||
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
faces = detect_faces(mock_image, method='retinaface')
|
||||
|
||||
assert isinstance(faces, list), "detect_faces should return a list"
|
||||
assert isinstance(faces, list), 'detect_faces should return a list'
|
||||
|
||||
|
||||
def test_detect_faces_scrfd():
|
||||
@@ -137,7 +141,7 @@ def test_detect_faces_scrfd():
|
||||
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
faces = detect_faces(mock_image, method='scrfd')
|
||||
|
||||
assert isinstance(faces, list), "detect_faces should return a list"
|
||||
assert isinstance(faces, list), 'detect_faces should return a list'
|
||||
|
||||
|
||||
def test_detect_faces_with_threshold():
|
||||
@@ -145,13 +149,13 @@ def test_detect_faces_with_threshold():
|
||||
Test detect_faces with custom confidence threshold.
|
||||
"""
|
||||
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
faces = detect_faces(mock_image, method='retinaface', conf_thresh=0.8)
|
||||
faces = detect_faces(mock_image, method='retinaface', confidence_threshold=0.8)
|
||||
|
||||
assert isinstance(faces, list), "detect_faces should return a list"
|
||||
assert isinstance(faces, list), 'detect_faces should return a list'
|
||||
|
||||
# All detections should respect threshold
|
||||
for face in faces:
|
||||
assert face['confidence'] >= 0.8, "All detections should meet confidence threshold"
|
||||
assert face.confidence >= 0.8, 'All detections should meet confidence threshold'
|
||||
|
||||
|
||||
def test_detect_faces_default_method():
|
||||
@@ -161,7 +165,7 @@ def test_detect_faces_default_method():
|
||||
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
faces = detect_faces(mock_image) # No method specified
|
||||
|
||||
assert isinstance(faces, list), "detect_faces should return a list with default method"
|
||||
assert isinstance(faces, list), 'detect_faces should return a list with default method'
|
||||
|
||||
|
||||
def test_detect_faces_empty_image():
|
||||
@@ -171,8 +175,8 @@ def test_detect_faces_empty_image():
|
||||
empty_image = np.zeros((640, 640, 3), dtype=np.uint8)
|
||||
faces = detect_faces(empty_image, method='retinaface')
|
||||
|
||||
assert isinstance(faces, list), "Should return a list even for empty image"
|
||||
assert len(faces) == 0, "Should detect no faces in blank image"
|
||||
assert isinstance(faces, list), 'Should return a list even for empty image'
|
||||
assert len(faces) == 0, 'Should detect no faces in blank image'
|
||||
|
||||
|
||||
# list_available_detectors tests
|
||||
@@ -182,8 +186,8 @@ def test_list_available_detectors():
|
||||
"""
|
||||
detectors = list_available_detectors()
|
||||
|
||||
assert isinstance(detectors, dict), "Should return a dictionary of detectors"
|
||||
assert len(detectors) > 0, "Should have at least one detector available"
|
||||
assert isinstance(detectors, dict), 'Should return a dictionary of detectors'
|
||||
assert len(detectors) > 0, 'Should have at least one detector available'
|
||||
|
||||
|
||||
def test_list_available_detectors_contents():
|
||||
@@ -206,7 +210,7 @@ def test_detector_inference_from_factory():
|
||||
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
|
||||
faces = detector.detect(mock_image)
|
||||
assert isinstance(faces, list), "Detector should return list of faces"
|
||||
assert isinstance(faces, list), 'Detector should return list of faces'
|
||||
|
||||
|
||||
def test_recognizer_inference_from_factory():
|
||||
@@ -217,8 +221,8 @@ def test_recognizer_inference_from_factory():
|
||||
mock_image = np.random.randint(0, 255, (112, 112, 3), dtype=np.uint8)
|
||||
|
||||
embedding = recognizer.get_embedding(mock_image)
|
||||
assert embedding is not None, "Recognizer should return embedding"
|
||||
assert embedding.shape[1] == 512, "Should return 512-dimensional embedding"
|
||||
assert embedding is not None, 'Recognizer should return embedding'
|
||||
assert embedding.shape[1] == 512, 'Should return 512-dimensional embedding'
|
||||
|
||||
|
||||
def test_landmarker_inference_from_factory():
|
||||
@@ -230,8 +234,8 @@ def test_landmarker_inference_from_factory():
|
||||
mock_bbox = [100, 100, 300, 300]
|
||||
|
||||
landmarks = landmarker.get_landmarks(mock_image, mock_bbox)
|
||||
assert landmarks is not None, "Landmarker should return landmarks"
|
||||
assert landmarks.shape == (106, 2), "Should return 106 landmarks"
|
||||
assert landmarks is not None, 'Landmarker should return landmarks'
|
||||
assert landmarks.shape == (106, 2), 'Should return 106 landmarks'
|
||||
|
||||
|
||||
def test_multiple_detector_creation():
|
||||
@@ -243,15 +247,15 @@ def test_multiple_detector_creation():
|
||||
|
||||
assert detector1 is not None
|
||||
assert detector2 is not None
|
||||
assert detector1 is not detector2, "Should create separate instances"
|
||||
assert detector1 is not detector2, 'Should create separate instances'
|
||||
|
||||
|
||||
def test_detector_with_different_configs():
|
||||
"""
|
||||
Test creating multiple detectors with different configurations.
|
||||
"""
|
||||
detector_high_thresh = create_detector('retinaface', conf_thresh=0.9)
|
||||
detector_low_thresh = create_detector('retinaface', conf_thresh=0.3)
|
||||
detector_high_thresh = create_detector('retinaface', confidence_threshold=0.9)
|
||||
detector_low_thresh = create_detector('retinaface', confidence_threshold=0.3)
|
||||
|
||||
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
|
||||
@@ -267,12 +271,12 @@ def test_factory_returns_correct_types():
|
||||
"""
|
||||
Test that factory functions return instances of the correct types.
|
||||
"""
|
||||
from uniface import RetinaFace, ArcFace, Landmark106
|
||||
from uniface import ArcFace, Landmark106, RetinaFace
|
||||
|
||||
detector = create_detector('retinaface')
|
||||
recognizer = create_recognizer('arcface')
|
||||
landmarker = create_landmarker('2d106det')
|
||||
|
||||
assert isinstance(detector, RetinaFace), "Should return RetinaFace instance"
|
||||
assert isinstance(recognizer, ArcFace), "Should return ArcFace instance"
|
||||
assert isinstance(landmarker, Landmark106), "Should return Landmark106 instance"
|
||||
assert isinstance(detector, RetinaFace), 'Should return RetinaFace instance'
|
||||
assert isinstance(recognizer, ArcFace), 'Should return ArcFace instance'
|
||||
assert isinstance(landmarker, Landmark106), 'Should return Landmark106 instance'
|
||||
|
||||
@@ -1,3 +1,11 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Tests for 106-point facial landmark detector."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
@@ -20,17 +28,17 @@ def mock_bbox():
|
||||
|
||||
|
||||
def test_model_initialization(landmark_model):
|
||||
assert landmark_model is not None, "Landmark106 model initialization failed."
|
||||
assert landmark_model is not None, 'Landmark106 model initialization failed.'
|
||||
|
||||
|
||||
def test_landmark_detection(landmark_model, mock_image, mock_bbox):
|
||||
landmarks = landmark_model.get_landmarks(mock_image, mock_bbox)
|
||||
assert landmarks.shape == (106, 2), f"Expected shape (106, 2), got {landmarks.shape}"
|
||||
assert landmarks.shape == (106, 2), f'Expected shape (106, 2), got {landmarks.shape}'
|
||||
|
||||
|
||||
def test_landmark_dtype(landmark_model, mock_image, mock_bbox):
|
||||
landmarks = landmark_model.get_landmarks(mock_image, mock_bbox)
|
||||
assert landmarks.dtype == np.float32, f"Expected float32, got {landmarks.dtype}"
|
||||
assert landmarks.dtype == np.float32, f'Expected float32, got {landmarks.dtype}'
|
||||
|
||||
|
||||
def test_landmark_coordinates_within_image(landmark_model, mock_image, mock_bbox):
|
||||
@@ -45,8 +53,8 @@ def test_landmark_coordinates_within_image(landmark_model, mock_image, mock_bbox
|
||||
x_in_bounds = np.sum((x_coords >= x1 - margin) & (x_coords <= x2 + margin))
|
||||
y_in_bounds = np.sum((y_coords >= y1 - margin) & (y_coords <= y2 + margin))
|
||||
|
||||
assert x_in_bounds >= 95, f"Only {x_in_bounds}/106 x-coordinates within bounds"
|
||||
assert y_in_bounds >= 95, f"Only {y_in_bounds}/106 y-coordinates within bounds"
|
||||
assert x_in_bounds >= 95, f'Only {x_in_bounds}/106 x-coordinates within bounds'
|
||||
assert y_in_bounds >= 95, f'Only {y_in_bounds}/106 y-coordinates within bounds'
|
||||
|
||||
|
||||
def test_different_bbox_sizes(landmark_model, mock_image):
|
||||
@@ -58,22 +66,22 @@ def test_different_bbox_sizes(landmark_model, mock_image):
|
||||
|
||||
for bbox in test_bboxes:
|
||||
landmarks = landmark_model.get_landmarks(mock_image, bbox)
|
||||
assert landmarks.shape == (106, 2), f"Failed for bbox {bbox}"
|
||||
assert landmarks.shape == (106, 2), f'Failed for bbox {bbox}'
|
||||
|
||||
|
||||
def test_landmark_array_format(landmark_model, mock_image, mock_bbox):
|
||||
landmarks = landmark_model.get_landmarks(mock_image, mock_bbox)
|
||||
landmarks_int = landmarks.astype(int)
|
||||
|
||||
assert landmarks_int.shape == (106, 2), "Integer conversion should preserve shape"
|
||||
assert landmarks_int.dtype in [np.int32, np.int64], "Should convert to integer type"
|
||||
assert landmarks_int.shape == (106, 2), 'Integer conversion should preserve shape'
|
||||
assert landmarks_int.dtype in [np.int32, np.int64], 'Should convert to integer type'
|
||||
|
||||
|
||||
def test_consistency(landmark_model, mock_image, mock_bbox):
|
||||
landmarks1 = landmark_model.get_landmarks(mock_image, mock_bbox)
|
||||
landmarks2 = landmark_model.get_landmarks(mock_image, mock_bbox)
|
||||
|
||||
assert np.allclose(landmarks1, landmarks2), "Same input should produce same landmarks"
|
||||
assert np.allclose(landmarks1, landmarks2), 'Same input should produce same landmarks'
|
||||
|
||||
|
||||
def test_different_image_sizes(landmark_model, mock_bbox):
|
||||
@@ -82,19 +90,19 @@ def test_different_image_sizes(landmark_model, mock_bbox):
|
||||
for size in test_sizes:
|
||||
mock_image = np.random.randint(0, 255, size, dtype=np.uint8)
|
||||
landmarks = landmark_model.get_landmarks(mock_image, mock_bbox)
|
||||
assert landmarks.shape == (106, 2), f"Failed for image size {size}"
|
||||
assert landmarks.shape == (106, 2), f'Failed for image size {size}'
|
||||
|
||||
|
||||
def test_bbox_list_format(landmark_model, mock_image):
|
||||
bbox_list = [100, 100, 300, 300]
|
||||
landmarks = landmark_model.get_landmarks(mock_image, bbox_list)
|
||||
assert landmarks.shape == (106, 2), "Should work with bbox as list"
|
||||
assert landmarks.shape == (106, 2), 'Should work with bbox as list'
|
||||
|
||||
|
||||
def test_bbox_array_format(landmark_model, mock_image):
|
||||
bbox_array = np.array([100, 100, 300, 300])
|
||||
landmarks = landmark_model.get_landmarks(mock_image, bbox_array)
|
||||
assert landmarks.shape == (106, 2), "Should work with bbox as numpy array"
|
||||
assert landmarks.shape == (106, 2), 'Should work with bbox as numpy array'
|
||||
|
||||
|
||||
def test_landmark_distribution(landmark_model, mock_image, mock_bbox):
|
||||
@@ -103,5 +111,5 @@ def test_landmark_distribution(landmark_model, mock_image, mock_bbox):
|
||||
x_variance = np.var(landmarks[:, 0])
|
||||
y_variance = np.var(landmarks[:, 1])
|
||||
|
||||
assert x_variance > 0, "Landmarks should have variation in x-coordinates"
|
||||
assert y_variance > 0, "Landmarks should have variation in y-coordinates"
|
||||
assert x_variance > 0, 'Landmarks should have variation in x-coordinates'
|
||||
assert y_variance > 0, 'Landmarks should have variation in y-coordinates'
|
||||
|
||||
122
tests/test_parsing.py
Normal file
122
tests/test_parsing.py
Normal file
@@ -0,0 +1,122 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Tests for BiSeNet face parsing model."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from uniface.constants import ParsingWeights
|
||||
from uniface.parsing import BiSeNet, create_face_parser
|
||||
|
||||
|
||||
def test_bisenet_initialization():
|
||||
"""Test BiSeNet initialization."""
|
||||
parser = BiSeNet()
|
||||
assert parser is not None
|
||||
assert parser.input_size == (512, 512)
|
||||
|
||||
|
||||
def test_bisenet_with_different_models():
|
||||
"""Test BiSeNet with different model weights."""
|
||||
parser_resnet18 = BiSeNet(model_name=ParsingWeights.RESNET18)
|
||||
parser_resnet34 = BiSeNet(model_name=ParsingWeights.RESNET34)
|
||||
|
||||
assert parser_resnet18 is not None
|
||||
assert parser_resnet34 is not None
|
||||
|
||||
|
||||
def test_bisenet_preprocess():
|
||||
"""Test preprocessing."""
|
||||
parser = BiSeNet()
|
||||
|
||||
# Create a dummy face image
|
||||
face_image = np.random.randint(0, 255, (256, 256, 3), dtype=np.uint8)
|
||||
|
||||
# Preprocess
|
||||
preprocessed = parser.preprocess(face_image)
|
||||
|
||||
assert preprocessed.shape == (1, 3, 512, 512)
|
||||
assert preprocessed.dtype == np.float32
|
||||
|
||||
|
||||
def test_bisenet_postprocess():
|
||||
"""Test postprocessing."""
|
||||
parser = BiSeNet()
|
||||
|
||||
# Create dummy model output (batch_size=1, num_classes=19, H=512, W=512)
|
||||
dummy_output = np.random.randn(1, 19, 512, 512).astype(np.float32)
|
||||
|
||||
# Postprocess
|
||||
mask = parser.postprocess(dummy_output, original_size=(256, 256))
|
||||
|
||||
assert mask.shape == (256, 256)
|
||||
assert mask.dtype == np.uint8
|
||||
assert mask.min() >= 0
|
||||
assert mask.max() < 19 # 19 classes (0-18)
|
||||
|
||||
|
||||
def test_bisenet_parse():
|
||||
"""Test end-to-end parsing."""
|
||||
parser = BiSeNet()
|
||||
|
||||
# Create a dummy face image
|
||||
face_image = np.random.randint(0, 255, (256, 256, 3), dtype=np.uint8)
|
||||
|
||||
# Parse
|
||||
mask = parser.parse(face_image)
|
||||
|
||||
assert mask.shape == (256, 256)
|
||||
assert mask.dtype == np.uint8
|
||||
assert mask.min() >= 0
|
||||
assert mask.max() < 19
|
||||
|
||||
|
||||
def test_bisenet_callable():
|
||||
"""Test that BiSeNet is callable."""
|
||||
parser = BiSeNet()
|
||||
face_image = np.random.randint(0, 255, (256, 256, 3), dtype=np.uint8)
|
||||
|
||||
# Should work as callable
|
||||
mask = parser(face_image)
|
||||
|
||||
assert mask.shape == (256, 256)
|
||||
assert mask.dtype == np.uint8
|
||||
|
||||
|
||||
def test_create_face_parser_with_enum():
|
||||
"""Test factory function with enum."""
|
||||
parser = create_face_parser(ParsingWeights.RESNET18)
|
||||
assert parser is not None
|
||||
assert isinstance(parser, BiSeNet)
|
||||
|
||||
|
||||
def test_create_face_parser_with_string():
|
||||
"""Test factory function with string."""
|
||||
parser = create_face_parser('parsing_resnet18')
|
||||
assert parser is not None
|
||||
assert isinstance(parser, BiSeNet)
|
||||
|
||||
|
||||
def test_create_face_parser_invalid_model():
|
||||
"""Test factory function with invalid model name."""
|
||||
with pytest.raises(ValueError, match='Unknown face parsing model'):
|
||||
create_face_parser('invalid_model')
|
||||
|
||||
|
||||
def test_bisenet_different_input_sizes():
|
||||
"""Test parsing with different input image sizes."""
|
||||
parser = BiSeNet()
|
||||
|
||||
# Test with different sizes
|
||||
sizes = [(128, 128), (256, 256), (512, 512), (640, 480)]
|
||||
|
||||
for h, w in sizes:
|
||||
face_image = np.random.randint(0, 255, (h, w, 3), dtype=np.uint8)
|
||||
mask = parser.parse(face_image)
|
||||
|
||||
assert mask.shape == (h, w), f'Failed for size {h}x{w}'
|
||||
assert mask.dtype == np.uint8
|
||||
@@ -1,3 +1,11 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Tests for face recognition models (ArcFace, MobileFace, SphereFace)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
@@ -41,13 +49,16 @@ def mock_landmarks():
|
||||
"""
|
||||
Create mock 5-point facial landmarks.
|
||||
"""
|
||||
return np.array([
|
||||
[38.2946, 51.6963],
|
||||
[73.5318, 51.5014],
|
||||
[56.0252, 71.7366],
|
||||
[41.5493, 92.3655],
|
||||
[70.7299, 92.2041]
|
||||
], dtype=np.float32)
|
||||
return np.array(
|
||||
[
|
||||
[38.2946, 51.6963],
|
||||
[73.5318, 51.5014],
|
||||
[56.0252, 71.7366],
|
||||
[41.5493, 92.3655],
|
||||
[70.7299, 92.2041],
|
||||
],
|
||||
dtype=np.float32,
|
||||
)
|
||||
|
||||
|
||||
# ArcFace Tests
|
||||
@@ -55,7 +66,7 @@ def test_arcface_initialization(arcface_model):
|
||||
"""
|
||||
Test that the ArcFace model initializes correctly.
|
||||
"""
|
||||
assert arcface_model is not None, "ArcFace model initialization failed."
|
||||
assert arcface_model is not None, 'ArcFace model initialization failed.'
|
||||
|
||||
|
||||
def test_arcface_embedding_shape(arcface_model, mock_aligned_face):
|
||||
@@ -65,8 +76,8 @@ def test_arcface_embedding_shape(arcface_model, mock_aligned_face):
|
||||
embedding = arcface_model.get_embedding(mock_aligned_face)
|
||||
|
||||
# ArcFace typically produces 512-dimensional embeddings
|
||||
assert embedding.shape[1] == 512, f"Expected 512-dim embedding, got {embedding.shape[1]}"
|
||||
assert embedding.shape[0] == 1, "Embedding should have batch dimension of 1"
|
||||
assert embedding.shape[1] == 512, f'Expected 512-dim embedding, got {embedding.shape[1]}'
|
||||
assert embedding.shape[0] == 1, 'Embedding should have batch dimension of 1'
|
||||
|
||||
|
||||
def test_arcface_normalized_embedding(arcface_model, mock_landmarks):
|
||||
@@ -80,7 +91,7 @@ def test_arcface_normalized_embedding(arcface_model, mock_landmarks):
|
||||
|
||||
# Check that embedding is normalized (L2 norm ≈ 1.0)
|
||||
norm = np.linalg.norm(embedding)
|
||||
assert np.isclose(norm, 1.0, atol=1e-5), f"Normalized embedding should have norm 1.0, got {norm}"
|
||||
assert np.isclose(norm, 1.0, atol=1e-5), f'Normalized embedding should have norm 1.0, got {norm}'
|
||||
|
||||
|
||||
def test_arcface_embedding_dtype(arcface_model, mock_aligned_face):
|
||||
@@ -88,7 +99,7 @@ def test_arcface_embedding_dtype(arcface_model, mock_aligned_face):
|
||||
Test that embeddings have the correct data type.
|
||||
"""
|
||||
embedding = arcface_model.get_embedding(mock_aligned_face)
|
||||
assert embedding.dtype == np.float32, f"Expected float32, got {embedding.dtype}"
|
||||
assert embedding.dtype == np.float32, f'Expected float32, got {embedding.dtype}'
|
||||
|
||||
|
||||
def test_arcface_consistency(arcface_model, mock_aligned_face):
|
||||
@@ -98,7 +109,7 @@ def test_arcface_consistency(arcface_model, mock_aligned_face):
|
||||
embedding1 = arcface_model.get_embedding(mock_aligned_face)
|
||||
embedding2 = arcface_model.get_embedding(mock_aligned_face)
|
||||
|
||||
assert np.allclose(embedding1, embedding2), "Same input should produce same embedding"
|
||||
assert np.allclose(embedding1, embedding2), 'Same input should produce same embedding'
|
||||
|
||||
|
||||
# MobileFace Tests
|
||||
@@ -106,7 +117,7 @@ def test_mobileface_initialization(mobileface_model):
|
||||
"""
|
||||
Test that the MobileFace model initializes correctly.
|
||||
"""
|
||||
assert mobileface_model is not None, "MobileFace model initialization failed."
|
||||
assert mobileface_model is not None, 'MobileFace model initialization failed.'
|
||||
|
||||
|
||||
def test_mobileface_embedding_shape(mobileface_model, mock_aligned_face):
|
||||
@@ -116,8 +127,8 @@ def test_mobileface_embedding_shape(mobileface_model, mock_aligned_face):
|
||||
embedding = mobileface_model.get_embedding(mock_aligned_face)
|
||||
|
||||
# MobileFace typically produces 512-dimensional embeddings
|
||||
assert embedding.shape[1] == 512, f"Expected 512-dim embedding, got {embedding.shape[1]}"
|
||||
assert embedding.shape[0] == 1, "Embedding should have batch dimension of 1"
|
||||
assert embedding.shape[1] == 512, f'Expected 512-dim embedding, got {embedding.shape[1]}'
|
||||
assert embedding.shape[0] == 1, 'Embedding should have batch dimension of 1'
|
||||
|
||||
|
||||
def test_mobileface_normalized_embedding(mobileface_model, mock_landmarks):
|
||||
@@ -129,7 +140,7 @@ def test_mobileface_normalized_embedding(mobileface_model, mock_landmarks):
|
||||
embedding = mobileface_model.get_normalized_embedding(mock_image, mock_landmarks)
|
||||
|
||||
norm = np.linalg.norm(embedding)
|
||||
assert np.isclose(norm, 1.0, atol=1e-5), f"Normalized embedding should have norm 1.0, got {norm}"
|
||||
assert np.isclose(norm, 1.0, atol=1e-5), f'Normalized embedding should have norm 1.0, got {norm}'
|
||||
|
||||
|
||||
# SphereFace Tests
|
||||
@@ -137,7 +148,7 @@ def test_sphereface_initialization(sphereface_model):
|
||||
"""
|
||||
Test that the SphereFace model initializes correctly.
|
||||
"""
|
||||
assert sphereface_model is not None, "SphereFace model initialization failed."
|
||||
assert sphereface_model is not None, 'SphereFace model initialization failed.'
|
||||
|
||||
|
||||
def test_sphereface_embedding_shape(sphereface_model, mock_aligned_face):
|
||||
@@ -147,8 +158,8 @@ def test_sphereface_embedding_shape(sphereface_model, mock_aligned_face):
|
||||
embedding = sphereface_model.get_embedding(mock_aligned_face)
|
||||
|
||||
# SphereFace typically produces 512-dimensional embeddings
|
||||
assert embedding.shape[1] == 512, f"Expected 512-dim embedding, got {embedding.shape[1]}"
|
||||
assert embedding.shape[0] == 1, "Embedding should have batch dimension of 1"
|
||||
assert embedding.shape[1] == 512, f'Expected 512-dim embedding, got {embedding.shape[1]}'
|
||||
assert embedding.shape[0] == 1, 'Embedding should have batch dimension of 1'
|
||||
|
||||
|
||||
def test_sphereface_normalized_embedding(sphereface_model, mock_landmarks):
|
||||
@@ -160,7 +171,7 @@ def test_sphereface_normalized_embedding(sphereface_model, mock_landmarks):
|
||||
embedding = sphereface_model.get_normalized_embedding(mock_image, mock_landmarks)
|
||||
|
||||
norm = np.linalg.norm(embedding)
|
||||
assert np.isclose(norm, 1.0, atol=1e-5), f"Normalized embedding should have norm 1.0, got {norm}"
|
||||
assert np.isclose(norm, 1.0, atol=1e-5), f'Normalized embedding should have norm 1.0, got {norm}'
|
||||
|
||||
|
||||
# Cross-model comparison tests
|
||||
@@ -173,8 +184,7 @@ def test_different_models_different_embeddings(arcface_model, mobileface_model,
|
||||
|
||||
# Embeddings should be different (with high probability for random input)
|
||||
# We check that they're not identical
|
||||
assert not np.allclose(arcface_emb, mobileface_emb), \
|
||||
"Different models should produce different embeddings"
|
||||
assert not np.allclose(arcface_emb, mobileface_emb), 'Different models should produce different embeddings'
|
||||
|
||||
|
||||
def test_embedding_similarity_computation(arcface_model, mock_aligned_face):
|
||||
@@ -191,10 +201,11 @@ def test_embedding_similarity_computation(arcface_model, mock_aligned_face):
|
||||
|
||||
# Compute cosine similarity
|
||||
from uniface import compute_similarity
|
||||
|
||||
similarity = compute_similarity(emb1, emb2)
|
||||
|
||||
# Similarity should be between -1 and 1
|
||||
assert -1.0 <= similarity <= 1.0, f"Similarity should be in [-1, 1], got {similarity}"
|
||||
assert -1.0 <= similarity <= 1.0, f'Similarity should be in [-1, 1], got {similarity}'
|
||||
|
||||
|
||||
def test_same_face_high_similarity(arcface_model, mock_aligned_face):
|
||||
@@ -205,7 +216,8 @@ def test_same_face_high_similarity(arcface_model, mock_aligned_face):
|
||||
emb2 = arcface_model.get_embedding(mock_aligned_face)
|
||||
|
||||
from uniface import compute_similarity
|
||||
|
||||
similarity = compute_similarity(emb1, emb2)
|
||||
|
||||
# Same image should have similarity close to 1.0
|
||||
assert similarity > 0.99, f"Same face should have similarity > 0.99, got {similarity}"
|
||||
assert similarity > 0.99, f'Same face should have similarity > 0.99, got {similarity}'
|
||||
|
||||
@@ -1,3 +1,11 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Tests for RetinaFace detector."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
@@ -9,35 +17,35 @@ from uniface.detection import RetinaFace
|
||||
def retinaface_model():
|
||||
return RetinaFace(
|
||||
model_name=RetinaFaceWeights.MNET_V2,
|
||||
conf_thresh=0.5,
|
||||
confidence_threshold=0.5,
|
||||
pre_nms_topk=5000,
|
||||
nms_thresh=0.4,
|
||||
nms_threshold=0.4,
|
||||
post_nms_topk=750,
|
||||
)
|
||||
|
||||
|
||||
def test_model_initialization(retinaface_model):
|
||||
assert retinaface_model is not None, "Model initialization failed."
|
||||
assert retinaface_model is not None, 'Model initialization failed.'
|
||||
|
||||
|
||||
def test_inference_on_640x640_image(retinaface_model):
|
||||
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
faces = retinaface_model.detect(mock_image)
|
||||
|
||||
assert isinstance(faces, list), "Detections should be a list."
|
||||
assert isinstance(faces, list), 'Detections should be a list.'
|
||||
|
||||
for face in faces:
|
||||
assert isinstance(face, dict), "Each detection should be a dictionary."
|
||||
assert "bbox" in face, "Each detection should have a 'bbox' key."
|
||||
assert "confidence" in face, "Each detection should have a 'confidence' key."
|
||||
assert "landmarks" in face, "Each detection should have a 'landmarks' key."
|
||||
# Face is a dataclass, check attributes exist
|
||||
assert hasattr(face, 'bbox'), "Each detection should have a 'bbox' attribute."
|
||||
assert hasattr(face, 'confidence'), "Each detection should have a 'confidence' attribute."
|
||||
assert hasattr(face, 'landmarks'), "Each detection should have a 'landmarks' attribute."
|
||||
|
||||
bbox = face["bbox"]
|
||||
assert len(bbox) == 4, "BBox should have 4 values (x1, y1, x2, y2)."
|
||||
bbox = face.bbox
|
||||
assert len(bbox) == 4, 'BBox should have 4 values (x1, y1, x2, y2).'
|
||||
|
||||
landmarks = face["landmarks"]
|
||||
assert len(landmarks) == 5, "Should have 5 landmark points."
|
||||
assert all(len(pt) == 2 for pt in landmarks), "Each landmark should be (x, y)."
|
||||
landmarks = face.landmarks
|
||||
assert len(landmarks) == 5, 'Should have 5 landmark points.'
|
||||
assert all(len(pt) == 2 for pt in landmarks), 'Each landmark should be (x, y).'
|
||||
|
||||
|
||||
def test_confidence_threshold(retinaface_model):
|
||||
@@ -45,11 +53,11 @@ def test_confidence_threshold(retinaface_model):
|
||||
faces = retinaface_model.detect(mock_image)
|
||||
|
||||
for face in faces:
|
||||
confidence = face["confidence"]
|
||||
assert confidence >= 0.5, f"Detection has confidence {confidence} below threshold 0.5"
|
||||
confidence = face.confidence
|
||||
assert confidence >= 0.5, f'Detection has confidence {confidence} below threshold 0.5'
|
||||
|
||||
|
||||
def test_no_faces_detected(retinaface_model):
|
||||
empty_image = np.zeros((640, 640, 3), dtype=np.uint8)
|
||||
faces = retinaface_model.detect(empty_image)
|
||||
assert len(faces) == 0, "Should detect no faces in a blank image."
|
||||
assert len(faces) == 0, 'Should detect no faces in a blank image.'
|
||||
|
||||
@@ -1,3 +1,11 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Tests for SCRFD detector."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
@@ -9,33 +17,33 @@ from uniface.detection import SCRFD
|
||||
def scrfd_model():
|
||||
return SCRFD(
|
||||
model_name=SCRFDWeights.SCRFD_500M_KPS,
|
||||
conf_thresh=0.5,
|
||||
nms_thresh=0.4,
|
||||
confidence_threshold=0.5,
|
||||
nms_threshold=0.4,
|
||||
)
|
||||
|
||||
|
||||
def test_model_initialization(scrfd_model):
|
||||
assert scrfd_model is not None, "Model initialization failed."
|
||||
assert scrfd_model is not None, 'Model initialization failed.'
|
||||
|
||||
|
||||
def test_inference_on_640x640_image(scrfd_model):
|
||||
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
faces = scrfd_model.detect(mock_image)
|
||||
|
||||
assert isinstance(faces, list), "Detections should be a list."
|
||||
assert isinstance(faces, list), 'Detections should be a list.'
|
||||
|
||||
for face in faces:
|
||||
assert isinstance(face, dict), "Each detection should be a dictionary."
|
||||
assert "bbox" in face, "Each detection should have a 'bbox' key."
|
||||
assert "confidence" in face, "Each detection should have a 'confidence' key."
|
||||
assert "landmarks" in face, "Each detection should have a 'landmarks' key."
|
||||
# Face is a dataclass, check attributes exist
|
||||
assert hasattr(face, 'bbox'), "Each detection should have a 'bbox' attribute."
|
||||
assert hasattr(face, 'confidence'), "Each detection should have a 'confidence' attribute."
|
||||
assert hasattr(face, 'landmarks'), "Each detection should have a 'landmarks' attribute."
|
||||
|
||||
bbox = face["bbox"]
|
||||
assert len(bbox) == 4, "BBox should have 4 values (x1, y1, x2, y2)."
|
||||
bbox = face.bbox
|
||||
assert len(bbox) == 4, 'BBox should have 4 values (x1, y1, x2, y2).'
|
||||
|
||||
landmarks = face["landmarks"]
|
||||
assert len(landmarks) == 5, "Should have 5 landmark points."
|
||||
assert all(len(pt) == 2 for pt in landmarks), "Each landmark should be (x, y)."
|
||||
landmarks = face.landmarks
|
||||
assert len(landmarks) == 5, 'Should have 5 landmark points.'
|
||||
assert all(len(pt) == 2 for pt in landmarks), 'Each landmark should be (x, y).'
|
||||
|
||||
|
||||
def test_confidence_threshold(scrfd_model):
|
||||
@@ -43,14 +51,14 @@ def test_confidence_threshold(scrfd_model):
|
||||
faces = scrfd_model.detect(mock_image)
|
||||
|
||||
for face in faces:
|
||||
confidence = face["confidence"]
|
||||
assert confidence >= 0.5, f"Detection has confidence {confidence} below threshold 0.5"
|
||||
confidence = face.confidence
|
||||
assert confidence >= 0.5, f'Detection has confidence {confidence} below threshold 0.5'
|
||||
|
||||
|
||||
def test_no_faces_detected(scrfd_model):
|
||||
empty_image = np.zeros((640, 640, 3), dtype=np.uint8)
|
||||
faces = scrfd_model.detect(empty_image)
|
||||
assert len(faces) == 0, "Should detect no faces in a blank image."
|
||||
assert len(faces) == 0, 'Should detect no faces in a blank image.'
|
||||
|
||||
|
||||
def test_different_input_sizes(scrfd_model):
|
||||
@@ -59,13 +67,13 @@ def test_different_input_sizes(scrfd_model):
|
||||
for size in test_sizes:
|
||||
mock_image = np.random.randint(0, 255, size, dtype=np.uint8)
|
||||
faces = scrfd_model.detect(mock_image)
|
||||
assert isinstance(faces, list), f"Should return list for size {size}"
|
||||
assert isinstance(faces, list), f'Should return list for size {size}'
|
||||
|
||||
|
||||
def test_scrfd_10g_model():
|
||||
model = SCRFD(model_name=SCRFDWeights.SCRFD_10G_KPS, conf_thresh=0.5)
|
||||
assert model is not None, "SCRFD 10G model initialization failed."
|
||||
model = SCRFD(model_name=SCRFDWeights.SCRFD_10G_KPS, confidence_threshold=0.5)
|
||||
assert model is not None, 'SCRFD 10G model initialization failed.'
|
||||
|
||||
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
faces = model.detect(mock_image)
|
||||
assert isinstance(faces, list), "SCRFD 10G should return list of detections."
|
||||
assert isinstance(faces, list), 'SCRFD 10G should return list of detections.'
|
||||
|
||||
282
tests/test_types.py
Normal file
282
tests/test_types.py
Normal file
@@ -0,0 +1,282 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Tests for UniFace type definitions (dataclasses)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from uniface.types import AttributeResult, EmotionResult, Face, GazeResult, SpoofingResult
|
||||
|
||||
|
||||
class TestGazeResult:
    """Behavioural checks for the GazeResult dataclass."""

    def test_creation(self):
        """Keyword arguments passed to the constructor are readable as attributes."""
        gaze = GazeResult(pitch=0.1, yaw=-0.2)
        assert (gaze.pitch, gaze.yaw) == (0.1, -0.2)

    def test_immutability(self):
        """Assigning to a field is expected to raise AttributeError."""
        gaze = GazeResult(pitch=0.1, yaw=-0.2)
        with pytest.raises(AttributeError):
            gaze.pitch = 0.5  # type: ignore

    def test_repr(self):
        """repr() mentions the class name and both angle values."""
        text = repr(GazeResult(pitch=0.1234, yaw=-0.5678))
        for fragment in ('GazeResult', '0.1234', '-0.5678'):
            assert fragment in text

    def test_equality(self):
        """Two instances built from the same values compare equal."""
        first = GazeResult(pitch=0.1, yaw=-0.2)
        second = GazeResult(pitch=0.1, yaw=-0.2)
        assert first == second

    def test_hashable(self):
        """Instances can be hashed and therefore stored in sets/dicts."""
        gaze = GazeResult(pitch=0.1, yaw=-0.2)
        # hash() itself must not raise
        hash(gaze)
        # membership lookup exercises both __hash__ and __eq__
        members = {gaze}
        assert gaze in members
|
||||
|
||||
|
||||
class TestSpoofingResult:
    """Behavioural checks for the SpoofingResult dataclass."""

    def test_creation_real(self):
        """A 'real' verdict keeps its flag and confidence."""
        verdict = SpoofingResult(is_real=True, confidence=0.95)
        assert verdict.is_real is True
        assert verdict.confidence == 0.95

    def test_creation_fake(self):
        """A 'fake' verdict keeps its flag and confidence."""
        verdict = SpoofingResult(is_real=False, confidence=0.87)
        assert verdict.is_real is False
        assert verdict.confidence == 0.87

    def test_immutability(self):
        """Assigning to a field is expected to raise AttributeError."""
        verdict = SpoofingResult(is_real=True, confidence=0.95)
        with pytest.raises(AttributeError):
            verdict.is_real = False  # type: ignore

    def test_repr_real(self):
        """repr() of a real verdict names the class, says 'Real' and shows the score."""
        text = repr(SpoofingResult(is_real=True, confidence=0.9512))
        for fragment in ('SpoofingResult', 'Real', '0.9512'):
            assert fragment in text

    def test_repr_fake(self):
        """repr() of a fake verdict says 'Fake'."""
        text = repr(SpoofingResult(is_real=False, confidence=0.8765))
        assert 'Fake' in text

    def test_hashable(self):
        """hash() on an instance must not raise."""
        hash(SpoofingResult(is_real=True, confidence=0.95))
|
||||
|
||||
|
||||
class TestEmotionResult:
    """Behavioural checks for the EmotionResult dataclass."""

    def test_creation(self):
        """Label and confidence given to the constructor are readable as attributes."""
        outcome = EmotionResult(emotion='Happy', confidence=0.92)
        assert outcome.emotion == 'Happy'
        assert outcome.confidence == 0.92

    def test_immutability(self):
        """Assigning to a field is expected to raise AttributeError."""
        outcome = EmotionResult(emotion='Sad', confidence=0.75)
        with pytest.raises(AttributeError):
            outcome.emotion = 'Happy'  # type: ignore

    def test_repr(self):
        """repr() mentions the class name, the label and the confidence."""
        text = repr(EmotionResult(emotion='Angry', confidence=0.8123))
        for fragment in ('EmotionResult', 'Angry', '0.8123'):
            assert fragment in text

    def test_various_emotions(self):
        """Each of the seven listed labels round-trips through the constructor."""
        labels = ('Neutral', 'Happy', 'Sad', 'Surprise', 'Fear', 'Disgust', 'Angry')
        for label in labels:
            assert EmotionResult(emotion=label, confidence=0.5).emotion == label

    def test_hashable(self):
        """hash() on an instance must not raise."""
        hash(EmotionResult(emotion='Happy', confidence=0.92))
|
||||
|
||||
|
||||
class TestAttributeResult:
    """Behavioural checks for the AttributeResult dataclass."""

    def test_age_gender_result(self):
        """Only gender and age given: age_group and race stay None, sex reads 'Male'."""
        attrs = AttributeResult(gender=1, age=25)
        assert (attrs.gender, attrs.age) == (1, 25)
        assert attrs.age_group is None
        assert attrs.race is None
        assert attrs.sex == 'Male'

    def test_fairface_result(self):
        """Gender, age_group and race given: age stays None, sex reads 'Female'."""
        attrs = AttributeResult(gender=0, age_group='20-29', race='East Asian')
        assert attrs.gender == 0
        assert attrs.age is None
        assert (attrs.age_group, attrs.race) == ('20-29', 'East Asian')
        assert attrs.sex == 'Female'

    def test_sex_property_female(self):
        """gender=0 is reported as 'Female'."""
        assert AttributeResult(gender=0).sex == 'Female'

    def test_sex_property_male(self):
        """gender=1 is reported as 'Male'."""
        assert AttributeResult(gender=1).sex == 'Male'

    def test_immutability(self):
        """Assigning to a field is expected to raise AttributeError."""
        attrs = AttributeResult(gender=1, age=30)
        with pytest.raises(AttributeError):
            attrs.age = 31  # type: ignore

    def test_repr_age_gender(self):
        """repr() for the gender+age form shows the class name, sex and age."""
        text = repr(AttributeResult(gender=1, age=25))
        for fragment in ('AttributeResult', 'Male', 'age=25'):
            assert fragment in text

    def test_repr_fairface(self):
        """repr() for the FairFace form shows sex, age group and race."""
        text = repr(AttributeResult(gender=0, age_group='30-39', race='White'))
        for fragment in ('Female', 'age_group=30-39', 'race=White'):
            assert fragment in text

    def test_hashable(self):
        """hash() on an instance must not raise."""
        hash(AttributeResult(gender=1, age=25))
|
||||
|
||||
|
||||
class TestFace:
    """Behavioural checks for the Face dataclass."""

    @pytest.fixture
    def sample_face(self):
        """Fresh Face with a 4-value bbox, a confidence and five (x, y) landmarks."""
        box = np.array([100, 100, 200, 200])
        points = np.array([[120, 130], [180, 130], [150, 160], [130, 180], [170, 180]])
        return Face(bbox=box, confidence=0.95, landmarks=points)

    def test_creation(self, sample_face):
        """Constructor values are stored with the expected shapes."""
        assert sample_face.confidence == 0.95
        assert sample_face.bbox.shape == (4,)
        assert sample_face.landmarks.shape == (5, 2)

    def test_optional_attributes_default_none(self, sample_face):
        """Every enrichment attribute starts out as None."""
        optional_fields = (
            'embedding',
            'gender',
            'age',
            'age_group',
            'race',
            'emotion',
            'emotion_confidence',
        )
        for field_name in optional_fields:
            assert getattr(sample_face, field_name) is None

    def test_mutability(self, sample_face):
        """Face should be mutable for FaceAnalyzer enrichment."""
        sample_face.gender = 1
        sample_face.age = 25
        sample_face.embedding = np.random.randn(512)

        assert (sample_face.gender, sample_face.age) == (1, 25)
        assert sample_face.embedding.shape == (512,)

    def test_sex_property_none(self, sample_face):
        """With no gender set, sex is None."""
        assert sample_face.sex is None

    def test_sex_property_female(self, sample_face):
        """gender=0 is reported as 'Female'."""
        sample_face.gender = 0
        assert sample_face.sex == 'Female'

    def test_sex_property_male(self, sample_face):
        """gender=1 is reported as 'Male'."""
        sample_face.gender = 1
        assert sample_face.sex == 'Male'

    def test_bbox_xyxy(self, sample_face):
        """bbox_xyxy returns the corner coordinates given at construction."""
        np.testing.assert_array_equal(sample_face.bbox_xyxy, [100, 100, 200, 200])

    def test_bbox_xywh(self, sample_face):
        """bbox_xywh returns [100, 100, 100, 100] for the [100, 100, 200, 200] box."""
        np.testing.assert_array_equal(sample_face.bbox_xywh, [100, 100, 100, 100])

    def test_to_dict(self, sample_face):
        """to_dict() yields a dict holding at least bbox, confidence and landmarks."""
        as_dict = sample_face.to_dict()
        assert isinstance(as_dict, dict)
        for key in ('bbox', 'confidence', 'landmarks'):
            assert key in as_dict

    def test_repr_minimal(self, sample_face):
        """repr() names the class and prints the confidence with three decimals."""
        text = repr(sample_face)
        assert 'Face' in text
        assert 'confidence=0.950' in text

    def test_repr_with_attributes(self, sample_face):
        """repr() includes age, sex and emotion once they are set."""
        sample_face.gender = 1
        sample_face.age = 30
        sample_face.emotion = 'Happy'

        text = repr(sample_face)
        for fragment in ('age=30', 'sex=Male', 'emotion=Happy'):
            assert fragment in text

    def test_compute_similarity_no_embeddings(self, sample_face):
        """Comparing faces that lack embeddings raises ValueError."""
        stranger = Face(
            bbox=np.array([50, 50, 150, 150]),
            confidence=0.90,
            landmarks=np.random.randn(5, 2),
        )
        with pytest.raises(ValueError, match='Both faces must have embeddings'):
            sample_face.compute_similarity(stranger)

    def test_compute_similarity_with_embeddings(self, sample_face):
        """Two random unit-length embeddings give a float similarity in [-1, 1]."""
        # Unit-normalise a random vector for the fixture face
        own_vector = np.random.randn(512)
        own_vector /= np.linalg.norm(own_vector)
        sample_face.embedding = own_vector

        stranger = Face(
            bbox=np.array([50, 50, 150, 150]),
            confidence=0.90,
            landmarks=np.random.randn(5, 2),
        )
        other_vector = np.random.randn(512)
        other_vector /= np.linalg.norm(other_vector)
        stranger.embedding = other_vector

        score = sample_face.compute_similarity(stranger)
        assert isinstance(score, float)
        assert -1 <= score <= 1

    def test_compute_similarity_same_embedding(self, sample_face):
        """Identical unit-length embeddings give a similarity of ~1.0."""
        unit_vector = np.random.randn(512)
        unit_vector /= np.linalg.norm(unit_vector)
        sample_face.embedding = unit_vector.copy()

        twin = Face(
            bbox=np.array([50, 50, 150, 150]),
            confidence=0.90,
            landmarks=np.random.randn(5, 2),
            embedding=unit_vector.copy(),
        )

        score = sample_face.compute_similarity(twin)
        assert score == pytest.approx(1.0, abs=1e-5)
|
||||
@@ -1,3 +1,11 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Tests for utility functions (compute_similarity, face_alignment, etc.)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
@@ -18,13 +26,16 @@ def mock_landmarks():
|
||||
Create mock 5-point facial landmarks.
|
||||
Standard positions for a face roughly centered at (112/2, 112/2).
|
||||
"""
|
||||
return np.array([
|
||||
[38.2946, 51.6963], # Left eye
|
||||
[73.5318, 51.5014], # Right eye
|
||||
[56.0252, 71.7366], # Nose
|
||||
[41.5493, 92.3655], # Left mouth corner
|
||||
[70.7299, 92.2041] # Right mouth corner
|
||||
], dtype=np.float32)
|
||||
return np.array(
|
||||
[
|
||||
[38.2946, 51.6963], # Left eye
|
||||
[73.5318, 51.5014], # Right eye
|
||||
[56.0252, 71.7366], # Nose
|
||||
[41.5493, 92.3655], # Left mouth corner
|
||||
[70.7299, 92.2041], # Right mouth corner
|
||||
],
|
||||
dtype=np.float32,
|
||||
)
|
||||
|
||||
|
||||
# compute_similarity tests
|
||||
@@ -36,7 +47,7 @@ def test_compute_similarity_same_embedding():
|
||||
embedding = embedding / np.linalg.norm(embedding) # Normalize
|
||||
|
||||
similarity = compute_similarity(embedding, embedding)
|
||||
assert np.isclose(similarity, 1.0, atol=1e-5), f"Self-similarity should be 1.0, got {similarity}"
|
||||
assert np.isclose(similarity, 1.0, atol=1e-5), f'Self-similarity should be 1.0, got {similarity}'
|
||||
|
||||
|
||||
def test_compute_similarity_range():
|
||||
@@ -53,7 +64,7 @@ def test_compute_similarity_range():
|
||||
emb2 = emb2 / np.linalg.norm(emb2)
|
||||
|
||||
similarity = compute_similarity(emb1, emb2)
|
||||
assert -1.0 <= similarity <= 1.0, f"Similarity should be in [-1, 1], got {similarity}"
|
||||
assert -1.0 <= similarity <= 1.0, f'Similarity should be in [-1, 1], got {similarity}'
|
||||
|
||||
|
||||
def test_compute_similarity_orthogonal():
|
||||
@@ -68,7 +79,7 @@ def test_compute_similarity_orthogonal():
|
||||
emb2[0, 1] = 1.0 # [0, 1, 0, ..., 0]
|
||||
|
||||
similarity = compute_similarity(emb1, emb2)
|
||||
assert np.isclose(similarity, 0.0, atol=1e-5), f"Orthogonal embeddings should have similarity 0.0, got {similarity}"
|
||||
assert np.isclose(similarity, 0.0, atol=1e-5), f'Orthogonal embeddings should have similarity 0.0, got {similarity}'
|
||||
|
||||
|
||||
def test_compute_similarity_opposite():
|
||||
@@ -81,7 +92,7 @@ def test_compute_similarity_opposite():
|
||||
emb2 = -emb1 # Opposite direction
|
||||
|
||||
similarity = compute_similarity(emb1, emb2)
|
||||
assert np.isclose(similarity, -1.0, atol=1e-5), f"Opposite embeddings should have similarity -1.0, got {similarity}"
|
||||
assert np.isclose(similarity, -1.0, atol=1e-5), f'Opposite embeddings should have similarity -1.0, got {similarity}'
|
||||
|
||||
|
||||
def test_compute_similarity_symmetry():
|
||||
@@ -98,7 +109,7 @@ def test_compute_similarity_symmetry():
|
||||
sim_12 = compute_similarity(emb1, emb2)
|
||||
sim_21 = compute_similarity(emb2, emb1)
|
||||
|
||||
assert np.isclose(sim_12, sim_21), "Similarity should be symmetric"
|
||||
assert np.isclose(sim_12, sim_21), 'Similarity should be symmetric'
|
||||
|
||||
|
||||
def test_compute_similarity_dtype():
|
||||
@@ -113,7 +124,7 @@ def test_compute_similarity_dtype():
|
||||
emb2 = emb2 / np.linalg.norm(emb2)
|
||||
|
||||
similarity = compute_similarity(emb1, emb2)
|
||||
assert isinstance(similarity, (float, np.floating)), f"Similarity should be float, got {type(similarity)}"
|
||||
assert isinstance(similarity, float | np.floating), f'Similarity should be float, got {type(similarity)}'
|
||||
|
||||
|
||||
# face_alignment tests
|
||||
@@ -123,7 +134,7 @@ def test_face_alignment_output_shape(mock_image, mock_landmarks):
|
||||
"""
|
||||
aligned, _ = face_alignment(mock_image, mock_landmarks, image_size=(112, 112))
|
||||
|
||||
assert aligned.shape == (112, 112, 3), f"Expected shape (112, 112, 3), got {aligned.shape}"
|
||||
assert aligned.shape == (112, 112, 3), f'Expected shape (112, 112, 3), got {aligned.shape}'
|
||||
|
||||
|
||||
def test_face_alignment_dtype(mock_image, mock_landmarks):
|
||||
@@ -132,7 +143,7 @@ def test_face_alignment_dtype(mock_image, mock_landmarks):
|
||||
"""
|
||||
aligned, _ = face_alignment(mock_image, mock_landmarks, image_size=(112, 112))
|
||||
|
||||
assert aligned.dtype == np.uint8, f"Expected uint8, got {aligned.dtype}"
|
||||
assert aligned.dtype == np.uint8, f'Expected uint8, got {aligned.dtype}'
|
||||
|
||||
|
||||
def test_face_alignment_different_sizes(mock_image, mock_landmarks):
|
||||
@@ -144,7 +155,7 @@ def test_face_alignment_different_sizes(mock_image, mock_landmarks):
|
||||
|
||||
for size in test_sizes:
|
||||
aligned, _ = face_alignment(mock_image, mock_landmarks, image_size=size)
|
||||
assert aligned.shape == (*size, 3), f"Failed for size {size}"
|
||||
assert aligned.shape == (*size, 3), f'Failed for size {size}'
|
||||
|
||||
|
||||
def test_face_alignment_consistency(mock_image, mock_landmarks):
|
||||
@@ -154,7 +165,7 @@ def test_face_alignment_consistency(mock_image, mock_landmarks):
|
||||
aligned1, _ = face_alignment(mock_image, mock_landmarks, image_size=(112, 112))
|
||||
aligned2, _ = face_alignment(mock_image, mock_landmarks, image_size=(112, 112))
|
||||
|
||||
assert np.allclose(aligned1, aligned2), "Same input should produce same aligned face"
|
||||
assert np.allclose(aligned1, aligned2), 'Same input should produce same aligned face'
|
||||
|
||||
|
||||
def test_face_alignment_landmarks_as_list(mock_image):
|
||||
@@ -166,13 +177,13 @@ def test_face_alignment_landmarks_as_list(mock_image):
|
||||
[73.5318, 51.5014],
|
||||
[56.0252, 71.7366],
|
||||
[41.5493, 92.3655],
|
||||
[70.7299, 92.2041]
|
||||
[70.7299, 92.2041],
|
||||
]
|
||||
|
||||
# Convert list to numpy array before passing to face_alignment
|
||||
landmarks_array = np.array(landmarks_list, dtype=np.float32)
|
||||
aligned, _ = face_alignment(mock_image, landmarks_array, image_size=(112, 112))
|
||||
assert aligned.shape == (112, 112, 3), "Should work with landmarks as array"
|
||||
assert aligned.shape == (112, 112, 3), 'Should work with landmarks as array'
|
||||
|
||||
|
||||
def test_face_alignment_value_range(mock_image, mock_landmarks):
|
||||
@@ -181,8 +192,8 @@ def test_face_alignment_value_range(mock_image, mock_landmarks):
|
||||
"""
|
||||
aligned, _ = face_alignment(mock_image, mock_landmarks, image_size=(112, 112))
|
||||
|
||||
assert np.all(aligned >= 0), "Pixel values should be >= 0"
|
||||
assert np.all(aligned <= 255), "Pixel values should be <= 255"
|
||||
assert np.all(aligned >= 0), 'Pixel values should be >= 0'
|
||||
assert np.all(aligned <= 255), 'Pixel values should be <= 255'
|
||||
|
||||
|
||||
def test_face_alignment_not_all_zeros(mock_image, mock_landmarks):
|
||||
@@ -192,7 +203,7 @@ def test_face_alignment_not_all_zeros(mock_image, mock_landmarks):
|
||||
aligned, _ = face_alignment(mock_image, mock_landmarks, image_size=(112, 112))
|
||||
|
||||
# At least some pixels should be non-zero
|
||||
assert np.any(aligned > 0), "Aligned face should have some non-zero pixels"
|
||||
assert np.any(aligned > 0), 'Aligned face should have some non-zero pixels'
|
||||
|
||||
|
||||
def test_face_alignment_from_different_positions(mock_image):
|
||||
@@ -201,14 +212,23 @@ def test_face_alignment_from_different_positions(mock_image):
|
||||
"""
|
||||
# Landmarks at different positions
|
||||
positions = [
|
||||
np.array([[100, 100], [150, 100], [125, 130], [110, 150], [140, 150]], dtype=np.float32),
|
||||
np.array([[300, 200], [350, 200], [325, 230], [310, 250], [340, 250]], dtype=np.float32),
|
||||
np.array([[500, 400], [550, 400], [525, 430], [510, 450], [540, 450]], dtype=np.float32),
|
||||
np.array(
|
||||
[[100, 100], [150, 100], [125, 130], [110, 150], [140, 150]],
|
||||
dtype=np.float32,
|
||||
),
|
||||
np.array(
|
||||
[[300, 200], [350, 200], [325, 230], [310, 250], [340, 250]],
|
||||
dtype=np.float32,
|
||||
),
|
||||
np.array(
|
||||
[[500, 400], [550, 400], [525, 430], [510, 450], [540, 450]],
|
||||
dtype=np.float32,
|
||||
),
|
||||
]
|
||||
|
||||
for landmarks in positions:
|
||||
aligned, _ = face_alignment(mock_image, landmarks, image_size=(112, 112))
|
||||
assert aligned.shape == (112, 112, 3), f"Failed for landmarks at {landmarks[0]}"
|
||||
assert aligned.shape == (112, 112, 3), f'Failed for landmarks at {landmarks[0]}'
|
||||
|
||||
|
||||
def test_face_alignment_landmark_count(mock_image):
|
||||
@@ -216,16 +236,19 @@ def test_face_alignment_landmark_count(mock_image):
|
||||
Test that face_alignment works specifically with 5-point landmarks.
|
||||
"""
|
||||
# Standard 5-point landmarks
|
||||
landmarks_5pt = np.array([
|
||||
[38.2946, 51.6963],
|
||||
[73.5318, 51.5014],
|
||||
[56.0252, 71.7366],
|
||||
[41.5493, 92.3655],
|
||||
[70.7299, 92.2041]
|
||||
], dtype=np.float32)
|
||||
landmarks_5pt = np.array(
|
||||
[
|
||||
[38.2946, 51.6963],
|
||||
[73.5318, 51.5014],
|
||||
[56.0252, 71.7366],
|
||||
[41.5493, 92.3655],
|
||||
[70.7299, 92.2041],
|
||||
],
|
||||
dtype=np.float32,
|
||||
)
|
||||
|
||||
aligned, _ = face_alignment(mock_image, landmarks_5pt, image_size=(112, 112))
|
||||
assert aligned.shape == (112, 112, 3), "Should work with 5-point landmarks"
|
||||
assert aligned.shape == (112, 112, 3), 'Should work with 5-point landmarks'
|
||||
|
||||
|
||||
def test_compute_similarity_with_recognition_embeddings():
|
||||
@@ -244,4 +267,4 @@ def test_compute_similarity_with_recognition_embeddings():
|
||||
|
||||
# Should be a valid similarity score
|
||||
assert -1.0 <= similarity <= 1.0
|
||||
assert isinstance(similarity, (float, np.floating))
|
||||
assert isinstance(similarity, float | np.floating)
|
||||
|
||||
121
tools/README.md
Normal file
121
tools/README.md
Normal file
@@ -0,0 +1,121 @@
|
||||
# Tools
|
||||
|
||||
CLI utilities for testing and running UniFace features.
|
||||
|
||||
## Available Tools
|
||||
|
||||
| Tool | Description |
|
||||
|------|-------------|
|
||||
| `detection.py` | Face detection on image, video, or webcam |
|
||||
| `face_anonymize.py` | Face anonymization/blurring for privacy |
|
||||
| `age_gender.py` | Age and gender prediction |
|
||||
| `face_emotion.py` | Emotion detection (7 or 8 emotions) |
|
||||
| `gaze_estimation.py` | Gaze direction estimation |
|
||||
| `landmarks.py` | 106-point facial landmark detection |
|
||||
| `recognition.py` | Face embedding extraction and comparison |
|
||||
| `face_analyzer.py` | Complete face analysis (detection + recognition + attributes) |
|
||||
| `face_search.py` | Real-time face matching against reference |
|
||||
| `fairface.py` | FairFace attribute prediction (race, gender, age) |
|
||||
| `spoofing.py` | Face anti-spoofing detection |
|
||||
| `face_parsing.py` | Face semantic segmentation |
|
||||
| `video_detection.py` | Face detection on video files with progress bar |
|
||||
| `batch_process.py` | Batch process folder of images |
|
||||
| `download_model.py` | Download model weights |
|
||||
| `sha256_generate.py` | Generate SHA256 hash for model files |
|
||||
|
||||
## Unified `--source` Pattern
|
||||
|
||||
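Most tools accept a unified `--source` argument (`recognition.py`, `batch_process.py`, and `download_model.py` take their own arguments, as shown in the usage examples below). `--source` accepts: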
|
||||
- **Image path**: `--source photo.jpg`
|
||||
- **Video path**: `--source video.mp4`
|
||||
- **Camera ID**: `--source 0` (default webcam), `--source 1` (external camera)
|
||||
|
||||
## Usage Examples
|
||||
|
||||
```bash
|
||||
# Face detection
|
||||
python tools/detection.py --source assets/test.jpg # image
|
||||
python tools/detection.py --source video.mp4 # video
|
||||
python tools/detection.py --source 0 # webcam
|
||||
|
||||
# Face anonymization
|
||||
python tools/face_anonymize.py --source assets/test.jpg --method pixelate
|
||||
python tools/face_anonymize.py --source video.mp4 --method gaussian
|
||||
python tools/face_anonymize.py --source 0 --method pixelate
|
||||
|
||||
# Age and gender
|
||||
python tools/age_gender.py --source assets/test.jpg
|
||||
python tools/age_gender.py --source 0
|
||||
|
||||
# Emotion detection
|
||||
python tools/face_emotion.py --source assets/test.jpg
|
||||
python tools/face_emotion.py --source 0
|
||||
|
||||
# Gaze estimation
|
||||
python tools/gaze_estimation.py --source assets/test.jpg
|
||||
python tools/gaze_estimation.py --source 0
|
||||
|
||||
# Landmarks
|
||||
python tools/landmarks.py --source assets/test.jpg
|
||||
python tools/landmarks.py --source 0
|
||||
|
||||
# FairFace attributes
|
||||
python tools/fairface.py --source assets/test.jpg
|
||||
python tools/fairface.py --source 0
|
||||
|
||||
# Face parsing
|
||||
python tools/face_parsing.py --source assets/test.jpg
|
||||
python tools/face_parsing.py --source 0
|
||||
|
||||
# Face anti-spoofing
|
||||
python tools/spoofing.py --source assets/test.jpg
|
||||
python tools/spoofing.py --source 0
|
||||
|
||||
# Face analyzer
|
||||
python tools/face_analyzer.py --source assets/test.jpg
|
||||
python tools/face_analyzer.py --source 0
|
||||
|
||||
# Face recognition (extract embedding)
|
||||
python tools/recognition.py --image assets/test.jpg
|
||||
|
||||
# Face comparison
|
||||
python tools/recognition.py --image1 face1.jpg --image2 face2.jpg
|
||||
|
||||
# Face search (match against reference)
|
||||
python tools/face_search.py --reference person.jpg --source 0
|
||||
python tools/face_search.py --reference person.jpg --source video.mp4
|
||||
|
||||
# Video processing with progress bar
|
||||
python tools/video_detection.py --source video.mp4
|
||||
python tools/video_detection.py --source video.mp4 --output output.mp4
|
||||
|
||||
# Batch processing
|
||||
python tools/batch_process.py --input images/ --output results/
|
||||
|
||||
# Download models
|
||||
python tools/download_model.py --model-type retinaface
|
||||
python tools/download_model.py # downloads all
|
||||
```
|
||||
|
||||
## Common Options
|
||||
|
||||
| Option | Description |
|
||||
|--------|-------------|
|
||||
| `--source` | Input source: image/video path or camera ID (0, 1, ...) |
|
||||
| `--detector` | Choose detector: `retinaface`, `scrfd`, `yolov5face` |
|
||||
| `--threshold` | Visualization confidence threshold (default: varies) |
|
||||
| `--save-dir` | Output directory (default: `outputs`) |
|
||||
|
||||
## Supported Formats
|
||||
|
||||
**Images:** `.jpg`, `.jpeg`, `.png`, `.bmp`, `.webp`, `.tiff`
|
||||
|
||||
**Videos:** `.mp4`, `.avi`, `.mov`, `.mkv`, `.webm`, `.flv`
|
||||
|
||||
**Camera:** Use integer IDs (`0`, `1`, `2`, ...)
|
||||
|
||||
## Quick Test
|
||||
|
||||
```bash
|
||||
python tools/detection.py --source assets/test.jpg
|
||||
```
|
||||
213
tools/age_gender.py
Normal file
213
tools/age_gender.py
Normal file
@@ -0,0 +1,213 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Age and gender prediction on detected faces.
|
||||
|
||||
Usage:
|
||||
python tools/age_gender.py --source path/to/image.jpg
|
||||
python tools/age_gender.py --source path/to/video.mp4
|
||||
python tools/age_gender.py --source 0 # webcam
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
|
||||
from uniface import SCRFD, AgeGender, RetinaFace
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
|
||||
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}
|
||||
|
||||
|
||||
def get_source_type(source: str) -> str:
    """Classify ``source`` as 'camera', 'image', 'video' or 'unknown'.

    An all-digit string is treated as a camera ID. Otherwise the lower-cased
    file suffix is looked up in IMAGE_EXTENSIONS and then VIDEO_EXTENSIONS;
    anything else is 'unknown'. The file itself is never touched.
    """
    if source.isdigit():
        return 'camera'

    extension = Path(source).suffix.lower()
    if extension in IMAGE_EXTENSIONS:
        return 'image'
    if extension in VIDEO_EXTENSIONS:
        return 'video'
    return 'unknown'
|
||||
|
||||
|
||||
def draw_age_gender_label(image, bbox, sex: str, age: int):
    """Draw a '<sex>, <age>y' tag on ``image`` just above the box's top-left corner.

    The tag is black text on a filled green rectangle; ``image`` is modified
    in place and nothing is returned. Only bbox[0] and bbox[1] are read.
    """
    font, scale, stroke = cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2
    left, top = int(bbox[0]), int(bbox[1])
    label = f'{sex}, {age}y'

    (text_w, text_h), _ = cv2.getTextSize(label, font, scale, stroke)

    # Thickness -1 fills the rectangle; it is padded by 10 px around the text.
    cv2.rectangle(image, (left, top - text_h - 10), (left + text_w + 10, top), (0, 255, 0), -1)
    cv2.putText(image, label, (left + 5, top - 5), font, scale, (0, 0, 0), stroke)
|
||||
|
||||
|
||||
def process_image(
    detector,
    age_gender,
    image_path: str,
    save_dir: str = 'outputs',
    threshold: float = 0.6,
):
    """Detect faces in one image, predict age/gender per face and save the annotated copy.

    Args:
        detector: Object whose ``detect(image)`` returns faces exposing
            ``bbox``, ``confidence`` and ``landmarks``.
        age_gender: Object whose ``predict(image, bbox)`` returns a result
            exposing ``sex`` and ``age``.
        image_path: Path of the image to read.
        save_dir: Directory for the output file (created if missing).
        threshold: Passed to ``draw_detections`` as ``vis_threshold``.

    Returns:
        None. Progress and errors are reported with ``print``. Nothing is
        written when the image cannot be loaded or no face is found.
    """
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Failed to load image from '{image_path}'")
        return

    faces = detector.detect(image)
    print(f'Detected {len(faces)} face(s)')

    if not faces:
        return

    # Predict on the untouched pixels first: draw_detections paints boxes and
    # landmarks onto `image` in place, which would otherwise be fed to the model.
    predictions = [age_gender.predict(image, face.bbox) for face in faces]

    draw_detections(
        image=image,
        bboxes=[f.bbox for f in faces],
        scores=[f.confidence for f in faces],
        landmarks=[f.landmarks for f in faces],
        vis_threshold=threshold,
        fancy_bbox=True,
    )

    for i, (face, result) in enumerate(zip(faces, predictions)):
        print(f'  Face {i + 1}: {result.sex}, {result.age} years old')
        draw_age_gender_label(image, face.bbox, result.sex, result.age)

    os.makedirs(save_dir, exist_ok=True)
    output_path = os.path.join(save_dir, f'{Path(image_path).stem}_age_gender.jpg')

    # cv2.imwrite signals failure through its boolean return value, not an exception.
    if not cv2.imwrite(output_path, image):
        print(f"Error: Failed to write output image to '{output_path}'")
        return
    print(f'Output saved: {output_path}')
|
||||
|
||||
|
||||
def process_video(
    detector,
    age_gender,
    video_path: str,
    save_dir: str = 'outputs',
    threshold: float = 0.6,
):
    """Annotate every frame of a video with detections and age/gender labels.

    The result is written to ``<save_dir>/<video stem>_age_gender.mp4`` using
    the 'mp4v' codec at the source's reported FPS and frame size.

    Args:
        detector: Object whose ``detect(frame)`` returns faces exposing
            ``bbox``, ``confidence`` and ``landmarks``.
        age_gender: Object whose ``predict(frame, bbox)`` returns a result
            exposing ``sex`` and ``age``.
        video_path: Path of the video to read.
        save_dir: Directory for the output file (created if missing).
        threshold: Passed to ``draw_detections`` as ``vis_threshold``.

    Returns:
        None. Progress and errors are reported with ``print``.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Error: Cannot open video file '{video_path}'")
        return

    out = None
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        os.makedirs(save_dir, exist_ok=True)
        output_path = os.path.join(save_dir, f'{Path(video_path).stem}_age_gender.mp4')
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

        print(f'Processing video: {video_path} ({total_frames} frames)')
        frame_count = 0

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            frame_count += 1
            faces = detector.detect(frame)

            # Predict before drawing: draw_detections paints onto `frame` in
            # place, and the model should see the unannotated pixels.
            predictions = [age_gender.predict(frame, face.bbox) for face in faces]

            draw_detections(
                image=frame,
                bboxes=[f.bbox for f in faces],
                scores=[f.confidence for f in faces],
                landmarks=[f.landmarks for f in faces],
                vis_threshold=threshold,
                fancy_bbox=True,
            )

            for face, result in zip(faces, predictions):
                draw_age_gender_label(frame, face.bbox, result.sex, result.age)

            cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            out.write(frame)

            if frame_count % 100 == 0:
                print(f'  Processed {frame_count}/{total_frames} frames...')
    finally:
        # Release the capture and writer even if detection/prediction raised,
        # so the partially written file is closed.
        cap.release()
        if out is not None:
            out.release()

    print(f'Done! Output saved: {output_path}')
|
||||
|
||||
|
||||
def run_camera(detector, age_gender, camera_id: int = 0, threshold: float = 0.6):
    """Show live age/gender predictions from a camera until 'q' is pressed.

    Args:
        detector: Object whose ``detect(frame)`` returns faces exposing
            ``bbox``, ``confidence`` and ``landmarks``.
        age_gender: Object whose ``predict(frame, bbox)`` returns a result
            exposing ``sex`` and ``age``.
        camera_id: Index passed to ``cv2.VideoCapture``.
        threshold: Passed to ``draw_detections`` as ``vis_threshold``.

    Returns:
        None. The loop also ends when a frame cannot be read.
    """
    cap = cv2.VideoCapture(camera_id)
    if not cap.isOpened():
        print(f'Cannot open camera {camera_id}')
        return

    print("Press 'q' to quit")

    try:
        while True:
            ret, frame = cap.read()
            # Check the read result BEFORE touching the frame: on failure the
            # frame is not a valid image and cv2.flip would raise.
            if not ret:
                break
            frame = cv2.flip(frame, 1)  # flip code 1 = horizontal mirror

            faces = detector.detect(frame)

            # Predict before drawing: draw_detections paints onto `frame` in
            # place, and the model should see the unannotated pixels.
            predictions = [age_gender.predict(frame, face.bbox) for face in faces]

            draw_detections(
                image=frame,
                bboxes=[f.bbox for f in faces],
                scores=[f.confidence for f in faces],
                landmarks=[f.landmarks for f in faces],
                vis_threshold=threshold,
                fancy_bbox=True,
            )

            for face, result in zip(faces, predictions):
                draw_age_gender_label(frame, face.bbox, result.sex, result.age)

            cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            cv2.imshow('Age & Gender Detection', frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always free the camera and close the preview window, even on error.
        cap.release()
        cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
    """Parse CLI arguments, build the models and dispatch on the source type.

    The detector and AgeGender model are constructed before the source is
    classified. Image and video paths are checked for existence before
    processing; an unrecognised source prints the supported formats.
    """
    parser = argparse.ArgumentParser(description='Run age and gender detection')
    parser.add_argument('--source', type=str, required=True, help='Image/video path or camera ID (0, 1, ...)')
    parser.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
    parser.add_argument('--threshold', type=float, default=0.6, help='Visualization threshold')
    parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
    args = parser.parse_args()

    if args.detector == 'retinaface':
        detector = RetinaFace()
    else:
        detector = SCRFD()
    age_gender = AgeGender()

    kind = get_source_type(args.source)

    if kind == 'camera':
        run_camera(detector, age_gender, int(args.source), args.threshold)
        return

    # File-based sources: (handler, message printed when the path is missing).
    file_handlers = {
        'image': (process_image, f'Error: Image not found: {args.source}'),
        'video': (process_video, f'Error: Video not found: {args.source}'),
    }

    if kind not in file_handlers:
        print(f"Error: Unknown source type for '{args.source}'")
        print('Supported formats: images (.jpg, .png, ...), videos (.mp4, .avi, ...), or camera ID (0, 1, ...)')
        return

    handler, missing_message = file_handlers[kind]
    if not os.path.exists(args.source):
        print(missing_message)
        return
    handler(detector, age_gender, args.source, args.save_dir, args.threshold)


if __name__ == '__main__':
    main()
|
||||
105
tools/batch_process.py
Normal file
105
tools/batch_process.py
Normal file
@@ -0,0 +1,105 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Batch face detection on a folder of images.
|
||||
|
||||
Usage:
|
||||
python tools/batch_process.py --input images/ --output results/
|
||||
"""
|
||||
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
from tqdm import tqdm
|
||||
|
||||
from uniface import SCRFD, RetinaFace
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
|
||||
def get_image_files(input_dir: Path, extensions: tuple) -> list:
    """Return the sorted image files found directly inside ``input_dir``.

    Args:
        input_dir: Directory to scan (not recursive).
        extensions: Extensions to accept, with or without a leading dot
            (e.g. ``('jpg', '.png')``). Matching ignores case.

    Returns:
        Sorted list of ``Path`` objects, each file listed once. Empty when
        ``input_dir`` is not a directory or nothing matches.
    """
    if not input_dir.is_dir():
        return []

    # Normalise to lower-case '.ext' endings and drop blanks (e.g. from 'jpg,,png').
    endings = tuple({'.' + ext.strip().lstrip('.').lower() for ext in extensions if ext.strip().lstrip('.')})
    if not endings:
        return []

    # A single directory pass with a case-insensitive comparison finds mixed-case
    # names such as 'photo.Jpg' and cannot list the same file twice, which two
    # separate lower/upper globs can do on case-insensitive filesystems.
    return sorted(p for p in input_dir.iterdir() if p.is_file() and p.name.lower().endswith(endings))
|
||||
|
||||
|
||||
def process_image(detector, image_path: Path, output_path: Path, threshold: float) -> int:
    """Detect faces in one image, draw them, and write the annotated copy.

    Args:
        detector: Object whose ``detect(image)`` returns faces exposing
            ``bbox``, ``confidence`` and ``landmarks``.
        image_path: Image to read.
        output_path: Where the annotated image is written.
        threshold: Forwarded to ``draw_detections`` as ``vis_threshold``.

    Returns:
        Number of detected faces, or -1 if the image could not be read or the
        result could not be written.
    """
    image = cv2.imread(str(image_path))
    if image is None:
        return -1

    faces = detector.detect(image)

    # unpack face data for visualization
    draw_detections(
        image=image,
        bboxes=[f.bbox for f in faces],
        scores=[f.confidence for f in faces],
        landmarks=[f.landmarks for f in faces],
        vis_threshold=threshold,
        fancy_bbox=True,
    )

    cv2.putText(
        image,
        f'Faces: {len(faces)}',
        (10, 30),
        cv2.FONT_HERSHEY_SIMPLEX,
        1,
        (0, 255, 0),
        2,
    )

    # cv2.imwrite signals a failed write through its boolean return value;
    # report that as an error instead of counting the image as processed.
    if not cv2.imwrite(str(output_path), image):
        return -1

    return len(faces)
|
||||
|
||||
|
||||
def main():
    """Run face detection over every matching image in ``--input`` and save results to ``--output``.

    Each output file is named ``<stem>_detected<suffix>``. A summary of processed
    images, failures and total faces is printed at the end.
    """
    parser = argparse.ArgumentParser(description='Batch process images with face detection')
    parser.add_argument('--input', type=str, required=True, help='Input directory')
    parser.add_argument('--output', type=str, required=True, help='Output directory')
    parser.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
    parser.add_argument('--threshold', type=float, default=0.6, help='Visualization threshold')
    parser.add_argument('--extensions', type=str, default='jpg,jpeg,png,bmp', help='Image extensions')
    args = parser.parse_args()

    input_path = Path(args.input)
    output_path = Path(args.output)

    # is_dir() rather than exists(): a regular file also "exists" but cannot be
    # scanned for images, so reject it here with a clear message.
    if not input_path.is_dir():
        print(f"Error: Input directory '{args.input}' does not exist or is not a directory")
        return

    output_path.mkdir(parents=True, exist_ok=True)

    extensions = tuple(ext.strip() for ext in args.extensions.split(','))
    image_files = get_image_files(input_path, extensions)

    if not image_files:
        print(f'No images found with extensions {extensions}')
        return

    print(f'Found {len(image_files)} images')

    detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()

    success, errors, total_faces = 0, 0, 0

    for img_path in tqdm(image_files, desc='Processing', unit='img'):
        out_path = output_path / f'{img_path.stem}_detected{img_path.suffix}'
        result = process_image(detector, img_path, out_path, args.threshold)

        # process_image returns the face count, or a negative value on failure.
        if result >= 0:
            success += 1
            total_faces += result
        else:
            errors += 1
            print(f'\nFailed: {img_path.name}')

    print(f'\nDone! {success} processed, {errors} errors, {total_faces} faces total')


if __name__ == '__main__':
    main()
|
||||
196
tools/detection.py
Normal file
196
tools/detection.py
Normal file
@@ -0,0 +1,196 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Face detection on image, video, or webcam.
|
||||
|
||||
Usage:
|
||||
python tools/detection.py --source path/to/image.jpg
|
||||
python tools/detection.py --source path/to/video.mp4
|
||||
python tools/detection.py --source 0 # webcam
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
|
||||
from uniface.detection import SCRFD, RetinaFace, YOLOv5Face
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
# File suffixes (lower-case, with leading dot) recognised as images / videos.
IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}


def get_source_type(source: str) -> str:
    """Classify ``source`` as ``'camera'``, ``'image'``, ``'video'`` or ``'unknown'``.

    Args:
        source: A camera index written as decimal digits, or a file path.

    Returns:
        ``'camera'`` for an all-decimal string; otherwise ``'image'`` or
        ``'video'`` according to the (case-insensitive) file suffix, and
        ``'unknown'`` when the suffix is in neither set.
    """
    # isdecimal() instead of isdigit(): the caller converts camera IDs with
    # int(), and isdigit() is also true for characters such as '²' that int()
    # rejects with ValueError.
    if source.isdecimal():
        return 'camera'

    suffix = Path(source).suffix.lower()
    if suffix in IMAGE_EXTENSIONS:
        return 'image'
    if suffix in VIDEO_EXTENSIONS:
        return 'video'
    return 'unknown'
|
||||
|
||||
|
||||
def process_image(detector, image_path: str, threshold: float = 0.6, save_dir: str = 'outputs'):
    """Detect faces in a single image and save the annotated result as ``<name>_out.jpg``.

    Args:
        detector: Object whose ``detect(image)`` returns faces exposing
            ``bbox``, ``confidence`` and ``landmarks``.
        image_path: Path of the image to read.
        threshold: Forwarded to ``draw_detections`` as ``vis_threshold``.
        save_dir: Output directory (created if missing).
    """
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Failed to load image from '{image_path}'")
        return

    faces = detector.detect(image)

    # Nothing is drawn when the detector found no faces; the image is still saved.
    if faces:
        draw_detections(
            image,
            [face.bbox for face in faces],
            [face.confidence for face in faces],
            [face.landmarks for face in faces],
            vis_threshold=threshold,
        )

    os.makedirs(save_dir, exist_ok=True)
    stem, _ext = os.path.splitext(os.path.basename(image_path))
    output_path = os.path.join(save_dir, f'{stem}_out.jpg')
    cv2.imwrite(output_path, image)
    print(f'Detected {len(faces)} face(s). Output saved: {output_path}')
|
||||
|
||||
|
||||
def process_video(detector, video_path: str, threshold: float = 0.6, save_dir: str = 'outputs'):
    """Run face detection on every frame of a video and save the annotated copy.

    Args:
        detector: Object whose ``detect(frame)`` returns faces exposing
            ``bbox``, ``confidence`` and ``landmarks``.
        video_path: Path of the input video.
        threshold: Forwarded to ``draw_detections`` as ``vis_threshold``.
        save_dir: Directory for the output ``<input stem>_out.mp4`` (created if missing).
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Error: Cannot open video file '{video_path}'")
        return

    out = None
    try:
        # Get video properties
        fps = cap.get(cv2.CAP_PROP_FPS)
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        os.makedirs(save_dir, exist_ok=True)
        output_path = os.path.join(save_dir, f'{Path(video_path).stem}_out.mp4')
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
        # Stop early if the writer could not be created, instead of processing
        # the whole video and reporting an output file that was never written.
        if not out.isOpened():
            print(f"Error: Cannot open video writer for '{output_path}'")
            return

        print(f'Processing video: {video_path} ({total_frames} frames)')
        frame_count = 0

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            frame_count += 1
            faces = detector.detect(frame)

            bboxes = [f.bbox for f in faces]
            scores = [f.confidence for f in faces]
            landmarks = [f.landmarks for f in faces]
            draw_detections(
                image=frame,
                bboxes=bboxes,
                scores=scores,
                landmarks=landmarks,
                vis_threshold=threshold,
                draw_score=True,
                fancy_bbox=True,
            )

            cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            out.write(frame)

            # Show progress
            if frame_count % 100 == 0:
                print(f' Processed {frame_count}/{total_frames} frames...')
    finally:
        # Release the capture and writer even if detection or drawing raises.
        cap.release()
        if out is not None:
            out.release()

    print(f'Done! Output saved: {output_path}')
|
||||
|
||||
|
||||
def run_camera(detector, camera_id: int = 0, threshold: float = 0.6):
    """Run real-time face detection on a webcam until 'q' is pressed or a read fails.

    Args:
        detector: Object whose ``detect(frame)`` returns faces exposing
            ``bbox``, ``confidence`` and ``landmarks``.
        camera_id: Index passed to ``cv2.VideoCapture``.
        threshold: Forwarded to ``draw_detections`` as ``vis_threshold``.
    """
    cap = cv2.VideoCapture(camera_id)
    if not cap.isOpened():
        print(f'Cannot open camera {camera_id}')
        return

    print("Press 'q' to quit")

    try:
        while True:
            ret, frame = cap.read()
            # Check the read result before touching the frame: when the read
            # fails there is no valid image, and flipping it would raise
            # instead of ending the loop cleanly.
            if not ret:
                break
            frame = cv2.flip(frame, 1)  # mirror for natural interaction

            faces = detector.detect(frame)

            bboxes = [f.bbox for f in faces]
            scores = [f.confidence for f in faces]
            landmarks = [f.landmarks for f in faces]
            draw_detections(
                image=frame,
                bboxes=bboxes,
                scores=scores,
                landmarks=landmarks,
                vis_threshold=threshold,
                draw_score=True,
                fancy_bbox=True,
            )

            cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            cv2.imshow('Face Detection', frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Free the camera and close the window even if detection raises.
        cap.release()
        cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
    """Parse the command line, create the chosen detector, and dispatch on the kind of ``--source``."""
    cli = argparse.ArgumentParser(description='Run face detection')
    cli.add_argument('--source', type=str, required=True, help='Image/video path or camera ID (0, 1, ...)')
    cli.add_argument('--method', type=str, default='retinaface', choices=['retinaface', 'scrfd', 'yolov5face'])
    cli.add_argument('--threshold', type=float, default=0.25, help='Visualization threshold')
    cli.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
    opts = cli.parse_args()

    # Build the detector; argparse `choices` guarantees the last branch is 'yolov5face'.
    if opts.method == 'scrfd':
        detector = SCRFD()
    elif opts.method == 'retinaface':
        detector = RetinaFace()
    else:
        from uniface.constants import YOLOv5FaceWeights

        detector = YOLOv5Face(model_name=YOLOv5FaceWeights.YOLOV5M)

    # Classify the source, then hand off to the matching routine.
    kind = get_source_type(opts.source)

    if kind == 'camera':
        run_camera(detector, int(opts.source), opts.threshold)
        return

    if kind == 'image':
        if os.path.exists(opts.source):
            process_image(detector, opts.source, opts.threshold, opts.save_dir)
        else:
            print(f'Error: Image not found: {opts.source}')
        return

    if kind == 'video':
        if os.path.exists(opts.source):
            process_video(detector, opts.source, opts.threshold, opts.save_dir)
        else:
            print(f'Error: Video not found: {opts.source}')
        return

    print(f"Error: Unknown source type for '{opts.source}'")
    print('Supported formats: images (.jpg, .png, ...), videos (.mp4, .avi, ...), or camera ID (0, 1, ...)')


if __name__ == '__main__':
    main()
|
||||
60
tools/download_model.py
Normal file
60
tools/download_model.py
Normal file
@@ -0,0 +1,60 @@
|
||||
import argparse
|
||||
|
||||
from uniface.constants import (
|
||||
AgeGenderWeights,
|
||||
ArcFaceWeights,
|
||||
DDAMFNWeights,
|
||||
LandmarkWeights,
|
||||
MobileFaceWeights,
|
||||
RetinaFaceWeights,
|
||||
SCRFDWeights,
|
||||
SphereFaceWeights,
|
||||
)
|
||||
from uniface.model_store import verify_model_weights
|
||||
|
||||
# Maps each --model-type choice to the weights enum whose members are downloaded.
MODEL_TYPES = dict(
    retinaface=RetinaFaceWeights,
    sphereface=SphereFaceWeights,
    mobileface=MobileFaceWeights,
    arcface=ArcFaceWeights,
    scrfd=SCRFDWeights,
    ddamfn=DDAMFNWeights,
    agegender=AgeGenderWeights,
    landmark=LandmarkWeights,
)
|
||||
|
||||
|
||||
def download_models(model_enum):
    """Call ``verify_model_weights`` for every member of ``model_enum``, printing progress.

    A failure for one weight is printed and does not stop the remaining ones.
    """
    for entry in model_enum:
        print(f'Downloading: {entry.value}')
        try:
            verify_model_weights(entry)
            print(f' Done: {entry.value}')
        except Exception as exc:
            # Best effort: report the problem and continue with the next weight.
            print(f' Failed: {exc}')
|
||||
|
||||
|
||||
def main():
    """Download the weights for one ``--model-type``, or for every known type when none is given."""
    cli = argparse.ArgumentParser(description='Download model weights')
    cli.add_argument(
        '--model-type',
        type=str,
        choices=list(MODEL_TYPES.keys()),
        help='Model type to download. If not specified, downloads all.',
    )
    opts = cli.parse_args()
    selected = opts.model_type

    if not selected:
        print('Downloading all models...')
        for name, model_enum in MODEL_TYPES.items():
            print(f'\n{name}:')
            download_models(model_enum)
    else:
        print(f'Downloading {selected} models...')
        download_models(MODEL_TYPES[selected])

    print('\nDone!')


if __name__ == '__main__':
    main()
|
||||
239
tools/face_analyzer.py
Normal file
239
tools/face_analyzer.py
Normal file
@@ -0,0 +1,239 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Face analysis using FaceAnalyzer.
|
||||
|
||||
Usage:
|
||||
python tools/face_analyzer.py --source path/to/image.jpg
|
||||
python tools/face_analyzer.py --source path/to/video.mp4
|
||||
python tools/face_analyzer.py --source 0 # webcam
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from uniface import AgeGender, ArcFace, FaceAnalyzer, RetinaFace
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
# File suffixes (lower-case, with leading dot) recognised as images / videos.
IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}


def get_source_type(source: str) -> str:
    """Classify ``source`` as ``'camera'``, ``'image'``, ``'video'`` or ``'unknown'``.

    Args:
        source: A camera index written as decimal digits, or a file path.

    Returns:
        ``'camera'`` for an all-decimal string; otherwise ``'image'`` or
        ``'video'`` according to the (case-insensitive) file suffix, and
        ``'unknown'`` when the suffix is in neither set.
    """
    # isdecimal() instead of isdigit(): the caller converts camera IDs with
    # int(), and isdigit() is also true for characters such as '²' that int()
    # rejects with ValueError.
    if source.isdecimal():
        return 'camera'

    suffix = Path(source).suffix.lower()
    if suffix in IMAGE_EXTENSIONS:
        return 'image'
    if suffix in VIDEO_EXTENSIONS:
        return 'video'
    return 'unknown'
|
||||
|
||||
|
||||
def draw_face_info(image, face, face_id):
    """Draw a stack of labels (ID, confidence, optional sex/age) next to a face box.

    Each label is placed above the box; a label whose baseline would fall
    within the top 20 pixels is placed below the box instead.
    """
    left, top, _right, bottom = (int(v) for v in face.bbox)

    labels = [f'ID: {face_id}', f'Conf: {face.confidence:.2f}']
    if face.age and face.sex:
        labels.append(f'{face.sex}, {face.age}y')

    font = cv2.FONT_HERSHEY_SIMPLEX
    last = len(labels) - 1
    for idx, text in enumerate(labels):
        # Stack upwards from the top edge, 25 px per line.
        baseline_y = top - 10 - (last - idx) * 25
        if baseline_y < 20:
            # Not enough room above: stack downwards from the bottom edge.
            baseline_y = bottom + 20 + idx * 25

        (text_w, text_h), _ = cv2.getTextSize(text, font, 0.6, 2)
        box_start = (left, baseline_y - text_h - 5)
        box_end = (left + text_w + 10, baseline_y + 5)
        cv2.rectangle(image, box_start, box_end, (0, 255, 0), -1)
        cv2.putText(image, text, (left + 5, baseline_y), font, 0.6, (0, 0, 0), 2)
|
||||
|
||||
|
||||
def process_image(analyzer, image_path: str, save_dir: str = 'outputs', show_similarity: bool = True):
    """Analyze one image, print per-face details, and save an annotated copy.

    Args:
        analyzer: Object whose ``analyze(image)`` returns faces exposing ``bbox``,
            ``confidence``, ``landmarks``, ``age``, ``sex``, ``embedding`` and
            ``compute_similarity``.
        image_path: Path of the image to read.
        save_dir: Directory for ``<input stem>_analysis.jpg`` (created if missing).
        show_similarity: When true and at least two faces are found, print the
            pairwise similarity matrix and the three highest-scoring pairs.
    """
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Failed to load image from '{image_path}'")
        return

    faces = analyzer.analyze(image)
    print(f'Detected {len(faces)} face(s)')
    if not faces:
        return

    # Per-face summary line.
    for idx, face in enumerate(faces, 1):
        if face.age and face.sex:
            summary = f' Face {idx}: {face.sex}, {face.age}y'
        else:
            summary = f' Face {idx}'
        if face.embedding is not None:
            summary += f' (embedding: {face.embedding.shape})'
        print(summary)

    count = len(faces)
    if show_similarity and count >= 2:
        print('\nSimilarity Matrix:')

        # Symmetric matrix: ones on the diagonal, each pair computed once.
        matrix = np.eye(count)
        for a in range(count):
            for b in range(a + 1, count):
                score = faces[a].compute_similarity(faces[b])
                matrix[a][b] = score
                matrix[b][a] = score

        header = ''.join(f' F{col + 1:2d} ' for col in range(count))
        print(' ' + header)
        print(' ' + '-' * (7 * count))

        for a in range(count):
            cells = ''.join(f'{matrix[a][b]:6.3f} ' for b in range(count))
            print(f'F{a + 1:2d} | ' + cells)

        ranked = sorted(
            ((a, b, matrix[a][b]) for a in range(count) for b in range(a + 1, count)),
            key=lambda item: item[2],
            reverse=True,
        )

        print('\nTop matches (>0.4 = same person):')
        for a, b, score in ranked[:3]:
            verdict = 'Same' if score > 0.4 else 'Different'
            print(f' Face {a + 1} ↔ Face {b + 1}: {score:.3f} ({verdict})')

    draw_detections(
        image=image,
        bboxes=[f.bbox for f in faces],
        scores=[f.confidence for f in faces],
        landmarks=[f.landmarks for f in faces],
        fancy_bbox=True,
    )

    for idx, face in enumerate(faces, 1):
        draw_face_info(image, face, idx)

    os.makedirs(save_dir, exist_ok=True)
    output_path = os.path.join(save_dir, f'{Path(image_path).stem}_analysis.jpg')
    cv2.imwrite(output_path, image)
    print(f'Output saved: {output_path}')
|
||||
|
||||
|
||||
def process_video(analyzer, video_path: str, save_dir: str = 'outputs'):
    """Analyze every frame of a video and save the annotated copy.

    Args:
        analyzer: Object whose ``analyze(frame)`` returns faces exposing
            ``bbox``, ``confidence`` and ``landmarks``.
        video_path: Path of the input video.
        save_dir: Directory for the output ``<input stem>_analysis.mp4`` (created if missing).
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Error: Cannot open video file '{video_path}'")
        return

    out = None
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        os.makedirs(save_dir, exist_ok=True)
        output_path = os.path.join(save_dir, f'{Path(video_path).stem}_analysis.mp4')
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
        # Stop early if the writer could not be created, instead of processing
        # the whole video and reporting an output file that was never written.
        if not out.isOpened():
            print(f"Error: Cannot open video writer for '{output_path}'")
            return

        print(f'Processing video: {video_path} ({total_frames} frames)')
        frame_count = 0

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            frame_count += 1
            faces = analyzer.analyze(frame)

            bboxes = [f.bbox for f in faces]
            scores = [f.confidence for f in faces]
            landmarks = [f.landmarks for f in faces]
            draw_detections(image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, fancy_bbox=True)

            for i, face in enumerate(faces, 1):
                draw_face_info(frame, face, i)

            cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            out.write(frame)

            if frame_count % 100 == 0:
                print(f' Processed {frame_count}/{total_frames} frames...')
    finally:
        # Release the capture and writer even if analysis or drawing raises.
        cap.release()
        if out is not None:
            out.release()

    print(f'Done! Output saved: {output_path}')
|
||||
|
||||
|
||||
def run_camera(analyzer, camera_id: int = 0):
    """Run real-time face analysis on a webcam until 'q' is pressed or a read fails.

    Args:
        analyzer: Object whose ``analyze(frame)`` returns faces exposing
            ``bbox``, ``confidence`` and ``landmarks``.
        camera_id: Index passed to ``cv2.VideoCapture``.
    """
    cap = cv2.VideoCapture(camera_id)
    if not cap.isOpened():
        print(f'Cannot open camera {camera_id}')
        return

    print("Press 'q' to quit")

    try:
        while True:
            ret, frame = cap.read()
            # Check the read result before touching the frame: when the read
            # fails there is no valid image, and flipping it would raise
            # instead of ending the loop cleanly.
            if not ret:
                break
            frame = cv2.flip(frame, 1)  # mirror the view

            faces = analyzer.analyze(frame)

            bboxes = [f.bbox for f in faces]
            scores = [f.confidence for f in faces]
            landmarks = [f.landmarks for f in faces]
            draw_detections(image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, fancy_bbox=True)

            for i, face in enumerate(faces, 1):
                draw_face_info(frame, face, i)

            cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            cv2.imshow('Face Analyzer', frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Free the camera and close the window even if analysis raises.
        cap.release()
        cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
    """Parse the command line, assemble the ``FaceAnalyzer``, and dispatch on the kind of ``--source``."""
    cli = argparse.ArgumentParser(description='Face analysis with detection, recognition, and attributes')
    cli.add_argument('--source', type=str, required=True, help='Image/video path or camera ID (0, 1, ...)')
    cli.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
    cli.add_argument('--no-similarity', action='store_true', help='Skip similarity matrix computation')
    opts = cli.parse_args()

    # Detector, recognizer and attribute model, in that order.
    analyzer = FaceAnalyzer(RetinaFace(), ArcFace(), AgeGender())

    kind = get_source_type(opts.source)

    if kind == 'camera':
        run_camera(analyzer, int(opts.source))
        return

    if kind == 'image':
        if os.path.exists(opts.source):
            process_image(analyzer, opts.source, opts.save_dir, show_similarity=not opts.no_similarity)
        else:
            print(f'Error: Image not found: {opts.source}')
        return

    if kind == 'video':
        if os.path.exists(opts.source):
            process_video(analyzer, opts.source, opts.save_dir)
        else:
            print(f'Error: Video not found: {opts.source}')
        return

    print(f"Error: Unknown source type for '{opts.source}'")
    print('Supported formats: images (.jpg, .png, ...), videos (.mp4, .avi, ...), or camera ID (0, 1, ...)')


if __name__ == '__main__':
    main()
|
||||
281
tools/face_anonymize.py
Normal file
281
tools/face_anonymize.py
Normal file
@@ -0,0 +1,281 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Face anonymization/blurring for privacy.
|
||||
|
||||
Usage:
|
||||
python tools/face_anonymize.py --source path/to/image.jpg --method pixelate
|
||||
python tools/face_anonymize.py --source path/to/video.mp4 --method gaussian
|
||||
python tools/face_anonymize.py --source 0 --method pixelate # webcam
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
|
||||
from uniface import RetinaFace
|
||||
from uniface.privacy import BlurFace
|
||||
|
||||
# File suffixes (lower-case, with leading dot) recognised as images / videos.
IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}


def get_source_type(source: str) -> str:
    """Classify ``source`` as ``'camera'``, ``'image'``, ``'video'`` or ``'unknown'``.

    Args:
        source: A camera index written as decimal digits, or a file path.

    Returns:
        ``'camera'`` for an all-decimal string; otherwise ``'image'`` or
        ``'video'`` according to the (case-insensitive) file suffix, and
        ``'unknown'`` when the suffix is in neither set.
    """
    # isdecimal() instead of isdigit(): the caller converts camera IDs with
    # int(), and isdigit() is also true for characters such as '²' that int()
    # rejects with ValueError.
    if source.isdecimal():
        return 'camera'

    suffix = Path(source).suffix.lower()
    if suffix in IMAGE_EXTENSIONS:
        return 'image'
    if suffix in VIDEO_EXTENSIONS:
        return 'video'
    return 'unknown'
|
||||
|
||||
|
||||
def process_image(
    detector,
    blurrer: BlurFace,
    image_path: str,
    save_dir: str = 'outputs',
    show_detections: bool = False,
):
    """Anonymize the faces in one image and save it as ``<name>_anonymized.jpg``.

    Args:
        detector: Object whose ``detect(image)`` returns the faces to blur.
        blurrer: ``BlurFace`` instance whose ``anonymize(image, faces)`` produces the output.
        image_path: Path of the image to read.
        save_dir: Output directory (created if missing).
        show_detections: When true and faces were found, show a preview window with
            the detections and wait for a key press before blurring.
    """
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Failed to load image from '{image_path}'")
        return

    faces = detector.detect(image)
    print(f'Detected {len(faces)} face(s)')

    if show_detections and faces:
        from uniface.visualization import draw_detections

        # Draw on a copy so the preview markings never reach the saved output.
        preview = image.copy()
        draw_detections(
            preview,
            [face.bbox for face in faces],
            [face.confidence for face in faces],
            [face.landmarks for face in faces],
        )

        cv2.imshow('Detections (Press any key to continue)', preview)
        cv2.waitKey(0)
        cv2.destroyAllWindows()

    # With no faces the image is saved unchanged.
    anonymized = blurrer.anonymize(image, faces) if faces else image

    os.makedirs(save_dir, exist_ok=True)
    stem, _ext = os.path.splitext(os.path.basename(image_path))
    output_path = os.path.join(save_dir, f'{stem}_anonymized.jpg')
    cv2.imwrite(output_path, anonymized)
    print(f'Output saved: {output_path}')
|
||||
|
||||
|
||||
def process_video(
    detector,
    blurrer: BlurFace,
    video_path: str,
    save_dir: str = 'outputs',
):
    """Anonymize the faces in every frame of a video and save the result.

    Args:
        detector: Object whose ``detect(frame)`` returns the faces to blur.
        blurrer: ``BlurFace`` instance; ``anonymize(frame, faces, inplace=True)`` is applied per frame.
        video_path: Path of the input video.
        save_dir: Directory for the output ``<input stem>_anonymized.mp4`` (created if missing).
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Error: Cannot open video file '{video_path}'")
        return

    out = None
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        os.makedirs(save_dir, exist_ok=True)
        output_path = os.path.join(save_dir, f'{Path(video_path).stem}_anonymized.mp4')
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
        # Stop early if the writer could not be created, instead of processing
        # the whole video and reporting an output file that was never written.
        if not out.isOpened():
            print(f"Error: Cannot open video writer for '{output_path}'")
            return

        print(f'Processing video: {video_path} ({total_frames} frames)')
        frame_count = 0

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            frame_count += 1
            faces = detector.detect(frame)

            if faces:
                frame = blurrer.anonymize(frame, faces, inplace=True)

            out.write(frame)

            if frame_count % 100 == 0:
                print(f' Processed {frame_count}/{total_frames} frames...')
    finally:
        # Release the capture and writer even if detection or blurring raises.
        cap.release()
        if out is not None:
            out.release()

    print(f'Done! Output saved: {output_path}')
|
||||
|
||||
|
||||
def run_camera(detector, blurrer: BlurFace, camera_id: int = 0):
    """Run real-time face anonymization on a webcam until 'q' is pressed or a read fails.

    Args:
        detector: Object whose ``detect(frame)`` returns the faces to blur.
        blurrer: ``BlurFace`` instance; its ``method`` attribute is shown in the overlay.
        camera_id: Index passed to ``cv2.VideoCapture``.
    """
    cap = cv2.VideoCapture(camera_id)
    if not cap.isOpened():
        print(f'Cannot open camera {camera_id}')
        return

    print("Press 'q' to quit")

    try:
        while True:
            ret, frame = cap.read()
            # Check the read result before touching the frame: when the read
            # fails there is no valid image, and flipping it would raise
            # instead of ending the loop cleanly.
            if not ret:
                break
            frame = cv2.flip(frame, 1)  # mirror the view

            faces = detector.detect(frame)
            if faces:
                frame = blurrer.anonymize(frame, faces, inplace=True)

            cv2.putText(
                frame,
                f'Faces blurred: {len(faces)} | Method: {blurrer.method}',
                (10, 30),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.7,
                (0, 255, 0),
                2,
            )

            cv2.imshow('Face Anonymization (Press q to quit)', frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Free the camera and close the window even if detection or blurring raises.
        cap.release()
        cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
    """Parse the command line, build the detector and blurrer, and dispatch on the kind of ``--source``.

    Invalid ``--color`` values are reported through ``parser.error`` (usage message
    and non-zero exit) rather than an uncaught exception.
    """
    parser = argparse.ArgumentParser(
        description='Face anonymization using various blur methods',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        # The examples use this script's path, matching the module docstring.
        epilog="""
Examples:
  # Anonymize image with pixelation (default)
  python tools/face_anonymize.py --source photo.jpg

  # Use Gaussian blur with custom strength
  python tools/face_anonymize.py --source photo.jpg --method gaussian --blur-strength 5.0

  # Real-time webcam anonymization
  python tools/face_anonymize.py --source 0 --method pixelate

  # Black boxes for maximum privacy
  python tools/face_anonymize.py --source photo.jpg --method blackout

  # Custom pixelation intensity
  python tools/face_anonymize.py --source photo.jpg --method pixelate --pixel-blocks 5
        """,
    )

    # Input/output
    parser.add_argument('--source', type=str, required=True, help='Image/video path or camera ID (0, 1, ...)')
    parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory')

    # Blur method
    parser.add_argument(
        '--method',
        type=str,
        default='pixelate',
        choices=['gaussian', 'pixelate', 'blackout', 'elliptical', 'median'],
        help='Blur method (default: pixelate)',
    )

    # Method-specific parameters
    parser.add_argument(
        '--blur-strength',
        type=float,
        default=3.0,
        help='Blur strength for gaussian/elliptical/median (default: 3.0)',
    )
    parser.add_argument(
        '--pixel-blocks',
        type=int,
        default=20,
        help='Number of pixel blocks for pixelate (default: 20, lower=more pixelated)',
    )
    parser.add_argument(
        '--color',
        type=str,
        default='0,0,0',
        help='Fill color for blackout as R,G,B (default: 0,0,0 for black)',
    )
    parser.add_argument('--margin', type=int, default=20, help='Margin for elliptical blur (default: 20)')

    # Detection
    parser.add_argument(
        '--confidence-threshold',
        type=float,
        default=0.5,
        help='Detection confidence threshold (default: 0.5)',
    )

    # Visualization
    parser.add_argument(
        '--show-detections',
        action='store_true',
        help='Show detection boxes before blurring (image mode only)',
    )

    args = parser.parse_args()

    # Parse color. Non-numeric parts (e.g. "red" or "0,,0") make int() raise
    # ValueError; turn that into the same usage error as a wrong component count.
    try:
        color_values = [int(x) for x in args.color.split(',')]
    except ValueError:
        parser.error('--color must be in format R,G,B (e.g., 0,0,0)')
    if len(color_values) != 3:
        parser.error('--color must be in format R,G,B (e.g., 0,0,0)')
    color = tuple(color_values)

    # Initialize detector
    print(f'Initializing face detector (confidence_threshold={args.confidence_threshold})...')
    detector = RetinaFace(confidence_threshold=args.confidence_threshold)

    # Initialize blurrer
    print(f'Initializing blur method: {args.method}')
    blurrer = BlurFace(
        method=args.method,
        blur_strength=args.blur_strength,
        pixel_blocks=args.pixel_blocks,
        color=color,
        margin=args.margin,
    )

    source_type = get_source_type(args.source)

    if source_type == 'camera':
        run_camera(detector, blurrer, int(args.source))
    elif source_type == 'image':
        if not os.path.exists(args.source):
            print(f'Error: Image not found: {args.source}')
            return
        process_image(detector, blurrer, args.source, args.save_dir, args.show_detections)
    elif source_type == 'video':
        if not os.path.exists(args.source):
            print(f'Error: Video not found: {args.source}')
            return
        process_video(detector, blurrer, args.source, args.save_dir)
    else:
        print(f"Error: Unknown source type for '{args.source}'")
        print('Supported formats: images (.jpg, .png, ...), videos (.mp4, .avi, ...), or camera ID (0, 1, ...)')


if __name__ == '__main__':
    main()
|
||||
213
tools/face_emotion.py
Normal file
213
tools/face_emotion.py
Normal file
@@ -0,0 +1,213 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Emotion detection on detected faces.
|
||||
|
||||
Usage:
|
||||
python tools/face_emotion.py --source path/to/image.jpg
|
||||
python tools/face_emotion.py --source path/to/video.mp4
|
||||
python tools/face_emotion.py --source 0 # webcam
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
|
||||
from uniface import SCRFD, Emotion, RetinaFace
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
# File suffixes (lower-case, with leading dot) recognised as images / videos.
IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}


def get_source_type(source: str) -> str:
    """Classify ``source`` as ``'camera'``, ``'image'``, ``'video'`` or ``'unknown'``.

    Args:
        source: A camera index written as decimal digits, or a file path.

    Returns:
        ``'camera'`` for an all-decimal string; otherwise ``'image'`` or
        ``'video'`` according to the (case-insensitive) file suffix, and
        ``'unknown'`` when the suffix is in neither set.
    """
    # isdecimal() instead of isdigit(): isdigit() is also true for characters
    # such as '²' that int() rejects with ValueError, so such input must not
    # be reported as a camera index.
    if source.isdecimal():
        return 'camera'

    suffix = Path(source).suffix.lower()
    if suffix in IMAGE_EXTENSIONS:
        return 'image'
    if suffix in VIDEO_EXTENSIONS:
        return 'video'
    return 'unknown'
|
||||
|
||||
|
||||
def draw_emotion_label(image, bbox, emotion: str, confidence: float):
    """Draw ``'<emotion> (<confidence>)'`` on a filled strip directly above the box's top-left corner."""
    left = int(bbox[0])
    top = int(bbox[1])
    font = cv2.FONT_HERSHEY_SIMPLEX
    label = f'{emotion} ({confidence:.2f})'

    (label_w, label_h), _ = cv2.getTextSize(label, font, 0.6, 2)

    # Filled background strip, then the text on top of it.
    strip_start = (left, top - label_h - 10)
    strip_end = (left + label_w + 10, top)
    cv2.rectangle(image, strip_start, strip_end, (255, 0, 0), -1)
    cv2.putText(image, label, (left + 5, top - 5), font, 0.6, (255, 255, 255), 2)
|
||||
|
||||
|
||||
def process_image(
    detector,
    emotion_predictor,
    image_path: str,
    save_dir: str = 'outputs',
    threshold: float = 0.6,
):
    """Detect faces in one image, classify each face's emotion, and save the annotated result.

    Args:
        detector: Object whose ``detect(image)`` returns faces exposing ``bbox``,
            ``confidence`` and ``landmarks``.
        emotion_predictor: Object whose ``predict(image, landmarks)`` returns a result
            with ``emotion`` and ``confidence`` attributes.
        image_path: Path of the image to read.
        save_dir: Directory (created if missing) that receives ``<stem>_emotion.jpg``.
        threshold: Forwarded to ``draw_detections`` as ``vis_threshold``.

    Returns:
        None. Prints an error and returns when the image cannot be read; returns
        without writing a file when no face is detected.
    """
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Failed to load image from '{image_path}'")
        return

    faces = detector.detect(image)
    print(f'Detected {len(faces)} face(s)')

    if not faces:
        return

    # Classify every face BEFORE anything is drawn: draw_detections paints boxes and
    # landmark dots onto `image`, and the predictor should only ever see clean pixels.
    results = [emotion_predictor.predict(image, face.landmarks) for face in faces]

    bboxes = [f.bbox for f in faces]
    scores = [f.confidence for f in faces]
    landmarks = [f.landmarks for f in faces]
    draw_detections(
        image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
    )

    for i, (face, result) in enumerate(zip(faces, results)):
        print(f' Face {i + 1}: {result.emotion} (confidence: {result.confidence:.3f})')
        draw_emotion_label(image, face.bbox, result.emotion, result.confidence)

    os.makedirs(save_dir, exist_ok=True)
    output_path = os.path.join(save_dir, f'{Path(image_path).stem}_emotion.jpg')
    cv2.imwrite(output_path, image)
    print(f'Output saved: {output_path}')
|
||||
|
||||
|
||||
def process_video(
    detector,
    emotion_predictor,
    video_path: str,
    save_dir: str = 'outputs',
    threshold: float = 0.6,
):
    """Annotate every frame of a video with detections and emotion labels.

    The result is written to ``<save_dir>/<stem>_emotion.mp4`` using the ``mp4v``
    codec and the frame rate / frame size reported by the input video.

    Args:
        detector: Object whose ``detect(frame)`` returns faces exposing ``bbox``,
            ``confidence`` and ``landmarks``.
        emotion_predictor: Object whose ``predict(frame, landmarks)`` returns a result
            with ``emotion`` and ``confidence`` attributes.
        video_path: Path of the video to read.
        save_dir: Output directory, created if missing.
        threshold: Forwarded to ``draw_detections`` as ``vis_threshold``.

    Returns:
        None. Prints an error and returns when the video cannot be opened.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Error: Cannot open video file '{video_path}'")
        return

    out = None
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        os.makedirs(save_dir, exist_ok=True)
        output_path = os.path.join(save_dir, f'{Path(video_path).stem}_emotion.mp4')
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

        print(f'Processing video: {video_path} ({total_frames} frames)')
        frame_count = 0

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            frame_count += 1
            faces = detector.detect(frame)

            # Predict on the untouched frame first; the overlays drawn below would
            # otherwise end up inside the pixels handed to the classifier.
            results = [emotion_predictor.predict(frame, face.landmarks) for face in faces]

            bboxes = [f.bbox for f in faces]
            scores = [f.confidence for f in faces]
            landmarks = [f.landmarks for f in faces]
            draw_detections(
                image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
            )

            for face, result in zip(faces, results):
                draw_emotion_label(frame, face.bbox, result.emotion, result.confidence)

            cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            out.write(frame)

            if frame_count % 100 == 0:
                print(f' Processed {frame_count}/{total_frames} frames...')
    finally:
        # Release even when inference raises, so the capture handle is freed and the
        # partially written output file is closed properly.
        cap.release()
        if out is not None:
            out.release()

    print(f'Done! Output saved: {output_path}')
|
||||
|
||||
|
||||
def run_camera(detector, emotion_predictor, camera_id: int = 0, threshold: float = 0.6):
    """Show live emotion predictions for a webcam stream until 'q' is pressed.

    Args:
        detector: Object whose ``detect(frame)`` returns faces exposing ``bbox``,
            ``confidence`` and ``landmarks``.
        emotion_predictor: Object whose ``predict(frame, landmarks)`` returns a result
            with ``emotion`` and ``confidence`` attributes.
        camera_id: Index handed to ``cv2.VideoCapture``.
        threshold: Forwarded to ``draw_detections`` as ``vis_threshold``.

    Returns:
        None. Prints a message and returns when the camera cannot be opened.
    """
    cap = cv2.VideoCapture(camera_id)
    if not cap.isOpened():
        print(f'Cannot open camera {camera_id}')
        return

    print("Press 'q' to quit")

    try:
        while True:
            ret, frame = cap.read()
            # Check the read result before touching the frame: on failure there is
            # no valid image to flip.
            if not ret:
                break

            # Horizontal flip (flipCode=1) for a mirror-like preview.
            frame = cv2.flip(frame, 1)
            faces = detector.detect(frame)

            # Predict on clean pixels, then draw.
            results = [emotion_predictor.predict(frame, face.landmarks) for face in faces]

            bboxes = [f.bbox for f in faces]
            scores = [f.confidence for f in faces]
            landmarks = [f.landmarks for f in faces]
            draw_detections(
                image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
            )

            for face, result in zip(faces, results):
                draw_emotion_label(frame, face.bbox, result.emotion, result.confidence)

            cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            cv2.imshow('Emotion Detection', frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always free the camera and close the preview window, even on errors.
        cap.release()
        cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
    """Parse the command line, build the detector and emotion model, and dispatch on the source type."""
    cli = argparse.ArgumentParser(description='Run emotion detection')
    cli.add_argument('--source', type=str, required=True, help='Image/video path or camera ID (0, 1, ...)')
    cli.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
    cli.add_argument('--threshold', type=float, default=0.6, help='Visualization threshold')
    cli.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
    args = cli.parse_args()

    # Models are created before the source is inspected, exactly as before.
    if args.detector == 'retinaface':
        detector = RetinaFace()
    else:
        detector = SCRFD()
    emotion_predictor = Emotion()

    source_type = get_source_type(args.source)

    if source_type == 'camera':
        run_camera(detector, emotion_predictor, int(args.source), args.threshold)
        return

    if source_type == 'image':
        if os.path.exists(args.source):
            process_image(detector, emotion_predictor, args.source, args.save_dir, args.threshold)
        else:
            print(f'Error: Image not found: {args.source}')
        return

    if source_type == 'video':
        if os.path.exists(args.source):
            process_video(detector, emotion_predictor, args.source, args.save_dir, args.threshold)
        else:
            print(f'Error: Video not found: {args.source}')
        return

    print(f"Error: Unknown source type for '{args.source}'")
    print('Supported formats: images (.jpg, .png, ...), videos (.mp4, .avi, ...), or camera ID (0, 1, ...)')


if __name__ == '__main__':
    main()
|
||||
250
tools/face_parsing.py
Normal file
250
tools/face_parsing.py
Normal file
@@ -0,0 +1,250 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Face parsing on detected faces.
|
||||
|
||||
Usage:
|
||||
python tools/face_parsing.py --source path/to/image.jpg
|
||||
python tools/face_parsing.py --source path/to/video.mp4
|
||||
python tools/face_parsing.py --source 0 # webcam
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from uniface import RetinaFace
|
||||
from uniface.constants import ParsingWeights
|
||||
from uniface.parsing import BiSeNet
|
||||
from uniface.visualization import vis_parsing_maps
|
||||
|
||||
IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}


def get_source_type(source: str) -> str:
    """Classify a ``--source`` value.

    Args:
        source: Raw command-line value: a file path or a string of digits.

    Returns:
        ``'camera'`` for an all-digit string, ``'image'`` / ``'video'`` when the
        lower-cased file extension is in ``IMAGE_EXTENSIONS`` / ``VIDEO_EXTENSIONS``,
        and ``'unknown'`` for anything else.
    """
    if source.isdigit():
        return 'camera'

    extension = Path(source).suffix.lower()
    is_image = extension in IMAGE_EXTENSIONS
    is_video = extension in VIDEO_EXTENSIONS
    # Image takes precedence, matching the original if/elif order.
    return 'image' if is_image else ('video' if is_video else 'unknown')
|
||||
|
||||
|
||||
def expand_bbox(
    bbox: np.ndarray,
    image_shape: tuple[int, int],
    expand_ratio: float = 0.2,
    expand_top_ratio: float = 0.4,
) -> tuple[int, int, int, int]:
    """
    Grow a face box so it covers the whole head before face parsing.

    Detectors usually return a tight face box, while parsing needs hair, ears
    and neck as well, so the box is padded on every side (more on top).

    Args:
        bbox: Box whose first four entries are [x1, y1, x2, y2]; each is truncated to int.
        image_shape: Image dimensions; the first two entries are (height, width).
        expand_ratio: Padding for left, right and bottom, as a fraction of the
            box width (left/right) or height (bottom). Default 0.2 = 20%.
        expand_top_ratio: Padding for the top, as a fraction of the box height.
            Default 0.4 = 40%.

    Returns:
        tuple[int, int, int, int]: Padded (x1, y1, x2, y2), clamped to the image bounds.
    """
    left, top, right, bottom = (int(value) for value in bbox[:4])
    img_h, img_w = image_shape[:2]

    box_w = right - left
    box_h = bottom - top

    # Each padding amount is truncated to whole pixels.
    pad_x = int(box_w * expand_ratio)
    pad_bottom = int(box_h * expand_ratio)
    pad_top = int(box_h * expand_top_ratio)

    return (
        max(0, left - pad_x),
        max(0, top - pad_top),
        min(img_w, right + pad_x),
        min(img_h, bottom + pad_bottom),
    )
|
||||
|
||||
|
||||
def process_image(detector, parser, image_path: str, save_dir: str = 'outputs', expand_ratio: float = 0.2):
    """Parse every detected face in one image and save the overlay as ``<stem>_parsing.jpg``.

    Args:
        detector: Object whose ``detect(image)`` returns faces exposing ``bbox``.
        parser: Object whose ``parse(crop)`` returns a class mask for the crop.
        image_path: Path of the image to read.
        save_dir: Output directory, created if missing.
        expand_ratio: Forwarded to ``expand_bbox`` for left/right/bottom padding.

    Returns:
        None. Prints an error and returns when the image cannot be read.
    """
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Failed to load image from '{image_path}'")
        return

    faces = detector.detect(image)
    print(f'Detected {len(faces)} face(s)')

    # Overlays go onto a copy so that every crop is taken from untouched pixels.
    result_image = image.copy()

    for i, face in enumerate(faces):
        x1, y1, x2, y2 = expand_bbox(face.bbox, image.shape, expand_ratio=expand_ratio)
        face_crop = image[y1:y2, x1:x2]

        if face_crop.size != 0:
            mask = parser.parse(face_crop)
            print(f' Face {i + 1}: parsed with {len(set(mask.flatten()))} unique classes')

            # vis_parsing_maps is handed an RGB crop, hence the conversion.
            vis_result = vis_parsing_maps(cv2.cvtColor(face_crop, cv2.COLOR_BGR2RGB), mask, save_image=False)

            result_image[y1:y2, x1:x2] = vis_result
            cv2.rectangle(result_image, (x1, y1), (x2, y2), (0, 255, 0), 2)

    os.makedirs(save_dir, exist_ok=True)
    output_path = os.path.join(save_dir, f'{Path(image_path).stem}_parsing.jpg')
    cv2.imwrite(output_path, result_image)
    print(f'Output saved: {output_path}')
|
||||
|
||||
|
||||
def process_video(detector, parser, video_path: str, save_dir: str = 'outputs', expand_ratio: float = 0.2):
    """Overlay face-parsing maps on every frame and write ``<stem>_parsing.mp4``.

    Args:
        detector: Object whose ``detect(frame)`` returns faces exposing ``bbox``.
        parser: Object whose ``parse(crop)`` returns a class mask for the crop.
        video_path: Path of the video to read.
        save_dir: Output directory, created if missing.
        expand_ratio: Forwarded to ``expand_bbox`` for left/right/bottom padding.

    Returns:
        None. Prints an error and returns when the video cannot be opened.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Error: Cannot open video file '{video_path}'")
        return

    out = None
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        os.makedirs(save_dir, exist_ok=True)
        output_path = os.path.join(save_dir, f'{Path(video_path).stem}_parsing.mp4')
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

        print(f'Processing video: {video_path} ({total_frames} frames)')
        frame_count = 0

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            frame_count += 1
            faces = detector.detect(frame)

            # Expanded boxes of neighbouring faces can overlap. Crop from an untouched
            # copy (as process_image does) so one face's overlay never leaks into the
            # next face's input. The copy is only needed with two or more faces.
            pristine = frame.copy() if len(faces) > 1 else frame

            for face in faces:
                x1, y1, x2, y2 = expand_bbox(face.bbox, frame.shape, expand_ratio=expand_ratio)
                face_crop = pristine[y1:y2, x1:x2]

                if face_crop.size == 0:
                    continue

                mask = parser.parse(face_crop)
                face_crop_rgb = cv2.cvtColor(face_crop, cv2.COLOR_BGR2RGB)
                vis_result = vis_parsing_maps(face_crop_rgb, mask, save_image=False)

                frame[y1:y2, x1:x2] = vis_result
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

            cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            out.write(frame)

            if frame_count % 100 == 0:
                print(f' Processed {frame_count}/{total_frames} frames...')
    finally:
        # Release on every exit path so the output file is closed even after an error.
        cap.release()
        if out is not None:
            out.release()

    print(f'Done! Output saved: {output_path}')
|
||||
|
||||
|
||||
def run_camera(detector, parser, camera_id: int = 0, expand_ratio: float = 0.2):
    """Show live face-parsing overlays for a webcam stream until 'q' is pressed.

    Args:
        detector: Object whose ``detect(frame)`` returns faces exposing ``bbox``.
        parser: Object whose ``parse(crop)`` returns a class mask for the crop.
        camera_id: Index handed to ``cv2.VideoCapture``.
        expand_ratio: Forwarded to ``expand_bbox`` for left/right/bottom padding.

    Returns:
        None. Prints a message and returns when the camera cannot be opened.
    """
    cap = cv2.VideoCapture(camera_id)
    if not cap.isOpened():
        print(f'Cannot open camera {camera_id}')
        return

    print("Press 'q' to quit")

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Horizontal flip (flipCode=1) for a mirror-like preview.
            frame = cv2.flip(frame, 1)
            faces = detector.detect(frame)

            # Crop from an untouched copy when several faces are present, so an
            # overlay written for one face cannot end up in a neighbour's crop.
            pristine = frame.copy() if len(faces) > 1 else frame

            for face in faces:
                x1, y1, x2, y2 = expand_bbox(face.bbox, frame.shape, expand_ratio=expand_ratio)
                face_crop = pristine[y1:y2, x1:x2]

                if face_crop.size == 0:
                    continue

                mask = parser.parse(face_crop)
                face_crop_rgb = cv2.cvtColor(face_crop, cv2.COLOR_BGR2RGB)
                vis_result = vis_parsing_maps(face_crop_rgb, mask, save_image=False)

                frame[y1:y2, x1:x2] = vis_result
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

            cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            cv2.imshow('Face Parsing', frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always free the camera and close the preview window, even on errors.
        cap.release()
        cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
    """Parse the command line, build the detector and parsing model, and dispatch on the source type.

    The ``--model`` option now selects the BiSeNet weights. Previously the value was
    parsed but ignored and RESNET34 was always loaded.
    """
    # NOTE(review): assumes ParsingWeights is an Enum whose member values are the
    # strings users type on the command line - confirm against uniface.constants.
    model_choices = [ParsingWeights.RESNET18, ParsingWeights.RESNET34]

    parser_arg = argparse.ArgumentParser(description='Run face parsing')
    parser_arg.add_argument('--source', type=str, required=True, help='Image/video path or camera ID (0, 1, ...)')
    parser_arg.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
    parser_arg.add_argument(
        '--model',
        # Converting through the enum yields a real member for both the default and
        # a user-supplied value, instead of a plain string.
        type=ParsingWeights,
        default=ParsingWeights.RESNET18,
        choices=model_choices,
        metavar='MODEL',
        help='Parsing model weights, one of: ' + ', '.join(str(w.value) for w in model_choices),
    )
    parser_arg.add_argument(
        '--expand-ratio',
        type=float,
        default=0.2,
        help='Bbox expansion ratio for full head coverage (default: 0.2 = 20%%)',
    )
    args = parser_arg.parse_args()

    detector = RetinaFace()
    parser = BiSeNet(model_name=args.model)

    source_type = get_source_type(args.source)

    if source_type == 'camera':
        run_camera(detector, parser, int(args.source), expand_ratio=args.expand_ratio)
    elif source_type == 'image':
        if not os.path.exists(args.source):
            print(f'Error: Image not found: {args.source}')
            return
        process_image(detector, parser, args.source, args.save_dir, expand_ratio=args.expand_ratio)
    elif source_type == 'video':
        if not os.path.exists(args.source):
            print(f'Error: Video not found: {args.source}')
            return
        process_video(detector, parser, args.source, args.save_dir, expand_ratio=args.expand_ratio)
    else:
        print(f"Error: Unknown source type for '{args.source}'")
        print('Supported formats: images (.jpg, .png, ...), videos (.mp4, .avi, ...), or camera ID (0, 1, ...)')


if __name__ == '__main__':
    main()
|
||||
190
tools/face_search.py
Normal file
190
tools/face_search.py
Normal file
@@ -0,0 +1,190 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Real-time face search: match faces against a reference image.
|
||||
|
||||
Usage:
|
||||
python tools/face_search.py --reference person.jpg --source 0 # webcam
|
||||
python tools/face_search.py --reference person.jpg --source video.mp4
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from uniface.detection import SCRFD, RetinaFace
|
||||
from uniface.face_utils import compute_similarity
|
||||
from uniface.recognition import ArcFace, MobileFace, SphereFace
|
||||
|
||||
IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}


def get_source_type(source: str) -> str:
    """Classify a ``--source`` value.

    Args:
        source: Raw command-line value: a file path or a string of digits.

    Returns:
        ``'camera'`` for an all-digit string, ``'image'`` / ``'video'`` when the
        lower-cased file extension is in ``IMAGE_EXTENSIONS`` / ``VIDEO_EXTENSIONS``,
        and ``'unknown'`` for anything else.
    """
    if source.isdigit():
        return 'camera'

    extension = Path(source).suffix.lower()
    # Images are checked before videos, as in the original if/elif chain.
    for kind, known_extensions in (('image', IMAGE_EXTENSIONS), ('video', VIDEO_EXTENSIONS)):
        if extension in known_extensions:
            return kind
    return 'unknown'
|
||||
|
||||
|
||||
def get_recognizer(name: str):
    """Build the recognizer selected by ``name``.

    Args:
        name: ``'arcface'`` or ``'mobileface'``; any other value selects SphereFace.

    Returns:
        A new ``ArcFace``, ``MobileFace`` or ``SphereFace`` instance, each
        constructed with no arguments.
    """
    by_name = {'arcface': ArcFace, 'mobileface': MobileFace}
    # Unlisted names fall through to SphereFace, like the original else branch.
    recognizer_cls = by_name.get(name, SphereFace)
    return recognizer_cls()
|
||||
|
||||
|
||||
def extract_reference_embedding(detector, recognizer, image_path: str) -> np.ndarray:
    """Compute the embedding of the first face detected in the reference image.

    Args:
        detector: Object whose ``detect(image)`` returns faces exposing ``landmarks``.
        recognizer: Object providing ``get_normalized_embedding(image, landmarks)``.
        image_path: Path of the reference image.

    Returns:
        Whatever ``recognizer.get_normalized_embedding`` returns for the first face.

    Raises:
        RuntimeError: If the image cannot be read or contains no detected face.
    """
    reference = cv2.imread(image_path)
    if reference is None:
        raise RuntimeError(f'Failed to load image: {image_path}')

    detections = detector.detect(reference)
    if not detections:
        raise RuntimeError('No faces found in reference image.')

    # Only the first detection is used; any additional faces are ignored.
    first_face = detections[0]
    return recognizer.get_normalized_embedding(reference, first_face.landmarks)
|
||||
|
||||
|
||||
def process_frame(frame, detector, recognizer, ref_embedding: np.ndarray, threshold: float = 0.4):
    """Label every detected face in ``frame`` as a match or unknown against the reference.

    Args:
        frame: Image to analyse; boxes and labels are drawn onto it in place.
        detector: Object whose ``detect(frame)`` returns faces exposing ``bbox``
            and ``landmarks``.
        recognizer: Object providing ``get_normalized_embedding(frame, landmarks)``.
        ref_embedding: Embedding of the reference face.
        threshold: A face counts as a match when its similarity is strictly greater.

    Returns:
        The same ``frame`` object, annotated.
    """
    for face in detector.detect(frame):
        x1, y1, x2, y2 = (int(coord) for coord in face.bbox)

        embedding = recognizer.get_normalized_embedding(frame, face.landmarks)
        sim = compute_similarity(ref_embedding, embedding)

        # Colours are passed to OpenCV as-is: (0, 255, 0) for a match, (0, 0, 255) otherwise.
        if sim > threshold:
            label, color = f'Match ({sim:.2f})', (0, 255, 0)
        else:
            label, color = f'Unknown ({sim:.2f})', (0, 0, 255)

        cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
        cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)

    return frame
|
||||
|
||||
|
||||
def process_video(detector, recognizer, ref_embedding: np.ndarray, video_path: str, save_dir: str, threshold: float):
    """Run face search on every frame of a video and write ``<stem>_search.mp4``.

    Args:
        detector: Face detector forwarded to ``process_frame``.
        recognizer: Face recognizer forwarded to ``process_frame``.
        ref_embedding: Embedding of the reference face.
        video_path: Path of the video to read.
        save_dir: Output directory, created if missing.
        threshold: Match threshold forwarded to ``process_frame``.

    Returns:
        None. Prints an error and returns when the video cannot be opened.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Error: Cannot open video file '{video_path}'")
        return

    out = None
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        os.makedirs(save_dir, exist_ok=True)
        output_path = os.path.join(save_dir, f'{Path(video_path).stem}_search.mp4')
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

        print(f'Processing video: {video_path} ({total_frames} frames)')
        frame_count = 0

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            frame_count += 1
            frame = process_frame(frame, detector, recognizer, ref_embedding, threshold)
            out.write(frame)

            if frame_count % 100 == 0:
                print(f' Processed {frame_count}/{total_frames} frames...')
    finally:
        # Release on every exit path so the capture handle is freed and the output
        # file is closed even if detection or recognition raises mid-video.
        cap.release()
        if out is not None:
            out.release()

    print(f'Done! Output saved: {output_path}')
|
||||
|
||||
|
||||
def run_camera(detector, recognizer, ref_embedding: np.ndarray, camera_id: int = 0, threshold: float = 0.4):
    """Run live face search on a webcam stream until 'q' is pressed.

    Args:
        detector: Face detector forwarded to ``process_frame``.
        recognizer: Face recognizer forwarded to ``process_frame``.
        ref_embedding: Embedding of the reference face.
        camera_id: Index handed to ``cv2.VideoCapture``.
        threshold: Match threshold forwarded to ``process_frame``.

    Returns:
        None. Prints a message and returns when the camera cannot be opened.
    """
    cap = cv2.VideoCapture(camera_id)
    if not cap.isOpened():
        print(f'Cannot open camera {camera_id}')
        return

    print("Press 'q' to quit")

    try:
        while True:
            ret, frame = cap.read()
            # Check the read result before touching the frame: on failure there is
            # no valid image to flip.
            if not ret:
                break

            # Horizontal flip (flipCode=1) for a mirror-like preview.
            frame = cv2.flip(frame, 1)
            frame = process_frame(frame, detector, recognizer, ref_embedding, threshold)

            cv2.imshow('Face Recognition', frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always free the camera and close the preview window, even on errors.
        cap.release()
        cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
    """Parse the command line, embed the reference face, and search a video or camera stream for it."""
    cli = argparse.ArgumentParser(description='Face search using a reference image')
    cli.add_argument('--reference', type=str, required=True, help='Reference face image')
    cli.add_argument('--source', type=str, required=True, help='Video path or camera ID (0, 1, ...)')
    cli.add_argument('--threshold', type=float, default=0.4, help='Match threshold')
    cli.add_argument('--detector', type=str, default='scrfd', choices=['retinaface', 'scrfd'])
    cli.add_argument(
        '--recognizer',
        type=str,
        default='arcface',
        choices=['arcface', 'mobileface', 'sphereface'],
    )
    cli.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
    args = cli.parse_args()

    if not os.path.exists(args.reference):
        print(f'Error: Reference image not found: {args.reference}')
        return

    if args.detector == 'retinaface':
        detector = RetinaFace()
    else:
        detector = SCRFD()
    recognizer = get_recognizer(args.recognizer)

    # The reference embedding is computed before the source is validated, as before.
    print(f'Loading reference: {args.reference}')
    ref_embedding = extract_reference_embedding(detector, recognizer, args.reference)

    source_type = get_source_type(args.source)

    if source_type == 'camera':
        run_camera(detector, recognizer, ref_embedding, int(args.source), args.threshold)
        return

    if source_type == 'video':
        if os.path.exists(args.source):
            process_video(detector, recognizer, ref_embedding, args.source, args.save_dir, args.threshold)
        else:
            print(f'Error: Video not found: {args.source}')
        return

    # Image paths and unrecognised values both end up here.
    print(f"Error: Source must be a video file or camera ID, not '{args.source}'")
    print('Supported formats: videos (.mp4, .avi, ...) or camera ID (0, 1, ...)')


if __name__ == '__main__':
    main()
|
||||
214
tools/fairface.py
Normal file
214
tools/fairface.py
Normal file
@@ -0,0 +1,214 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""FairFace attribute prediction (race, gender, age) on detected faces.
|
||||
|
||||
Usage:
|
||||
python tools/fairface.py --source path/to/image.jpg
|
||||
python tools/fairface.py --source path/to/video.mp4
|
||||
python tools/fairface.py --source 0 # webcam
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
|
||||
from uniface import SCRFD, RetinaFace
|
||||
from uniface.attribute import FairFace
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}


def get_source_type(source: str) -> str:
    """Classify a ``--source`` value.

    Args:
        source: Raw command-line value: a file path or a string of digits.

    Returns:
        ``'camera'`` for an all-digit string, ``'image'`` / ``'video'`` when the
        lower-cased file extension is in ``IMAGE_EXTENSIONS`` / ``VIDEO_EXTENSIONS``,
        and ``'unknown'`` for anything else.
    """
    if source.isdigit():
        return 'camera'

    extension = Path(source).suffix.lower()
    is_image = extension in IMAGE_EXTENSIONS
    is_video = extension in VIDEO_EXTENSIONS
    # Image takes precedence, matching the original if/elif order.
    return 'image' if is_image else ('video' if is_video else 'unknown')
|
||||
|
||||
|
||||
def draw_fairface_label(image, bbox, sex: str, age_group: str, race: str):
    """Draw ``'<sex>, <age_group>, <race>'`` on a filled banner above the box's top-left corner.

    Args:
        image: Image passed to the OpenCV drawing calls; it is drawn on in place.
        bbox: Sequence whose first two entries are the box's left and top coordinates.
        sex: Predicted sex label.
        age_group: Predicted age-group label.
        race: Predicted race label.
    """
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.5
    stroke = 2

    left = int(bbox[0])
    top = int(bbox[1])
    text = f'{sex}, {age_group}, {race}'

    text_size, _baseline = cv2.getTextSize(text, font, font_scale, stroke)
    text_w, text_h = text_size

    # Thickness -1 fills the rectangle; it sits directly on top of the box edge.
    banner_top_left = (left, top - text_h - 10)
    banner_bottom_right = (left + text_w + 10, top)
    cv2.rectangle(image, banner_top_left, banner_bottom_right, (0, 255, 0), -1)
    cv2.putText(image, text, (left + 5, top - 5), font, font_scale, (0, 0, 0), stroke)
|
||||
|
||||
|
||||
def process_image(
    detector,
    fairface,
    image_path: str,
    save_dir: str = 'outputs',
    threshold: float = 0.6,
):
    """Detect faces in one image, predict FairFace attributes, and save the annotated result.

    Args:
        detector: Object whose ``detect(image)`` returns faces exposing ``bbox``,
            ``confidence`` and ``landmarks``.
        fairface: Object whose ``predict(image, bbox)`` returns a result with
            ``sex``, ``age_group`` and ``race`` attributes.
        image_path: Path of the image to read.
        save_dir: Directory (created if missing) that receives ``<stem>_fairface.jpg``.
        threshold: Forwarded to ``draw_detections`` as ``vis_threshold``.

    Returns:
        None. Prints an error and returns when the image cannot be read; returns
        without writing a file when no face is detected.
    """
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Failed to load image from '{image_path}'")
        return

    faces = detector.detect(image)
    print(f'Detected {len(faces)} face(s)')

    if not faces:
        return

    # Predict attributes BEFORE anything is drawn: draw_detections paints boxes and
    # landmark dots onto `image`, and the model should only ever see clean pixels.
    results = [fairface.predict(image, face.bbox) for face in faces]

    bboxes = [f.bbox for f in faces]
    scores = [f.confidence for f in faces]
    landmarks = [f.landmarks for f in faces]
    draw_detections(
        image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
    )

    for i, (face, result) in enumerate(zip(faces, results)):
        print(f' Face {i + 1}: {result.sex}, {result.age_group}, {result.race}')
        draw_fairface_label(image, face.bbox, result.sex, result.age_group, result.race)

    os.makedirs(save_dir, exist_ok=True)
    output_path = os.path.join(save_dir, f'{Path(image_path).stem}_fairface.jpg')
    cv2.imwrite(output_path, image)
    print(f'Output saved: {output_path}')
|
||||
|
||||
|
||||
def process_video(
    detector,
    fairface,
    video_path: str,
    save_dir: str = 'outputs',
    threshold: float = 0.6,
):
    """Annotate every frame of a video with detections and FairFace attributes.

    The result is written to ``<save_dir>/<stem>_fairface.mp4`` using the ``mp4v``
    codec and the frame rate / frame size reported by the input video.

    Args:
        detector: Object whose ``detect(frame)`` returns faces exposing ``bbox``,
            ``confidence`` and ``landmarks``.
        fairface: Object whose ``predict(frame, bbox)`` returns a result with
            ``sex``, ``age_group`` and ``race`` attributes.
        video_path: Path of the video to read.
        save_dir: Output directory, created if missing.
        threshold: Forwarded to ``draw_detections`` as ``vis_threshold``.

    Returns:
        None. Prints an error and returns when the video cannot be opened.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Error: Cannot open video file '{video_path}'")
        return

    out = None
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        os.makedirs(save_dir, exist_ok=True)
        output_path = os.path.join(save_dir, f'{Path(video_path).stem}_fairface.mp4')
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

        print(f'Processing video: {video_path} ({total_frames} frames)')
        frame_count = 0

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            frame_count += 1
            faces = detector.detect(frame)

            # Predict on the untouched frame first; the overlays drawn below would
            # otherwise end up inside the pixels handed to the model.
            results = [fairface.predict(frame, face.bbox) for face in faces]

            bboxes = [f.bbox for f in faces]
            scores = [f.confidence for f in faces]
            landmarks = [f.landmarks for f in faces]
            draw_detections(
                image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
            )

            for face, result in zip(faces, results):
                draw_fairface_label(frame, face.bbox, result.sex, result.age_group, result.race)

            cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            out.write(frame)

            if frame_count % 100 == 0:
                print(f' Processed {frame_count}/{total_frames} frames...')
    finally:
        # Release even when inference raises, so the capture handle is freed and the
        # partially written output file is closed properly.
        cap.release()
        if out is not None:
            out.release()

    print(f'Done! Output saved: {output_path}')
|
||||
|
||||
|
||||
def run_camera(detector, fairface, camera_id: int = 0, threshold: float = 0.6):
    """Show live FairFace attribute predictions for a webcam stream until 'q' is pressed.

    Args:
        detector: Object whose ``detect(frame)`` returns faces exposing ``bbox``,
            ``confidence`` and ``landmarks``.
        fairface: Object whose ``predict(frame, bbox)`` returns a result with
            ``sex``, ``age_group`` and ``race`` attributes.
        camera_id: Index handed to ``cv2.VideoCapture``.
        threshold: Forwarded to ``draw_detections`` as ``vis_threshold``.

    Returns:
        None. Prints a message and returns when the camera cannot be opened.
    """
    cap = cv2.VideoCapture(camera_id)
    if not cap.isOpened():
        print(f'Cannot open camera {camera_id}')
        return

    print("Press 'q' to quit")

    try:
        while True:
            ret, frame = cap.read()
            # Check the read result before touching the frame: on failure there is
            # no valid image to flip.
            if not ret:
                break

            # Horizontal flip (flipCode=1) for a mirror-like preview.
            frame = cv2.flip(frame, 1)
            faces = detector.detect(frame)

            # Predict on clean pixels, then draw.
            results = [fairface.predict(frame, face.bbox) for face in faces]

            bboxes = [f.bbox for f in faces]
            scores = [f.confidence for f in faces]
            landmarks = [f.landmarks for f in faces]
            draw_detections(
                image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
            )

            for face, result in zip(faces, results):
                draw_fairface_label(frame, face.bbox, result.sex, result.age_group, result.race)

            cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            cv2.imshow('FairFace Detection', frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always free the camera and close the preview window, even on errors.
        cap.release()
        cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
    """Parse the command line, build the detector and FairFace model, and dispatch on the source type."""
    cli = argparse.ArgumentParser(description='Run FairFace attribute prediction (race, gender, age)')
    cli.add_argument('--source', type=str, required=True, help='Image/video path or camera ID (0, 1, ...)')
    cli.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
    cli.add_argument('--threshold', type=float, default=0.6, help='Visualization threshold')
    cli.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
    args = cli.parse_args()

    # Models are created before the source is inspected, exactly as before.
    if args.detector == 'retinaface':
        detector = RetinaFace()
    else:
        detector = SCRFD()
    fairface = FairFace()

    source_type = get_source_type(args.source)

    if source_type == 'camera':
        run_camera(detector, fairface, int(args.source), args.threshold)
        return

    if source_type == 'image':
        if os.path.exists(args.source):
            process_image(detector, fairface, args.source, args.save_dir, args.threshold)
        else:
            print(f'Error: Image not found: {args.source}')
        return

    if source_type == 'video':
        if os.path.exists(args.source):
            process_video(detector, fairface, args.source, args.save_dir, args.threshold)
        else:
            print(f'Error: Video not found: {args.source}')
        return

    print(f"Error: Unknown source type for '{args.source}'")
    print('Supported formats: images (.jpg, .png, ...), videos (.mp4, .avi, ...), or camera ID (0, 1, ...)')


if __name__ == '__main__':
    main()
|
||||
190
tools/gaze_estimation.py
Normal file
190
tools/gaze_estimation.py
Normal file
@@ -0,0 +1,190 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Gaze estimation on detected faces.
|
||||
|
||||
Usage:
|
||||
python tools/gaze_estimation.py --source path/to/image.jpg
|
||||
python tools/gaze_estimation.py --source path/to/video.mp4
|
||||
python tools/gaze_estimation.py --source 0 # webcam
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from uniface import RetinaFace
|
||||
from uniface.gaze import MobileGaze
|
||||
from uniface.visualization import draw_gaze
|
||||
|
||||
IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}


def get_source_type(source: str) -> str:
    """Classify a ``--source`` value.

    Args:
        source: Raw command-line value: a file path or a string of digits.

    Returns:
        ``'camera'`` for an all-digit string, ``'image'`` / ``'video'`` when the
        lower-cased file extension is in ``IMAGE_EXTENSIONS`` / ``VIDEO_EXTENSIONS``,
        and ``'unknown'`` for anything else.
    """
    if source.isdigit():
        return 'camera'

    extension = Path(source).suffix.lower()
    # Images are checked before videos, as in the original if/elif chain.
    for kind, known_extensions in (('image', IMAGE_EXTENSIONS), ('video', VIDEO_EXTENSIONS)):
        if extension in known_extensions:
            return kind
    return 'unknown'
|
||||
|
||||
|
||||
def process_image(detector, gaze_estimator, image_path: str, save_dir: str = 'outputs'):
    """Estimate gaze for every detected face in one image and save ``<stem>_gaze.jpg``.

    Args:
        detector: Object whose ``detect(image)`` returns faces exposing ``bbox``.
        gaze_estimator: Object whose ``estimate(crop)`` returns a result with
            ``pitch`` and ``yaw`` (passed through ``np.degrees`` for printing).
        image_path: Path of the image to read.
        save_dir: Output directory, created if missing.

    Returns:
        None. Prints an error and returns when the image cannot be read.
    """
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Failed to load image from '{image_path}'")
        return

    faces = detector.detect(image)
    print(f'Detected {len(faces)} face(s)')

    img_h, img_w = image.shape[:2]
    estimates = []

    for i, face in enumerate(faces):
        bbox = face.bbox
        x1, y1, x2, y2 = map(int, bbox[:4])

        # Clamp to the image: a negative coordinate would be read by NumPy as an
        # index from the far edge, giving an empty crop and silently dropping
        # faces that touch the image border.
        x1, y1 = max(0, x1), max(0, y1)
        x2, y2 = min(img_w, x2), min(img_h, y2)
        face_crop = image[y1:y2, x1:x2]

        if face_crop.size == 0:
            continue

        result = gaze_estimator.estimate(face_crop)
        print(f' Face {i + 1}: pitch={np.degrees(result.pitch):.1f}°, yaw={np.degrees(result.yaw):.1f}°')
        estimates.append((bbox, result))

    # Draw only after every face has been estimated, so an overlay drawn for one
    # face can never end up inside the crop of another.
    for bbox, result in estimates:
        draw_gaze(image, bbox, result.pitch, result.yaw, draw_angles=True)

    os.makedirs(save_dir, exist_ok=True)
    output_path = os.path.join(save_dir, f'{Path(image_path).stem}_gaze.jpg')
    cv2.imwrite(output_path, image)
    print(f'Output saved: {output_path}')
|
||||
|
||||
|
||||
def process_video(detector, gaze_estimator, video_path: str, save_dir: str = 'outputs'):
    """Estimate gaze on every frame of a video and write an annotated copy.

    The output is ``<save_dir>/<video stem>_gaze.mp4``, encoded with the
    ``mp4v`` fourcc at the input's reported frame rate and resolution.

    Args:
        detector: Object whose ``detect(frame)`` returns faces exposing ``.bbox``.
        gaze_estimator: Object whose ``estimate(face_crop)`` returns a result
            with ``pitch`` and ``yaw`` attributes.
        video_path: Path of the input video.
        save_dir: Output directory; created if it does not exist.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Error: Cannot open video file '{video_path}'")
        return

    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    os.makedirs(save_dir, exist_ok=True)
    output_path = os.path.join(save_dir, f'{Path(video_path).stem}_gaze.mp4')
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    # Without this check a writer that failed to open would silently drop every frame.
    if not out.isOpened():
        print(f"Error: Cannot create output video '{output_path}'")
        cap.release()
        return

    print(f'Processing video: {video_path} ({total_frames} frames)')
    frame_count = 0

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            frame_count += 1
            faces = detector.detect(frame)
            frame_h, frame_w = frame.shape[:2]

            # Estimate on clean pixels first; draw only after all faces are done.
            estimates = []
            for face in faces:
                bbox = face.bbox
                x1, y1, x2, y2 = map(int, bbox[:4])
                # Clamp so negative coordinates do not wrap around when slicing.
                x1, x2 = (min(max(v, 0), frame_w) for v in (x1, x2))
                y1, y2 = (min(max(v, 0), frame_h) for v in (y1, y2))
                face_crop = frame[y1:y2, x1:x2]
                if face_crop.size == 0:
                    continue
                estimates.append((bbox, gaze_estimator.estimate(face_crop)))

            for bbox, result in estimates:
                draw_gaze(frame, bbox, result.pitch, result.yaw)

            cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            out.write(frame)

            if frame_count % 100 == 0:
                print(f' Processed {frame_count}/{total_frames} frames...')
    finally:
        # Release both handles even if detection or estimation raises, so the
        # partially written file is finalized.
        cap.release()
        out.release()

    print(f'Done! Output saved: {output_path}')
|
||||
|
||||
|
||||
def run_camera(detector, gaze_estimator, camera_id: int = 0):
    """Show live gaze estimation from a webcam until 'q' is pressed.

    Args:
        detector: Object whose ``detect(frame)`` returns faces exposing ``.bbox``.
        gaze_estimator: Object whose ``estimate(face_crop)`` returns a result
            with ``pitch`` and ``yaw`` attributes.
        camera_id: Index passed to ``cv2.VideoCapture``.
    """
    cap = cv2.VideoCapture(camera_id)
    if not cap.isOpened():
        print(f'Cannot open camera {camera_id}')
        return

    print("Press 'q' to quit")

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            frame = cv2.flip(frame, 1)  # horizontal flip for a mirror-like preview
            faces = detector.detect(frame)
            frame_h, frame_w = frame.shape[:2]

            # Estimate on clean pixels first; draw only after all faces are done.
            estimates = []
            for face in faces:
                bbox = face.bbox
                x1, y1, x2, y2 = map(int, bbox[:4])
                # Clamp so negative coordinates do not wrap around when slicing.
                x1, x2 = (min(max(v, 0), frame_w) for v in (x1, x2))
                y1, y2 = (min(max(v, 0), frame_h) for v in (y1, y2))
                face_crop = frame[y1:y2, x1:x2]
                if face_crop.size == 0:
                    continue
                estimates.append((bbox, gaze_estimator.estimate(face_crop)))

            for bbox, result in estimates:
                draw_gaze(frame, bbox, result.pitch, result.yaw)

            cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            cv2.imshow('Gaze Estimation', frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Free the camera and close the window even if a model call raises.
        cap.release()
        cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
    """CLI entry point: parse arguments, build the models, dispatch on source type."""
    cli = argparse.ArgumentParser(description='Run gaze estimation')
    cli.add_argument('--source', type=str, required=True, help='Image/video path or camera ID (0, 1, ...)')
    cli.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
    opts = cli.parse_args()

    face_detector = RetinaFace()
    estimator = MobileGaze()

    kind = get_source_type(opts.source)

    if kind == 'camera':
        run_camera(face_detector, estimator, int(opts.source))
        return

    if kind == 'image':
        if os.path.exists(opts.source):
            process_image(face_detector, estimator, opts.source, opts.save_dir)
        else:
            print(f'Error: Image not found: {opts.source}')
        return

    if kind == 'video':
        if os.path.exists(opts.source):
            process_video(face_detector, estimator, opts.source, opts.save_dir)
        else:
            print(f'Error: Video not found: {opts.source}')
        return

    # Anything else: neither a camera index nor a recognised file extension.
    print(f"Error: Unknown source type for '{opts.source}'")
    print('Supported formats: images (.jpg, .png, ...), videos (.mp4, .avi, ...), or camera ID (0, 1, ...)')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
187
tools/landmarks.py
Normal file
187
tools/landmarks.py
Normal file
@@ -0,0 +1,187 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""106-point facial landmark detection.
|
||||
|
||||
Usage:
|
||||
python tools/landmarks.py --source path/to/image.jpg
|
||||
python tools/landmarks.py --source path/to/video.mp4
|
||||
python tools/landmarks.py --source 0 # webcam
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
|
||||
from uniface import SCRFD, Landmark106, RetinaFace
|
||||
|
||||
IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
|
||||
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}
|
||||
|
||||
|
||||
def get_source_type(source: str) -> str:
    """Return 'camera', 'image', 'video', or 'unknown' for a ``--source`` value.

    Digit-only strings are camera indices; everything else is classified by
    its lower-cased file extension.
    """
    if source.isdigit():
        return 'camera'

    ext = Path(source).suffix.lower()
    if ext in IMAGE_EXTENSIONS:
        return 'image'
    return 'video' if ext in VIDEO_EXTENSIONS else 'unknown'
|
||||
|
||||
|
||||
def process_image(detector, landmarker, image_path: str, save_dir: str = 'outputs'):
    """Detect faces in one image, draw their landmarks, and save the result.

    The annotated image is written to ``<save_dir>/<image stem>_landmarks.jpg``.
    Nothing is saved when no face is detected.

    Args:
        detector: Object whose ``detect(image)`` returns faces exposing ``.bbox``.
        landmarker: Object whose ``get_landmarks(image, bbox)`` returns an
            array of (x, y) points.
        image_path: Path of the image to read.
        save_dir: Output directory; created if it does not exist.
    """
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Failed to load image from '{image_path}'")
        return

    faces = detector.detect(image)
    print(f'Detected {len(faces)} face(s)')

    if not faces:
        return

    # Run the landmarker on the untouched image before anything is drawn, so
    # it never receives pixels overwritten by boxes, dots, or labels.
    detections = [(face.bbox, landmarker.get_landmarks(image, face.bbox)) for face in faces]

    for i, (bbox, landmarks) in enumerate(detections):
        # Only the first four values are coordinates; slicing keeps this
        # working if the bbox carries extra entries.
        x1, y1, x2, y2 = map(int, bbox[:4])
        cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)

        print(f' Face {i + 1}: {len(landmarks)} landmarks')

        for x, y in landmarks.astype(int):
            cv2.circle(image, (x, y), 1, (0, 255, 0), -1)

        cv2.putText(image, f'Face {i + 1}', (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    os.makedirs(save_dir, exist_ok=True)
    output_path = os.path.join(save_dir, f'{Path(image_path).stem}_landmarks.jpg')
    # cv2.imwrite reports failure through its return value rather than raising.
    if not cv2.imwrite(output_path, image):
        print(f"Error: Failed to write output image '{output_path}'")
        return
    print(f'Output saved: {output_path}')
|
||||
|
||||
|
||||
def process_video(detector, landmarker, video_path: str, save_dir: str = 'outputs'):
    """Draw facial landmarks on every frame of a video and write an annotated copy.

    The output is ``<save_dir>/<video stem>_landmarks.mp4``, encoded with the
    ``mp4v`` fourcc at the input's reported frame rate and resolution.

    Args:
        detector: Object whose ``detect(frame)`` returns faces exposing ``.bbox``.
        landmarker: Object whose ``get_landmarks(frame, bbox)`` returns an
            array of (x, y) points.
        video_path: Path of the input video.
        save_dir: Output directory; created if it does not exist.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Error: Cannot open video file '{video_path}'")
        return

    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    os.makedirs(save_dir, exist_ok=True)
    output_path = os.path.join(save_dir, f'{Path(video_path).stem}_landmarks.mp4')
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    # Without this check a writer that failed to open would silently drop every frame.
    if not out.isOpened():
        print(f"Error: Cannot create output video '{output_path}'")
        cap.release()
        return

    print(f'Processing video: {video_path} ({total_frames} frames)')
    frame_count = 0

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            frame_count += 1
            faces = detector.detect(frame)

            # Compute all landmarks on the clean frame before drawing anything.
            detections = [(face.bbox, landmarker.get_landmarks(frame, face.bbox)) for face in faces]

            for bbox, landmarks in detections:
                x1, y1, x2, y2 = map(int, bbox[:4])
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                for x, y in landmarks.astype(int):
                    cv2.circle(frame, (x, y), 1, (0, 255, 0), -1)

            cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            out.write(frame)

            if frame_count % 100 == 0:
                print(f' Processed {frame_count}/{total_frames} frames...')
    finally:
        # Release both handles even if a model call raises.
        cap.release()
        out.release()

    print(f'Done! Output saved: {output_path}')
|
||||
|
||||
|
||||
def run_camera(detector, landmarker, camera_id: int = 0):
    """Show live facial landmarks from a webcam until 'q' is pressed.

    Args:
        detector: Object whose ``detect(frame)`` returns faces exposing ``.bbox``.
        landmarker: Object whose ``get_landmarks(frame, bbox)`` returns an
            array of (x, y) points.
        camera_id: Index passed to ``cv2.VideoCapture``.
    """
    cap = cv2.VideoCapture(camera_id)
    if not cap.isOpened():
        print(f'Cannot open camera {camera_id}')
        return

    print("Press 'q' to quit")

    try:
        while True:
            ret, frame = cap.read()
            # Check the read result BEFORE touching the frame: on a failed
            # read there is no valid frame to flip.
            if not ret:
                break

            frame = cv2.flip(frame, 1)  # horizontal flip for a mirror-like preview
            faces = detector.detect(frame)

            # Compute all landmarks on the clean frame before drawing anything.
            detections = [(face.bbox, landmarker.get_landmarks(frame, face.bbox)) for face in faces]

            for bbox, landmarks in detections:
                x1, y1, x2, y2 = map(int, bbox[:4])
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                for x, y in landmarks.astype(int):
                    cv2.circle(frame, (x, y), 1, (0, 255, 0), -1)

            cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            cv2.imshow('106-Point Landmarks', frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Free the camera and close the window even if a model call raises.
        cap.release()
        cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
    """CLI entry point: parse arguments, build detector and landmarker, dispatch."""
    cli = argparse.ArgumentParser(description='Run facial landmark detection')
    cli.add_argument('--source', type=str, required=True, help='Image/video path or camera ID (0, 1, ...)')
    cli.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
    cli.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
    opts = cli.parse_args()

    if opts.detector == 'retinaface':
        face_detector = RetinaFace()
    else:
        face_detector = SCRFD()
    landmark_model = Landmark106()

    kind = get_source_type(opts.source)

    if kind == 'camera':
        run_camera(face_detector, landmark_model, int(opts.source))
        return

    if kind == 'image':
        if os.path.exists(opts.source):
            process_image(face_detector, landmark_model, opts.source, opts.save_dir)
        else:
            print(f'Error: Image not found: {opts.source}')
        return

    if kind == 'video':
        if os.path.exists(opts.source):
            process_video(face_detector, landmark_model, opts.source, opts.save_dir)
        else:
            print(f'Error: Video not found: {opts.source}')
        return

    # Anything else: neither a camera index nor a recognised file extension.
    print(f"Error: Unknown source type for '{opts.source}'")
    print('Supported formats: images (.jpg, .png, ...), videos (.mp4, .avi, ...), or camera ID (0, 1, ...)')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
110
tools/recognition.py
Normal file
110
tools/recognition.py
Normal file
@@ -0,0 +1,110 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Face recognition: extract embeddings or compare two faces.
|
||||
|
||||
Usage:
|
||||
python tools/recognition.py --image path/to/image.jpg
|
||||
python tools/recognition.py --image1 face1.jpg --image2 face2.jpg
|
||||
"""
|
||||
|
||||
import argparse
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from uniface.detection import SCRFD, RetinaFace
|
||||
from uniface.face_utils import compute_similarity
|
||||
from uniface.recognition import ArcFace, MobileFace, SphereFace
|
||||
|
||||
|
||||
def get_recognizer(name: str):
    """Instantiate the recognizer selected by ``name``.

    'arcface' and 'mobileface' map to their classes; any other value falls
    back to SphereFace.
    """
    recognizer_cls = {'arcface': ArcFace, 'mobileface': MobileFace}.get(name, SphereFace)
    return recognizer_cls()
|
||||
|
||||
|
||||
def run_inference(detector, recognizer, image_path: str):
    """Print embedding statistics for the first face detected in an image.

    Args:
        detector: Object whose ``detect(image)`` returns detected faces.
        recognizer: Object providing ``get_embedding(image, landmarks)`` and
            ``get_normalized_embedding(image, landmarks)``.
        image_path: Path of the image to read.
    """
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Failed to load image from '{image_path}'")
        return

    faces = detector.detect(image)
    if not faces:
        print('No faces detected.')
        return

    print(f'Detected {len(faces)} face(s). Extracting embedding for the first face...')

    # Read landmarks through attribute access, the same way the other tools
    # in this repository read detection results (``face.bbox``, ``f.landmarks``).
    landmarks = faces[0].landmarks
    embedding = recognizer.get_embedding(image, landmarks)
    norm_embedding = recognizer.get_normalized_embedding(image, landmarks)

    print(f' Embedding shape: {embedding.shape}')
    print(f' L2 norm (raw): {np.linalg.norm(embedding):.4f}')
    print(f' L2 norm (normalized): {np.linalg.norm(norm_embedding):.4f}')
|
||||
|
||||
|
||||
def compare_faces(detector, recognizer, image1_path: str, image2_path: str, threshold: float = 0.35):
    """Compare the first detected face of two images and print the verdict.

    Args:
        detector: Object whose ``detect(image)`` returns detected faces.
        recognizer: Object providing ``get_normalized_embedding(image, landmarks)``.
        image1_path: Path of the first image.
        image2_path: Path of the second image.
        threshold: Similarity strictly above this value counts as a match.
    """
    img1 = cv2.imread(image1_path)
    img2 = cv2.imread(image2_path)

    if img1 is None or img2 is None:
        print('Error: Failed to load one or both images')
        return

    faces1 = detector.detect(img1)
    faces2 = detector.detect(img2)

    if not faces1 or not faces2:
        print('Error: No faces detected in one or both images')
        return

    # Read landmarks through attribute access, the same way the other tools
    # in this repository read detection results (``face.bbox``, ``f.landmarks``).
    landmarks1 = faces1[0].landmarks
    landmarks2 = faces2[0].landmarks

    embedding1 = recognizer.get_normalized_embedding(img1, landmarks1)
    embedding2 = recognizer.get_normalized_embedding(img2, landmarks2)

    # Both embeddings are already normalized, so tell the helper to skip that step.
    similarity = compute_similarity(embedding1, embedding2, normalized=True)
    is_match = similarity > threshold

    print(f'Similarity: {similarity:.4f}')
    print(f'Result: {"Same person" if is_match else "Different person"} (threshold: {threshold})')
|
||||
|
||||
|
||||
def main():
    """CLI entry point: compare two images, or extract an embedding from one."""
    cli = argparse.ArgumentParser(description='Face recognition and comparison')
    cli.add_argument('--image', type=str, help='Single image for embedding extraction')
    cli.add_argument('--image1', type=str, help='First image for comparison')
    cli.add_argument('--image2', type=str, help='Second image for comparison')
    cli.add_argument('--threshold', type=float, default=0.35, help='Similarity threshold')
    cli.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
    cli.add_argument(
        '--recognizer',
        type=str,
        default='arcface',
        choices=['arcface', 'mobileface', 'sphereface'],
    )
    opts = cli.parse_args()

    if opts.detector == 'retinaface':
        face_detector = RetinaFace()
    else:
        face_detector = SCRFD()
    face_recognizer = get_recognizer(opts.recognizer)

    # Comparison mode takes precedence when both comparison images are given.
    if opts.image1 and opts.image2:
        compare_faces(face_detector, face_recognizer, opts.image1, opts.image2, opts.threshold)
        return

    if opts.image:
        run_inference(face_detector, face_recognizer, opts.image)
        return

    print('Error: Provide --image or both --image1 and --image2')
    cli.print_help()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
28
tools/sha256_generate.py
Normal file
28
tools/sha256_generate.py
Normal file
@@ -0,0 +1,28 @@
|
||||
import argparse
|
||||
import hashlib
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def compute_sha256(file_path: Path, chunk_size: int = 8192) -> str:
    """Return the hex SHA-256 digest of a file, reading it in chunks.

    Args:
        file_path: File to hash; opened in binary mode.
        chunk_size: Number of bytes requested per read.

    Returns:
        The digest as a lowercase hexadecimal string.
    """
    digest = hashlib.sha256()
    with file_path.open('rb') as stream:
        while True:
            block = stream.read(chunk_size)
            if not block:  # empty bytes means end of file
                break
            digest.update(block)
    return digest.hexdigest()
|
||||
|
||||
|
||||
def main():
    """CLI entry point: print the SHA256 hash of the file given on the command line."""
    cli = argparse.ArgumentParser(description='Compute SHA256 hash of a file')
    cli.add_argument('file', type=Path, help='Path to file')
    target = cli.parse_args().file

    # Reject missing paths and anything that is not a regular file.
    if not (target.exists() and target.is_file()):
        print(f'File does not exist: {target}')
        return

    digest = compute_sha256(target)
    print(f"SHA256 hash for '{target.name}':\n{digest}")
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
214
tools/spoofing.py
Normal file
214
tools/spoofing.py
Normal file
@@ -0,0 +1,214 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Face Anti-Spoofing Detection.
|
||||
|
||||
Usage:
|
||||
python tools/spoofing.py --source path/to/image.jpg
|
||||
python tools/spoofing.py --source path/to/video.mp4
|
||||
python tools/spoofing.py --source 0 # webcam
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from uniface import RetinaFace
|
||||
from uniface.constants import MiniFASNetWeights
|
||||
from uniface.spoofing import create_spoofer
|
||||
|
||||
IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
|
||||
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}
|
||||
|
||||
|
||||
def get_source_type(source: str) -> str:
    """Map a ``--source`` value to 'camera', 'image', 'video', or 'unknown'.

    A string made only of digits is a camera index. Any other value is
    classified by its file extension, compared case-insensitively.
    """
    if source.isdigit():
        return 'camera'

    file_ext = Path(source).suffix.lower()
    if file_ext in IMAGE_EXTENSIONS:
        source_kind = 'image'
    elif file_ext in VIDEO_EXTENSIONS:
        source_kind = 'video'
    else:
        source_kind = 'unknown'
    return source_kind
|
||||
|
||||
|
||||
def draw_spoofing_result(
    image: np.ndarray,
    bbox: list,
    is_real: bool,
    confidence: float,
    thickness: int = 2,
) -> None:
    """Draw bounding box with anti-spoofing result.

    The label strip normally sits directly above the box. When the box is too
    close to the top of the image for that, the strip is moved down to start
    at row 0 so the text stays visible.

    Args:
        image: Input image to draw on (modified in place).
        bbox: Bounding box in [x1, y1, x2, y2] format.
        is_real: True if real face, False if fake.
        confidence: Confidence score (0.0 to 1.0).
        thickness: Line thickness for bounding box.
    """
    x1, y1, x2, y2 = map(int, bbox[:4])

    # Green for real, red for fake (OpenCV colour tuples are B, G, R).
    color = (0, 255, 0) if is_real else (0, 0, 255)

    cv2.rectangle(image, (x1, y1), (x2, y2), color, thickness)

    label = 'Real' if is_real else 'Fake'
    text = f'{label}: {confidence:.1%}'

    (tw, th), _baseline = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2)

    # Clamp the strip to the image: with y1 < th + 10 the original position
    # would be at negative rows and the label would be cut off.
    label_top = max(y1 - th - 10, 0)
    label_bottom = label_top + th + 10

    cv2.rectangle(image, (x1, label_top), (x1 + tw + 10, label_bottom), color, -1)
    cv2.putText(image, text, (x1 + 5, label_bottom - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
|
||||
|
||||
|
||||
def process_image(detector, spoofer, image_path: str, save_dir: str = 'outputs') -> None:
    """Process a single image for face anti-spoofing detection.

    The annotated image is written to ``<save_dir>/<image stem>_spoofing.jpg``.
    Nothing is saved when no face is detected.

    Args:
        detector: Object whose ``detect(image)`` returns faces exposing ``.bbox``.
        spoofer: Object whose ``predict(image, bbox)`` returns a result with
            ``is_real`` and ``confidence`` attributes.
        image_path: Path of the image to read.
        save_dir: Output directory; created if it does not exist.
    """
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Failed to load image from '{image_path}'")
        return

    faces = detector.detect(image)
    print(f'Detected {len(faces)} face(s)')

    if not faces:
        print('No faces detected in the image.')
        return

    # Classify every face on the untouched image first. Drawing as we go
    # would let one face's box and label leak into the pixels handed to the
    # spoofer for the next face.
    predictions = [(face.bbox, spoofer.predict(image, face.bbox)) for face in faces]

    for i, (bbox, result) in enumerate(predictions, 1):
        label = 'Real' if result.is_real else 'Fake'
        print(f' Face {i}: {label} ({result.confidence:.1%})')
        draw_spoofing_result(image, bbox, result.is_real, result.confidence)

    os.makedirs(save_dir, exist_ok=True)
    output_path = os.path.join(save_dir, f'{Path(image_path).stem}_spoofing.jpg')
    # cv2.imwrite reports failure through its return value rather than raising.
    if not cv2.imwrite(output_path, image):
        print(f"Error: Failed to write output image '{output_path}'")
        return
    print(f'Output saved: {output_path}')
|
||||
|
||||
|
||||
def process_video(detector, spoofer, video_path: str, save_dir: str = 'outputs') -> None:
    """Process a video file for face anti-spoofing detection.

    The output is ``<save_dir>/<video stem>_spoofing.mp4``, encoded with the
    ``mp4v`` fourcc at the input's reported frame rate and resolution.

    Args:
        detector: Object whose ``detect(frame)`` returns faces exposing ``.bbox``.
        spoofer: Object whose ``predict(frame, bbox)`` returns a result with
            ``is_real`` and ``confidence`` attributes.
        video_path: Path of the input video.
        save_dir: Output directory; created if it does not exist.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Error: Cannot open video file '{video_path}'")
        return

    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    os.makedirs(save_dir, exist_ok=True)
    output_path = os.path.join(save_dir, f'{Path(video_path).stem}_spoofing.mp4')
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    # Without this check a writer that failed to open would silently drop every frame.
    if not out.isOpened():
        print(f"Error: Cannot create output video '{output_path}'")
        cap.release()
        return

    print(f'Processing video: {video_path} ({total_frames} frames)')
    frame_count = 0

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            frame_count += 1
            faces = detector.detect(frame)

            # Classify all faces on the clean frame, then draw the overlays.
            predictions = [(face.bbox, spoofer.predict(frame, face.bbox)) for face in faces]
            for bbox, result in predictions:
                draw_spoofing_result(frame, bbox, result.is_real, result.confidence)

            out.write(frame)

            if frame_count % 100 == 0:
                print(f' Processed {frame_count}/{total_frames} frames...')
    finally:
        # Release both handles even if a model call raises.
        cap.release()
        out.release()

    print(f'Done! Output saved: {output_path}')
|
||||
|
||||
|
||||
def run_camera(detector, spoofer, camera_id: int = 0) -> None:
    """Run real-time anti-spoofing detection on webcam until 'q' is pressed.

    Args:
        detector: Object whose ``detect(frame)`` returns faces exposing ``.bbox``.
        spoofer: Object whose ``predict(frame, bbox)`` returns a result with
            ``is_real`` and ``confidence`` attributes.
        camera_id: Index passed to ``cv2.VideoCapture``.
    """
    cap = cv2.VideoCapture(camera_id)
    if not cap.isOpened():
        print(f'Cannot open camera {camera_id}')
        return

    print("Press 'q' to quit")

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            frame = cv2.flip(frame, 1)  # horizontal flip for a mirror-like preview
            faces = detector.detect(frame)

            # Classify all faces on the clean frame, then draw the overlays.
            predictions = [(face.bbox, spoofer.predict(frame, face.bbox)) for face in faces]
            for bbox, result in predictions:
                draw_spoofing_result(frame, bbox, result.is_real, result.confidence)

            cv2.imshow('Face Anti-Spoofing', frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Free the camera and close the window even if a model call raises.
        cap.release()
        cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
    """CLI entry point: parse arguments, build detector and spoofer, dispatch."""
    cli = argparse.ArgumentParser(description='Face Anti-Spoofing Detection')
    cli.add_argument('--source', type=str, required=True, help='Image/video path or camera ID (0, 1, ...)')
    cli.add_argument(
        '--model',
        type=str,
        default='v2',
        choices=['v1se', 'v2'],
        help='Model variant: v1se or v2 (default: v2)',
    )
    cli.add_argument('--scale', type=float, default=None, help='Custom crop scale (default: auto)')
    cli.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
    opts = cli.parse_args()

    # Translate the CLI choice into the weights enum member.
    if opts.model == 'v1se':
        weights = MiniFASNetWeights.V1SE
    else:
        weights = MiniFASNetWeights.V2

    print(f'Initializing models (MiniFASNet {opts.model.upper()})...')
    face_detector = RetinaFace()
    spoof_model = create_spoofer(model_name=weights, scale=opts.scale)

    kind = get_source_type(opts.source)

    if kind == 'camera':
        run_camera(face_detector, spoof_model, int(opts.source))
        return

    if kind == 'image':
        if os.path.exists(opts.source):
            process_image(face_detector, spoof_model, opts.source, opts.save_dir)
        else:
            print(f'Error: Image not found: {opts.source}')
        return

    if kind == 'video':
        if os.path.exists(opts.source):
            process_video(face_detector, spoof_model, opts.source, opts.save_dir)
        else:
            print(f'Error: Video not found: {opts.source}')
        return

    # Anything else: neither a camera index nor a recognised file extension.
    print(f"Error: Unknown source type for '{opts.source}'")
    print('Supported formats: images (.jpg, .png, ...), videos (.mp4, .avi, ...), or camera ID (0, 1, ...)')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
180
tools/video_detection.py
Normal file
180
tools/video_detection.py
Normal file
@@ -0,0 +1,180 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Face detection on video files with progress tracking.
|
||||
|
||||
Usage:
|
||||
python tools/video_detection.py --source video.mp4
|
||||
python tools/video_detection.py --source video.mp4 --output output.mp4
|
||||
python tools/video_detection.py --source 0 # webcam
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
from tqdm import tqdm
|
||||
|
||||
from uniface import SCRFD, RetinaFace
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
|
||||
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}
|
||||
|
||||
|
||||
def get_source_type(source: str) -> str:
    """Tell whether ``source`` names a camera, an image, a video, or none of these.

    Returns 'camera' for digit-only strings; otherwise 'image' or 'video'
    according to the lower-cased file extension, or 'unknown'.
    """
    if source.isdigit():
        return 'camera'

    suffix = Path(source).suffix.lower()
    checks = [
        (IMAGE_EXTENSIONS, 'image'),
        (VIDEO_EXTENSIONS, 'video'),
    ]
    return next((label for extensions, label in checks if suffix in extensions), 'unknown')
|
||||
|
||||
|
||||
def process_video(
    detector,
    input_path: str,
    output_path: str,
    threshold: float = 0.6,
    show_preview: bool = False,
):
    """Process a video file with progress bar.

    Runs face detection on every frame, draws the detections, and writes the
    annotated frames to ``output_path`` with the ``mp4v`` fourcc.

    Args:
        detector: Object whose ``detect(frame)`` returns faces exposing
            ``.bbox``, ``.confidence`` and ``.landmarks``.
        input_path: Path of the input video.
        output_path: Path of the output video; parent directories are created.
        threshold: Passed to ``draw_detections`` as ``vis_threshold``.
        show_preview: If True, show each frame in a window; 'q' cancels.
    """
    cap = cv2.VideoCapture(input_path)
    if not cap.isOpened():
        print(f"Error: Cannot open video file '{input_path}'")
        return

    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    print(f'Input: {input_path} ({width}x{height}, {fps:.1f} fps, {total_frames} frames)')
    print(f'Output: {output_path}')

    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    if not out.isOpened():
        print(f"Error: Cannot create output video '{output_path}'")
        cap.release()
        return

    frame_count = 0
    total_faces = 0

    # The reported frame count is container metadata and may be 0 or too
    # small for some files, so it is used only to size the progress bar.
    # The loop itself runs until a read actually fails.
    progress_total = total_frames if total_frames > 0 else None

    try:
        with tqdm(total=progress_total, desc='Processing', unit='frames') as progress:
            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                frame_count += 1
                progress.update(1)

                faces = detector.detect(frame)
                total_faces += len(faces)

                bboxes = [f.bbox for f in faces]
                scores = [f.confidence for f in faces]
                landmarks = [f.landmarks for f in faces]
                draw_detections(
                    image=frame,
                    bboxes=bboxes,
                    scores=scores,
                    landmarks=landmarks,
                    vis_threshold=threshold,
                    fancy_bbox=True,
                )

                cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
                out.write(frame)

                if show_preview:
                    cv2.imshow("Processing - Press 'q' to cancel", frame)
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        print('\nCancelled by user')
                        break
    finally:
        # Release handles even if detection or drawing raises, so the
        # partially written file is finalized.
        cap.release()
        out.release()
        if show_preview:
            cv2.destroyAllWindows()

    avg_faces = total_faces / frame_count if frame_count > 0 else 0
    print(f'\nDone! {frame_count} frames, {total_faces} faces ({avg_faces:.1f} avg/frame)')
    print(f'Saved: {output_path}')
|
||||
|
||||
|
||||
def run_camera(detector, camera_id: int = 0, threshold: float = 0.6):
    """Run real-time detection on webcam until 'q' is pressed.

    Args:
        detector: Object whose ``detect(frame)`` returns faces exposing
            ``.bbox``, ``.confidence`` and ``.landmarks``.
        camera_id: Index passed to ``cv2.VideoCapture``.
        threshold: Passed to ``draw_detections`` as ``vis_threshold``.
    """
    cap = cv2.VideoCapture(camera_id)
    if not cap.isOpened():
        print(f'Cannot open camera {camera_id}')
        return

    print("Press 'q' to quit")

    try:
        while True:
            ret, frame = cap.read()
            # Check the read result BEFORE touching the frame: on a failed
            # read there is no valid frame to flip.
            if not ret:
                break

            frame = cv2.flip(frame, 1)  # horizontal flip for a mirror-like preview
            faces = detector.detect(frame)

            bboxes = [f.bbox for f in faces]
            scores = [f.confidence for f in faces]
            landmarks = [f.landmarks for f in faces]
            draw_detections(
                image=frame,
                bboxes=bboxes,
                scores=scores,
                landmarks=landmarks,
                vis_threshold=threshold,
                fancy_bbox=True,
            )

            cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            cv2.imshow('Face Detection', frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Free the camera and close the window even if a model call raises.
        cap.release()
        cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
    """CLI entry point: parse arguments, build the detector, run on video or camera."""
    cli = argparse.ArgumentParser(description='Process video with face detection')
    cli.add_argument('--source', type=str, required=True, help='Video path or camera ID (0, 1, ...)')
    cli.add_argument('--output', type=str, default=None, help='Output video path (auto-generated if not specified)')
    cli.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
    cli.add_argument('--threshold', type=float, default=0.6, help='Visualization threshold')
    cli.add_argument('--preview', action='store_true', help='Show live preview')
    cli.add_argument('--save-dir', type=str, default='outputs', help='Output directory (if --output not specified)')
    opts = cli.parse_args()

    if opts.detector == 'retinaface':
        face_detector = RetinaFace()
    else:
        face_detector = SCRFD()

    kind = get_source_type(opts.source)

    if kind == 'camera':
        run_camera(face_detector, int(opts.source), opts.threshold)
        return

    if kind != 'video':
        print(f"Error: Unknown source type for '{opts.source}'")
        print('Supported formats: videos (.mp4, .avi, ...) or camera ID (0, 1, ...)')
        return

    if not os.path.exists(opts.source):
        print(f'Error: Video not found: {opts.source}')
        return

    # An explicit --output wins; otherwise derive a name inside --save-dir.
    destination = opts.output
    if not destination:
        os.makedirs(opts.save_dir, exist_ok=True)
        destination = os.path.join(opts.save_dir, f'{Path(opts.source).stem}_detected.mp4')

    process_video(face_detector, opts.source, destination, opts.threshold, opts.preview)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
@@ -11,52 +11,105 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
__license__ = "MIT"
|
||||
__author__ = "Yakhyokhuja Valikhujaev"
|
||||
__version__ = "1.1.0"
|
||||
"""UniFace: A comprehensive library for face analysis.
|
||||
|
||||
This library provides unified APIs for:
|
||||
- Face detection (RetinaFace, SCRFD, YOLOv5Face)
|
||||
- Face recognition (ArcFace, MobileFace, SphereFace)
|
||||
- Facial landmarks (106-point detection)
|
||||
- Face parsing (semantic segmentation)
|
||||
- Gaze estimation
|
||||
- Age, gender, and emotion prediction
|
||||
- Face anti-spoofing
|
||||
- Privacy/anonymization
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
__license__ = 'MIT'
|
||||
__author__ = 'Yakhyokhuja Valikhujaev'
|
||||
__version__ = '2.0.2'
|
||||
|
||||
from uniface.face_utils import compute_similarity, face_alignment
|
||||
from uniface.log import Logger, enable_logging
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.visualization import draw_detections
|
||||
from uniface.visualization import draw_detections, vis_parsing_maps
|
||||
|
||||
from .attribute import AgeGender
|
||||
from .analyzer import FaceAnalyzer
|
||||
from .attribute import AgeGender, FairFace
|
||||
from .detection import (
|
||||
SCRFD,
|
||||
RetinaFace,
|
||||
YOLOv5Face,
|
||||
create_detector,
|
||||
detect_faces,
|
||||
list_available_detectors,
|
||||
)
|
||||
from .gaze import MobileGaze, create_gaze_estimator
|
||||
from .landmark import Landmark106, create_landmarker
|
||||
from .parsing import BiSeNet, create_face_parser
|
||||
from .privacy import BlurFace, anonymize_faces
|
||||
from .recognition import ArcFace, MobileFace, SphereFace, create_recognizer
|
||||
from .spoofing import MiniFASNet, create_spoofer
|
||||
from .types import AttributeResult, EmotionResult, Face, GazeResult, SpoofingResult
|
||||
|
||||
# Optional: Emotion requires PyTorch
|
||||
Emotion: type | None
|
||||
try:
|
||||
from .attribute import Emotion
|
||||
except ImportError:
|
||||
Emotion = None # PyTorch not installed
|
||||
from .detection import SCRFD, RetinaFace, create_detector, detect_faces, list_available_detectors
|
||||
from .landmark import Landmark106, create_landmarker
|
||||
from .recognition import ArcFace, MobileFace, SphereFace, create_recognizer
|
||||
Emotion = None
|
||||
|
||||
__all__ = [
|
||||
"__author__",
|
||||
"__license__",
|
||||
"__version__",
|
||||
# Metadata
|
||||
'__author__',
|
||||
'__license__',
|
||||
'__version__',
|
||||
# Core classes
|
||||
'Face',
|
||||
'FaceAnalyzer',
|
||||
# Factory functions
|
||||
"create_detector",
|
||||
"create_landmarker",
|
||||
"create_recognizer",
|
||||
"detect_faces",
|
||||
"list_available_detectors",
|
||||
'create_detector',
|
||||
'create_face_parser',
|
||||
'create_gaze_estimator',
|
||||
'create_landmarker',
|
||||
'create_recognizer',
|
||||
'create_spoofer',
|
||||
'detect_faces',
|
||||
'list_available_detectors',
|
||||
# Detection models
|
||||
"RetinaFace",
|
||||
"SCRFD",
|
||||
'RetinaFace',
|
||||
'SCRFD',
|
||||
'YOLOv5Face',
|
||||
# Recognition models
|
||||
"ArcFace",
|
||||
"MobileFace",
|
||||
"SphereFace",
|
||||
'ArcFace',
|
||||
'MobileFace',
|
||||
'SphereFace',
|
||||
# Landmark models
|
||||
"Landmark106",
|
||||
'Landmark106',
|
||||
# Gaze models
|
||||
'GazeResult',
|
||||
'MobileGaze',
|
||||
# Parsing models
|
||||
'BiSeNet',
|
||||
# Attribute models
|
||||
"AgeGender",
|
||||
"Emotion",
|
||||
'AgeGender',
|
||||
'AttributeResult',
|
||||
'Emotion',
|
||||
'EmotionResult',
|
||||
'FairFace',
|
||||
# Spoofing models
|
||||
'MiniFASNet',
|
||||
'SpoofingResult',
|
||||
# Privacy
|
||||
'BlurFace',
|
||||
'anonymize_faces',
|
||||
# Utilities
|
||||
"compute_similarity",
|
||||
"draw_detections",
|
||||
"face_alignment",
|
||||
"verify_model_weights",
|
||||
"Logger",
|
||||
"enable_logging",
|
||||
'Logger',
|
||||
'compute_similarity',
|
||||
'draw_detections',
|
||||
'enable_logging',
|
||||
'face_alignment',
|
||||
'verify_model_weights',
|
||||
'vis_parsing_maps',
|
||||
]
|
||||
|
||||
113
uniface/analyzer.py
Normal file
113
uniface/analyzer.py
Normal file
@@ -0,0 +1,113 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import numpy as np
|
||||
|
||||
from uniface.attribute.age_gender import AgeGender
|
||||
from uniface.attribute.fairface import FairFace
|
||||
from uniface.detection.base import BaseDetector
|
||||
from uniface.log import Logger
|
||||
from uniface.recognition.base import BaseRecognizer
|
||||
from uniface.types import Face
|
||||
|
||||
__all__ = ['FaceAnalyzer']
|
||||
|
||||
|
||||
class FaceAnalyzer:
    """Unified face analyzer combining detection, recognition, and attributes.

    This class provides a high-level interface for face analysis by combining
    multiple components: face detection, recognition (embedding extraction),
    and attribute prediction (age, gender, race).

    Every component except the detector is optional. A component is treated
    as enabled when it is not ``None``; the same test is used for logging,
    for ``repr`` and for the analysis itself, so what is reported always
    matches what is executed.

    Args:
        detector: Face detector instance for detecting faces in images.
        recognizer: Optional face recognizer for extracting embeddings.
        age_gender: Optional age/gender predictor.
        fairface: Optional FairFace predictor for demographics.

    Example:
        >>> from uniface import RetinaFace, ArcFace, FaceAnalyzer
        >>> detector = RetinaFace()
        >>> recognizer = ArcFace()
        >>> analyzer = FaceAnalyzer(detector, recognizer=recognizer)
        >>> faces = analyzer.analyze(image)
    """

    def __init__(
        self,
        detector: BaseDetector,
        recognizer: BaseRecognizer | None = None,
        age_gender: AgeGender | None = None,
        fairface: FairFace | None = None,
    ) -> None:
        """Store the components and log which optional ones are enabled."""
        self.detector = detector
        self.recognizer = recognizer
        self.age_gender = age_gender
        self.fairface = fairface

        Logger.info(f'Initialized FaceAnalyzer with detector={detector.__class__.__name__}')
        # Compare against None (not truthiness) so that a component object
        # which happens to be falsy is still reported as enabled, exactly as
        # `analyze` will treat it.
        if recognizer is not None:
            Logger.info(f' - Recognition enabled: {recognizer.__class__.__name__}')
        if age_gender is not None:
            Logger.info(f' - Age/Gender enabled: {age_gender.__class__.__name__}')
        if fairface is not None:
            Logger.info(f' - FairFace enabled: {fairface.__class__.__name__}')

    def analyze(self, image: np.ndarray) -> list[Face]:
        """Analyze faces in an image.

        Performs face detection and optionally extracts embeddings and
        predicts attributes for each detected face. The objects returned by
        the detector are updated in place and returned.

        A failure in one optional stage is logged as a warning and does not
        abort the remaining stages or the remaining faces (best effort).

        Args:
            image: Input image as numpy array with shape (H, W, C) in BGR format.

        Returns:
            List of Face objects with detection results and any predicted attributes.
        """
        faces = self.detector.detect(image)
        Logger.debug(f'Detected {len(faces)} face(s)')

        for idx, face in enumerate(faces):
            if self.recognizer is not None:
                try:
                    face.embedding = self.recognizer.get_normalized_embedding(image, face.landmarks)
                    Logger.debug(f' Face {idx + 1}: Extracted embedding with shape {face.embedding.shape}')
                except Exception as e:
                    Logger.warning(f' Face {idx + 1}: Failed to extract embedding: {e}')

            if self.age_gender is not None:
                try:
                    result = self.age_gender.predict(image, face.bbox)
                    face.gender = result.gender
                    face.age = result.age
                    # NOTE(review): `face.sex` is not assigned here; presumably it is a
                    # label Face derives from `gender` - confirm against uniface.types.
                    Logger.debug(f' Face {idx + 1}: Age={face.age}, Gender={face.sex}')
                except Exception as e:
                    Logger.warning(f' Face {idx + 1}: Failed to predict age/gender: {e}')

            if self.fairface is not None:
                try:
                    result = self.fairface.predict(image, face.bbox)
                    # Runs after age/gender, so when both predictors are
                    # configured the FairFace gender value is the one kept.
                    face.gender = result.gender
                    face.age_group = result.age_group
                    face.race = result.race
                    Logger.debug(f' Face {idx + 1}: AgeGroup={face.age_group}, Gender={face.sex}, Race={face.race}')
                except Exception as e:
                    Logger.warning(f' Face {idx + 1}: Failed to predict FairFace attributes: {e}')

        Logger.info(f'Analysis complete: {len(faces)} face(s) processed')
        return faces

    def __repr__(self) -> str:
        """Return ``FaceAnalyzer(detector=..., ...)`` listing each enabled component's class name."""
        parts = [f'FaceAnalyzer(detector={self.detector.__class__.__name__}']
        # Same "is not None" rule as __init__/analyze; order matches the
        # constructor's parameter order.
        for label in ('recognizer', 'age_gender', 'fairface'):
            component = getattr(self, label)
            if component is not None:
                parts.append(f'{label}={component.__class__.__name__}')
        return ', '.join(parts) + ')'
|
||||
@@ -2,16 +2,22 @@
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from typing import Dict, Any, List, Union
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
import numpy as np
|
||||
|
||||
from uniface.attribute.age_gender import AgeGender
|
||||
from uniface.attribute.base import Attribute
|
||||
from uniface.constants import AgeGenderWeights, DDAMFNWeights
|
||||
from uniface.attribute.fairface import FairFace
|
||||
from uniface.constants import AgeGenderWeights, DDAMFNWeights, FairFaceWeights
|
||||
from uniface.types import AttributeResult, EmotionResult, Face
|
||||
|
||||
# Emotion requires PyTorch - make it optional
|
||||
try:
|
||||
from uniface.attribute.emotion import Emotion
|
||||
|
||||
_EMOTION_AVAILABLE = True
|
||||
except ImportError:
|
||||
Emotion = None
|
||||
@@ -19,25 +25,28 @@ except ImportError:
|
||||
|
||||
# Public API for the attribute module
|
||||
__all__ = [
|
||||
"AgeGender",
|
||||
"Emotion",
|
||||
"create_attribute_predictor",
|
||||
"predict_attributes"
|
||||
'AgeGender',
|
||||
'AttributeResult',
|
||||
'Emotion',
|
||||
'EmotionResult',
|
||||
'FairFace',
|
||||
'create_attribute_predictor',
|
||||
'predict_attributes',
|
||||
]
|
||||
|
||||
# A mapping from model enums to their corresponding attribute classes
|
||||
_ATTRIBUTE_MODELS = {
|
||||
**{model: AgeGender for model in AgeGenderWeights},
|
||||
**dict.fromkeys(AgeGenderWeights, AgeGender),
|
||||
**dict.fromkeys(FairFaceWeights, FairFace),
|
||||
}
|
||||
|
||||
# Add Emotion models only if PyTorch is available
|
||||
if _EMOTION_AVAILABLE:
|
||||
_ATTRIBUTE_MODELS.update({model: Emotion for model in DDAMFNWeights})
|
||||
_ATTRIBUTE_MODELS.update(dict.fromkeys(DDAMFNWeights, Emotion))
|
||||
|
||||
|
||||
def create_attribute_predictor(
|
||||
model_name: Union[AgeGenderWeights, DDAMFNWeights],
|
||||
**kwargs: Any
|
||||
model_name: AgeGenderWeights | DDAMFNWeights | FairFaceWeights, **kwargs: Any
|
||||
) -> Attribute:
|
||||
"""
|
||||
Factory function to create an attribute predictor instance.
|
||||
@@ -47,11 +56,13 @@ def create_attribute_predictor(
|
||||
|
||||
Args:
|
||||
model_name: The enum corresponding to the desired attribute model
|
||||
(e.g., AgeGenderWeights.DEFAULT or DDAMFNWeights.AFFECNET7).
|
||||
(e.g., AgeGenderWeights.DEFAULT, DDAMFNWeights.AFFECNET7,
|
||||
or FairFaceWeights.DEFAULT).
|
||||
**kwargs: Additional keyword arguments to pass to the model's constructor.
|
||||
|
||||
Returns:
|
||||
An initialized instance of an Attribute predictor class (e.g., AgeGender).
|
||||
An initialized instance of an Attribute predictor class
|
||||
(e.g., AgeGender, FairFace, or Emotion).
|
||||
|
||||
Raises:
|
||||
ValueError: If the provided model_name is not a supported enum.
|
||||
@@ -59,48 +70,45 @@ def create_attribute_predictor(
|
||||
model_class = _ATTRIBUTE_MODELS.get(model_name)
|
||||
|
||||
if model_class is None:
|
||||
raise ValueError(f"Unsupported attribute model: {model_name}. "
|
||||
f"Please choose from AgeGenderWeights or DDAMFNWeights.")
|
||||
raise ValueError(
|
||||
f'Unsupported attribute model: {model_name}. '
|
||||
f'Please choose from AgeGenderWeights, FairFaceWeights, or DDAMFNWeights.'
|
||||
)
|
||||
|
||||
# Pass model_name to the constructor, as some classes might need it
|
||||
return model_class(model_name=model_name, **kwargs)
|
||||
|
||||
|
||||
def predict_attributes(
|
||||
image: np.ndarray,
|
||||
detections: List[Dict[str, np.ndarray]],
|
||||
predictor: Attribute
|
||||
) -> List[Dict[str, Any]]:
|
||||
def predict_attributes(image: np.ndarray, faces: list[Face], predictor: Attribute) -> list[Face]:
|
||||
"""
|
||||
High-level API to predict attributes for multiple detected faces.
|
||||
|
||||
This function iterates through a list of face detections, runs the
|
||||
specified attribute predictor on each one, and appends the results back
|
||||
into the detection dictionary.
|
||||
This function iterates through a list of Face objects, runs the
|
||||
specified attribute predictor on each one, and updates the Face
|
||||
objects with the predicted attributes.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): The full input image in BGR format.
|
||||
detections (List[Dict]): A list of detection results, where each dict
|
||||
must contain a 'bbox' and optionally 'landmark'.
|
||||
faces (List[Face]): A list of Face objects from face detection.
|
||||
predictor (Attribute): An initialized attribute predictor instance,
|
||||
created by `create_attribute_predictor`.
|
||||
|
||||
Returns:
|
||||
The list of detections, where each dictionary is updated with a new
|
||||
'attributes' key containing the prediction result.
|
||||
List[Face]: The list of Face objects with updated attribute fields.
|
||||
"""
|
||||
for face in detections:
|
||||
# Initialize attributes dict if it doesn't exist
|
||||
if 'attributes' not in face:
|
||||
face['attributes'] = {}
|
||||
|
||||
for face in faces:
|
||||
if isinstance(predictor, AgeGender):
|
||||
gender, age = predictor(image, face['bbox'])
|
||||
face['attributes']['gender'] = gender
|
||||
face['attributes']['age'] = age
|
||||
result = predictor(image, face.bbox)
|
||||
face.gender = result.gender
|
||||
face.age = result.age
|
||||
elif isinstance(predictor, FairFace):
|
||||
result = predictor(image, face.bbox)
|
||||
face.gender = result.gender
|
||||
face.age_group = result.age_group
|
||||
face.race = result.race
|
||||
elif isinstance(predictor, Emotion):
|
||||
emotion, confidence = predictor(image, face['landmark'])
|
||||
face['attributes']['emotion'] = emotion
|
||||
face['attributes']['confidence'] = confidence
|
||||
result = predictor(image, face.landmarks)
|
||||
face.emotion = result.emotion
|
||||
face.emotion_confidence = result.confidence
|
||||
|
||||
return detections
|
||||
return faces
|
||||
|
||||
@@ -2,7 +2,6 @@
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from typing import List, Tuple, Union
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
@@ -13,8 +12,9 @@ from uniface.face_utils import bbox_center_alignment
|
||||
from uniface.log import Logger
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.onnx_utils import create_onnx_session
|
||||
from uniface.types import AttributeResult
|
||||
|
||||
__all__ = ["AgeGender"]
|
||||
__all__ = ['AgeGender']
|
||||
|
||||
|
||||
class AgeGender(Attribute):
|
||||
@@ -22,20 +22,32 @@ class AgeGender(Attribute):
|
||||
Age and gender prediction model using ONNX Runtime.
|
||||
|
||||
This class inherits from the base `Attribute` class and implements the
|
||||
functionality for predicting age (in years) and gender (0 for female,
|
||||
1 for male) from a face image. It requires a bounding box to locate the face.
|
||||
functionality for predicting age (in years) and gender ID (0 for Female,
|
||||
1 for Male) from a face image. It requires a bounding box to locate the face.
|
||||
|
||||
Args:
|
||||
model_name (AgeGenderWeights): The enum specifying the model weights to load.
|
||||
Defaults to `AgeGenderWeights.DEFAULT`.
|
||||
input_size (Optional[Tuple[int, int]]): Input size (height, width).
|
||||
If None, automatically detected from model metadata. Defaults to None.
|
||||
"""
|
||||
|
||||
def __init__(self, model_name: AgeGenderWeights = AgeGenderWeights.DEFAULT) -> None:
|
||||
def __init__(
|
||||
self,
|
||||
model_name: AgeGenderWeights = AgeGenderWeights.DEFAULT,
|
||||
input_size: tuple[int, int] | None = None,
|
||||
) -> None:
|
||||
"""
|
||||
Initializes the AgeGender prediction model.
|
||||
|
||||
Args:
|
||||
model_name (AgeGenderWeights): The enum specifying the model weights
|
||||
to load.
|
||||
model_name (AgeGenderWeights): The enum specifying the model weights to load.
|
||||
input_size (Optional[Tuple[int, int]]): Input size (height, width).
|
||||
If None, automatically detected from model metadata. Defaults to None.
|
||||
"""
|
||||
Logger.info(f"Initializing AgeGender with model={model_name.name}")
|
||||
Logger.info(f'Initializing AgeGender with model={model_name.name}')
|
||||
self.model_path = verify_model_weights(model_name)
|
||||
self._user_input_size = input_size # Store user preference
|
||||
self._initialize_model()
|
||||
|
||||
def _initialize_model(self) -> None:
|
||||
@@ -47,14 +59,29 @@ class AgeGender(Attribute):
|
||||
# Get model input details from the loaded model
|
||||
input_meta = self.session.get_inputs()[0]
|
||||
self.input_name = input_meta.name
|
||||
self.input_size = tuple(input_meta.shape[2:4]) # (height, width)
|
||||
self.output_names = [output.name for output in self.session.get_outputs()]
|
||||
Logger.info(f"Successfully initialized AgeGender model with input size {self.input_size}")
|
||||
except Exception as e:
|
||||
Logger.error(f"Failed to load AgeGender model from '{self.model_path}'", exc_info=True)
|
||||
raise RuntimeError(f"Failed to initialize AgeGender model: {e}")
|
||||
|
||||
def preprocess(self, image: np.ndarray, bbox: Union[List, np.ndarray]) -> np.ndarray:
|
||||
# Use user-provided size if given, otherwise auto-detect from model
|
||||
model_input_size = tuple(input_meta.shape[2:4]) # (height, width)
|
||||
if self._user_input_size is not None:
|
||||
self.input_size = self._user_input_size
|
||||
if self._user_input_size != model_input_size:
|
||||
Logger.warning(
|
||||
f'Using custom input_size {self.input_size}, '
|
||||
f'but model expects {model_input_size}. This may affect accuracy.'
|
||||
)
|
||||
else:
|
||||
self.input_size = model_input_size
|
||||
|
||||
self.output_names = [output.name for output in self.session.get_outputs()]
|
||||
Logger.info(f'Successfully initialized AgeGender model with input size {self.input_size}')
|
||||
except Exception as e:
|
||||
Logger.error(
|
||||
f"Failed to load AgeGender model from '{self.model_path}'",
|
||||
exc_info=True,
|
||||
)
|
||||
raise RuntimeError(f'Failed to initialize AgeGender model: {e}') from e
|
||||
|
||||
def preprocess(self, image: np.ndarray, bbox: list | np.ndarray) -> np.ndarray:
|
||||
"""
|
||||
Aligns the face based on the bounding box and preprocesses it for inference.
|
||||
|
||||
@@ -76,11 +103,15 @@ class AgeGender(Attribute):
|
||||
aligned_face, _ = bbox_center_alignment(image, center, self.input_size[1], scale, rotation)
|
||||
|
||||
blob = cv2.dnn.blobFromImage(
|
||||
aligned_face, scalefactor=1.0, size=self.input_size[::-1], mean=(0.0, 0.0, 0.0), swapRB=True
|
||||
aligned_face,
|
||||
scalefactor=1.0,
|
||||
size=self.input_size[::-1],
|
||||
mean=(0.0, 0.0, 0.0),
|
||||
swapRB=True,
|
||||
)
|
||||
return blob
|
||||
|
||||
def postprocess(self, prediction: np.ndarray) -> Tuple[str, int]:
|
||||
def postprocess(self, prediction: np.ndarray) -> AttributeResult:
|
||||
"""
|
||||
Processes the raw model output to extract gender and age.
|
||||
|
||||
@@ -88,17 +119,15 @@ class AgeGender(Attribute):
|
||||
prediction (np.ndarray): The raw output from the model inference.
|
||||
|
||||
Returns:
|
||||
Tuple[str, int]: A tuple containing the predicted gender label ("Female" or "Male")
|
||||
and age (in years).
|
||||
AttributeResult: Result containing gender (0=Female, 1=Male) and age (in years).
|
||||
"""
|
||||
# First two values are gender logits
|
||||
gender_id = int(np.argmax(prediction[:2]))
|
||||
gender = "Female" if gender_id == 0 else "Male"
|
||||
gender = int(np.argmax(prediction[:2]))
|
||||
# Third value is normalized age, scaled by 100
|
||||
age = int(np.round(prediction[2] * 100))
|
||||
return gender, age
|
||||
return AttributeResult(gender=gender, age=age)
|
||||
|
||||
def predict(self, image: np.ndarray, bbox: Union[List, np.ndarray]) -> Tuple[str, int]:
|
||||
def predict(self, image: np.ndarray, bbox: list | np.ndarray) -> AttributeResult:
|
||||
"""
|
||||
Predicts age and gender for a single face specified by a bounding box.
|
||||
|
||||
@@ -107,66 +136,8 @@ class AgeGender(Attribute):
|
||||
bbox (Union[List, np.ndarray]): The face bounding box coordinates [x1, y1, x2, y2].
|
||||
|
||||
Returns:
|
||||
Tuple[str, int]: A tuple containing the predicted gender label and age.
|
||||
AttributeResult: Result containing gender (0=Female, 1=Male) and age (in years).
|
||||
"""
|
||||
face_blob = self.preprocess(image, bbox)
|
||||
prediction = self.session.run(self.output_names, {self.input_name: face_blob})[0][0]
|
||||
gender, age = self.postprocess(prediction)
|
||||
return gender, age
|
||||
|
||||
|
||||
# TODO: below is only for testing, remove it later
|
||||
if __name__ == "__main__":
|
||||
# To run this script, you need to have uniface.detection installed
|
||||
# or available in your path.
|
||||
from uniface.constants import RetinaFaceWeights
|
||||
from uniface.detection import create_detector
|
||||
|
||||
print("Initializing models for live inference...")
|
||||
# 1. Initialize the face detector
|
||||
# Using a smaller model for faster real-time performance
|
||||
detector = create_detector(model_name=RetinaFaceWeights.MNET_V2)
|
||||
|
||||
# 2. Initialize the attribute predictor
|
||||
age_gender_predictor = AgeGender()
|
||||
|
||||
# 3. Start webcam capture
|
||||
cap = cv2.VideoCapture(0)
|
||||
if not cap.isOpened():
|
||||
print("Error: Could not open webcam.")
|
||||
exit()
|
||||
|
||||
print("Starting webcam feed. Press 'q' to quit.")
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
print("Error: Failed to capture frame.")
|
||||
break
|
||||
|
||||
# Detect faces in the current frame
|
||||
detections = detector.detect(frame)
|
||||
|
||||
# For each detected face, predict age and gender
|
||||
for detection in detections:
|
||||
box = detection["bbox"]
|
||||
x1, y1, x2, y2 = map(int, box)
|
||||
|
||||
# Predict attributes
|
||||
gender, age = age_gender_predictor.predict(frame, box)
|
||||
|
||||
# Prepare text and draw on the frame
|
||||
label = f"{gender}, {age}"
|
||||
cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
|
||||
cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
|
||||
|
||||
# Display the resulting frame
|
||||
cv2.imshow("Age and Gender Inference (Press 'q' to quit)", frame)
|
||||
|
||||
# Break the loop if 'q' is pressed
|
||||
if cv2.waitKey(1) & 0xFF == ord("q"):
|
||||
break
|
||||
|
||||
# Release resources
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
print("Inference stopped.")
|
||||
return self.postprocess(prediction)
|
||||
|
||||
@@ -4,8 +4,13 @@
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Any
|
||||
|
||||
import numpy as np
|
||||
|
||||
from uniface.types import AttributeResult, EmotionResult
|
||||
|
||||
__all__ = ['Attribute', 'AttributeResult', 'EmotionResult']
|
||||
|
||||
|
||||
class Attribute(ABC):
|
||||
"""
|
||||
@@ -26,7 +31,7 @@ class Attribute(ABC):
|
||||
inference session (e.g., ONNX Runtime, PyTorch), and any necessary
|
||||
warm-up procedures to prepare the model for prediction.
|
||||
"""
|
||||
raise NotImplementedError("Subclasses must implement the _initialize_model method.")
|
||||
raise NotImplementedError('Subclasses must implement the _initialize_model method.')
|
||||
|
||||
@abstractmethod
|
||||
def preprocess(self, image: np.ndarray, *args: Any) -> Any:
|
||||
@@ -46,7 +51,7 @@ class Attribute(ABC):
|
||||
Returns:
|
||||
The preprocessed data ready for model inference.
|
||||
"""
|
||||
raise NotImplementedError("Subclasses must implement the preprocess method.")
|
||||
raise NotImplementedError('Subclasses must implement the preprocess method.')
|
||||
|
||||
@abstractmethod
|
||||
def postprocess(self, prediction: Any) -> Any:
|
||||
@@ -63,7 +68,7 @@ class Attribute(ABC):
|
||||
Returns:
|
||||
The final, processed attributes.
|
||||
"""
|
||||
raise NotImplementedError("Subclasses must implement the postprocess method.")
|
||||
raise NotImplementedError('Subclasses must implement the postprocess method.')
|
||||
|
||||
@abstractmethod
|
||||
def predict(self, image: np.ndarray, *args: Any) -> Any:
|
||||
@@ -82,7 +87,7 @@ class Attribute(ABC):
|
||||
Returns:
|
||||
The final predicted attributes.
|
||||
"""
|
||||
raise NotImplementedError("Subclasses must implement the predict method.")
|
||||
raise NotImplementedError('Subclasses must implement the predict method.')
|
||||
|
||||
def __call__(self, *args, **kwargs) -> Any:
|
||||
"""
|
||||
|
||||
@@ -2,18 +2,19 @@
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
|
||||
import cv2
|
||||
import torch
|
||||
import numpy as np
|
||||
from typing import Tuple, Union, List
|
||||
import torch
|
||||
|
||||
from uniface.attribute.base import Attribute
|
||||
from uniface.log import Logger
|
||||
from uniface.constants import DDAMFNWeights
|
||||
from uniface.face_utils import face_alignment
|
||||
from uniface.log import Logger
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.types import EmotionResult
|
||||
|
||||
__all__ = ["Emotion"]
|
||||
__all__ = ['Emotion']
|
||||
|
||||
|
||||
class Emotion(Attribute):
|
||||
@@ -28,7 +29,7 @@ class Emotion(Attribute):
|
||||
def __init__(
|
||||
self,
|
||||
model_weights: DDAMFNWeights = DDAMFNWeights.AFFECNET7,
|
||||
input_size: Tuple[int, int] = (112, 112),
|
||||
input_size: tuple[int, int] = (112, 112),
|
||||
) -> None:
|
||||
"""
|
||||
Initializes the emotion recognition model.
|
||||
@@ -37,15 +38,30 @@ class Emotion(Attribute):
|
||||
model_weights (DDAMFNWeights): The enum for the model weights to load.
|
||||
input_size (Tuple[int, int]): The expected input size for the model.
|
||||
"""
|
||||
Logger.info(f"Initializing Emotion with model={model_weights.name}")
|
||||
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
||||
Logger.info(f'Initializing Emotion with model={model_weights.name}')
|
||||
|
||||
if torch.backends.mps.is_available():
|
||||
self.device = torch.device('mps')
|
||||
elif torch.cuda.is_available():
|
||||
self.device = torch.device('cuda')
|
||||
else:
|
||||
self.device = torch.device('cpu')
|
||||
|
||||
self.input_size = input_size
|
||||
self.model_path = verify_model_weights(model_weights)
|
||||
|
||||
# Define emotion labels based on the selected model
|
||||
self.emotion_labels = ["Neutral", "Happy", "Sad", "Surprise", "Fear", "Disgust", "Angry"]
|
||||
self.emotion_labels = [
|
||||
'Neutral',
|
||||
'Happy',
|
||||
'Sad',
|
||||
'Surprise',
|
||||
'Fear',
|
||||
'Disgust',
|
||||
'Angry',
|
||||
]
|
||||
if model_weights == DDAMFNWeights.AFFECNET8:
|
||||
self.emotion_labels.append("Contempt")
|
||||
self.emotion_labels.append('Contempt')
|
||||
|
||||
self._initialize_model()
|
||||
|
||||
@@ -60,12 +76,12 @@ class Emotion(Attribute):
|
||||
dummy_input = torch.randn(1, 3, *self.input_size).to(self.device)
|
||||
with torch.no_grad():
|
||||
self.model(dummy_input)
|
||||
Logger.info(f"Successfully initialized Emotion model on {self.device}")
|
||||
Logger.info(f'Successfully initialized Emotion model on {self.device}')
|
||||
except Exception as e:
|
||||
Logger.error(f"Failed to load Emotion model from '{self.model_path}'", exc_info=True)
|
||||
raise RuntimeError(f"Failed to initialize Emotion model: {e}")
|
||||
raise RuntimeError(f'Failed to initialize Emotion model: {e}') from e
|
||||
|
||||
def preprocess(self, image: np.ndarray, landmark: Union[List, np.ndarray]) -> torch.Tensor:
|
||||
def preprocess(self, image: np.ndarray, landmark: list | np.ndarray) -> torch.Tensor:
|
||||
"""
|
||||
Aligns the face using landmarks and preprocesses it into a tensor.
|
||||
|
||||
@@ -77,7 +93,7 @@ class Emotion(Attribute):
|
||||
torch.Tensor: The preprocessed image tensor ready for inference.
|
||||
"""
|
||||
landmark = np.asarray(landmark)
|
||||
|
||||
|
||||
aligned_image, _ = face_alignment(image, landmark)
|
||||
|
||||
# Convert BGR to RGB, resize, normalize, and convert to a CHW tensor
|
||||
@@ -90,7 +106,7 @@ class Emotion(Attribute):
|
||||
|
||||
return torch.from_numpy(transposed_image).unsqueeze(0).to(self.device)
|
||||
|
||||
def postprocess(self, prediction: torch.Tensor) -> Tuple[str, float]:
|
||||
def postprocess(self, prediction: torch.Tensor) -> EmotionResult:
|
||||
"""
|
||||
Processes the raw model output to get the emotion label and confidence score.
|
||||
"""
|
||||
@@ -98,9 +114,9 @@ class Emotion(Attribute):
|
||||
pred_index = np.argmax(probabilities)
|
||||
emotion_label = self.emotion_labels[pred_index]
|
||||
confidence = float(probabilities[pred_index])
|
||||
return emotion_label, confidence
|
||||
return EmotionResult(emotion=emotion_label, confidence=confidence)
|
||||
|
||||
def predict(self, image: np.ndarray, landmark: Union[List, np.ndarray]) -> Tuple[str, float]:
|
||||
def predict(self, image: np.ndarray, landmark: list | np.ndarray) -> EmotionResult:
|
||||
"""
|
||||
Predicts the emotion from a single face specified by its landmarks.
|
||||
"""
|
||||
@@ -111,60 +127,3 @@ class Emotion(Attribute):
|
||||
output = output[0]
|
||||
|
||||
return self.postprocess(output)
|
||||
|
||||
|
||||
# TODO: below is only for testing, remove it later
|
||||
if __name__ == "__main__":
|
||||
from uniface.detection import create_detector
|
||||
from uniface.constants import RetinaFaceWeights
|
||||
|
||||
print("Initializing models for live inference...")
|
||||
# 1. Initialize the face detector
|
||||
# Using a smaller model for faster real-time performance
|
||||
detector = create_detector(model_name=RetinaFaceWeights.MNET_V2)
|
||||
|
||||
# 2. Initialize the attribute predictor
|
||||
emotion_predictor = Emotion()
|
||||
|
||||
# 3. Start webcam capture
|
||||
cap = cv2.VideoCapture(0)
|
||||
if not cap.isOpened():
|
||||
print("Error: Could not open webcam.")
|
||||
exit()
|
||||
|
||||
print("Starting webcam feed. Press 'q' to quit.")
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
print("Error: Failed to capture frame.")
|
||||
break
|
||||
|
||||
# Detect faces in the current frame.
|
||||
# This method returns a list of dictionaries for each detected face.
|
||||
detections = detector.detect(frame)
|
||||
|
||||
# For each detected face, predict the emotion
|
||||
for detection in detections:
|
||||
box = detection['bbox']
|
||||
landmark = detection['landmarks']
|
||||
x1, y1, x2, y2 = map(int, box)
|
||||
|
||||
# Predict attributes using the landmark
|
||||
emotion, confidence = emotion_predictor.predict(frame, landmark)
|
||||
|
||||
# Prepare text and draw on the frame
|
||||
label = f"{emotion} ({confidence:.2f})"
|
||||
cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)
|
||||
cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 0, 0), 2)
|
||||
|
||||
# Display the resulting frame
|
||||
cv2.imshow("Emotion Inference (Press 'q' to quit)", frame)
|
||||
|
||||
# Break the loop if 'q' is pressed
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
# Release resources
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
print("Inference stopped.")
|
||||
193
uniface/attribute/fairface.py
Normal file
193
uniface/attribute/fairface.py
Normal file
@@ -0,0 +1,193 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from uniface.attribute.base import Attribute
|
||||
from uniface.constants import FairFaceWeights
|
||||
from uniface.log import Logger
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.onnx_utils import create_onnx_session
|
||||
from uniface.types import AttributeResult
|
||||
|
||||
__all__ = ['AGE_LABELS', 'RACE_LABELS', 'FairFace']
|
||||
|
||||
# Label definitions
|
||||
RACE_LABELS = [
|
||||
'White',
|
||||
'Black',
|
||||
'Latino Hispanic',
|
||||
'East Asian',
|
||||
'Southeast Asian',
|
||||
'Indian',
|
||||
'Middle Eastern',
|
||||
]
|
||||
AGE_LABELS = ['0-2', '3-9', '10-19', '20-29', '30-39', '40-49', '50-59', '60-69', '70+']
|
||||
|
||||
|
||||
class FairFace(Attribute):
    """
    FairFace attribute prediction model using ONNX Runtime.

    This class inherits from the base `Attribute` class and implements the
    functionality for predicting race (7 categories), gender (2 categories),
    and age (9 groups) from a face image. A bounding box may be supplied to
    locate the face; when it is omitted the entire image is used.

    The model is trained on the FairFace dataset which provides balanced demographics
    for more equitable predictions across different racial and gender groups.

    Args:
        model_name (FairFaceWeights): The enum specifying the model weights to load.
            Defaults to `FairFaceWeights.DEFAULT`.
        input_size (tuple[int, int] | None): Input size (height, width).
            If None, defaults to (224, 224). Defaults to None.
    """

    def __init__(
        self,
        model_name: FairFaceWeights = FairFaceWeights.DEFAULT,
        input_size: tuple[int, int] | None = None,
    ) -> None:
        """
        Initializes the FairFace prediction model.

        Args:
            model_name (FairFaceWeights): The enum specifying the model weights to load.
            input_size (tuple[int, int] | None): Input size (height, width).
                If None, defaults to (224, 224).

        Raises:
            RuntimeError: If the ONNX session cannot be created.
        """
        Logger.info(f'Initializing FairFace with model={model_name.name}')
        self.model_path = verify_model_weights(model_name)
        self.input_size = input_size if input_size is not None else (224, 224)
        self._initialize_model()

    def _initialize_model(self) -> None:
        """
        Initializes the ONNX model and creates an inference session.

        Raises:
            RuntimeError: If session creation or metadata lookup fails; the
                original exception is chained as the cause.
        """
        try:
            self.session = create_onnx_session(self.model_path)
            # Get model input details from the loaded model
            input_meta = self.session.get_inputs()[0]
            self.input_name = input_meta.name
            self.output_names = [output.name for output in self.session.get_outputs()]
            Logger.info(f'Successfully initialized FairFace model with input size {self.input_size}')
        except Exception as e:
            Logger.error(
                f"Failed to load FairFace model from '{self.model_path}'",
                exc_info=True,
            )
            raise RuntimeError(f'Failed to initialize FairFace model: {e}') from e

    def preprocess(self, image: np.ndarray, bbox: list | np.ndarray | None = None) -> np.ndarray:
        """
        Preprocesses the face image for inference.

        Args:
            image (np.ndarray): The input image in BGR format.
            bbox (list | np.ndarray | None): Face bounding box [x1, y1, x2, y2].
                If None, uses the entire image.

        Returns:
            np.ndarray: The preprocessed image blob ready for inference.

        Raises:
            ValueError: If the region to process is empty, e.g. the bounding box
                is degenerate or lies completely outside the image.
        """
        source_shape = image.shape

        # Crop face if bbox provided
        if bbox is not None:
            bbox = np.asarray(bbox, dtype=int)
            x1, y1, x2, y2 = bbox[:4]

            # Add padding (25% of face size)
            w, h = x2 - x1, y2 - y1
            padding = 0.25
            x_pad = int(w * padding)
            y_pad = int(h * padding)

            # Clamp the padded box to the image bounds
            x1 = max(0, x1 - x_pad)
            y1 = max(0, y1 - y_pad)
            x2 = min(image.shape[1], x2 + x_pad)
            y2 = min(image.shape[0], y2 + y_pad)

            image = image[y1:y2, x1:x2]

        # An empty region would otherwise fail inside cv2.resize with an opaque
        # OpenCV error; fail early with a message that names the inputs.
        if image.size == 0:
            raise ValueError(f'Face crop is empty for bbox={bbox} and image shape={source_shape}')

        # Resize to input size (width, height for cv2.resize)
        image = cv2.resize(image, self.input_size[::-1])

        # Convert BGR to RGB
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Normalize with ImageNet mean and std
        image = image.astype(np.float32) / 255.0
        mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
        std = np.array([0.229, 0.224, 0.225], dtype=np.float32)
        image = (image - mean) / std

        # Transpose to CHW format and add batch dimension
        image = np.transpose(image, (2, 0, 1))
        image = np.expand_dims(image, axis=0)

        return image

    def postprocess(self, prediction: tuple[np.ndarray, np.ndarray, np.ndarray]) -> AttributeResult:
        """
        Processes the raw model output to extract race, gender, and age.

        Args:
            prediction (tuple[np.ndarray, np.ndarray, np.ndarray]): Raw outputs from model
                (race_logits, gender_logits, age_logits).

        Returns:
            AttributeResult: Result containing gender (0=Female, 1=Male), age_group, and race.
        """
        race_logits, gender_logits, age_logits = prediction

        # Apply softmax to the first (only) item of each batch
        race_probs = self._softmax(race_logits[0])
        gender_probs = self._softmax(gender_logits[0])
        age_probs = self._softmax(age_logits[0])

        # Get predictions
        race_idx = int(np.argmax(race_probs))
        raw_gender_idx = int(np.argmax(gender_probs))
        age_idx = int(np.argmax(age_probs))

        # Normalize gender: model outputs 0=Male, 1=Female → standard 0=Female, 1=Male
        gender = 1 - raw_gender_idx

        return AttributeResult(
            gender=gender,
            age_group=AGE_LABELS[age_idx],
            race=RACE_LABELS[race_idx],
        )

    def predict(self, image: np.ndarray, bbox: list | np.ndarray | None = None) -> AttributeResult:
        """
        Predicts race, gender, and age for a face.

        Args:
            image (np.ndarray): The input image in BGR format.
            bbox (list | np.ndarray | None): Face bounding box [x1, y1, x2, y2].
                If None, uses the entire image.

        Returns:
            AttributeResult: Result containing:
                - gender: 0=Female, 1=Male
                - age_group: Age range string like "20-29"
                - race: Race/ethnicity label

        Raises:
            ValueError: If the region selected by `bbox` is empty.
        """
        # Preprocess
        input_blob = self.preprocess(image, bbox)

        # Inference
        outputs = self.session.run(self.output_names, {self.input_name: input_blob})

        # Postprocess
        return self.postprocess(outputs)

    @staticmethod
    def _softmax(x: np.ndarray) -> np.ndarray:
        """Compute softmax, subtracting the max first for numerical stability."""
        exp_x = np.exp(x - np.max(x))
        return exp_x / np.sum(exp_x)
|
||||
@@ -2,24 +2,42 @@
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
import cv2
|
||||
import math
|
||||
from __future__ import annotations
|
||||
|
||||
import itertools
|
||||
import math
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from typing import Tuple, List
|
||||
__all__ = [
|
||||
'decode_boxes',
|
||||
'decode_landmarks',
|
||||
'distance2bbox',
|
||||
'distance2kps',
|
||||
'generate_anchors',
|
||||
'non_max_suppression',
|
||||
'resize_image',
|
||||
]
|
||||
|
||||
|
||||
def resize_image(frame, target_shape: Tuple[int, int] = (640, 640)) -> Tuple[np.ndarray, float]:
|
||||
"""
|
||||
Resize an image to fit within a target shape while keeping its aspect ratio.
|
||||
def resize_image(
|
||||
frame: np.ndarray,
|
||||
target_shape: tuple[int, int] = (640, 640),
|
||||
) -> tuple[np.ndarray, float]:
|
||||
"""Resize an image to fit within a target shape while keeping its aspect ratio.
|
||||
|
||||
The image is resized to fit within the target dimensions and placed on a
|
||||
blank canvas (zero-padded to target size).
|
||||
|
||||
Args:
|
||||
frame (np.ndarray): Input image.
|
||||
target_shape (Tuple[int, int]): Target size (width, height). Defaults to (640, 640).
|
||||
frame: Input image with shape (H, W, C).
|
||||
target_shape: Target size as (width, height). Defaults to (640, 640).
|
||||
|
||||
Returns:
|
||||
Tuple[np.ndarray, float]: Resized image on a blank canvas and the resize factor.
|
||||
A tuple containing:
|
||||
- Resized image on a blank canvas with shape (height, width, 3).
|
||||
- The resize factor as a float.
|
||||
"""
|
||||
width, height = target_shape
|
||||
|
||||
@@ -43,28 +61,21 @@ def resize_image(frame, target_shape: Tuple[int, int] = (640, 640)) -> Tuple[np.
|
||||
return image, resize_factor
|
||||
|
||||
|
||||
def generate_anchors(image_size: Tuple[int, int] = (640, 640)) -> np.ndarray:
|
||||
"""
|
||||
Generate anchor boxes for a given image size.
|
||||
def generate_anchors(image_size: tuple[int, int] = (640, 640)) -> np.ndarray:
|
||||
"""Generate anchor boxes for a given image size (RetinaFace specific).
|
||||
|
||||
Args:
|
||||
image_size (Tuple[int, int]): Input image size (width, height). Defaults to (640, 640).
|
||||
image_size: Input image size as (width, height). Defaults to (640, 640).
|
||||
|
||||
Returns:
|
||||
np.ndarray: Anchor box coordinates as a NumPy array.
|
||||
Anchor box coordinates as a numpy array with shape (num_anchors, 4).
|
||||
"""
|
||||
image_size = image_size
|
||||
|
||||
# RetinaFace FPN strides and corresponding anchor sizes per level
|
||||
steps = [8, 16, 32]
|
||||
min_sizes = [[16, 32], [64, 128], [256, 512]]
|
||||
|
||||
anchors = []
|
||||
feature_maps = [
|
||||
[
|
||||
math.ceil(image_size[0] / step),
|
||||
math.ceil(image_size[1] / step)
|
||||
] for step in steps
|
||||
]
|
||||
feature_maps = [[math.ceil(image_size[0] / step), math.ceil(image_size[1] / step)] for step in steps]
|
||||
|
||||
for k, (map_height, map_width) in enumerate(feature_maps):
|
||||
step = steps[k]
|
||||
@@ -82,16 +93,15 @@ def generate_anchors(image_size: Tuple[int, int] = (640, 640)) -> np.ndarray:
|
||||
return output
|
||||
|
||||
|
||||
def non_max_supression(dets: List[np.ndarray], threshold: float):
|
||||
"""
|
||||
Apply Non-Maximum Suppression (NMS) to reduce overlapping bounding boxes based on a threshold.
|
||||
def non_max_suppression(dets: np.ndarray, threshold: float) -> list[int]:
|
||||
"""Apply Non-Maximum Suppression (NMS) to reduce overlapping bounding boxes.
|
||||
|
||||
Args:
|
||||
dets (numpy.ndarray): Array of detections with each row as [x1, y1, x2, y2, score].
|
||||
threshold (float): IoU threshold for suppression.
|
||||
dets: Array of detections with each row as [x1, y1, x2, y2, score].
|
||||
threshold: IoU threshold for suppression.
|
||||
|
||||
Returns:
|
||||
list: Indices of bounding boxes retained after suppression.
|
||||
Indices of bounding boxes retained after suppression.
|
||||
"""
|
||||
x1 = dets[:, 0]
|
||||
y1 = dets[:, 1]
|
||||
@@ -122,19 +132,25 @@ def non_max_supression(dets: List[np.ndarray], threshold: float):
|
||||
return keep
|
||||
|
||||
|
||||
def decode_boxes(loc, priors, variances=[0.1, 0.2]) -> np.ndarray:
|
||||
"""
|
||||
Decode locations from predictions using priors to undo
|
||||
the encoding done for offset regression at train time.
|
||||
def decode_boxes(
|
||||
loc: np.ndarray,
|
||||
priors: np.ndarray,
|
||||
variances: list[float] | None = None,
|
||||
) -> np.ndarray:
|
||||
"""Decode locations from predictions using priors (RetinaFace specific).
|
||||
|
||||
Undoes the encoding done for offset regression at train time.
|
||||
|
||||
Args:
|
||||
loc (np.ndarray): Location predictions for loc layers, shape: [num_priors, 4]
|
||||
priors (np.ndarray): Prior boxes in center-offset form, shape: [num_priors, 4]
|
||||
variances (list[float]): Variances of prior boxes
|
||||
loc: Location predictions for loc layers, shape: [num_priors, 4].
|
||||
priors: Prior boxes in center-offset form, shape: [num_priors, 4].
|
||||
variances: Variances of prior boxes. Defaults to [0.1, 0.2].
|
||||
|
||||
Returns:
|
||||
np.ndarray: Decoded bounding box predictions
|
||||
Decoded bounding box predictions with shape [num_priors, 4].
|
||||
"""
|
||||
if variances is None:
|
||||
variances = [0.1, 0.2]
|
||||
# Compute centers of predicted boxes
|
||||
cxcy = priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:]
|
||||
|
||||
@@ -149,18 +165,23 @@ def decode_boxes(loc, priors, variances=[0.1, 0.2]) -> np.ndarray:
|
||||
return boxes
|
||||
|
||||
|
||||
def decode_landmarks(predictions, priors, variances=[0.1, 0.2]) -> np.ndarray:
|
||||
"""
|
||||
Decode landmark predictions using prior boxes.
|
||||
def decode_landmarks(
|
||||
predictions: np.ndarray,
|
||||
priors: np.ndarray,
|
||||
variances: list[float] | None = None,
|
||||
) -> np.ndarray:
|
||||
"""Decode landmark predictions using prior boxes (RetinaFace specific).
|
||||
|
||||
Args:
|
||||
predictions (np.ndarray): Landmark predictions, shape: [num_priors, 10]
|
||||
priors (np.ndarray): Prior boxes, shape: [num_priors, 4]
|
||||
variances (list): Scaling factors for landmark offsets.
|
||||
predictions: Landmark predictions, shape: [num_priors, 10].
|
||||
priors: Prior boxes, shape: [num_priors, 4].
|
||||
variances: Scaling factors for landmark offsets. Defaults to [0.1, 0.2].
|
||||
|
||||
Returns:
|
||||
np.ndarray: Decoded landmarks, shape: [num_priors, 10]
|
||||
Decoded landmarks, shape: [num_priors, 10].
|
||||
"""
|
||||
if variances is None:
|
||||
variances = [0.1, 0.2]
|
||||
|
||||
# Reshape predictions to [num_priors, 5, 2] to process landmark points
|
||||
predictions = predictions.reshape(predictions.shape[0], 5, 2)
|
||||
@@ -176,3 +197,65 @@ def decode_landmarks(predictions, priors, variances=[0.1, 0.2]) -> np.ndarray:
|
||||
landmarks = landmarks.reshape(landmarks.shape[0], -1)
|
||||
|
||||
return landmarks
|
||||
|
||||
|
||||
def distance2bbox(
    points: np.ndarray,
    distance: np.ndarray,
    max_shape: tuple[int, int] | None = None,
) -> np.ndarray:
    """Turn per-anchor edge distances into corner-format boxes (SCRFD specific).

    Args:
        points: Anchor points with shape (n, 2), [x, y].
        distance: Offsets from each point to the four box edges
            (left, top, right, bottom) with shape (n, 4).
        max_shape: Image shape (height, width). When given, coordinates are
            clipped to [0, width] / [0, height]; otherwise they are only
            clamped to be non-negative.

    Returns:
        Decoded bounding boxes with shape (n, 4) as [x1, y1, x2, y2].
    """
    anchor_x = points[:, 0]
    anchor_y = points[:, 1]

    # Corner coordinates in output order: x1, y1, x2, y2
    corners = [
        anchor_x - distance[:, 0],
        anchor_y - distance[:, 1],
        anchor_x + distance[:, 2],
        anchor_y + distance[:, 3],
    ]

    if max_shape is None:
        corners = [np.maximum(coord, 0) for coord in corners]
    else:
        # x coordinates are bounded by width, y coordinates by height
        upper_bounds = (max_shape[1], max_shape[0], max_shape[1], max_shape[0])
        corners = [np.clip(coord, 0, bound) for coord, bound in zip(corners, upper_bounds)]

    return np.stack(corners, axis=-1)
|
||||
|
||||
|
||||
def distance2kps(
    points: np.ndarray,
    distance: np.ndarray,
    max_shape: tuple[int, int] | None = None,
) -> np.ndarray:
    """Turn per-anchor keypoint offsets into absolute keypoints (SCRFD specific).

    Args:
        points: Anchor points with shape (n, 2), [x, y].
        distance: Offsets from each point to its keypoints with shape (n, 2k),
            laid out as interleaved (dx, dy) pairs.
        max_shape: Image shape (height, width). When given, x is clipped to
            [0, width] and y to [0, height]; otherwise no clipping is applied.

    Returns:
        Decoded keypoints with shape (n, 2k).
    """
    coords = []
    # Walk the offsets one (dx, dy) pair at a time
    for col in range(0, distance.shape[1], 2):
        kp_x = points[:, 0] + distance[:, col]
        kp_y = points[:, 1] + distance[:, col + 1]
        if max_shape is not None:
            kp_x = np.clip(kp_x, 0, max_shape[1])
            kp_y = np.clip(kp_y, 0, max_shape[0])
        coords.extend((kp_x, kp_y))
    return np.stack(coords, axis=-1)
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from enum import Enum
|
||||
from typing import Dict
|
||||
|
||||
|
||||
# fmt: off
|
||||
class SphereFaceWeights(str, Enum):
|
||||
@@ -54,6 +54,22 @@ class SCRFDWeights(str, Enum):
|
||||
SCRFD_500M_KPS = "scrfd_500m"
|
||||
|
||||
|
||||
class YOLOv5FaceWeights(str, Enum):
    """
    YOLOv5-Face detector weights, trained on the WIDER FACE dataset.

    Original implementation: https://github.com/deepcam-cn/yolov5-face
    Exported to ONNX from: https://github.com/yakhyo/yolov5-face-onnx-inference

    Model Performance (WIDER FACE, Easy / Medium / Hard):
        - YOLOV5N: 11MB, 93.61% / 91.52% / 80.53%
        - YOLOV5S: 28MB, 94.33% / 92.61% / 83.15%
        - YOLOV5M: 82MB, 95.30% / 93.76% / 85.28%
    """
    YOLOV5N = "yolov5n"
    YOLOV5S = "yolov5s"
    YOLOV5M = "yolov5m"
|
||||
|
||||
|
||||
class DDAMFNWeights(str, Enum):
|
||||
"""
|
||||
Trained on AffectNet dataset.
|
||||
@@ -71,6 +87,15 @@ class AgeGenderWeights(str, Enum):
|
||||
DEFAULT = "age_gender"
|
||||
|
||||
|
||||
class FairFaceWeights(str, Enum):
    """
    FairFace weights for race, gender, and age attribute prediction.

    Trained on the FairFace dataset with balanced demographics.
    Source: https://github.com/yakhyo/fairface-onnx
    """
    DEFAULT = "fairface"
|
||||
|
||||
|
||||
class LandmarkWeights(str, Enum):
|
||||
"""
|
||||
MobileNet 0.5 from Insightface
|
||||
@@ -78,87 +103,140 @@ class LandmarkWeights(str, Enum):
|
||||
"""
|
||||
DEFAULT = "2d_106"
|
||||
|
||||
# fmt: on
|
||||
|
||||
class GazeWeights(str, Enum):
    """
    MobileGaze weights for real-time gaze estimation.

    Trained on the Gaze360 dataset.
    Source: https://github.com/yakhyo/gaze-estimation
    """
    RESNET18 = "gaze_resnet18"
    RESNET34 = "gaze_resnet34"
    RESNET50 = "gaze_resnet50"
    MOBILENET_V2 = "gaze_mobilenetv2"
    MOBILEONE_S0 = "gaze_mobileone_s0"
|
||||
|
||||
|
||||
MODEL_URLS: Dict[Enum, str] = {
|
||||
class ParsingWeights(str, Enum):
    """
    Face parsing weights for semantic segmentation of facial components.

    Trained on the CelebAMask-HQ dataset.
    Source: https://github.com/yakhyo/face-parsing
    """
    RESNET18 = "parsing_resnet18"
    RESNET34 = "parsing_resnet34"
|
||||
|
||||
|
||||
class MiniFASNetWeights(str, Enum):
    """
    MiniFASNet weights for lightweight face anti-spoofing.

    Trained on face anti-spoofing datasets.
    Source: https://github.com/yakhyo/face-anti-spoofing

    Model Variants:
        - V1SE: Uses scale=4.0 for face crop (squeeze-and-excitation version)
        - V2: Uses scale=2.7 for face crop (improved version)
    """
    V1SE = "minifasnet_v1se"
    V2 = "minifasnet_v2"
|
||||
|
||||
|
||||
MODEL_URLS: dict[Enum, str] = {
|
||||
# RetinaFace
|
||||
RetinaFaceWeights.MNET_025: 'https://github.com/yakhyo/uniface/releases/download/weights/retinaface_mv1_0.25.onnx',
|
||||
RetinaFaceWeights.MNET_050: 'https://github.com/yakhyo/uniface/releases/download/weights/retinaface_mv1_0.50.onnx',
|
||||
RetinaFaceWeights.MNET_V1: 'https://github.com/yakhyo/uniface/releases/download/weights/retinaface_mv1.onnx',
|
||||
RetinaFaceWeights.MNET_V2: 'https://github.com/yakhyo/uniface/releases/download/weights/retinaface_mv2.onnx',
|
||||
RetinaFaceWeights.RESNET18: 'https://github.com/yakhyo/uniface/releases/download/weights/retinaface_r18.onnx',
|
||||
RetinaFaceWeights.RESNET34: 'https://github.com/yakhyo/uniface/releases/download/weights/retinaface_r34.onnx',
|
||||
|
||||
RetinaFaceWeights.MNET_025: 'https://github.com/yakhyo/uniface/releases/download/weights/retinaface_mv1_0.25.onnx',
|
||||
RetinaFaceWeights.MNET_050: 'https://github.com/yakhyo/uniface/releases/download/weights/retinaface_mv1_0.50.onnx',
|
||||
RetinaFaceWeights.MNET_V1: 'https://github.com/yakhyo/uniface/releases/download/weights/retinaface_mv1.onnx',
|
||||
RetinaFaceWeights.MNET_V2: 'https://github.com/yakhyo/uniface/releases/download/weights/retinaface_mv2.onnx',
|
||||
RetinaFaceWeights.RESNET18: 'https://github.com/yakhyo/uniface/releases/download/weights/retinaface_r18.onnx',
|
||||
RetinaFaceWeights.RESNET34: 'https://github.com/yakhyo/uniface/releases/download/weights/retinaface_r34.onnx',
|
||||
# MobileFace
|
||||
MobileFaceWeights.MNET_025: 'https://github.com/yakhyo/uniface/releases/download/weights/mobilenetv1_0.25.onnx',
|
||||
MobileFaceWeights.MNET_V2: 'https://github.com/yakhyo/uniface/releases/download/weights/mobilenetv2.onnx',
|
||||
MobileFaceWeights.MNET_V3_SMALL: 'https://github.com/yakhyo/uniface/releases/download/weights/mobilenetv3_small.onnx',
|
||||
MobileFaceWeights.MNET_V3_LARGE: 'https://github.com/yakhyo/uniface/releases/download/weights/mobilenetv3_large.onnx',
|
||||
|
||||
# SphereFace
|
||||
SphereFaceWeights.SPHERE20: 'https://github.com/yakhyo/uniface/releases/download/weights/sphere20.onnx',
|
||||
SphereFaceWeights.SPHERE36: 'https://github.com/yakhyo/uniface/releases/download/weights/sphere36.onnx',
|
||||
|
||||
|
||||
SphereFaceWeights.SPHERE20: 'https://github.com/yakhyo/uniface/releases/download/weights/sphere20.onnx',
|
||||
SphereFaceWeights.SPHERE36: 'https://github.com/yakhyo/uniface/releases/download/weights/sphere36.onnx',
|
||||
# ArcFace
|
||||
ArcFaceWeights.MNET: 'https://github.com/yakhyo/uniface/releases/download/weights/w600k_mbf.onnx',
|
||||
ArcFaceWeights.RESNET: 'https://github.com/yakhyo/uniface/releases/download/weights/w600k_r50.onnx',
|
||||
|
||||
ArcFaceWeights.MNET: 'https://github.com/yakhyo/uniface/releases/download/weights/w600k_mbf.onnx',
|
||||
ArcFaceWeights.RESNET: 'https://github.com/yakhyo/uniface/releases/download/weights/w600k_r50.onnx',
|
||||
# SCRFD
|
||||
SCRFDWeights.SCRFD_10G_KPS: 'https://github.com/yakhyo/uniface/releases/download/weights/scrfd_10g_kps.onnx',
|
||||
SCRFDWeights.SCRFD_500M_KPS: 'https://github.com/yakhyo/uniface/releases/download/weights/scrfd_500m_kps.onnx',
|
||||
|
||||
|
||||
SCRFDWeights.SCRFD_10G_KPS: 'https://github.com/yakhyo/uniface/releases/download/weights/scrfd_10g_kps.onnx',
|
||||
SCRFDWeights.SCRFD_500M_KPS: 'https://github.com/yakhyo/uniface/releases/download/weights/scrfd_500m_kps.onnx',
|
||||
# YOLOv5-Face
|
||||
YOLOv5FaceWeights.YOLOV5N: 'https://github.com/yakhyo/yolov5-face-onnx-inference/releases/download/weights/yolov5n_face.onnx',
|
||||
YOLOv5FaceWeights.YOLOV5S: 'https://github.com/yakhyo/yolov5-face-onnx-inference/releases/download/weights/yolov5s_face.onnx',
|
||||
YOLOv5FaceWeights.YOLOV5M: 'https://github.com/yakhyo/yolov5-face-onnx-inference/releases/download/weights/yolov5m_face.onnx',
|
||||
# DDAMFN
|
||||
DDAMFNWeights.AFFECNET7: 'https://github.com/yakhyo/uniface/releases/download/weights/affecnet7.script',
|
||||
DDAMFNWeights.AFFECNET8: 'https://github.com/yakhyo/uniface/releases/download/weights/affecnet8.script',
|
||||
|
||||
DDAMFNWeights.AFFECNET7: 'https://github.com/yakhyo/uniface/releases/download/weights/affecnet7.script',
|
||||
DDAMFNWeights.AFFECNET8: 'https://github.com/yakhyo/uniface/releases/download/weights/affecnet8.script',
|
||||
# AgeGender
|
||||
AgeGenderWeights.DEFAULT: 'https://github.com/yakhyo/uniface/releases/download/weights/genderage.onnx',
|
||||
|
||||
AgeGenderWeights.DEFAULT: 'https://github.com/yakhyo/uniface/releases/download/weights/genderage.onnx',
|
||||
# FairFace
|
||||
FairFaceWeights.DEFAULT: 'https://github.com/yakhyo/fairface-onnx/releases/download/weights/fairface.onnx',
|
||||
# Landmarks
|
||||
LandmarkWeights.DEFAULT: 'https://github.com/yakhyo/uniface/releases/download/weights/2d106det.onnx',
|
||||
LandmarkWeights.DEFAULT: 'https://github.com/yakhyo/uniface/releases/download/weights/2d106det.onnx',
|
||||
# Gaze (MobileGaze)
|
||||
GazeWeights.RESNET18: 'https://github.com/yakhyo/gaze-estimation/releases/download/weights/resnet18_gaze.onnx',
|
||||
GazeWeights.RESNET34: 'https://github.com/yakhyo/gaze-estimation/releases/download/weights/resnet34_gaze.onnx',
|
||||
GazeWeights.RESNET50: 'https://github.com/yakhyo/gaze-estimation/releases/download/weights/resnet50_gaze.onnx',
|
||||
GazeWeights.MOBILENET_V2: 'https://github.com/yakhyo/gaze-estimation/releases/download/weights/mobilenetv2_gaze.onnx',
|
||||
GazeWeights.MOBILEONE_S0: 'https://github.com/yakhyo/gaze-estimation/releases/download/weights/mobileone_s0_gaze.onnx',
|
||||
# Parsing
|
||||
ParsingWeights.RESNET18: 'https://github.com/yakhyo/face-parsing/releases/download/weights/resnet18.onnx',
|
||||
ParsingWeights.RESNET34: 'https://github.com/yakhyo/face-parsing/releases/download/weights/resnet34.onnx',
|
||||
# Anti-Spoofing (MiniFASNet)
|
||||
MiniFASNetWeights.V1SE: 'https://github.com/yakhyo/face-anti-spoofing/releases/download/weights/MiniFASNetV1SE.onnx',
|
||||
MiniFASNetWeights.V2: 'https://github.com/yakhyo/face-anti-spoofing/releases/download/weights/MiniFASNetV2.onnx',
|
||||
}
|
||||
|
||||
MODEL_SHA256: Dict[Enum, str] = {
|
||||
MODEL_SHA256: dict[Enum, str] = {
|
||||
# RetinaFace
|
||||
RetinaFaceWeights.MNET_025: 'b7a7acab55e104dce6f32cdfff929bd83946da5cd869b9e2e9bdffafd1b7e4a5',
|
||||
RetinaFaceWeights.MNET_050: 'd8977186f6037999af5b4113d42ba77a84a6ab0c996b17c713cc3d53b88bfc37',
|
||||
RetinaFaceWeights.MNET_V1: '75c961aaf0aff03d13c074e9ec656e5510e174454dd4964a161aab4fe5f04153',
|
||||
RetinaFaceWeights.MNET_V2: '3ca44c045651cabeed1193a1fae8946ad1f3a55da8fa74b341feab5a8319f757',
|
||||
RetinaFaceWeights.RESNET18: 'e8b5ddd7d2c3c8f7c942f9f10cec09d8e319f78f09725d3f709631de34fb649d',
|
||||
RetinaFaceWeights.RESNET34: 'bd0263dc2a465d32859555cb1741f2d98991eb0053696e8ee33fec583d30e630',
|
||||
|
||||
RetinaFaceWeights.MNET_025: 'b7a7acab55e104dce6f32cdfff929bd83946da5cd869b9e2e9bdffafd1b7e4a5',
|
||||
RetinaFaceWeights.MNET_050: 'd8977186f6037999af5b4113d42ba77a84a6ab0c996b17c713cc3d53b88bfc37',
|
||||
RetinaFaceWeights.MNET_V1: '75c961aaf0aff03d13c074e9ec656e5510e174454dd4964a161aab4fe5f04153',
|
||||
RetinaFaceWeights.MNET_V2: '3ca44c045651cabeed1193a1fae8946ad1f3a55da8fa74b341feab5a8319f757',
|
||||
RetinaFaceWeights.RESNET18: 'e8b5ddd7d2c3c8f7c942f9f10cec09d8e319f78f09725d3f709631de34fb649d',
|
||||
RetinaFaceWeights.RESNET34: 'bd0263dc2a465d32859555cb1741f2d98991eb0053696e8ee33fec583d30e630',
|
||||
# MobileFace
|
||||
MobileFaceWeights.MNET_025: 'eeda7d23d9c2b40cf77fa8da8e895b5697465192648852216074679657f8ee8b',
|
||||
MobileFaceWeights.MNET_V2: '38b148284dd48cc898d5d4453104252fbdcbacc105fe3f0b80e78954d9d20d89',
|
||||
MobileFaceWeights.MNET_V3_SMALL: 'd4acafa1039a82957aa8a9a1dac278a401c353a749c39df43de0e29cc1c127c3',
|
||||
MobileFaceWeights.MNET_V3_LARGE: '0e48f8e11f070211716d03e5c65a3db35a5e917cfb5bc30552358629775a142a',
|
||||
|
||||
# SphereFace
|
||||
SphereFaceWeights.SPHERE20: 'c02878cf658eb1861f580b7e7144b0d27cc29c440bcaa6a99d466d2854f14c9d',
|
||||
SphereFaceWeights.SPHERE36: '13b3890cd5d7dec2b63f7c36fd7ce07403e5a0bbb701d9647c0289e6cbe7bb20',
|
||||
|
||||
|
||||
SphereFaceWeights.SPHERE20: 'c02878cf658eb1861f580b7e7144b0d27cc29c440bcaa6a99d466d2854f14c9d',
|
||||
SphereFaceWeights.SPHERE36: '13b3890cd5d7dec2b63f7c36fd7ce07403e5a0bbb701d9647c0289e6cbe7bb20',
|
||||
# ArcFace
|
||||
ArcFaceWeights.MNET: '9cc6e4a75f0e2bf0b1aed94578f144d15175f357bdc05e815e5c4a02b319eb4f',
|
||||
ArcFaceWeights.RESNET: '4c06341c33c2ca1f86781dab0e829f88ad5b64be9fba56e56bc9ebdefc619e43',
|
||||
|
||||
ArcFaceWeights.MNET: '9cc6e4a75f0e2bf0b1aed94578f144d15175f357bdc05e815e5c4a02b319eb4f',
|
||||
ArcFaceWeights.RESNET: '4c06341c33c2ca1f86781dab0e829f88ad5b64be9fba56e56bc9ebdefc619e43',
|
||||
# SCRFD
|
||||
SCRFDWeights.SCRFD_10G_KPS: '5838f7fe053675b1c7a08b633df49e7af5495cee0493c7dcf6697200b85b5b91',
|
||||
SCRFDWeights.SCRFD_500M_KPS: '5e4447f50245bbd7966bd6c0fa52938c61474a04ec7def48753668a9d8b4ea3a',
|
||||
|
||||
SCRFDWeights.SCRFD_10G_KPS: '5838f7fe053675b1c7a08b633df49e7af5495cee0493c7dcf6697200b85b5b91',
|
||||
SCRFDWeights.SCRFD_500M_KPS: '5e4447f50245bbd7966bd6c0fa52938c61474a04ec7def48753668a9d8b4ea3a',
|
||||
# YOLOv5-Face
|
||||
YOLOv5FaceWeights.YOLOV5N: 'eb244a06e36999db732b317c2b30fa113cd6cfc1a397eaf738f2d6f33c01f640',
|
||||
YOLOv5FaceWeights.YOLOV5S: 'fc682801cd5880e1e296184a14aea0035486b5146ec1a1389d2e7149cb134bb2',
|
||||
YOLOv5FaceWeights.YOLOV5M: '04302ce27a15bde3e20945691b688e2dd018a10e92dd8932146bede6a49207b2',
|
||||
# DDAMFN
|
||||
DDAMFNWeights.AFFECNET7: '10535bf8b6afe8e9d6ae26cea6c3add9a93036e9addb6adebfd4a972171d015d',
|
||||
DDAMFNWeights.AFFECNET8: '8c66963bc71db42796a14dfcbfcd181b268b65a3fc16e87147d6a3a3d7e0f487',
|
||||
|
||||
DDAMFNWeights.AFFECNET7: '10535bf8b6afe8e9d6ae26cea6c3add9a93036e9addb6adebfd4a972171d015d',
|
||||
DDAMFNWeights.AFFECNET8: '8c66963bc71db42796a14dfcbfcd181b268b65a3fc16e87147d6a3a3d7e0f487',
|
||||
# AgeGender
|
||||
AgeGenderWeights.DEFAULT: '4fde69b1c810857b88c64a335084f1c3fe8f01246c9a191b48c7bb756d6652fb',
|
||||
|
||||
AgeGenderWeights.DEFAULT: '4fde69b1c810857b88c64a335084f1c3fe8f01246c9a191b48c7bb756d6652fb',
|
||||
# FairFace
|
||||
FairFaceWeights.DEFAULT: '9c8c47d437cd310538d233f2465f9ed0524cb7fb51882a37f74e8bc22437fdbf',
|
||||
# Landmark
|
||||
LandmarkWeights.DEFAULT: 'f001b856447c413801ef5c42091ed0cd516fcd21f2d6b79635b1e733a7109dbf',
|
||||
LandmarkWeights.DEFAULT: 'f001b856447c413801ef5c42091ed0cd516fcd21f2d6b79635b1e733a7109dbf',
|
||||
# MobileGaze (trained on Gaze360)
|
||||
GazeWeights.RESNET18: '23d5d7e4f6f40dce8c35274ce9d08b45b9e22cbaaf5af73182f473229d713d31',
|
||||
GazeWeights.RESNET34: '4457ee5f7acd1a5ab02da4b61f02fc3a0b17adbf3844dd0ba3cd4288f2b5e1de',
|
||||
GazeWeights.RESNET50: 'e1eaf98f5ec7c89c6abe7cfe39f7be83e747163f98d1ff945c0603b3c521be22',
|
||||
GazeWeights.MOBILENET_V2: 'fdcdb84e3e6421b5a79e8f95139f249fc258d7f387eed5ddac2b80a9a15ce076',
|
||||
GazeWeights.MOBILEONE_S0: 'c0b5a4f4a0ffd24f76ab3c1452354bb2f60110899fd9a88b464c75bafec0fde8',
|
||||
# Face Parsing
|
||||
ParsingWeights.RESNET18: '0d9bd318e46987c3bdbfacae9e2c0f461cae1c6ac6ea6d43bbe541a91727e33f',
|
||||
ParsingWeights.RESNET34: '5b805bba7b5660ab7070b5a381dcf75e5b3e04199f1e9387232a77a00095102e',
|
||||
# Anti-Spoofing (MiniFASNet)
|
||||
MiniFASNetWeights.V1SE: 'ebab7f90c7833fbccd46d3a555410e78d969db5438e169b6524be444862b3676',
|
||||
MiniFASNetWeights.V2: 'b32929adc2d9c34b9486f8c4c7bc97c1b69bc0ea9befefc380e4faae4e463907',
|
||||
}
|
||||
|
||||
CHUNK_SIZE = 8192
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user