Compare commits
54 Commits
v1.1.1
...
feat/unifa
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
cb81d2fcf8 | ||
|
|
f0bae6dd80 | ||
|
|
eec8f99850 | ||
|
|
3682a2124f | ||
|
|
2ef6a1ebe8 | ||
|
|
78a2dba7c7 | ||
|
|
87e496d1f5 | ||
|
|
5604ebf4f1 | ||
|
|
971775b2e8 | ||
|
|
c520ea2df2 | ||
|
|
2a8cb54d31 | ||
|
|
331f46be7c | ||
|
|
9991fae62a | ||
|
|
b74ab95d39 | ||
|
|
d2b0303bfe | ||
|
|
5f74487eb3 | ||
|
|
f897482d26 | ||
|
|
f3d81eb201 | ||
|
|
ea0b56f7e0 | ||
|
|
edbab5f7bf | ||
|
|
cd8077e460 | ||
|
|
452b3381a2 | ||
|
|
07c8bd7b24 | ||
|
|
68179d1e2d | ||
|
|
99b35dddb4 | ||
|
|
3b6d0a35a9 | ||
|
|
0bd808bcef | ||
|
|
9edf8b6b3d | ||
|
|
efb40f2e91 | ||
|
|
376e7bc488 | ||
|
|
cbcd89b167 | ||
|
|
50226041c9 | ||
|
|
64ad0d2f53 | ||
|
|
7c98a60d26 | ||
|
|
d97a3b2cb2 | ||
|
|
2200ba063c | ||
|
|
9bcbfa65c2 | ||
|
|
96306a0910 | ||
|
|
3389aa3e4c | ||
|
|
b282e6ccc1 | ||
|
|
d085c6a822 | ||
|
|
13b518e96d | ||
|
|
1b877bc9fc | ||
|
|
bb1d209f3b | ||
|
|
54b769c0f1 | ||
|
|
4d1921e531 | ||
|
|
da8a5cf35b | ||
|
|
3982d677a9 | ||
|
|
f4458f0550 | ||
|
|
637316f077 | ||
|
|
6b1d2a1ce6 | ||
|
|
a5e97ac484 | ||
|
|
0c93598007 | ||
|
|
779952e3f8 |
BIN
.github/logos/logo_preview.jpg
vendored
|
Before Width: | Height: | Size: 826 KiB |
BIN
.github/logos/logo_readme.png
vendored
|
Before Width: | Height: | Size: 563 KiB |
BIN
.github/logos/uniface_enhanced.webp
vendored
Normal file
|
After Width: | Height: | Size: 427 KiB |
BIN
.github/logos/uniface_high_res_original.png
vendored
Normal file
|
After Width: | Height: | Size: 1.7 MiB |
BIN
.github/logos/uniface_rounded.png
vendored
Normal file
|
After Width: | Height: | Size: 1.8 MiB |
BIN
.github/logos/uniface_rounded_150px.png
vendored
Normal file
|
After Width: | Height: | Size: 1.9 MiB |
BIN
.github/logos/uniface_rounded_q80.png
vendored
Normal file
|
After Width: | Height: | Size: 872 KiB |
BIN
.github/logos/uniface_rounded_q80.webp
vendored
Normal file
|
After Width: | Height: | Size: 62 KiB |
50
.github/workflows/ci.yml
vendored
@@ -1,23 +1,46 @@
|
||||
name: CI
|
||||
name: Build
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
- develop
|
||||
pull_request:
|
||||
branches:
|
||||
- main
|
||||
- develop
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
test:
|
||||
lint:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 5
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.10"
|
||||
- uses: pre-commit/action@v3.0.1
|
||||
|
||||
test:
|
||||
runs-on: ${{ matrix.os }}
|
||||
timeout-minutes: 15
|
||||
needs: lint
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
python-version: ["3.10", "3.11", "3.12", "3.13"]
|
||||
include:
|
||||
# Full Python range on Linux (fastest runner)
|
||||
- os: ubuntu-latest
|
||||
python-version: "3.10"
|
||||
- os: ubuntu-latest
|
||||
python-version: "3.13"
|
||||
- os: macos-latest
|
||||
python-version: "3.13"
|
||||
- os: windows-latest
|
||||
python-version: "3.13"
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
@@ -27,7 +50,7 @@ jobs:
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
cache: 'pip'
|
||||
cache: "pip"
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
@@ -38,21 +61,15 @@ jobs:
|
||||
run: |
|
||||
python -c "import onnxruntime as ort; print('Available providers:', ort.get_available_providers())"
|
||||
|
||||
- name: Lint with ruff (if available)
|
||||
run: |
|
||||
pip install ruff || true
|
||||
ruff check . --exit-zero || true
|
||||
continue-on-error: true
|
||||
|
||||
- name: Run tests
|
||||
run: pytest -v --tb=short
|
||||
|
||||
- name: Test package imports
|
||||
run: |
|
||||
python -c "from uniface import RetinaFace, ArcFace, Landmark106, AgeGender; print('All imports successful')"
|
||||
run: python -c "import uniface; print(f'uniface {uniface.__version__} loaded with {len(uniface.__all__)} exports')"
|
||||
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 10
|
||||
needs: test
|
||||
|
||||
steps:
|
||||
@@ -62,8 +79,8 @@ jobs:
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.10"
|
||||
cache: 'pip'
|
||||
python-version: "3.11"
|
||||
cache: "pip"
|
||||
|
||||
- name: Install build tools
|
||||
run: |
|
||||
@@ -84,4 +101,3 @@ jobs:
|
||||
name: dist-python-${{ github.sha }}
|
||||
path: dist/
|
||||
retention-days: 7
|
||||
|
||||
|
||||
38
.github/workflows/docs.yml
vendored
Normal file
@@ -0,0 +1,38 @@
|
||||
name: Deploy docs
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
workflow_dispatch:
|
||||
|
||||
permissions:
|
||||
contents: write
|
||||
|
||||
jobs:
|
||||
deploy:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0 # Fetch full history for git-committers and git-revision-date plugins
|
||||
|
||||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.11"
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install mkdocs-material pymdown-extensions mkdocs-git-committers-plugin-2 mkdocs-git-revision-date-localized-plugin
|
||||
|
||||
- name: Build docs
|
||||
env:
|
||||
MKDOCS_GIT_COMMITTERS_APIKEY: ${{ secrets.MKDOCS_GIT_COMMITTERS_APIKEY }}
|
||||
run: mkdocs build --strict
|
||||
|
||||
- name: Deploy to GitHub Pages
|
||||
uses: peaceiris/actions-gh-pages@v4
|
||||
with:
|
||||
github_token: ${{ secrets.GITHUB_TOKEN }}
|
||||
publish_dir: ./site
|
||||
destination_dir: docs
|
||||
17
.github/workflows/publish.yml
vendored
@@ -5,9 +5,14 @@ on:
|
||||
tags:
|
||||
- "v*.*.*" # Trigger only on version tags like v0.1.9
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
validate:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 5
|
||||
outputs:
|
||||
version: ${{ steps.get_version.outputs.version }}
|
||||
tag_version: ${{ steps.get_version.outputs.tag_version }}
|
||||
@@ -16,13 +21,18 @@ jobs:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.11" # Needs 3.11+ for tomllib
|
||||
|
||||
- name: Get version from tag and pyproject.toml
|
||||
id: get_version
|
||||
run: |
|
||||
TAG_VERSION=${GITHUB_REF#refs/tags/v}
|
||||
echo "tag_version=$TAG_VERSION" >> $GITHUB_OUTPUT
|
||||
|
||||
PYPROJECT_VERSION=$(grep -Po '(?<=^version = ")[^"]*' pyproject.toml)
|
||||
PYPROJECT_VERSION=$(python -c "import tomllib; print(tomllib.load(open('pyproject.toml','rb'))['project']['version'])")
|
||||
echo "version=$PYPROJECT_VERSION" >> $GITHUB_OUTPUT
|
||||
|
||||
echo "Tag version: v$TAG_VERSION"
|
||||
@@ -38,12 +48,13 @@ jobs:
|
||||
|
||||
test:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 15
|
||||
needs: validate
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
python-version: ["3.10", "3.11", "3.12", "3.13"]
|
||||
python-version: ["3.10", "3.13"]
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
@@ -65,6 +76,7 @@ jobs:
|
||||
|
||||
publish:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 10
|
||||
needs: [validate, test]
|
||||
permissions:
|
||||
contents: write
|
||||
@@ -105,4 +117,3 @@ jobs:
|
||||
with:
|
||||
files: dist/*
|
||||
generate_release_notes: true
|
||||
|
||||
|
||||
2
.gitignore
vendored
@@ -1,4 +1,6 @@
|
||||
tmp_*
|
||||
.vscode/
|
||||
*.onnx
|
||||
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
|
||||
41
.pre-commit-config.yaml
Normal file
@@ -0,0 +1,41 @@
|
||||
# Pre-commit configuration for UniFace
|
||||
# See https://pre-commit.com for more information
|
||||
# See https://pre-commit.com/hooks.html for more hooks
|
||||
|
||||
repos:
|
||||
# General file checks
|
||||
- repo: https://github.com/pre-commit/pre-commit-hooks
|
||||
rev: v6.0.0
|
||||
hooks:
|
||||
- id: trailing-whitespace
|
||||
- id: end-of-file-fixer
|
||||
- id: check-yaml
|
||||
exclude: ^mkdocs.yml$
|
||||
- id: check-toml
|
||||
- id: check-added-large-files
|
||||
args: ['--maxkb=1000']
|
||||
- id: check-merge-conflict
|
||||
- id: debug-statements
|
||||
- id: check-ast
|
||||
|
||||
# Ruff - Fast Python linter and formatter
|
||||
- repo: https://github.com/astral-sh/ruff-pre-commit
|
||||
rev: v0.14.10
|
||||
hooks:
|
||||
- id: ruff
|
||||
args: [--fix, --unsafe-fixes, --exit-non-zero-on-fix]
|
||||
- id: ruff-format
|
||||
|
||||
# Security checks
|
||||
- repo: https://github.com/PyCQA/bandit
|
||||
rev: 1.9.2
|
||||
hooks:
|
||||
- id: bandit
|
||||
args: [-c, pyproject.toml]
|
||||
additional_dependencies: ['bandit[toml]']
|
||||
exclude: ^tests/
|
||||
|
||||
# Configuration
|
||||
ci:
|
||||
autofix_commit_msg: 'style: auto-fix by pre-commit hooks'
|
||||
autoupdate_commit_msg: 'chore: update pre-commit hooks'
|
||||
190
CONTRIBUTING.md
Normal file
@@ -0,0 +1,190 @@
|
||||
# Contributing to UniFace
|
||||
|
||||
Thank you for considering contributing to UniFace! We welcome contributions of all kinds.
|
||||
|
||||
## How to Contribute
|
||||
|
||||
### Reporting Issues
|
||||
|
||||
- Use GitHub Issues to report bugs or suggest features
|
||||
- Include clear descriptions and reproducible examples
|
||||
- Check existing issues before creating new ones
|
||||
|
||||
### Pull Requests
|
||||
|
||||
1. Fork the repository
|
||||
2. Create a new branch for your feature
|
||||
3. Write clear, documented code with type hints
|
||||
4. Add tests for new functionality
|
||||
5. Ensure all tests pass and pre-commit hooks are satisfied
|
||||
6. Submit a pull request with a clear description
|
||||
|
||||
## Development Setup
|
||||
|
||||
```bash
|
||||
git clone https://github.com/yakhyo/uniface.git
|
||||
cd uniface
|
||||
pip install -e ".[dev]"
|
||||
```
|
||||
|
||||
### Setting Up Pre-commit Hooks
|
||||
|
||||
We use [pre-commit](https://pre-commit.com/) to ensure code quality and consistency. Install and configure it:
|
||||
|
||||
```bash
|
||||
# Install pre-commit
|
||||
pip install pre-commit
|
||||
|
||||
# Install the git hooks
|
||||
pre-commit install
|
||||
|
||||
# (Optional) Run against all files
|
||||
pre-commit run --all-files
|
||||
```
|
||||
|
||||
Once installed, pre-commit will automatically run on every commit to check:
|
||||
|
||||
- Code formatting and linting (Ruff)
|
||||
- Security issues (Bandit)
|
||||
- General file hygiene (trailing whitespace, YAML/TOML validity, etc.)
|
||||
|
||||
**Note:** All PRs are automatically checked by CI. The merge button will only be available after all checks pass.
|
||||
|
||||
## Code Style
|
||||
|
||||
This project uses [Ruff](https://docs.astral.sh/ruff/) for linting and formatting, following modern Python best practices. Pre-commit handles all formatting automatically.
|
||||
|
||||
### Style Guidelines
|
||||
|
||||
#### General Rules
|
||||
|
||||
- **Line length:** 120 characters maximum
|
||||
- **Python version:** 3.10+ (use modern syntax)
|
||||
- **Quote style:** Single quotes for strings, double quotes for docstrings
|
||||
|
||||
#### Type Hints
|
||||
|
||||
Use modern Python 3.10+ type hints (PEP 585 and PEP 604):
|
||||
|
||||
```python
|
||||
# Preferred (modern)
|
||||
def process(items: list[str], config: dict[str, int] | None = None) -> tuple[int, str]:
|
||||
...
|
||||
|
||||
# Avoid (legacy)
|
||||
from typing import List, Dict, Optional, Tuple
|
||||
def process(items: List[str], config: Optional[Dict[str, int]] = None) -> Tuple[int, str]:
|
||||
...
|
||||
```
|
||||
|
||||
#### Docstrings
|
||||
|
||||
Use [Google-style docstrings](https://google.github.io/styleguide/pyguide.html#38-comments-and-docstrings) for all public APIs:
|
||||
|
||||
```python
|
||||
def create_detector(method: str = 'retinaface', **kwargs: Any) -> BaseDetector:
|
||||
"""Factory function to create face detectors.
|
||||
|
||||
Args:
|
||||
method: Detection method. Options: 'retinaface', 'scrfd', 'yolov5face', 'yolov8face'.
|
||||
**kwargs: Detector-specific parameters.
|
||||
|
||||
Returns:
|
||||
Initialized detector instance.
|
||||
|
||||
Raises:
|
||||
ValueError: If method is not supported.
|
||||
|
||||
Example:
|
||||
>>> from uniface import create_detector
|
||||
>>> detector = create_detector('retinaface', confidence_threshold=0.8)
|
||||
>>> faces = detector.detect(image)
|
||||
>>> print(f"Found {len(faces)} faces")
|
||||
"""
|
||||
```
|
||||
|
||||
#### Import Order
|
||||
|
||||
Imports are automatically sorted by Ruff with the following order:
|
||||
|
||||
1. **Future** imports (`from __future__ import annotations`)
|
||||
2. **Standard library** (`os`, `sys`, `typing`, etc.)
|
||||
3. **Third-party** (`numpy`, `cv2`, `onnxruntime`, etc.)
|
||||
4. **First-party** (`uniface.*`)
|
||||
5. **Local** (relative imports like `.base`, `.models`)
|
||||
|
||||
```python
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from typing import Any
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from uniface.constants import RetinaFaceWeights
|
||||
from uniface.log import Logger
|
||||
|
||||
from .base import BaseDetector
|
||||
```
|
||||
|
||||
#### Code Comments
|
||||
|
||||
- Add comments for complex logic, magic numbers, and non-obvious behavior
|
||||
- Avoid comments that merely restate the code
|
||||
- Use `# TODO:` with issue links for planned improvements
|
||||
|
||||
```python
|
||||
# RetinaFace FPN strides and corresponding anchor sizes per level
|
||||
steps = [8, 16, 32]
|
||||
min_sizes = [[16, 32], [64, 128], [256, 512]]
|
||||
|
||||
# Add small epsilon to prevent division by zero
|
||||
similarity = np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b) + 1e-5)
|
||||
```
|
||||
|
||||
## Running Tests
|
||||
|
||||
```bash
|
||||
# Run all tests
|
||||
pytest tests/
|
||||
|
||||
# Run with verbose output
|
||||
pytest tests/ -v
|
||||
|
||||
# Run specific test file
|
||||
pytest tests/test_factory.py
|
||||
|
||||
# Run with coverage
|
||||
pytest tests/ --cov=uniface --cov-report=html
|
||||
```
|
||||
|
||||
## Adding New Features
|
||||
|
||||
When adding a new model or feature:
|
||||
|
||||
1. **Create the model class** in the appropriate submodule (e.g., `uniface/detection/`)
|
||||
2. **Add weight constants** to `uniface/constants.py` with URLs and SHA256 hashes
|
||||
3. **Export in `__init__.py`** files at both module and package levels
|
||||
4. **Write tests** in `tests/` directory
|
||||
5. **Add example usage** in `tools/` or update existing notebooks
|
||||
6. **Update documentation** if needed
|
||||
|
||||
## Examples
|
||||
|
||||
Example notebooks demonstrating library usage:
|
||||
|
||||
| Example | Notebook |
|
||||
| ------------------ | ------------------------------------------------------------------- |
|
||||
| Face Detection | [01_face_detection.ipynb](examples/01_face_detection.ipynb) |
|
||||
| Face Alignment | [02_face_alignment.ipynb](examples/02_face_alignment.ipynb) |
|
||||
| Face Verification | [03_face_verification.ipynb](examples/03_face_verification.ipynb) |
|
||||
| Face Search | [04_face_search.ipynb](examples/04_face_search.ipynb) |
|
||||
| Face Analyzer | [05_face_analyzer.ipynb](examples/05_face_analyzer.ipynb) |
|
||||
| Face Parsing | [06_face_parsing.ipynb](examples/06_face_parsing.ipynb) |
|
||||
| Face Anonymization | [07_face_anonymization.ipynb](examples/07_face_anonymization.ipynb) |
|
||||
| Gaze Estimation | [08_gaze_estimation.ipynb](examples/08_gaze_estimation.ipynb) |
|
||||
|
||||
## Questions?
|
||||
|
||||
Open an issue or start a discussion on GitHub.
|
||||
399
MODELS.md
@@ -1,399 +0,0 @@
|
||||
# UniFace Model Zoo
|
||||
|
||||
Complete guide to all available models, their performance characteristics, and selection criteria.
|
||||
|
||||
---
|
||||
|
||||
## Face Detection Models
|
||||
|
||||
### RetinaFace Family
|
||||
|
||||
RetinaFace models are trained on the WIDER FACE dataset and provide excellent accuracy-speed tradeoffs.
|
||||
|
||||
| Model Name | Params | Size | Easy | Medium | Hard | Use Case |
|
||||
|---------------------|--------|--------|--------|--------|--------|----------------------------|
|
||||
| `MNET_025` | 0.4M | 1.7MB | 88.48% | 87.02% | 80.61% | Mobile/Edge devices |
|
||||
| `MNET_050` | 1.0M | 2.6MB | 89.42% | 87.97% | 82.40% | Mobile/Edge devices |
|
||||
| `MNET_V1` | 3.5M | 3.8MB | 90.59% | 89.14% | 84.13% | Balanced mobile |
|
||||
| `MNET_V2` ⭐ | 3.2M | 3.5MB | 91.70% | 91.03% | 86.60% | **Recommended default** |
|
||||
| `RESNET18` | 11.7M | 27MB | 92.50% | 91.02% | 86.63% | Server/High accuracy |
|
||||
| `RESNET34` | 24.8M | 56MB | 94.16% | 93.12% | 88.90% | Maximum accuracy |
|
||||
|
||||
**Accuracy**: WIDER FACE validation set (Easy/Medium/Hard subsets) - from [RetinaFace paper](https://arxiv.org/abs/1905.00641)
|
||||
**Speed**: Benchmark on your own hardware using `scripts/run_detection.py --iterations 100`
|
||||
|
||||
#### Usage
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace
|
||||
from uniface.constants import RetinaFaceWeights
|
||||
|
||||
# Default (recommended)
|
||||
detector = RetinaFace() # Uses MNET_V2
|
||||
|
||||
# Specific model
|
||||
detector = RetinaFace(
|
||||
model_name=RetinaFaceWeights.MNET_025, # Fastest
|
||||
conf_thresh=0.5,
|
||||
nms_thresh=0.4,
|
||||
input_size=(640, 640)
|
||||
)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### SCRFD Family
|
||||
|
||||
SCRFD (Sample and Computation Redistribution for Efficient Face Detection) models offer state-of-the-art speed-accuracy tradeoffs.
|
||||
|
||||
| Model Name | Params | Size | Easy | Medium | Hard | Use Case |
|
||||
|-----------------|--------|-------|--------|--------|--------|----------------------------|
|
||||
| `SCRFD_500M` | 0.6M | 2.5MB | 90.57% | 88.12% | 68.51% | Real-time applications |
|
||||
| `SCRFD_10G` ⭐ | 4.2M | 17MB | 95.16% | 93.87% | 83.05% | **High accuracy + speed** |
|
||||
|
||||
**Accuracy**: WIDER FACE validation set - from [SCRFD paper](https://arxiv.org/abs/2105.04714)
|
||||
**Speed**: Benchmark on your own hardware using `scripts/run_detection.py --iterations 100`
|
||||
|
||||
#### Usage
|
||||
|
||||
```python
|
||||
from uniface import SCRFD
|
||||
from uniface.constants import SCRFDWeights
|
||||
|
||||
# Fast real-time detection
|
||||
detector = SCRFD(
|
||||
model_name=SCRFDWeights.SCRFD_500M_KPS,
|
||||
conf_thresh=0.5,
|
||||
input_size=(640, 640)
|
||||
)
|
||||
|
||||
# High accuracy
|
||||
detector = SCRFD(
|
||||
model_name=SCRFDWeights.SCRFD_10G_KPS,
|
||||
conf_thresh=0.5
|
||||
)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Face Recognition Models
|
||||
|
||||
### ArcFace
|
||||
|
||||
State-of-the-art face recognition using additive angular margin loss.
|
||||
|
||||
| Model Name | Backbone | Params | Size | Use Case |
|
||||
|-------------|-------------|--------|-------|----------------------------|
|
||||
| `MNET` ⭐ | MobileNet | 2.0M | 8MB | **Balanced (recommended)** |
|
||||
| `RESNET` | ResNet50 | 43.6M | 166MB | Maximum accuracy |
|
||||
|
||||
**Dataset**: Trained on MS1M-V2 (5.8M images, 85K identities)
|
||||
**Accuracy**: Benchmark on your own dataset or use standard face verification benchmarks
|
||||
|
||||
#### Usage
|
||||
|
||||
```python
|
||||
from uniface import ArcFace
|
||||
from uniface.constants import ArcFaceWeights
|
||||
|
||||
# Default (MobileNet backbone)
|
||||
recognizer = ArcFace()
|
||||
|
||||
# High accuracy (ResNet50 backbone)
|
||||
recognizer = ArcFace(model_name=ArcFaceWeights.RESNET)
|
||||
|
||||
# Extract embedding
|
||||
embedding = recognizer.get_normalized_embedding(image, landmarks)
|
||||
# Returns: (1, 512) normalized embedding vector
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### MobileFace
|
||||
|
||||
Lightweight face recognition optimized for mobile devices.
|
||||
|
||||
| Model Name | Backbone | Params | Size | LFW | CALFW | CPLFW | AgeDB-30 | Use Case |
|
||||
|-----------------|-----------------|--------|------|-------|-------|-------|----------|--------------------|
|
||||
| `MNET_025` | MobileNetV1 0.25| 0.36M | 1MB | 98.76%| 92.02%| 82.37%| 90.02% | Ultra-lightweight |
|
||||
| `MNET_V2` ⭐ | MobileNetV2 | 2.29M | 4MB | 99.55%| 94.87%| 86.89%| 95.16% | **Mobile/Edge** |
|
||||
| `MNET_V3_SMALL` | MobileNetV3-S | 1.25M | 3MB | 99.30%| 93.77%| 85.29%| 92.79% | Mobile optimized |
|
||||
| `MNET_V3_LARGE` | MobileNetV3-L | 3.52M | 10MB | 99.53%| 94.56%| 86.79%| 95.13% | Balanced mobile |
|
||||
|
||||
**Dataset**: Trained on MS1M-V2 (5.8M images, 85K identities)
|
||||
**Accuracy**: Evaluated on LFW, CALFW, CPLFW, and AgeDB-30 benchmarks
|
||||
**Note**: These models are lightweight alternatives to ArcFace for resource-constrained environments
|
||||
|
||||
#### Usage
|
||||
|
||||
```python
|
||||
from uniface import MobileFace
|
||||
from uniface.constants import MobileFaceWeights
|
||||
|
||||
# Lightweight
|
||||
recognizer = MobileFace(model_name=MobileFaceWeights.MNET_V2)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### SphereFace
|
||||
|
||||
Face recognition using angular softmax loss.
|
||||
|
||||
| Model Name | Backbone | Params | Size | LFW | CALFW | CPLFW | AgeDB-30 | Use Case |
|
||||
|-------------|----------|--------|------|-------|-------|-------|----------|----------------------|
|
||||
| `SPHERE20` | Sphere20 | 24.5M | 50MB | 99.67%| 95.61%| 88.75%| 96.58% | Research/Comparison |
|
||||
| `SPHERE36` | Sphere36 | 34.6M | 92MB | 99.72%| 95.64%| 89.92%| 96.83% | Research/Comparison |
|
||||
|
||||
**Dataset**: Trained on MS1M-V2 (5.8M images, 85K identities)
|
||||
**Accuracy**: Evaluated on LFW, CALFW, CPLFW, and AgeDB-30 benchmarks
|
||||
**Note**: SphereFace uses angular softmax loss, an earlier approach before ArcFace. These models provide good accuracy with moderate resource requirements.
|
||||
|
||||
#### Usage
|
||||
|
||||
```python
|
||||
from uniface import SphereFace
|
||||
from uniface.constants import SphereFaceWeights
|
||||
|
||||
recognizer = SphereFace(model_name=SphereFaceWeights.SPHERE20)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Facial Landmark Models
|
||||
|
||||
### 106-Point Landmark Detection
|
||||
|
||||
High-precision facial landmark localization.
|
||||
|
||||
| Model Name | Points | Params | Size | Use Case |
|
||||
|------------|--------|--------|------|-----------------------------|
|
||||
| `2D106` | 106 | 3.7M | 14MB | Face alignment, analysis |
|
||||
|
||||
**Note**: Provides 106 facial keypoints for detailed face analysis and alignment
|
||||
|
||||
#### Usage
|
||||
|
||||
```python
|
||||
from uniface import Landmark106
|
||||
|
||||
landmarker = Landmark106()
|
||||
landmarks = landmarker.get_landmarks(image, bbox)
|
||||
# Returns: (106, 2) array of (x, y) coordinates
|
||||
```
|
||||
|
||||
**Landmark Groups:**
|
||||
- Face contour: 0-32 (33 points)
|
||||
- Eyebrows: 33-50 (18 points)
|
||||
- Nose: 51-62 (12 points)
|
||||
- Eyes: 63-86 (24 points)
|
||||
- Mouth: 87-105 (19 points)
|
||||
|
||||
---
|
||||
|
||||
## Attribute Analysis Models
|
||||
|
||||
### Age & Gender Detection
|
||||
|
||||
| Model Name | Attributes | Params | Size | Use Case |
|
||||
|------------|-------------|--------|------|-------------------|
|
||||
| `DEFAULT` | Age, Gender | 2.1M | 8MB | General purpose |
|
||||
|
||||
**Dataset**: Trained on CelebA
|
||||
**Note**: Accuracy varies by demographic and image quality. Test on your specific use case.
|
||||
|
||||
#### Usage
|
||||
|
||||
```python
|
||||
from uniface import AgeGender
|
||||
|
||||
predictor = AgeGender()
|
||||
gender, age = predictor.predict(image, bbox)
|
||||
# Returns: ("Male"/"Female", age_in_years)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Emotion Detection
|
||||
|
||||
| Model Name | Classes | Params | Size | Use Case |
|
||||
|--------------|---------|--------|------|-----------------------|
|
||||
| `AFFECNET7` | 7 | 0.5M | 2MB | 7-class emotion |
|
||||
| `AFFECNET8` | 8 | 0.5M | 2MB | 8-class emotion |
|
||||
|
||||
**Classes (7)**: Neutral, Happy, Sad, Surprise, Fear, Disgust, Anger
|
||||
**Classes (8)**: Above + Contempt
|
||||
|
||||
**Dataset**: Trained on AffectNet
|
||||
**Note**: Emotion detection accuracy depends heavily on facial expression clarity and cultural context
|
||||
|
||||
#### Usage
|
||||
|
||||
```python
|
||||
from uniface import Emotion
|
||||
from uniface.constants import DDAMFNWeights
|
||||
|
||||
predictor = Emotion(model_name=DDAMFNWeights.AFFECNET7)
|
||||
emotion, confidence = predictor.predict(image, landmarks)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Model Selection Guide
|
||||
|
||||
### By Use Case
|
||||
|
||||
#### Mobile/Edge Devices
|
||||
- **Detection**: `RetinaFace(MNET_025)` or `SCRFD(SCRFD_500M)`
|
||||
- **Recognition**: `MobileFace(MNET_V2)`
|
||||
- **Priority**: Speed, small model size
|
||||
|
||||
#### Real-Time Applications (Webcam, Video)
|
||||
- **Detection**: `RetinaFace(MNET_V2)` or `SCRFD(SCRFD_500M)`
|
||||
- **Recognition**: `ArcFace(MNET)`
|
||||
- **Priority**: Speed-accuracy balance
|
||||
|
||||
#### High-Accuracy Applications (Security, Verification)
|
||||
- **Detection**: `SCRFD(SCRFD_10G)` or `RetinaFace(RESNET34)`
|
||||
- **Recognition**: `ArcFace(RESNET)`
|
||||
- **Priority**: Maximum accuracy
|
||||
|
||||
#### Server/Cloud Deployment
|
||||
- **Detection**: `SCRFD(SCRFD_10G)`
|
||||
- **Recognition**: `ArcFace(RESNET)`
|
||||
- **Priority**: Accuracy, batch processing
|
||||
|
||||
---
|
||||
|
||||
### By Hardware
|
||||
|
||||
#### Apple Silicon (M1/M2/M3/M4)
|
||||
**Recommended**: All models work well with ARM64 optimizations (automatically included)
|
||||
|
||||
```bash
|
||||
pip install uniface
|
||||
```
|
||||
|
||||
**Recommended models**:
|
||||
- **Fast**: `SCRFD(SCRFD_500M)` - Lightweight, real-time capable
|
||||
- **Balanced**: `RetinaFace(MNET_V2)` - Good accuracy/speed tradeoff
|
||||
- **Accurate**: `SCRFD(SCRFD_10G)` - High accuracy
|
||||
|
||||
**Benchmark on your M4**: `python scripts/run_detection.py --iterations 100`
|
||||
|
||||
#### NVIDIA GPU (CUDA)
|
||||
**Recommended**: Larger models for maximum throughput
|
||||
|
||||
```bash
|
||||
pip install uniface[gpu]
|
||||
```
|
||||
|
||||
**Recommended models**:
|
||||
- **Fast**: `SCRFD(SCRFD_500M)` - Maximum throughput
|
||||
- **Balanced**: `SCRFD(SCRFD_10G)` - Best overall
|
||||
- **Accurate**: `RetinaFace(RESNET34)` - Highest accuracy
|
||||
|
||||
#### CPU Only
|
||||
**Recommended**: Lightweight models
|
||||
|
||||
**Recommended models**:
|
||||
- **Fast**: `RetinaFace(MNET_025)` - Smallest, fastest
|
||||
- **Balanced**: `RetinaFace(MNET_V2)` - Recommended default
|
||||
- **Accurate**: `SCRFD(SCRFD_10G)` - Best accuracy on CPU
|
||||
|
||||
**Note**: FPS values vary significantly based on image size, number of faces, and hardware. Always benchmark on your specific setup.
|
||||
|
||||
---
|
||||
|
||||
## Benchmark Details
|
||||
|
||||
### How to Benchmark
|
||||
|
||||
Run benchmarks on your own hardware:
|
||||
|
||||
```bash
|
||||
# Detection speed
|
||||
python scripts/run_detection.py --image assets/test.jpg --iterations 100
|
||||
|
||||
# Compare models
|
||||
python scripts/run_detection.py --image assets/test.jpg --method retinaface --iterations 100
|
||||
python scripts/run_detection.py --image assets/test.jpg --method scrfd --iterations 100
|
||||
```
|
||||
|
||||
### Accuracy Metrics Explained
|
||||
|
||||
- **WIDER FACE**: Standard face detection benchmark with three difficulty levels
|
||||
- **Easy**: Large faces (>50px), clear backgrounds
|
||||
- **Medium**: Medium-sized faces (30-50px), moderate occlusion
|
||||
- **Hard**: Small faces (<30px), heavy occlusion, blur
|
||||
|
||||
*Accuracy values are from the original papers - see references below*
|
||||
|
||||
- **Model Size**: ONNX model file size (affects download time and memory)
|
||||
- **Params**: Number of model parameters (affects inference speed)
|
||||
|
||||
### Important Notes
|
||||
|
||||
1. **Speed varies by**:
|
||||
- Image resolution
|
||||
- Number of faces in image
|
||||
- Hardware (CPU/GPU/CoreML)
|
||||
- Batch size
|
||||
- Operating system
|
||||
|
||||
2. **Accuracy varies by**:
|
||||
- Image quality
|
||||
- Lighting conditions
|
||||
- Face pose and occlusion
|
||||
- Demographic factors
|
||||
|
||||
3. **Always benchmark on your specific use case** before choosing a model
|
||||
|
||||
---
|
||||
|
||||
## Model Updates
|
||||
|
||||
Models are automatically downloaded and cached on first use. Cache location: `~/.uniface/models/`
|
||||
|
||||
### Manual Model Management
|
||||
|
||||
```python
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.constants import RetinaFaceWeights
|
||||
|
||||
# Download specific model
|
||||
model_path = verify_model_weights(
|
||||
RetinaFaceWeights.MNET_V2,
|
||||
root='./custom_cache'
|
||||
)
|
||||
|
||||
# Models are verified with SHA-256 checksums
|
||||
```
|
||||
|
||||
### Download All Models
|
||||
|
||||
```bash
|
||||
# Using the provided script
|
||||
python scripts/download_model.py
|
||||
|
||||
# Download specific model
|
||||
python scripts/download_model.py --model MNET_V2
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## References
|
||||
|
||||
### Model Training & Architectures
|
||||
|
||||
- **RetinaFace Training**: [yakhyo/retinaface-pytorch](https://github.com/yakhyo/retinaface-pytorch) - PyTorch implementation and training code
|
||||
- **Face Recognition Training**: [yakhyo/face-recognition](https://github.com/yakhyo/face-recognition) - ArcFace, MobileFace, SphereFace training code
|
||||
- **InsightFace**: [deepinsight/insightface](https://github.com/deepinsight/insightface) - Model architectures and pretrained weights
|
||||
|
||||
### Papers
|
||||
|
||||
- **RetinaFace**: [Single-Shot Multi-Level Face Localisation in the Wild](https://arxiv.org/abs/1905.00641)
|
||||
- **SCRFD**: [Sample and Computation Redistribution for Efficient Face Detection](https://arxiv.org/abs/2105.04714)
|
||||
- **ArcFace**: [Additive Angular Margin Loss for Deep Face Recognition](https://arxiv.org/abs/1801.07698)
|
||||
- **SphereFace**: [Deep Hypersphere Embedding for Face Recognition](https://arxiv.org/abs/1704.08063)
|
||||
|
||||
372
QUICKSTART.md
@@ -1,372 +0,0 @@
|
||||
# UniFace Quick Start Guide
|
||||
|
||||
Get up and running with UniFace in 5 minutes! This guide covers the most common use cases.
|
||||
|
||||
---
|
||||
|
||||
## Installation
|
||||
|
||||
```bash
|
||||
# macOS (Apple Silicon) - automatically includes ARM64 optimizations
|
||||
pip install uniface
|
||||
|
||||
# Linux/Windows with NVIDIA GPU
|
||||
pip install uniface[gpu]
|
||||
|
||||
# CPU-only (all platforms)
|
||||
pip install uniface
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 1. Face Detection (30 seconds)
|
||||
|
||||
Detect faces in an image:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace
|
||||
|
||||
# Load image
|
||||
image = cv2.imread("photo.jpg")
|
||||
|
||||
# Initialize detector (models auto-download on first use)
|
||||
detector = RetinaFace()
|
||||
|
||||
# Detect faces
|
||||
faces = detector.detect(image)
|
||||
|
||||
# Print results
|
||||
for i, face in enumerate(faces):
|
||||
print(f"Face {i+1}:")
|
||||
print(f" Confidence: {face['confidence']:.2f}")
|
||||
print(f" BBox: {face['bbox']}")
|
||||
print(f" Landmarks: {len(face['landmarks'])} points")
|
||||
```
|
||||
|
||||
**Output:**
|
||||
```
|
||||
Face 1:
|
||||
Confidence: 0.99
|
||||
BBox: [120.5, 85.3, 245.8, 210.6]
|
||||
Landmarks: 5 points
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 2. Visualize Detections (1 minute)
|
||||
|
||||
Draw bounding boxes and landmarks:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
# Detect faces
|
||||
detector = RetinaFace()
|
||||
image = cv2.imread("photo.jpg")
|
||||
faces = detector.detect(image)
|
||||
|
||||
# Extract visualization data
|
||||
bboxes = [f['bbox'] for f in faces]
|
||||
scores = [f['confidence'] for f in faces]
|
||||
landmarks = [f['landmarks'] for f in faces]
|
||||
|
||||
# Draw on image
|
||||
draw_detections(image, bboxes, scores, landmarks, vis_threshold=0.6)
|
||||
|
||||
# Save result
|
||||
cv2.imwrite("output.jpg", image)
|
||||
print("Saved output.jpg")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 3. Face Recognition (2 minutes)
|
||||
|
||||
Compare two faces:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
import numpy as np
|
||||
from uniface import RetinaFace, ArcFace
|
||||
|
||||
# Initialize models
|
||||
detector = RetinaFace()
|
||||
recognizer = ArcFace()
|
||||
|
||||
# Load two images
|
||||
image1 = cv2.imread("person1.jpg")
|
||||
image2 = cv2.imread("person2.jpg")
|
||||
|
||||
# Detect faces
|
||||
faces1 = detector.detect(image1)
|
||||
faces2 = detector.detect(image2)
|
||||
|
||||
if faces1 and faces2:
|
||||
# Extract embeddings
|
||||
emb1 = recognizer.get_normalized_embedding(image1, faces1[0]['landmarks'])
|
||||
emb2 = recognizer.get_normalized_embedding(image2, faces2[0]['landmarks'])
|
||||
|
||||
# Compute similarity (cosine similarity)
|
||||
similarity = np.dot(emb1, emb2.T)[0][0]
|
||||
|
||||
# Interpret result
|
||||
if similarity > 0.6:
|
||||
print(f"Same person (similarity: {similarity:.3f})")
|
||||
else:
|
||||
print(f"Different people (similarity: {similarity:.3f})")
|
||||
else:
|
||||
print("No faces detected")
|
||||
```
|
||||
|
||||
**Similarity thresholds:**
|
||||
- `> 0.6`: Same person (high confidence)
|
||||
- `0.4 - 0.6`: Uncertain (manual review)
|
||||
- `< 0.4`: Different people
|
||||
|
||||
---
|
||||
|
||||
## 4. Webcam Demo (2 minutes)
|
||||
|
||||
Real-time face detection:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
detector = RetinaFace()
|
||||
cap = cv2.VideoCapture(0)
|
||||
|
||||
print("Press 'q' to quit")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
# Detect faces
|
||||
faces = detector.detect(frame)
|
||||
|
||||
# Draw results
|
||||
bboxes = [f['bbox'] for f in faces]
|
||||
scores = [f['confidence'] for f in faces]
|
||||
landmarks = [f['landmarks'] for f in faces]
|
||||
draw_detections(frame, bboxes, scores, landmarks)
|
||||
|
||||
# Show frame
|
||||
cv2.imshow("UniFace - Press 'q' to quit", frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 5. Age & Gender Detection (2 minutes)
|
||||
|
||||
Detect age and gender:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace, AgeGender
|
||||
|
||||
# Initialize models
|
||||
detector = RetinaFace()
|
||||
age_gender = AgeGender()
|
||||
|
||||
# Load image
|
||||
image = cv2.imread("photo.jpg")
|
||||
faces = detector.detect(image)
|
||||
|
||||
# Predict attributes
|
||||
for i, face in enumerate(faces):
|
||||
gender, age = age_gender.predict(image, face['bbox'])
|
||||
print(f"Face {i+1}: {gender}, {age} years old")
|
||||
```
|
||||
|
||||
**Output:**
|
||||
```
|
||||
Face 1: Male, 32 years old
|
||||
Face 2: Female, 28 years old
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 6. Facial Landmarks (2 minutes)
|
||||
|
||||
Detect 106 facial landmarks:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace, Landmark106
|
||||
|
||||
# Initialize models
|
||||
detector = RetinaFace()
|
||||
landmarker = Landmark106()
|
||||
|
||||
# Detect face and landmarks
|
||||
image = cv2.imread("photo.jpg")
|
||||
faces = detector.detect(image)
|
||||
|
||||
if faces:
|
||||
landmarks = landmarker.get_landmarks(image, faces[0]['bbox'])
|
||||
print(f"Detected {len(landmarks)} landmarks")
|
||||
|
||||
# Draw landmarks
|
||||
for x, y in landmarks.astype(int):
|
||||
cv2.circle(image, (x, y), 2, (0, 255, 0), -1)
|
||||
|
||||
cv2.imwrite("landmarks.jpg", image)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 7. Batch Processing (3 minutes)
|
||||
|
||||
Process multiple images:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from pathlib import Path
|
||||
from uniface import RetinaFace
|
||||
|
||||
detector = RetinaFace()
|
||||
|
||||
# Process all images in a folder
|
||||
image_dir = Path("images/")
|
||||
output_dir = Path("output/")
|
||||
output_dir.mkdir(exist_ok=True)
|
||||
|
||||
for image_path in image_dir.glob("*.jpg"):
|
||||
print(f"Processing {image_path.name}...")
|
||||
|
||||
image = cv2.imread(str(image_path))
|
||||
faces = detector.detect(image)
|
||||
|
||||
print(f" Found {len(faces)} face(s)")
|
||||
|
||||
# Save results
|
||||
output_path = output_dir / image_path.name
|
||||
# ... draw and save ...
|
||||
|
||||
print("Done!")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 8. Model Selection
|
||||
|
||||
Choose the right model for your use case:
|
||||
|
||||
### Detection Models
|
||||
|
||||
```python
|
||||
from uniface.detection import RetinaFace, SCRFD
|
||||
from uniface.constants import RetinaFaceWeights, SCRFDWeights
|
||||
|
||||
# Fast detection (mobile/edge devices)
|
||||
detector = RetinaFace(
|
||||
model_name=RetinaFaceWeights.MNET_025,
|
||||
conf_thresh=0.7
|
||||
)
|
||||
|
||||
# Balanced (recommended)
|
||||
detector = RetinaFace(
|
||||
model_name=RetinaFaceWeights.MNET_V2
|
||||
)
|
||||
|
||||
# High accuracy (server/GPU)
|
||||
detector = SCRFD(
|
||||
model_name=SCRFDWeights.SCRFD_10G_KPS,
|
||||
conf_thresh=0.5
|
||||
)
|
||||
```
|
||||
|
||||
### Recognition Models
|
||||
|
||||
```python
|
||||
from uniface import ArcFace, MobileFace, SphereFace
|
||||
from uniface.constants import MobileFaceWeights, SphereFaceWeights
|
||||
|
||||
# ArcFace (recommended for most use cases)
|
||||
recognizer = ArcFace() # Best accuracy
|
||||
|
||||
# MobileFace (lightweight for mobile/edge)
|
||||
recognizer = MobileFace(model_name=MobileFaceWeights.MNET_V2) # Fast, small size
|
||||
|
||||
# SphereFace (angular margin approach)
|
||||
recognizer = SphereFace(model_name=SphereFaceWeights.SPHERE20) # Alternative method
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Common Issues
|
||||
|
||||
### 1. Models Not Downloading
|
||||
|
||||
```python
|
||||
# Manually download a model
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.constants import RetinaFaceWeights
|
||||
|
||||
model_path = verify_model_weights(RetinaFaceWeights.MNET_V2)
|
||||
print(f"Model downloaded to: {model_path}")
|
||||
```
|
||||
|
||||
### 2. Check Hardware Acceleration
|
||||
|
||||
```python
|
||||
import onnxruntime as ort
|
||||
print("Available providers:", ort.get_available_providers())
|
||||
|
||||
# macOS M-series should show: ['CoreMLExecutionProvider', ...]
|
||||
# NVIDIA GPU should show: ['CUDAExecutionProvider', ...]
|
||||
```
|
||||
|
||||
### 3. Slow Performance on Mac
|
||||
|
||||
The standard installation includes ARM64 optimizations for Apple Silicon. If performance is slow, verify you're using the ARM64 build of Python:
|
||||
|
||||
```bash
|
||||
python -c "import platform; print(platform.machine())"
|
||||
# Should show: arm64 (not x86_64)
|
||||
```
|
||||
|
||||
### 4. Import Errors
|
||||
|
||||
```python
|
||||
# Correct imports
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.recognition import ArcFace
|
||||
from uniface.landmark import Landmark106
|
||||
|
||||
# Wrong imports
|
||||
from uniface import retinaface # Module, not class
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Next Steps
|
||||
|
||||
- **Detailed Examples**: Check the [examples/](examples/) folder for Jupyter notebooks
|
||||
- **Model Benchmarks**: See [MODELS.md](MODELS.md) for performance comparisons
|
||||
- **Full Documentation**: Read [README.md](README.md) for complete API reference
|
||||
|
||||
---
|
||||
|
||||
## References
|
||||
|
||||
- **RetinaFace Training**: [yakhyo/retinaface-pytorch](https://github.com/yakhyo/retinaface-pytorch)
|
||||
- **Face Recognition Training**: [yakhyo/face-recognition](https://github.com/yakhyo/face-recognition)
|
||||
- **InsightFace**: [deepinsight/insightface](https://github.com/deepinsight/insightface)
|
||||
|
||||
---
|
||||
|
||||
Happy coding! 🚀
|
||||
|
||||
536
README.md
@@ -1,457 +1,259 @@
|
||||
# UniFace: All-in-One Face Analysis Library
|
||||
|
||||
[](https://opensource.org/licenses/MIT)
|
||||

|
||||
[](https://pypi.org/project/uniface/)
|
||||
[](https://github.com/yakhyo/uniface/actions)
|
||||
[](https://pepy.tech/project/uniface)
|
||||
<h1 align="center">UniFace: All-in-One Face Analysis Library</h1>
|
||||
|
||||
<div align="center">
|
||||
<img src=".github/logos/logo_web.webp" width=75%>
|
||||
|
||||
[](https://pypi.org/project/uniface/)
|
||||
[](https://www.python.org/)
|
||||
[](https://opensource.org/licenses/MIT)
|
||||
[](https://github.com/yakhyo/uniface/actions)
|
||||
[](https://pepy.tech/projects/uniface)
|
||||
[](https://yakhyo.github.io/uniface/)
|
||||
[](https://www.kaggle.com/yakhyokhuja/code)
|
||||
[](https://discord.gg/wdzrjr7R5j)
|
||||
|
||||
</div>
|
||||
|
||||
**UniFace** is a lightweight, production-ready face analysis library built on ONNX Runtime. It provides high-performance face detection, recognition, landmark detection, and attribute analysis with hardware acceleration support across platforms.
|
||||
<div align="center">
|
||||
<img src="https://raw.githubusercontent.com/yakhyo/uniface/main/.github/logos/uniface_rounded_q80.webp" width="90%" alt="UniFace - All-in-One Open-Source Face Analysis Library">
|
||||
</div>
|
||||
|
||||
---
|
||||
|
||||
**UniFace** is a lightweight, production-ready face analysis library built on ONNX Runtime. It provides high-performance face detection, recognition, landmark detection, face parsing, gaze estimation, and attribute analysis with hardware acceleration support across platforms.
|
||||
|
||||
---
|
||||
|
||||
## Features
|
||||
|
||||
- **High-Speed Face Detection**: ONNX-optimized RetinaFace and SCRFD models
|
||||
- **Facial Landmark Detection**: Accurate 106-point landmark localization
|
||||
- **Face Recognition**: ArcFace, MobileFace, and SphereFace embeddings
|
||||
- **Attribute Analysis**: Age, gender, and emotion detection
|
||||
- **Face Alignment**: Precise alignment for downstream tasks
|
||||
- **Hardware Acceleration**: ARM64 optimizations (Apple Silicon), CUDA (NVIDIA), CPU fallback
|
||||
- **Simple API**: Intuitive factory functions and clean interfaces
|
||||
- **Production-Ready**: Type hints, comprehensive logging, PEP8 compliant
|
||||
- **Face Detection** — RetinaFace, SCRFD, YOLOv5-Face, and YOLOv8-Face with 5-point landmarks
|
||||
- **Face Recognition** — ArcFace, MobileFace, and SphereFace embeddings
|
||||
- **Face Tracking** — Multi-object tracking with [BYTETracker](https://github.com/yakhyo/bytetrack-tracker) for persistent IDs across video frames
|
||||
- **Facial Landmarks** — 106-point landmark localization module (separate from 5-point detector landmarks)
|
||||
- **Face Parsing** — BiSeNet semantic segmentation (19 classes), XSeg face masking
|
||||
- **Gaze Estimation** — Real-time gaze direction with MobileGaze
|
||||
- **Attribute Analysis** — Age, gender, race (FairFace), and emotion
|
||||
- **Vector Indexing** — FAISS-backed embedding store for fast multi-identity search
|
||||
- **Anti-Spoofing** — Face liveness detection with MiniFASNet
|
||||
- **Face Anonymization** — 5 blur methods for privacy protection
|
||||
- **Hardware Acceleration** — ARM64 (Apple Silicon), CUDA (NVIDIA), CPU
|
||||
|
||||
---
|
||||
|
||||
## Installation
|
||||
|
||||
### Quick Install (All Platforms)
|
||||
**Standard installation**
|
||||
|
||||
```bash
|
||||
pip install uniface
|
||||
```
|
||||
|
||||
### Platform-Specific Installation
|
||||
|
||||
#### macOS (Apple Silicon - M1/M2/M3/M4)
|
||||
|
||||
For Apple Silicon Macs, the standard installation automatically includes optimized ARM64 support:
|
||||
|
||||
```bash
|
||||
pip install uniface
|
||||
```
|
||||
|
||||
The base `onnxruntime` package (included with uniface) has native Apple Silicon support with ARM64 optimizations built-in since version 1.13+.
|
||||
|
||||
#### Linux/Windows with NVIDIA GPU
|
||||
|
||||
For CUDA acceleration on NVIDIA GPUs:
|
||||
**GPU support (CUDA)**
|
||||
|
||||
```bash
|
||||
pip install uniface[gpu]
|
||||
```
|
||||
|
||||
**Requirements:**
|
||||
- CUDA 11.x or 12.x
|
||||
- cuDNN 8.x
|
||||
- See [ONNX Runtime GPU requirements](https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html)
|
||||
|
||||
#### CPU-Only (All Platforms)
|
||||
|
||||
```bash
|
||||
pip install uniface
|
||||
```
|
||||
|
||||
### Install from Source
|
||||
**From source (latest version)**
|
||||
|
||||
```bash
|
||||
git clone https://github.com/yakhyo/uniface.git
|
||||
cd uniface
|
||||
pip install -e .
|
||||
cd uniface && pip install -e .
|
||||
```
|
||||
|
||||
**FAISS vector indexing**
|
||||
|
||||
```bash
|
||||
pip install faiss-cpu # or faiss-gpu for CUDA
|
||||
```
|
||||
|
||||
**Optional dependencies**
|
||||
- Emotion model uses TorchScript and requires `torch`:
|
||||
`pip install torch` (choose the correct build for your OS/CUDA)
|
||||
- YOLOv5-Face and YOLOv8-Face support faster NMS with `torchvision`:
|
||||
`pip install torch torchvision` then use `nms_mode='torchvision'`
|
||||
|
||||
---
|
||||
|
||||
## Model Downloads and Cache
|
||||
|
||||
Models are downloaded automatically on first use and verified via SHA-256.
|
||||
|
||||
Default cache location: `~/.uniface/models`
|
||||
|
||||
Override with the programmatic API or environment variable:
|
||||
|
||||
```python
|
||||
from uniface.model_store import get_cache_dir, set_cache_dir
|
||||
|
||||
set_cache_dir('/data/models')
|
||||
print(get_cache_dir()) # /data/models
|
||||
```
|
||||
|
||||
```bash
|
||||
export UNIFACE_CACHE_DIR=/data/models
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Quick Start
|
||||
|
||||
### Face Detection
|
||||
## Quick Example (Detection)
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace
|
||||
from uniface.detection import RetinaFace
|
||||
|
||||
# Initialize detector
|
||||
detector = RetinaFace()
|
||||
|
||||
# Load image
|
||||
image = cv2.imread("image.jpg")
|
||||
image = cv2.imread("photo.jpg")
|
||||
if image is None:
|
||||
raise ValueError("Failed to load image. Check the path to 'photo.jpg'.")
|
||||
|
||||
# Detect faces
|
||||
faces = detector.detect(image)
|
||||
|
||||
# Process results
|
||||
for face in faces:
|
||||
bbox = face['bbox'] # [x1, y1, x2, y2]
|
||||
confidence = face['confidence']
|
||||
landmarks = face['landmarks'] # 5-point landmarks
|
||||
print(f"Face detected with confidence: {confidence:.2f}")
|
||||
print(f"Confidence: {face.confidence:.2f}")
|
||||
print(f"BBox: {face.bbox}")
|
||||
print(f"Landmarks: {face.landmarks.shape}")
|
||||
```
|
||||
|
||||
### Face Recognition
|
||||
<div align="center">
|
||||
<img src="https://raw.githubusercontent.com/yakhyo/uniface/main/assets/test_result.png" width="90%">
|
||||
<p>Face Detection Model Output</p>
|
||||
</div>
|
||||
|
||||
---
|
||||
|
||||
## Example (Face Analyzer)
|
||||
|
||||
```python
|
||||
from uniface import ArcFace, RetinaFace
|
||||
from uniface import compute_similarity
|
||||
import cv2
|
||||
from uniface.analyzer import FaceAnalyzer
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.recognition import ArcFace
|
||||
|
||||
# Initialize models
|
||||
detector = RetinaFace()
|
||||
recognizer = ArcFace()
|
||||
|
||||
# Detect and extract embeddings
|
||||
faces1 = detector.detect(image1)
|
||||
faces2 = detector.detect(image2)
|
||||
analyzer = FaceAnalyzer(detector, recognizer=recognizer)
|
||||
|
||||
embedding1 = recognizer.get_normalized_embedding(image1, faces1[0]['landmarks'])
|
||||
embedding2 = recognizer.get_normalized_embedding(image2, faces2[0]['landmarks'])
|
||||
image = cv2.imread("photo.jpg")
|
||||
if image is None:
|
||||
raise ValueError("Failed to load image. Check the path to 'photo.jpg'.")
|
||||
|
||||
# Compare faces
|
||||
similarity = compute_similarity(embedding1, embedding2)
|
||||
print(f"Similarity: {similarity:.4f}")
|
||||
faces = analyzer.analyze(image)
|
||||
|
||||
for face in faces:
|
||||
print(face.bbox, face.embedding.shape if face.embedding is not None else None)
|
||||
```
|
||||
|
||||
### Facial Landmarks
|
||||
---
|
||||
|
||||
## Execution Providers (ONNX Runtime)
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace, Landmark106
|
||||
from uniface.detection import RetinaFace
|
||||
|
||||
detector = RetinaFace()
|
||||
landmarker = Landmark106()
|
||||
|
||||
faces = detector.detect(image)
|
||||
landmarks = landmarker.get_landmarks(image, faces[0]['bbox'])
|
||||
# Returns 106 (x, y) landmark points
|
||||
# Force CPU-only inference
|
||||
detector = RetinaFace(providers=["CPUExecutionProvider"])
|
||||
```
|
||||
|
||||
### Age & Gender Detection
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace, AgeGender
|
||||
|
||||
detector = RetinaFace()
|
||||
age_gender = AgeGender()
|
||||
|
||||
faces = detector.detect(image)
|
||||
gender, age = age_gender.predict(image, faces[0]['bbox'])
|
||||
print(f"{gender}, {age} years old")
|
||||
```
|
||||
See more in the docs:
|
||||
https://yakhyo.github.io/uniface/concepts/execution-providers/
|
||||
|
||||
---
|
||||
|
||||
## Documentation
|
||||
|
||||
- [**QUICKSTART.md**](QUICKSTART.md) - 5-minute getting started guide
|
||||
- [**MODELS.md**](MODELS.md) - Model zoo, benchmarks, and selection guide
|
||||
- [**Examples**](examples/) - Jupyter notebooks with detailed examples
|
||||
Full documentation: https://yakhyo.github.io/uniface/
|
||||
|
||||
| Resource | Description |
|
||||
|----------|-------------|
|
||||
| [Quickstart](https://yakhyo.github.io/uniface/quickstart/) | Get up and running in 5 minutes |
|
||||
| [Model Zoo](https://yakhyo.github.io/uniface/models/) | All models, benchmarks, and selection guide |
|
||||
| [API Reference](https://yakhyo.github.io/uniface/modules/detection/) | Detailed module documentation |
|
||||
| [Tutorials](https://yakhyo.github.io/uniface/recipes/image-pipeline/) | Step-by-step workflow examples |
|
||||
| [Guides](https://yakhyo.github.io/uniface/concepts/overview/) | Architecture and design principles |
|
||||
| [Datasets](https://yakhyo.github.io/uniface/datasets/) | Training data and evaluation benchmarks |
|
||||
|
||||
---
|
||||
|
||||
## API Overview
|
||||
## Datasets
|
||||
|
||||
### Factory Functions (Recommended)
|
||||
| Task | Training Dataset | Models |
|
||||
|------|-----------------|--------|
|
||||
| Detection | WIDER FACE | RetinaFace, SCRFD, YOLOv5-Face, YOLOv8-Face |
|
||||
| Recognition | MS1MV2 | MobileFace, SphereFace |
|
||||
| Recognition | WebFace600K | ArcFace |
|
||||
| Recognition | WebFace4M / 12M | AdaFace |
|
||||
| Gaze | Gaze360 | MobileGaze |
|
||||
| Parsing | CelebAMask-HQ | BiSeNet |
|
||||
| Attributes | CelebA, FairFace, AffectNet | AgeGender, FairFace, Emotion |
|
||||
|
||||
```python
|
||||
from uniface.detection import RetinaFace, SCRFD
|
||||
from uniface.recognition import ArcFace
|
||||
from uniface.landmark import Landmark106
|
||||
|
||||
# Create detector with default settings
|
||||
detector = RetinaFace()
|
||||
|
||||
# Create with custom config
|
||||
detector = SCRFD(
|
||||
model_name='scrfd_10g_kps',
|
||||
conf_thresh=0.8,
|
||||
input_size=(640, 640)
|
||||
)
|
||||
|
||||
# Recognition and landmarks
|
||||
recognizer = ArcFace()
|
||||
landmarker = Landmark106()
|
||||
```
|
||||
|
||||
### Direct Model Instantiation
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace, SCRFD, ArcFace, MobileFace, SphereFace
|
||||
from uniface.constants import RetinaFaceWeights
|
||||
|
||||
# Detection
|
||||
detector = RetinaFace(
|
||||
model_name=RetinaFaceWeights.MNET_V2,
|
||||
conf_thresh=0.5,
|
||||
nms_thresh=0.4
|
||||
)
|
||||
|
||||
# Recognition
|
||||
recognizer = ArcFace() # Uses default weights
|
||||
recognizer = MobileFace() # Lightweight alternative
|
||||
recognizer = SphereFace() # Angular softmax alternative
|
||||
```
|
||||
|
||||
### High-Level Detection API
|
||||
|
||||
```python
|
||||
from uniface import detect_faces
|
||||
|
||||
# One-line face detection
|
||||
faces = detect_faces(image, method='retinaface', conf_thresh=0.8)
|
||||
```
|
||||
> See [Datasets documentation](https://yakhyo.github.io/uniface/datasets/) for download links, benchmarks, and details.
|
||||
|
||||
---
|
||||
|
||||
## Model Performance
|
||||
## Jupyter Notebooks
|
||||
|
||||
### Face Detection (WIDER FACE Dataset)
|
||||
|
||||
| Model | Easy | Medium | Hard | Use Case |
|
||||
|--------------------|--------|--------|--------|-------------------------|
|
||||
| retinaface_mnet025 | 88.48% | 87.02% | 80.61% | Mobile/Edge devices |
|
||||
| retinaface_mnet_v2 | 91.70% | 91.03% | 86.60% | Balanced (recommended) |
|
||||
| retinaface_r34 | 94.16% | 93.12% | 88.90% | High accuracy |
|
||||
| scrfd_500m | 90.57% | 88.12% | 68.51% | Real-time applications |
|
||||
| scrfd_10g | 95.16% | 93.87% | 83.05% | Best accuracy/speed |
|
||||
|
||||
*Accuracy values from original papers: [RetinaFace](https://arxiv.org/abs/1905.00641), [SCRFD](https://arxiv.org/abs/2105.04714)*
|
||||
|
||||
**Benchmark on your hardware:**
|
||||
```bash
|
||||
python scripts/run_detection.py --image assets/test.jpg --iterations 100
|
||||
```
|
||||
|
||||
See [MODELS.md](MODELS.md) for detailed model information and selection guide.
|
||||
|
||||
<div align="center">
|
||||
<img src="assets/test_result.png">
|
||||
</div>
|
||||
| Example | Colab | Description |
|
||||
|---------|:-----:|-------------|
|
||||
| [01_face_detection.ipynb](examples/01_face_detection.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/01_face_detection.ipynb) | Face detection and landmarks |
|
||||
| [02_face_alignment.ipynb](examples/02_face_alignment.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/02_face_alignment.ipynb) | Face alignment for recognition |
|
||||
| [03_face_verification.ipynb](examples/03_face_verification.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/03_face_verification.ipynb) | Compare faces for identity |
|
||||
| [04_face_search.ipynb](examples/04_face_search.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/04_face_search.ipynb) | Find a person in group photos |
|
||||
| [05_face_analyzer.ipynb](examples/05_face_analyzer.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/05_face_analyzer.ipynb) | All-in-one analysis |
|
||||
| [06_face_parsing.ipynb](examples/06_face_parsing.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/06_face_parsing.ipynb) | Semantic face segmentation |
|
||||
| [07_face_anonymization.ipynb](examples/07_face_anonymization.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/07_face_anonymization.ipynb) | Privacy-preserving blur |
|
||||
| [08_gaze_estimation.ipynb](examples/08_gaze_estimation.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/08_gaze_estimation.ipynb) | Gaze direction estimation |
|
||||
| [09_face_segmentation.ipynb](examples/09_face_segmentation.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/09_face_segmentation.ipynb) | Face segmentation with XSeg |
|
||||
| [10_face_vector_store.ipynb](examples/10_face_vector_store.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/10_face_vector_store.ipynb) | FAISS-backed face database |
|
||||
|
||||
---
|
||||
|
||||
## Examples
|
||||
## Licensing and Model Usage
|
||||
|
||||
### Webcam Face Detection
|
||||
UniFace is MIT-licensed, but several pretrained models carry their own licenses.
|
||||
Review: https://yakhyo.github.io/uniface/license-attribution/
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace
|
||||
from uniface.visualization import draw_detections
|
||||
Notable examples:
|
||||
- YOLOv5-Face and YOLOv8-Face weights are GPL-3.0
|
||||
- FairFace weights are CC BY 4.0
|
||||
|
||||
detector = RetinaFace()
|
||||
cap = cv2.VideoCapture(0)
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
faces = detector.detect(frame)
|
||||
|
||||
# Extract data for visualization
|
||||
bboxes = [f['bbox'] for f in faces]
|
||||
scores = [f['confidence'] for f in faces]
|
||||
landmarks = [f['landmarks'] for f in faces]
|
||||
|
||||
draw_detections(frame, bboxes, scores, landmarks, vis_threshold=0.6)
|
||||
|
||||
cv2.imshow("Face Detection", frame)
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
```
|
||||
|
||||
### Face Search System
|
||||
|
||||
```python
|
||||
import numpy as np
|
||||
from uniface import RetinaFace, ArcFace
|
||||
|
||||
detector = RetinaFace()
|
||||
recognizer = ArcFace()
|
||||
|
||||
# Build face database
|
||||
database = {}
|
||||
for person_id, image_path in person_images.items():
|
||||
image = cv2.imread(image_path)
|
||||
faces = detector.detect(image)
|
||||
if faces:
|
||||
embedding = recognizer.get_normalized_embedding(
|
||||
image, faces[0]['landmarks']
|
||||
)
|
||||
database[person_id] = embedding
|
||||
|
||||
# Search for a face
|
||||
query_image = cv2.imread("query.jpg")
|
||||
query_faces = detector.detect(query_image)
|
||||
if query_faces:
|
||||
query_embedding = recognizer.get_normalized_embedding(
|
||||
query_image, query_faces[0]['landmarks']
|
||||
)
|
||||
|
||||
# Find best match
|
||||
best_match = None
|
||||
best_similarity = -1
|
||||
|
||||
for person_id, db_embedding in database.items():
|
||||
similarity = np.dot(query_embedding, db_embedding.T)[0][0]
|
||||
if similarity > best_similarity:
|
||||
best_similarity = similarity
|
||||
best_match = person_id
|
||||
|
||||
print(f"Best match: {best_match} (similarity: {best_similarity:.4f})")
|
||||
```
|
||||
|
||||
More examples in the [examples/](examples/) directory.
|
||||
|
||||
---
|
||||
|
||||
## Advanced Configuration
|
||||
|
||||
### Custom ONNX Runtime Providers
|
||||
|
||||
```python
|
||||
from uniface.onnx_utils import get_available_providers, create_onnx_session
|
||||
|
||||
# Check available providers
|
||||
providers = get_available_providers()
|
||||
print(f"Available: {providers}")
|
||||
|
||||
# Force CPU-only execution
|
||||
from uniface import RetinaFace
|
||||
detector = RetinaFace()
|
||||
# Internally uses create_onnx_session() which auto-selects best provider
|
||||
```
|
||||
|
||||
### Model Download and Caching
|
||||
|
||||
Models are automatically downloaded on first use and cached in `~/.uniface/models/`.
|
||||
|
||||
```python
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.constants import RetinaFaceWeights
|
||||
|
||||
# Manually download and verify a model
|
||||
model_path = verify_model_weights(
|
||||
RetinaFaceWeights.MNET_V2,
|
||||
root='./custom_models' # Custom cache directory
|
||||
)
|
||||
```
|
||||
|
||||
### Logging Configuration
|
||||
|
||||
```python
|
||||
from uniface import Logger
|
||||
import logging
|
||||
|
||||
# Set logging level
|
||||
Logger.setLevel(logging.DEBUG) # DEBUG, INFO, WARNING, ERROR
|
||||
|
||||
# Disable logging
|
||||
Logger.setLevel(logging.CRITICAL)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Testing
|
||||
|
||||
```bash
|
||||
# Run all tests
|
||||
pytest
|
||||
|
||||
# Run with coverage
|
||||
pytest --cov=uniface --cov-report=html
|
||||
|
||||
# Run specific test file
|
||||
pytest tests/test_retinaface.py -v
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Development
|
||||
|
||||
### Setup Development Environment
|
||||
|
||||
```bash
|
||||
git clone https://github.com/yakhyo/uniface.git
|
||||
cd uniface
|
||||
|
||||
# Install in editable mode with dev dependencies
|
||||
pip install -e ".[dev]"
|
||||
|
||||
# Run tests
|
||||
pytest
|
||||
```
|
||||
|
||||
### Code Formatting
|
||||
|
||||
This project uses [Ruff](https://docs.astral.sh/ruff/) for linting and formatting.
|
||||
|
||||
```bash
|
||||
# Format code
|
||||
ruff format .
|
||||
|
||||
# Check for linting errors
|
||||
ruff check .
|
||||
|
||||
# Auto-fix linting errors
|
||||
ruff check . --fix
|
||||
```
|
||||
|
||||
Ruff configuration is in `pyproject.toml`. Key settings:
|
||||
- Line length: 120
|
||||
- Python target: 3.10+
|
||||
- Import sorting: `uniface` as first-party
|
||||
|
||||
### Project Structure
|
||||
|
||||
```
|
||||
uniface/
|
||||
├── uniface/
|
||||
│ ├── detection/ # Face detection models
|
||||
│ ├── recognition/ # Face recognition models
|
||||
│ ├── landmark/ # Landmark detection
|
||||
│ ├── attribute/ # Age, gender, emotion
|
||||
│ ├── onnx_utils.py # ONNX Runtime utilities
|
||||
│ ├── model_store.py # Model download & caching
|
||||
│ └── visualization.py # Drawing utilities
|
||||
├── tests/ # Unit tests
|
||||
├── examples/ # Example notebooks
|
||||
└── scripts/ # Utility scripts
|
||||
```
|
||||
If you plan commercial use, verify model license compatibility.
|
||||
|
||||
---
|
||||
|
||||
## References
|
||||
|
||||
### Model Training & Architectures
|
||||
| Feature | Repository | Training | Description |
|
||||
|---------|------------|:--------:|-------------|
|
||||
| Detection | [retinaface-pytorch](https://github.com/yakhyo/retinaface-pytorch) | ✓ | RetinaFace PyTorch Training & Export |
|
||||
| Detection | [yolov5-face-onnx-inference](https://github.com/yakhyo/yolov5-face-onnx-inference) | - | YOLOv5-Face ONNX Inference |
|
||||
| Detection | [yolov8-face-onnx-inference](https://github.com/yakhyo/yolov8-face-onnx-inference) | - | YOLOv8-Face ONNX Inference |
|
||||
| Tracking | [bytetrack-tracker](https://github.com/yakhyo/bytetrack-tracker) | - | BYTETracker Multi-Object Tracking |
|
||||
| Recognition | [face-recognition](https://github.com/yakhyo/face-recognition) | ✓ | MobileFace, SphereFace Training |
|
||||
| Parsing | [face-parsing](https://github.com/yakhyo/face-parsing) | ✓ | BiSeNet Face Parsing |
|
||||
| Parsing | [face-segmentation](https://github.com/yakhyo/face-segmentation) | - | XSeg Face Segmentation |
|
||||
| Gaze | [gaze-estimation](https://github.com/yakhyo/gaze-estimation) | ✓ | MobileGaze Training |
|
||||
| Anti-Spoofing | [face-anti-spoofing](https://github.com/yakhyo/face-anti-spoofing) | - | MiniFASNet Inference |
|
||||
| Attributes | [fairface-onnx](https://github.com/yakhyo/fairface-onnx) | - | FairFace ONNX Inference |
|
||||
|
||||
- **RetinaFace Training**: [yakhyo/retinaface-pytorch](https://github.com/yakhyo/retinaface-pytorch) - PyTorch implementation and training code
|
||||
- **Face Recognition Training**: [yakhyo/face-recognition](https://github.com/yakhyo/face-recognition) - ArcFace, MobileFace, SphereFace training code
|
||||
- **InsightFace**: [deepinsight/insightface](https://github.com/deepinsight/insightface) - Model architectures and pretrained weights
|
||||
|
||||
### Papers
|
||||
|
||||
- **RetinaFace**: [Single-Shot Multi-Level Face Localisation in the Wild](https://arxiv.org/abs/1905.00641)
|
||||
- **SCRFD**: [Sample and Computation Redistribution for Efficient Face Detection](https://arxiv.org/abs/2105.04714)
|
||||
- **ArcFace**: [Additive Angular Margin Loss for Deep Face Recognition](https://arxiv.org/abs/1801.07698)
|
||||
*SCRFD and ArcFace models are from [InsightFace](https://github.com/deepinsight/insightface).
|
||||
|
||||
---
|
||||
|
||||
## Contributing
|
||||
|
||||
Contributions are welcome! Please open an issue or submit a pull request on [GitHub](https://github.com/yakhyo/uniface).
|
||||
Contributions are welcome. Please see [CONTRIBUTING.md](CONTRIBUTING.md).
|
||||
|
||||
## Support
|
||||
|
||||
If you find this project useful, consider giving it a ⭐ on GitHub — it helps others discover it!
|
||||
|
||||
Questions or feedback:
|
||||
- Discord: https://discord.gg/wdzrjr7R5j
|
||||
- GitHub Issues: https://github.com/yakhyo/uniface/issues
|
||||
- DeepWiki Q&A: https://deepwiki.com/yakhyo/uniface
|
||||
|
||||
## License
|
||||
|
||||
This project is licensed under the [MIT License](LICENSE).
|
||||
|
||||
BIN
assets/einstein/img_0.png
Normal file
|
After Width: | Height: | Size: 99 KiB |
BIN
assets/einstien.png
Normal file
|
After Width: | Height: | Size: 1.3 MiB |
BIN
assets/scientists.png
Normal file
|
After Width: | Height: | Size: 1.9 MiB |
|
Before Width: | Height: | Size: 33 KiB After Width: | Height: | Size: 33 KiB |
BIN
docs/assets/logo.webp
Normal file
|
After Width: | Height: | Size: 33 KiB |
191
docs/concepts/coordinate-systems.md
Normal file
@@ -0,0 +1,191 @@
|
||||
# Coordinate Systems
|
||||
|
||||
This page explains the coordinate formats used in UniFace.
|
||||
|
||||
---
|
||||
|
||||
## Image Coordinates
|
||||
|
||||
All coordinates use **pixel-based, top-left origin**:
|
||||
|
||||
```
|
||||
(0, 0) ────────────────► x (width)
|
||||
│
|
||||
│ Image
|
||||
│
|
||||
▼
|
||||
y (height)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Bounding Box Format
|
||||
|
||||
Bounding boxes use `[x1, y1, x2, y2]` format (top-left and bottom-right corners):
|
||||
|
||||
```
|
||||
(x1, y1) ─────────────────┐
|
||||
│ │
|
||||
│ Face │
|
||||
│ │
|
||||
└─────────────────────┘ (x2, y2)
|
||||
```
|
||||
|
||||
### Accessing Coordinates
|
||||
|
||||
```python
|
||||
face = faces[0]
|
||||
|
||||
# Direct access
|
||||
x1, y1, x2, y2 = face.bbox
|
||||
|
||||
# As properties
|
||||
bbox_xyxy = face.bbox_xyxy # [x1, y1, x2, y2]
|
||||
bbox_xywh = face.bbox_xywh # [x1, y1, width, height]
|
||||
```
|
||||
|
||||
### Conversion
|
||||
|
||||
```python
|
||||
import numpy as np
|
||||
|
||||
# xyxy → xywh
|
||||
def xyxy_to_xywh(bbox):
|
||||
x1, y1, x2, y2 = bbox
|
||||
return np.array([x1, y1, x2 - x1, y2 - y1])
|
||||
|
||||
# xywh → xyxy
|
||||
def xywh_to_xyxy(bbox):
|
||||
x, y, w, h = bbox
|
||||
return np.array([x, y, x + w, y + h])
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Landmarks
|
||||
|
||||
### 5-Point Landmarks (Detection)
|
||||
|
||||
Returned by all detection models:
|
||||
|
||||
```python
|
||||
landmarks = face.landmarks # Shape: (5, 2)
|
||||
```
|
||||
|
||||
| Index | Point |
|
||||
|-------|-------|
|
||||
| 0 | Left Eye |
|
||||
| 1 | Right Eye |
|
||||
| 2 | Nose Tip |
|
||||
| 3 | Left Mouth Corner |
|
||||
| 4 | Right Mouth Corner |
|
||||
|
||||
```
|
||||
0 ● ● 1
|
||||
|
||||
● 2
|
||||
|
||||
3 ● ● 4
|
||||
```
|
||||
|
||||
### 106-Point Landmarks
|
||||
|
||||
Returned by `Landmark106`:
|
||||
|
||||
```python
|
||||
from uniface.landmark import Landmark106
|
||||
|
||||
landmarker = Landmark106()
|
||||
landmarks = landmarker.get_landmarks(image, face.bbox)
|
||||
# Shape: (106, 2)
|
||||
```
|
||||
|
||||
**Landmark Groups:**
|
||||
|
||||
| Range | Group | Points |
|
||||
|-------|-------|--------|
|
||||
| 0-32 | Face Contour | 33 |
|
||||
| 33-50 | Eyebrows | 18 |
|
||||
| 51-62 | Nose | 12 |
|
||||
| 63-86 | Eyes | 24 |
|
||||
| 87-105 | Mouth | 19 |
|
||||
|
||||
---
|
||||
|
||||
## Face Crop
|
||||
|
||||
To crop a face from an image:
|
||||
|
||||
```python
|
||||
def crop_face(image, bbox, margin=0):
|
||||
"""Crop face with optional margin."""
|
||||
h, w = image.shape[:2]
|
||||
x1, y1, x2, y2 = map(int, bbox)
|
||||
|
||||
# Add margin
|
||||
if margin > 0:
|
||||
bw, bh = x2 - x1, y2 - y1
|
||||
x1 = max(0, x1 - int(bw * margin))
|
||||
y1 = max(0, y1 - int(bh * margin))
|
||||
x2 = min(w, x2 + int(bw * margin))
|
||||
y2 = min(h, y2 + int(bh * margin))
|
||||
|
||||
return image[y1:y2, x1:x2]
|
||||
|
||||
# Usage
|
||||
face_crop = crop_face(image, face.bbox, margin=0.1)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Gaze Angles
|
||||
|
||||
Gaze estimation returns pitch and yaw in **radians**:
|
||||
|
||||
```python
|
||||
result = gaze_estimator.estimate(face_crop)
|
||||
|
||||
# Angles in radians
|
||||
pitch = result.pitch # Vertical: + = up, - = down
|
||||
yaw = result.yaw # Horizontal: + = right, - = left
|
||||
|
||||
# Convert to degrees
|
||||
import numpy as np
|
||||
pitch_deg = np.degrees(pitch)
|
||||
yaw_deg = np.degrees(yaw)
|
||||
```
|
||||
|
||||
**Angle Reference:**
|
||||
|
||||
```
|
||||
pitch = +90° (up)
|
||||
│
|
||||
│
|
||||
yaw = -90° ────┼──── yaw = +90°
|
||||
(left) │ (right)
|
||||
│
|
||||
pitch = -90° (down)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Face Alignment
|
||||
|
||||
Face alignment uses 5-point landmarks to normalize face orientation:
|
||||
|
||||
```python
|
||||
from uniface.face_utils import face_alignment
|
||||
|
||||
# Align face to standard template
|
||||
aligned_face = face_alignment(image, face.landmarks)
|
||||
# Output: 112x112 aligned face image
|
||||
```
|
||||
|
||||
The alignment transforms faces to a canonical pose for better recognition accuracy.
|
||||
|
||||
---
|
||||
|
||||
## Next Steps
|
||||
|
||||
- [Inputs & Outputs](inputs-outputs.md) - Data types reference
|
||||
- [Recognition Module](../modules/recognition.md) - Face recognition details
|
||||
232
docs/concepts/execution-providers.md
Normal file
@@ -0,0 +1,232 @@
|
||||
# Execution Providers
|
||||
|
||||
UniFace uses ONNX Runtime for model inference, which supports multiple hardware acceleration backends.
|
||||
|
||||
---
|
||||
|
||||
## Automatic Provider Selection
|
||||
|
||||
UniFace automatically selects the optimal execution provider based on available hardware:
|
||||
|
||||
```python
|
||||
from uniface.detection import RetinaFace
|
||||
|
||||
# Automatically uses best available provider
|
||||
detector = RetinaFace()
|
||||
```
|
||||
|
||||
**Priority order:**
|
||||
|
||||
1. **CoreMLExecutionProvider** - Apple Silicon
|
||||
2. **CUDAExecutionProvider** - NVIDIA GPU
|
||||
3. **CPUExecutionProvider** - Fallback
|
||||
|
||||
---
|
||||
|
||||
## Explicit Provider Selection
|
||||
|
||||
You can specify which execution provider to use by passing the `providers` parameter:
|
||||
|
||||
```python
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.recognition import ArcFace
|
||||
|
||||
# Force CPU execution (even if GPU is available)
|
||||
detector = RetinaFace(providers=['CPUExecutionProvider'])
|
||||
recognizer = ArcFace(providers=['CPUExecutionProvider'])
|
||||
|
||||
# Use CUDA with CPU fallback
|
||||
detector = RetinaFace(providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
|
||||
```
|
||||
|
||||
All model classes accept the `providers` parameter:
|
||||
|
||||
- Detection: `RetinaFace`, `SCRFD`, `YOLOv5Face`, `YOLOv8Face`
|
||||
- Recognition: `ArcFace`, `AdaFace`, `MobileFace`, `SphereFace`
|
||||
- Landmarks: `Landmark106`
|
||||
- Gaze: `MobileGaze`
|
||||
- Parsing: `BiSeNet`
|
||||
- Attributes: `AgeGender`, `FairFace`
|
||||
- Anti-Spoofing: `MiniFASNet`
|
||||
|
||||
---
|
||||
|
||||
## Check Available Providers
|
||||
|
||||
```python
|
||||
import onnxruntime as ort
|
||||
|
||||
providers = ort.get_available_providers()
|
||||
print("Available providers:", providers)
|
||||
```
|
||||
|
||||
**Example outputs:**
|
||||
|
||||
=== "macOS (Apple Silicon)"
|
||||
|
||||
```
|
||||
['CoreMLExecutionProvider', 'CPUExecutionProvider']
|
||||
```
|
||||
|
||||
=== "Linux (NVIDIA GPU)"
|
||||
|
||||
```
|
||||
['CUDAExecutionProvider', 'CPUExecutionProvider']
|
||||
```
|
||||
|
||||
=== "Windows (CPU)"
|
||||
|
||||
```
|
||||
['CPUExecutionProvider']
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Platform-Specific Setup
|
||||
|
||||
### Apple Silicon (M1/M2/M3/M4)
|
||||
|
||||
No additional setup required. ARM64 optimizations are built into `onnxruntime`:
|
||||
|
||||
```bash
|
||||
pip install uniface
|
||||
```
|
||||
|
||||
Verify ARM64:
|
||||
|
||||
```bash
|
||||
python -c "import platform; print(platform.machine())"
|
||||
# Should show: arm64
|
||||
```
|
||||
|
||||
!!! tip "Performance"
|
||||
Apple Silicon Macs use CoreML acceleration automatically, providing excellent performance for face analysis tasks.
|
||||
|
||||
---
|
||||
|
||||
### NVIDIA GPU (CUDA)
|
||||
|
||||
Install with GPU support:
|
||||
|
||||
```bash
|
||||
pip install uniface[gpu]
|
||||
```
|
||||
|
||||
**Requirements:**
|
||||
|
||||
- CUDA 11.x or 12.x
|
||||
- cuDNN 8.x
|
||||
- Compatible NVIDIA driver
|
||||
|
||||
Verify CUDA:
|
||||
|
||||
```python
|
||||
import onnxruntime as ort
|
||||
|
||||
if 'CUDAExecutionProvider' in ort.get_available_providers():
|
||||
print("CUDA is available!")
|
||||
else:
|
||||
print("CUDA not available, using CPU")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### CPU Fallback
|
||||
|
||||
CPU execution is always available:
|
||||
|
||||
```bash
|
||||
pip install uniface
|
||||
```
|
||||
|
||||
Works on all platforms without additional configuration.
|
||||
|
||||
---
|
||||
|
||||
## Internal API
|
||||
|
||||
For advanced use cases, you can access the provider utilities:
|
||||
|
||||
```python
|
||||
from uniface.onnx_utils import get_available_providers, create_onnx_session
|
||||
|
||||
# Check available providers
|
||||
providers = get_available_providers()
|
||||
print(f"Available: {providers}")
|
||||
|
||||
# Models use create_onnx_session() internally
|
||||
# which auto-selects the best provider
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Performance Tips
|
||||
|
||||
### 1. Use GPU When Available
|
||||
|
||||
For batch processing or real-time applications, GPU acceleration provides significant speedups:
|
||||
|
||||
```bash
|
||||
pip install uniface[gpu]
|
||||
```
|
||||
|
||||
### 2. Optimize Input Size
|
||||
|
||||
Smaller input sizes are faster but may reduce accuracy:
|
||||
|
||||
```python
|
||||
from uniface.detection import RetinaFace
|
||||
|
||||
# Faster, lower accuracy
|
||||
detector = RetinaFace(input_size=(320, 320))
|
||||
|
||||
# Balanced (default)
|
||||
detector = RetinaFace(input_size=(640, 640))
|
||||
```
|
||||
|
||||
### 3. Batch Processing
|
||||
|
||||
Process multiple images to maximize GPU utilization:
|
||||
|
||||
```python
|
||||
# Process images in batch (GPU-efficient)
|
||||
for image_path in image_paths:
|
||||
image = cv2.imread(image_path)
|
||||
faces = detector.detect(image)
|
||||
# ...
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### CUDA Not Detected
|
||||
|
||||
1. Verify CUDA installation:
|
||||
```bash
|
||||
nvidia-smi
|
||||
```
|
||||
|
||||
2. Check CUDA version compatibility with ONNX Runtime
|
||||
|
||||
3. Reinstall with GPU support:
|
||||
```bash
|
||||
pip uninstall onnxruntime onnxruntime-gpu
|
||||
pip install uniface[gpu]
|
||||
```
|
||||
|
||||
### Slow Performance on Mac
|
||||
|
||||
Verify you're using ARM64 Python (not Rosetta):
|
||||
|
||||
```bash
|
||||
python -c "import platform; print(platform.machine())"
|
||||
# Should show: arm64 (not x86_64)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Next Steps
|
||||
|
||||
- [Model Cache & Offline](model-cache-offline.md) - Model management
|
||||
- [Thresholds & Calibration](thresholds-calibration.md) - Tuning parameters
|
||||
219
docs/concepts/inputs-outputs.md
Normal file
@@ -0,0 +1,219 @@
|
||||
# Inputs & Outputs
|
||||
|
||||
This page describes the data types used throughout UniFace.
|
||||
|
||||
---
|
||||
|
||||
## Input: Images
|
||||
|
||||
All models accept NumPy arrays in **BGR format** (OpenCV default):
|
||||
|
||||
```python
|
||||
import cv2
|
||||
|
||||
# Load image (BGR format)
|
||||
image = cv2.imread("photo.jpg")
|
||||
print(f"Shape: {image.shape}") # (H, W, 3)
|
||||
print(f"Dtype: {image.dtype}") # uint8
|
||||
```
|
||||
|
||||
!!! warning "Color Format"
|
||||
UniFace expects **BGR** format (OpenCV default). If using PIL or other libraries, convert first:
|
||||
|
||||
```python
|
||||
from PIL import Image
|
||||
import numpy as np
|
||||
|
||||
pil_image = Image.open("photo.jpg")
|
||||
bgr_image = np.array(pil_image)[:, :, ::-1] # RGB → BGR
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Output: Face Dataclass
|
||||
|
||||
Detection returns a list of `Face` objects:
|
||||
|
||||
```python
|
||||
from dataclasses import dataclass
|
||||
import numpy as np
|
||||
|
||||
@dataclass
|
||||
class Face:
|
||||
# Required (from detection)
|
||||
bbox: np.ndarray # [x1, y1, x2, y2]
|
||||
confidence: float # 0.0 to 1.0
|
||||
landmarks: np.ndarray # (5, 2) or (106, 2)
|
||||
|
||||
# Optional (enriched by analyzers)
|
||||
embedding: np.ndarray | None = None
|
||||
gender: int | None = None # 0=Female, 1=Male
|
||||
age: int | None = None # Years
|
||||
age_group: str | None = None # "20-29", etc.
|
||||
race: str | None = None # "East Asian", etc.
|
||||
emotion: str | None = None # "Happy", etc.
|
||||
emotion_confidence: float | None = None
|
||||
track_id: int | None = None # Persistent ID from tracker
|
||||
```
|
||||
|
||||
### Properties
|
||||
|
||||
```python
|
||||
face = faces[0]
|
||||
|
||||
# Bounding box formats
|
||||
face.bbox_xyxy # [x1, y1, x2, y2] - same as bbox
|
||||
face.bbox_xywh # [x1, y1, width, height]
|
||||
|
||||
# Gender as string
|
||||
face.sex # "Female" or "Male" (None if not predicted)
|
||||
```
|
||||
|
||||
### Methods
|
||||
|
||||
```python
|
||||
# Compute similarity with another face
|
||||
similarity = face1.compute_similarity(face2)
|
||||
|
||||
# Convert to dictionary
|
||||
face_dict = face.to_dict()
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Result Types
|
||||
|
||||
### GazeResult
|
||||
|
||||
```python
|
||||
from dataclasses import dataclass
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class GazeResult:
|
||||
pitch: float # Vertical angle (radians), + = up
|
||||
yaw: float # Horizontal angle (radians), + = right
|
||||
```
|
||||
|
||||
**Usage:**
|
||||
|
||||
```python
|
||||
import numpy as np
|
||||
|
||||
result = gaze_estimator.estimate(face_crop)
|
||||
print(f"Pitch: {np.degrees(result.pitch):.1f}°")
|
||||
print(f"Yaw: {np.degrees(result.yaw):.1f}°")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### SpoofingResult
|
||||
|
||||
```python
|
||||
@dataclass(frozen=True)
|
||||
class SpoofingResult:
|
||||
is_real: bool # True = real, False = fake
|
||||
confidence: float # 0.0 to 1.0
|
||||
```
|
||||
|
||||
**Usage:**
|
||||
|
||||
```python
|
||||
result = spoofer.predict(image, face.bbox)
|
||||
label = "Real" if result.is_real else "Fake"
|
||||
print(f"{label}: {result.confidence:.1%}")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### AttributeResult
|
||||
|
||||
```python
|
||||
@dataclass(frozen=True)
|
||||
class AttributeResult:
|
||||
gender: int # 0=Female, 1=Male
|
||||
age: int | None # Years (AgeGender model)
|
||||
age_group: str | None # "20-29" (FairFace model)
|
||||
race: str | None # Race label (FairFace model)
|
||||
|
||||
@property
|
||||
def sex(self) -> str:
|
||||
return "Female" if self.gender == 0 else "Male"
|
||||
```
|
||||
|
||||
**Usage:**
|
||||
|
||||
```python
|
||||
# AgeGender model
|
||||
result = age_gender.predict(image, face.bbox)
|
||||
print(f"{result.sex}, {result.age} years old")
|
||||
|
||||
# FairFace model
|
||||
result = fairface.predict(image, face.bbox)
|
||||
print(f"{result.sex}, {result.age_group}, {result.race}")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### EmotionResult
|
||||
|
||||
```python
|
||||
@dataclass(frozen=True)
|
||||
class EmotionResult:
|
||||
emotion: str # "Happy", "Sad", etc.
|
||||
confidence: float # 0.0 to 1.0
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Embeddings
|
||||
|
||||
Face recognition models return normalized 512-dimensional embeddings:
|
||||
|
||||
```python
|
||||
embedding = recognizer.get_normalized_embedding(image, landmarks)
|
||||
print(f"Shape: {embedding.shape}") # (1, 512)
|
||||
print(f"Norm: {np.linalg.norm(embedding):.4f}") # ~1.0
|
||||
```
|
||||
|
||||
### Similarity Computation
|
||||
|
||||
```python
|
||||
from uniface.face_utils import compute_similarity
|
||||
|
||||
similarity = compute_similarity(embedding1, embedding2)
|
||||
# Returns: float between -1 and 1 (cosine similarity)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Parsing Masks
|
||||
|
||||
Face parsing returns a segmentation mask:
|
||||
|
||||
```python
|
||||
mask = parser.parse(face_image)
|
||||
print(f"Shape: {mask.shape}") # (H, W)
|
||||
print(f"Classes: {np.unique(mask)}") # [0, 1, 2, ...]
|
||||
```
|
||||
|
||||
**19 Classes:**
|
||||
|
||||
| ID | Class | ID | Class |
|
||||
|----|-------|----|-------|
|
||||
| 0 | Background | 10 | Nose |
|
||||
| 1 | Skin | 11 | Mouth |
|
||||
| 2 | Left Eyebrow | 12 | Upper Lip |
|
||||
| 3 | Right Eyebrow | 13 | Lower Lip |
|
||||
| 4 | Left Eye | 14 | Neck |
|
||||
| 5 | Right Eye | 15 | Necklace |
|
||||
| 6 | Eyeglasses | 16 | Cloth |
|
||||
| 7 | Left Ear | 17 | Hair |
|
||||
| 8 | Right Ear | 18 | Hat |
|
||||
| 9 | Earring | | |
|
||||
|
||||
---
|
||||
|
||||
## Next Steps
|
||||
|
||||
- [Coordinate Systems](coordinate-systems.md) - Bbox and landmark formats
|
||||
- [Thresholds & Calibration](thresholds-calibration.md) - Tuning confidence thresholds
|
||||
262
docs/concepts/model-cache-offline.md
Normal file
@@ -0,0 +1,262 @@
|
||||
# Model Cache & Offline Use
|
||||
|
||||
UniFace automatically downloads and caches models. This page explains how model management works.
|
||||
|
||||
---
|
||||
|
||||
## Automatic Download
|
||||
|
||||
Models are downloaded on first use:
|
||||
|
||||
```python
|
||||
from uniface.detection import RetinaFace
|
||||
|
||||
# First run: downloads model to cache
|
||||
detector = RetinaFace() # ~3.5 MB download
|
||||
|
||||
# Subsequent runs: loads from cache
|
||||
detector = RetinaFace() # Instant
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Cache Location
|
||||
|
||||
Default cache directory:
|
||||
|
||||
```
|
||||
~/.uniface/models/
|
||||
```
|
||||
|
||||
**Example structure:**
|
||||
|
||||
```
|
||||
~/.uniface/models/
|
||||
├── retinaface_mnet_v2.onnx
|
||||
├── arcface_mnet.onnx
|
||||
├── 2d_106.onnx
|
||||
├── gaze_resnet34.onnx
|
||||
├── parsing_resnet18.onnx
|
||||
└── ...
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Custom Cache Directory
|
||||
|
||||
Use the programmatic API to change the cache location at runtime:
|
||||
|
||||
```python
|
||||
from uniface.model_store import get_cache_dir, set_cache_dir
|
||||
|
||||
# Set a custom cache directory
|
||||
set_cache_dir('/data/models')
|
||||
|
||||
# Verify the current path
|
||||
print(get_cache_dir()) # /data/models
|
||||
|
||||
# All subsequent model loads use the new directory
|
||||
from uniface.detection import RetinaFace
|
||||
detector = RetinaFace() # Downloads to /data/models/
|
||||
```
|
||||
|
||||
Or set the `UNIFACE_CACHE_DIR` environment variable (see [Environment Variables](#environment-variables) below).
|
||||
|
||||
---
|
||||
|
||||
## Pre-Download Models
|
||||
|
||||
Download models before deployment using the concurrent downloader:
|
||||
|
||||
```python
|
||||
from uniface.model_store import download_models
|
||||
from uniface.constants import (
|
||||
RetinaFaceWeights,
|
||||
ArcFaceWeights,
|
||||
AgeGenderWeights,
|
||||
)
|
||||
|
||||
# Download multiple models concurrently (up to 4 threads by default)
|
||||
paths = download_models([
|
||||
RetinaFaceWeights.MNET_V2,
|
||||
ArcFaceWeights.MNET,
|
||||
AgeGenderWeights.DEFAULT,
|
||||
])
|
||||
|
||||
for model, path in paths.items():
|
||||
print(f"{model.value} -> {path}")
|
||||
```
|
||||
|
||||
Or download one at a time:
|
||||
|
||||
```python
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.constants import RetinaFaceWeights
|
||||
|
||||
path = verify_model_weights(RetinaFaceWeights.MNET_V2)
|
||||
print(f"Downloaded: {path}")
|
||||
```
|
||||
|
||||
Or use the CLI tool:
|
||||
|
||||
```bash
|
||||
python tools/download_model.py
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Offline Use
|
||||
|
||||
For air-gapped or offline environments:
|
||||
|
||||
### 1. Pre-download models
|
||||
|
||||
On a connected machine:
|
||||
|
||||
```python
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.constants import RetinaFaceWeights
|
||||
|
||||
path = verify_model_weights(RetinaFaceWeights.MNET_V2)
|
||||
print(f"Copy from: {path}")
|
||||
```
|
||||
|
||||
### 2. Copy to target machine
|
||||
|
||||
```bash
|
||||
# Copy the entire cache directory
|
||||
scp -r ~/.uniface/models/ user@offline-machine:~/.uniface/models/
|
||||
```
|
||||
|
||||
### 3. Point to the cache (if non-default location)
|
||||
|
||||
```python
|
||||
from uniface.model_store import set_cache_dir
|
||||
|
||||
# Only needed if the models are not at ~/.uniface/models/
|
||||
set_cache_dir('/path/to/copied/models')
|
||||
```
|
||||
|
||||
### 4. Use normally
|
||||
|
||||
```python
|
||||
# Models load from local cache
|
||||
from uniface.detection import RetinaFace
|
||||
detector = RetinaFace() # No network required
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Model Verification
|
||||
|
||||
Models are verified with SHA-256 checksums:
|
||||
|
||||
```python
|
||||
from uniface.constants import MODEL_SHA256, RetinaFaceWeights
|
||||
|
||||
# Check expected checksum
|
||||
expected = MODEL_SHA256[RetinaFaceWeights.MNET_V2]
|
||||
print(f"Expected SHA256: {expected}")
|
||||
```
|
||||
|
||||
If a model fails verification, it's re-downloaded automatically.
|
||||
|
||||
---
|
||||
|
||||
## Available Models
|
||||
|
||||
### Detection Models
|
||||
|
||||
| Model | Size | Download |
|
||||
|-------|------|----------|
|
||||
| RetinaFace MNET_025 | 1.7 MB | ✅ |
|
||||
| RetinaFace MNET_V2 | 3.5 MB | ✅ |
|
||||
| RetinaFace RESNET34 | 56 MB | ✅ |
|
||||
| SCRFD 500M | 2.5 MB | ✅ |
|
||||
| SCRFD 10G | 17 MB | ✅ |
|
||||
| YOLOv5n-Face | 11 MB | ✅ |
|
||||
| YOLOv5s-Face | 28 MB | ✅ |
|
||||
| YOLOv5m-Face | 82 MB | ✅ |
|
||||
| YOLOv8-Lite-S | 7.4 MB | ✅ |
|
||||
| YOLOv8n-Face | 12 MB | ✅ |
|
||||
|
||||
### Recognition Models
|
||||
|
||||
| Model | Size | Download |
|
||||
|-------|------|----------|
|
||||
| ArcFace MNET | 8 MB | ✅ |
|
||||
| ArcFace RESNET | 166 MB | ✅ |
|
||||
| MobileFace MNET_V2 | 4 MB | ✅ |
|
||||
| SphereFace SPHERE20 | 50 MB | ✅ |
|
||||
|
||||
### Other Models
|
||||
|
||||
| Model | Size | Download |
|
||||
|-------|------|----------|
|
||||
| Landmark106 | 14 MB | ✅ |
|
||||
| AgeGender | 8 MB | ✅ |
|
||||
| FairFace | 44 MB | ✅ |
|
||||
| Gaze ResNet34 | 82 MB | ✅ |
|
||||
| BiSeNet ResNet18 | 51 MB | ✅ |
|
||||
| MiniFASNet V2 | 1.2 MB | ✅ |
|
||||
|
||||
---
|
||||
|
||||
## Clear Cache
|
||||
|
||||
Find and remove cached models:
|
||||
|
||||
```python
|
||||
from uniface.model_store import get_cache_dir
|
||||
print(get_cache_dir()) # shows the active cache path
|
||||
```
|
||||
|
||||
```bash
|
||||
# Remove all cached models
|
||||
rm -rf ~/.uniface/models/
|
||||
|
||||
# Remove specific model
|
||||
rm ~/.uniface/models/retinaface_mv2.onnx
|
||||
```
|
||||
|
||||
Models will be re-downloaded on next use.
|
||||
|
||||
---
|
||||
|
||||
## Environment Variables
|
||||
|
||||
There are three equivalent ways to configure the cache directory:
|
||||
|
||||
**1. Programmatic API (recommended)**
|
||||
|
||||
```python
|
||||
from uniface.model_store import get_cache_dir, set_cache_dir
|
||||
|
||||
set_cache_dir('/path/to/custom/cache')
|
||||
print(get_cache_dir()) # /path/to/custom/cache
|
||||
```
|
||||
|
||||
**2. Direct environment variable (Python)**
|
||||
|
||||
```python
|
||||
import os
|
||||
os.environ['UNIFACE_CACHE_DIR'] = '/path/to/custom/cache'
|
||||
|
||||
from uniface.detection import RetinaFace
|
||||
detector = RetinaFace() # Uses custom cache
|
||||
```
|
||||
|
||||
**3. Shell environment variable**
|
||||
|
||||
```bash
|
||||
export UNIFACE_CACHE_DIR=/path/to/custom/cache
|
||||
```
|
||||
|
||||
All three methods set the same `UNIFACE_CACHE_DIR` environment variable under the hood. `get_cache_dir()` always returns the resolved path.
|
||||
|
||||
---
|
||||
|
||||
## Next Steps
|
||||
|
||||
- [Thresholds & Calibration](thresholds-calibration.md) - Tune model parameters
|
||||
- [Detection Module](../modules/detection.md) - Detection model details
|
||||
229
docs/concepts/overview.md
Normal file
@@ -0,0 +1,229 @@
|
||||
# Overview
|
||||
|
||||
UniFace is designed as a modular, production-ready face analysis library. This page explains the architecture and design principles.
|
||||
|
||||
---
|
||||
|
||||
## Architecture
|
||||
|
||||
UniFace follows a modular architecture where each face analysis task is handled by a dedicated module:
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph Input
|
||||
IMG[Image/Frame]
|
||||
end
|
||||
|
||||
subgraph Detection
|
||||
DET[RetinaFace / SCRFD / YOLOv5Face / YOLOv8Face]
|
||||
end
|
||||
|
||||
subgraph Analysis
|
||||
REC[Recognition]
|
||||
LMK[Landmarks]
|
||||
ATTR[Attributes]
|
||||
GAZE[Gaze]
|
||||
PARSE[Parsing]
|
||||
SPOOF[Anti-Spoofing]
|
||||
PRIV[Privacy]
|
||||
end
|
||||
|
||||
subgraph Tracking
|
||||
TRK[BYTETracker]
|
||||
end
|
||||
|
||||
subgraph Indexing
|
||||
IDX[FAISS Vector Store]
|
||||
end
|
||||
|
||||
subgraph Output
|
||||
FACE[Face Objects]
|
||||
end
|
||||
|
||||
IMG --> DET
|
||||
DET --> REC
|
||||
DET --> LMK
|
||||
DET --> ATTR
|
||||
DET --> GAZE
|
||||
DET --> PARSE
|
||||
DET --> SPOOF
|
||||
DET --> PRIV
|
||||
DET --> TRK
|
||||
REC --> IDX
|
||||
REC --> FACE
|
||||
LMK --> FACE
|
||||
ATTR --> FACE
|
||||
TRK --> FACE
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Design Principles
|
||||
|
||||
### 1. ONNX-First
|
||||
|
||||
UniFace runs inference primarily via ONNX Runtime for core components:
|
||||
|
||||
- **Cross-platform**: Same models work on macOS, Linux, Windows
|
||||
- **Hardware acceleration**: Automatic selection of optimal provider
|
||||
- **Production-ready**: No Python-only dependencies for inference
|
||||
|
||||
Some optional components (e.g., emotion TorchScript, torchvision NMS) require PyTorch.
|
||||
|
||||
### 2. Minimal Dependencies
|
||||
|
||||
Core dependencies are kept minimal:
|
||||
|
||||
```
|
||||
numpy # Array operations
|
||||
opencv-python # Image processing
|
||||
onnxruntime # Model inference
|
||||
requests # Model download
|
||||
tqdm # Progress bars
|
||||
```
|
||||
|
||||
### 3. Simple API
|
||||
|
||||
Factory functions and direct instantiation:
|
||||
|
||||
```python
|
||||
from uniface.detection import RetinaFace
|
||||
|
||||
detector = RetinaFace()
|
||||
|
||||
# Or via factory function
|
||||
from uniface.detection import create_detector
|
||||
|
||||
detector = create_detector('retinaface')
|
||||
```
|
||||
|
||||
### 4. Type Safety
|
||||
|
||||
Full type hints throughout:
|
||||
|
||||
```python
|
||||
def detect(self, image: np.ndarray) -> list[Face]:
|
||||
...
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Module Structure
|
||||
|
||||
```
|
||||
uniface/
|
||||
├── detection/ # Face detection (RetinaFace, SCRFD, YOLOv5Face, YOLOv8Face)
|
||||
├── recognition/ # Face recognition (AdaFace, ArcFace, MobileFace, SphereFace)
|
||||
├── tracking/ # Multi-object tracking (BYTETracker)
|
||||
├── landmark/ # 106-point landmarks
|
||||
├── attribute/ # Age, gender, emotion, race
|
||||
├── parsing/ # Face semantic segmentation
|
||||
├── gaze/ # Gaze estimation
|
||||
├── spoofing/ # Anti-spoofing
|
||||
├── privacy/ # Face anonymization
|
||||
├── indexing/ # Vector indexing (FAISS)
|
||||
├── types.py # Dataclasses (Face, GazeResult, etc.)
|
||||
├── constants.py # Model weights and URLs
|
||||
├── model_store.py # Model download and caching
|
||||
├── onnx_utils.py # ONNX Runtime utilities
|
||||
└── draw.py # Drawing utilities
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Workflow
|
||||
|
||||
A typical face analysis workflow:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface.attribute import AgeGender
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.recognition import ArcFace
|
||||
|
||||
# 1. Initialize models
|
||||
detector = RetinaFace()
|
||||
recognizer = ArcFace()
|
||||
age_gender = AgeGender()
|
||||
|
||||
# 2. Load image
|
||||
image = cv2.imread("photo.jpg")
|
||||
|
||||
# 3. Detect faces
|
||||
faces = detector.detect(image)
|
||||
|
||||
# 4. Analyze each face
|
||||
for face in faces:
|
||||
# Recognition embedding
|
||||
embedding = recognizer.get_normalized_embedding(image, face.landmarks)
|
||||
|
||||
# Attributes
|
||||
attrs = age_gender.predict(image, face.bbox)
|
||||
|
||||
print(f"Face: {attrs.sex}, {attrs.age} years")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## FaceAnalyzer
|
||||
|
||||
For convenience, `FaceAnalyzer` combines multiple modules:
|
||||
|
||||
```python
|
||||
from uniface.analyzer import FaceAnalyzer
|
||||
from uniface.attribute import AgeGender, FairFace
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.recognition import ArcFace
|
||||
|
||||
detector = RetinaFace()
|
||||
recognizer = ArcFace()
|
||||
age_gender = AgeGender()
|
||||
fairface = FairFace()
|
||||
|
||||
analyzer = FaceAnalyzer(
|
||||
detector,
|
||||
recognizer=recognizer,
|
||||
age_gender=age_gender,
|
||||
fairface=fairface,
|
||||
)
|
||||
|
||||
faces = analyzer.analyze(image)
|
||||
for face in faces:
|
||||
print(f"Age: {face.age}, Gender: {face.sex}")
|
||||
print(f"Embedding: {face.embedding.shape}")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Model Lifecycle
|
||||
|
||||
1. **First use**: Model is downloaded from GitHub releases
|
||||
2. **Cached**: Stored in `~/.uniface/models/` (configurable via `set_cache_dir()` or `UNIFACE_CACHE_DIR`)
|
||||
3. **Verified**: SHA-256 checksum validation
|
||||
4. **Loaded**: ONNX Runtime session created
|
||||
5. **Inference**: Hardware-accelerated execution
|
||||
|
||||
```python
|
||||
# Models auto-download on first use
|
||||
detector = RetinaFace() # Downloads if not cached
|
||||
|
||||
# Optionally configure cache location
|
||||
from uniface.model_store import get_cache_dir, set_cache_dir
|
||||
set_cache_dir('/data/models')
|
||||
print(get_cache_dir()) # /data/models
|
||||
|
||||
# Or manually pre-download
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.constants import RetinaFaceWeights
|
||||
|
||||
path = verify_model_weights(RetinaFaceWeights.MNET_V2)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Next Steps
|
||||
|
||||
- [Inputs & Outputs](inputs-outputs.md) - Understand data types
|
||||
- [Execution Providers](execution-providers.md) - Hardware acceleration
|
||||
- [Detection Module](../modules/detection.md) - Start with face detection
|
||||
- [Image Pipeline Recipe](../recipes/image-pipeline.md) - Complete workflow
|
||||
234
docs/concepts/thresholds-calibration.md
Normal file
@@ -0,0 +1,234 @@
|
||||
# Thresholds & Calibration
|
||||
|
||||
This page explains how to tune detection and recognition thresholds for your use case.
|
||||
|
||||
---
|
||||
|
||||
## Detection Thresholds
|
||||
|
||||
### Confidence Threshold
|
||||
|
||||
Controls minimum confidence for face detection:
|
||||
|
||||
```python
|
||||
from uniface.detection import RetinaFace
|
||||
|
||||
# Default (balanced)
|
||||
detector = RetinaFace(confidence_threshold=0.5)
|
||||
|
||||
# High precision (fewer false positives)
|
||||
detector = RetinaFace(confidence_threshold=0.8)
|
||||
|
||||
# High recall (catch more faces)
|
||||
detector = RetinaFace(confidence_threshold=0.3)
|
||||
```
|
||||
|
||||
**Guidelines:**
|
||||
|
||||
| Threshold | Use Case |
|
||||
|-----------|----------|
|
||||
| 0.3 - 0.4 | Maximum recall (research, analysis) |
|
||||
| 0.5 - 0.6 | Balanced (default, general use) |
|
||||
| 0.7 - 0.9 | High precision (production, security) |
|
||||
|
||||
---
|
||||
|
||||
### NMS Threshold
|
||||
|
||||
Non-Maximum Suppression removes overlapping detections:
|
||||
|
||||
```python
|
||||
# Default
|
||||
detector = RetinaFace(nms_threshold=0.4)
|
||||
|
||||
# Stricter (fewer overlapping boxes)
|
||||
detector = RetinaFace(nms_threshold=0.3)
|
||||
|
||||
# Looser (for crowded scenes)
|
||||
detector = RetinaFace(nms_threshold=0.5)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Input Size
|
||||
|
||||
Affects detection accuracy and speed:
|
||||
|
||||
```python
|
||||
# Faster, lower accuracy
|
||||
detector = RetinaFace(input_size=(320, 320))
|
||||
|
||||
# Balanced (default)
|
||||
detector = RetinaFace(input_size=(640, 640))
|
||||
|
||||
# Higher accuracy, slower
|
||||
detector = RetinaFace(input_size=(1280, 1280))
|
||||
```
|
||||
|
||||
!!! tip "Dynamic Size"
|
||||
For RetinaFace, enable dynamic input for variable image sizes:
|
||||
```python
|
||||
detector = RetinaFace(dynamic_size=True)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Recognition Thresholds
|
||||
|
||||
### Similarity Threshold
|
||||
|
||||
For identity verification (same person check):
|
||||
|
||||
```python
|
||||
import numpy as np
|
||||
from uniface.face_utils import compute_similarity
|
||||
|
||||
similarity = compute_similarity(embedding1, embedding2)
|
||||
|
||||
# Threshold interpretation
|
||||
if similarity > 0.6:
|
||||
print("Same person (high confidence)")
|
||||
elif similarity > 0.4:
|
||||
print("Uncertain (manual review)")
|
||||
else:
|
||||
print("Different people")
|
||||
```
|
||||
|
||||
**Recommended thresholds:**
|
||||
|
||||
| Threshold | Decision | False Accept Rate |
|
||||
|-----------|----------|-------------------|
|
||||
| 0.4 | Low security | Higher FAR |
|
||||
| 0.5 | Balanced | Moderate FAR |
|
||||
| 0.6 | High security | Lower FAR |
|
||||
| 0.7 | Very strict | Very low FAR |
|
||||
|
||||
---
|
||||
|
||||
### Calibration for Your Dataset
|
||||
|
||||
Test on your data to find optimal thresholds:
|
||||
|
||||
```python
|
||||
import numpy as np
|
||||
|
||||
def calibrate_threshold(same_pairs, diff_pairs, recognizer, detector):
|
||||
"""Find optimal threshold for your dataset."""
|
||||
same_scores = []
|
||||
diff_scores = []
|
||||
|
||||
# Compute similarities for same-person pairs
|
||||
for img1_path, img2_path in same_pairs:
|
||||
img1 = cv2.imread(img1_path)
|
||||
img2 = cv2.imread(img2_path)
|
||||
|
||||
faces1 = detector.detect(img1)
|
||||
faces2 = detector.detect(img2)
|
||||
|
||||
if faces1 and faces2:
|
||||
emb1 = recognizer.get_normalized_embedding(img1, faces1[0].landmarks)
|
||||
emb2 = recognizer.get_normalized_embedding(img2, faces2[0].landmarks)
|
||||
same_scores.append(np.dot(emb1, emb2.T)[0][0])
|
||||
|
||||
# Compute similarities for different-person pairs
|
||||
for img1_path, img2_path in diff_pairs:
|
||||
# ... similar process
|
||||
diff_scores.append(similarity)
|
||||
|
||||
# Find optimal threshold
|
||||
thresholds = np.arange(0.3, 0.8, 0.05)
|
||||
best_threshold = 0.5
|
||||
best_accuracy = 0
|
||||
|
||||
for thresh in thresholds:
|
||||
tp = sum(1 for s in same_scores if s >= thresh)
|
||||
tn = sum(1 for s in diff_scores if s < thresh)
|
||||
accuracy = (tp + tn) / (len(same_scores) + len(diff_scores))
|
||||
|
||||
if accuracy > best_accuracy:
|
||||
best_accuracy = accuracy
|
||||
best_threshold = thresh
|
||||
|
||||
return best_threshold, best_accuracy
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Anti-Spoofing Thresholds
|
||||
|
||||
The MiniFASNet model returns a confidence score:
|
||||
|
||||
```python
|
||||
from uniface.spoofing import MiniFASNet
|
||||
|
||||
spoofer = MiniFASNet()
|
||||
result = spoofer.predict(image, face.bbox)
|
||||
|
||||
# Default threshold (0.5)
|
||||
if result.is_real: # confidence > 0.5
|
||||
print("Real face")
|
||||
|
||||
# Custom threshold for high security
|
||||
SPOOF_THRESHOLD = 0.7
|
||||
if result.confidence > SPOOF_THRESHOLD:
|
||||
print("Real face (high confidence)")
|
||||
else:
|
||||
print("Potentially fake")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Attribute Model Confidence
|
||||
|
||||
### Emotion
|
||||
|
||||
```python
|
||||
result = emotion_predictor.predict(image, landmarks)
|
||||
|
||||
# Filter low-confidence predictions
|
||||
if result.confidence > 0.6:
|
||||
print(f"Emotion: {result.emotion}")
|
||||
else:
|
||||
print("Uncertain emotion")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Visualization Threshold
|
||||
|
||||
For drawing detections, filter by confidence:
|
||||
|
||||
```python
|
||||
from uniface.draw import draw_detections
|
||||
|
||||
# Only draw high-confidence detections
|
||||
bboxes = [f.bbox for f in faces if f.confidence > 0.7]
|
||||
scores = [f.confidence for f in faces if f.confidence > 0.7]
|
||||
landmarks = [f.landmarks for f in faces if f.confidence > 0.7]
|
||||
|
||||
draw_detections(
|
||||
image=image,
|
||||
bboxes=bboxes,
|
||||
scores=scores,
|
||||
landmarks=landmarks,
|
||||
vis_threshold=0.6 # Additional visualization filter
|
||||
)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Summary
|
||||
|
||||
| Parameter | Default | Range | Lower = | Higher = |
|
||||
|-----------|---------|-------|---------|----------|
|
||||
| `confidence_threshold` | 0.5 | 0.1-0.9 | More detections | Fewer false positives |
|
||||
| `nms_threshold` | 0.4 | 0.1-0.7 | Fewer overlaps | More overlapping boxes |
|
||||
| Similarity threshold | 0.6 | 0.3-0.8 | More matches (FAR↑) | Fewer matches (FRR↑) |
|
||||
| Spoof confidence | 0.5 | 0.3-0.9 | More "real" | Stricter liveness |
|
||||
|
||||
---
|
||||
|
||||
## Next Steps
|
||||
|
||||
- [Detection Module](../modules/detection.md) - Detection model options
|
||||
- [Recognition Module](../modules/recognition.md) - Recognition model options
|
||||
72
docs/contributing.md
Normal file
@@ -0,0 +1,72 @@
|
||||
# Contributing
|
||||
|
||||
Thank you for contributing to UniFace!
|
||||
|
||||
---
|
||||
|
||||
## Quick Start
|
||||
|
||||
```bash
|
||||
# Clone
|
||||
git clone https://github.com/yakhyo/uniface.git
|
||||
cd uniface
|
||||
|
||||
# Install dev dependencies
|
||||
pip install -e ".[dev]"
|
||||
|
||||
# Run tests
|
||||
pytest
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Code Style
|
||||
|
||||
We use [Ruff](https://docs.astral.sh/ruff/) for formatting:
|
||||
|
||||
```bash
|
||||
ruff format .
|
||||
ruff check . --fix
|
||||
```
|
||||
|
||||
**Guidelines:**
|
||||
|
||||
- Line length: 120
|
||||
- Python 3.10+ type hints
|
||||
- Google-style docstrings
|
||||
|
||||
---
|
||||
|
||||
## Pre-commit Hooks
|
||||
|
||||
```bash
|
||||
pip install pre-commit
|
||||
pre-commit install
|
||||
pre-commit run --all-files
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Pull Request Process
|
||||
|
||||
1. Fork the repository
|
||||
2. Create a feature branch
|
||||
3. Write tests for new features
|
||||
4. Ensure tests pass
|
||||
5. Submit PR with clear description
|
||||
|
||||
---
|
||||
|
||||
## Adding New Models
|
||||
|
||||
1. Create model class in appropriate submodule
|
||||
2. Add weight constants to `uniface/constants.py`
|
||||
3. Export in `__init__.py` files
|
||||
4. Write tests in `tests/`
|
||||
5. Add example in `tools/` or notebooks
|
||||
|
||||
---
|
||||
|
||||
## Questions?
|
||||
|
||||
Open an issue on [GitHub](https://github.com/yakhyo/uniface/issues).
|
||||
324
docs/datasets.md
Normal file
@@ -0,0 +1,324 @@
|
||||
# Datasets
|
||||
|
||||
Overview of all training datasets and evaluation benchmarks used by UniFace models.
|
||||
|
||||
---
|
||||
|
||||
## Quick Reference
|
||||
|
||||
| Task | Dataset | Scale | Models |
|
||||
| ----------- | ------------------------------------------------ | ---------------------- | ------------------------------------------- |
|
||||
| Detection | [WIDER FACE](#wider-face) | 32K images | RetinaFace, SCRFD, YOLOv5-Face, YOLOv8-Face |
|
||||
| Recognition | [MS1MV2](#ms1mv2) | 5.8M images, 85.7K IDs | MobileFace, SphereFace |
|
||||
| Recognition | [WebFace600K](#webface600k) | 600K images | ArcFace |
|
||||
| Recognition | [WebFace4M / WebFace12M](#webface4m--webface12m) | 4M / 12M images | AdaFace |
|
||||
| Gaze | [Gaze360](#gaze360) | 238 subjects | MobileGaze |
|
||||
| Parsing | [CelebAMask-HQ](#celebamask-hq) | 30K images | BiSeNet |
|
||||
| Attributes | [CelebA](#celeba) | 200K images | AgeGender |
|
||||
| Attributes | [FairFace](#fairface) | Balanced demographics | FairFace |
|
||||
| Attributes | [AffectNet](#affectnet) | Emotion labels | Emotion |
|
||||
|
||||
---
|
||||
|
||||
## Training Datasets
|
||||
|
||||
### Face Detection
|
||||
|
||||
#### WIDER FACE
|
||||
|
||||
Large-scale face detection benchmark with images across 61 event categories. Contains faces with a high degree of variability in scale, pose, occlusion, expression, and illumination.
|
||||
|
||||
| Property | Value |
|
||||
| -------- | ------------------------------------------- |
|
||||
| Images | ~32,000 (train/val/test split) |
|
||||
| Faces | ~394,000 annotated |
|
||||
| Subsets | Easy, Medium, Hard |
|
||||
| Used by | RetinaFace, SCRFD, YOLOv5-Face, YOLOv8-Face |
|
||||
|
||||
!!! info "Download & References"
|
||||
**Paper**: [WIDER FACE: A Face Detection Benchmark](https://arxiv.org/abs/1511.06523)
|
||||
|
||||
**Download**: [http://shuoyang1213.me/WIDERFACE/](http://shuoyang1213.me/WIDERFACE/)
|
||||
|
||||
---
|
||||
|
||||
### Face Recognition
|
||||
|
||||
#### MS1MV2
|
||||
|
||||
Refined version of the MS-Celeb-1M dataset, cleaned by InsightFace. Widely used for training face recognition models.
|
||||
|
||||
| Property | Value |
|
||||
| ---------- | ------------------------------ |
|
||||
| Identities | 85.7K |
|
||||
| Images | 5.8M |
|
||||
| Format | Aligned and cropped to 112x112 |
|
||||
| Used by | MobileFace, SphereFace |
|
||||
|
||||
!!! info "Download"
|
||||
**Kaggle (aligned 112x112)**: [ms1m-arcface-dataset](https://www.kaggle.com/datasets/yakhyokhuja/ms1m-arcface-dataset) (from InsightFace)
|
||||
|
||||
**Training code**: [yakhyo/face-recognition](https://github.com/yakhyo/face-recognition)
|
||||
|
||||
---
|
||||
|
||||
#### WebFace600K
|
||||
|
||||
Medium-scale face recognition dataset from the WebFace series.
|
||||
|
||||
| Property | Value |
|
||||
| -------- | ------- |
|
||||
| Images | ~600K |
|
||||
| Used by | ArcFace |
|
||||
|
||||
!!! info "Source"
|
||||
**Origin**: [InsightFace](https://github.com/deepinsight/insightface)
|
||||
|
||||
**Paper**: [ArcFace: Additive Angular Margin Loss for Deep Face Recognition](https://arxiv.org/abs/1801.07698)
|
||||
|
||||
---
|
||||
|
||||
#### WebFace4M / WebFace12M
|
||||
|
||||
Large-scale face recognition datasets from the WebFace260M collection. Used for training AdaFace models with adaptive quality-aware margin.
|
||||
|
||||
| Property | WebFace4M | WebFace12M |
|
||||
| -------- | ------------- | -------------- |
|
||||
| Images | ~4M | ~12M |
|
||||
| Used by | AdaFace IR_18 | AdaFace IR_101 |
|
||||
|
||||
!!! info "Source"
|
||||
**Paper**: [AdaFace: Quality Adaptive Margin for Face Recognition](https://arxiv.org/abs/2204.00964)
|
||||
|
||||
**Original code**: [mk-minchul/AdaFace](https://github.com/mk-minchul/AdaFace)
|
||||
|
||||
---
|
||||
|
||||
#### CASIA-WebFace
|
||||
|
||||
Smaller-scale face recognition dataset suitable for academic research and lighter training runs.
|
||||
|
||||
| Property | Value |
|
||||
| ---------- | ------------------------------ |
|
||||
| Identities | 10.6K |
|
||||
| Images | 491K |
|
||||
| Format | Aligned and cropped to 112x112 |
|
||||
| Used by | Alternative training set |
|
||||
|
||||
!!! info "Download"
|
||||
**Kaggle (aligned 112x112)**: [webface-112x112](https://www.kaggle.com/datasets/yakhyokhuja/webface-112x112) (from OpenSphere)
|
||||
|
||||
---
|
||||
|
||||
#### VGGFace2
|
||||
|
||||
Large-scale dataset with wide variations in pose, age, illumination, ethnicity, and profession.
|
||||
|
||||
| Property | Value |
|
||||
| ---------- | ------------------------------ |
|
||||
| Identities | 8.6K |
|
||||
| Images | 3.1M |
|
||||
| Format | Aligned and cropped to 112x112 |
|
||||
| Used by | Alternative training set |
|
||||
|
||||
!!! info "Download"
|
||||
**Kaggle (aligned 112x112)**: [vggface2-112x112](https://www.kaggle.com/datasets/yakhyokhuja/vggface2-112x112) (from OpenSphere)
|
||||
|
||||
---
|
||||
|
||||
### Gaze Estimation
|
||||
|
||||
#### Gaze360
|
||||
|
||||
Large-scale gaze estimation dataset collected in indoor and outdoor environments with diverse head poses and wide gaze ranges (up to 360 degrees).
|
||||
|
||||
| Property | Value |
|
||||
| ----------- | --------------------- |
|
||||
| Subjects | 238 |
|
||||
| Environment | Indoor and outdoor |
|
||||
| Used by | All MobileGaze models |
|
||||
|
||||
!!! info "Download & Preprocessing"
|
||||
**Download**: [gaze360.csail.mit.edu/download.php](https://gaze360.csail.mit.edu/download.php)
|
||||
|
||||
**Preprocessing**: [GazeHub - Gaze360](https://phi-ai.buaa.edu.cn/Gazehub/3D-dataset/#gaze360)
|
||||
|
||||
!!! note "UniFace Models"
|
||||
All MobileGaze models shipped with UniFace are trained exclusively on Gaze360 for 200 epochs.
|
||||
|
||||
**Dataset structure:**
|
||||
|
||||
```
|
||||
data/
|
||||
└── Gaze360/
|
||||
├── Image/
|
||||
└── Label/
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
#### MPIIFaceGaze
|
||||
|
||||
Dataset for appearance-based gaze estimation from laptop webcam images of participants during everyday laptop usage. Supported by the gaze estimation training code but not used for the UniFace pretrained weights.
|
||||
|
||||
| Property | Value |
|
||||
| ----------- | ---------------------------------------- |
|
||||
| Subjects | 15 |
|
||||
| Environment | Everyday laptop usage |
|
||||
| Used by | Supported (not used for UniFace weights) |
|
||||
|
||||
!!! info "Download & Preprocessing"
|
||||
**Download**: [MPIIFaceGaze download page](https://www.mpi-inf.mpg.de/departments/computer-vision-and-machine-learning/research/gaze-based-human-computer-interaction/its-written-all-over-your-face-full-face-appearance-based-gaze-estimation)
|
||||
|
||||
**Preprocessing**: [GazeHub - MPIIFaceGaze](https://phi-ai.buaa.edu.cn/Gazehub/3D-dataset/#mpiifacegaze)
|
||||
|
||||
**Dataset structure:**
|
||||
|
||||
```
|
||||
data/
|
||||
└── MPIIFaceGaze/
|
||||
├── Image/
|
||||
└── Label/
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Face Parsing
|
||||
|
||||
#### CelebAMask-HQ
|
||||
|
||||
High-quality face parsing dataset with pixel-level annotations for 19 facial component classes.
|
||||
|
||||
| Property | Value |
|
||||
| ---------- | ---------------------------- |
|
||||
| Images | 30,000 |
|
||||
| Classes | 19 facial components |
|
||||
| Resolution | High quality |
|
||||
| Used by | BiSeNet (ResNet18, ResNet34) |
|
||||
|
||||
!!! info "Source"
|
||||
**GitHub**: [switchablenorms/CelebAMask-HQ](https://github.com/switchablenorms/CelebAMask-HQ)
|
||||
|
||||
**Training code**: [yakhyo/face-parsing](https://github.com/yakhyo/face-parsing)
|
||||
|
||||
**Dataset structure:**
|
||||
|
||||
```
|
||||
dataset/
|
||||
├── images/ # Input face images
|
||||
│ ├── image1.jpg
|
||||
│ └── ...
|
||||
└── labels/ # Segmentation masks
|
||||
├── image1.png
|
||||
└── ...
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Attribute Analysis
|
||||
|
||||
#### CelebA
|
||||
|
||||
Large-scale face attributes dataset widely used for training age and gender prediction models.
|
||||
|
||||
| Property | Value |
|
||||
| ---------- | -------------------- |
|
||||
| Images | ~200K |
|
||||
| Attributes | 40 binary attributes |
|
||||
| Used by | AgeGender |
|
||||
|
||||
!!! info "Reference"
|
||||
**Paper**: [Deep Learning Face Attributes in the Wild](https://arxiv.org/abs/1411.7766)
|
||||
|
||||
---
|
||||
|
||||
#### FairFace
|
||||
|
||||
Face attribute dataset designed for balanced representation across race, gender, and age groups. Provides more equitable predictions compared to imbalanced datasets.
|
||||
|
||||
| Property | Value |
|
||||
| ---------- | ----------------------------------- |
|
||||
| Attributes | Race (7), Gender (2), Age Group (9) |
|
||||
| Used by | FairFace |
|
||||
| License | CC BY 4.0 |
|
||||
|
||||
!!! info "Reference"
|
||||
**Paper**: [FairFace: Face Attribute Dataset for Balanced Race, Gender, and Age](https://arxiv.org/abs/1908.04913)
|
||||
|
||||
**ONNX inference**: [yakhyo/fairface-onnx](https://github.com/yakhyo/fairface-onnx)
|
||||
|
||||
---
|
||||
|
||||
#### AffectNet
|
||||
|
||||
Large-scale facial expression dataset for emotion recognition training.
|
||||
|
||||
| Property | Value |
|
||||
| -------- | ----------------------------------------------------------------------- |
|
||||
| Classes | 7 or 8 (Neutral, Happy, Sad, Surprise, Fear, Disgust, Angry + Contempt) |
|
||||
| Used by | Emotion (AFFECNET7, AFFECNET8) |
|
||||
|
||||
!!! info "Reference"
|
||||
**Paper**: [AffectNet: A Database for Facial Expression, Valence, and Arousal Computing in the Wild](https://ieeexplore.ieee.org/document/8013713)
|
||||
|
||||
---
|
||||
|
||||
## Evaluation Benchmarks
|
||||
|
||||
### Face Detection
|
||||
|
||||
#### WIDER FACE Validation Set
|
||||
|
||||
The standard benchmark for face detection models. Results are reported across three difficulty subsets.
|
||||
|
||||
| Subset | Criteria |
|
||||
| ------ | --------------------------------------------- |
|
||||
| Easy | Large, clear, unoccluded faces |
|
||||
| Medium | Moderate scale and occlusion |
|
||||
| Hard | Small, heavily occluded, or challenging faces |
|
||||
|
||||
See [Model Zoo - Detection](models.md#face-detection-models) for per-model accuracy on each subset.
|
||||
|
||||
---
|
||||
|
||||
### Face Recognition
|
||||
|
||||
Recognition models are evaluated across multiple benchmarks. Aligned 112x112 validation datasets are available as a single download.
|
||||
|
||||
!!! info "Download"
|
||||
**Kaggle**: [agedb-30-calfw-cplfw-lfw-aligned-112x112](https://www.kaggle.com/datasets/yakhyokhuja/agedb-30-calfw-cplfw-lfw-aligned-112x112)
|
||||
|
||||
| Benchmark | Description | Used by |
|
||||
| ------------ | ----------------------------------------------------------------- | ------------------------------- |
|
||||
| **LFW** | Labeled Faces in the Wild - standard face verification benchmark | ArcFace, MobileFace, SphereFace |
|
||||
| **CALFW** | Cross-Age LFW - face verification across age gaps | MobileFace, SphereFace |
|
||||
| **CPLFW** | Cross-Pose LFW - face verification across pose variations | MobileFace, SphereFace |
|
||||
| **AgeDB-30** | Age database with 30-year age gaps | ArcFace, MobileFace, SphereFace |
|
||||
| **CFP-FP** | Celebrities in Frontal-Profile - frontal vs. profile verification | ArcFace |
|
||||
| **IJB-B** | IARPA Janus Benchmark B - TAR@FAR=0.01% | AdaFace |
|
||||
| **IJB-C** | IARPA Janus Benchmark C - TAR@FAR=1e-4 | AdaFace, ArcFace |
|
||||
|
||||
See [Model Zoo - Recognition](models.md#face-recognition-models) for per-model accuracy on each benchmark.
|
||||
|
||||
---
|
||||
|
||||
### Gaze Estimation
|
||||
|
||||
| Benchmark | Metric | Description |
|
||||
| -------------------- | ------------- | -------------------------------------------- |
|
||||
| **Gaze360 test set** | MAE (degrees) | Mean Absolute Error in gaze angle prediction |
|
||||
|
||||
See [Model Zoo - Gaze](models.md#gaze-estimation-models) for per-model MAE scores.
|
||||
|
||||
---
|
||||
|
||||
## Training Repositories
|
||||
|
||||
For training your own models or reproducing results, see the following repositories:
|
||||
|
||||
| Task | Repository | Datasets Supported |
|
||||
| ----------- | ------------------------------------------------------------------------- | ------------------------------- |
|
||||
| Detection | [yakhyo/retinaface-pytorch](https://github.com/yakhyo/retinaface-pytorch) | WIDER FACE |
|
||||
| Recognition | [yakhyo/face-recognition](https://github.com/yakhyo/face-recognition) | MS1MV2, CASIA-WebFace, VGGFace2 |
|
||||
| Gaze | [yakhyo/gaze-estimation](https://github.com/yakhyo/gaze-estimation) | Gaze360, MPIIFaceGaze |
|
||||
| Parsing | [yakhyo/face-parsing](https://github.com/yakhyo/face-parsing) | CelebAMask-HQ |
|
||||
147
docs/index.md
Normal file
@@ -0,0 +1,147 @@
|
||||
---
|
||||
hide:
|
||||
- toc
|
||||
- navigation
|
||||
- edit
|
||||
template: home.html
|
||||
---
|
||||
|
||||
<div class="hero" markdown>
|
||||
|
||||
# UniFace { .hero-title }
|
||||
|
||||
<p class="hero-subtitle">All-in-One Open-Source Face Analysis Library</p>
|
||||
|
||||
[](https://pypi.org/project/uniface/)
|
||||
[](https://www.python.org/)
|
||||
[](https://opensource.org/licenses/MIT)
|
||||
[](https://github.com/yakhyo/uniface/actions)
|
||||
[](https://pepy.tech/projects/uniface)
|
||||
[](https://www.kaggle.com/yakhyokhuja/code)
|
||||
[](https://discord.gg/wdzrjr7R5j)
|
||||
|
||||
<!-- <img src="https://raw.githubusercontent.com/yakhyo/uniface/main/.github/logos/uniface_rounded_q80.webp" alt="UniFace - All-in-One Open-Source Face Analysis Library" style="max-width: 70%; margin: 1rem 0;"> -->
|
||||
|
||||
[Get Started](quickstart.md){ .md-button .md-button--primary }
|
||||
[View on GitHub](https://github.com/yakhyo/uniface){ .md-button }
|
||||
|
||||
</div>
|
||||
|
||||
<div class="feature-grid" markdown>
|
||||
|
||||
<div class="feature-card" markdown>
|
||||
### :material-face-recognition: Face Detection
|
||||
ONNX-optimized detectors (RetinaFace, SCRFD, YOLO) with 5-point landmarks.
|
||||
</div>
|
||||
|
||||
<div class="feature-card" markdown>
|
||||
### :material-account-check: Face Recognition
|
||||
AdaFace, ArcFace, MobileFace, and SphereFace embeddings for identity verification.
|
||||
</div>
|
||||
|
||||
<div class="feature-card" markdown>
|
||||
### :material-map-marker: Landmarks
|
||||
Accurate 106-point facial landmark localization for detailed face analysis.
|
||||
</div>
|
||||
|
||||
<div class="feature-card" markdown>
|
||||
### :material-account-details: Attributes
|
||||
Age, gender, race (FairFace), and emotion detection from faces.
|
||||
</div>
|
||||
|
||||
<div class="feature-card" markdown>
|
||||
### :material-face-man-shimmer: Face Parsing
|
||||
BiSeNet semantic segmentation with 19 facial component classes.
|
||||
</div>
|
||||
|
||||
<div class="feature-card" markdown>
|
||||
### :material-eye: Gaze Estimation
|
||||
Real-time gaze direction prediction with MobileGaze models.
|
||||
</div>
|
||||
|
||||
<div class="feature-card" markdown>
|
||||
### :material-motion-play: Tracking
|
||||
Multi-object tracking with BYTETracker for persistent face IDs across video frames.
|
||||
</div>
|
||||
|
||||
<div class="feature-card" markdown>
|
||||
### :material-shield-check: Anti-Spoofing
|
||||
Face liveness detection with MiniFASNet to prevent fraud.
|
||||
</div>
|
||||
|
||||
<div class="feature-card" markdown>
|
||||
### :material-blur: Privacy
|
||||
Face anonymization with 5 blur methods for privacy protection.
|
||||
</div>
|
||||
|
||||
<div class="feature-card" markdown>
|
||||
### :material-database-search: Vector Indexing
|
||||
FAISS-backed embedding store for fast multi-identity face search.
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
---
|
||||
|
||||
## Installation
|
||||
|
||||
UniFace runs inference primarily via **ONNX Runtime**; some optional components (e.g., emotion TorchScript, torchvision NMS) require **PyTorch**.
|
||||
|
||||
**Standard**
|
||||
```bash
|
||||
pip install uniface
|
||||
```
|
||||
|
||||
**GPU (CUDA)**
|
||||
```bash
|
||||
pip install uniface[gpu]
|
||||
```
|
||||
|
||||
**From Source**
|
||||
```bash
|
||||
git clone https://github.com/yakhyo/uniface.git
|
||||
cd uniface
|
||||
pip install -e .
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Next Steps
|
||||
|
||||
<div class="next-steps-grid" markdown>
|
||||
|
||||
<div class="feature-card" markdown>
|
||||
### :material-rocket-launch: Quickstart
|
||||
Get up and running in 5 minutes with common use cases.
|
||||
|
||||
[Quickstart Guide →](quickstart.md)
|
||||
</div>
|
||||
|
||||
<div class="feature-card" markdown>
|
||||
### :material-school: Tutorials
|
||||
Step-by-step examples for common workflows.
|
||||
|
||||
[View Tutorials →](recipes/image-pipeline.md)
|
||||
</div>
|
||||
|
||||
<div class="feature-card" markdown>
|
||||
### :material-api: API Reference
|
||||
Explore individual modules and their APIs.
|
||||
|
||||
[Browse API →](modules/detection.md)
|
||||
</div>
|
||||
|
||||
<div class="feature-card" markdown>
|
||||
### :material-book-open-variant: Guides
|
||||
Learn about the architecture and design principles.
|
||||
|
||||
[Read Guides →](concepts/overview.md)
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
---
|
||||
|
||||
## License
|
||||
|
||||
UniFace is released under the [MIT License](https://opensource.org/licenses/MIT).
|
||||
196
docs/installation.md
Normal file
@@ -0,0 +1,196 @@
|
||||
# Installation
|
||||
|
||||
This guide covers all installation options for UniFace.
|
||||
|
||||
---
|
||||
|
||||
## Requirements
|
||||
|
||||
- **Python**: 3.10 or higher
|
||||
- **Operating Systems**: macOS, Linux, Windows
|
||||
|
||||
---
|
||||
|
||||
## Quick Install
|
||||
|
||||
The simplest way to install UniFace:
|
||||
|
||||
```bash
|
||||
pip install uniface
|
||||
```
|
||||
|
||||
This installs the CPU version with all core dependencies.
|
||||
|
||||
---
|
||||
|
||||
## Platform-Specific Installation
|
||||
|
||||
### macOS (Apple Silicon - M1/M2/M3/M4)
|
||||
|
||||
For Apple Silicon Macs, the standard installation automatically includes ARM64 optimizations:
|
||||
|
||||
```bash
|
||||
pip install uniface
|
||||
```
|
||||
|
||||
!!! tip "Native Performance"
|
||||
The base `onnxruntime` package has native Apple Silicon support with ARM64 optimizations built-in since version 1.13+. No additional configuration needed.
|
||||
|
||||
Verify ARM64 installation:
|
||||
|
||||
```bash
|
||||
python -c "import platform; print(platform.machine())"
|
||||
# Should show: arm64
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Linux/Windows with NVIDIA GPU
|
||||
|
||||
For CUDA acceleration on NVIDIA GPUs:
|
||||
|
||||
```bash
|
||||
pip install uniface[gpu]
|
||||
```
|
||||
|
||||
**Requirements:**
|
||||
|
||||
- `uniface[gpu]` automatically installs `onnxruntime-gpu`. Requirements depend on the ORT version and execution provider.
|
||||
|
||||
!!! info "CUDA Compatibility"
|
||||
See the [ONNX Runtime GPU compatibility matrix](https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html) for matching CUDA and cuDNN versions.
|
||||
|
||||
Verify GPU installation:
|
||||
|
||||
```python
|
||||
import onnxruntime as ort
|
||||
print("Available providers:", ort.get_available_providers())
|
||||
# Should include: 'CUDAExecutionProvider'
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### FAISS Vector Indexing
|
||||
|
||||
For fast multi-identity face search using a FAISS index:
|
||||
|
||||
```bash
|
||||
pip install faiss-cpu # CPU
|
||||
pip install faiss-gpu # NVIDIA GPU (CUDA)
|
||||
```
|
||||
|
||||
See the [Indexing module](modules/indexing.md) for usage.
|
||||
|
||||
---
|
||||
|
||||
### CPU-Only (All Platforms)
|
||||
|
||||
```bash
|
||||
pip install uniface
|
||||
```
|
||||
|
||||
Works on all platforms with automatic CPU fallback.
|
||||
|
||||
---
|
||||
|
||||
## Install from Source
|
||||
|
||||
For development or the latest features:
|
||||
|
||||
```bash
|
||||
git clone https://github.com/yakhyo/uniface.git
|
||||
cd uniface
|
||||
pip install -e .
|
||||
```
|
||||
|
||||
With development dependencies:
|
||||
|
||||
```bash
|
||||
pip install -e ".[dev]"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Dependencies
|
||||
|
||||
UniFace has minimal dependencies:
|
||||
|
||||
| Package | Purpose |
|
||||
|---------|---------|
|
||||
| `numpy` | Array operations |
|
||||
| `opencv-python` | Image processing |
|
||||
| `onnxruntime` | Model inference |
|
||||
| `scikit-image` | Geometric transforms |
|
||||
| `requests` | Model download |
|
||||
| `tqdm` | Progress bars |
|
||||
|
||||
**Optional:**
|
||||
|
||||
| Package | Install extra | Purpose |
|
||||
|---------|---------------|---------|
|
||||
| `faiss-cpu` / `faiss-gpu` | `pip install faiss-cpu` | FAISS vector indexing |
|
||||
| `onnxruntime-gpu` | `uniface[gpu]` | CUDA acceleration |
|
||||
| `torch` | `pip install torch` | Emotion model uses TorchScript |
|
||||
| `torchvision` | `pip install torchvision` | Faster NMS for YOLO detectors |
|
||||
|
||||
---
|
||||
|
||||
## Verify Installation
|
||||
|
||||
Test your installation:
|
||||
|
||||
```python
|
||||
import uniface
|
||||
print(f"UniFace version: {uniface.__version__}")
|
||||
|
||||
# Check available ONNX providers
|
||||
import onnxruntime as ort
|
||||
print(f"Available providers: {ort.get_available_providers()}")
|
||||
|
||||
# Quick test
|
||||
from uniface.detection import RetinaFace
|
||||
detector = RetinaFace()
|
||||
print("Installation successful!")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Import Errors
|
||||
|
||||
If you encounter import errors, ensure you're using Python 3.10+:
|
||||
|
||||
```bash
|
||||
python --version
|
||||
# Should show: Python 3.10.x or higher
|
||||
```
|
||||
|
||||
### Model Download Issues
|
||||
|
||||
Models are automatically downloaded on first use. If downloads fail:
|
||||
|
||||
```python
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.constants import RetinaFaceWeights
|
||||
|
||||
# Manually download a model
|
||||
model_path = verify_model_weights(RetinaFaceWeights.MNET_V2)
|
||||
print(f"Model downloaded to: {model_path}")
|
||||
```
|
||||
|
||||
### Performance Issues on Mac
|
||||
|
||||
Verify you're using the ARM64 build (not x86_64 via Rosetta):
|
||||
|
||||
```bash
|
||||
python -c "import platform; print(platform.machine())"
|
||||
# Should show: arm64 (not x86_64)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Next Steps
|
||||
|
||||
- [Quickstart Guide](quickstart.md) - Get started in 5 minutes
|
||||
- [Execution Providers](concepts/execution-providers.md) - Hardware acceleration setup
|
||||
24
docs/license-attribution.md
Normal file
@@ -0,0 +1,24 @@
|
||||
# Licenses & Attribution
|
||||
|
||||
## UniFace License
|
||||
|
||||
UniFace is released under the [MIT License](https://opensource.org/licenses/MIT).
|
||||
|
||||
---
|
||||
|
||||
## Model Credits
|
||||
|
||||
| Model | Source | License |
|
||||
|-------|--------|---------|
|
||||
| RetinaFace | [yakhyo/retinaface-pytorch](https://github.com/yakhyo/retinaface-pytorch) | MIT |
|
||||
| SCRFD | [InsightFace](https://github.com/deepinsight/insightface) | MIT |
|
||||
| YOLOv5-Face | [yakhyo/yolov5-face-onnx-inference](https://github.com/yakhyo/yolov5-face-onnx-inference) | GPL-3.0 |
|
||||
| YOLOv8-Face | [yakhyo/yolov8-face-onnx-inference](https://github.com/yakhyo/yolov8-face-onnx-inference) | GPL-3.0 |
|
||||
| AdaFace | [yakhyo/adaface-onnx](https://github.com/yakhyo/adaface-onnx) | MIT |
|
||||
| ArcFace | [InsightFace](https://github.com/deepinsight/insightface) | MIT |
|
||||
| MobileFace | [yakhyo/face-recognition](https://github.com/yakhyo/face-recognition) | MIT |
|
||||
| SphereFace | [yakhyo/face-recognition](https://github.com/yakhyo/face-recognition) | MIT |
|
||||
| BiSeNet | [yakhyo/face-parsing](https://github.com/yakhyo/face-parsing) | MIT |
|
||||
| MobileGaze | [yakhyo/gaze-estimation](https://github.com/yakhyo/gaze-estimation) | MIT |
|
||||
| MiniFASNet | [yakhyo/face-anti-spoofing](https://github.com/yakhyo/face-anti-spoofing) | Apache-2.0 |
|
||||
| FairFace | [yakhyo/fairface-onnx](https://github.com/yakhyo/fairface-onnx) | CC BY 4.0 |
|
||||
389
docs/models.md
Normal file
@@ -0,0 +1,389 @@
|
||||
# Model Zoo
|
||||
|
||||
Complete guide to all available models and their performance characteristics.
|
||||
|
||||
---
|
||||
|
||||
## Face Detection Models
|
||||
|
||||
### RetinaFace Family
|
||||
|
||||
RetinaFace models are trained on the [WIDER FACE](datasets.md#wider-face) dataset.
|
||||
|
||||
| Model Name | Params | Size | Easy | Medium | Hard |
|
||||
| -------------- | ------ | ----- | ------ | ------ | ------ |
|
||||
| `MNET_025` | 0.4M | 1.7MB | 88.48% | 87.02% | 80.61% |
|
||||
| `MNET_050` | 1.0M | 2.6MB | 89.42% | 87.97% | 82.40% |
|
||||
| `MNET_V1` | 3.5M | 3.8MB | 90.59% | 89.14% | 84.13% |
|
||||
| `MNET_V2` :material-check-circle: | 3.2M | 3.5MB | 91.70% | 91.03% | 86.60% |
|
||||
| `RESNET18` | 11.7M | 27MB | 92.50% | 91.02% | 86.63% |
|
||||
| `RESNET34` | 24.8M | 56MB | 94.16% | 93.12% | 88.90% |
|
||||
|
||||
!!! info "Accuracy & Benchmarks"
|
||||
**Accuracy**: WIDER FACE validation set (Easy/Medium/Hard subsets) - from [RetinaFace paper](https://arxiv.org/abs/1905.00641)
|
||||
|
||||
**Speed**: Benchmark on your own hardware using `python tools/detect.py --source <image>`
|
||||
|
||||
---
|
||||
|
||||
### SCRFD Family
|
||||
|
||||
SCRFD (Sample and Computation Redistribution for Efficient Face Detection) models trained on [WIDER FACE](datasets.md#wider-face) dataset.
|
||||
|
||||
| Model Name | Params | Size | Easy | Medium | Hard |
|
||||
| ---------------- | ------ | ----- | ------ | ------ | ------ |
|
||||
| `SCRFD_500M_KPS` | 0.6M | 2.5MB | 90.57% | 88.12% | 68.51% |
|
||||
| `SCRFD_10G_KPS` :material-check-circle: | 4.2M | 17MB | 95.16% | 93.87% | 83.05% |
|
||||
|
||||
!!! info "Accuracy & Benchmarks"
|
||||
**Accuracy**: WIDER FACE validation set - from [SCRFD paper](https://arxiv.org/abs/2105.04714)
|
||||
|
||||
**Speed**: Benchmark on your own hardware using `python tools/detect.py --source <image>`
|
||||
|
||||
---
|
||||
|
||||
### YOLOv5-Face Family
|
||||
|
||||
YOLOv5-Face models provide detection with 5-point facial landmarks, trained on [WIDER FACE](datasets.md#wider-face) dataset.
|
||||
|
||||
| Model Name | Size | Easy | Medium | Hard |
|
||||
| -------------- | ---- | ------ | ------ | ------ |
|
||||
| `YOLOV5N` | 11MB | 93.61% | 91.52% | 80.53% |
|
||||
| `YOLOV5S` :material-check-circle: | 28MB | 94.33% | 92.61% | 83.15% |
|
||||
| `YOLOV5M` | 82MB | 95.30% | 93.76% | 85.28% |
|
||||
|
||||
!!! info "Accuracy & Benchmarks"
|
||||
**Accuracy**: WIDER FACE validation set - from [YOLOv5-Face paper](https://arxiv.org/abs/2105.12931)
|
||||
|
||||
**Speed**: Benchmark on your own hardware using `python tools/detect.py --source <image>`
|
||||
|
||||
!!! note "Fixed Input Size"
|
||||
All YOLOv5-Face models use a fixed input size of 640×640.
|
||||
|
||||
---
|
||||
|
||||
### YOLOv8-Face Family
|
||||
|
||||
YOLOv8-Face models use anchor-free design with DFL (Distribution Focal Loss) for bbox regression. Provides detection with 5-point facial landmarks.
|
||||
|
||||
| Model Name | Size | Easy | Medium | Hard |
|
||||
| ---------------- | ------ | ------ | ------ | ------ |
|
||||
| `YOLOV8_LITE_S`| 7.4MB | 93.4% | 91.2% | 78.6% |
|
||||
| `YOLOV8N` :material-check-circle: | 12MB | 94.6% | 92.3% | 79.6% |
|
||||
|
||||
!!! info "Accuracy & Benchmarks"
|
||||
**Accuracy**: WIDER FACE validation set (Easy/Medium/Hard subsets)
|
||||
|
||||
**Speed**: Benchmark on your own hardware using `python tools/detect.py --source <image> --method yolov8face`
|
||||
|
||||
!!! note "Fixed Input Size"
|
||||
All YOLOv8-Face models use a fixed input size of 640×640.
|
||||
|
||||
---
|
||||
|
||||
## Face Recognition Models
|
||||
|
||||
### AdaFace
|
||||
|
||||
Face recognition using adaptive margin based on image quality.
|
||||
|
||||
| Model Name | Backbone | Dataset | Size | IJB-B TAR | IJB-C TAR |
|
||||
| ----------- | -------- | ----------- | ------ | --------- | --------- |
|
||||
| `IR_18` :material-check-circle: | IR-18 | WebFace4M | 92 MB | 93.03% | 94.99% |
|
||||
| `IR_101` | IR-101 | WebFace12M | 249 MB | - | 97.66% |
|
||||
|
||||
!!! info "Training Data & Accuracy"
|
||||
**Dataset**: [WebFace4M / WebFace12M](datasets.md#webface4m--webface12m) (4M / 12M images)
|
||||
|
||||
**Accuracy**: IJB-B and IJB-C benchmarks, TAR@FAR=0.01%
|
||||
|
||||
!!! tip "Key Innovation"
|
||||
AdaFace introduces adaptive margin that adjusts based on image quality, providing better performance on low-quality images compared to fixed-margin approaches.
|
||||
|
||||
|
||||
---
|
||||
|
||||
### ArcFace
|
||||
|
||||
Face recognition using additive angular margin loss.
|
||||
|
||||
| Model Name | Backbone | Params | Size | LFW | CFP-FP | AgeDB-30 | IJB-C |
|
||||
| ----------- | --------- | ------ | ----- | ------ | ------ | -------- | ----- |
|
||||
| `MNET` :material-check-circle: | MobileNet | 2.0M | 8MB | 99.70% | 98.00% | 96.58% | 95.02% |
|
||||
| `RESNET` | ResNet50 | 43.6M | 166MB | 99.83% | 99.33% | 98.23% | 97.25% |
|
||||
|
||||
!!! info "Training Data"
|
||||
**Dataset**: Trained on [WebFace600K](datasets.md#webface600k) (600K images)
|
||||
|
||||
**Accuracy**: IJB-C accuracy reported as TAR@FAR=1e-4
|
||||
|
||||
---
|
||||
|
||||
### MobileFace
|
||||
|
||||
Lightweight face recognition models with MobileNet backbones.
|
||||
|
||||
| Model Name | Backbone | Params | Size | LFW | CALFW | CPLFW | AgeDB-30 |
|
||||
| ----------------- | ---------------- | ------ | ---- | ------ | ------ | ------ | -------- |
|
||||
| `MNET_025` | MobileNetV1 0.25 | 0.36M | 1MB | 98.76% | 92.02% | 82.37% | 90.02% |
|
||||
| `MNET_V2` :material-check-circle: | MobileNetV2 | 2.29M | 4MB | 99.55% | 94.87% | 86.89% | 95.16% |
|
||||
| `MNET_V3_SMALL` | MobileNetV3-S | 1.25M | 3MB | 99.30% | 93.77% | 85.29% | 92.79% |
|
||||
| `MNET_V3_LARGE` | MobileNetV3-L | 3.52M | 10MB | 99.53% | 94.56% | 86.79% | 95.13% |
|
||||
|
||||
!!! info "Training Data"
|
||||
**Dataset**: Trained on [MS1MV2](datasets.md#ms1mv2) (5.8M images, 85K identities)
|
||||
|
||||
**Accuracy**: Evaluated on LFW, CALFW, CPLFW, and AgeDB-30 benchmarks
|
||||
|
||||
---
|
||||
|
||||
### SphereFace
|
||||
|
||||
Face recognition using angular softmax loss.
|
||||
|
||||
| Model Name | Backbone | Params | Size | LFW | CALFW | CPLFW | AgeDB-30 |
|
||||
| ------------ | -------- | ------ | ---- | ------ | ------ | ------ | -------- |
|
||||
| `SPHERE20` | Sphere20 | 24.5M | 50MB | 99.67% | 95.61% | 88.75% | 96.58% |
|
||||
| `SPHERE36` | Sphere36 | 34.6M | 92MB | 99.72% | 95.64% | 89.92% | 96.83% |
|
||||
|
||||
!!! info "Training Data"
|
||||
**Dataset**: Trained on [MS1MV2](datasets.md#ms1mv2) (5.8M images, 85K identities)
|
||||
|
||||
**Accuracy**: Evaluated on LFW, CALFW, CPLFW, and AgeDB-30 benchmarks
|
||||
|
||||
!!! note "Architecture"
|
||||
SphereFace uses angular softmax loss, an earlier approach before ArcFace. These models provide good accuracy with moderate resource requirements.
|
||||
|
||||
---
|
||||
|
||||
## Facial Landmark Models
|
||||
|
||||
### 106-Point Landmark Detection
|
||||
|
||||
Facial landmark localization model.
|
||||
|
||||
| Model Name | Points | Params | Size |
|
||||
| ---------- | ------ | ------ | ---- |
|
||||
| `2D106` | 106 | 3.7M | 14MB |
|
||||
|
||||
**Landmark Groups:**
|
||||
|
||||
| Group | Points | Count |
|
||||
|-------|--------|-------|
|
||||
| Face contour | 0-32 | 33 points |
|
||||
| Eyebrows | 33-50 | 18 points |
|
||||
| Nose | 51-62 | 12 points |
|
||||
| Eyes | 63-86 | 24 points |
|
||||
| Mouth | 87-105 | 19 points |
|
||||
|
||||
---
|
||||
|
||||
## Attribute Analysis Models
|
||||
|
||||
### Age & Gender Detection
|
||||
|
||||
| Model Name | Attributes | Params | Size |
|
||||
| ----------- | ----------- | ------ | ---- |
|
||||
| `AgeGender` | Age, Gender | 2.1M | 8MB |
|
||||
|
||||
!!! info "Training Data"
|
||||
**Dataset**: Trained on [CelebA](datasets.md#celeba)
|
||||
|
||||
!!! warning "Accuracy Note"
|
||||
Accuracy varies by demographic and image quality. Test on your specific use case.
|
||||
|
||||
---
|
||||
|
||||
### FairFace Attributes
|
||||
|
||||
| Model Name | Attributes | Params | Size |
|
||||
| ----------- | --------------------- | ------ | ----- |
|
||||
| `FairFace` | Race, Gender, Age Group | - | 44MB |
|
||||
|
||||
!!! info "Training Data"
|
||||
**Dataset**: Trained on [FairFace](datasets.md#fairface) dataset with balanced demographics
|
||||
|
||||
!!! tip "Equitable Predictions"
|
||||
FairFace provides more equitable predictions across different racial and gender groups.
|
||||
|
||||
**Race Categories (7):** White, Black, Latino Hispanic, East Asian, Southeast Asian, Indian, Middle Eastern
|
||||
|
||||
**Age Groups (9):** 0-2, 3-9, 10-19, 20-29, 30-39, 40-49, 50-59, 60-69, 70+
|
||||
|
||||
---
|
||||
|
||||
### Emotion Detection
|
||||
|
||||
| Model Name | Classes | Params | Size |
|
||||
| ------------- | ------- | ------ | ---- |
|
||||
| `AFFECNET7` | 7 | 0.5M | 2MB |
|
||||
| `AFFECNET8` | 8 | 0.5M | 2MB |
|
||||
|
||||
**Classes (7)**: Neutral, Happy, Sad, Surprise, Fear, Disgust, Angry
|
||||
|
||||
**Classes (8)**: Above + Contempt
|
||||
|
||||
!!! info "Training Data"
|
||||
**Dataset**: Trained on [AffectNet](datasets.md#affectnet)
|
||||
|
||||
!!! note "Accuracy Note"
|
||||
Emotion detection accuracy depends heavily on facial expression clarity and cultural context.
|
||||
|
||||
---
|
||||
|
||||
## Gaze Estimation Models
|
||||
|
||||
### MobileGaze Family
|
||||
|
||||
Gaze direction prediction models trained on [Gaze360](datasets.md#gaze360) dataset. Returns pitch (vertical) and yaw (horizontal) angles in radians.
|
||||
|
||||
| Model Name | Params | Size | MAE* |
|
||||
| -------------- | ------ | ------- | ----- |
|
||||
| `RESNET18` | 11.7M | 43 MB | 12.84 |
|
||||
| `RESNET34` :material-check-circle: | 24.8M | 81.6 MB | 11.33 |
|
||||
| `RESNET50` | 25.6M | 91.3 MB | 11.34 |
|
||||
| `MOBILENET_V2` | 3.5M | 9.59 MB | 13.07 |
|
||||
| `MOBILEONE_S0` | 2.1M | 4.8 MB | 12.58 |
|
||||
|
||||
*MAE (Mean Absolute Error) in degrees on Gaze360 test set - lower is better
|
||||
|
||||
!!! info "Training Data"
|
||||
**Dataset**: Trained on [Gaze360](datasets.md#gaze360) (indoor/outdoor scenes with diverse head poses)
|
||||
|
||||
**Training**: 200 epochs with classification-based approach (binned angles)
|
||||
|
||||
!!! note "Input Requirements"
|
||||
Requires face crop as input. Use face detection first to obtain bounding boxes.
|
||||
|
||||
---
|
||||
|
||||
## Face Parsing Models
|
||||
|
||||
### BiSeNet Family
|
||||
|
||||
BiSeNet (Bilateral Segmentation Network) models for semantic face parsing. Segments face images into 19 facial component classes.
|
||||
|
||||
| Model Name | Params | Size | Classes |
|
||||
| -------------- | ------ | ------- | ------- |
|
||||
| `RESNET18` :material-check-circle: | 13.3M | 50.7 MB | 19 |
|
||||
| `RESNET34` | 24.1M | 89.2 MB | 19 |
|
||||
|
||||
!!! info "Training Data"
|
||||
**Dataset**: Trained on [CelebAMask-HQ](datasets.md#celebamask-hq)
|
||||
|
||||
**Architecture**: BiSeNet with ResNet backbone
|
||||
|
||||
**Input Size**: 512×512 (automatically resized)
|
||||
|
||||
**19 Facial Component Classes:**
|
||||
|
||||
| # | Class | # | Class | # | Class |
|
||||
|---|-------|---|-------|---|-------|
|
||||
| 0 | Background | 7 | Left Ear | 14 | Neck |
|
||||
| 1 | Skin | 8 | Right Ear | 15 | Neck Lace |
|
||||
| 2 | Left Eyebrow | 9 | Ear Ring | 16 | Cloth |
|
||||
| 3 | Right Eyebrow | 10 | Nose | 17 | Hair |
|
||||
| 4 | Left Eye | 11 | Mouth | 18 | Hat |
|
||||
| 5 | Right Eye | 12 | Upper Lip | | |
|
||||
| 6 | Eye Glasses | 13 | Lower Lip | | |
|
||||
|
||||
**Applications:**
|
||||
|
||||
- Face makeup and beauty applications
|
||||
- Virtual try-on systems
|
||||
- Face editing and manipulation
|
||||
- Facial feature extraction
|
||||
- Portrait segmentation
|
||||
|
||||
!!! note "Input Requirements"
|
||||
Input should be a cropped face image. For full pipeline, use face detection first to obtain face crops.
|
||||
|
||||
---
|
||||
|
||||
### XSeg
|
||||
|
||||
XSeg from DeepFaceLab outputs masks for face regions. Requires 5-point landmarks for face alignment.
|
||||
|
||||
| Model Name | Size | Output |
|
||||
|------------|--------|--------|
|
||||
| `DEFAULT` | 67 MB | Mask [0, 1] |
|
||||
|
||||
!!! info "Model Details"
|
||||
**Origin**: DeepFaceLab
|
||||
|
||||
**Input**: NHWC format, normalized to [0, 1]
|
||||
|
||||
**Alignment**: Requires 5-point landmarks (not bbox crops)
|
||||
|
||||
**Applications:**
|
||||
|
||||
- Face region extraction
|
||||
- Face swapping pipelines
|
||||
- Occlusion handling
|
||||
|
||||
!!! note "Input Requirements"
|
||||
Requires 5-point facial landmarks. Use a face detector like RetinaFace to obtain landmarks first.
|
||||
|
||||
---
|
||||
|
||||
## Anti-Spoofing Models
|
||||
|
||||
### MiniFASNet Family
|
||||
|
||||
Face anti-spoofing models for liveness detection. Detect if a face is real (live) or fake (photo, video replay, mask).
|
||||
|
||||
| Model Name | Size | Scale |
|
||||
| ---------- | ------ | ----- |
|
||||
| `V1SE` | 1.2 MB | 4.0 |
|
||||
| `V2` :material-check-circle: | 1.2 MB | 2.7 |
|
||||
|
||||
!!! info "Output Format"
|
||||
**Output**: Returns `SpoofingResult(is_real, confidence)` where is_real: True=Real, False=Fake
|
||||
|
||||
!!! note "Input Requirements"
|
||||
Requires face bounding box from a detector.
|
||||
|
||||
---
|
||||
|
||||
## Model Management
|
||||
|
||||
Models are automatically downloaded and cached on first use.
|
||||
|
||||
- **Cache location**: `~/.uniface/models/` (configurable via `set_cache_dir()` or `UNIFACE_CACHE_DIR` env var)
|
||||
- **Inspect cache path**: `get_cache_dir()` returns the resolved active path
|
||||
- **Verification**: Models are verified with SHA-256 checksums
|
||||
- **Concurrent download**: `download_models([...])` fetches multiple models in parallel
|
||||
- **Manual download**: Use `python tools/download_model.py` to pre-download models
|
||||
|
||||
See [Model Cache & Offline Use](concepts/model-cache-offline.md) for full details.
|
||||
|
||||
---
|
||||
|
||||
## References
|
||||
|
||||
### Model Training & Architectures
|
||||
|
||||
- **RetinaFace Training**: [yakhyo/retinaface-pytorch](https://github.com/yakhyo/retinaface-pytorch) - PyTorch implementation and training code
|
||||
- **YOLOv5-Face Original**: [deepcam-cn/yolov5-face](https://github.com/deepcam-cn/yolov5-face) - Original PyTorch implementation
|
||||
- **YOLOv5-Face ONNX**: [yakhyo/yolov5-face-onnx-inference](https://github.com/yakhyo/yolov5-face-onnx-inference) - ONNX inference implementation
|
||||
- **YOLOv8-Face Original**: [derronqi/yolov8-face](https://github.com/derronqi/yolov8-face) - Original PyTorch implementation
|
||||
- **YOLOv8-Face ONNX**: [yakhyo/yolov8-face-onnx-inference](https://github.com/yakhyo/yolov8-face-onnx-inference) - ONNX inference implementation
|
||||
- **AdaFace Original**: [mk-minchul/AdaFace](https://github.com/mk-minchul/AdaFace) - Original PyTorch implementation
|
||||
- **AdaFace ONNX**: [yakhyo/adaface-onnx](https://github.com/yakhyo/adaface-onnx) - ONNX export and inference
|
||||
- **Face Recognition Training**: [yakhyo/face-recognition](https://github.com/yakhyo/face-recognition) - ArcFace, MobileFace, SphereFace training code
|
||||
- **Gaze Estimation Training**: [yakhyo/gaze-estimation](https://github.com/yakhyo/gaze-estimation) - MobileGaze training code and pretrained weights
|
||||
- **Face Parsing Training**: [yakhyo/face-parsing](https://github.com/yakhyo/face-parsing) - BiSeNet training code and pretrained weights
|
||||
- **Face Segmentation**: [yakhyo/face-segmentation](https://github.com/yakhyo/face-segmentation) - XSeg ONNX Inference
|
||||
- **Face Anti-Spoofing**: [yakhyo/face-anti-spoofing](https://github.com/yakhyo/face-anti-spoofing) - MiniFASNet ONNX inference (weights from [minivision-ai/Silent-Face-Anti-Spoofing](https://github.com/minivision-ai/Silent-Face-Anti-Spoofing))
|
||||
- **FairFace**: [yakhyo/fairface-onnx](https://github.com/yakhyo/fairface-onnx) - FairFace ONNX inference for race, gender, age prediction
|
||||
- **InsightFace**: [deepinsight/insightface](https://github.com/deepinsight/insightface) - Model architectures and pretrained weights
|
||||
|
||||
### Papers
|
||||
|
||||
- **RetinaFace**: [Single-Shot Multi-Level Face Localisation in the Wild](https://arxiv.org/abs/1905.00641)
|
||||
- **SCRFD**: [Sample and Computation Redistribution for Efficient Face Detection](https://arxiv.org/abs/2105.04714)
|
||||
- **YOLOv5-Face**: [YOLO5Face: Why Reinventing a Face Detector](https://arxiv.org/abs/2105.12931)
|
||||
- **AdaFace**: [AdaFace: Quality Adaptive Margin for Face Recognition](https://arxiv.org/abs/2204.00964)
|
||||
- **ArcFace**: [Additive Angular Margin Loss for Deep Face Recognition](https://arxiv.org/abs/1801.07698)
|
||||
- **SphereFace**: [Deep Hypersphere Embedding for Face Recognition](https://arxiv.org/abs/1704.08063)
|
||||
- **BiSeNet**: [Bilateral Segmentation Network for Real-time Semantic Segmentation](https://arxiv.org/abs/1808.00897)
|
||||
283
docs/modules/attributes.md
Normal file
@@ -0,0 +1,283 @@
|
||||
# Attributes
|
||||
|
||||
Facial attribute analysis for age, gender, race, and emotion detection.
|
||||
|
||||
---
|
||||
|
||||
## Available Models
|
||||
|
||||
| Model | Attributes | Size | Notes |
|
||||
|-------|------------|------|-------|
|
||||
| **AgeGender** | Age, Gender | 8 MB | Exact age prediction |
|
||||
| **FairFace** | Gender, Age Group, Race | 44 MB | Balanced demographics |
|
||||
| **Emotion** | 7-8 emotions | 2 MB | Requires PyTorch |
|
||||
|
||||
---
|
||||
|
||||
## AgeGender
|
||||
|
||||
Predicts exact age and binary gender.
|
||||
|
||||
### Basic Usage
|
||||
|
||||
```python
|
||||
from uniface.attribute import AgeGender
|
||||
from uniface.detection import RetinaFace
|
||||
|
||||
detector = RetinaFace()
|
||||
age_gender = AgeGender()
|
||||
|
||||
faces = detector.detect(image)
|
||||
|
||||
for face in faces:
|
||||
result = age_gender.predict(image, face.bbox)
|
||||
print(f"Gender: {result.sex}") # "Female" or "Male"
|
||||
print(f"Age: {result.age} years")
|
||||
```
|
||||
|
||||
### Output
|
||||
|
||||
```python
|
||||
# AttributeResult fields
|
||||
result.gender # 0=Female, 1=Male
|
||||
result.sex # "Female" or "Male" (property)
|
||||
result.age # int, age in years
|
||||
result.age_group # None (not provided by this model)
|
||||
result.race # None (not provided by this model)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## FairFace
|
||||
|
||||
Predicts gender, age group, and race with balanced demographics.
|
||||
|
||||
### Basic Usage
|
||||
|
||||
```python
|
||||
from uniface.attribute import FairFace
|
||||
from uniface.detection import RetinaFace
|
||||
|
||||
detector = RetinaFace()
|
||||
fairface = FairFace()
|
||||
|
||||
faces = detector.detect(image)
|
||||
|
||||
for face in faces:
|
||||
result = fairface.predict(image, face.bbox)
|
||||
print(f"Gender: {result.sex}")
|
||||
print(f"Age Group: {result.age_group}")
|
||||
print(f"Race: {result.race}")
|
||||
```
|
||||
|
||||
### Output
|
||||
|
||||
```python
|
||||
# AttributeResult fields
|
||||
result.gender # 0=Female, 1=Male
|
||||
result.sex # "Female" or "Male"
|
||||
result.age # None (not provided by this model)
|
||||
result.age_group # "20-29", "30-39", etc.
|
||||
result.race # Race/ethnicity label
|
||||
```
|
||||
|
||||
### Race Categories
|
||||
|
||||
| Label |
|
||||
|-------|
|
||||
| White |
|
||||
| Black |
|
||||
| Latino Hispanic |
|
||||
| East Asian |
|
||||
| Southeast Asian |
|
||||
| Indian |
|
||||
| Middle Eastern |
|
||||
|
||||
### Age Groups
|
||||
|
||||
| Group |
|
||||
|-------|
|
||||
| 0-2 |
|
||||
| 3-9 |
|
||||
| 10-19 |
|
||||
| 20-29 |
|
||||
| 30-39 |
|
||||
| 40-49 |
|
||||
| 50-59 |
|
||||
| 60-69 |
|
||||
| 70+ |
|
||||
|
||||
---
|
||||
|
||||
## Emotion
|
||||
|
||||
Predicts facial emotions. Requires PyTorch.
|
||||
|
||||
!!! warning "Optional Dependency"
|
||||
Emotion detection requires PyTorch. Install with:
|
||||
```bash
|
||||
pip install torch
|
||||
```
|
||||
|
||||
### Basic Usage
|
||||
|
||||
```python
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.attribute import Emotion
|
||||
from uniface.constants import DDAMFNWeights
|
||||
|
||||
detector = RetinaFace()
|
||||
emotion = Emotion(model_name=DDAMFNWeights.AFFECNET7)
|
||||
|
||||
faces = detector.detect(image)
|
||||
|
||||
for face in faces:
|
||||
result = emotion.predict(image, face.landmarks)
|
||||
print(f"Emotion: {result.emotion}")
|
||||
print(f"Confidence: {result.confidence:.2%}")
|
||||
```
|
||||
|
||||
### Emotion Classes
|
||||
|
||||
=== "7-Class (AFFECNET7)"
|
||||
|
||||
| Label |
|
||||
|-------|
|
||||
| Neutral |
|
||||
| Happy |
|
||||
| Sad |
|
||||
| Surprise |
|
||||
| Fear |
|
||||
| Disgust |
|
||||
| Angry |
|
||||
|
||||
=== "8-Class (AFFECNET8)"
|
||||
|
||||
| Label |
|
||||
|-------|
|
||||
| Neutral |
|
||||
| Happy |
|
||||
| Sad |
|
||||
| Surprise |
|
||||
| Fear |
|
||||
| Disgust |
|
||||
| Angry |
|
||||
| Contempt |
|
||||
|
||||
### Model Variants
|
||||
|
||||
```python
|
||||
from uniface.attribute import Emotion
|
||||
from uniface.constants import DDAMFNWeights
|
||||
|
||||
# 7-class emotion
|
||||
emotion = Emotion(model_name=DDAMFNWeights.AFFECNET7)
|
||||
|
||||
# 8-class emotion
|
||||
emotion = Emotion(model_name=DDAMFNWeights.AFFECNET8)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Combining Models
|
||||
|
||||
### Full Attribute Analysis
|
||||
|
||||
```python
|
||||
from uniface.attribute import AgeGender, FairFace
|
||||
from uniface.detection import RetinaFace
|
||||
|
||||
detector = RetinaFace()
|
||||
age_gender = AgeGender()
|
||||
fairface = FairFace()
|
||||
|
||||
faces = detector.detect(image)
|
||||
|
||||
for face in faces:
|
||||
# Get exact age from AgeGender
|
||||
ag_result = age_gender.predict(image, face.bbox)
|
||||
|
||||
# Get race from FairFace
|
||||
ff_result = fairface.predict(image, face.bbox)
|
||||
|
||||
print(f"Gender: {ag_result.sex}")
|
||||
print(f"Exact Age: {ag_result.age}")
|
||||
print(f"Age Group: {ff_result.age_group}")
|
||||
print(f"Race: {ff_result.race}")
|
||||
```
|
||||
|
||||
### Using FaceAnalyzer
|
||||
|
||||
```python
|
||||
from uniface.analyzer import FaceAnalyzer
|
||||
from uniface.attribute import AgeGender
|
||||
from uniface.detection import RetinaFace
|
||||
|
||||
analyzer = FaceAnalyzer(
|
||||
RetinaFace(),
|
||||
age_gender=AgeGender(),
|
||||
)
|
||||
|
||||
faces = analyzer.analyze(image)
|
||||
|
||||
for face in faces:
|
||||
print(f"Age: {face.age}, Gender: {face.sex}")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Visualization
|
||||
|
||||
```python
|
||||
import cv2
|
||||
|
||||
def draw_attributes(image, face, result):
|
||||
"""Draw attributes on image."""
|
||||
x1, y1, x2, y2 = map(int, face.bbox)
|
||||
|
||||
# Draw bounding box
|
||||
cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
|
||||
|
||||
# Build label
|
||||
label = f"{result.sex}"
|
||||
if result.age:
|
||||
label += f", {result.age}y"
|
||||
if result.age_group:
|
||||
label += f", {result.age_group}"
|
||||
if result.race:
|
||||
label += f", {result.race}"
|
||||
|
||||
# Draw label
|
||||
cv2.putText(
|
||||
image, label, (x1, y1 - 10),
|
||||
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2
|
||||
)
|
||||
|
||||
return image
|
||||
|
||||
# Usage
|
||||
for face in faces:
|
||||
result = age_gender.predict(image, face.bbox)
|
||||
image = draw_attributes(image, face, result)
|
||||
|
||||
cv2.imwrite("attributes.jpg", image)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Accuracy Notes
|
||||
|
||||
!!! note "Model Limitations"
|
||||
- **AgeGender**: Trained on CelebA; accuracy varies by demographic
|
||||
- **FairFace**: Trained for balanced demographics; better cross-racial accuracy
|
||||
- **Emotion**: Accuracy depends on facial expression clarity
|
||||
|
||||
Always test on your specific use case and consider cultural context.
|
||||
|
||||
---
|
||||
|
||||
## Next Steps
|
||||
|
||||
- [Parsing](parsing.md) - Face semantic segmentation
|
||||
- [Gaze](gaze.md) - Gaze estimation
|
||||
- [Image Pipeline Recipe](../recipes/image-pipeline.md) - Complete workflow
|
||||
293
docs/modules/detection.md
Normal file
@@ -0,0 +1,293 @@
|
||||
# Detection
|
||||
|
||||
Face detection is the first step in any face analysis pipeline. UniFace provides four detection models.
|
||||
|
||||
---
|
||||
|
||||
## Available Models
|
||||
|
||||
| Model | Backbone | Size | Easy | Medium | Hard | Landmarks |
|
||||
|-------|----------|------|------|--------|------|:---------:|
|
||||
| **RetinaFace** | MobileNet V2 | 3.5 MB | 91.7% | 91.0% | 86.6% | :material-check: |
|
||||
| **SCRFD** | SCRFD-10G | 17 MB | 95.2% | 93.9% | 83.1% | :material-check: |
|
||||
| **YOLOv5-Face** | YOLOv5s | 28 MB | 94.3% | 92.6% | 83.2% | :material-check: |
|
||||
| **YOLOv8-Face** | YOLOv8n | 12 MB | 94.6% | 92.3% | 79.6% | :material-check: |
|
||||
|
||||
!!! note "Dataset"
|
||||
All models trained on WIDERFACE dataset.
|
||||
---
|
||||
|
||||
## RetinaFace
|
||||
|
||||
Single-shot face detector with multi-scale feature pyramid.
|
||||
|
||||
### Basic Usage
|
||||
|
||||
```python
|
||||
from uniface.detection import RetinaFace
|
||||
|
||||
detector = RetinaFace()
|
||||
faces = detector.detect(image)
|
||||
|
||||
for face in faces:
|
||||
print(f"Confidence: {face.confidence:.2f}")
|
||||
print(f"BBox: {face.bbox}")
|
||||
print(f"Landmarks: {face.landmarks.shape}") # (5, 2)
|
||||
```
|
||||
|
||||
### Model Variants
|
||||
|
||||
```python
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.constants import RetinaFaceWeights
|
||||
|
||||
# Lightweight (mobile/edge)
|
||||
detector = RetinaFace(model_name=RetinaFaceWeights.MNET_025)
|
||||
|
||||
# Balanced (default)
|
||||
detector = RetinaFace(model_name=RetinaFaceWeights.MNET_V2)
|
||||
|
||||
# High accuracy
|
||||
detector = RetinaFace(model_name=RetinaFaceWeights.RESNET34)
|
||||
```
|
||||
|
||||
| Variant | Params | Size | Easy | Medium | Hard |
|
||||
|---------|--------|------|------|--------|------|
|
||||
| MNET_025 | 0.4M | 1.7 MB | 88.5% | 87.0% | 80.6% |
|
||||
| MNET_050 | 1.0M | 2.6 MB | 89.4% | 88.0% | 82.4% |
|
||||
| MNET_V1 | 3.5M | 3.8 MB | 90.6% | 89.1% | 84.1% |
|
||||
| **MNET_V2** :material-check-circle: | 3.2M | 3.5 MB | 91.7% | 91.0% | 86.6% |
|
||||
| RESNET18 | 11.7M | 27 MB | 92.5% | 91.0% | 86.6% |
|
||||
| RESNET34 | 24.8M | 56 MB | 94.2% | 93.1% | 88.9% |
|
||||
|
||||
### Configuration
|
||||
|
||||
```python
|
||||
detector = RetinaFace(
|
||||
model_name=RetinaFaceWeights.MNET_V2,
|
||||
confidence_threshold=0.5, # Min confidence
|
||||
nms_threshold=0.4, # NMS IoU threshold
|
||||
input_size=(640, 640), # Input resolution
|
||||
dynamic_size=False, # Enable dynamic input size
|
||||
providers=None, # Auto-detect, or ['CPUExecutionProvider']
|
||||
)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## SCRFD
|
||||
|
||||
State-of-the-art detection with excellent accuracy-speed tradeoff.
|
||||
|
||||
### Basic Usage
|
||||
|
||||
```python
|
||||
from uniface.detection import SCRFD
|
||||
|
||||
detector = SCRFD()
|
||||
faces = detector.detect(image)
|
||||
```
|
||||
|
||||
### Model Variants
|
||||
|
||||
```python
|
||||
from uniface.detection import SCRFD
|
||||
from uniface.constants import SCRFDWeights
|
||||
|
||||
# Real-time (lightweight)
|
||||
detector = SCRFD(model_name=SCRFDWeights.SCRFD_500M_KPS)
|
||||
|
||||
# High accuracy (default)
|
||||
detector = SCRFD(model_name=SCRFDWeights.SCRFD_10G_KPS)
|
||||
```
|
||||
|
||||
| Variant | Params | Size | Easy | Medium | Hard |
|
||||
|---------|--------|------|------|--------|------|
|
||||
| SCRFD_500M_KPS | 0.6M | 2.5 MB | 90.6% | 88.1% | 68.5% |
|
||||
| **SCRFD_10G_KPS** :material-check-circle: | 4.2M | 17 MB | 95.2% | 93.9% | 83.1% |
|
||||
|
||||
### Configuration
|
||||
|
||||
```python
|
||||
detector = SCRFD(
|
||||
model_name=SCRFDWeights.SCRFD_10G_KPS,
|
||||
confidence_threshold=0.5,
|
||||
nms_threshold=0.4,
|
||||
input_size=(640, 640),
|
||||
providers=None, # Auto-detect, or ['CPUExecutionProvider']
|
||||
)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## YOLOv5-Face
|
||||
|
||||
YOLO-based detection optimized for faces.
|
||||
|
||||
### Basic Usage
|
||||
|
||||
```python
|
||||
from uniface.detection import YOLOv5Face
|
||||
|
||||
detector = YOLOv5Face()
|
||||
faces = detector.detect(image)
|
||||
```
|
||||
|
||||
### Model Variants
|
||||
|
||||
```python
|
||||
from uniface.detection import YOLOv5Face
|
||||
from uniface.constants import YOLOv5FaceWeights
|
||||
|
||||
# Lightweight
|
||||
detector = YOLOv5Face(model_name=YOLOv5FaceWeights.YOLOV5N)
|
||||
|
||||
# Balanced (default)
|
||||
detector = YOLOv5Face(model_name=YOLOv5FaceWeights.YOLOV5S)
|
||||
|
||||
# High accuracy
|
||||
detector = YOLOv5Face(model_name=YOLOv5FaceWeights.YOLOV5M)
|
||||
```
|
||||
|
||||
| Variant | Size | Easy | Medium | Hard |
|
||||
|---------|------|------|--------|------|
|
||||
| YOLOV5N | 11 MB | 93.6% | 91.5% | 80.5% |
|
||||
| **YOLOV5S** :material-check-circle: | 28 MB | 94.3% | 92.6% | 83.2% |
|
||||
| YOLOV5M | 82 MB | 95.3% | 93.8% | 85.3% |
|
||||
|
||||
!!! note "Fixed Input Size"
|
||||
YOLOv5-Face uses a fixed input size of 640×640.
|
||||
|
||||
### Configuration
|
||||
|
||||
```python
|
||||
detector = YOLOv5Face(
|
||||
model_name=YOLOv5FaceWeights.YOLOV5S,
|
||||
confidence_threshold=0.6,
|
||||
nms_threshold=0.5,
|
||||
nms_mode='numpy', # or 'torchvision' for faster NMS
|
||||
providers=None, # Auto-detect, or ['CPUExecutionProvider']
|
||||
)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## YOLOv8-Face
|
||||
|
||||
Anchor-free detection with DFL (Distribution Focal Loss) for accurate bbox regression.
|
||||
|
||||
### Basic Usage
|
||||
|
||||
```python
|
||||
from uniface.detection import YOLOv8Face
|
||||
|
||||
detector = YOLOv8Face()
|
||||
faces = detector.detect(image)
|
||||
```
|
||||
|
||||
### Model Variants
|
||||
|
||||
```python
|
||||
from uniface.detection import YOLOv8Face
|
||||
from uniface.constants import YOLOv8FaceWeights
|
||||
|
||||
# Lightweight
|
||||
detector = YOLOv8Face(model_name=YOLOv8FaceWeights.YOLOV8_LITE_S)
|
||||
|
||||
# Recommended (default)
|
||||
detector = YOLOv8Face(model_name=YOLOv8FaceWeights.YOLOV8N)
|
||||
```
|
||||
|
||||
| Variant | Size | Easy | Medium | Hard |
|
||||
|---------|------|------|--------|------|
|
||||
| YOLOV8_LITE_S | 7.4 MB | 93.4% | 91.2% | 78.6% |
|
||||
| **YOLOV8N** :material-check-circle: | 12 MB | 94.6% | 92.3% | 79.6% |
|
||||
|
||||
!!! note "Fixed Input Size"
|
||||
YOLOv8-Face uses a fixed input size of 640×640.
|
||||
|
||||
### Configuration
|
||||
|
||||
```python
|
||||
detector = YOLOv8Face(
|
||||
model_name=YOLOv8FaceWeights.YOLOV8N,
|
||||
confidence_threshold=0.5,
|
||||
nms_threshold=0.45,
|
||||
nms_mode='numpy', # or 'torchvision' for faster NMS
|
||||
providers=None, # Auto-detect, or ['CPUExecutionProvider']
|
||||
)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Factory Function
|
||||
|
||||
Create detectors dynamically:
|
||||
|
||||
```python
|
||||
from uniface.detection import create_detector
|
||||
|
||||
detector = create_detector('retinaface')
|
||||
# or
|
||||
detector = create_detector('scrfd')
|
||||
# or
|
||||
detector = create_detector('yolov5face')
|
||||
# or
|
||||
detector = create_detector('yolov8face')
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Output Format
|
||||
|
||||
All detectors return `list[Face]`:
|
||||
|
||||
```python
|
||||
for face in faces:
|
||||
# Bounding box [x1, y1, x2, y2]
|
||||
bbox = face.bbox
|
||||
|
||||
# Detection confidence (0-1)
|
||||
confidence = face.confidence
|
||||
|
||||
# 5-point landmarks (5, 2)
|
||||
landmarks = face.landmarks
|
||||
# [left_eye, right_eye, nose, left_mouth, right_mouth]
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Visualization
|
||||
|
||||
```python
|
||||
from uniface.draw import draw_detections
|
||||
|
||||
draw_detections(
|
||||
image=image,
|
||||
bboxes=[f.bbox for f in faces],
|
||||
scores=[f.confidence for f in faces],
|
||||
landmarks=[f.landmarks for f in faces],
|
||||
vis_threshold=0.6
|
||||
)
|
||||
|
||||
cv2.imwrite("result.jpg", image)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Performance Comparison
|
||||
|
||||
Benchmark on your hardware:
|
||||
|
||||
```bash
|
||||
python tools/detect.py --source image.jpg
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## See Also
|
||||
|
||||
- [Recognition Module](recognition.md) - Extract embeddings from detected faces
|
||||
- [Landmarks Module](landmarks.md) - Get 106-point landmarks
|
||||
- [Image Pipeline Recipe](../recipes/image-pipeline.md) - Complete detection workflow
|
||||
- [Concepts: Thresholds](../concepts/thresholds-calibration.md) - Tuning detection parameters
|
||||
272
docs/modules/gaze.md
Normal file
@@ -0,0 +1,272 @@
|
||||
# Gaze Estimation
|
||||
|
||||
Gaze estimation predicts where a person is looking (pitch and yaw angles).
|
||||
|
||||
---
|
||||
|
||||
## Available Models
|
||||
|
||||
| Model | Backbone | Size | MAE* |
|
||||
|-------|----------|------|------|
|
||||
| ResNet18 | ResNet18 | 43 MB | 12.84° |
|
||||
| **ResNet34** :material-check-circle: | ResNet34 | 82 MB | 11.33° |
|
||||
| ResNet50 | ResNet50 | 91 MB | 11.34° |
|
||||
| MobileNetV2 | MobileNetV2 | 9.6 MB | 13.07° |
|
||||
| MobileOne-S0 | MobileOne | 4.8 MB | 12.58° |
|
||||
|
||||
*MAE = Mean Absolute Error on Gaze360 test set (lower is better)
|
||||
|
||||
---
|
||||
|
||||
## Basic Usage
|
||||
|
||||
```python
|
||||
import cv2
|
||||
import numpy as np
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.gaze import MobileGaze
|
||||
|
||||
detector = RetinaFace()
|
||||
gaze_estimator = MobileGaze()
|
||||
|
||||
image = cv2.imread("photo.jpg")
|
||||
faces = detector.detect(image)
|
||||
|
||||
for face in faces:
|
||||
# Crop face
|
||||
x1, y1, x2, y2 = map(int, face.bbox)
|
||||
face_crop = image[y1:y2, x1:x2]
|
||||
|
||||
if face_crop.size > 0:
|
||||
# Estimate gaze
|
||||
result = gaze_estimator.estimate(face_crop)
|
||||
|
||||
# Convert to degrees
|
||||
pitch_deg = np.degrees(result.pitch)
|
||||
yaw_deg = np.degrees(result.yaw)
|
||||
|
||||
print(f"Pitch: {pitch_deg:.1f}°, Yaw: {yaw_deg:.1f}°")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Model Variants
|
||||
|
||||
```python
|
||||
from uniface.gaze import MobileGaze
|
||||
from uniface.constants import GazeWeights
|
||||
|
||||
# Default (ResNet34, recommended)
|
||||
gaze = MobileGaze()
|
||||
|
||||
# Lightweight for mobile/edge
|
||||
gaze = MobileGaze(model_name=GazeWeights.MOBILEONE_S0)
|
||||
|
||||
# Higher accuracy
|
||||
gaze = MobileGaze(model_name=GazeWeights.RESNET50)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Output Format
|
||||
|
||||
```python
|
||||
result = gaze_estimator.estimate(face_crop)
|
||||
|
||||
# GazeResult dataclass
|
||||
result.pitch # Vertical angle in radians
|
||||
result.yaw # Horizontal angle in radians
|
||||
```
|
||||
|
||||
### Angle Convention
|
||||
|
||||
```
|
||||
pitch = +90° (looking up)
|
||||
│
|
||||
│
|
||||
yaw = -90° ────┼──── yaw = +90°
|
||||
(looking left) │ (looking right)
|
||||
│
|
||||
pitch = -90° (looking down)
|
||||
```
|
||||
|
||||
- **Pitch**: Vertical gaze angle
|
||||
- Positive = looking up
|
||||
- Negative = looking down
|
||||
|
||||
- **Yaw**: Horizontal gaze angle
|
||||
- Positive = looking right
|
||||
- Negative = looking left
|
||||
|
||||
---
|
||||
|
||||
## Visualization
|
||||
|
||||
```python
|
||||
from uniface.draw import draw_gaze
|
||||
|
||||
# Detect faces
|
||||
faces = detector.detect(image)
|
||||
|
||||
for face in faces:
|
||||
x1, y1, x2, y2 = map(int, face.bbox)
|
||||
face_crop = image[y1:y2, x1:x2]
|
||||
|
||||
if face_crop.size > 0:
|
||||
result = gaze_estimator.estimate(face_crop)
|
||||
|
||||
# Draw gaze arrow on image
|
||||
draw_gaze(image, face.bbox, result.pitch, result.yaw)
|
||||
|
||||
cv2.imwrite("gaze_output.jpg", image)
|
||||
```
|
||||
|
||||
### Custom Visualization
|
||||
|
||||
```python
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
def draw_gaze_custom(image, bbox, pitch, yaw, length=100, color=(0, 255, 0)):
|
||||
"""Draw custom gaze arrow."""
|
||||
x1, y1, x2, y2 = map(int, bbox)
|
||||
|
||||
# Face center
|
||||
cx = (x1 + x2) // 2
|
||||
cy = (y1 + y2) // 2
|
||||
|
||||
# Calculate endpoint
|
||||
dx = -length * np.sin(yaw) * np.cos(pitch)
|
||||
dy = -length * np.sin(pitch)
|
||||
|
||||
# Draw arrow
|
||||
end_x = int(cx + dx)
|
||||
end_y = int(cy + dy)
|
||||
|
||||
cv2.arrowedLine(image, (cx, cy), (end_x, end_y), color, 2, tipLength=0.3)
|
||||
|
||||
return image
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Real-Time Gaze Tracking
|
||||
|
||||
```python
|
||||
import cv2
|
||||
import numpy as np
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.gaze import MobileGaze
|
||||
from uniface.draw import draw_gaze
|
||||
|
||||
detector = RetinaFace()
|
||||
gaze_estimator = MobileGaze()
|
||||
|
||||
cap = cv2.VideoCapture(0)
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
faces = detector.detect(frame)
|
||||
|
||||
for face in faces:
|
||||
x1, y1, x2, y2 = map(int, face.bbox)
|
||||
face_crop = frame[y1:y2, x1:x2]
|
||||
|
||||
if face_crop.size > 0:
|
||||
result = gaze_estimator.estimate(face_crop)
|
||||
|
||||
# Draw bounding box
|
||||
cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
|
||||
|
||||
# Draw gaze
|
||||
draw_gaze(frame, face.bbox, result.pitch, result.yaw)
|
||||
|
||||
# Display angles
|
||||
pitch_deg = np.degrees(result.pitch)
|
||||
yaw_deg = np.degrees(result.yaw)
|
||||
label = f"P:{pitch_deg:.0f} Y:{yaw_deg:.0f}"
|
||||
cv2.putText(frame, label, (x1, y1 - 10),
|
||||
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
|
||||
|
||||
cv2.imshow("Gaze Estimation", frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Use Cases
|
||||
|
||||
### Attention Detection
|
||||
|
||||
```python
|
||||
def is_looking_at_camera(result, threshold=15):
|
||||
"""Check if person is looking at camera."""
|
||||
pitch_deg = abs(np.degrees(result.pitch))
|
||||
yaw_deg = abs(np.degrees(result.yaw))
|
||||
|
||||
return pitch_deg < threshold and yaw_deg < threshold
|
||||
|
||||
# Usage
|
||||
result = gaze_estimator.estimate(face_crop)
|
||||
if is_looking_at_camera(result):
|
||||
print("Looking at camera")
|
||||
else:
|
||||
print("Looking away")
|
||||
```
|
||||
|
||||
### Gaze Direction Classification
|
||||
|
||||
```python
|
||||
def classify_gaze_direction(result, threshold=20):
|
||||
"""Classify gaze into directions."""
|
||||
pitch_deg = np.degrees(result.pitch)
|
||||
yaw_deg = np.degrees(result.yaw)
|
||||
|
||||
directions = []
|
||||
|
||||
if pitch_deg > threshold:
|
||||
directions.append("up")
|
||||
elif pitch_deg < -threshold:
|
||||
directions.append("down")
|
||||
|
||||
if yaw_deg > threshold:
|
||||
directions.append("right")
|
||||
elif yaw_deg < -threshold:
|
||||
directions.append("left")
|
||||
|
||||
if not directions:
|
||||
return "center"
|
||||
|
||||
return " ".join(directions)
|
||||
|
||||
# Usage
|
||||
result = gaze_estimator.estimate(face_crop)
|
||||
direction = classify_gaze_direction(result)
|
||||
print(f"Looking: {direction}")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Factory Function
|
||||
|
||||
```python
|
||||
from uniface.gaze import create_gaze_estimator
|
||||
|
||||
gaze = create_gaze_estimator() # Returns MobileGaze
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Next Steps
|
||||
|
||||
- [Anti-Spoofing](spoofing.md) - Face liveness detection
|
||||
- [Privacy](privacy.md) - Face anonymization
|
||||
- [Video Recipe](../recipes/video-webcam.md) - Real-time processing
|
||||
172
docs/modules/indexing.md
Normal file
@@ -0,0 +1,172 @@
|
||||
# Indexing
|
||||
|
||||
FAISS-backed vector store for fast similarity search over embeddings.
|
||||
|
||||
!!! info "Optional dependency"
|
||||
```bash
|
||||
pip install faiss-cpu
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## FAISS
|
||||
|
||||
```python
|
||||
from uniface.indexing import FAISS
|
||||
```
|
||||
|
||||
A thin wrapper around a FAISS `IndexFlatIP` (inner-product) index. Vectors
|
||||
**must** be L2-normalised before adding so that inner product equals cosine
|
||||
similarity. The store does not normalise internally.
|
||||
|
||||
Each vector is paired with a metadata `dict` that can carry any
|
||||
JSON-serialisable payload (person ID, name, source path, etc.).
|
||||
|
||||
### Constructor
|
||||
|
||||
```python
|
||||
store = FAISS(embedding_size=512, db_path="./vector_index")
|
||||
```
|
||||
|
||||
| Parameter | Type | Default | Description |
|
||||
|-----------|------|---------|-------------|
|
||||
| `embedding_size` | `int` | `512` | Dimension of embedding vectors |
|
||||
| `db_path` | `str` | `"./vector_index"` | Directory for persisting index and metadata |
|
||||
|
||||
---
|
||||
|
||||
### Methods
|
||||
|
||||
#### `add(embedding, metadata)`
|
||||
|
||||
Add a single embedding with associated metadata.
|
||||
|
||||
```python
|
||||
store.add(embedding, {"person_id": "alice", "source": "photo.jpg"})
|
||||
```
|
||||
|
||||
| Parameter | Type | Description |
|
||||
|-----------|------|-------------|
|
||||
| `embedding` | `np.ndarray` | L2-normalised embedding vector |
|
||||
| `metadata` | `dict[str, Any]` | Arbitrary JSON-serialisable key-value pairs |
|
||||
|
||||
---
|
||||
|
||||
#### `search(embedding, threshold=0.4)`
|
||||
|
||||
Find the closest match for a query embedding.
|
||||
|
||||
```python
|
||||
result, similarity = store.search(query_embedding, threshold=0.4)
|
||||
if result:
|
||||
print(result["person_id"], similarity)
|
||||
```
|
||||
|
||||
| Parameter | Type | Default | Description |
|
||||
|-----------|------|---------|-------------|
|
||||
| `embedding` | `np.ndarray` | — | L2-normalised query vector |
|
||||
| `threshold` | `float` | `0.4` | Minimum cosine similarity to accept a match |
|
||||
|
||||
**Returns:** `(metadata, similarity)` if a match is found, or `(None, similarity)` when below threshold or the index is empty.
|
||||
|
||||
---
|
||||
|
||||
#### `remove(key, value)`
|
||||
|
||||
Remove all entries where `metadata[key] == value` and rebuild the index.
|
||||
|
||||
```python
|
||||
removed = store.remove("person_id", "bob")
|
||||
print(f"Removed {removed} entries")
|
||||
```
|
||||
|
||||
| Parameter | Type | Description |
|
||||
|-----------|------|-------------|
|
||||
| `key` | `str` | Metadata key to match |
|
||||
| `value` | `Any` | Value to match |
|
||||
|
||||
**Returns:** Number of entries removed.
|
||||
|
||||
---
|
||||
|
||||
#### `save()`
|
||||
|
||||
Persist the FAISS index and metadata to disk.
|
||||
|
||||
```python
|
||||
store.save()
|
||||
```
|
||||
|
||||
Writes two files to `db_path`:
|
||||
|
||||
- `faiss_index.bin` — binary FAISS index
|
||||
- `metadata.json` — JSON array of metadata dicts
|
||||
|
||||
---
|
||||
|
||||
#### `load()`
|
||||
|
||||
Load a previously saved index and metadata.
|
||||
|
||||
```python
|
||||
store = FAISS(db_path="./vector_index")
|
||||
loaded = store.load() # True if files exist
|
||||
```
|
||||
|
||||
**Returns:** `True` if loaded successfully, `False` if files are missing.
|
||||
|
||||
**Raises:** `RuntimeError` if files exist but cannot be read.
|
||||
|
||||
---
|
||||
|
||||
### Properties
|
||||
|
||||
| Property | Type | Description |
|
||||
|----------|------|-------------|
|
||||
| `size` | `int` | Number of vectors in the index |
|
||||
| `len(store)` | `int` | Same as `size` |
|
||||
|
||||
---
|
||||
|
||||
## Example: End-to-End
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.recognition import ArcFace
|
||||
from uniface.indexing import FAISS
|
||||
|
||||
detector = RetinaFace()
|
||||
recognizer = ArcFace()
|
||||
|
||||
# Build
|
||||
store = FAISS(db_path="./my_index")
|
||||
|
||||
image = cv2.imread("alice.jpg")
|
||||
faces = detector.detect(image)
|
||||
embedding = recognizer.get_normalized_embedding(image, faces[0].landmarks)
|
||||
store.add(embedding, {"person_id": "alice"})
|
||||
store.save()
|
||||
|
||||
# Search
|
||||
store2 = FAISS(db_path="./my_index")
|
||||
store2.load()
|
||||
|
||||
query = cv2.imread("unknown.jpg")
|
||||
faces = detector.detect(query)
|
||||
emb = recognizer.get_normalized_embedding(query, faces[0].landmarks)
|
||||
|
||||
result, sim = store2.search(emb)
|
||||
if result:
|
||||
print(f"Matched: {result['person_id']} (similarity: {sim:.3f})")
|
||||
else:
|
||||
print(f"No match (similarity: {sim:.3f})")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## See Also
|
||||
|
||||
- [Face Search Recipe](../recipes/face-search.md) - Building and querying indexes
|
||||
- [Recognition Module](recognition.md) - Embedding extraction
|
||||
- [Thresholds Guide](../concepts/thresholds-calibration.md) - Tuning similarity thresholds
|
||||
252
docs/modules/landmarks.md
Normal file
@@ -0,0 +1,252 @@
|
||||
# Landmarks
|
||||
|
||||
Facial landmark detection provides precise localization of facial features.
|
||||
|
||||
---
|
||||
|
||||
## Available Models
|
||||
|
||||
| Model | Points | Size |
|
||||
|-------|--------|------|
|
||||
| **Landmark106** | 106 | 14 MB |
|
||||
|
||||
!!! info "5-Point Landmarks"
|
||||
Basic 5-point landmarks are included with all detection models (RetinaFace, SCRFD, YOLOv5-Face, YOLOv8-Face).
|
||||
|
||||
---
|
||||
|
||||
## 106-Point Landmarks
|
||||
|
||||
### Basic Usage
|
||||
|
||||
```python
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.landmark import Landmark106
|
||||
|
||||
detector = RetinaFace()
|
||||
landmarker = Landmark106()
|
||||
|
||||
# Detect face
|
||||
faces = detector.detect(image)
|
||||
|
||||
# Get detailed landmarks
|
||||
if faces:
|
||||
landmarks = landmarker.get_landmarks(image, faces[0].bbox)
|
||||
print(f"Landmarks shape: {landmarks.shape}") # (106, 2)
|
||||
```
|
||||
|
||||
### Landmark Groups
|
||||
|
||||
| Range | Group | Points |
|
||||
|-------|-------|--------|
|
||||
| 0-32 | Face Contour | 33 |
|
||||
| 33-50 | Eyebrows | 18 |
|
||||
| 51-62 | Nose | 12 |
|
||||
| 63-86 | Eyes | 24 |
|
||||
| 87-105 | Mouth | 19 |
|
||||
|
||||
### Extract Specific Features
|
||||
|
||||
```python
|
||||
landmarks = landmarker.get_landmarks(image, face.bbox)
|
||||
|
||||
# Face contour
|
||||
contour = landmarks[0:33]
|
||||
|
||||
# Left eyebrow
|
||||
left_eyebrow = landmarks[33:42]
|
||||
|
||||
# Right eyebrow
|
||||
right_eyebrow = landmarks[42:51]
|
||||
|
||||
# Nose
|
||||
nose = landmarks[51:63]
|
||||
|
||||
# Left eye
|
||||
left_eye = landmarks[63:72]
|
||||
|
||||
# Right eye
|
||||
right_eye = landmarks[76:84]
|
||||
|
||||
# Mouth
|
||||
mouth = landmarks[87:106]
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 5-Point Landmarks (Detection)
|
||||
|
||||
All detection models provide 5-point landmarks:
|
||||
|
||||
```python
|
||||
from uniface.detection import RetinaFace
|
||||
|
||||
detector = RetinaFace()
|
||||
faces = detector.detect(image)
|
||||
|
||||
if faces:
|
||||
landmarks_5 = faces[0].landmarks
|
||||
print(f"Shape: {landmarks_5.shape}") # (5, 2)
|
||||
|
||||
left_eye = landmarks_5[0]
|
||||
right_eye = landmarks_5[1]
|
||||
nose = landmarks_5[2]
|
||||
left_mouth = landmarks_5[3]
|
||||
right_mouth = landmarks_5[4]
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Visualization
|
||||
|
||||
### Draw 106 Landmarks
|
||||
|
||||
```python
|
||||
import cv2
|
||||
|
||||
def draw_landmarks(image, landmarks, color=(0, 255, 0), radius=2):
|
||||
"""Draw landmarks on image."""
|
||||
for x, y in landmarks.astype(int):
|
||||
cv2.circle(image, (x, y), radius, color, -1)
|
||||
return image
|
||||
|
||||
# Usage
|
||||
landmarks = landmarker.get_landmarks(image, face.bbox)
|
||||
image_with_landmarks = draw_landmarks(image.copy(), landmarks)
|
||||
cv2.imwrite("landmarks.jpg", image_with_landmarks)
|
||||
```
|
||||
|
||||
### Draw with Connections
|
||||
|
||||
```python
|
||||
def draw_landmarks_with_connections(image, landmarks):
|
||||
"""Draw landmarks with facial feature connections."""
|
||||
landmarks = landmarks.astype(int)
|
||||
|
||||
# Face contour (0-32)
|
||||
for i in range(32):
|
||||
cv2.line(image, tuple(landmarks[i]), tuple(landmarks[i+1]), (255, 255, 0), 1)
|
||||
|
||||
# Left eyebrow (33-41)
|
||||
for i in range(33, 41):
|
||||
cv2.line(image, tuple(landmarks[i]), tuple(landmarks[i+1]), (0, 255, 0), 1)
|
||||
|
||||
# Right eyebrow (42-50)
|
||||
for i in range(42, 50):
|
||||
cv2.line(image, tuple(landmarks[i]), tuple(landmarks[i+1]), (0, 255, 0), 1)
|
||||
|
||||
# Nose (51-62)
|
||||
for i in range(51, 62):
|
||||
cv2.line(image, tuple(landmarks[i]), tuple(landmarks[i+1]), (0, 0, 255), 1)
|
||||
|
||||
# Draw points
|
||||
for x, y in landmarks:
|
||||
cv2.circle(image, (x, y), 2, (0, 255, 255), -1)
|
||||
|
||||
return image
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Use Cases
|
||||
|
||||
### Face Alignment
|
||||
|
||||
```python
|
||||
from uniface.face_utils import face_alignment
|
||||
|
||||
# Align face using 5-point landmarks
|
||||
aligned = face_alignment(image, faces[0].landmarks)
|
||||
# Returns: 112x112 aligned face
|
||||
```
|
||||
|
||||
### Eye Aspect Ratio (Blink Detection)
|
||||
|
||||
```python
|
||||
import numpy as np
|
||||
|
||||
def eye_aspect_ratio(eye_landmarks):
|
||||
"""Calculate eye aspect ratio for blink detection."""
|
||||
# Vertical distances
|
||||
v1 = np.linalg.norm(eye_landmarks[1] - eye_landmarks[5])
|
||||
v2 = np.linalg.norm(eye_landmarks[2] - eye_landmarks[4])
|
||||
|
||||
# Horizontal distance
|
||||
h = np.linalg.norm(eye_landmarks[0] - eye_landmarks[3])
|
||||
|
||||
ear = (v1 + v2) / (2.0 * h)
|
||||
return ear
|
||||
|
||||
# Usage with 106-point landmarks
|
||||
left_eye = landmarks[63:72] # Approximate eye points
|
||||
ear = eye_aspect_ratio(left_eye)
|
||||
|
||||
if ear < 0.2:
|
||||
print("Eye closed (blink detected)")
|
||||
```
|
||||
|
||||
### Head Pose Estimation
|
||||
|
||||
```python
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
def estimate_head_pose(landmarks, image_shape):
|
||||
"""Estimate head pose from facial landmarks."""
|
||||
# 3D model points (generic face model)
|
||||
model_points = np.array([
|
||||
(0.0, 0.0, 0.0), # Nose tip
|
||||
(0.0, -330.0, -65.0), # Chin
|
||||
(-225.0, 170.0, -135.0), # Left eye corner
|
||||
(225.0, 170.0, -135.0), # Right eye corner
|
||||
(-150.0, -150.0, -125.0), # Left mouth corner
|
||||
(150.0, -150.0, -125.0) # Right mouth corner
|
||||
], dtype=np.float64)
|
||||
|
||||
# 2D image points (from 106 landmarks)
|
||||
image_points = np.array([
|
||||
landmarks[51], # Nose tip
|
||||
landmarks[16], # Chin
|
||||
landmarks[63], # Left eye corner
|
||||
landmarks[76], # Right eye corner
|
||||
landmarks[87], # Left mouth corner
|
||||
landmarks[93] # Right mouth corner
|
||||
], dtype=np.float64)
|
||||
|
||||
# Camera matrix
|
||||
h, w = image_shape[:2]
|
||||
focal_length = w
|
||||
center = (w / 2, h / 2)
|
||||
camera_matrix = np.array([
|
||||
[focal_length, 0, center[0]],
|
||||
[0, focal_length, center[1]],
|
||||
[0, 0, 1]
|
||||
], dtype=np.float64)
|
||||
|
||||
# Solve PnP
|
||||
dist_coeffs = np.zeros((4, 1))
|
||||
success, rotation_vector, translation_vector = cv2.solvePnP(
|
||||
model_points, image_points, camera_matrix, dist_coeffs
|
||||
)
|
||||
|
||||
return rotation_vector, translation_vector
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Factory Function
|
||||
|
||||
```python
|
||||
from uniface.landmark import create_landmarker
|
||||
|
||||
landmarker = create_landmarker() # Returns Landmark106
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## See Also
|
||||
|
||||
- [Detection Module](detection.md) - Face detection with 5-point landmarks
|
||||
- [Attributes Module](attributes.md) - Age, gender, emotion
|
||||
- [Gaze Module](gaze.md) - Gaze estimation
|
||||
- [Concepts: Coordinate Systems](../concepts/coordinate-systems.md) - Landmark formats
|
||||
333
docs/modules/parsing.md
Normal file
@@ -0,0 +1,333 @@
|
||||
# Parsing
|
||||
|
||||
Face parsing segments faces into semantic components or face regions.
|
||||
|
||||
---
|
||||
|
||||
## Available Models
|
||||
|
||||
| Model | Backbone | Size | Output |
|
||||
|-------|----------|------|--------|
|
||||
| **BiSeNet ResNet18** :material-check-circle: | ResNet18 | 51 MB | 19 classes |
|
||||
| BiSeNet ResNet34 | ResNet34 | 89 MB | 19 classes |
|
||||
| XSeg | - | 67 MB | Mask |
|
||||
|
||||
---
|
||||
|
||||
## Basic Usage
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface.parsing import BiSeNet
|
||||
from uniface.draw import vis_parsing_maps
|
||||
|
||||
# Initialize parser
|
||||
parser = BiSeNet()
|
||||
|
||||
# Load face image (cropped)
|
||||
face_image = cv2.imread("face.jpg")
|
||||
|
||||
# Parse face
|
||||
mask = parser.parse(face_image)
|
||||
print(f"Mask shape: {mask.shape}") # (H, W)
|
||||
|
||||
# Visualize
|
||||
face_rgb = cv2.cvtColor(face_image, cv2.COLOR_BGR2RGB)
|
||||
vis_result = vis_parsing_maps(face_rgb, mask, save_image=False)
|
||||
|
||||
# Save result
|
||||
vis_bgr = cv2.cvtColor(vis_result, cv2.COLOR_RGB2BGR)
|
||||
cv2.imwrite("parsed.jpg", vis_bgr)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 19 Facial Component Classes
|
||||
|
||||
| ID | Class | ID | Class |
|
||||
|----|-------|----|-------|
|
||||
| 0 | Background | 10 | Nose |
|
||||
| 1 | Skin | 11 | Mouth |
|
||||
| 2 | Left Eyebrow | 12 | Upper Lip |
|
||||
| 3 | Right Eyebrow | 13 | Lower Lip |
|
||||
| 4 | Left Eye | 14 | Neck |
|
||||
| 5 | Right Eye | 15 | Necklace |
|
||||
| 6 | Eyeglasses | 16 | Cloth |
|
||||
| 7 | Left Ear | 17 | Hair |
|
||||
| 8 | Right Ear | 18 | Hat |
|
||||
| 9 | Earring | | |
|
||||
|
||||
---
|
||||
|
||||
## Model Variants
|
||||
|
||||
```python
|
||||
from uniface.parsing import BiSeNet
|
||||
from uniface.constants import ParsingWeights
|
||||
|
||||
# Default (ResNet18)
|
||||
parser = BiSeNet()
|
||||
|
||||
# Higher accuracy (ResNet34)
|
||||
parser = BiSeNet(model_name=ParsingWeights.RESNET34)
|
||||
```
|
||||
|
||||
| Variant | Params | Size |
|
||||
|---------|--------|------|
|
||||
| **RESNET18** :material-check-circle: | 13.3M | 51 MB |
|
||||
| RESNET34 | 24.1M | 89 MB |
|
||||
|
||||
---
|
||||
|
||||
## Full Pipeline
|
||||
|
||||
### With Face Detection
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.parsing import BiSeNet
|
||||
from uniface.draw import vis_parsing_maps
|
||||
|
||||
detector = RetinaFace()
|
||||
parser = BiSeNet()
|
||||
|
||||
image = cv2.imread("photo.jpg")
|
||||
faces = detector.detect(image)
|
||||
|
||||
for i, face in enumerate(faces):
|
||||
# Crop face
|
||||
x1, y1, x2, y2 = map(int, face.bbox)
|
||||
face_crop = image[y1:y2, x1:x2]
|
||||
|
||||
# Parse
|
||||
mask = parser.parse(face_crop)
|
||||
|
||||
# Visualize
|
||||
face_rgb = cv2.cvtColor(face_crop, cv2.COLOR_BGR2RGB)
|
||||
vis_result = vis_parsing_maps(face_rgb, mask, save_image=False)
|
||||
|
||||
# Save
|
||||
vis_bgr = cv2.cvtColor(vis_result, cv2.COLOR_RGB2BGR)
|
||||
cv2.imwrite(f"face_{i}_parsed.jpg", vis_bgr)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Extract Specific Components
|
||||
|
||||
### Get Single Component Mask
|
||||
|
||||
```python
|
||||
import numpy as np
|
||||
|
||||
# Parse face
|
||||
mask = parser.parse(face_image)
|
||||
|
||||
# Extract specific component
|
||||
SKIN = 1
|
||||
HAIR = 17
|
||||
LEFT_EYE = 4
|
||||
RIGHT_EYE = 5
|
||||
|
||||
# Binary mask for skin
|
||||
skin_mask = (mask == SKIN).astype(np.uint8) * 255
|
||||
|
||||
# Binary mask for hair
|
||||
hair_mask = (mask == HAIR).astype(np.uint8) * 255
|
||||
|
||||
# Binary mask for eyes
|
||||
eyes_mask = ((mask == LEFT_EYE) | (mask == RIGHT_EYE)).astype(np.uint8) * 255
|
||||
```
|
||||
|
||||
### Count Pixels per Component
|
||||
|
||||
```python
|
||||
import numpy as np
|
||||
|
||||
mask = parser.parse(face_image)
|
||||
|
||||
component_names = {
|
||||
0: 'Background', 1: 'Skin', 2: 'L-Eyebrow', 3: 'R-Eyebrow',
|
||||
4: 'L-Eye', 5: 'R-Eye', 6: 'Eyeglasses', 7: 'L-Ear', 8: 'R-Ear',
|
||||
9: 'Earring', 10: 'Nose', 11: 'Mouth',
|
||||
12: 'U-Lip', 13: 'L-Lip', 14: 'Neck', 15: 'Necklace',
|
||||
16: 'Cloth', 17: 'Hair', 18: 'Hat'
|
||||
}
|
||||
|
||||
for class_id in np.unique(mask):
|
||||
pixel_count = np.sum(mask == class_id)
|
||||
name = component_names.get(class_id, f'Class {class_id}')
|
||||
print(f"{name}: {pixel_count} pixels")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Applications
|
||||
|
||||
### Face Makeup
|
||||
|
||||
Apply virtual makeup using component masks:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
def apply_lip_color(image, mask, color=(180, 50, 50)):
|
||||
"""Apply lip color using parsing mask."""
|
||||
result = image.copy()
|
||||
|
||||
# Get lip mask (upper lip=12, lower lip=13)
|
||||
lip_mask = ((mask == 12) | (mask == 13)).astype(np.uint8)
|
||||
|
||||
# Create color overlay
|
||||
overlay = np.zeros_like(image)
|
||||
overlay[:] = color
|
||||
|
||||
# Alpha blend lip region
|
||||
alpha = 0.4
|
||||
mask_3ch = lip_mask[:, :, np.newaxis]
|
||||
result = np.where(mask_3ch, (image * (1 - alpha) + overlay * alpha).astype(np.uint8), result)
|
||||
|
||||
return result
|
||||
```
|
||||
|
||||
### Background Replacement
|
||||
|
||||
```python
|
||||
def replace_background(image, mask, background):
|
||||
"""Replace background using parsing mask."""
|
||||
# Create foreground mask (everything except background)
|
||||
foreground_mask = (mask != 0).astype(np.uint8)
|
||||
|
||||
# Resize background to match image
|
||||
background = cv2.resize(background, (image.shape[1], image.shape[0]))
|
||||
|
||||
# Combine
|
||||
result = image.copy()
|
||||
result[foreground_mask == 0] = background[foreground_mask == 0]
|
||||
|
||||
return result
|
||||
```
|
||||
|
||||
### Hair Segmentation
|
||||
|
||||
```python
|
||||
def get_hair_mask(mask):
|
||||
"""Extract clean hair mask."""
|
||||
hair_mask = (mask == 17).astype(np.uint8) * 255
|
||||
|
||||
# Clean up with morphological operations
|
||||
kernel = np.ones((5, 5), np.uint8)
|
||||
hair_mask = cv2.morphologyEx(hair_mask, cv2.MORPH_CLOSE, kernel)
|
||||
hair_mask = cv2.morphologyEx(hair_mask, cv2.MORPH_OPEN, kernel)
|
||||
|
||||
return hair_mask
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Visualization Options
|
||||
|
||||
```python
|
||||
from uniface.draw import vis_parsing_maps
|
||||
|
||||
# Default visualization
|
||||
vis_result = vis_parsing_maps(face_rgb, mask)
|
||||
|
||||
# With different parameters
|
||||
vis_result = vis_parsing_maps(
|
||||
face_rgb,
|
||||
mask,
|
||||
save_image=False, # Don't save to file
|
||||
)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## XSeg
|
||||
|
||||
XSeg outputs a mask for face regions. Unlike BiSeNet which works on bbox crops, XSeg requires 5-point landmarks for face alignment.
|
||||
|
||||
### Basic Usage
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.parsing import XSeg
|
||||
|
||||
detector = RetinaFace()
|
||||
parser = XSeg()
|
||||
|
||||
image = cv2.imread("photo.jpg")
|
||||
faces = detector.detect(image)
|
||||
|
||||
for face in faces:
|
||||
if face.landmarks is not None:
|
||||
mask = parser.parse(image, landmarks=face.landmarks)
|
||||
print(f"Mask shape: {mask.shape}") # (H, W), values in [0, 1]
|
||||
```
|
||||
|
||||
### Parameters
|
||||
|
||||
```python
|
||||
from uniface.parsing import XSeg
|
||||
|
||||
# Default settings
|
||||
parser = XSeg()
|
||||
|
||||
# Custom settings
|
||||
parser = XSeg(
|
||||
align_size=256, # Face alignment size
|
||||
blur_sigma=5, # Gaussian blur for smoothing (0 = raw)
|
||||
)
|
||||
```
|
||||
|
||||
| Parameter | Default | Description |
|
||||
|-----------|---------|-------------|
|
||||
| `align_size` | 256 | Face alignment output size |
|
||||
| `blur_sigma` | 0 | Mask smoothing (0 = no blur) |
|
||||
|
||||
### Methods
|
||||
|
||||
```python
|
||||
# Full pipeline: align -> segment -> warp back to original space
|
||||
mask = parser.parse(image, landmarks=landmarks)
|
||||
|
||||
# For pre-aligned face crops
|
||||
mask = parser.parse_aligned(face_crop)
|
||||
|
||||
# Get mask + crop + inverse matrix for custom warping
|
||||
mask, face_crop, inverse_matrix = parser.parse_with_inverse(image, landmarks)
|
||||
```
|
||||
|
||||
### BiSeNet vs XSeg
|
||||
|
||||
| Feature | BiSeNet | XSeg |
|
||||
|---------|---------|------|
|
||||
| Output | 19 class labels | Mask [0, 1] |
|
||||
| Input | Bbox crop | Requires landmarks |
|
||||
| Use case | Facial components | Face region extraction |
|
||||
|
||||
---
|
||||
|
||||
## Factory Function
|
||||
|
||||
```python
|
||||
from uniface.parsing import create_face_parser
|
||||
from uniface.constants import ParsingWeights, XSegWeights
|
||||
|
||||
# BiSeNet (default)
|
||||
parser = create_face_parser()
|
||||
|
||||
# XSeg
|
||||
parser = create_face_parser(XSegWeights.DEFAULT)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Next Steps
|
||||
|
||||
- [Gaze](gaze.md) - Gaze estimation
|
||||
- [Privacy](privacy.md) - Face anonymization
|
||||
- [Detection](detection.md) - Face detection
|
||||
260
docs/modules/privacy.md
Normal file
@@ -0,0 +1,260 @@
|
||||
# Privacy
|
||||
|
||||
Face anonymization protects privacy by blurring or obscuring faces in images and videos.
|
||||
|
||||
---
|
||||
|
||||
## Available Methods
|
||||
|
||||
| Method | Description |
|
||||
|--------|-------------|
|
||||
| **pixelate** | Blocky pixelation |
|
||||
| **gaussian** | Smooth blur |
|
||||
| **blackout** | Solid color fill |
|
||||
| **elliptical** | Oval-shaped blur |
|
||||
| **median** | Edge-preserving blur |
|
||||
|
||||
---
|
||||
|
||||
## Quick Start
|
||||
|
||||
```python
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.privacy import BlurFace
|
||||
import cv2
|
||||
|
||||
detector = RetinaFace()
|
||||
blurrer = BlurFace(method='gaussian', blur_strength=5.0)
|
||||
|
||||
image = cv2.imread("photo.jpg")
|
||||
faces = detector.detect(image)
|
||||
anonymized = blurrer.anonymize(image, faces)
|
||||
|
||||
cv2.imwrite("anonymized.jpg", anonymized)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Blur Methods
|
||||
|
||||
### Pixelate
|
||||
|
||||
Blocky pixelation effect (common in news media):
|
||||
|
||||
```python
|
||||
blurrer = BlurFace(method='pixelate', pixel_blocks=15)
|
||||
```
|
||||
|
||||
| Parameter | Default | Description |
|
||||
|-----------|---------|-------------|
|
||||
| `pixel_blocks` | 15 | Number of blocks (lower = more pixelated) |
|
||||
|
||||
### Gaussian
|
||||
|
||||
Smooth, natural-looking blur:
|
||||
|
||||
```python
|
||||
blurrer = BlurFace(method='gaussian', blur_strength=3.0)
|
||||
```
|
||||
|
||||
| Parameter | Default | Description |
|
||||
|-----------|---------|-------------|
|
||||
| `blur_strength` | 3.0 | Blur intensity (higher = more blur) |
|
||||
|
||||
### Blackout
|
||||
|
||||
Solid color fill for maximum privacy:
|
||||
|
||||
```python
|
||||
blurrer = BlurFace(method='blackout', color=(0, 0, 0))
|
||||
```
|
||||
|
||||
| Parameter | Default | Description |
|
||||
|-----------|---------|-------------|
|
||||
| `color` | (0, 0, 0) | Fill color (BGR format) |
|
||||
|
||||
### Elliptical
|
||||
|
||||
Oval-shaped blur matching natural face shape:
|
||||
|
||||
```python
|
||||
blurrer = BlurFace(method='elliptical', blur_strength=3.0, margin=20)
|
||||
```
|
||||
|
||||
| Parameter | Default | Description |
|
||||
|-----------|---------|-------------|
|
||||
| `blur_strength` | 3.0 | Blur intensity |
|
||||
| `margin` | 20 | Margin around face |
|
||||
|
||||
### Median
|
||||
|
||||
Edge-preserving blur with artistic effect:
|
||||
|
||||
```python
|
||||
blurrer = BlurFace(method='median', blur_strength=3.0)
|
||||
```
|
||||
|
||||
| Parameter | Default | Description |
|
||||
|-----------|---------|-------------|
|
||||
| `blur_strength` | 3.0 | Blur intensity |
|
||||
|
||||
---
|
||||
|
||||
## In-Place Processing
|
||||
|
||||
Modify image directly (faster, saves memory):
|
||||
|
||||
```python
|
||||
blurrer = BlurFace(method='pixelate')
|
||||
|
||||
# In-place modification
|
||||
result = blurrer.anonymize(image, faces, inplace=True)
|
||||
# 'image' and 'result' point to the same array
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Real-Time Anonymization
|
||||
|
||||
### Webcam
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.privacy import BlurFace
|
||||
|
||||
detector = RetinaFace()
|
||||
blurrer = BlurFace(method='pixelate')
|
||||
|
||||
cap = cv2.VideoCapture(0)
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
faces = detector.detect(frame)
|
||||
frame = blurrer.anonymize(frame, faces, inplace=True)
|
||||
|
||||
cv2.imshow('Anonymized', frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
```
|
||||
|
||||
### Video File
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.privacy import BlurFace
|
||||
|
||||
detector = RetinaFace()
|
||||
blurrer = BlurFace(method='gaussian')
|
||||
|
||||
cap = cv2.VideoCapture("input_video.mp4")
|
||||
fps = cap.get(cv2.CAP_PROP_FPS)
|
||||
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
||||
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
||||
|
||||
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
|
||||
out = cv2.VideoWriter('output_video.mp4', fourcc, fps, (width, height))
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
faces = detector.detect(frame)
|
||||
frame = blurrer.anonymize(frame, faces, inplace=True)
|
||||
out.write(frame)
|
||||
|
||||
cap.release()
|
||||
out.release()
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Selective Anonymization
|
||||
|
||||
### Exclude Specific Faces
|
||||
|
||||
```python
|
||||
def anonymize_except(image, all_faces, exclude_embeddings, recognizer, threshold=0.6):
|
||||
"""Anonymize all faces except those matching exclude_embeddings."""
|
||||
faces_to_blur = []
|
||||
|
||||
for face in all_faces:
|
||||
# Get embedding
|
||||
embedding = recognizer.get_normalized_embedding(image, face.landmarks)
|
||||
|
||||
# Check if should be excluded
|
||||
should_exclude = False
|
||||
for ref_emb in exclude_embeddings:
|
||||
similarity = np.dot(embedding, ref_emb.T)[0][0]
|
||||
if similarity > threshold:
|
||||
should_exclude = True
|
||||
break
|
||||
|
||||
if not should_exclude:
|
||||
faces_to_blur.append(face)
|
||||
|
||||
# Blur remaining faces
|
||||
return blurrer.anonymize(image, faces_to_blur)
|
||||
```
|
||||
|
||||
### Confidence-Based
|
||||
|
||||
```python
|
||||
def anonymize_low_confidence(image, faces, blurrer, confidence_threshold=0.8):
|
||||
"""Anonymize faces below confidence threshold."""
|
||||
faces_to_blur = [f for f in faces if f.confidence < confidence_threshold]
|
||||
return blurrer.anonymize(image, faces_to_blur)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Comparison
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.privacy import BlurFace
|
||||
|
||||
detector = RetinaFace()
|
||||
image = cv2.imread("photo.jpg")
|
||||
faces = detector.detect(image)
|
||||
|
||||
methods = ['pixelate', 'gaussian', 'blackout', 'elliptical', 'median']
|
||||
|
||||
for method in methods:
|
||||
blurrer = BlurFace(method=method)
|
||||
result = blurrer.anonymize(image.copy(), faces)
|
||||
cv2.imwrite(f"anonymized_{method}.jpg", result)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Command-Line Tool
|
||||
|
||||
```bash
|
||||
# Anonymize image with pixelation
|
||||
python tools/anonymize.py --source photo.jpg
|
||||
|
||||
# Real-time webcam
|
||||
python tools/anonymize.py --source 0 --method gaussian
|
||||
|
||||
# Custom blur strength
|
||||
python tools/anonymize.py --source photo.jpg --method gaussian --blur-strength 5.0
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Next Steps
|
||||
|
||||
- [Anonymize Stream Recipe](../recipes/anonymize-stream.md) - Video pipeline
|
||||
- [Detection](detection.md) - Face detection options
|
||||
- [Batch Processing Recipe](../recipes/batch-processing.md) - Process multiple files
|
||||
301
docs/modules/recognition.md
Normal file
@@ -0,0 +1,301 @@
|
||||
# Recognition
|
||||
|
||||
Face recognition extracts embeddings for identity verification and face search.
|
||||
|
||||
---
|
||||
|
||||
## Available Models
|
||||
|
||||
| Model | Backbone | Size | Embedding Dim |
|
||||
|-------|----------|------|---------------|
|
||||
| **AdaFace** | IR-18/IR-101 | 92-249 MB | 512 |
|
||||
| **ArcFace** | MobileNet/ResNet | 8-166 MB | 512 |
|
||||
| **MobileFace** | MobileNet V2/V3 | 1-10 MB | 512 |
|
||||
| **SphereFace** | Sphere20/36 | 50-92 MB | 512 |
|
||||
|
||||
---
|
||||
|
||||
## AdaFace
|
||||
|
||||
Face recognition using adaptive margin based on image quality.
|
||||
|
||||
### Basic Usage
|
||||
|
||||
```python
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.recognition import AdaFace
|
||||
|
||||
detector = RetinaFace()
|
||||
recognizer = AdaFace()
|
||||
|
||||
# Detect face
|
||||
faces = detector.detect(image)
|
||||
|
||||
# Extract embedding
|
||||
if faces:
|
||||
embedding = recognizer.get_normalized_embedding(image, faces[0].landmarks)
|
||||
print(f"Embedding shape: {embedding.shape}") # (1, 512)
|
||||
```
|
||||
|
||||
### Model Variants
|
||||
|
||||
```python
|
||||
from uniface.recognition import AdaFace
|
||||
from uniface.constants import AdaFaceWeights
|
||||
|
||||
# Lightweight (default)
|
||||
recognizer = AdaFace(model_name=AdaFaceWeights.IR_18)
|
||||
|
||||
# High accuracy
|
||||
recognizer = AdaFace(model_name=AdaFaceWeights.IR_101)
|
||||
|
||||
# Force CPU execution
|
||||
recognizer = AdaFace(providers=['CPUExecutionProvider'])
|
||||
```
|
||||
|
||||
| Variant | Dataset | Size | IJB-B | IJB-C |
|
||||
|---------|---------|------|-------|-------|
|
||||
| **IR_18** :material-check-circle: | WebFace4M | 92 MB | 93.03% | 94.99% |
|
||||
| IR_101 | WebFace12M | 249 MB | - | 97.66% |
|
||||
|
||||
!!! info "Benchmark Metrics"
|
||||
IJB-B and IJB-C accuracy reported as TAR@FAR=0.01%
|
||||
|
||||
---
|
||||
|
||||
## ArcFace
|
||||
|
||||
Face recognition using additive angular margin loss.
|
||||
|
||||
### Basic Usage
|
||||
|
||||
```python
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.recognition import ArcFace
|
||||
|
||||
detector = RetinaFace()
|
||||
recognizer = ArcFace()
|
||||
|
||||
# Detect face
|
||||
faces = detector.detect(image)
|
||||
|
||||
# Extract embedding
|
||||
if faces:
|
||||
embedding = recognizer.get_normalized_embedding(image, faces[0].landmarks)
|
||||
print(f"Embedding shape: {embedding.shape}") # (1, 512)
|
||||
```
|
||||
|
||||
### Model Variants
|
||||
|
||||
```python
|
||||
from uniface.recognition import ArcFace
|
||||
from uniface.constants import ArcFaceWeights
|
||||
|
||||
# Lightweight (default)
|
||||
recognizer = ArcFace(model_name=ArcFaceWeights.MNET)
|
||||
|
||||
# High accuracy
|
||||
recognizer = ArcFace(model_name=ArcFaceWeights.RESNET)
|
||||
|
||||
# Force CPU execution
|
||||
recognizer = ArcFace(providers=['CPUExecutionProvider'])
|
||||
```
|
||||
|
||||
| Variant | Backbone | Size | LFW | CFP-FP | AgeDB-30 | IJB-C |
|
||||
|---------|----------|------|-----|--------|----------|-------|
|
||||
| **MNET** :material-check-circle: | MobileNet | 8 MB | 99.70% | 98.00% | 96.58% | 95.02% |
|
||||
| RESNET | ResNet50 | 166 MB | 99.83% | 99.33% | 98.23% | 97.25% |
|
||||
|
||||
!!! info "Training Data & Metrics"
|
||||
**Dataset**: Trained on WebFace600K (600K images)
|
||||
|
||||
**Accuracy**: IJB-C reported as TAR@FAR=1e-4
|
||||
|
||||
---
|
||||
|
||||
## MobileFace
|
||||
|
||||
Lightweight face recognition models with MobileNet backbones.
|
||||
|
||||
### Basic Usage
|
||||
|
||||
```python
|
||||
from uniface.recognition import MobileFace
|
||||
|
||||
recognizer = MobileFace()
|
||||
embedding = recognizer.get_normalized_embedding(image, landmarks)
|
||||
```
|
||||
|
||||
### Model Variants
|
||||
|
||||
```python
|
||||
from uniface.recognition import MobileFace
|
||||
from uniface.constants import MobileFaceWeights
|
||||
|
||||
# Ultra-lightweight
|
||||
recognizer = MobileFace(model_name=MobileFaceWeights.MNET_025)
|
||||
|
||||
# Balanced (default)
|
||||
recognizer = MobileFace(model_name=MobileFaceWeights.MNET_V2)
|
||||
|
||||
# Higher accuracy
|
||||
recognizer = MobileFace(model_name=MobileFaceWeights.MNET_V3_LARGE)
|
||||
```
|
||||
|
||||
| Variant | Params | Size | LFW | CALFW | CPLFW | AgeDB-30 |
|
||||
|---------|--------|------|-----|-------|-------|----------|
|
||||
| MNET_025 | 0.36M | 1 MB | 98.76% | 92.02% | 82.37% | 90.02% |
|
||||
| **MNET_V2** :material-check-circle: | 2.29M | 4 MB | 99.55% | 94.87% | 86.89% | 95.16% |
|
||||
| MNET_V3_SMALL | 1.25M | 3 MB | 99.30% | 93.77% | 85.29% | 92.79% |
|
||||
| MNET_V3_LARGE | 3.52M | 10 MB | 99.53% | 94.56% | 86.79% | 95.13% |
|
||||
|
||||
---
|
||||
|
||||
## SphereFace
|
||||
|
||||
Face recognition using angular softmax loss (A-Softmax).
|
||||
|
||||
### Basic Usage
|
||||
|
||||
```python
|
||||
from uniface.recognition import SphereFace
|
||||
from uniface.constants import SphereFaceWeights
|
||||
|
||||
recognizer = SphereFace(model_name=SphereFaceWeights.SPHERE20)
|
||||
embedding = recognizer.get_normalized_embedding(image, landmarks)
|
||||
```
|
||||
|
||||
| Variant | Params | Size | LFW | CALFW | CPLFW | AgeDB-30 |
|
||||
|---------|--------|------|-----|-------|-------|----------|
|
||||
| SPHERE20 | 24.5M | 50 MB | 99.67% | 95.61% | 88.75% | 96.58% |
|
||||
| SPHERE36 | 34.6M | 92 MB | 99.72% | 95.64% | 89.92% | 96.83% |
|
||||
|
||||
---
|
||||
|
||||
## Face Comparison
|
||||
|
||||
### Compute Similarity
|
||||
|
||||
```python
|
||||
from uniface.face_utils import compute_similarity
|
||||
import numpy as np
|
||||
|
||||
# Extract embeddings
|
||||
emb1 = recognizer.get_normalized_embedding(image1, landmarks1)
|
||||
emb2 = recognizer.get_normalized_embedding(image2, landmarks2)
|
||||
|
||||
# Method 1: Using utility function
|
||||
similarity = compute_similarity(emb1, emb2)
|
||||
|
||||
# Method 2: Direct computation
|
||||
similarity = np.dot(emb1, emb2.T)[0][0]
|
||||
|
||||
print(f"Similarity: {similarity:.4f}")
|
||||
```
|
||||
|
||||
### Threshold Guidelines
|
||||
|
||||
| Threshold | Decision | Use Case |
|
||||
|-----------|----------|----------|
|
||||
| > 0.7 | Very high confidence | Security-critical |
|
||||
| > 0.6 | Same person | General verification |
|
||||
| 0.4 - 0.6 | Uncertain | Manual review needed |
|
||||
| < 0.4 | Different people | Rejection |
|
||||
|
||||
---
|
||||
|
||||
## Face Alignment
|
||||
|
||||
Recognition models require aligned faces. UniFace handles this internally:
|
||||
|
||||
```python
|
||||
# Alignment is done automatically
|
||||
embedding = recognizer.get_normalized_embedding(image, landmarks)
|
||||
|
||||
# Or manually align
|
||||
from uniface.face_utils import face_alignment
|
||||
|
||||
aligned_face = face_alignment(image, landmarks)
|
||||
# Returns: 112x112 aligned face image
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Building a Face Database
|
||||
|
||||
```python
|
||||
import numpy as np
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.recognition import ArcFace
|
||||
|
||||
detector = RetinaFace()
|
||||
recognizer = ArcFace()
|
||||
|
||||
# Build database
|
||||
database = {}
|
||||
for person_id, image_path in person_images.items():
|
||||
image = cv2.imread(image_path)
|
||||
faces = detector.detect(image)
|
||||
|
||||
if faces:
|
||||
embedding = recognizer.get_normalized_embedding(image, faces[0].landmarks)
|
||||
database[person_id] = embedding
|
||||
|
||||
# Save for later use
|
||||
np.savez('face_database.npz', **database)
|
||||
|
||||
# Load database
|
||||
data = np.load('face_database.npz')
|
||||
database = {key: data[key] for key in data.files}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Face Search
|
||||
|
||||
Find a person in a database:
|
||||
|
||||
```python
|
||||
def search_face(query_embedding, database, threshold=0.6):
|
||||
"""Find best match in database."""
|
||||
best_match = None
|
||||
best_similarity = -1
|
||||
|
||||
for person_id, db_embedding in database.items():
|
||||
similarity = np.dot(query_embedding, db_embedding.T)[0][0]
|
||||
|
||||
if similarity > best_similarity and similarity > threshold:
|
||||
best_similarity = similarity
|
||||
best_match = person_id
|
||||
|
||||
return best_match, best_similarity
|
||||
|
||||
# Usage
|
||||
query_embedding = recognizer.get_normalized_embedding(query_image, landmarks)
|
||||
match, similarity = search_face(query_embedding, database)
|
||||
|
||||
if match:
|
||||
print(f"Found: {match} (similarity: {similarity:.4f})")
|
||||
else:
|
||||
print("No match found")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Factory Function
|
||||
|
||||
```python
|
||||
from uniface.recognition import create_recognizer
|
||||
|
||||
# Available methods: 'arcface', 'adaface', 'mobileface', 'sphereface'
|
||||
recognizer = create_recognizer('arcface')
|
||||
recognizer = create_recognizer('adaface')
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## See Also
|
||||
|
||||
- [Detection Module](detection.md) - Detect faces first
|
||||
- [Face Search Recipe](../recipes/face-search.md) - Complete search system
|
||||
- [Thresholds](../concepts/thresholds-calibration.md) - Calibration guide
|
||||
267
docs/modules/spoofing.md
Normal file
@@ -0,0 +1,267 @@
|
||||
# Anti-Spoofing
|
||||
|
||||
Face anti-spoofing detects whether a face is real (live) or fake (photo, video replay, mask).
|
||||
|
||||
---
|
||||
|
||||
## Available Models
|
||||
|
||||
| Model | Size |
|
||||
|-------|------|
|
||||
| MiniFASNet V1SE | 1.2 MB |
|
||||
| **MiniFASNet V2** :material-check-circle: | 1.2 MB |
|
||||
|
||||
---
|
||||
|
||||
## Basic Usage
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.spoofing import MiniFASNet
|
||||
|
||||
detector = RetinaFace()
|
||||
spoofer = MiniFASNet()
|
||||
|
||||
image = cv2.imread("photo.jpg")
|
||||
faces = detector.detect(image)
|
||||
|
||||
for face in faces:
|
||||
result = spoofer.predict(image, face.bbox)
|
||||
|
||||
label = "Real" if result.is_real else "Fake"
|
||||
print(f"{label}: {result.confidence:.1%}")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Output Format
|
||||
|
||||
```python
|
||||
result = spoofer.predict(image, face.bbox)
|
||||
|
||||
# SpoofingResult dataclass
|
||||
result.is_real # True = real, False = fake
|
||||
result.confidence # 0.0 to 1.0
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Model Variants
|
||||
|
||||
```python
|
||||
from uniface.spoofing import MiniFASNet
|
||||
from uniface.constants import MiniFASNetWeights
|
||||
|
||||
# Default (V2, recommended)
|
||||
spoofer = MiniFASNet()
|
||||
|
||||
# V1SE variant
|
||||
spoofer = MiniFASNet(model_name=MiniFASNetWeights.V1SE)
|
||||
```
|
||||
|
||||
| Variant | Size | Scale Factor |
|
||||
|---------|------|--------------|
|
||||
| V1SE | 1.2 MB | 4.0 |
|
||||
| **V2** :material-check-circle: | 1.2 MB | 2.7 |
|
||||
|
||||
---
|
||||
|
||||
## Confidence Thresholds
|
||||
|
||||
`result.is_real` is based on the model's top predicted class (argmax). If you want stricter behavior,
|
||||
apply your own confidence threshold:
|
||||
|
||||
```python
|
||||
result = spoofer.predict(image, face.bbox)
|
||||
|
||||
# High security (fewer false accepts)
|
||||
HIGH_THRESHOLD = 0.7
|
||||
if result.is_real and result.confidence > HIGH_THRESHOLD:
|
||||
print("Real (high confidence)")
|
||||
else:
|
||||
print("Suspicious")
|
||||
|
||||
# Balanced (argmax decision)
|
||||
if result.is_real:
|
||||
print("Real")
|
||||
else:
|
||||
print("Fake")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Visualization
|
||||
|
||||
```python
|
||||
import cv2
|
||||
|
||||
def draw_spoofing_result(image, face, result):
|
||||
"""Draw spoofing result on image."""
|
||||
x1, y1, x2, y2 = map(int, face.bbox)
|
||||
|
||||
# Color based on result
|
||||
color = (0, 255, 0) if result.is_real else (0, 0, 255)
|
||||
label = "Real" if result.is_real else "Fake"
|
||||
|
||||
# Draw bounding box
|
||||
cv2.rectangle(image, (x1, y1), (x2, y2), color, 2)
|
||||
|
||||
# Draw label
|
||||
text = f"{label}: {result.confidence:.1%}"
|
||||
cv2.putText(image, text, (x1, y1 - 10),
|
||||
cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)
|
||||
|
||||
return image
|
||||
|
||||
# Usage
|
||||
for face in faces:
|
||||
result = spoofer.predict(image, face.bbox)
|
||||
image = draw_spoofing_result(image, face, result)
|
||||
|
||||
cv2.imwrite("spoofing_result.jpg", image)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Real-Time Liveness Detection
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.spoofing import MiniFASNet
|
||||
|
||||
detector = RetinaFace()
|
||||
spoofer = MiniFASNet()
|
||||
|
||||
cap = cv2.VideoCapture(0)
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
faces = detector.detect(frame)
|
||||
|
||||
for face in faces:
|
||||
result = spoofer.predict(frame, face.bbox)
|
||||
|
||||
# Draw result
|
||||
x1, y1, x2, y2 = map(int, face.bbox)
|
||||
color = (0, 255, 0) if result.is_real else (0, 0, 255)
|
||||
label = f"{'Real' if result.is_real else 'Fake'}: {result.confidence:.0%}"
|
||||
|
||||
cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
|
||||
cv2.putText(frame, label, (x1, y1 - 10),
|
||||
cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)
|
||||
|
||||
cv2.imshow("Liveness Detection", frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Use Cases
|
||||
|
||||
### Access Control
|
||||
|
||||
```python
|
||||
def verify_liveness(image, face, spoofer, threshold=0.6):
|
||||
"""Verify face is real for access control."""
|
||||
result = spoofer.predict(image, face.bbox)
|
||||
|
||||
if result.is_real and result.confidence > threshold:
|
||||
return True, result.confidence
|
||||
return False, result.confidence
|
||||
|
||||
# Usage
|
||||
is_live, confidence = verify_liveness(image, face, spoofer)
|
||||
if is_live:
|
||||
print(f"Access granted (confidence: {confidence:.1%})")
|
||||
else:
|
||||
print(f"Access denied - possible spoof attempt")
|
||||
```
|
||||
|
||||
### Multi-Frame Verification
|
||||
|
||||
For higher security, verify across multiple frames:
|
||||
|
||||
```python
|
||||
def verify_liveness_multiframe(frames, detector, spoofer, min_real=3):
|
||||
"""Verify liveness across multiple frames."""
|
||||
real_count = 0
|
||||
|
||||
for frame in frames:
|
||||
faces = detector.detect(frame)
|
||||
if not faces:
|
||||
continue
|
||||
|
||||
result = spoofer.predict(frame, faces[0].bbox)
|
||||
if result.is_real:
|
||||
real_count += 1
|
||||
|
||||
return real_count >= min_real
|
||||
|
||||
# Collect frames and verify
|
||||
frames = []
|
||||
for _ in range(5):
|
||||
ret, frame = cap.read()
|
||||
if ret:
|
||||
frames.append(frame)
|
||||
|
||||
is_verified = verify_liveness_multiframe(frames, detector, spoofer)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Attack Types Detected
|
||||
|
||||
MiniFASNet can detect various spoof attacks:
|
||||
|
||||
| Attack Type | Detection |
|
||||
|-------------|-----------|
|
||||
| Printed photos | ✅ |
|
||||
| Screen replay | ✅ |
|
||||
| Video replay | ✅ |
|
||||
| Paper masks | ✅ |
|
||||
| 3D masks | Limited |
|
||||
|
||||
!!! warning "Limitations"
|
||||
- High-quality 3D masks may not be detected
|
||||
- Performance varies with lighting and image quality
|
||||
- Always combine with other verification methods for high-security applications
|
||||
|
||||
---
|
||||
|
||||
## Command-Line Tool
|
||||
|
||||
```bash
|
||||
# Image
|
||||
python tools/spoofing.py --source photo.jpg
|
||||
|
||||
# Webcam
|
||||
python tools/spoofing.py --source 0
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Factory Function
|
||||
|
||||
```python
|
||||
from uniface.spoofing import create_spoofer
|
||||
|
||||
spoofer = create_spoofer() # Returns MiniFASNet
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Next Steps
|
||||
|
||||
- [Privacy](privacy.md) - Face anonymization
|
||||
- [Detection](detection.md) - Face detection
|
||||
- [Recognition](recognition.md) - Face recognition
|
||||
263
docs/modules/tracking.md
Normal file
@@ -0,0 +1,263 @@
|
||||
# Tracking
|
||||
|
||||
Multi-object tracking using [BYTETracker](https://github.com/yakhyo/bytetrack-tracker) with Kalman filtering and IoU-based association. The tracker assigns persistent IDs to detected objects across video frames using a two-stage association strategy — first matching high-confidence detections, then low-confidence ones.
|
||||
|
||||
---
|
||||
|
||||
## How It Works
|
||||
|
||||
BYTETracker takes detection bounding boxes as input and returns tracked bounding boxes with persistent IDs. It does not depend on any specific detector — any source of `[x1, y1, x2, y2, score]` arrays will work.
|
||||
|
||||
Each frame, the tracker:
|
||||
|
||||
1. Splits detections into high-confidence and low-confidence groups
|
||||
2. Matches high-confidence detections to existing tracks using IoU
|
||||
3. Matches remaining tracks to low-confidence detections (second chance)
|
||||
4. Starts new tracks for unmatched high-confidence detections
|
||||
5. Removes tracks that have been lost for too long
|
||||
|
||||
The Kalman filter predicts where each track will be in the next frame, which helps maintain associations even when detections are noisy.
|
||||
|
||||
---
|
||||
|
||||
## Basic Usage
|
||||
|
||||
```python
|
||||
import cv2
|
||||
import numpy as np
|
||||
from uniface.common import xyxy_to_cxcywh
|
||||
from uniface.detection import SCRFD
|
||||
from uniface.tracking import BYTETracker
|
||||
from uniface.draw import draw_tracks
|
||||
|
||||
detector = SCRFD()
|
||||
tracker = BYTETracker(track_thresh=0.5, track_buffer=30)
|
||||
|
||||
cap = cv2.VideoCapture("video.mp4")
|
||||
|
||||
while cap.isOpened():
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
# 1. Detect faces
|
||||
faces = detector.detect(frame)
|
||||
|
||||
# 2. Build detections array: [x1, y1, x2, y2, score]
|
||||
dets = np.array([[*f.bbox, f.confidence] for f in faces])
|
||||
dets = dets if len(dets) > 0 else np.empty((0, 5))
|
||||
|
||||
# 3. Update tracker
|
||||
tracks = tracker.update(dets)
|
||||
|
||||
# 4. Map track IDs back to face objects
|
||||
if len(tracks) > 0 and len(faces) > 0:
|
||||
face_bboxes = np.array([f.bbox for f in faces], dtype=np.float32)
|
||||
track_ids = tracks[:, 4].astype(int)
|
||||
|
||||
face_centers = xyxy_to_cxcywh(face_bboxes)[:, :2]
|
||||
track_centers = xyxy_to_cxcywh(tracks[:, :4])[:, :2]
|
||||
|
||||
for ti in range(len(tracks)):
|
||||
dists = (track_centers[ti, 0] - face_centers[:, 0]) ** 2 + (track_centers[ti, 1] - face_centers[:, 1]) ** 2
|
||||
faces[int(np.argmin(dists))].track_id = track_ids[ti]
|
||||
|
||||
# 5. Draw
|
||||
tracked_faces = [f for f in faces if f.track_id is not None]
|
||||
draw_tracks(image=frame, faces=tracked_faces)
|
||||
cv2.imshow("Tracking", frame)
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
```
|
||||
|
||||
Each track ID gets a deterministic color via golden-ratio hue stepping, so the same person keeps the same color across the entire video.
|
||||
|
||||
---
|
||||
|
||||
## Webcam Tracking
|
||||
|
||||
```python
|
||||
import cv2
|
||||
import numpy as np
|
||||
from uniface.common import xyxy_to_cxcywh
|
||||
from uniface.detection import SCRFD
|
||||
from uniface.tracking import BYTETracker
|
||||
from uniface.draw import draw_tracks
|
||||
|
||||
detector = SCRFD()
|
||||
tracker = BYTETracker(track_thresh=0.5, track_buffer=30)
|
||||
cap = cv2.VideoCapture(0)
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
faces = detector.detect(frame)
|
||||
dets = np.array([[*f.bbox, f.confidence] for f in faces])
|
||||
dets = dets if len(dets) > 0 else np.empty((0, 5))
|
||||
|
||||
tracks = tracker.update(dets)
|
||||
|
||||
if len(tracks) > 0 and len(faces) > 0:
|
||||
face_bboxes = np.array([f.bbox for f in faces], dtype=np.float32)
|
||||
track_ids = tracks[:, 4].astype(int)
|
||||
|
||||
face_centers = xyxy_to_cxcywh(face_bboxes)[:, :2]
|
||||
track_centers = xyxy_to_cxcywh(tracks[:, :4])[:, :2]
|
||||
|
||||
for ti in range(len(tracks)):
|
||||
dists = (track_centers[ti, 0] - face_centers[:, 0]) ** 2 + (track_centers[ti, 1] - face_centers[:, 1]) ** 2
|
||||
faces[int(np.argmin(dists))].track_id = track_ids[ti]
|
||||
|
||||
draw_tracks(image=frame, faces=[f for f in faces if f.track_id is not None])
|
||||
cv2.imshow("Face Tracking - Press 'q' to quit", frame)
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Parameters
|
||||
|
||||
```python
|
||||
from uniface.tracking import BYTETracker
|
||||
|
||||
tracker = BYTETracker(
|
||||
track_thresh=0.5,
|
||||
track_buffer=30,
|
||||
match_thresh=0.8,
|
||||
low_thresh=0.1,
|
||||
)
|
||||
```
|
||||
|
||||
| Parameter | Default | Description |
|
||||
|-----------|---------|-------------|
|
||||
| `track_thresh` | 0.5 | Detections above this score go through first-pass association |
|
||||
| `track_buffer` | 30 | How many frames to keep a lost track before removing it |
|
||||
| `match_thresh` | 0.8 | IoU threshold for matching tracks to detections |
|
||||
| `low_thresh` | 0.1 | Detections below this score are discarded entirely |
|
||||
|
||||
---
|
||||
|
||||
## Input / Output
|
||||
|
||||
**Input** — `(N, 5)` numpy array with `[x1, y1, x2, y2, confidence]` per detection:
|
||||
|
||||
```python
|
||||
detections = np.array([
|
||||
[100, 50, 200, 160, 0.95],
|
||||
[300, 80, 380, 200, 0.87],
|
||||
])
|
||||
```
|
||||
|
||||
**Output** — `(M, 5)` numpy array with `[x1, y1, x2, y2, track_id]` per active track:
|
||||
|
||||
```python
|
||||
tracks = tracker.update(detections)
|
||||
# array([[101.2, 51.3, 199.8, 159.8, 1.],
|
||||
# [300.5, 80.2, 379.7, 200.1, 2.]])
|
||||
```
|
||||
|
||||
The output bounding boxes come from the Kalman filter prediction, so they may differ slightly from the input. Track IDs are integers that persist across frames for the same object.
|
||||
|
||||
---
|
||||
|
||||
## Resetting the Tracker
|
||||
|
||||
When switching to a different video or scene, reset the tracker to clear all internal state:
|
||||
|
||||
```python
|
||||
tracker.reset()
|
||||
```
|
||||
|
||||
This clears all active, lost, and removed tracks, resets the frame counter, and resets the ID counter back to zero.
|
||||
|
||||
---
|
||||
|
||||
## Visualization
|
||||
|
||||
`draw_tracks` draws bounding boxes color-coded by track ID:
|
||||
|
||||
```python
|
||||
from uniface.draw import draw_tracks
|
||||
|
||||
draw_tracks(
|
||||
image=frame,
|
||||
faces=tracked_faces,
|
||||
draw_landmarks=True,
|
||||
draw_id=True,
|
||||
corner_bbox=True,
|
||||
)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Small Face Performance
|
||||
|
||||
!!! warning "Tracking performance with small faces"
|
||||
The tracker relies on IoU (Intersection over Union) to match detections across
|
||||
frames. When faces occupy a small portion of the image — for example in
|
||||
surveillance footage or wide-angle cameras — even slight movement between frames
|
||||
can cause a large drop in IoU. This makes it harder for the tracker to maintain
|
||||
consistent IDs, and you may see IDs switching or resetting more often than expected.
|
||||
|
||||
This is not specific to BYTETracker; it applies to any IoU-based tracker. A few
|
||||
things that can help:
|
||||
|
||||
- **Lower `match_thresh`** (e.g. `0.5` or `0.6`) so the tracker accepts lower
|
||||
overlap as a valid match.
|
||||
- **Increase `track_buffer`** (e.g. `60` or higher) to hold onto lost tracks
|
||||
longer before discarding them.
|
||||
- **Use a higher-resolution input** if possible, so face bounding boxes are
|
||||
larger in pixel terms.
|
||||
|
||||
```python
|
||||
tracker = BYTETracker(
|
||||
track_thresh=0.4,
|
||||
track_buffer=60,
|
||||
match_thresh=0.6,
|
||||
)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## CLI Tool
|
||||
|
||||
```bash
|
||||
# Track faces in a video
|
||||
python tools/track.py --source video.mp4
|
||||
|
||||
# Webcam
|
||||
python tools/track.py --source 0
|
||||
|
||||
# Save output
|
||||
python tools/track.py --source video.mp4 --output tracked.mp4
|
||||
|
||||
# Use RetinaFace instead of SCRFD
|
||||
python tools/track.py --source video.mp4 --detector retinaface
|
||||
|
||||
# Keep lost tracks longer
|
||||
python tools/track.py --source video.mp4 --track-buffer 60
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## References
|
||||
|
||||
- [yakhyo/bytetrack-tracker](https://github.com/yakhyo/bytetrack-tracker) — standalone BYTETracker implementation used in UniFace
|
||||
- [ByteTrack paper](https://arxiv.org/abs/2110.06864) — Zhang et al., "ByteTrack: Multi-Object Tracking by Associating Every Detection Box"
|
||||
|
||||
---
|
||||
|
||||
## See Also
|
||||
|
||||
- [Detection](detection.md) — face detection models
|
||||
- [Video & Webcam](../recipes/video-webcam.md) — video processing patterns
|
||||
- [Inputs & Outputs](../concepts/inputs-outputs.md) — data types and formats
|
||||
59
docs/notebooks.md
Normal file
@@ -0,0 +1,59 @@
|
||||
# Interactive Notebooks
|
||||
|
||||
Run UniFace examples directly in your browser with Google Colab, or download and run locally with Jupyter.
|
||||
|
||||
---
|
||||
|
||||
## Available Notebooks
|
||||
|
||||
| Notebook | Colab | Description |
|
||||
|----------|:-----:|-------------|
|
||||
| [Face Detection](https://github.com/yakhyo/uniface/blob/main/examples/01_face_detection.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/01_face_detection.ipynb) | Detect faces and 5-point landmarks |
|
||||
| [Face Alignment](https://github.com/yakhyo/uniface/blob/main/examples/02_face_alignment.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/02_face_alignment.ipynb) | Align faces for recognition |
|
||||
| [Face Verification](https://github.com/yakhyo/uniface/blob/main/examples/03_face_verification.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/03_face_verification.ipynb) | Compare faces for identity |
|
||||
| [Face Search](https://github.com/yakhyo/uniface/blob/main/examples/04_face_search.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/04_face_search.ipynb) | Find a person in group photos |
|
||||
| [Face Analyzer](https://github.com/yakhyo/uniface/blob/main/examples/05_face_analyzer.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/05_face_analyzer.ipynb) | All-in-one face analysis |
|
||||
| [Face Parsing](https://github.com/yakhyo/uniface/blob/main/examples/06_face_parsing.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/06_face_parsing.ipynb) | Semantic face segmentation |
|
||||
| [Face Anonymization](https://github.com/yakhyo/uniface/blob/main/examples/07_face_anonymization.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/07_face_anonymization.ipynb) | Privacy-preserving blur |
|
||||
| [Gaze Estimation](https://github.com/yakhyo/uniface/blob/main/examples/08_gaze_estimation.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/08_gaze_estimation.ipynb) | Gaze direction estimation |
|
||||
| [Face Segmentation](https://github.com/yakhyo/uniface/blob/main/examples/09_face_segmentation.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/09_face_segmentation.ipynb) | Face segmentation with XSeg |
|
||||
| [Face Vector Store](https://github.com/yakhyo/uniface/blob/main/examples/10_face_vector_store.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/10_face_vector_store.ipynb) | FAISS-backed face database |
|
||||
|
||||
---
|
||||
|
||||
## Running Locally
|
||||
|
||||
Download and run notebooks on your machine:
|
||||
|
||||
```bash
|
||||
# Clone the repository
|
||||
git clone https://github.com/yakhyo/uniface.git
|
||||
cd uniface
|
||||
|
||||
# Install dependencies
|
||||
pip install uniface jupyter
|
||||
|
||||
# Launch Jupyter
|
||||
jupyter notebook examples/
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Running on Google Colab
|
||||
|
||||
Click any **"Open in Colab"** badge above. The notebooks automatically:
|
||||
|
||||
1. Install UniFace via pip
|
||||
2. Clone the repository to access test images
|
||||
3. Set up the correct working directory
|
||||
|
||||
!!! tip "GPU Acceleration"
|
||||
In Colab, go to **Runtime → Change runtime type → GPU** for faster inference.
|
||||
|
||||
---
|
||||
|
||||
## Next Steps
|
||||
|
||||
- [Quickstart](quickstart.md) - Code snippets for common use cases
|
||||
- [Tutorials](recipes/image-pipeline.md) - Step-by-step workflow guides
|
||||
- [API Reference](modules/detection.md) - Detailed module documentation
|
||||
5
docs/overrides/home.html
Normal file
@@ -0,0 +1,5 @@
|
||||
{% extends "main.html" %}
|
||||
|
||||
{% block source %}
|
||||
<!-- Hide edit/view source on home page -->
|
||||
{% endblock %}
|
||||
7
docs/overrides/main.html
Normal file
@@ -0,0 +1,7 @@
|
||||
{% extends "base.html" %}
|
||||
|
||||
{% block announce %}
|
||||
<a href="https://github.com/yakhyo/uniface" target="_blank" rel="noopener">
|
||||
Support our work — give UniFace a <span class="twemoji">{% include ".icons/octicons/star-fill-16.svg" %}</span> on <strong>GitHub</strong> and help us reach more developers!
|
||||
</a>
|
||||
{% endblock %}
|
||||
489
docs/quickstart.md
Normal file
@@ -0,0 +1,489 @@
|
||||
# Quickstart
|
||||
|
||||
Get up and running with UniFace in 5 minutes. This guide covers the most common use cases.
|
||||
|
||||
---
|
||||
|
||||
## Face Detection
|
||||
|
||||
Detect faces in an image:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface.detection import RetinaFace
|
||||
|
||||
# Load image
|
||||
image = cv2.imread("photo.jpg")
|
||||
|
||||
# Initialize detector (models auto-download on first use)
|
||||
detector = RetinaFace()
|
||||
|
||||
# Detect faces
|
||||
faces = detector.detect(image)
|
||||
|
||||
# Print results
|
||||
for i, face in enumerate(faces):
|
||||
print(f"Face {i+1}:")
|
||||
print(f" Confidence: {face.confidence:.2f}")
|
||||
print(f" BBox: {face.bbox}")
|
||||
print(f" Landmarks: {len(face.landmarks)} points")
|
||||
```
|
||||
|
||||
**Output:**
|
||||
|
||||
```
|
||||
Face 1:
|
||||
Confidence: 0.99
|
||||
BBox: [120.5, 85.3, 245.8, 210.6]
|
||||
Landmarks: 5 points
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Visualize Detections
|
||||
|
||||
Draw bounding boxes and landmarks:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.draw import draw_detections
|
||||
|
||||
# Detect faces
|
||||
detector = RetinaFace()
|
||||
image = cv2.imread("photo.jpg")
|
||||
faces = detector.detect(image)
|
||||
|
||||
# Extract visualization data
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
|
||||
# Draw on image
|
||||
draw_detections(
|
||||
image=image,
|
||||
bboxes=bboxes,
|
||||
scores=scores,
|
||||
landmarks=landmarks,
|
||||
vis_threshold=0.6,
|
||||
)
|
||||
|
||||
# Save result
|
||||
cv2.imwrite("output.jpg", image)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Face Recognition
|
||||
|
||||
Compare two faces:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
import numpy as np
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.recognition import ArcFace
|
||||
|
||||
# Initialize models
|
||||
detector = RetinaFace()
|
||||
recognizer = ArcFace()
|
||||
|
||||
# Load two images
|
||||
image1 = cv2.imread("person1.jpg")
|
||||
image2 = cv2.imread("person2.jpg")
|
||||
|
||||
# Detect faces
|
||||
faces1 = detector.detect(image1)
|
||||
faces2 = detector.detect(image2)
|
||||
|
||||
if faces1 and faces2:
|
||||
# Extract embeddings
|
||||
emb1 = recognizer.get_normalized_embedding(image1, faces1[0].landmarks)
|
||||
emb2 = recognizer.get_normalized_embedding(image2, faces2[0].landmarks)
|
||||
|
||||
# Compute similarity (cosine similarity)
|
||||
similarity = np.dot(emb1, emb2.T)[0][0]
|
||||
|
||||
# Interpret result
|
||||
if similarity > 0.6:
|
||||
print(f"Same person (similarity: {similarity:.3f})")
|
||||
else:
|
||||
print(f"Different people (similarity: {similarity:.3f})")
|
||||
```
|
||||
|
||||
!!! tip "Similarity Thresholds"
|
||||
- `> 0.6`: Same person (high confidence)
|
||||
- `0.4 - 0.6`: Uncertain (manual review)
|
||||
- `< 0.4`: Different people
|
||||
|
||||
---
|
||||
|
||||
## Age & Gender Detection
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface.attribute import AgeGender
|
||||
from uniface.detection import RetinaFace
|
||||
|
||||
# Initialize models
|
||||
detector = RetinaFace()
|
||||
age_gender = AgeGender()
|
||||
|
||||
# Load image
|
||||
image = cv2.imread("photo.jpg")
|
||||
faces = detector.detect(image)
|
||||
|
||||
# Predict attributes
|
||||
for i, face in enumerate(faces):
|
||||
result = age_gender.predict(image, face.bbox)
|
||||
print(f"Face {i+1}: {result.sex}, {result.age} years old")
|
||||
```
|
||||
|
||||
**Output:**
|
||||
|
||||
```
|
||||
Face 1: Male, 32 years old
|
||||
Face 2: Female, 28 years old
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## FairFace Attributes
|
||||
|
||||
Detect race, gender, and age group:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface.attribute import FairFace
|
||||
from uniface.detection import RetinaFace
|
||||
|
||||
detector = RetinaFace()
|
||||
fairface = FairFace()
|
||||
|
||||
image = cv2.imread("photo.jpg")
|
||||
faces = detector.detect(image)
|
||||
|
||||
for i, face in enumerate(faces):
|
||||
result = fairface.predict(image, face.bbox)
|
||||
print(f"Face {i+1}: {result.sex}, {result.age_group}, {result.race}")
|
||||
```
|
||||
|
||||
**Output:**
|
||||
|
||||
```
|
||||
Face 1: Male, 30-39, East Asian
|
||||
Face 2: Female, 20-29, White
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Facial Landmarks (106 Points)
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.landmark import Landmark106
|
||||
|
||||
detector = RetinaFace()
|
||||
landmarker = Landmark106()
|
||||
|
||||
image = cv2.imread("photo.jpg")
|
||||
faces = detector.detect(image)
|
||||
|
||||
if faces:
|
||||
landmarks = landmarker.get_landmarks(image, faces[0].bbox)
|
||||
print(f"Detected {len(landmarks)} landmarks")
|
||||
|
||||
# Draw landmarks
|
||||
for x, y in landmarks.astype(int):
|
||||
cv2.circle(image, (x, y), 2, (0, 255, 0), -1)
|
||||
|
||||
cv2.imwrite("landmarks.jpg", image)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Gaze Estimation
|
||||
|
||||
```python
|
||||
import cv2
|
||||
import numpy as np
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.gaze import MobileGaze
|
||||
from uniface.draw import draw_gaze
|
||||
|
||||
detector = RetinaFace()
|
||||
gaze_estimator = MobileGaze()
|
||||
|
||||
image = cv2.imread("photo.jpg")
|
||||
faces = detector.detect(image)
|
||||
|
||||
for i, face in enumerate(faces):
|
||||
x1, y1, x2, y2 = map(int, face.bbox[:4])
|
||||
face_crop = image[y1:y2, x1:x2]
|
||||
|
||||
if face_crop.size > 0:
|
||||
result = gaze_estimator.estimate(face_crop)
|
||||
print(f"Face {i+1}: pitch={np.degrees(result.pitch):.1f}°, yaw={np.degrees(result.yaw):.1f}°")
|
||||
|
||||
# Draw gaze direction
|
||||
draw_gaze(image, face.bbox, result.pitch, result.yaw)
|
||||
|
||||
cv2.imwrite("gaze_output.jpg", image)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Face Parsing
|
||||
|
||||
Segment face into semantic components:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
import numpy as np
|
||||
from uniface.parsing import BiSeNet
|
||||
from uniface.draw import vis_parsing_maps
|
||||
|
||||
parser = BiSeNet()
|
||||
|
||||
# Load face image (already cropped)
|
||||
face_image = cv2.imread("face.jpg")
|
||||
|
||||
# Parse face into 19 components
|
||||
mask = parser.parse(face_image)
|
||||
|
||||
# Visualize with overlay
|
||||
face_rgb = cv2.cvtColor(face_image, cv2.COLOR_BGR2RGB)
|
||||
vis_result = vis_parsing_maps(face_rgb, mask, save_image=False)
|
||||
|
||||
print(f"Detected {len(np.unique(mask))} facial components")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Face Anonymization
|
||||
|
||||
Blur faces for privacy protection:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.privacy import BlurFace
|
||||
|
||||
detector = RetinaFace()
|
||||
blurrer = BlurFace(method='pixelate')
|
||||
|
||||
image = cv2.imread("group_photo.jpg")
|
||||
faces = detector.detect(image)
|
||||
anonymized = blurrer.anonymize(image, faces)
|
||||
cv2.imwrite("anonymized.jpg", anonymized)
|
||||
```
|
||||
|
||||
**Custom blur settings:**
|
||||
|
||||
```python
|
||||
blurrer = BlurFace(method='gaussian', blur_strength=5.0)
|
||||
anonymized = blurrer.anonymize(image, faces)
|
||||
```
|
||||
|
||||
**Available methods:**
|
||||
|
||||
| Method | Description |
|
||||
|--------|-------------|
|
||||
| `pixelate` | Blocky effect (news media standard) |
|
||||
| `gaussian` | Smooth, natural blur |
|
||||
| `blackout` | Solid color boxes (maximum privacy) |
|
||||
| `elliptical` | Soft oval blur (natural face shape) |
|
||||
| `median` | Edge-preserving blur |
|
||||
|
||||
---
|
||||
|
||||
## Face Anti-Spoofing
|
||||
|
||||
Detect real vs. fake faces:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.spoofing import MiniFASNet
|
||||
|
||||
detector = RetinaFace()
|
||||
spoofer = MiniFASNet()
|
||||
|
||||
image = cv2.imread("photo.jpg")
|
||||
faces = detector.detect(image)
|
||||
|
||||
for i, face in enumerate(faces):
|
||||
result = spoofer.predict(image, face.bbox)
|
||||
label = 'Real' if result.is_real else 'Fake'
|
||||
print(f"Face {i+1}: {label} ({result.confidence:.1%})")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Webcam Demo
|
||||
|
||||
Real-time face detection:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.draw import draw_detections
|
||||
|
||||
detector = RetinaFace()
|
||||
cap = cv2.VideoCapture(0)
|
||||
|
||||
print("Press 'q' to quit")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
faces = detector.detect(frame)
|
||||
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
draw_detections(image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks)
|
||||
|
||||
cv2.imshow("UniFace - Press 'q' to quit", frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Face Tracking
|
||||
|
||||
Track faces across video frames with persistent IDs:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
import numpy as np
|
||||
from uniface.common import xyxy_to_cxcywh
|
||||
from uniface.detection import SCRFD
|
||||
from uniface.tracking import BYTETracker
|
||||
from uniface.draw import draw_tracks
|
||||
|
||||
detector = SCRFD()
|
||||
tracker = BYTETracker(track_thresh=0.5, track_buffer=30)
|
||||
|
||||
cap = cv2.VideoCapture("video.mp4")
|
||||
|
||||
while cap.isOpened():
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
faces = detector.detect(frame)
|
||||
dets = np.array([[*f.bbox, f.confidence] for f in faces])
|
||||
dets = dets if len(dets) > 0 else np.empty((0, 5))
|
||||
|
||||
tracks = tracker.update(dets)
|
||||
|
||||
# Assign track IDs to faces
|
||||
if len(tracks) > 0 and len(faces) > 0:
|
||||
face_bboxes = np.array([f.bbox for f in faces], dtype=np.float32)
|
||||
track_ids = tracks[:, 4].astype(int)
|
||||
|
||||
face_centers = xyxy_to_cxcywh(face_bboxes)[:, :2]
|
||||
track_centers = xyxy_to_cxcywh(tracks[:, :4])[:, :2]
|
||||
|
||||
for ti in range(len(tracks)):
|
||||
dists = (track_centers[ti, 0] - face_centers[:, 0]) ** 2 + (track_centers[ti, 1] - face_centers[:, 1]) ** 2
|
||||
faces[int(np.argmin(dists))].track_id = track_ids[ti]
|
||||
|
||||
tracked_faces = [f for f in faces if f.track_id is not None]
|
||||
draw_tracks(image=frame, faces=tracked_faces)
|
||||
cv2.imshow("Tracking", frame)
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
```
|
||||
|
||||
For more details, see the [Tracking module](modules/tracking.md).
|
||||
|
||||
---
|
||||
|
||||
## Model Selection
|
||||
|
||||
For detailed model comparisons and benchmarks, see the [Model Zoo](models.md).
|
||||
|
||||
**Available models by task:**
|
||||
|
||||
| Task | Available Models |
|
||||
|------|------------------|
|
||||
| Detection | `RetinaFace`, `SCRFD`, `YOLOv5Face`, `YOLOv8Face` |
|
||||
| Recognition | `ArcFace`, `AdaFace`, `MobileFace`, `SphereFace` |
|
||||
| Tracking | `BYTETracker` |
|
||||
| Gaze | `MobileGaze` (ResNet18/34/50, MobileNetV2, MobileOneS0) |
|
||||
| Parsing | `BiSeNet` (ResNet18/34) |
|
||||
| Attributes | `AgeGender`, `FairFace`, `Emotion` |
|
||||
| Anti-Spoofing | `MiniFASNet` (V1SE, V2) |
|
||||
|
||||
---
|
||||
|
||||
## Common Issues
|
||||
|
||||
### Models Not Downloading
|
||||
|
||||
```python
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.constants import RetinaFaceWeights
|
||||
|
||||
# Manually download a model
|
||||
model_path = verify_model_weights(RetinaFaceWeights.MNET_V2)
|
||||
print(f"Model downloaded to: {model_path}")
|
||||
```
|
||||
|
||||
### Check Hardware Acceleration
|
||||
|
||||
```python
|
||||
import onnxruntime as ort
|
||||
print("Available providers:", ort.get_available_providers())
|
||||
|
||||
# macOS M-series should show: ['CoreMLExecutionProvider', ...]
|
||||
# NVIDIA GPU should show: ['CUDAExecutionProvider', ...]
|
||||
```
|
||||
|
||||
### Slow Performance on Mac
|
||||
|
||||
Verify you're using the ARM64 build of Python:
|
||||
|
||||
```bash
|
||||
python -c "import platform; print(platform.machine())"
|
||||
# Should show: arm64 (not x86_64)
|
||||
```
|
||||
|
||||
### Import Errors
|
||||
|
||||
```python
|
||||
from uniface.detection import RetinaFace, SCRFD
|
||||
from uniface.recognition import ArcFace, AdaFace
|
||||
from uniface.attribute import AgeGender, FairFace
|
||||
from uniface.landmark import Landmark106
|
||||
from uniface.gaze import MobileGaze
|
||||
from uniface.parsing import BiSeNet, XSeg
|
||||
from uniface.privacy import BlurFace
|
||||
from uniface.spoofing import MiniFASNet
|
||||
from uniface.tracking import BYTETracker
|
||||
from uniface.analyzer import FaceAnalyzer
|
||||
from uniface.indexing import FAISS # pip install faiss-cpu
|
||||
from uniface.draw import draw_detections, draw_tracks
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Next Steps
|
||||
|
||||
- [Model Zoo](models.md) - All models, benchmarks, and selection guide
|
||||
- [API Reference](modules/detection.md) - Explore individual modules and their APIs
|
||||
- [Tutorials](recipes/image-pipeline.md) - Step-by-step examples for common workflows
|
||||
- [Guides](concepts/overview.md) - Learn about the architecture and design principles
|
||||
104
docs/recipes/anonymize-stream.md
Normal file
@@ -0,0 +1,104 @@
|
||||
# Anonymize Stream
|
||||
|
||||
Blur faces in real-time video streams for privacy protection.
|
||||
|
||||
!!! note "Work in Progress"
|
||||
This page contains example code patterns. Test thoroughly before using in production.
|
||||
|
||||
---
|
||||
|
||||
## Webcam Anonymization
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.privacy import BlurFace
|
||||
|
||||
detector = RetinaFace()
|
||||
blurrer = BlurFace(method='pixelate')
|
||||
cap = cv2.VideoCapture(0)
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
faces = detector.detect(frame)
|
||||
frame = blurrer.anonymize(frame, faces, inplace=True)
|
||||
|
||||
cv2.imshow('Anonymized', frame)
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Video File Anonymization
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.privacy import BlurFace
|
||||
|
||||
detector = RetinaFace()
|
||||
blurrer = BlurFace(method='gaussian')
|
||||
|
||||
cap = cv2.VideoCapture("input.mp4")
|
||||
fps = cap.get(cv2.CAP_PROP_FPS)
|
||||
w, h = int(cap.get(3)), int(cap.get(4))
|
||||
|
||||
out = cv2.VideoWriter('output.mp4', cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
|
||||
|
||||
while cap.read()[0]:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
faces = detector.detect(frame)
|
||||
blurrer.anonymize(frame, faces, inplace=True)
|
||||
out.write(frame)
|
||||
|
||||
cap.release()
|
||||
out.release()
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Single Image
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.privacy import BlurFace
|
||||
|
||||
detector = RetinaFace()
|
||||
blurrer = BlurFace(method='pixelate')
|
||||
|
||||
image = cv2.imread("photo.jpg")
|
||||
faces = detector.detect(image)
|
||||
result = blurrer.anonymize(image, faces)
|
||||
cv2.imwrite("anonymized.jpg", result)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Available Blur Methods
|
||||
|
||||
| Method | Usage |
|
||||
|--------|-------|
|
||||
| Pixelate | `BlurFace(method='pixelate', pixel_blocks=15)` |
|
||||
| Gaussian | `BlurFace(method='gaussian', blur_strength=3.0)` |
|
||||
| Blackout | `BlurFace(method='blackout', color=(0,0,0))` |
|
||||
| Elliptical | `BlurFace(method='elliptical', margin=20)` |
|
||||
| Median | `BlurFace(method='median', blur_strength=3.0)` |
|
||||
|
||||
---
|
||||
|
||||
## See Also
|
||||
|
||||
- [Privacy Module](../modules/privacy.md) - Privacy protection details
|
||||
- [Video & Webcam](video-webcam.md) - Real-time processing
|
||||
- [Detection Module](../modules/detection.md) - Face detection
|
||||
84
docs/recipes/batch-processing.md
Normal file
@@ -0,0 +1,84 @@
|
||||
# Batch Processing
|
||||
|
||||
Process multiple images efficiently.
|
||||
|
||||
!!! note "Work in Progress"
|
||||
This page contains example code patterns. Test thoroughly before using in production.
|
||||
|
||||
---
|
||||
|
||||
## Basic Batch Processing
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from pathlib import Path
|
||||
from uniface.detection import RetinaFace
|
||||
|
||||
detector = RetinaFace()
|
||||
|
||||
def process_directory(input_dir, output_dir):
|
||||
"""Process all images in a directory."""
|
||||
input_path = Path(input_dir)
|
||||
output_path = Path(output_dir)
|
||||
output_path.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
for image_path in input_path.glob("*.jpg"):
|
||||
print(f"Processing {image_path.name}...")
|
||||
|
||||
image = cv2.imread(str(image_path))
|
||||
faces = detector.detect(image)
|
||||
|
||||
print(f" Found {len(faces)} face(s)")
|
||||
|
||||
# Process and save results
|
||||
# ... your code here ...
|
||||
|
||||
# Usage
|
||||
process_directory("input_images/", "output_images/")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## With Progress Bar
|
||||
|
||||
```python
|
||||
from tqdm import tqdm
|
||||
|
||||
for image_path in tqdm(image_files, desc="Processing"):
|
||||
# ... process image ...
|
||||
pass
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Extract Embeddings
|
||||
|
||||
```python
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.recognition import ArcFace
|
||||
import numpy as np
|
||||
|
||||
detector = RetinaFace()
|
||||
recognizer = ArcFace()
|
||||
|
||||
embeddings = {}
|
||||
for image_path in Path("faces/").glob("*.jpg"):
|
||||
image = cv2.imread(str(image_path))
|
||||
faces = detector.detect(image)
|
||||
|
||||
if faces:
|
||||
embedding = recognizer.get_normalized_embedding(image, faces[0].landmarks)
|
||||
embeddings[image_path.stem] = embedding
|
||||
|
||||
# Save embeddings
|
||||
np.savez("embeddings.npz", **embeddings)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## See Also
|
||||
|
||||
- [Video & Webcam](video-webcam.md) - Real-time processing
|
||||
- [Face Search](face-search.md) - Search through embeddings
|
||||
- [Image Pipeline](image-pipeline.md) - Full analysis pipeline
|
||||
- [Detection Module](../modules/detection.md) - Detection options
|
||||
92
docs/recipes/custom-models.md
Normal file
@@ -0,0 +1,92 @@
|
||||
# Custom Models
|
||||
|
||||
Add your own ONNX models to UniFace.
|
||||
|
||||
!!! note "Work in Progress"
|
||||
This page contains example code patterns for advanced users. Test thoroughly before using in production.
|
||||
|
||||
---
|
||||
|
||||
## Overview
|
||||
|
||||
UniFace is designed to be extensible. You can add custom ONNX models by:
|
||||
|
||||
1. Creating a class that inherits from the appropriate base class
|
||||
2. Implementing required methods
|
||||
3. Using the ONNX Runtime utilities provided by UniFace
|
||||
|
||||
---
|
||||
|
||||
## Add Custom Detection Model
|
||||
|
||||
```python
|
||||
from uniface.detection.base import BaseDetector
|
||||
from uniface.onnx_utils import create_onnx_session
|
||||
from uniface.types import Face
|
||||
import numpy as np
|
||||
|
||||
class MyDetector(BaseDetector):
|
||||
def __init__(self, model_path: str, confidence_threshold: float = 0.5):
|
||||
super().__init__(confidence_threshold=confidence_threshold)
|
||||
self.session = create_onnx_session(model_path)
|
||||
self.threshold = confidence_threshold
|
||||
|
||||
def preprocess(self, image: np.ndarray) -> np.ndarray:
|
||||
# Your preprocessing logic
|
||||
# e.g., resize, normalize, transpose
|
||||
raise NotImplementedError
|
||||
|
||||
def postprocess(self, outputs, shape) -> list[Face]:
|
||||
# Your postprocessing logic
|
||||
# e.g., decode boxes, apply NMS, create Face objects
|
||||
raise NotImplementedError
|
||||
|
||||
def detect(self, image: np.ndarray) -> list[Face]:
|
||||
# 1. Preprocess image
|
||||
input_tensor = self.preprocess(image)
|
||||
|
||||
# 2. Run inference
|
||||
outputs = self.session.run(None, {'input': input_tensor})
|
||||
|
||||
# 3. Postprocess outputs to Face objects
|
||||
return self.postprocess(outputs, image.shape)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Add Custom Recognition Model
|
||||
|
||||
```python
|
||||
from uniface.recognition.base import BaseRecognizer, PreprocessConfig
|
||||
|
||||
class MyRecognizer(BaseRecognizer):
|
||||
def __init__(self, model_path: str, providers=None):
|
||||
preprocessing = PreprocessConfig(input_mean=127.5, input_std=127.5, input_size=(112, 112))
|
||||
super().__init__(model_path, preprocessing, providers=providers)
|
||||
|
||||
# Optional: override preprocess() if your model expects custom normalization.
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Usage
|
||||
|
||||
```python
|
||||
from my_module import MyDetector, MyRecognizer
|
||||
|
||||
# Use custom models
|
||||
detector = MyDetector("path/to/detection_model.onnx")
|
||||
recognizer = MyRecognizer("path/to/recognition_model.onnx")
|
||||
|
||||
# Use like built-in models
|
||||
faces = detector.detect(image)
|
||||
embedding = recognizer.get_normalized_embedding(image, faces[0].landmarks)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## See Also
|
||||
|
||||
- [Detection Module](../modules/detection.md) - Built-in detection models
|
||||
- [Recognition Module](../modules/recognition.md) - Built-in recognition models
|
||||
- [Concepts: Overview](../concepts/overview.md) - Architecture overview
|
||||
166
docs/recipes/face-search.md
Normal file
@@ -0,0 +1,166 @@
|
||||
# Face Search
|
||||
|
||||
Find and identify people in images and video streams.
|
||||
|
||||
UniFace supports two search approaches:
|
||||
|
||||
| Approach | Use case | Tool |
|
||||
| -------------------- | ------------------------------------------------ | ----------------------- |
|
||||
| **Reference search** | "Is this specific person in the video?" | `tools/search.py` |
|
||||
| **Vector search** | "Who is this?" against a database of known faces | `tools/faiss_search.py` |
|
||||
|
||||
---
|
||||
|
||||
## Reference Search (single image)
|
||||
|
||||
Compare every detected face against a single reference photo:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
import numpy as np
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.recognition import ArcFace
|
||||
from uniface.face_utils import compute_similarity
|
||||
|
||||
detector = RetinaFace()
|
||||
recognizer = ArcFace()
|
||||
|
||||
ref_image = cv2.imread("reference.jpg")
|
||||
ref_faces = detector.detect(ref_image)
|
||||
ref_embedding = recognizer.get_normalized_embedding(ref_image, ref_faces[0].landmarks)
|
||||
|
||||
query_image = cv2.imread("group_photo.jpg")
|
||||
faces = detector.detect(query_image)
|
||||
|
||||
for face in faces:
|
||||
embedding = recognizer.get_normalized_embedding(query_image, face.landmarks)
|
||||
sim = compute_similarity(ref_embedding, embedding)
|
||||
|
||||
label = f"Match ({sim:.2f})" if sim > 0.4 else f"Unknown ({sim:.2f})"
|
||||
print(label)
|
||||
```
|
||||
|
||||
**CLI tool:**
|
||||
|
||||
```bash
|
||||
python tools/search.py --reference ref.jpg --source video.mp4
|
||||
python tools/search.py --reference ref.jpg --source 0 # webcam
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Vector Search (FAISS index)
|
||||
|
||||
For identifying faces against a database of many known people, use the
|
||||
[`FAISS`](../modules/indexing.md) vector store.
|
||||
|
||||
!!! info "Install extra"
|
||||
`bash
|
||||
pip install faiss-cpu
|
||||
`
|
||||
|
||||
### Build an index
|
||||
|
||||
Organise face images in person sub-folders:
|
||||
|
||||
```
|
||||
dataset/
|
||||
├── alice/
|
||||
│ ├── 001.jpg
|
||||
│ └── 002.jpg
|
||||
├── bob/
|
||||
│ └── 001.jpg
|
||||
└── charlie/
|
||||
├── 001.jpg
|
||||
└── 002.jpg
|
||||
```
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from pathlib import Path
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.recognition import ArcFace
|
||||
from uniface.indexing import FAISS
|
||||
|
||||
detector = RetinaFace()
|
||||
recognizer = ArcFace()
|
||||
store = FAISS(db_path="./my_index")
|
||||
|
||||
for person_dir in sorted(Path("dataset").iterdir()):
|
||||
if not person_dir.is_dir():
|
||||
continue
|
||||
for img_path in person_dir.glob("*.jpg"):
|
||||
image = cv2.imread(str(img_path))
|
||||
faces = detector.detect(image)
|
||||
if faces:
|
||||
emb = recognizer.get_normalized_embedding(image, faces[0].landmarks)
|
||||
store.add(emb, {"person_id": person_dir.name, "source": str(img_path)})
|
||||
|
||||
store.save()
|
||||
print(f"Index saved: {store}")
|
||||
```
|
||||
|
||||
**CLI tool:**
|
||||
|
||||
```bash
|
||||
python tools/faiss_search.py build --faces-dir dataset/ --db-path ./my_index
|
||||
```
|
||||
|
||||
### Search against the index
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.recognition import ArcFace
|
||||
from uniface.indexing import FAISS
|
||||
|
||||
detector = RetinaFace()
|
||||
recognizer = ArcFace()
|
||||
|
||||
store = FAISS(db_path="./my_index")
|
||||
store.load()
|
||||
|
||||
image = cv2.imread("query.jpg")
|
||||
faces = detector.detect(image)
|
||||
|
||||
for face in faces:
|
||||
embedding = recognizer.get_normalized_embedding(image, face.landmarks)
|
||||
result, similarity = store.search(embedding, threshold=0.4)
|
||||
|
||||
if result:
|
||||
print(f"Matched: {result['person_id']} ({similarity:.2f})")
|
||||
else:
|
||||
print(f"Unknown ({similarity:.2f})")
|
||||
```
|
||||
|
||||
**CLI tool:**
|
||||
|
||||
```bash
|
||||
python tools/faiss_search.py run --db-path ./my_index --source video.mp4
|
||||
python tools/faiss_search.py run --db-path ./my_index --source 0 # webcam
|
||||
```
|
||||
|
||||
### Manage the index
|
||||
|
||||
```python
|
||||
from uniface.indexing import FAISS
|
||||
|
||||
store = FAISS(db_path="./my_index")
|
||||
store.load()
|
||||
|
||||
print(f"Total vectors: {len(store)}")
|
||||
|
||||
removed = store.remove("person_id", "bob")
|
||||
print(f"Removed {removed} entries")
|
||||
|
||||
store.save()
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## See Also
|
||||
|
||||
- [Indexing Module](../modules/indexing.md) - Full `FAISS` API reference
|
||||
- [Recognition Module](../modules/recognition.md) - Face recognition details
|
||||
- [Video & Webcam](video-webcam.md) - Real-time processing
|
||||
- [Concepts: Thresholds](../concepts/thresholds-calibration.md) - Tuning similarity thresholds
|
||||
293
docs/recipes/image-pipeline.md
Normal file
@@ -0,0 +1,293 @@
|
||||
# Image Pipeline
|
||||
|
||||
A complete pipeline for processing images with detection, recognition, and attribute analysis.
|
||||
|
||||
---
|
||||
|
||||
## Basic Pipeline
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface.attribute import AgeGender
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.recognition import ArcFace
|
||||
from uniface.draw import draw_detections
|
||||
|
||||
# Initialize models
|
||||
detector = RetinaFace()
|
||||
recognizer = ArcFace()
|
||||
age_gender = AgeGender()
|
||||
|
||||
def process_image(image_path):
|
||||
"""Process a single image through the full pipeline."""
|
||||
# Load image
|
||||
image = cv2.imread(image_path)
|
||||
|
||||
# Step 1: Detect faces
|
||||
faces = detector.detect(image)
|
||||
print(f"Found {len(faces)} face(s)")
|
||||
|
||||
results = []
|
||||
|
||||
for i, face in enumerate(faces):
|
||||
# Step 2: Extract embedding
|
||||
embedding = recognizer.get_normalized_embedding(image, face.landmarks)
|
||||
|
||||
# Step 3: Predict attributes
|
||||
attrs = age_gender.predict(image, face.bbox)
|
||||
|
||||
results.append({
|
||||
'face_id': i,
|
||||
'bbox': face.bbox,
|
||||
'confidence': face.confidence,
|
||||
'embedding': embedding,
|
||||
'gender': attrs.sex,
|
||||
'age': attrs.age
|
||||
})
|
||||
|
||||
print(f" Face {i+1}: {attrs.sex}, {attrs.age} years old")
|
||||
|
||||
# Visualize
|
||||
draw_detections(
|
||||
image=image,
|
||||
bboxes=[f.bbox for f in faces],
|
||||
scores=[f.confidence for f in faces],
|
||||
landmarks=[f.landmarks for f in faces]
|
||||
)
|
||||
|
||||
return image, results
|
||||
|
||||
# Usage
|
||||
result_image, results = process_image("photo.jpg")
|
||||
cv2.imwrite("result.jpg", result_image)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Using FaceAnalyzer
|
||||
|
||||
For convenience, use the built-in `FaceAnalyzer`:
|
||||
|
||||
```python
|
||||
from uniface.analyzer import FaceAnalyzer
|
||||
from uniface.attribute import AgeGender
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.recognition import ArcFace
|
||||
import cv2
|
||||
|
||||
# Initialize with desired modules
|
||||
detector = RetinaFace()
|
||||
recognizer = ArcFace()
|
||||
age_gender = AgeGender()
|
||||
|
||||
analyzer = FaceAnalyzer(
|
||||
detector,
|
||||
recognizer=recognizer,
|
||||
age_gender=age_gender,
|
||||
)
|
||||
|
||||
# Process image
|
||||
image = cv2.imread("photo.jpg")
|
||||
faces = analyzer.analyze(image)
|
||||
|
||||
# Access enriched Face objects
|
||||
for face in faces:
|
||||
print(f"Confidence: {face.confidence:.2f}")
|
||||
print(f"Embedding: {face.embedding.shape}")
|
||||
print(f"Age: {face.age}, Gender: {face.sex}")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Full Analysis Pipeline
|
||||
|
||||
Complete pipeline with all modules:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
import numpy as np
|
||||
from uniface.attribute import AgeGender, FairFace
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.gaze import MobileGaze
|
||||
from uniface.landmark import Landmark106
|
||||
from uniface.recognition import ArcFace
|
||||
from uniface.parsing import BiSeNet
|
||||
from uniface.spoofing import MiniFASNet
|
||||
from uniface.draw import draw_detections, draw_gaze
|
||||
|
||||
class FaceAnalysisPipeline:
|
||||
def __init__(self):
|
||||
# Initialize all models
|
||||
self.detector = RetinaFace()
|
||||
self.recognizer = ArcFace()
|
||||
self.age_gender = AgeGender()
|
||||
self.fairface = FairFace()
|
||||
self.landmarker = Landmark106()
|
||||
self.gaze = MobileGaze()
|
||||
self.parser = BiSeNet()
|
||||
self.spoofer = MiniFASNet()
|
||||
|
||||
def analyze(self, image):
|
||||
"""Run full analysis pipeline."""
|
||||
faces = self.detector.detect(image)
|
||||
results = []
|
||||
|
||||
for face in faces:
|
||||
result = {
|
||||
'bbox': face.bbox,
|
||||
'confidence': face.confidence,
|
||||
'landmarks_5': face.landmarks
|
||||
}
|
||||
|
||||
# Recognition embedding
|
||||
result['embedding'] = self.recognizer.get_normalized_embedding(
|
||||
image, face.landmarks
|
||||
)
|
||||
|
||||
# Attributes
|
||||
ag_result = self.age_gender.predict(image, face.bbox)
|
||||
result['age'] = ag_result.age
|
||||
result['gender'] = ag_result.sex
|
||||
|
||||
# FairFace attributes
|
||||
ff_result = self.fairface.predict(image, face.bbox)
|
||||
result['age_group'] = ff_result.age_group
|
||||
result['race'] = ff_result.race
|
||||
|
||||
# 106-point landmarks
|
||||
result['landmarks_106'] = self.landmarker.get_landmarks(
|
||||
image, face.bbox
|
||||
)
|
||||
|
||||
# Gaze estimation
|
||||
x1, y1, x2, y2 = map(int, face.bbox)
|
||||
face_crop = image[y1:y2, x1:x2]
|
||||
if face_crop.size > 0:
|
||||
gaze_result = self.gaze.estimate(face_crop)
|
||||
result['gaze_pitch'] = gaze_result.pitch
|
||||
result['gaze_yaw'] = gaze_result.yaw
|
||||
|
||||
# Face parsing
|
||||
if face_crop.size > 0:
|
||||
result['parsing_mask'] = self.parser.parse(face_crop)
|
||||
|
||||
# Anti-spoofing
|
||||
spoof_result = self.spoofer.predict(image, face.bbox)
|
||||
result['is_real'] = spoof_result.is_real
|
||||
result['spoof_confidence'] = spoof_result.confidence
|
||||
|
||||
results.append(result)
|
||||
|
||||
return results
|
||||
|
||||
# Usage
|
||||
pipeline = FaceAnalysisPipeline()
|
||||
results = pipeline.analyze(cv2.imread("photo.jpg"))
|
||||
|
||||
for i, r in enumerate(results):
|
||||
print(f"\nFace {i+1}:")
|
||||
print(f" Gender: {r['gender']}, Age: {r['age']}")
|
||||
print(f" Race: {r['race']}, Age Group: {r['age_group']}")
|
||||
print(f" Gaze: pitch={np.degrees(r['gaze_pitch']):.1f}°")
|
||||
print(f" Real: {r['is_real']} ({r['spoof_confidence']:.1%})")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Visualization Pipeline
|
||||
|
||||
```python
|
||||
import cv2
|
||||
import numpy as np
|
||||
from uniface.attribute import AgeGender
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.gaze import MobileGaze
|
||||
from uniface.draw import draw_detections, draw_gaze
|
||||
|
||||
def visualize_analysis(image_path, output_path):
|
||||
"""Create annotated visualization of face analysis."""
|
||||
detector = RetinaFace()
|
||||
age_gender = AgeGender()
|
||||
gaze = MobileGaze()
|
||||
|
||||
image = cv2.imread(image_path)
|
||||
faces = detector.detect(image)
|
||||
|
||||
for face in faces:
|
||||
x1, y1, x2, y2 = map(int, face.bbox)
|
||||
|
||||
# Draw bounding box
|
||||
cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
|
||||
|
||||
# Age and gender
|
||||
attrs = age_gender.predict(image, face.bbox)
|
||||
label = f"{attrs.sex}, {attrs.age}y"
|
||||
cv2.putText(image, label, (x1, y1 - 10),
|
||||
cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
|
||||
|
||||
# Gaze
|
||||
face_crop = image[y1:y2, x1:x2]
|
||||
if face_crop.size > 0:
|
||||
gaze_result = gaze.estimate(face_crop)
|
||||
draw_gaze(image, face.bbox, gaze_result.pitch, gaze_result.yaw)
|
||||
|
||||
# Confidence
|
||||
conf_label = f"{face.confidence:.0%}"
|
||||
cv2.putText(image, conf_label, (x1, y2 + 20),
|
||||
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
|
||||
|
||||
cv2.imwrite(output_path, image)
|
||||
print(f"Saved to {output_path}")
|
||||
|
||||
# Usage
|
||||
visualize_analysis("input.jpg", "output.jpg")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## JSON Output
|
||||
|
||||
Export results to JSON:
|
||||
|
||||
```python
|
||||
import json
|
||||
import numpy as np
|
||||
|
||||
def results_to_json(results):
|
||||
"""Convert analysis results to JSON-serializable format."""
|
||||
output = []
|
||||
|
||||
for r in results:
|
||||
item = {
|
||||
'bbox': r['bbox'].tolist(),
|
||||
'confidence': float(r['confidence']),
|
||||
'age': int(r['age']) if r.get('age') else None,
|
||||
'gender': r.get('gender'),
|
||||
'race': r.get('race'),
|
||||
'is_real': r.get('is_real'),
|
||||
'gaze': {
|
||||
'pitch_deg': float(np.degrees(r['gaze_pitch'])) if 'gaze_pitch' in r else None,
|
||||
'yaw_deg': float(np.degrees(r['gaze_yaw'])) if 'gaze_yaw' in r else None
|
||||
}
|
||||
}
|
||||
output.append(item)
|
||||
|
||||
return output
|
||||
|
||||
# Usage
|
||||
results = pipeline.analyze(image)
|
||||
json_data = results_to_json(results)
|
||||
|
||||
with open('results.json', 'w') as f:
|
||||
json.dump(json_data, f, indent=2)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Next Steps
|
||||
|
||||
- [Batch Processing](batch-processing.md) - Process multiple images
|
||||
- [Video & Webcam](video-webcam.md) - Real-time processing
|
||||
- [Face Search](face-search.md) - Build a search system
|
||||
- [Detection Module](../modules/detection.md) - Detection options
|
||||
- [Recognition Module](../modules/recognition.md) - Recognition details
|
||||
177
docs/recipes/video-webcam.md
Normal file
@@ -0,0 +1,177 @@
|
||||
# Video & Webcam
|
||||
|
||||
Real-time face analysis for video streams.
|
||||
|
||||
!!! note "Work in Progress"
|
||||
This page contains example code patterns. Test thoroughly before using in production.
|
||||
|
||||
---
|
||||
|
||||
## Webcam Detection
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.draw import draw_detections
|
||||
|
||||
detector = RetinaFace()
|
||||
cap = cv2.VideoCapture(0)
|
||||
|
||||
print("Press 'q' to quit")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
faces = detector.detect(frame)
|
||||
|
||||
draw_detections(
|
||||
image=frame,
|
||||
bboxes=[f.bbox for f in faces],
|
||||
scores=[f.confidence for f in faces],
|
||||
landmarks=[f.landmarks for f in faces]
|
||||
)
|
||||
|
||||
cv2.imshow("Face Detection", frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Video File Processing
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface.detection import RetinaFace
|
||||
|
||||
def process_video(input_path, output_path):
|
||||
"""Process a video file."""
|
||||
detector = RetinaFace()
|
||||
cap = cv2.VideoCapture(input_path)
|
||||
|
||||
# Get video properties
|
||||
fps = cap.get(cv2.CAP_PROP_FPS)
|
||||
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
||||
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
||||
|
||||
# Setup output
|
||||
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
|
||||
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
|
||||
|
||||
while cap.read()[0]:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
faces = detector.detect(frame)
|
||||
# ... process and draw ...
|
||||
|
||||
out.write(frame)
|
||||
|
||||
cap.release()
|
||||
out.release()
|
||||
|
||||
# Usage
|
||||
process_video("input.mp4", "output.mp4")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Webcam Tracking
|
||||
|
||||
To track faces across frames with persistent IDs, pair a detector with `BYTETracker`:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
import numpy as np
|
||||
from uniface.common import xyxy_to_cxcywh
|
||||
from uniface.detection import SCRFD
|
||||
from uniface.tracking import BYTETracker
|
||||
from uniface.draw import draw_tracks
|
||||
|
||||
detector = SCRFD()
|
||||
tracker = BYTETracker(track_thresh=0.5, track_buffer=30)
|
||||
cap = cv2.VideoCapture(0)
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
faces = detector.detect(frame)
|
||||
dets = np.array([[*f.bbox, f.confidence] for f in faces])
|
||||
dets = dets if len(dets) > 0 else np.empty((0, 5))
|
||||
|
||||
tracks = tracker.update(dets)
|
||||
|
||||
if len(tracks) > 0 and len(faces) > 0:
|
||||
face_bboxes = np.array([f.bbox for f in faces], dtype=np.float32)
|
||||
track_ids = tracks[:, 4].astype(int)
|
||||
|
||||
face_centers = xyxy_to_cxcywh(face_bboxes)[:, :2]
|
||||
track_centers = xyxy_to_cxcywh(tracks[:, :4])[:, :2]
|
||||
|
||||
for ti in range(len(tracks)):
|
||||
dists = (track_centers[ti, 0] - face_centers[:, 0]) ** 2 + (track_centers[ti, 1] - face_centers[:, 1]) ** 2
|
||||
faces[int(np.argmin(dists))].track_id = track_ids[ti]
|
||||
|
||||
draw_tracks(image=frame, faces=[f for f in faces if f.track_id is not None])
|
||||
cv2.imshow("Face Tracking", frame)
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
```
|
||||
|
||||
For more details on tracker parameters and tuning, see [Tracking](../modules/tracking.md).
|
||||
|
||||
---
|
||||
|
||||
## Performance Tips
|
||||
|
||||
### Skip Frames
|
||||
|
||||
```python
|
||||
PROCESS_EVERY_N = 3 # Process every 3rd frame
|
||||
frame_count = 0
|
||||
last_faces = []
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if frame_count % PROCESS_EVERY_N == 0:
|
||||
last_faces = detector.detect(frame)
|
||||
frame_count += 1
|
||||
# Draw last_faces...
|
||||
```
|
||||
|
||||
### FPS Counter
|
||||
|
||||
```python
|
||||
import time
|
||||
|
||||
prev_time = time.time()
|
||||
while True:
|
||||
curr_time = time.time()
|
||||
fps = 1 / (curr_time - prev_time)
|
||||
prev_time = curr_time
|
||||
|
||||
cv2.putText(frame, f"FPS: {fps:.1f}", (10, 30),
|
||||
cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## See Also
|
||||
|
||||
- [Tracking Module](../modules/tracking.md) - Face tracking with BYTETracker
|
||||
- [Anonymize Stream](anonymize-stream.md) - Privacy protection in video
|
||||
- [Batch Processing](batch-processing.md) - Process multiple files
|
||||
- [Detection Module](../modules/detection.md) - Detection options
|
||||
- [Gaze Module](../modules/gaze.md) - Gaze estimation
|
||||
225
docs/stylesheets/extra.css
Normal file
@@ -0,0 +1,225 @@
|
||||
/* UniFace Documentation - Custom Styles */
|
||||
|
||||
/* ===== Hero Section ===== */
|
||||
|
||||
.md-content .hero {
|
||||
text-align: center;
|
||||
padding: 3rem 1rem 2rem;
|
||||
margin: 0 auto;
|
||||
max-width: 900px;
|
||||
}
|
||||
|
||||
.hero-title {
|
||||
font-size: 3.5rem !important;
|
||||
font-weight: 800 !important;
|
||||
margin-bottom: 0.5rem !important;
|
||||
background: linear-gradient(135deg, var(--md-primary-fg-color) 0%, #7c4dff 100%);
|
||||
-webkit-background-clip: text;
|
||||
-webkit-text-fill-color: transparent;
|
||||
background-clip: text;
|
||||
}
|
||||
|
||||
.hero-tagline {
|
||||
font-size: 1.5rem;
|
||||
color: var(--md-default-fg-color);
|
||||
margin-bottom: 0.5rem !important;
|
||||
font-weight: 500;
|
||||
}
|
||||
|
||||
.hero-subtitle {
|
||||
font-size: 1rem;
|
||||
color: var(--md-default-fg-color--light);
|
||||
margin-bottom: 1.5rem !important;
|
||||
font-weight: 400;
|
||||
letter-spacing: 0.5px;
|
||||
}
|
||||
|
||||
.hero .md-button {
|
||||
margin: 0.5rem 0.25rem;
|
||||
padding: 0.7rem 1.5rem;
|
||||
font-weight: 600;
|
||||
border-radius: 8px;
|
||||
transition: all 0.2s ease;
|
||||
}
|
||||
|
||||
.hero .md-button--primary {
|
||||
background: linear-gradient(135deg, var(--md-primary-fg-color) 0%, #5c6bc0 100%);
|
||||
border: none;
|
||||
box-shadow: 0 4px 14px rgba(63, 81, 181, 0.4);
|
||||
}
|
||||
|
||||
.hero .md-button--primary:hover {
|
||||
transform: translateY(-2px);
|
||||
box-shadow: 0 6px 20px rgba(63, 81, 181, 0.5);
|
||||
}
|
||||
|
||||
.hero .md-button:not(.md-button--primary) {
|
||||
border: 2px solid var(--md-primary-fg-color);
|
||||
background: transparent;
|
||||
color: var(--md-primary-fg-color);
|
||||
}
|
||||
|
||||
.hero .md-button:not(.md-button--primary):hover {
|
||||
background: var(--md-primary-fg-color);
|
||||
border-color: var(--md-primary-fg-color);
|
||||
color: white;
|
||||
transform: translateY(-2px);
|
||||
}
|
||||
|
||||
/* Badge styling in hero */
|
||||
.hero p a img {
|
||||
margin: 0 3px;
|
||||
height: 24px !important;
|
||||
}
|
||||
|
||||
/* ===== Feature Grid ===== */
|
||||
.feature-grid {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(4, 1fr);
|
||||
gap: 1.25rem;
|
||||
margin: 2rem 0;
|
||||
}
|
||||
|
||||
.feature-card {
|
||||
padding: 1.5rem;
|
||||
border-radius: 12px;
|
||||
background: var(--md-code-bg-color);
|
||||
border: 1px solid var(--md-default-fg-color--lightest);
|
||||
transition: all 0.3s ease;
|
||||
position: relative;
|
||||
overflow: hidden;
|
||||
}
|
||||
|
||||
.feature-card::before {
|
||||
content: '';
|
||||
position: absolute;
|
||||
top: 0;
|
||||
left: 0;
|
||||
right: 0;
|
||||
height: 3px;
|
||||
background: linear-gradient(90deg, var(--md-primary-fg-color), #7c4dff);
|
||||
opacity: 0;
|
||||
transition: opacity 0.3s ease;
|
||||
}
|
||||
|
||||
.feature-card:hover {
|
||||
transform: translateY(-4px);
|
||||
box-shadow: 0 12px 24px rgba(0, 0, 0, 0.1);
|
||||
border-color: var(--md-primary-fg-color--light);
|
||||
}
|
||||
|
||||
.feature-card:hover::before {
|
||||
opacity: 1;
|
||||
}
|
||||
|
||||
.feature-card h3 {
|
||||
margin-top: 0 !important;
|
||||
margin-bottom: 0.75rem !important;
|
||||
font-size: 1rem !important;
|
||||
font-weight: 600;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.5rem;
|
||||
}
|
||||
|
||||
.feature-card p {
|
||||
margin: 0;
|
||||
font-size: 0.875rem;
|
||||
color: var(--md-default-fg-color--light);
|
||||
line-height: 1.5;
|
||||
}
|
||||
|
||||
.feature-card a {
|
||||
display: inline-block;
|
||||
margin-top: 0.75rem;
|
||||
font-weight: 500;
|
||||
font-size: 0.875rem;
|
||||
}
|
||||
|
||||
/* ===== Next Steps Grid (2 columns) ===== */
|
||||
.next-steps-grid {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(2, 1fr);
|
||||
gap: 1.25rem;
|
||||
margin: 2rem 0;
|
||||
}
|
||||
|
||||
.next-steps-grid .feature-card {
|
||||
padding: 2rem;
|
||||
}
|
||||
|
||||
.next-steps-grid .feature-card h3 {
|
||||
font-size: 1.1rem !important;
|
||||
}
|
||||
|
||||
/* ===== Dark Mode Adjustments ===== */
|
||||
[data-md-color-scheme="slate"] .hero-title {
|
||||
background: linear-gradient(135deg, #7c4dff 0%, #b388ff 100%);
|
||||
-webkit-background-clip: text;
|
||||
-webkit-text-fill-color: transparent;
|
||||
background-clip: text;
|
||||
}
|
||||
|
||||
[data-md-color-scheme="slate"] .feature-card:hover {
|
||||
box-shadow: 0 12px 24px rgba(0, 0, 0, 0.3);
|
||||
}
|
||||
|
||||
[data-md-color-scheme="slate"] .hero .md-button--primary {
|
||||
background: linear-gradient(135deg, #7c4dff 0%, #b388ff 100%);
|
||||
box-shadow: 0 4px 14px rgba(124, 77, 255, 0.4);
|
||||
}
|
||||
|
||||
[data-md-color-scheme="slate"] .hero .md-button--primary:hover {
|
||||
box-shadow: 0 6px 20px rgba(124, 77, 255, 0.5);
|
||||
}
|
||||
|
||||
[data-md-color-scheme="slate"] .hero .md-button:not(.md-button--primary) {
|
||||
border: 2px solid rgba(255, 255, 255, 0.3);
|
||||
background: rgba(255, 255, 255, 0.05);
|
||||
color: rgba(255, 255, 255, 0.9);
|
||||
}
|
||||
|
||||
[data-md-color-scheme="slate"] .hero .md-button:not(.md-button--primary):hover {
|
||||
background: rgba(255, 255, 255, 0.1);
|
||||
border-color: rgba(255, 255, 255, 0.5);
|
||||
color: white;
|
||||
transform: translateY(-2px);
|
||||
}
|
||||
|
||||
/* ===== Responsive Design ===== */
|
||||
@media (max-width: 1200px) {
|
||||
.feature-grid {
|
||||
grid-template-columns: repeat(2, 1fr);
|
||||
}
|
||||
}
|
||||
|
||||
@media (max-width: 768px) {
|
||||
.hero-title {
|
||||
font-size: 2.5rem !important;
|
||||
}
|
||||
|
||||
.hero-subtitle {
|
||||
font-size: 1.1rem;
|
||||
}
|
||||
|
||||
.feature-grid,
|
||||
.next-steps-grid {
|
||||
grid-template-columns: 1fr;
|
||||
}
|
||||
|
||||
.hero .md-button {
|
||||
display: block;
|
||||
margin: 0.5rem auto;
|
||||
max-width: 200px;
|
||||
}
|
||||
}
|
||||
|
||||
@media (max-width: 480px) {
|
||||
.hero-title {
|
||||
font-size: 2rem !important;
|
||||
}
|
||||
|
||||
.feature-card {
|
||||
padding: 1.25rem;
|
||||
}
|
||||
}
|
||||
239
examples/02_face_alignment.ipynb
Normal file
271
examples/03_face_verification.ipynb
Normal file
366
examples/04_face_search.ipynb
Normal file
324
examples/05_face_analyzer.ipynb
Normal file
386
examples/06_face_parsing.ipynb
Normal file
327
examples/07_face_anonymization.ipynb
Normal file
268
examples/08_gaze_estimation.ipynb
Normal file
503
examples/09_face_segmentation.ipynb
Normal file
366
examples/10_face_vector_store.ipynb
Normal file
168
mkdocs.yml
Normal file
@@ -0,0 +1,168 @@
|
||||
site_name: UniFace
|
||||
site_description: All-in-One Face Analysis Library with ONNX Runtime
|
||||
site_author: Yakhyokhuja Valikhujaev
|
||||
site_url: https://yakhyo.github.io/uniface
|
||||
|
||||
repo_name: yakhyo/uniface
|
||||
repo_url: https://github.com/yakhyo/uniface
|
||||
edit_uri: edit/main/docs/
|
||||
|
||||
copyright: Copyright © 2025 Yakhyokhuja Valikhujaev
|
||||
|
||||
theme:
|
||||
name: material
|
||||
custom_dir: docs/overrides
|
||||
palette:
|
||||
- media: "(prefers-color-scheme)"
|
||||
toggle:
|
||||
icon: material/link
|
||||
name: Switch to light mode
|
||||
- media: "(prefers-color-scheme: light)"
|
||||
scheme: default
|
||||
primary: indigo
|
||||
accent: indigo
|
||||
toggle:
|
||||
icon: material/toggle-switch
|
||||
name: Switch to dark mode
|
||||
- media: "(prefers-color-scheme: dark)"
|
||||
scheme: slate
|
||||
primary: black
|
||||
accent: indigo
|
||||
toggle:
|
||||
icon: material/toggle-switch-off-outline
|
||||
name: Switch to system preference
|
||||
font:
|
||||
text: Roboto
|
||||
code: Roboto Mono
|
||||
features:
|
||||
- navigation.tabs
|
||||
- navigation.top
|
||||
- navigation.footer
|
||||
- navigation.indexes
|
||||
- navigation.instant
|
||||
- navigation.tracking
|
||||
- search.suggest
|
||||
- search.highlight
|
||||
- content.code.copy
|
||||
- content.code.annotate
|
||||
- content.action.edit
|
||||
- content.action.view
|
||||
- content.tabs.link
|
||||
- announce.dismiss
|
||||
- toc.follow
|
||||
|
||||
icon:
|
||||
logo: material/book-open-page-variant
|
||||
repo: fontawesome/brands/git-alt
|
||||
admonition:
|
||||
note: octicons/tag-16
|
||||
abstract: octicons/checklist-16
|
||||
info: octicons/info-16
|
||||
tip: octicons/squirrel-16
|
||||
success: octicons/check-16
|
||||
question: octicons/question-16
|
||||
warning: octicons/alert-16
|
||||
failure: octicons/x-circle-16
|
||||
danger: octicons/zap-16
|
||||
bug: octicons/bug-16
|
||||
example: octicons/beaker-16
|
||||
quote: octicons/quote-16
|
||||
|
||||
extra:
|
||||
social:
|
||||
- icon: fontawesome/brands/github
|
||||
link: https://github.com/yakhyo
|
||||
- icon: fontawesome/brands/python
|
||||
link: https://pypi.org/project/uniface/
|
||||
- icon: fontawesome/brands/x-twitter
|
||||
link: https://x.com/y_valikhujaev
|
||||
analytics:
|
||||
provider: google
|
||||
property: G-FGEHR2K5ZE
|
||||
|
||||
extra_css:
|
||||
- stylesheets/extra.css
|
||||
|
||||
markdown_extensions:
|
||||
- admonition
|
||||
- footnotes
|
||||
- attr_list
|
||||
- md_in_html
|
||||
- def_list
|
||||
- tables
|
||||
- toc:
|
||||
permalink: false
|
||||
toc_depth: 3
|
||||
- pymdownx.superfences:
|
||||
custom_fences:
|
||||
- name: mermaid
|
||||
class: mermaid
|
||||
format: !!python/name:pymdownx.superfences.fence_code_format
|
||||
- pymdownx.details
|
||||
- pymdownx.highlight:
|
||||
anchor_linenums: true
|
||||
line_spans: __span
|
||||
pygments_lang_class: true
|
||||
- pymdownx.inlinehilite
|
||||
- pymdownx.snippets
|
||||
- pymdownx.tabbed:
|
||||
alternate_style: true
|
||||
- pymdownx.emoji:
|
||||
emoji_index: !!python/name:material.extensions.emoji.twemoji
|
||||
emoji_generator: !!python/name:material.extensions.emoji.to_svg
|
||||
- pymdownx.tasklist:
|
||||
custom_checkbox: true
|
||||
- pymdownx.keys
|
||||
- pymdownx.mark
|
||||
- pymdownx.critic
|
||||
- pymdownx.caret
|
||||
- pymdownx.tilde
|
||||
|
||||
plugins:
|
||||
- search
|
||||
- git-committers:
|
||||
repository: yakhyo/uniface
|
||||
branch: main
|
||||
token: !ENV MKDOCS_GIT_COMMITTERS_APIKEY
|
||||
- git-revision-date-localized:
|
||||
enable_creation_date: true
|
||||
type: timeago
|
||||
|
||||
nav:
|
||||
- Home: index.md
|
||||
- Getting Started:
|
||||
- Installation: installation.md
|
||||
- Quickstart: quickstart.md
|
||||
- Notebooks: notebooks.md
|
||||
- Model Zoo: models.md
|
||||
- Datasets: datasets.md
|
||||
- Tutorials:
|
||||
- Image Pipeline: recipes/image-pipeline.md
|
||||
- Video & Webcam: recipes/video-webcam.md
|
||||
- Face Search: recipes/face-search.md
|
||||
- Batch Processing: recipes/batch-processing.md
|
||||
- Anonymize Stream: recipes/anonymize-stream.md
|
||||
- Custom Models: recipes/custom-models.md
|
||||
- API Reference:
|
||||
- Detection: modules/detection.md
|
||||
- Recognition: modules/recognition.md
|
||||
- Tracking: modules/tracking.md
|
||||
- Landmarks: modules/landmarks.md
|
||||
- Attributes: modules/attributes.md
|
||||
- Parsing: modules/parsing.md
|
||||
- Gaze: modules/gaze.md
|
||||
- Anti-Spoofing: modules/spoofing.md
|
||||
- Privacy: modules/privacy.md
|
||||
- Indexing: modules/indexing.md
|
||||
- Guides:
|
||||
- Overview: concepts/overview.md
|
||||
- Inputs & Outputs: concepts/inputs-outputs.md
|
||||
- Coordinate Systems: concepts/coordinate-systems.md
|
||||
- Execution Providers: concepts/execution-providers.md
|
||||
- Model Cache: concepts/model-cache-offline.md
|
||||
- Thresholds: concepts/thresholds-calibration.md
|
||||
- Resources:
|
||||
- Contributing: contributing.md
|
||||
- License: license-attribution.md
|
||||
- Releases: https://github.com/yakhyo/uniface/releases
|
||||
- Discussions: https://github.com/yakhyo/uniface/discussions
|
||||
115
pyproject.toml
@@ -1,22 +1,54 @@
|
||||
[project]
|
||||
name = "uniface"
|
||||
version = "1.1.1"
|
||||
description = "UniFace: A Comprehensive Library for Face Detection, Recognition, Landmark Analysis, Age, and Gender Detection"
|
||||
version = "3.1.0"
|
||||
description = "UniFace: A Comprehensive Library for Face Detection, Recognition, Tracking, Landmark Analysis, Face Parsing, Gaze Estimation, Age, and Gender Detection"
|
||||
readme = "README.md"
|
||||
license = { text = "MIT" }
|
||||
authors = [
|
||||
{ name = "Yakhyokhuja Valikhujaev", email = "yakhyo9696@gmail.com" }
|
||||
license = "MIT"
|
||||
authors = [{ name = "Yakhyokhuja Valikhujaev", email = "yakhyo9696@gmail.com" }]
|
||||
maintainers = [
|
||||
{ name = "Yakhyokhuja Valikhujaev", email = "yakhyo9696@gmail.com" },
|
||||
]
|
||||
|
||||
requires-python = ">=3.10,<3.14"
|
||||
keywords = [
|
||||
"face-detection",
|
||||
"face-recognition",
|
||||
"face-tracking",
|
||||
"facial-landmarks",
|
||||
"face-parsing",
|
||||
"face-segmentation",
|
||||
"gaze-estimation",
|
||||
"age-detection",
|
||||
"gender-detection",
|
||||
"computer-vision",
|
||||
"deep-learning",
|
||||
"onnx",
|
||||
"onnxruntime",
|
||||
"face-analysis",
|
||||
"bisenet",
|
||||
]
|
||||
|
||||
classifiers = [
|
||||
"Development Status :: 5 - Production/Stable",
|
||||
"Intended Audience :: Developers",
|
||||
"Intended Audience :: Science/Research",
|
||||
"Operating System :: OS Independent",
|
||||
"Programming Language :: Python :: 3",
|
||||
"Programming Language :: Python :: 3.10",
|
||||
"Programming Language :: Python :: 3.11",
|
||||
"Programming Language :: Python :: 3.12",
|
||||
"Programming Language :: Python :: 3.13",
|
||||
]
|
||||
|
||||
dependencies = [
|
||||
"numpy>=1.21.0",
|
||||
"opencv-python>=4.5.0",
|
||||
"onnx>=1.12.0",
|
||||
"onnxruntime>=1.16.0",
|
||||
"scikit-image>=0.19.0",
|
||||
"scipy>=1.7.0",
|
||||
"requests>=2.28.0",
|
||||
"tqdm>=4.64.0"
|
||||
"tqdm>=4.64.0",
|
||||
]
|
||||
requires-python = ">=3.10"
|
||||
|
||||
[project.optional-dependencies]
|
||||
dev = ["pytest>=7.0.0", "ruff>=0.4.0"]
|
||||
@@ -25,23 +57,84 @@ gpu = ["onnxruntime-gpu>=1.16.0"]
|
||||
[project.urls]
|
||||
Homepage = "https://github.com/yakhyo/uniface"
|
||||
Repository = "https://github.com/yakhyo/uniface"
|
||||
Documentation = "https://yakhyo.github.io/uniface"
|
||||
"Quick Start" = "https://yakhyo.github.io/uniface/quickstart/"
|
||||
"Model Zoo" = "https://yakhyo.github.io/uniface/models/"
|
||||
|
||||
[build-system]
|
||||
requires = ["setuptools>=64", "wheel"]
|
||||
build-backend = "setuptools.build_meta"
|
||||
|
||||
[tool.setuptools]
|
||||
packages = { find = {} }
|
||||
packages = { find = { where = ["."], include = ["uniface*"] } }
|
||||
|
||||
[tool.setuptools.package-data]
|
||||
"uniface" = ["*.txt", "*.md"]
|
||||
uniface = ["py.typed"]
|
||||
|
||||
[tool.ruff]
|
||||
line-length = 120
|
||||
target-version = "py310"
|
||||
exclude = [
|
||||
".git",
|
||||
".ruff_cache",
|
||||
"__pycache__",
|
||||
"build",
|
||||
"dist",
|
||||
"*.egg-info",
|
||||
".venv",
|
||||
"venv",
|
||||
".pytest_cache",
|
||||
".mypy_cache",
|
||||
"*.ipynb",
|
||||
]
|
||||
|
||||
[tool.ruff.format]
|
||||
quote-style = "single"
|
||||
docstring-code-format = true
|
||||
|
||||
[tool.ruff.lint]
|
||||
select = ["E", "F", "I", "W"]
|
||||
select = [
|
||||
"E", # pycodestyle errors
|
||||
"F", # pyflakes
|
||||
"I", # isort
|
||||
"W", # pycodestyle warnings
|
||||
"UP", # pyupgrade (modern Python syntax)
|
||||
"B", # flake8-bugbear
|
||||
"C4", # flake8-comprehensions
|
||||
"SIM", # flake8-simplify
|
||||
"RUF", # Ruff-specific rules
|
||||
]
|
||||
ignore = [
|
||||
"E501", # Line too long (handled by formatter)
|
||||
"B008", # Function call in default argument (common in FastAPI/Click)
|
||||
"SIM108", # Use ternary operator (can reduce readability)
|
||||
"RUF022", # Allow logical grouping in __all__ instead of alphabetical sorting
|
||||
]
|
||||
|
||||
[tool.ruff.lint.flake8-quotes]
|
||||
docstring-quotes = "double"
|
||||
|
||||
[tool.ruff.lint.isort]
|
||||
force-single-line = false
|
||||
force-sort-within-sections = true
|
||||
known-first-party = ["uniface"]
|
||||
section-order = [
|
||||
"future",
|
||||
"standard-library",
|
||||
"third-party",
|
||||
"first-party",
|
||||
"local-folder",
|
||||
]
|
||||
|
||||
[tool.ruff.lint.pydocstyle]
|
||||
convention = "google"
|
||||
|
||||
[tool.bandit]
|
||||
exclude_dirs = ["tests", "scripts", "examples"]
|
||||
skips = ["B101", "B614"] # B101: assert, B614: torch.jit.load (models are SHA256 verified)
|
||||
|
||||
[tool.pytest.ini_options]
|
||||
testpaths = ["tests"]
|
||||
python_files = ["test_*.py"]
|
||||
python_functions = ["test_*"]
|
||||
addopts = "-v --tb=short"
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
numpy>=1.21.0
|
||||
opencv-python>=4.5.0
|
||||
onnx>=1.12.0
|
||||
onnxruntime>=1.16.0
|
||||
scikit-image>=0.19.0
|
||||
scipy>=1.7.0
|
||||
requests>=2.28.0
|
||||
pytest>=7.0.0
|
||||
tqdm>=4.64.0
|
||||
|
||||
@@ -1,68 +0,0 @@
|
||||
# Scripts
|
||||
|
||||
Scripts for testing UniFace features.
|
||||
|
||||
## Available Scripts
|
||||
|
||||
| Script | Description |
|
||||
|--------|-------------|
|
||||
| `run_detection.py` | Face detection on image or webcam |
|
||||
| `run_age_gender.py` | Age and gender prediction |
|
||||
| `run_landmarks.py` | 106-point facial landmark detection |
|
||||
| `run_recognition.py` | Face embedding extraction and comparison |
|
||||
| `run_face_search.py` | Real-time face matching against reference |
|
||||
| `run_video_detection.py` | Face detection on video files |
|
||||
| `batch_process.py` | Batch process folder of images |
|
||||
| `download_model.py` | Download model weights |
|
||||
| `sha256_generate.py` | Generate SHA256 hash for model files |
|
||||
|
||||
## Usage Examples
|
||||
|
||||
```bash
|
||||
# Face detection
|
||||
python scripts/run_detection.py --image assets/test.jpg
|
||||
python scripts/run_detection.py --webcam
|
||||
|
||||
# Age and gender
|
||||
python scripts/run_age_gender.py --image assets/test.jpg
|
||||
python scripts/run_age_gender.py --webcam
|
||||
|
||||
# Landmarks
|
||||
python scripts/run_landmarks.py --image assets/test.jpg
|
||||
python scripts/run_landmarks.py --webcam
|
||||
|
||||
# Face recognition (extract embedding)
|
||||
python scripts/run_recognition.py --image assets/test.jpg
|
||||
|
||||
# Face comparison
|
||||
python scripts/run_recognition.py --image1 face1.jpg --image2 face2.jpg
|
||||
|
||||
# Face search (match webcam against reference)
|
||||
python scripts/run_face_search.py --image reference.jpg
|
||||
|
||||
# Video processing
|
||||
python scripts/run_video_detection.py --input video.mp4 --output output.mp4
|
||||
|
||||
# Batch processing
|
||||
python scripts/batch_process.py --input images/ --output results/
|
||||
|
||||
# Download models
|
||||
python scripts/download_model.py --model-type retinaface
|
||||
python scripts/download_model.py # downloads all
|
||||
```
|
||||
|
||||
## Common Options
|
||||
|
||||
| Option | Description |
|
||||
|--------|-------------|
|
||||
| `--image` | Path to input image |
|
||||
| `--webcam` | Use webcam instead of image |
|
||||
| `--detector` | Choose detector: `retinaface` or `scrfd` |
|
||||
| `--threshold` | Visualization confidence threshold (default: 0.6) |
|
||||
| `--save_dir` | Output directory (default: `outputs`) |
|
||||
|
||||
## Quick Test
|
||||
|
||||
```bash
|
||||
python scripts/run_detection.py --image assets/test.jpg
|
||||
```
|
||||
@@ -1,96 +0,0 @@
|
||||
# Batch face detection on a folder of images
|
||||
# Usage: python batch_process.py --input images/ --output results/
|
||||
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
from tqdm import tqdm
|
||||
|
||||
from uniface import SCRFD, RetinaFace
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
|
||||
def get_image_files(input_dir: Path, extensions: tuple) -> list:
|
||||
files = []
|
||||
for ext in extensions:
|
||||
files.extend(input_dir.glob(f"*.{ext}"))
|
||||
files.extend(input_dir.glob(f"*.{ext.upper()}"))
|
||||
return sorted(files)
|
||||
|
||||
|
||||
def process_image(detector, image_path: Path, output_path: Path, threshold: float) -> int:
|
||||
"""Process single image. Returns face count or -1 on error."""
|
||||
image = cv2.imread(str(image_path))
|
||||
if image is None:
|
||||
return -1
|
||||
|
||||
faces = detector.detect(image)
|
||||
|
||||
# unpack face data for visualization
|
||||
bboxes = [f["bbox"] for f in faces]
|
||||
scores = [f["confidence"] for f in faces]
|
||||
landmarks = [f["landmarks"] for f in faces]
|
||||
draw_detections(image, bboxes, scores, landmarks, vis_threshold=threshold)
|
||||
|
||||
cv2.putText(
|
||||
image,
|
||||
f"Faces: {len(faces)}",
|
||||
(10, 30),
|
||||
cv2.FONT_HERSHEY_SIMPLEX,
|
||||
1,
|
||||
(0, 255, 0),
|
||||
2,
|
||||
)
|
||||
cv2.imwrite(str(output_path), image)
|
||||
|
||||
return len(faces)
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="Batch process images with face detection")
|
||||
parser.add_argument("--input", type=str, required=True, help="Input directory")
|
||||
parser.add_argument("--output", type=str, required=True, help="Output directory")
|
||||
parser.add_argument("--detector", type=str, default="retinaface", choices=["retinaface", "scrfd"])
|
||||
parser.add_argument("--threshold", type=float, default=0.6, help="Visualization threshold")
|
||||
parser.add_argument("--extensions", type=str, default="jpg,jpeg,png,bmp", help="Image extensions")
|
||||
args = parser.parse_args()
|
||||
|
||||
input_path = Path(args.input)
|
||||
output_path = Path(args.output)
|
||||
|
||||
if not input_path.exists():
|
||||
print(f"Error: Input directory '{args.input}' does not exist")
|
||||
return
|
||||
|
||||
output_path.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
extensions = tuple(ext.strip() for ext in args.extensions.split(","))
|
||||
image_files = get_image_files(input_path, extensions)
|
||||
|
||||
if not image_files:
|
||||
print(f"No images found with extensions {extensions}")
|
||||
return
|
||||
|
||||
print(f"Found {len(image_files)} images")
|
||||
|
||||
detector = RetinaFace() if args.detector == "retinaface" else SCRFD()
|
||||
|
||||
success, errors, total_faces = 0, 0, 0
|
||||
|
||||
for img_path in tqdm(image_files, desc="Processing", unit="img"):
|
||||
out_path = output_path / f"{img_path.stem}_detected{img_path.suffix}"
|
||||
result = process_image(detector, img_path, out_path, args.threshold)
|
||||
|
||||
if result >= 0:
|
||||
success += 1
|
||||
total_faces += result
|
||||
else:
|
||||
errors += 1
|
||||
print(f"\nFailed: {img_path.name}")
|
||||
|
||||
print(f"\nDone! {success} processed, {errors} errors, {total_faces} faces total")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -1,124 +0,0 @@
|
||||
# Age and gender prediction on detected faces
|
||||
# Usage: python run_age_gender.py --image path/to/image.jpg
|
||||
# python run_age_gender.py --webcam
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
|
||||
from uniface import SCRFD, AgeGender, RetinaFace
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
|
||||
def draw_age_gender_label(image, bbox, gender: str, age: int):
|
||||
"""Draw age/gender label above the bounding box."""
|
||||
x1, y1 = int(bbox[0]), int(bbox[1])
|
||||
text = f"{gender}, {age}y"
|
||||
(tw, th), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
|
||||
cv2.rectangle(image, (x1, y1 - th - 10), (x1 + tw + 10, y1), (0, 255, 0), -1)
|
||||
cv2.putText(image, text, (x1 + 5, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 2)
|
||||
|
||||
|
||||
def process_image(
|
||||
detector,
|
||||
age_gender,
|
||||
image_path: str,
|
||||
save_dir: str = "outputs",
|
||||
threshold: float = 0.6,
|
||||
):
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
print(f"Error: Failed to load image from '{image_path}'")
|
||||
return
|
||||
|
||||
faces = detector.detect(image)
|
||||
print(f"Detected {len(faces)} face(s)")
|
||||
|
||||
if not faces:
|
||||
return
|
||||
|
||||
bboxes = [f["bbox"] for f in faces]
|
||||
scores = [f["confidence"] for f in faces]
|
||||
landmarks = [f["landmarks"] for f in faces]
|
||||
draw_detections(image, bboxes, scores, landmarks, vis_threshold=threshold)
|
||||
|
||||
for i, face in enumerate(faces):
|
||||
gender, age = age_gender.predict(image, face["bbox"])
|
||||
print(f" Face {i + 1}: {gender}, {age} years old")
|
||||
draw_age_gender_label(image, face["bbox"], gender, age)
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f"{Path(image_path).stem}_age_gender.jpg")
|
||||
cv2.imwrite(output_path, image)
|
||||
print(f"Output saved: {output_path}")
|
||||
|
||||
|
||||
def run_webcam(detector, age_gender, threshold: float = 0.6):
|
||||
cap = cv2.VideoCapture(0) # 0 = default webcam
|
||||
if not cap.isOpened():
|
||||
print("Cannot open webcam")
|
||||
return
|
||||
|
||||
print("Press 'q' to quit")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
frame = cv2.flip(frame, 1) # mirror for natural interaction
|
||||
if not ret:
|
||||
break
|
||||
|
||||
faces = detector.detect(frame)
|
||||
|
||||
# unpack face data for visualization
|
||||
bboxes = [f["bbox"] for f in faces]
|
||||
scores = [f["confidence"] for f in faces]
|
||||
landmarks = [f["landmarks"] for f in faces]
|
||||
draw_detections(frame, bboxes, scores, landmarks, vis_threshold=threshold)
|
||||
|
||||
for face in faces:
|
||||
gender, age = age_gender.predict(frame, face["bbox"]) # predict per face
|
||||
draw_age_gender_label(frame, face["bbox"], gender, age)
|
||||
|
||||
cv2.putText(
|
||||
frame,
|
||||
f"Faces: {len(faces)}",
|
||||
(10, 30),
|
||||
cv2.FONT_HERSHEY_SIMPLEX,
|
||||
1,
|
||||
(0, 255, 0),
|
||||
2,
|
||||
)
|
||||
cv2.imshow("Age & Gender Detection", frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord("q"):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="Run age and gender detection")
|
||||
parser.add_argument("--image", type=str, help="Path to input image")
|
||||
parser.add_argument("--webcam", action="store_true", help="Use webcam")
|
||||
parser.add_argument("--detector", type=str, default="retinaface", choices=["retinaface", "scrfd"])
|
||||
parser.add_argument("--threshold", type=float, default=0.6, help="Visualization threshold")
|
||||
parser.add_argument("--save_dir", type=str, default="outputs")
|
||||
args = parser.parse_args()
|
||||
|
||||
if not args.image and not args.webcam:
|
||||
parser.error("Either --image or --webcam must be specified")
|
||||
|
||||
detector = RetinaFace() if args.detector == "retinaface" else SCRFD()
|
||||
age_gender = AgeGender()
|
||||
|
||||
if args.webcam:
|
||||
run_webcam(detector, age_gender, args.threshold)
|
||||
else:
|
||||
process_image(detector, age_gender, args.image, args.save_dir, args.threshold)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -1,95 +0,0 @@
|
||||
# Face detection on image or webcam
|
||||
# Usage: python run_detection.py --image path/to/image.jpg
|
||||
# python run_detection.py --webcam
|
||||
|
||||
import argparse
|
||||
import os
|
||||
|
||||
import cv2
|
||||
|
||||
from uniface.detection import SCRFD, RetinaFace
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
|
||||
def process_image(detector, image_path: str, threshold: float = 0.6, save_dir: str = "outputs"):
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
print(f"Error: Failed to load image from '{image_path}'")
|
||||
return
|
||||
|
||||
faces = detector.detect(image)
|
||||
|
||||
if faces:
|
||||
bboxes = [face["bbox"] for face in faces]
|
||||
scores = [face["confidence"] for face in faces]
|
||||
landmarks = [face["landmarks"] for face in faces]
|
||||
draw_detections(image, bboxes, scores, landmarks, vis_threshold=threshold)
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f"{os.path.splitext(os.path.basename(image_path))[0]}_out.jpg")
|
||||
cv2.imwrite(output_path, image)
|
||||
print(f"Output saved: {output_path}")
|
||||
|
||||
|
||||
def run_webcam(detector, threshold: float = 0.6):
|
||||
cap = cv2.VideoCapture(0) # 0 = default webcam
|
||||
if not cap.isOpened():
|
||||
print("Cannot open webcam")
|
||||
return
|
||||
|
||||
print("Press 'q' to quit")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
frame = cv2.flip(frame, 1) # mirror for natural interaction
|
||||
if not ret:
|
||||
break
|
||||
|
||||
faces = detector.detect(frame)
|
||||
|
||||
# unpack face data for visualization
|
||||
bboxes = [f["bbox"] for f in faces]
|
||||
scores = [f["confidence"] for f in faces]
|
||||
landmarks = [f["landmarks"] for f in faces]
|
||||
draw_detections(frame, bboxes, scores, landmarks, vis_threshold=threshold)
|
||||
|
||||
cv2.putText(
|
||||
frame,
|
||||
f"Faces: {len(faces)}",
|
||||
(10, 30),
|
||||
cv2.FONT_HERSHEY_SIMPLEX,
|
||||
1,
|
||||
(0, 255, 0),
|
||||
2,
|
||||
)
|
||||
cv2.imshow("Face Detection", frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord("q"):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="Run face detection")
|
||||
parser.add_argument("--image", type=str, help="Path to input image")
|
||||
parser.add_argument("--webcam", action="store_true", help="Use webcam")
|
||||
parser.add_argument("--method", type=str, default="retinaface", choices=["retinaface", "scrfd"])
|
||||
parser.add_argument("--threshold", type=float, default=0.6, help="Visualization threshold")
|
||||
parser.add_argument("--save_dir", type=str, default="outputs")
|
||||
args = parser.parse_args()
|
||||
|
||||
if not args.image and not args.webcam:
|
||||
parser.error("Either --image or --webcam must be specified")
|
||||
|
||||
detector = RetinaFace() if args.method == "retinaface" else SCRFD()
|
||||
|
||||
if args.webcam:
|
||||
run_webcam(detector, args.threshold)
|
||||
else:
|
||||
process_image(detector, args.image, args.threshold, args.save_dir)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -1,97 +0,0 @@
|
||||
# Real-time face search: match webcam faces against a reference image
|
||||
# Usage: python run_face_search.py --image reference.jpg
|
||||
|
||||
import argparse
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from uniface.detection import SCRFD, RetinaFace
|
||||
from uniface.face_utils import compute_similarity
|
||||
from uniface.recognition import ArcFace, MobileFace, SphereFace
|
||||
|
||||
|
||||
def get_recognizer(name: str):
|
||||
if name == "arcface":
|
||||
return ArcFace()
|
||||
elif name == "mobileface":
|
||||
return MobileFace()
|
||||
else:
|
||||
return SphereFace()
|
||||
|
||||
|
||||
def extract_reference_embedding(detector, recognizer, image_path: str) -> np.ndarray:
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
raise RuntimeError(f"Failed to load image: {image_path}")
|
||||
|
||||
faces = detector.detect(image)
|
||||
if not faces:
|
||||
raise RuntimeError("No faces found in reference image.")
|
||||
|
||||
landmarks = np.array(faces[0]["landmarks"])
|
||||
return recognizer.get_normalized_embedding(image, landmarks)
|
||||
|
||||
|
||||
def run_webcam(detector, recognizer, ref_embedding: np.ndarray, threshold: float = 0.4):
|
||||
cap = cv2.VideoCapture(0) # 0 = default webcam
|
||||
if not cap.isOpened():
|
||||
raise RuntimeError("Webcam could not be opened.")
|
||||
|
||||
print("Press 'q' to quit")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
frame = cv2.flip(frame, 1) # mirror for natural interaction
|
||||
if not ret:
|
||||
break
|
||||
|
||||
faces = detector.detect(frame)
|
||||
|
||||
for face in faces:
|
||||
bbox = face["bbox"]
|
||||
landmarks = np.array(face["landmarks"])
|
||||
x1, y1, x2, y2 = map(int, bbox)
|
||||
|
||||
embedding = recognizer.get_normalized_embedding(frame, landmarks)
|
||||
sim = compute_similarity(ref_embedding, embedding) # compare with reference
|
||||
|
||||
# green = match, red = unknown
|
||||
label = f"Match ({sim:.2f})" if sim > threshold else f"Unknown ({sim:.2f})"
|
||||
color = (0, 255, 0) if sim > threshold else (0, 0, 255)
|
||||
|
||||
cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
|
||||
cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)
|
||||
|
||||
cv2.imshow("Face Recognition", frame)
|
||||
if cv2.waitKey(1) & 0xFF == ord("q"):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="Face search using a reference image")
|
||||
parser.add_argument("--image", type=str, required=True, help="Reference face image")
|
||||
parser.add_argument("--threshold", type=float, default=0.4, help="Match threshold")
|
||||
parser.add_argument("--detector", type=str, default="scrfd", choices=["retinaface", "scrfd"])
|
||||
parser.add_argument(
|
||||
"--recognizer",
|
||||
type=str,
|
||||
default="arcface",
|
||||
choices=["arcface", "mobileface", "sphereface"],
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
detector = RetinaFace() if args.detector == "retinaface" else SCRFD()
|
||||
recognizer = get_recognizer(args.recognizer)
|
||||
|
||||
print(f"Loading reference: {args.image}")
|
||||
ref_embedding = extract_reference_embedding(detector, recognizer, args.image)
|
||||
|
||||
run_webcam(detector, recognizer, ref_embedding, args.threshold)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -1,117 +0,0 @@
|
||||
# 106-point facial landmark detection
|
||||
# Usage: python run_landmarks.py --image path/to/image.jpg
|
||||
# python run_landmarks.py --webcam
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
|
||||
from uniface import SCRFD, Landmark106, RetinaFace
|
||||
|
||||
|
||||
def process_image(detector, landmarker, image_path: str, save_dir: str = "outputs"):
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
print(f"Error: Failed to load image from '{image_path}'")
|
||||
return
|
||||
|
||||
faces = detector.detect(image)
|
||||
print(f"Detected {len(faces)} face(s)")
|
||||
|
||||
if not faces:
|
||||
return
|
||||
|
||||
for i, face in enumerate(faces):
|
||||
bbox = face["bbox"]
|
||||
x1, y1, x2, y2 = map(int, bbox)
|
||||
cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
|
||||
|
||||
landmarks = landmarker.get_landmarks(image, bbox)
|
||||
print(f" Face {i + 1}: {len(landmarks)} landmarks")
|
||||
|
||||
for x, y in landmarks.astype(int):
|
||||
cv2.circle(image, (x, y), 1, (0, 255, 0), -1)
|
||||
|
||||
cv2.putText(
|
||||
image,
|
||||
f"Face {i + 1}",
|
||||
(x1, y1 - 10),
|
||||
cv2.FONT_HERSHEY_SIMPLEX,
|
||||
0.5,
|
||||
(0, 255, 0),
|
||||
2,
|
||||
)
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f"{Path(image_path).stem}_landmarks.jpg")
|
||||
cv2.imwrite(output_path, image)
|
||||
print(f"Output saved: {output_path}")
|
||||
|
||||
|
||||
def run_webcam(detector, landmarker):
|
||||
cap = cv2.VideoCapture(0) # 0 = default webcam
|
||||
if not cap.isOpened():
|
||||
print("Cannot open webcam")
|
||||
return
|
||||
|
||||
print("Press 'q' to quit")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
frame = cv2.flip(frame, 1) # mirror for natural interaction
|
||||
if not ret:
|
||||
break
|
||||
|
||||
faces = detector.detect(frame)
|
||||
|
||||
for face in faces:
|
||||
bbox = face["bbox"]
|
||||
x1, y1, x2, y2 = map(int, bbox)
|
||||
cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
|
||||
|
||||
landmarks = landmarker.get_landmarks(frame, bbox) # 106 points
|
||||
for x, y in landmarks.astype(int):
|
||||
cv2.circle(frame, (x, y), 1, (0, 255, 0), -1)
|
||||
|
||||
cv2.putText(
|
||||
frame,
|
||||
f"Faces: {len(faces)}",
|
||||
(10, 30),
|
||||
cv2.FONT_HERSHEY_SIMPLEX,
|
||||
1,
|
||||
(0, 255, 0),
|
||||
2,
|
||||
)
|
||||
cv2.imshow("106-Point Landmarks", frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord("q"):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="Run facial landmark detection")
|
||||
parser.add_argument("--image", type=str, help="Path to input image")
|
||||
parser.add_argument("--webcam", action="store_true", help="Use webcam")
|
||||
parser.add_argument("--detector", type=str, default="retinaface", choices=["retinaface", "scrfd"])
|
||||
parser.add_argument("--save_dir", type=str, default="outputs")
|
||||
args = parser.parse_args()
|
||||
|
||||
if not args.image and not args.webcam:
|
||||
parser.error("Either --image or --webcam must be specified")
|
||||
|
||||
detector = RetinaFace() if args.detector == "retinaface" else SCRFD()
|
||||
landmarker = Landmark106()
|
||||
|
||||
if args.webcam:
|
||||
run_webcam(detector, landmarker)
|
||||
else:
|
||||
process_image(detector, landmarker, args.image, args.save_dir)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -1,103 +0,0 @@
|
||||
# Face recognition: extract embeddings or compare two faces
|
||||
# Usage: python run_recognition.py --image path/to/image.jpg
|
||||
# python run_recognition.py --image1 face1.jpg --image2 face2.jpg
|
||||
|
||||
import argparse
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from uniface.detection import SCRFD, RetinaFace
|
||||
from uniface.face_utils import compute_similarity
|
||||
from uniface.recognition import ArcFace, MobileFace, SphereFace
|
||||
|
||||
|
||||
def get_recognizer(name: str):
|
||||
if name == "arcface":
|
||||
return ArcFace()
|
||||
elif name == "mobileface":
|
||||
return MobileFace()
|
||||
else:
|
||||
return SphereFace()
|
||||
|
||||
|
||||
def run_inference(detector, recognizer, image_path: str):
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
print(f"Error: Failed to load image from '{image_path}'")
|
||||
return
|
||||
|
||||
faces = detector.detect(image)
|
||||
if not faces:
|
||||
print("No faces detected.")
|
||||
return
|
||||
|
||||
print(f"Detected {len(faces)} face(s). Extracting embedding for the first face...")
|
||||
|
||||
landmarks = np.array(faces[0]["landmarks"]) # 5-point landmarks for alignment
|
||||
embedding = recognizer.get_embedding(image, landmarks)
|
||||
norm_embedding = recognizer.get_normalized_embedding(image, landmarks) # L2 normalized
|
||||
|
||||
print(f" Embedding shape: {embedding.shape}")
|
||||
print(f" L2 norm (raw): {np.linalg.norm(embedding):.4f}")
|
||||
print(f" L2 norm (normalized): {np.linalg.norm(norm_embedding):.4f}")
|
||||
|
||||
|
||||
def compare_faces(detector, recognizer, image1_path: str, image2_path: str, threshold: float = 0.35):
|
||||
img1 = cv2.imread(image1_path)
|
||||
img2 = cv2.imread(image2_path)
|
||||
|
||||
if img1 is None or img2 is None:
|
||||
print("Error: Failed to load one or both images")
|
||||
return
|
||||
|
||||
faces1 = detector.detect(img1)
|
||||
faces2 = detector.detect(img2)
|
||||
|
||||
if not faces1 or not faces2:
|
||||
print("Error: No faces detected in one or both images")
|
||||
return
|
||||
|
||||
landmarks1 = np.array(faces1[0]["landmarks"])
|
||||
landmarks2 = np.array(faces2[0]["landmarks"])
|
||||
|
||||
embedding1 = recognizer.get_normalized_embedding(img1, landmarks1)
|
||||
embedding2 = recognizer.get_normalized_embedding(img2, landmarks2)
|
||||
|
||||
# cosine similarity for normalized embeddings
|
||||
similarity = compute_similarity(embedding1, embedding2, normalized=True)
|
||||
is_match = similarity > threshold
|
||||
|
||||
print(f"Similarity: {similarity:.4f}")
|
||||
print(f"Result: {'Same person' if is_match else 'Different person'} (threshold: {threshold})")
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="Face recognition and comparison")
|
||||
parser.add_argument("--image", type=str, help="Single image for embedding extraction")
|
||||
parser.add_argument("--image1", type=str, help="First image for comparison")
|
||||
parser.add_argument("--image2", type=str, help="Second image for comparison")
|
||||
parser.add_argument("--threshold", type=float, default=0.35, help="Similarity threshold")
|
||||
parser.add_argument("--detector", type=str, default="retinaface", choices=["retinaface", "scrfd"])
|
||||
parser.add_argument(
|
||||
"--recognizer",
|
||||
type=str,
|
||||
default="arcface",
|
||||
choices=["arcface", "mobileface", "sphereface"],
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
detector = RetinaFace() if args.detector == "retinaface" else SCRFD()
|
||||
recognizer = get_recognizer(args.recognizer)
|
||||
|
||||
if args.image1 and args.image2:
|
||||
compare_faces(detector, recognizer, args.image1, args.image2, args.threshold)
|
||||
elif args.image:
|
||||
run_inference(detector, recognizer, args.image)
|
||||
else:
|
||||
print("Error: Provide --image or both --image1 and --image2")
|
||||
parser.print_help()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -1,107 +0,0 @@
|
||||
# Face detection on video files
|
||||
# Usage: python run_video_detection.py --input video.mp4 --output output.mp4
|
||||
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
from tqdm import tqdm
|
||||
|
||||
from uniface import SCRFD, RetinaFace
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
|
||||
def process_video(
|
||||
detector,
|
||||
input_path: str,
|
||||
output_path: str,
|
||||
threshold: float = 0.6,
|
||||
show_preview: bool = False,
|
||||
):
|
||||
cap = cv2.VideoCapture(input_path)
|
||||
if not cap.isOpened():
|
||||
print(f"Error: Cannot open video file '{input_path}'")
|
||||
return
|
||||
|
||||
# get video properties
|
||||
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
||||
fps = cap.get(cv2.CAP_PROP_FPS)
|
||||
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
||||
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
||||
|
||||
print(f"Input: {input_path} ({width}x{height}, {fps:.1f} fps, {total_frames} frames)")
|
||||
print(f"Output: {output_path}")
|
||||
|
||||
fourcc = cv2.VideoWriter_fourcc(*"mp4v") # codec for .mp4
|
||||
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
|
||||
|
||||
if not out.isOpened():
|
||||
print(f"Error: Cannot create output video '{output_path}'")
|
||||
cap.release()
|
||||
return
|
||||
|
||||
frame_count = 0
|
||||
total_faces = 0
|
||||
|
||||
for _ in tqdm(range(total_frames), desc="Processing", unit="frames"):
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
frame_count += 1
|
||||
faces = detector.detect(frame)
|
||||
total_faces += len(faces)
|
||||
|
||||
bboxes = [f["bbox"] for f in faces]
|
||||
scores = [f["confidence"] for f in faces]
|
||||
landmarks = [f["landmarks"] for f in faces]
|
||||
draw_detections(frame, bboxes, scores, landmarks, vis_threshold=threshold)
|
||||
|
||||
cv2.putText(
|
||||
frame,
|
||||
f"Faces: {len(faces)}",
|
||||
(10, 30),
|
||||
cv2.FONT_HERSHEY_SIMPLEX,
|
||||
1,
|
||||
(0, 255, 0),
|
||||
2,
|
||||
)
|
||||
out.write(frame)
|
||||
|
||||
if show_preview:
|
||||
cv2.imshow("Processing - Press 'q' to cancel", frame)
|
||||
if cv2.waitKey(1) & 0xFF == ord("q"):
|
||||
print("\nCancelled by user")
|
||||
break
|
||||
|
||||
cap.release()
|
||||
out.release()
|
||||
if show_preview:
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
avg_faces = total_faces / frame_count if frame_count > 0 else 0
|
||||
print(f"\nDone! {frame_count} frames, {total_faces} faces ({avg_faces:.1f} avg/frame)")
|
||||
print(f"Saved: {output_path}")
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="Process video with face detection")
|
||||
parser.add_argument("--input", type=str, required=True, help="Input video path")
|
||||
parser.add_argument("--output", type=str, required=True, help="Output video path")
|
||||
parser.add_argument("--detector", type=str, default="retinaface", choices=["retinaface", "scrfd"])
|
||||
parser.add_argument("--threshold", type=float, default=0.6, help="Visualization threshold")
|
||||
parser.add_argument("--preview", action="store_true", help="Show live preview")
|
||||
args = parser.parse_args()
|
||||
|
||||
if not Path(args.input).exists():
|
||||
print(f"Error: Input file '{args.input}' does not exist")
|
||||
return
|
||||
|
||||
Path(args.output).parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
detector = RetinaFace() if args.detector == "retinaface" else SCRFD()
|
||||
process_video(detector, args.input, args.output, args.threshold, args.preview)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -1,7 +1,14 @@
|
||||
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from uniface.attribute import AgeGender
|
||||
from uniface.attribute import AgeGender, AttributeResult
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
@@ -20,23 +27,26 @@ def mock_bbox():
|
||||
|
||||
|
||||
def test_model_initialization(age_gender_model):
|
||||
assert age_gender_model is not None, "AgeGender model initialization failed."
|
||||
assert age_gender_model is not None, 'AgeGender model initialization failed.'
|
||||
|
||||
|
||||
def test_prediction_output_format(age_gender_model, mock_image, mock_bbox):
|
||||
gender, age = age_gender_model.predict(mock_image, mock_bbox)
|
||||
assert isinstance(gender, str), f"Gender should be string, got {type(gender)}"
|
||||
assert isinstance(age, int), f"Age should be int, got {type(age)}"
|
||||
result = age_gender_model.predict(mock_image, mock_bbox)
|
||||
assert isinstance(result, AttributeResult), f'Result should be AttributeResult, got {type(result)}'
|
||||
assert isinstance(result.gender, int), f'Gender should be int, got {type(result.gender)}'
|
||||
assert isinstance(result.age, int), f'Age should be int, got {type(result.age)}'
|
||||
assert isinstance(result.sex, str), f'Sex should be str, got {type(result.sex)}'
|
||||
|
||||
|
||||
def test_gender_values(age_gender_model, mock_image, mock_bbox):
|
||||
gender, age = age_gender_model.predict(mock_image, mock_bbox)
|
||||
assert gender in ["Male", "Female"], f"Gender should be 'Male' or 'Female', got '{gender}'"
|
||||
result = age_gender_model.predict(mock_image, mock_bbox)
|
||||
assert result.gender in [0, 1], f'Gender should be 0 (Female) or 1 (Male), got {result.gender}'
|
||||
assert result.sex in ['Female', 'Male'], f'Sex should be Female or Male, got {result.sex}'
|
||||
|
||||
|
||||
def test_age_range(age_gender_model, mock_image, mock_bbox):
|
||||
gender, age = age_gender_model.predict(mock_image, mock_bbox)
|
||||
assert 0 <= age <= 120, f"Age should be between 0 and 120, got {age}"
|
||||
result = age_gender_model.predict(mock_image, mock_bbox)
|
||||
assert 0 <= result.age <= 120, f'Age should be between 0 and 120, got {result.age}'
|
||||
|
||||
|
||||
def test_different_bbox_sizes(age_gender_model, mock_image):
|
||||
@@ -47,9 +57,9 @@ def test_different_bbox_sizes(age_gender_model, mock_image):
|
||||
]
|
||||
|
||||
for bbox in test_bboxes:
|
||||
gender, age = age_gender_model.predict(mock_image, bbox)
|
||||
assert gender in ["Male", "Female"], f"Failed for bbox {bbox}"
|
||||
assert 0 <= age <= 120, f"Age out of range for bbox {bbox}"
|
||||
result = age_gender_model.predict(mock_image, bbox)
|
||||
assert result.gender in [0, 1], f'Failed for bbox {bbox}'
|
||||
assert 0 <= result.age <= 120, f'Age out of range for bbox {bbox}'
|
||||
|
||||
|
||||
def test_different_image_sizes(age_gender_model, mock_bbox):
|
||||
@@ -57,31 +67,31 @@ def test_different_image_sizes(age_gender_model, mock_bbox):
|
||||
|
||||
for size in test_sizes:
|
||||
mock_image = np.random.randint(0, 255, size, dtype=np.uint8)
|
||||
gender, age = age_gender_model.predict(mock_image, mock_bbox)
|
||||
assert gender in ["Male", "Female"], f"Failed for image size {size}"
|
||||
assert 0 <= age <= 120, f"Age out of range for image size {size}"
|
||||
result = age_gender_model.predict(mock_image, mock_bbox)
|
||||
assert result.gender in [0, 1], f'Failed for image size {size}'
|
||||
assert 0 <= result.age <= 120, f'Age out of range for image size {size}'
|
||||
|
||||
|
||||
def test_consistency(age_gender_model, mock_image, mock_bbox):
|
||||
gender1, age1 = age_gender_model.predict(mock_image, mock_bbox)
|
||||
gender2, age2 = age_gender_model.predict(mock_image, mock_bbox)
|
||||
result1 = age_gender_model.predict(mock_image, mock_bbox)
|
||||
result2 = age_gender_model.predict(mock_image, mock_bbox)
|
||||
|
||||
assert gender1 == gender2, "Same input should produce same gender prediction"
|
||||
assert age1 == age2, "Same input should produce same age prediction"
|
||||
assert result1.gender == result2.gender, 'Same input should produce same gender prediction'
|
||||
assert result1.age == result2.age, 'Same input should produce same age prediction'
|
||||
|
||||
|
||||
def test_bbox_list_format(age_gender_model, mock_image):
|
||||
bbox_list = [100, 100, 300, 300]
|
||||
gender, age = age_gender_model.predict(mock_image, bbox_list)
|
||||
assert gender in ["Male", "Female"], "Should work with bbox as list"
|
||||
assert 0 <= age <= 120, "Age should be in valid range"
|
||||
result = age_gender_model.predict(mock_image, bbox_list)
|
||||
assert result.gender in [0, 1], 'Should work with bbox as list'
|
||||
assert 0 <= result.age <= 120, 'Age should be in valid range'
|
||||
|
||||
|
||||
def test_bbox_array_format(age_gender_model, mock_image):
|
||||
bbox_array = np.array([100, 100, 300, 300])
|
||||
gender, age = age_gender_model.predict(mock_image, bbox_array)
|
||||
assert gender in ["Male", "Female"], "Should work with bbox as numpy array"
|
||||
assert 0 <= age <= 120, "Age should be in valid range"
|
||||
result = age_gender_model.predict(mock_image, bbox_array)
|
||||
assert result.gender in [0, 1], 'Should work with bbox as numpy array'
|
||||
assert 0 <= result.age <= 120, 'Age should be in valid range'
|
||||
|
||||
|
||||
def test_multiple_predictions(age_gender_model, mock_image):
|
||||
@@ -93,24 +103,37 @@ def test_multiple_predictions(age_gender_model, mock_image):
|
||||
|
||||
results = []
|
||||
for bbox in bboxes:
|
||||
gender, age = age_gender_model.predict(mock_image, bbox)
|
||||
results.append((gender, age))
|
||||
result = age_gender_model.predict(mock_image, bbox)
|
||||
results.append(result)
|
||||
|
||||
assert len(results) == 3, "Should have 3 predictions"
|
||||
for gender, age in results:
|
||||
assert gender in ["Male", "Female"]
|
||||
assert 0 <= age <= 120
|
||||
assert len(results) == 3, 'Should have 3 predictions'
|
||||
for result in results:
|
||||
assert result.gender in [0, 1]
|
||||
assert 0 <= result.age <= 120
|
||||
|
||||
|
||||
def test_age_is_positive(age_gender_model, mock_image, mock_bbox):
|
||||
for _ in range(5):
|
||||
gender, age = age_gender_model.predict(mock_image, mock_bbox)
|
||||
assert age >= 0, f"Age should be non-negative, got {age}"
|
||||
result = age_gender_model.predict(mock_image, mock_bbox)
|
||||
assert result.age >= 0, f'Age should be non-negative, got {result.age}'
|
||||
|
||||
|
||||
def test_output_format_for_visualization(age_gender_model, mock_image, mock_bbox):
|
||||
gender, age = age_gender_model.predict(mock_image, mock_bbox)
|
||||
text = f"{gender}, {age}y"
|
||||
assert isinstance(text, str), "Should be able to format as string"
|
||||
assert "Male" in text or "Female" in text, "Text should contain gender"
|
||||
assert "y" in text, "Text should contain 'y' for years"
|
||||
result = age_gender_model.predict(mock_image, mock_bbox)
|
||||
text = f'{result.sex}, {result.age}y'
|
||||
assert isinstance(text, str), 'Should be able to format as string'
|
||||
assert 'Male' in text or 'Female' in text, 'Text should contain gender'
|
||||
assert 'y' in text, "Text should contain 'y' for years"
|
||||
|
||||
|
||||
def test_attribute_result_fields(age_gender_model, mock_image, mock_bbox):
|
||||
"""Test that AttributeResult has correct fields for AgeGender model."""
|
||||
result = age_gender_model.predict(mock_image, mock_bbox)
|
||||
|
||||
# AgeGender should set gender and age
|
||||
assert result.gender is not None
|
||||
assert result.age is not None
|
||||
|
||||
# AgeGender should NOT set race and age_group (FairFace only)
|
||||
assert result.race is None
|
||||
assert result.age_group is None
|
||||
|
||||
61
tests/test_draw.py
Normal file
@@ -0,0 +1,61 @@
|
||||
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import numpy as np
|
||||
|
||||
from uniface.draw import draw_gaze
|
||||
|
||||
|
||||
def _compute_gaze_delta(bbox: np.ndarray, pitch: float, yaw: float) -> tuple[int, int]:
|
||||
"""Replicate draw_gaze dx/dy math for verification."""
|
||||
x_min, _, x_max, _ = map(int, bbox[:4])
|
||||
length = x_max - x_min
|
||||
dx = int(-length * np.sin(yaw) * np.cos(pitch))
|
||||
dy = int(-length * np.sin(pitch))
|
||||
return dx, dy
|
||||
|
||||
|
||||
def test_draw_gaze_yaw_only_moves_horizontally():
|
||||
"""Yaw-only input (pitch=0) should produce horizontal displacement only."""
|
||||
image = np.zeros((200, 200, 3), dtype=np.uint8)
|
||||
bbox = np.array([50, 50, 150, 150], dtype=np.float32)
|
||||
|
||||
yaw = 0.5
|
||||
pitch = 0.0
|
||||
dx, dy = _compute_gaze_delta(bbox, pitch, yaw)
|
||||
|
||||
assert dx != 0, 'Yaw-only should produce horizontal displacement'
|
||||
assert dy == 0, 'Yaw-only should produce zero vertical displacement'
|
||||
|
||||
# Should not raise
|
||||
draw_gaze(image, bbox, pitch, yaw, draw_bbox=False, draw_angles=False)
|
||||
|
||||
|
||||
def test_draw_gaze_pitch_only_moves_vertically():
|
||||
"""Pitch-only input (yaw=0) should produce vertical displacement only."""
|
||||
image = np.zeros((200, 200, 3), dtype=np.uint8)
|
||||
bbox = np.array([50, 50, 150, 150], dtype=np.float32)
|
||||
|
||||
yaw = 0.0
|
||||
pitch = 0.5
|
||||
dx, dy = _compute_gaze_delta(bbox, pitch, yaw)
|
||||
|
||||
assert dx == 0, 'Pitch-only should produce zero horizontal displacement'
|
||||
assert dy != 0, 'Pitch-only should produce vertical displacement'
|
||||
|
||||
# Should not raise
|
||||
draw_gaze(image, bbox, pitch, yaw, draw_bbox=False, draw_angles=False)
|
||||
|
||||
|
||||
def test_draw_gaze_modifies_image():
|
||||
"""draw_gaze should modify the image in place."""
|
||||
image = np.zeros((200, 200, 3), dtype=np.uint8)
|
||||
bbox = np.array([50, 50, 150, 150], dtype=np.float32)
|
||||
|
||||
original = image.copy()
|
||||
draw_gaze(image, bbox, 0.3, 0.3)
|
||||
|
||||
assert not np.array_equal(image, original), 'draw_gaze should modify the image'
|
||||
@@ -1,3 +1,10 @@
|
||||
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
@@ -5,10 +12,10 @@ from uniface import (
|
||||
create_detector,
|
||||
create_landmarker,
|
||||
create_recognizer,
|
||||
detect_faces,
|
||||
list_available_detectors,
|
||||
)
|
||||
from uniface.constants import RetinaFaceWeights, SCRFDWeights
|
||||
from uniface.spoofing import MiniFASNet, create_spoofer
|
||||
|
||||
|
||||
# create_detector tests
|
||||
@@ -16,16 +23,16 @@ def test_create_detector_retinaface():
|
||||
"""
|
||||
Test creating a RetinaFace detector using factory function.
|
||||
"""
|
||||
detector = create_detector("retinaface")
|
||||
assert detector is not None, "Failed to create RetinaFace detector"
|
||||
detector = create_detector('retinaface')
|
||||
assert detector is not None, 'Failed to create RetinaFace detector'
|
||||
|
||||
|
||||
def test_create_detector_scrfd():
|
||||
"""
|
||||
Test creating a SCRFD detector using factory function.
|
||||
"""
|
||||
detector = create_detector("scrfd")
|
||||
assert detector is not None, "Failed to create SCRFD detector"
|
||||
detector = create_detector('scrfd')
|
||||
assert detector is not None, 'Failed to create SCRFD detector'
|
||||
|
||||
|
||||
def test_create_detector_with_config():
|
||||
@@ -33,12 +40,12 @@ def test_create_detector_with_config():
|
||||
Test creating detector with custom configuration.
|
||||
"""
|
||||
detector = create_detector(
|
||||
"retinaface",
|
||||
'retinaface',
|
||||
model_name=RetinaFaceWeights.MNET_V2,
|
||||
conf_thresh=0.8,
|
||||
nms_thresh=0.3,
|
||||
confidence_threshold=0.8,
|
||||
nms_threshold=0.3,
|
||||
)
|
||||
assert detector is not None, "Failed to create detector with custom config"
|
||||
assert detector is not None, 'Failed to create detector with custom config'
|
||||
|
||||
|
||||
def test_create_detector_invalid_method():
|
||||
@@ -46,15 +53,15 @@ def test_create_detector_invalid_method():
|
||||
Test that invalid detector method raises an error.
|
||||
"""
|
||||
with pytest.raises((ValueError, KeyError)):
|
||||
create_detector("invalid_method")
|
||||
create_detector('invalid_method')
|
||||
|
||||
|
||||
def test_create_detector_scrfd_with_model():
|
||||
"""
|
||||
Test creating SCRFD detector with specific model.
|
||||
"""
|
||||
detector = create_detector("scrfd", model_name=SCRFDWeights.SCRFD_10G_KPS, conf_thresh=0.5)
|
||||
assert detector is not None, "Failed to create SCRFD with specific model"
|
||||
detector = create_detector('scrfd', model_name=SCRFDWeights.SCRFD_10G_KPS, confidence_threshold=0.5)
|
||||
assert detector is not None, 'Failed to create SCRFD with specific model'
|
||||
|
||||
|
||||
# create_recognizer tests
|
||||
@@ -62,24 +69,24 @@ def test_create_recognizer_arcface():
|
||||
"""
|
||||
Test creating an ArcFace recognizer using factory function.
|
||||
"""
|
||||
recognizer = create_recognizer("arcface")
|
||||
assert recognizer is not None, "Failed to create ArcFace recognizer"
|
||||
recognizer = create_recognizer('arcface')
|
||||
assert recognizer is not None, 'Failed to create ArcFace recognizer'
|
||||
|
||||
|
||||
def test_create_recognizer_mobileface():
|
||||
"""
|
||||
Test creating a MobileFace recognizer using factory function.
|
||||
"""
|
||||
recognizer = create_recognizer("mobileface")
|
||||
assert recognizer is not None, "Failed to create MobileFace recognizer"
|
||||
recognizer = create_recognizer('mobileface')
|
||||
assert recognizer is not None, 'Failed to create MobileFace recognizer'
|
||||
|
||||
|
||||
def test_create_recognizer_sphereface():
|
||||
"""
|
||||
Test creating a SphereFace recognizer using factory function.
|
||||
"""
|
||||
recognizer = create_recognizer("sphereface")
|
||||
assert recognizer is not None, "Failed to create SphereFace recognizer"
|
||||
recognizer = create_recognizer('sphereface')
|
||||
assert recognizer is not None, 'Failed to create SphereFace recognizer'
|
||||
|
||||
|
||||
def test_create_recognizer_invalid_method():
|
||||
@@ -87,7 +94,7 @@ def test_create_recognizer_invalid_method():
|
||||
Test that invalid recognizer method raises an error.
|
||||
"""
|
||||
with pytest.raises((ValueError, KeyError)):
|
||||
create_recognizer("invalid_method")
|
||||
create_recognizer('invalid_method')
|
||||
|
||||
|
||||
# create_landmarker tests
|
||||
@@ -95,8 +102,8 @@ def test_create_landmarker():
|
||||
"""
|
||||
Test creating a Landmark106 detector using factory function.
|
||||
"""
|
||||
landmarker = create_landmarker("2d106det")
|
||||
assert landmarker is not None, "Failed to create Landmark106 detector"
|
||||
landmarker = create_landmarker('2d106det')
|
||||
assert landmarker is not None, 'Failed to create Landmark106 detector'
|
||||
|
||||
|
||||
def test_create_landmarker_default():
|
||||
@@ -104,7 +111,7 @@ def test_create_landmarker_default():
|
||||
Test creating landmarker with default parameters.
|
||||
"""
|
||||
landmarker = create_landmarker()
|
||||
assert landmarker is not None, "Failed to create default landmarker"
|
||||
assert landmarker is not None, 'Failed to create default landmarker'
|
||||
|
||||
|
||||
def test_create_landmarker_invalid_method():
|
||||
@@ -112,63 +119,7 @@ def test_create_landmarker_invalid_method():
|
||||
Test that invalid landmarker method raises an error.
|
||||
"""
|
||||
with pytest.raises((ValueError, KeyError)):
|
||||
create_landmarker("invalid_method")
|
||||
|
||||
|
||||
# detect_faces tests
|
||||
def test_detect_faces_retinaface():
|
||||
"""
|
||||
Test high-level detect_faces function with RetinaFace.
|
||||
"""
|
||||
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
faces = detect_faces(mock_image, method="retinaface")
|
||||
|
||||
assert isinstance(faces, list), "detect_faces should return a list"
|
||||
|
||||
|
||||
def test_detect_faces_scrfd():
|
||||
"""
|
||||
Test high-level detect_faces function with SCRFD.
|
||||
"""
|
||||
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
faces = detect_faces(mock_image, method="scrfd")
|
||||
|
||||
assert isinstance(faces, list), "detect_faces should return a list"
|
||||
|
||||
|
||||
def test_detect_faces_with_threshold():
|
||||
"""
|
||||
Test detect_faces with custom confidence threshold.
|
||||
"""
|
||||
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
faces = detect_faces(mock_image, method="retinaface", conf_thresh=0.8)
|
||||
|
||||
assert isinstance(faces, list), "detect_faces should return a list"
|
||||
|
||||
# All detections should respect threshold
|
||||
for face in faces:
|
||||
assert face["confidence"] >= 0.8, "All detections should meet confidence threshold"
|
||||
|
||||
|
||||
def test_detect_faces_default_method():
|
||||
"""
|
||||
Test detect_faces with default method (should use retinaface).
|
||||
"""
|
||||
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
faces = detect_faces(mock_image) # No method specified
|
||||
|
||||
assert isinstance(faces, list), "detect_faces should return a list with default method"
|
||||
|
||||
|
||||
def test_detect_faces_empty_image():
|
||||
"""
|
||||
Test detect_faces on a blank image.
|
||||
"""
|
||||
empty_image = np.zeros((640, 640, 3), dtype=np.uint8)
|
||||
faces = detect_faces(empty_image, method="retinaface")
|
||||
|
||||
assert isinstance(faces, list), "Should return a list even for empty image"
|
||||
assert len(faces) == 0, "Should detect no faces in blank image"
|
||||
create_landmarker('invalid_method')
|
||||
|
||||
|
||||
# list_available_detectors tests
|
||||
@@ -178,8 +129,8 @@ def test_list_available_detectors():
|
||||
"""
|
||||
detectors = list_available_detectors()
|
||||
|
||||
assert isinstance(detectors, dict), "Should return a dictionary of detectors"
|
||||
assert len(detectors) > 0, "Should have at least one detector available"
|
||||
assert isinstance(detectors, dict), 'Should return a dictionary of detectors'
|
||||
assert len(detectors) > 0, 'Should have at least one detector available'
|
||||
|
||||
|
||||
def test_list_available_detectors_contents():
|
||||
@@ -189,8 +140,8 @@ def test_list_available_detectors_contents():
|
||||
detectors = list_available_detectors()
|
||||
|
||||
# Should include at least these detectors
|
||||
assert "retinaface" in detectors, "Should include 'retinaface'"
|
||||
assert "scrfd" in detectors, "Should include 'scrfd'"
|
||||
assert 'retinaface' in detectors, "Should include 'retinaface'"
|
||||
assert 'scrfd' in detectors, "Should include 'scrfd'"
|
||||
|
||||
|
||||
# Integration tests
|
||||
@@ -198,56 +149,56 @@ def test_detector_inference_from_factory():
|
||||
"""
|
||||
Test that detector created from factory can perform inference.
|
||||
"""
|
||||
detector = create_detector("retinaface")
|
||||
detector = create_detector('retinaface')
|
||||
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
|
||||
faces = detector.detect(mock_image)
|
||||
assert isinstance(faces, list), "Detector should return list of faces"
|
||||
assert isinstance(faces, list), 'Detector should return list of faces'
|
||||
|
||||
|
||||
def test_recognizer_inference_from_factory():
|
||||
"""
|
||||
Test that recognizer created from factory can perform inference.
|
||||
"""
|
||||
recognizer = create_recognizer("arcface")
|
||||
recognizer = create_recognizer('arcface')
|
||||
mock_image = np.random.randint(0, 255, (112, 112, 3), dtype=np.uint8)
|
||||
|
||||
embedding = recognizer.get_embedding(mock_image)
|
||||
assert embedding is not None, "Recognizer should return embedding"
|
||||
assert embedding.shape[1] == 512, "Should return 512-dimensional embedding"
|
||||
assert embedding is not None, 'Recognizer should return embedding'
|
||||
assert embedding.shape[1] == 512, 'Should return 512-dimensional embedding'
|
||||
|
||||
|
||||
def test_landmarker_inference_from_factory():
|
||||
"""
|
||||
Test that landmarker created from factory can perform inference.
|
||||
"""
|
||||
landmarker = create_landmarker("2d106det")
|
||||
landmarker = create_landmarker('2d106det')
|
||||
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
mock_bbox = [100, 100, 300, 300]
|
||||
|
||||
landmarks = landmarker.get_landmarks(mock_image, mock_bbox)
|
||||
assert landmarks is not None, "Landmarker should return landmarks"
|
||||
assert landmarks.shape == (106, 2), "Should return 106 landmarks"
|
||||
assert landmarks is not None, 'Landmarker should return landmarks'
|
||||
assert landmarks.shape == (106, 2), 'Should return 106 landmarks'
|
||||
|
||||
|
||||
def test_multiple_detector_creation():
|
||||
"""
|
||||
Test that multiple detectors can be created independently.
|
||||
"""
|
||||
detector1 = create_detector("retinaface")
|
||||
detector2 = create_detector("scrfd")
|
||||
detector1 = create_detector('retinaface')
|
||||
detector2 = create_detector('scrfd')
|
||||
|
||||
assert detector1 is not None
|
||||
assert detector2 is not None
|
||||
assert detector1 is not detector2, "Should create separate instances"
|
||||
assert detector1 is not detector2, 'Should create separate instances'
|
||||
|
||||
|
||||
def test_detector_with_different_configs():
|
||||
"""
|
||||
Test creating multiple detectors with different configurations.
|
||||
"""
|
||||
detector_high_thresh = create_detector("retinaface", conf_thresh=0.9)
|
||||
detector_low_thresh = create_detector("retinaface", conf_thresh=0.3)
|
||||
detector_high_thresh = create_detector('retinaface', confidence_threshold=0.9)
|
||||
detector_low_thresh = create_detector('retinaface', confidence_threshold=0.3)
|
||||
|
||||
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
|
||||
@@ -263,12 +214,25 @@ def test_factory_returns_correct_types():
|
||||
"""
|
||||
Test that factory functions return instances of the correct types.
|
||||
"""
|
||||
from uniface import RetinaFace, ArcFace, Landmark106
|
||||
from uniface import ArcFace, Landmark106, RetinaFace
|
||||
|
||||
detector = create_detector("retinaface")
|
||||
recognizer = create_recognizer("arcface")
|
||||
landmarker = create_landmarker("2d106det")
|
||||
detector = create_detector('retinaface')
|
||||
recognizer = create_recognizer('arcface')
|
||||
landmarker = create_landmarker('2d106det')
|
||||
|
||||
assert isinstance(detector, RetinaFace), "Should return RetinaFace instance"
|
||||
assert isinstance(recognizer, ArcFace), "Should return ArcFace instance"
|
||||
assert isinstance(landmarker, Landmark106), "Should return Landmark106 instance"
|
||||
assert isinstance(detector, RetinaFace), 'Should return RetinaFace instance'
|
||||
assert isinstance(recognizer, ArcFace), 'Should return ArcFace instance'
|
||||
assert isinstance(landmarker, Landmark106), 'Should return Landmark106 instance'
|
||||
|
||||
|
||||
# create_spoofer tests
|
||||
def test_create_spoofer_default():
|
||||
"""Test creating a spoofer with default parameters."""
|
||||
spoofer = create_spoofer()
|
||||
assert isinstance(spoofer, MiniFASNet), 'Should return MiniFASNet instance'
|
||||
|
||||
|
||||
def test_create_spoofer_with_providers():
|
||||
"""Test that create_spoofer forwards providers kwarg without TypeError."""
|
||||
spoofer = create_spoofer(providers=['CPUExecutionProvider'])
|
||||
assert isinstance(spoofer, MiniFASNet), 'Should return MiniFASNet instance'
|
||||
|
||||
@@ -1,3 +1,10 @@
|
||||
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
@@ -20,17 +27,17 @@ def mock_bbox():
|
||||
|
||||
|
||||
def test_model_initialization(landmark_model):
|
||||
assert landmark_model is not None, "Landmark106 model initialization failed."
|
||||
assert landmark_model is not None, 'Landmark106 model initialization failed.'
|
||||
|
||||
|
||||
def test_landmark_detection(landmark_model, mock_image, mock_bbox):
|
||||
landmarks = landmark_model.get_landmarks(mock_image, mock_bbox)
|
||||
assert landmarks.shape == (106, 2), f"Expected shape (106, 2), got {landmarks.shape}"
|
||||
assert landmarks.shape == (106, 2), f'Expected shape (106, 2), got {landmarks.shape}'
|
||||
|
||||
|
||||
def test_landmark_dtype(landmark_model, mock_image, mock_bbox):
|
||||
landmarks = landmark_model.get_landmarks(mock_image, mock_bbox)
|
||||
assert landmarks.dtype == np.float32, f"Expected float32, got {landmarks.dtype}"
|
||||
assert landmarks.dtype == np.float32, f'Expected float32, got {landmarks.dtype}'
|
||||
|
||||
|
||||
def test_landmark_coordinates_within_image(landmark_model, mock_image, mock_bbox):
|
||||
@@ -45,8 +52,8 @@ def test_landmark_coordinates_within_image(landmark_model, mock_image, mock_bbox
|
||||
x_in_bounds = np.sum((x_coords >= x1 - margin) & (x_coords <= x2 + margin))
|
||||
y_in_bounds = np.sum((y_coords >= y1 - margin) & (y_coords <= y2 + margin))
|
||||
|
||||
assert x_in_bounds >= 95, f"Only {x_in_bounds}/106 x-coordinates within bounds"
|
||||
assert y_in_bounds >= 95, f"Only {y_in_bounds}/106 y-coordinates within bounds"
|
||||
assert x_in_bounds >= 95, f'Only {x_in_bounds}/106 x-coordinates within bounds'
|
||||
assert y_in_bounds >= 95, f'Only {y_in_bounds}/106 y-coordinates within bounds'
|
||||
|
||||
|
||||
def test_different_bbox_sizes(landmark_model, mock_image):
|
||||
@@ -58,22 +65,22 @@ def test_different_bbox_sizes(landmark_model, mock_image):
|
||||
|
||||
for bbox in test_bboxes:
|
||||
landmarks = landmark_model.get_landmarks(mock_image, bbox)
|
||||
assert landmarks.shape == (106, 2), f"Failed for bbox {bbox}"
|
||||
assert landmarks.shape == (106, 2), f'Failed for bbox {bbox}'
|
||||
|
||||
|
||||
def test_landmark_array_format(landmark_model, mock_image, mock_bbox):
|
||||
landmarks = landmark_model.get_landmarks(mock_image, mock_bbox)
|
||||
landmarks_int = landmarks.astype(int)
|
||||
|
||||
assert landmarks_int.shape == (106, 2), "Integer conversion should preserve shape"
|
||||
assert landmarks_int.dtype in [np.int32, np.int64], "Should convert to integer type"
|
||||
assert landmarks_int.shape == (106, 2), 'Integer conversion should preserve shape'
|
||||
assert landmarks_int.dtype in [np.int32, np.int64], 'Should convert to integer type'
|
||||
|
||||
|
||||
def test_consistency(landmark_model, mock_image, mock_bbox):
|
||||
landmarks1 = landmark_model.get_landmarks(mock_image, mock_bbox)
|
||||
landmarks2 = landmark_model.get_landmarks(mock_image, mock_bbox)
|
||||
|
||||
assert np.allclose(landmarks1, landmarks2), "Same input should produce same landmarks"
|
||||
assert np.allclose(landmarks1, landmarks2), 'Same input should produce same landmarks'
|
||||
|
||||
|
||||
def test_different_image_sizes(landmark_model, mock_bbox):
|
||||
@@ -82,19 +89,19 @@ def test_different_image_sizes(landmark_model, mock_bbox):
|
||||
for size in test_sizes:
|
||||
mock_image = np.random.randint(0, 255, size, dtype=np.uint8)
|
||||
landmarks = landmark_model.get_landmarks(mock_image, mock_bbox)
|
||||
assert landmarks.shape == (106, 2), f"Failed for image size {size}"
|
||||
assert landmarks.shape == (106, 2), f'Failed for image size {size}'
|
||||
|
||||
|
||||
def test_bbox_list_format(landmark_model, mock_image):
|
||||
bbox_list = [100, 100, 300, 300]
|
||||
landmarks = landmark_model.get_landmarks(mock_image, bbox_list)
|
||||
assert landmarks.shape == (106, 2), "Should work with bbox as list"
|
||||
assert landmarks.shape == (106, 2), 'Should work with bbox as list'
|
||||
|
||||
|
||||
def test_bbox_array_format(landmark_model, mock_image):
|
||||
bbox_array = np.array([100, 100, 300, 300])
|
||||
landmarks = landmark_model.get_landmarks(mock_image, bbox_array)
|
||||
assert landmarks.shape == (106, 2), "Should work with bbox as numpy array"
|
||||
assert landmarks.shape == (106, 2), 'Should work with bbox as numpy array'
|
||||
|
||||
|
||||
def test_landmark_distribution(landmark_model, mock_image, mock_bbox):
|
||||
@@ -103,5 +110,5 @@ def test_landmark_distribution(landmark_model, mock_image, mock_bbox):
|
||||
x_variance = np.var(landmarks[:, 0])
|
||||
y_variance = np.var(landmarks[:, 1])
|
||||
|
||||
assert x_variance > 0, "Landmarks should have variation in x-coordinates"
|
||||
assert y_variance > 0, "Landmarks should have variation in y-coordinates"
|
||||
assert x_variance > 0, 'Landmarks should have variation in x-coordinates'
|
||||
assert y_variance > 0, 'Landmarks should have variation in y-coordinates'
|
||||
|
||||
269
tests/test_parsing.py
Normal file
@@ -0,0 +1,269 @@
|
||||
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from uniface.constants import ParsingWeights, XSegWeights
|
||||
from uniface.parsing import BiSeNet, XSeg, create_face_parser
|
||||
|
||||
|
||||
def test_bisenet_initialization():
|
||||
"""Test BiSeNet initialization."""
|
||||
parser = BiSeNet()
|
||||
assert parser is not None
|
||||
assert parser.input_size == (512, 512)
|
||||
|
||||
|
||||
def test_bisenet_with_different_models():
|
||||
"""Test BiSeNet with different model weights."""
|
||||
parser_resnet18 = BiSeNet(model_name=ParsingWeights.RESNET18)
|
||||
parser_resnet34 = BiSeNet(model_name=ParsingWeights.RESNET34)
|
||||
|
||||
assert parser_resnet18 is not None
|
||||
assert parser_resnet34 is not None
|
||||
|
||||
|
||||
def test_bisenet_preprocess():
|
||||
"""Test preprocessing."""
|
||||
parser = BiSeNet()
|
||||
|
||||
# Create a dummy face image
|
||||
face_image = np.random.randint(0, 255, (256, 256, 3), dtype=np.uint8)
|
||||
|
||||
# Preprocess
|
||||
preprocessed = parser.preprocess(face_image)
|
||||
|
||||
assert preprocessed.shape == (1, 3, 512, 512)
|
||||
assert preprocessed.dtype == np.float32
|
||||
|
||||
|
||||
def test_bisenet_postprocess():
|
||||
"""Test postprocessing."""
|
||||
parser = BiSeNet()
|
||||
|
||||
# Create dummy model output (batch_size=1, num_classes=19, H=512, W=512)
|
||||
dummy_output = np.random.randn(1, 19, 512, 512).astype(np.float32)
|
||||
|
||||
# Postprocess
|
||||
mask = parser.postprocess(dummy_output, original_size=(256, 256))
|
||||
|
||||
assert mask.shape == (256, 256)
|
||||
assert mask.dtype == np.uint8
|
||||
assert mask.min() >= 0
|
||||
assert mask.max() < 19 # 19 classes (0-18)
|
||||
|
||||
|
||||
def test_bisenet_parse():
|
||||
"""Test end-to-end parsing."""
|
||||
parser = BiSeNet()
|
||||
|
||||
# Create a dummy face image
|
||||
face_image = np.random.randint(0, 255, (256, 256, 3), dtype=np.uint8)
|
||||
|
||||
# Parse
|
||||
mask = parser.parse(face_image)
|
||||
|
||||
assert mask.shape == (256, 256)
|
||||
assert mask.dtype == np.uint8
|
||||
assert mask.min() >= 0
|
||||
assert mask.max() < 19
|
||||
|
||||
|
||||
def test_bisenet_callable():
|
||||
"""Test that BiSeNet is callable."""
|
||||
parser = BiSeNet()
|
||||
face_image = np.random.randint(0, 255, (256, 256, 3), dtype=np.uint8)
|
||||
|
||||
# Should work as callable
|
||||
mask = parser(face_image)
|
||||
|
||||
assert mask.shape == (256, 256)
|
||||
assert mask.dtype == np.uint8
|
||||
|
||||
|
||||
def test_create_face_parser_with_enum():
|
||||
"""Test factory function with enum."""
|
||||
parser = create_face_parser(ParsingWeights.RESNET18)
|
||||
assert parser is not None
|
||||
assert isinstance(parser, BiSeNet)
|
||||
|
||||
|
||||
def test_create_face_parser_with_string():
|
||||
"""Test factory function with string."""
|
||||
parser = create_face_parser('parsing_resnet18')
|
||||
assert parser is not None
|
||||
assert isinstance(parser, BiSeNet)
|
||||
|
||||
|
||||
def test_create_face_parser_invalid_model():
|
||||
"""Test factory function with invalid model name."""
|
||||
with pytest.raises(ValueError, match='Unknown face parsing model'):
|
||||
create_face_parser('invalid_model')
|
||||
|
||||
|
||||
def test_bisenet_different_input_sizes():
|
||||
"""Test parsing with different input image sizes."""
|
||||
parser = BiSeNet()
|
||||
|
||||
# Test with different sizes
|
||||
sizes = [(128, 128), (256, 256), (512, 512), (640, 480)]
|
||||
|
||||
for h, w in sizes:
|
||||
face_image = np.random.randint(0, 255, (h, w, 3), dtype=np.uint8)
|
||||
mask = parser.parse(face_image)
|
||||
|
||||
assert mask.shape == (h, w), f'Failed for size {h}x{w}'
|
||||
assert mask.dtype == np.uint8
|
||||
|
||||
|
||||
# XSeg Tests
|
||||
|
||||
|
||||
def test_xseg_initialization():
|
||||
"""Test XSeg initialization."""
|
||||
parser = XSeg()
|
||||
assert parser is not None
|
||||
assert parser.input_size == (256, 256)
|
||||
assert parser.align_size == 256
|
||||
assert parser.blur_sigma == 0
|
||||
|
||||
|
||||
def test_xseg_with_custom_params():
|
||||
"""Test XSeg with custom parameters."""
|
||||
parser = XSeg(align_size=512, blur_sigma=5)
|
||||
assert parser.align_size == 512
|
||||
assert parser.blur_sigma == 5
|
||||
|
||||
|
||||
def test_xseg_preprocess():
|
||||
"""Test XSeg preprocessing."""
|
||||
parser = XSeg()
|
||||
|
||||
# Create a dummy aligned face crop
|
||||
face_crop = np.random.randint(0, 255, (256, 256, 3), dtype=np.uint8)
|
||||
|
||||
# Preprocess
|
||||
preprocessed = parser.preprocess(face_crop)
|
||||
|
||||
assert preprocessed.shape == (1, 256, 256, 3) # NHWC format
|
||||
assert preprocessed.dtype == np.float32
|
||||
assert preprocessed.min() >= 0
|
||||
assert preprocessed.max() <= 1
|
||||
|
||||
|
||||
def test_xseg_postprocess():
|
||||
"""Test XSeg postprocessing."""
|
||||
parser = XSeg()
|
||||
|
||||
# Create dummy model output (NHWC format)
|
||||
dummy_output = np.random.rand(1, 256, 256, 1).astype(np.float32)
|
||||
|
||||
# Postprocess
|
||||
mask = parser.postprocess(dummy_output, crop_size=(256, 256))
|
||||
|
||||
assert mask.shape == (256, 256)
|
||||
assert mask.dtype == np.float32
|
||||
assert mask.min() >= 0
|
||||
assert mask.max() <= 1
|
||||
|
||||
|
||||
def test_xseg_parse_aligned():
|
||||
"""Test XSeg parse_aligned method."""
|
||||
parser = XSeg()
|
||||
|
||||
# Create a dummy aligned face crop
|
||||
face_crop = np.random.randint(0, 255, (256, 256, 3), dtype=np.uint8)
|
||||
|
||||
# Parse
|
||||
mask = parser.parse_aligned(face_crop)
|
||||
|
||||
assert mask.shape == (256, 256)
|
||||
assert mask.dtype == np.float32
|
||||
assert mask.min() >= 0
|
||||
assert mask.max() <= 1
|
||||
|
||||
|
||||
def test_xseg_parse_with_landmarks():
|
||||
"""Test XSeg parse method with landmarks."""
|
||||
parser = XSeg()
|
||||
|
||||
# Create a dummy image
|
||||
image = np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8)
|
||||
|
||||
# Create dummy 5-point landmarks
|
||||
landmarks = np.array(
|
||||
[
|
||||
[250, 200], # left eye
|
||||
[390, 200], # right eye
|
||||
[320, 280], # nose
|
||||
[260, 350], # left mouth
|
||||
[380, 350], # right mouth
|
||||
],
|
||||
dtype=np.float32,
|
||||
)
|
||||
|
||||
# Parse
|
||||
mask = parser.parse(image, landmarks=landmarks)
|
||||
|
||||
assert mask.shape == (480, 640)
|
||||
assert mask.dtype == np.float32
|
||||
assert mask.min() >= 0
|
||||
assert mask.max() <= 1
|
||||
|
||||
|
||||
def test_xseg_parse_invalid_landmarks():
|
||||
"""Test XSeg parse with invalid landmarks shape."""
|
||||
parser = XSeg()
|
||||
image = np.random.randint(0, 255, (256, 256, 3), dtype=np.uint8)
|
||||
|
||||
# Wrong shape
|
||||
invalid_landmarks = np.array([[0, 0], [1, 1], [2, 2]])
|
||||
|
||||
with pytest.raises(ValueError, match='Landmarks must have shape'):
|
||||
parser.parse(image, landmarks=invalid_landmarks)
|
||||
|
||||
|
||||
def test_xseg_parse_with_inverse():
|
||||
"""Test XSeg parse_with_inverse method."""
|
||||
parser = XSeg()
|
||||
|
||||
# Create a dummy image
|
||||
image = np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8)
|
||||
|
||||
# Create dummy 5-point landmarks
|
||||
landmarks = np.array(
|
||||
[
|
||||
[250, 200],
|
||||
[390, 200],
|
||||
[320, 280],
|
||||
[260, 350],
|
||||
[380, 350],
|
||||
],
|
||||
dtype=np.float32,
|
||||
)
|
||||
|
||||
# Parse with inverse
|
||||
mask, face_crop, inverse_matrix = parser.parse_with_inverse(image, landmarks)
|
||||
|
||||
assert mask.shape == (256, 256)
|
||||
assert face_crop.shape == (256, 256, 3)
|
||||
assert inverse_matrix.shape == (2, 3)
|
||||
|
||||
|
||||
def test_create_face_parser_xseg_enum():
|
||||
"""Test factory function with XSeg enum."""
|
||||
parser = create_face_parser(XSegWeights.DEFAULT)
|
||||
assert parser is not None
|
||||
assert isinstance(parser, XSeg)
|
||||
|
||||
|
||||
def test_create_face_parser_xseg_string():
|
||||
"""Test factory function with XSeg string."""
|
||||
parser = create_face_parser('xseg')
|
||||
assert parser is not None
|
||||
assert isinstance(parser, XSeg)
|
||||
@@ -1,3 +1,10 @@
|
||||
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
@@ -58,7 +65,7 @@ def test_arcface_initialization(arcface_model):
|
||||
"""
|
||||
Test that the ArcFace model initializes correctly.
|
||||
"""
|
||||
assert arcface_model is not None, "ArcFace model initialization failed."
|
||||
assert arcface_model is not None, 'ArcFace model initialization failed.'
|
||||
|
||||
|
||||
def test_arcface_embedding_shape(arcface_model, mock_aligned_face):
|
||||
@@ -68,8 +75,8 @@ def test_arcface_embedding_shape(arcface_model, mock_aligned_face):
|
||||
embedding = arcface_model.get_embedding(mock_aligned_face)
|
||||
|
||||
# ArcFace typically produces 512-dimensional embeddings
|
||||
assert embedding.shape[1] == 512, f"Expected 512-dim embedding, got {embedding.shape[1]}"
|
||||
assert embedding.shape[0] == 1, "Embedding should have batch dimension of 1"
|
||||
assert embedding.shape[1] == 512, f'Expected 512-dim embedding, got {embedding.shape[1]}'
|
||||
assert embedding.shape[0] == 1, 'Embedding should have batch dimension of 1'
|
||||
|
||||
|
||||
def test_arcface_normalized_embedding(arcface_model, mock_landmarks):
|
||||
@@ -83,7 +90,7 @@ def test_arcface_normalized_embedding(arcface_model, mock_landmarks):
|
||||
|
||||
# Check that embedding is normalized (L2 norm ≈ 1.0)
|
||||
norm = np.linalg.norm(embedding)
|
||||
assert np.isclose(norm, 1.0, atol=1e-5), f"Normalized embedding should have norm 1.0, got {norm}"
|
||||
assert np.isclose(norm, 1.0, atol=1e-5), f'Normalized embedding should have norm 1.0, got {norm}'
|
||||
|
||||
|
||||
def test_arcface_embedding_dtype(arcface_model, mock_aligned_face):
|
||||
@@ -91,7 +98,7 @@ def test_arcface_embedding_dtype(arcface_model, mock_aligned_face):
|
||||
Test that embeddings have the correct data type.
|
||||
"""
|
||||
embedding = arcface_model.get_embedding(mock_aligned_face)
|
||||
assert embedding.dtype == np.float32, f"Expected float32, got {embedding.dtype}"
|
||||
assert embedding.dtype == np.float32, f'Expected float32, got {embedding.dtype}'
|
||||
|
||||
|
||||
def test_arcface_consistency(arcface_model, mock_aligned_face):
|
||||
@@ -101,7 +108,7 @@ def test_arcface_consistency(arcface_model, mock_aligned_face):
|
||||
embedding1 = arcface_model.get_embedding(mock_aligned_face)
|
||||
embedding2 = arcface_model.get_embedding(mock_aligned_face)
|
||||
|
||||
assert np.allclose(embedding1, embedding2), "Same input should produce same embedding"
|
||||
assert np.allclose(embedding1, embedding2), 'Same input should produce same embedding'
|
||||
|
||||
|
||||
# MobileFace Tests
|
||||
@@ -109,7 +116,7 @@ def test_mobileface_initialization(mobileface_model):
|
||||
"""
|
||||
Test that the MobileFace model initializes correctly.
|
||||
"""
|
||||
assert mobileface_model is not None, "MobileFace model initialization failed."
|
||||
assert mobileface_model is not None, 'MobileFace model initialization failed.'
|
||||
|
||||
|
||||
def test_mobileface_embedding_shape(mobileface_model, mock_aligned_face):
|
||||
@@ -119,8 +126,8 @@ def test_mobileface_embedding_shape(mobileface_model, mock_aligned_face):
|
||||
embedding = mobileface_model.get_embedding(mock_aligned_face)
|
||||
|
||||
# MobileFace typically produces 512-dimensional embeddings
|
||||
assert embedding.shape[1] == 512, f"Expected 512-dim embedding, got {embedding.shape[1]}"
|
||||
assert embedding.shape[0] == 1, "Embedding should have batch dimension of 1"
|
||||
assert embedding.shape[1] == 512, f'Expected 512-dim embedding, got {embedding.shape[1]}'
|
||||
assert embedding.shape[0] == 1, 'Embedding should have batch dimension of 1'
|
||||
|
||||
|
||||
def test_mobileface_normalized_embedding(mobileface_model, mock_landmarks):
|
||||
@@ -132,7 +139,7 @@ def test_mobileface_normalized_embedding(mobileface_model, mock_landmarks):
|
||||
embedding = mobileface_model.get_normalized_embedding(mock_image, mock_landmarks)
|
||||
|
||||
norm = np.linalg.norm(embedding)
|
||||
assert np.isclose(norm, 1.0, atol=1e-5), f"Normalized embedding should have norm 1.0, got {norm}"
|
||||
assert np.isclose(norm, 1.0, atol=1e-5), f'Normalized embedding should have norm 1.0, got {norm}'
|
||||
|
||||
|
||||
# SphereFace Tests
|
||||
@@ -140,7 +147,7 @@ def test_sphereface_initialization(sphereface_model):
|
||||
"""
|
||||
Test that the SphereFace model initializes correctly.
|
||||
"""
|
||||
assert sphereface_model is not None, "SphereFace model initialization failed."
|
||||
assert sphereface_model is not None, 'SphereFace model initialization failed.'
|
||||
|
||||
|
||||
def test_sphereface_embedding_shape(sphereface_model, mock_aligned_face):
|
||||
@@ -150,8 +157,8 @@ def test_sphereface_embedding_shape(sphereface_model, mock_aligned_face):
|
||||
embedding = sphereface_model.get_embedding(mock_aligned_face)
|
||||
|
||||
# SphereFace typically produces 512-dimensional embeddings
|
||||
assert embedding.shape[1] == 512, f"Expected 512-dim embedding, got {embedding.shape[1]}"
|
||||
assert embedding.shape[0] == 1, "Embedding should have batch dimension of 1"
|
||||
assert embedding.shape[1] == 512, f'Expected 512-dim embedding, got {embedding.shape[1]}'
|
||||
assert embedding.shape[0] == 1, 'Embedding should have batch dimension of 1'
|
||||
|
||||
|
||||
def test_sphereface_normalized_embedding(sphereface_model, mock_landmarks):
|
||||
@@ -163,7 +170,7 @@ def test_sphereface_normalized_embedding(sphereface_model, mock_landmarks):
|
||||
embedding = sphereface_model.get_normalized_embedding(mock_image, mock_landmarks)
|
||||
|
||||
norm = np.linalg.norm(embedding)
|
||||
assert np.isclose(norm, 1.0, atol=1e-5), f"Normalized embedding should have norm 1.0, got {norm}"
|
||||
assert np.isclose(norm, 1.0, atol=1e-5), f'Normalized embedding should have norm 1.0, got {norm}'
|
||||
|
||||
|
||||
# Cross-model comparison tests
|
||||
@@ -176,7 +183,7 @@ def test_different_models_different_embeddings(arcface_model, mobileface_model,
|
||||
|
||||
# Embeddings should be different (with high probability for random input)
|
||||
# We check that they're not identical
|
||||
assert not np.allclose(arcface_emb, mobileface_emb), "Different models should produce different embeddings"
|
||||
assert not np.allclose(arcface_emb, mobileface_emb), 'Different models should produce different embeddings'
|
||||
|
||||
|
||||
def test_embedding_similarity_computation(arcface_model, mock_aligned_face):
|
||||
@@ -197,7 +204,7 @@ def test_embedding_similarity_computation(arcface_model, mock_aligned_face):
|
||||
similarity = compute_similarity(emb1, emb2)
|
||||
|
||||
# Similarity should be between -1 and 1
|
||||
assert -1.0 <= similarity <= 1.0, f"Similarity should be in [-1, 1], got {similarity}"
|
||||
assert -1.0 <= similarity <= 1.0, f'Similarity should be in [-1, 1], got {similarity}'
|
||||
|
||||
|
||||
def test_same_face_high_similarity(arcface_model, mock_aligned_face):
|
||||
@@ -212,4 +219,4 @@ def test_same_face_high_similarity(arcface_model, mock_aligned_face):
|
||||
similarity = compute_similarity(emb1, emb2)
|
||||
|
||||
# Same image should have similarity close to 1.0
|
||||
assert similarity > 0.99, f"Same face should have similarity > 0.99, got {similarity}"
|
||||
assert similarity > 0.99, f'Same face should have similarity > 0.99, got {similarity}'
|
||||
|
||||
@@ -1,3 +1,10 @@
|
||||
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
@@ -9,35 +16,35 @@ from uniface.detection import RetinaFace
|
||||
def retinaface_model():
|
||||
return RetinaFace(
|
||||
model_name=RetinaFaceWeights.MNET_V2,
|
||||
conf_thresh=0.5,
|
||||
confidence_threshold=0.5,
|
||||
pre_nms_topk=5000,
|
||||
nms_thresh=0.4,
|
||||
nms_threshold=0.4,
|
||||
post_nms_topk=750,
|
||||
)
|
||||
|
||||
|
||||
def test_model_initialization(retinaface_model):
|
||||
assert retinaface_model is not None, "Model initialization failed."
|
||||
assert retinaface_model is not None, 'Model initialization failed.'
|
||||
|
||||
|
||||
def test_inference_on_640x640_image(retinaface_model):
|
||||
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
faces = retinaface_model.detect(mock_image)
|
||||
|
||||
assert isinstance(faces, list), "Detections should be a list."
|
||||
assert isinstance(faces, list), 'Detections should be a list.'
|
||||
|
||||
for face in faces:
|
||||
assert isinstance(face, dict), "Each detection should be a dictionary."
|
||||
assert "bbox" in face, "Each detection should have a 'bbox' key."
|
||||
assert "confidence" in face, "Each detection should have a 'confidence' key."
|
||||
assert "landmarks" in face, "Each detection should have a 'landmarks' key."
|
||||
# Face is a dataclass, check attributes exist
|
||||
assert hasattr(face, 'bbox'), "Each detection should have a 'bbox' attribute."
|
||||
assert hasattr(face, 'confidence'), "Each detection should have a 'confidence' attribute."
|
||||
assert hasattr(face, 'landmarks'), "Each detection should have a 'landmarks' attribute."
|
||||
|
||||
bbox = face["bbox"]
|
||||
assert len(bbox) == 4, "BBox should have 4 values (x1, y1, x2, y2)."
|
||||
bbox = face.bbox
|
||||
assert len(bbox) == 4, 'BBox should have 4 values (x1, y1, x2, y2).'
|
||||
|
||||
landmarks = face["landmarks"]
|
||||
assert len(landmarks) == 5, "Should have 5 landmark points."
|
||||
assert all(len(pt) == 2 for pt in landmarks), "Each landmark should be (x, y)."
|
||||
landmarks = face.landmarks
|
||||
assert len(landmarks) == 5, 'Should have 5 landmark points.'
|
||||
assert all(len(pt) == 2 for pt in landmarks), 'Each landmark should be (x, y).'
|
||||
|
||||
|
||||
def test_confidence_threshold(retinaface_model):
|
||||
@@ -45,11 +52,11 @@ def test_confidence_threshold(retinaface_model):
|
||||
faces = retinaface_model.detect(mock_image)
|
||||
|
||||
for face in faces:
|
||||
confidence = face["confidence"]
|
||||
assert confidence >= 0.5, f"Detection has confidence {confidence} below threshold 0.5"
|
||||
confidence = face.confidence
|
||||
assert confidence >= 0.5, f'Detection has confidence {confidence} below threshold 0.5'
|
||||
|
||||
|
||||
def test_no_faces_detected(retinaface_model):
|
||||
empty_image = np.zeros((640, 640, 3), dtype=np.uint8)
|
||||
faces = retinaface_model.detect(empty_image)
|
||||
assert len(faces) == 0, "Should detect no faces in a blank image."
|
||||
assert len(faces) == 0, 'Should detect no faces in a blank image.'
|
||||
|
||||
@@ -1,3 +1,10 @@
|
||||
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
@@ -9,33 +16,33 @@ from uniface.detection import SCRFD
|
||||
def scrfd_model():
|
||||
return SCRFD(
|
||||
model_name=SCRFDWeights.SCRFD_500M_KPS,
|
||||
conf_thresh=0.5,
|
||||
nms_thresh=0.4,
|
||||
confidence_threshold=0.5,
|
||||
nms_threshold=0.4,
|
||||
)
|
||||
|
||||
|
||||
def test_model_initialization(scrfd_model):
|
||||
assert scrfd_model is not None, "Model initialization failed."
|
||||
assert scrfd_model is not None, 'Model initialization failed.'
|
||||
|
||||
|
||||
def test_inference_on_640x640_image(scrfd_model):
|
||||
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
faces = scrfd_model.detect(mock_image)
|
||||
|
||||
assert isinstance(faces, list), "Detections should be a list."
|
||||
assert isinstance(faces, list), 'Detections should be a list.'
|
||||
|
||||
for face in faces:
|
||||
assert isinstance(face, dict), "Each detection should be a dictionary."
|
||||
assert "bbox" in face, "Each detection should have a 'bbox' key."
|
||||
assert "confidence" in face, "Each detection should have a 'confidence' key."
|
||||
assert "landmarks" in face, "Each detection should have a 'landmarks' key."
|
||||
# Face is a dataclass, check attributes exist
|
||||
assert hasattr(face, 'bbox'), "Each detection should have a 'bbox' attribute."
|
||||
assert hasattr(face, 'confidence'), "Each detection should have a 'confidence' attribute."
|
||||
assert hasattr(face, 'landmarks'), "Each detection should have a 'landmarks' attribute."
|
||||
|
||||
bbox = face["bbox"]
|
||||
assert len(bbox) == 4, "BBox should have 4 values (x1, y1, x2, y2)."
|
||||
bbox = face.bbox
|
||||
assert len(bbox) == 4, 'BBox should have 4 values (x1, y1, x2, y2).'
|
||||
|
||||
landmarks = face["landmarks"]
|
||||
assert len(landmarks) == 5, "Should have 5 landmark points."
|
||||
assert all(len(pt) == 2 for pt in landmarks), "Each landmark should be (x, y)."
|
||||
landmarks = face.landmarks
|
||||
assert len(landmarks) == 5, 'Should have 5 landmark points.'
|
||||
assert all(len(pt) == 2 for pt in landmarks), 'Each landmark should be (x, y).'
|
||||
|
||||
|
||||
def test_confidence_threshold(scrfd_model):
|
||||
@@ -43,14 +50,14 @@ def test_confidence_threshold(scrfd_model):
|
||||
faces = scrfd_model.detect(mock_image)
|
||||
|
||||
for face in faces:
|
||||
confidence = face["confidence"]
|
||||
assert confidence >= 0.5, f"Detection has confidence {confidence} below threshold 0.5"
|
||||
confidence = face.confidence
|
||||
assert confidence >= 0.5, f'Detection has confidence {confidence} below threshold 0.5'
|
||||
|
||||
|
||||
def test_no_faces_detected(scrfd_model):
|
||||
empty_image = np.zeros((640, 640, 3), dtype=np.uint8)
|
||||
faces = scrfd_model.detect(empty_image)
|
||||
assert len(faces) == 0, "Should detect no faces in a blank image."
|
||||
assert len(faces) == 0, 'Should detect no faces in a blank image.'
|
||||
|
||||
|
||||
def test_different_input_sizes(scrfd_model):
|
||||
@@ -59,13 +66,13 @@ def test_different_input_sizes(scrfd_model):
|
||||
for size in test_sizes:
|
||||
mock_image = np.random.randint(0, 255, size, dtype=np.uint8)
|
||||
faces = scrfd_model.detect(mock_image)
|
||||
assert isinstance(faces, list), f"Should return list for size {size}"
|
||||
assert isinstance(faces, list), f'Should return list for size {size}'
|
||||
|
||||
|
||||
def test_scrfd_10g_model():
|
||||
model = SCRFD(model_name=SCRFDWeights.SCRFD_10G_KPS, conf_thresh=0.5)
|
||||
assert model is not None, "SCRFD 10G model initialization failed."
|
||||
model = SCRFD(model_name=SCRFDWeights.SCRFD_10G_KPS, confidence_threshold=0.5)
|
||||
assert model is not None, 'SCRFD 10G model initialization failed.'
|
||||
|
||||
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
faces = model.detect(mock_image)
|
||||
assert isinstance(faces, list), "SCRFD 10G should return list of detections."
|
||||
assert isinstance(faces, list), 'SCRFD 10G should return list of detections.'
|
||||
|
||||
281
tests/test_types.py
Normal file
@@ -0,0 +1,281 @@
|
||||
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from uniface.types import AttributeResult, EmotionResult, Face, GazeResult, SpoofingResult
|
||||
|
||||
|
||||
class TestGazeResult:
|
||||
"""Tests for GazeResult dataclass."""
|
||||
|
||||
def test_creation(self):
|
||||
result = GazeResult(pitch=0.1, yaw=-0.2)
|
||||
assert result.pitch == 0.1
|
||||
assert result.yaw == -0.2
|
||||
|
||||
def test_immutability(self):
|
||||
result = GazeResult(pitch=0.1, yaw=-0.2)
|
||||
with pytest.raises(AttributeError):
|
||||
result.pitch = 0.5 # type: ignore
|
||||
|
||||
def test_repr(self):
|
||||
result = GazeResult(pitch=0.1234, yaw=-0.5678)
|
||||
repr_str = repr(result)
|
||||
assert 'GazeResult' in repr_str
|
||||
assert '0.1234' in repr_str
|
||||
assert '-0.5678' in repr_str
|
||||
|
||||
def test_equality(self):
|
||||
result1 = GazeResult(pitch=0.1, yaw=-0.2)
|
||||
result2 = GazeResult(pitch=0.1, yaw=-0.2)
|
||||
assert result1 == result2
|
||||
|
||||
def test_hashable(self):
|
||||
"""Frozen dataclasses should be hashable."""
|
||||
result = GazeResult(pitch=0.1, yaw=-0.2)
|
||||
# Should not raise
|
||||
hash(result)
|
||||
# Can be used in sets/dicts
|
||||
result_set = {result}
|
||||
assert result in result_set
|
||||
|
||||
|
||||
class TestSpoofingResult:
|
||||
"""Tests for SpoofingResult dataclass."""
|
||||
|
||||
def test_creation_real(self):
|
||||
result = SpoofingResult(is_real=True, confidence=0.95)
|
||||
assert result.is_real is True
|
||||
assert result.confidence == 0.95
|
||||
|
||||
def test_creation_fake(self):
|
||||
result = SpoofingResult(is_real=False, confidence=0.87)
|
||||
assert result.is_real is False
|
||||
assert result.confidence == 0.87
|
||||
|
||||
def test_immutability(self):
|
||||
result = SpoofingResult(is_real=True, confidence=0.95)
|
||||
with pytest.raises(AttributeError):
|
||||
result.is_real = False # type: ignore
|
||||
|
||||
def test_repr_real(self):
|
||||
result = SpoofingResult(is_real=True, confidence=0.9512)
|
||||
repr_str = repr(result)
|
||||
assert 'SpoofingResult' in repr_str
|
||||
assert 'Real' in repr_str
|
||||
assert '0.9512' in repr_str
|
||||
|
||||
def test_repr_fake(self):
|
||||
result = SpoofingResult(is_real=False, confidence=0.8765)
|
||||
repr_str = repr(result)
|
||||
assert 'Fake' in repr_str
|
||||
|
||||
def test_hashable(self):
|
||||
result = SpoofingResult(is_real=True, confidence=0.95)
|
||||
hash(result)
|
||||
|
||||
|
||||
class TestEmotionResult:
|
||||
"""Tests for EmotionResult dataclass."""
|
||||
|
||||
def test_creation(self):
|
||||
result = EmotionResult(emotion='Happy', confidence=0.92)
|
||||
assert result.emotion == 'Happy'
|
||||
assert result.confidence == 0.92
|
||||
|
||||
def test_immutability(self):
|
||||
result = EmotionResult(emotion='Sad', confidence=0.75)
|
||||
with pytest.raises(AttributeError):
|
||||
result.emotion = 'Happy' # type: ignore
|
||||
|
||||
def test_repr(self):
|
||||
result = EmotionResult(emotion='Angry', confidence=0.8123)
|
||||
repr_str = repr(result)
|
||||
assert 'EmotionResult' in repr_str
|
||||
assert 'Angry' in repr_str
|
||||
assert '0.8123' in repr_str
|
||||
|
||||
def test_various_emotions(self):
|
||||
emotions = ['Neutral', 'Happy', 'Sad', 'Surprise', 'Fear', 'Disgust', 'Angry']
|
||||
for emotion in emotions:
|
||||
result = EmotionResult(emotion=emotion, confidence=0.5)
|
||||
assert result.emotion == emotion
|
||||
|
||||
def test_hashable(self):
|
||||
result = EmotionResult(emotion='Happy', confidence=0.92)
|
||||
hash(result)
|
||||
|
||||
|
||||
class TestAttributeResult:
|
||||
"""Tests for AttributeResult dataclass."""
|
||||
|
||||
def test_age_gender_result(self):
|
||||
result = AttributeResult(gender=1, age=25)
|
||||
assert result.gender == 1
|
||||
assert result.age == 25
|
||||
assert result.age_group is None
|
||||
assert result.race is None
|
||||
assert result.sex == 'Male'
|
||||
|
||||
def test_fairface_result(self):
|
||||
result = AttributeResult(gender=0, age_group='20-29', race='East Asian')
|
||||
assert result.gender == 0
|
||||
assert result.age is None
|
||||
assert result.age_group == '20-29'
|
||||
assert result.race == 'East Asian'
|
||||
assert result.sex == 'Female'
|
||||
|
||||
def test_sex_property_female(self):
|
||||
result = AttributeResult(gender=0)
|
||||
assert result.sex == 'Female'
|
||||
|
||||
def test_sex_property_male(self):
|
||||
result = AttributeResult(gender=1)
|
||||
assert result.sex == 'Male'
|
||||
|
||||
def test_immutability(self):
|
||||
result = AttributeResult(gender=1, age=30)
|
||||
with pytest.raises(AttributeError):
|
||||
result.age = 31 # type: ignore
|
||||
|
||||
def test_repr_age_gender(self):
|
||||
result = AttributeResult(gender=1, age=25)
|
||||
repr_str = repr(result)
|
||||
assert 'AttributeResult' in repr_str
|
||||
assert 'Male' in repr_str
|
||||
assert 'age=25' in repr_str
|
||||
|
||||
def test_repr_fairface(self):
|
||||
result = AttributeResult(gender=0, age_group='30-39', race='White')
|
||||
repr_str = repr(result)
|
||||
assert 'Female' in repr_str
|
||||
assert 'age_group=30-39' in repr_str
|
||||
assert 'race=White' in repr_str
|
||||
|
||||
def test_hashable(self):
|
||||
result = AttributeResult(gender=1, age=25)
|
||||
hash(result)
|
||||
|
||||
|
||||
class TestFace:
|
||||
"""Tests for Face dataclass."""
|
||||
|
||||
@pytest.fixture
|
||||
def sample_face(self):
|
||||
return Face(
|
||||
bbox=np.array([100, 100, 200, 200]),
|
||||
confidence=0.95,
|
||||
landmarks=np.array([[120, 130], [180, 130], [150, 160], [130, 180], [170, 180]]),
|
||||
)
|
||||
|
||||
def test_creation(self, sample_face):
|
||||
assert sample_face.confidence == 0.95
|
||||
assert sample_face.bbox.shape == (4,)
|
||||
assert sample_face.landmarks.shape == (5, 2)
|
||||
|
||||
def test_optional_attributes_default_none(self, sample_face):
|
||||
assert sample_face.embedding is None
|
||||
assert sample_face.gender is None
|
||||
assert sample_face.age is None
|
||||
assert sample_face.age_group is None
|
||||
assert sample_face.race is None
|
||||
assert sample_face.emotion is None
|
||||
assert sample_face.emotion_confidence is None
|
||||
|
||||
def test_mutability(self, sample_face):
|
||||
"""Face should be mutable for FaceAnalyzer enrichment."""
|
||||
sample_face.gender = 1
|
||||
sample_face.age = 25
|
||||
sample_face.embedding = np.random.randn(512)
|
||||
|
||||
assert sample_face.gender == 1
|
||||
assert sample_face.age == 25
|
||||
assert sample_face.embedding.shape == (512,)
|
||||
|
||||
def test_sex_property_none(self, sample_face):
|
||||
assert sample_face.sex is None
|
||||
|
||||
def test_sex_property_female(self, sample_face):
|
||||
sample_face.gender = 0
|
||||
assert sample_face.sex == 'Female'
|
||||
|
||||
def test_sex_property_male(self, sample_face):
|
||||
sample_face.gender = 1
|
||||
assert sample_face.sex == 'Male'
|
||||
|
||||
def test_bbox_xyxy(self, sample_face):
|
||||
bbox_xyxy = sample_face.bbox_xyxy
|
||||
np.testing.assert_array_equal(bbox_xyxy, [100, 100, 200, 200])
|
||||
|
||||
def test_bbox_xywh(self, sample_face):
|
||||
bbox_xywh = sample_face.bbox_xywh
|
||||
np.testing.assert_array_equal(bbox_xywh, [100, 100, 100, 100])
|
||||
|
||||
def test_to_dict(self, sample_face):
|
||||
result = sample_face.to_dict()
|
||||
assert isinstance(result, dict)
|
||||
assert 'bbox' in result
|
||||
assert 'confidence' in result
|
||||
assert 'landmarks' in result
|
||||
|
||||
def test_repr_minimal(self, sample_face):
|
||||
repr_str = repr(sample_face)
|
||||
assert 'Face' in repr_str
|
||||
assert 'confidence=0.950' in repr_str
|
||||
|
||||
def test_repr_with_attributes(self, sample_face):
|
||||
sample_face.gender = 1
|
||||
sample_face.age = 30
|
||||
sample_face.emotion = 'Happy'
|
||||
|
||||
repr_str = repr(sample_face)
|
||||
assert 'age=30' in repr_str
|
||||
assert 'sex=Male' in repr_str
|
||||
assert 'emotion=Happy' in repr_str
|
||||
|
||||
def test_compute_similarity_no_embeddings(self, sample_face):
|
||||
other_face = Face(
|
||||
bbox=np.array([50, 50, 150, 150]),
|
||||
confidence=0.90,
|
||||
landmarks=np.random.randn(5, 2),
|
||||
)
|
||||
with pytest.raises(ValueError, match='Both faces must have embeddings'):
|
||||
sample_face.compute_similarity(other_face)
|
||||
|
||||
def test_compute_similarity_with_embeddings(self, sample_face):
|
||||
# Create normalized embeddings
|
||||
sample_face.embedding = np.random.randn(512)
|
||||
sample_face.embedding /= np.linalg.norm(sample_face.embedding)
|
||||
|
||||
other_face = Face(
|
||||
bbox=np.array([50, 50, 150, 150]),
|
||||
confidence=0.90,
|
||||
landmarks=np.random.randn(5, 2),
|
||||
)
|
||||
other_face.embedding = np.random.randn(512)
|
||||
other_face.embedding /= np.linalg.norm(other_face.embedding)
|
||||
|
||||
similarity = sample_face.compute_similarity(other_face)
|
||||
assert isinstance(similarity, float)
|
||||
assert -1 <= similarity <= 1
|
||||
|
||||
def test_compute_similarity_same_embedding(self, sample_face):
|
||||
embedding = np.random.randn(512)
|
||||
embedding /= np.linalg.norm(embedding)
|
||||
sample_face.embedding = embedding.copy()
|
||||
|
||||
other_face = Face(
|
||||
bbox=np.array([50, 50, 150, 150]),
|
||||
confidence=0.90,
|
||||
landmarks=np.random.randn(5, 2),
|
||||
embedding=embedding.copy(),
|
||||
)
|
||||
|
||||
similarity = sample_face.compute_similarity(other_face)
|
||||
assert similarity == pytest.approx(1.0, abs=1e-5)
|
||||
@@ -1,3 +1,10 @@
|
||||
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
@@ -39,7 +46,7 @@ def test_compute_similarity_same_embedding():
|
||||
embedding = embedding / np.linalg.norm(embedding) # Normalize
|
||||
|
||||
similarity = compute_similarity(embedding, embedding)
|
||||
assert np.isclose(similarity, 1.0, atol=1e-5), f"Self-similarity should be 1.0, got {similarity}"
|
||||
assert np.isclose(similarity, 1.0, atol=1e-5), f'Self-similarity should be 1.0, got {similarity}'
|
||||
|
||||
|
||||
def test_compute_similarity_range():
|
||||
@@ -56,7 +63,7 @@ def test_compute_similarity_range():
|
||||
emb2 = emb2 / np.linalg.norm(emb2)
|
||||
|
||||
similarity = compute_similarity(emb1, emb2)
|
||||
assert -1.0 <= similarity <= 1.0, f"Similarity should be in [-1, 1], got {similarity}"
|
||||
assert -1.0 <= similarity <= 1.0, f'Similarity should be in [-1, 1], got {similarity}'
|
||||
|
||||
|
||||
def test_compute_similarity_orthogonal():
|
||||
@@ -71,7 +78,7 @@ def test_compute_similarity_orthogonal():
|
||||
emb2[0, 1] = 1.0 # [0, 1, 0, ..., 0]
|
||||
|
||||
similarity = compute_similarity(emb1, emb2)
|
||||
assert np.isclose(similarity, 0.0, atol=1e-5), f"Orthogonal embeddings should have similarity 0.0, got {similarity}"
|
||||
assert np.isclose(similarity, 0.0, atol=1e-5), f'Orthogonal embeddings should have similarity 0.0, got {similarity}'
|
||||
|
||||
|
||||
def test_compute_similarity_opposite():
|
||||
@@ -84,7 +91,7 @@ def test_compute_similarity_opposite():
|
||||
emb2 = -emb1 # Opposite direction
|
||||
|
||||
similarity = compute_similarity(emb1, emb2)
|
||||
assert np.isclose(similarity, -1.0, atol=1e-5), f"Opposite embeddings should have similarity -1.0, got {similarity}"
|
||||
assert np.isclose(similarity, -1.0, atol=1e-5), f'Opposite embeddings should have similarity -1.0, got {similarity}'
|
||||
|
||||
|
||||
def test_compute_similarity_symmetry():
|
||||
@@ -101,7 +108,7 @@ def test_compute_similarity_symmetry():
|
||||
sim_12 = compute_similarity(emb1, emb2)
|
||||
sim_21 = compute_similarity(emb2, emb1)
|
||||
|
||||
assert np.isclose(sim_12, sim_21), "Similarity should be symmetric"
|
||||
assert np.isclose(sim_12, sim_21), 'Similarity should be symmetric'
|
||||
|
||||
|
||||
def test_compute_similarity_dtype():
|
||||
@@ -116,7 +123,7 @@ def test_compute_similarity_dtype():
|
||||
emb2 = emb2 / np.linalg.norm(emb2)
|
||||
|
||||
similarity = compute_similarity(emb1, emb2)
|
||||
assert isinstance(similarity, (float, np.floating)), f"Similarity should be float, got {type(similarity)}"
|
||||
assert isinstance(similarity, float | np.floating), f'Similarity should be float, got {type(similarity)}'
|
||||
|
||||
|
||||
# face_alignment tests
|
||||
@@ -126,7 +133,7 @@ def test_face_alignment_output_shape(mock_image, mock_landmarks):
|
||||
"""
|
||||
aligned, _ = face_alignment(mock_image, mock_landmarks, image_size=(112, 112))
|
||||
|
||||
assert aligned.shape == (112, 112, 3), f"Expected shape (112, 112, 3), got {aligned.shape}"
|
||||
assert aligned.shape == (112, 112, 3), f'Expected shape (112, 112, 3), got {aligned.shape}'
|
||||
|
||||
|
||||
def test_face_alignment_dtype(mock_image, mock_landmarks):
|
||||
@@ -135,7 +142,7 @@ def test_face_alignment_dtype(mock_image, mock_landmarks):
|
||||
"""
|
||||
aligned, _ = face_alignment(mock_image, mock_landmarks, image_size=(112, 112))
|
||||
|
||||
assert aligned.dtype == np.uint8, f"Expected uint8, got {aligned.dtype}"
|
||||
assert aligned.dtype == np.uint8, f'Expected uint8, got {aligned.dtype}'
|
||||
|
||||
|
||||
def test_face_alignment_different_sizes(mock_image, mock_landmarks):
|
||||
@@ -147,7 +154,7 @@ def test_face_alignment_different_sizes(mock_image, mock_landmarks):
|
||||
|
||||
for size in test_sizes:
|
||||
aligned, _ = face_alignment(mock_image, mock_landmarks, image_size=size)
|
||||
assert aligned.shape == (*size, 3), f"Failed for size {size}"
|
||||
assert aligned.shape == (*size, 3), f'Failed for size {size}'
|
||||
|
||||
|
||||
def test_face_alignment_consistency(mock_image, mock_landmarks):
|
||||
@@ -157,7 +164,7 @@ def test_face_alignment_consistency(mock_image, mock_landmarks):
|
||||
aligned1, _ = face_alignment(mock_image, mock_landmarks, image_size=(112, 112))
|
||||
aligned2, _ = face_alignment(mock_image, mock_landmarks, image_size=(112, 112))
|
||||
|
||||
assert np.allclose(aligned1, aligned2), "Same input should produce same aligned face"
|
||||
assert np.allclose(aligned1, aligned2), 'Same input should produce same aligned face'
|
||||
|
||||
|
||||
def test_face_alignment_landmarks_as_list(mock_image):
|
||||
@@ -175,7 +182,7 @@ def test_face_alignment_landmarks_as_list(mock_image):
|
||||
# Convert list to numpy array before passing to face_alignment
|
||||
landmarks_array = np.array(landmarks_list, dtype=np.float32)
|
||||
aligned, _ = face_alignment(mock_image, landmarks_array, image_size=(112, 112))
|
||||
assert aligned.shape == (112, 112, 3), "Should work with landmarks as array"
|
||||
assert aligned.shape == (112, 112, 3), 'Should work with landmarks as array'
|
||||
|
||||
|
||||
def test_face_alignment_value_range(mock_image, mock_landmarks):
|
||||
@@ -184,8 +191,8 @@ def test_face_alignment_value_range(mock_image, mock_landmarks):
|
||||
"""
|
||||
aligned, _ = face_alignment(mock_image, mock_landmarks, image_size=(112, 112))
|
||||
|
||||
assert np.all(aligned >= 0), "Pixel values should be >= 0"
|
||||
assert np.all(aligned <= 255), "Pixel values should be <= 255"
|
||||
assert np.all(aligned >= 0), 'Pixel values should be >= 0'
|
||||
assert np.all(aligned <= 255), 'Pixel values should be <= 255'
|
||||
|
||||
|
||||
def test_face_alignment_not_all_zeros(mock_image, mock_landmarks):
|
||||
@@ -195,7 +202,7 @@ def test_face_alignment_not_all_zeros(mock_image, mock_landmarks):
|
||||
aligned, _ = face_alignment(mock_image, mock_landmarks, image_size=(112, 112))
|
||||
|
||||
# At least some pixels should be non-zero
|
||||
assert np.any(aligned > 0), "Aligned face should have some non-zero pixels"
|
||||
assert np.any(aligned > 0), 'Aligned face should have some non-zero pixels'
|
||||
|
||||
|
||||
def test_face_alignment_from_different_positions(mock_image):
|
||||
@@ -220,7 +227,7 @@ def test_face_alignment_from_different_positions(mock_image):
|
||||
|
||||
for landmarks in positions:
|
||||
aligned, _ = face_alignment(mock_image, landmarks, image_size=(112, 112))
|
||||
assert aligned.shape == (112, 112, 3), f"Failed for landmarks at {landmarks[0]}"
|
||||
assert aligned.shape == (112, 112, 3), f'Failed for landmarks at {landmarks[0]}'
|
||||
|
||||
|
||||
def test_face_alignment_landmark_count(mock_image):
|
||||
@@ -240,7 +247,7 @@ def test_face_alignment_landmark_count(mock_image):
|
||||
)
|
||||
|
||||
aligned, _ = face_alignment(mock_image, landmarks_5pt, image_size=(112, 112))
|
||||
assert aligned.shape == (112, 112, 3), "Should work with 5-point landmarks"
|
||||
assert aligned.shape == (112, 112, 3), 'Should work with 5-point landmarks'
|
||||
|
||||
|
||||
def test_compute_similarity_with_recognition_embeddings():
|
||||
@@ -259,4 +266,4 @@ def test_compute_similarity_with_recognition_embeddings():
|
||||
|
||||
# Should be a valid similarity score
|
||||
assert -1.0 <= similarity <= 1.0
|
||||
assert isinstance(similarity, (float, np.floating))
|
||||
assert isinstance(similarity, float | np.floating)
|
||||
|
||||
127
tools/README.md
Normal file
@@ -0,0 +1,127 @@
|
||||
# Tools
|
||||
|
||||
CLI utilities for testing and running UniFace features.
|
||||
|
||||
## Available Tools
|
||||
|
||||
| Tool | Description |
|
||||
|------|-------------|
|
||||
| `detect.py` | Face detection on image, video, or webcam |
|
||||
| `track.py` | Face tracking on video with ByteTrack |
|
||||
| `analyze.py` | Complete face analysis (detection + recognition + attributes) |
|
||||
| `anonymize.py` | Face anonymization/blurring for privacy |
|
||||
| `emotion.py` | Emotion detection (7 or 8 emotions) |
|
||||
| `gaze.py` | Gaze direction estimation |
|
||||
| `landmarks.py` | 106-point facial landmark detection |
|
||||
| `recognize.py` | Face embedding extraction and comparison |
|
||||
| `search.py` | Real-time face matching against reference |
|
||||
| `fairface.py` | FairFace attribute prediction (race, gender, age) |
|
||||
| `attribute.py` | Age and gender prediction |
|
||||
| `spoofing.py` | Face anti-spoofing detection |
|
||||
| `parse.py` | Face semantic segmentation (BiSeNet) |
|
||||
| `xseg.py` | Face segmentation (XSeg) |
|
||||
| `batch_process.py` | Batch process folder of images |
|
||||
| `download_model.py` | Download model weights |
|
||||
| `sha256_generate.py` | Generate SHA256 hash for model files |
|
||||
|
||||
## Unified `--source` Pattern
|
||||
|
||||
Most tools use a unified `--source` argument that accepts:
|
||||
- **Image path**: `--source photo.jpg`
|
||||
- **Video path**: `--source video.mp4`
|
||||
- **Camera ID**: `--source 0` (default webcam), `--source 1` (external camera)
|
||||
|
||||
## Usage Examples
|
||||
|
||||
```bash
|
||||
# Face detection
|
||||
python tools/detect.py --source assets/test.jpg # image
|
||||
python tools/detect.py --source video.mp4 # video
|
||||
python tools/detect.py --source 0 # webcam
|
||||
|
||||
# Face tracking
|
||||
python tools/track.py --source video.mp4
|
||||
python tools/track.py --source video.mp4 --output tracked.mp4
|
||||
python tools/track.py --source 0 # webcam
|
||||
|
||||
# Face anonymization
|
||||
python tools/anonymize.py --source assets/test.jpg --method pixelate
|
||||
python tools/anonymize.py --source video.mp4 --method gaussian
|
||||
python tools/anonymize.py --source 0 --method pixelate
|
||||
|
||||
# Age and gender
|
||||
python tools/attribute.py --source assets/test.jpg
|
||||
python tools/attribute.py --source 0
|
||||
|
||||
# Emotion detection
|
||||
python tools/emotion.py --source assets/test.jpg
|
||||
python tools/emotion.py --source 0
|
||||
|
||||
# Gaze estimation
|
||||
python tools/gaze.py --source assets/test.jpg
|
||||
python tools/gaze.py --source 0
|
||||
|
||||
# Landmarks
|
||||
python tools/landmarks.py --source assets/test.jpg
|
||||
python tools/landmarks.py --source 0
|
||||
|
||||
# FairFace attributes
|
||||
python tools/fairface.py --source assets/test.jpg
|
||||
python tools/fairface.py --source 0
|
||||
|
||||
# Face parsing (BiSeNet)
|
||||
python tools/parse.py --source assets/test.jpg
|
||||
python tools/parse.py --source 0
|
||||
|
||||
# Face segmentation (XSeg)
|
||||
python tools/xseg.py --source assets/test.jpg
|
||||
python tools/xseg.py --source 0
|
||||
|
||||
# Face anti-spoofing
|
||||
python tools/spoofing.py --source assets/test.jpg
|
||||
python tools/spoofing.py --source 0
|
||||
|
||||
# Face analyzer
|
||||
python tools/analyze.py --source assets/test.jpg
|
||||
python tools/analyze.py --source 0
|
||||
|
||||
# Face recognition (extract embedding)
|
||||
python tools/recognize.py --image assets/test.jpg
|
||||
|
||||
# Face comparison
|
||||
python tools/recognize.py --image1 face1.jpg --image2 face2.jpg
|
||||
|
||||
# Face search (match against reference)
|
||||
python tools/search.py --reference person.jpg --source 0
|
||||
python tools/search.py --reference person.jpg --source video.mp4
|
||||
|
||||
# Batch processing
|
||||
python tools/batch_process.py --input images/ --output results/
|
||||
|
||||
# Download models
|
||||
python tools/download_model.py --model-type retinaface
|
||||
python tools/download_model.py # downloads all
|
||||
```
|
||||
|
||||
## Common Options
|
||||
|
||||
| Option | Description |
|
||||
|--------|-------------|
|
||||
| `--source` | Input source: image/video path or camera ID (0, 1, ...) |
|
||||
| `--detector` | Choose detector: `retinaface`, `scrfd`, `yolov5face` |
|
||||
| `--threshold` | Visualization confidence threshold (default: varies) |
|
||||
| `--save-dir` | Output directory (default: `outputs`) |
|
||||
|
||||
## Supported Formats
|
||||
|
||||
**Images:** `.jpg`, `.jpeg`, `.png`, `.bmp`, `.webp`, `.tiff`
|
||||
|
||||
**Videos:** `.mp4`, `.avi`, `.mov`, `.mkv`, `.webm`, `.flv`
|
||||
|
||||
**Camera:** Use integer IDs (`0`, `1`, `2`, ...)
|
||||
|
||||
## Quick Test
|
||||
|
||||
```bash
|
||||
python tools/detect.py --source assets/test.jpg
|
||||
```
|
||||
29
tools/_common.py
Normal file
@@ -0,0 +1,29 @@
|
||||
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
|
||||
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}
|
||||
|
||||
|
||||
def get_source_type(source: str) -> str:
|
||||
"""Determine if source is image, video, or camera.
|
||||
|
||||
Args:
|
||||
source: File path or camera ID string (e.g. ``"0"``).
|
||||
|
||||
Returns:
|
||||
One of ``"image"``, ``"video"``, ``"camera"``, or ``"unknown"``.
|
||||
"""
|
||||
if source.isdigit():
|
||||
return 'camera'
|
||||
suffix = Path(source).suffix.lower()
|
||||
if suffix in IMAGE_EXTENSIONS:
|
||||
return 'image'
|
||||
if suffix in VIDEO_EXTENSIONS:
|
||||
return 'video'
|
||||
return 'unknown'
|
||||
226
tools/analyze.py
Normal file
@@ -0,0 +1,226 @@
|
||||
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Face analysis using FaceAnalyzer.
|
||||
|
||||
Usage:
|
||||
python tools/analyze.py --source path/to/image.jpg
|
||||
python tools/analyze.py --source path/to/video.mp4
|
||||
python tools/analyze.py --source 0 # webcam
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
from _common import get_source_type
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from uniface.analyzer import FaceAnalyzer
|
||||
from uniface.attribute import AgeGender
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.draw import draw_detections
|
||||
from uniface.recognition import ArcFace
|
||||
|
||||
|
||||
def draw_face_info(image, face, face_id):
|
||||
"""Draw face ID and attributes above bounding box."""
|
||||
x1, y1, _x2, y2 = map(int, face.bbox)
|
||||
lines = [f'ID: {face_id}', f'Conf: {face.confidence:.2f}']
|
||||
if face.age and face.sex:
|
||||
lines.append(f'{face.sex}, {face.age}y')
|
||||
|
||||
for i, line in enumerate(lines):
|
||||
y_pos = y1 - 10 - (len(lines) - 1 - i) * 25
|
||||
if y_pos < 20:
|
||||
y_pos = y2 + 20 + i * 25
|
||||
(tw, th), _ = cv2.getTextSize(line, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
|
||||
cv2.rectangle(image, (x1, y_pos - th - 5), (x1 + tw + 10, y_pos + 5), (0, 255, 0), -1)
|
||||
cv2.putText(image, line, (x1 + 5, y_pos), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 2)
|
||||
|
||||
|
||||
def process_image(analyzer, image_path: str, save_dir: str = 'outputs', show_similarity: bool = True):
|
||||
"""Process a single image."""
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
print(f"Error: Failed to load image from '{image_path}'")
|
||||
return
|
||||
|
||||
faces = analyzer.analyze(image)
|
||||
print(f'Detected {len(faces)} face(s)')
|
||||
|
||||
if not faces:
|
||||
return
|
||||
|
||||
for i, face in enumerate(faces, 1):
|
||||
info = f' Face {i}: {face.sex}, {face.age}y' if face.age and face.sex else f' Face {i}'
|
||||
if face.embedding is not None:
|
||||
info += f' (embedding: {face.embedding.shape})'
|
||||
print(info)
|
||||
|
||||
if show_similarity and len(faces) >= 2:
|
||||
print('\nSimilarity Matrix:')
|
||||
n = len(faces)
|
||||
sim_matrix = np.zeros((n, n))
|
||||
|
||||
for i in range(n):
|
||||
for j in range(i, n):
|
||||
if i == j:
|
||||
sim_matrix[i][j] = 1.0
|
||||
else:
|
||||
sim = faces[i].compute_similarity(faces[j])
|
||||
sim_matrix[i][j] = sim
|
||||
sim_matrix[j][i] = sim
|
||||
|
||||
print(' ', end='')
|
||||
for i in range(n):
|
||||
print(f' F{i + 1:2d} ', end='')
|
||||
print('\n ' + '-' * (7 * n))
|
||||
|
||||
for i in range(n):
|
||||
print(f'F{i + 1:2d} | ', end='')
|
||||
for j in range(n):
|
||||
print(f'{sim_matrix[i][j]:6.3f} ', end='')
|
||||
print()
|
||||
|
||||
pairs = [(i, j, sim_matrix[i][j]) for i in range(n) for j in range(i + 1, n)]
|
||||
pairs.sort(key=lambda x: x[2], reverse=True)
|
||||
|
||||
print('\nTop matches (>0.4 = same person):')
|
||||
for i, j, sim in pairs[:3]:
|
||||
status = 'Same' if sim > 0.4 else 'Different'
|
||||
print(f' Face {i + 1} ↔ Face {j + 1}: {sim:.3f} ({status})')
|
||||
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
draw_detections(image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, corner_bbox=True)
|
||||
|
||||
for i, face in enumerate(faces, 1):
|
||||
draw_face_info(image, face, i)
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{Path(image_path).stem}_analysis.jpg')
|
||||
cv2.imwrite(output_path, image)
|
||||
print(f'Output saved: {output_path}')
|
||||
|
||||
|
||||
def process_video(analyzer, video_path: str, save_dir: str = 'outputs'):
|
||||
"""Process a video file."""
|
||||
cap = cv2.VideoCapture(video_path)
|
||||
if not cap.isOpened():
|
||||
print(f"Error: Cannot open video file '{video_path}'")
|
||||
return
|
||||
|
||||
fps = cap.get(cv2.CAP_PROP_FPS)
|
||||
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
||||
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
||||
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{Path(video_path).stem}_analysis.mp4')
|
||||
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
|
||||
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
|
||||
|
||||
print(f'Processing video: {video_path} ({total_frames} frames)')
|
||||
frame_count = 0
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
frame_count += 1
|
||||
faces = analyzer.analyze(frame)
|
||||
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
draw_detections(image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, corner_bbox=True)
|
||||
|
||||
for i, face in enumerate(faces, 1):
|
||||
draw_face_info(frame, face, i)
|
||||
|
||||
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
out.write(frame)
|
||||
|
||||
if frame_count % 100 == 0:
|
||||
print(f' Processed {frame_count}/{total_frames} frames...')
|
||||
|
||||
cap.release()
|
||||
out.release()
|
||||
print(f'Done! Output saved: {output_path}')
|
||||
|
||||
|
||||
def run_camera(analyzer, camera_id: int = 0):
|
||||
"""Run real-time analysis on webcam."""
|
||||
cap = cv2.VideoCapture(camera_id)
|
||||
if not cap.isOpened():
|
||||
print(f'Cannot open camera {camera_id}')
|
||||
return
|
||||
|
||||
print("Press 'q' to quit")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
frame = cv2.flip(frame, 1)
|
||||
if not ret:
|
||||
break
|
||||
|
||||
faces = analyzer.analyze(frame)
|
||||
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
draw_detections(image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, corner_bbox=True)
|
||||
|
||||
for i, face in enumerate(faces, 1):
|
||||
draw_face_info(frame, face, i)
|
||||
|
||||
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
cv2.imshow('Face Analyzer', frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Face analysis with detection, recognition, and attributes')
|
||||
parser.add_argument('--source', type=str, required=True, help='Image/video path or camera ID (0, 1, ...)')
|
||||
parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
|
||||
parser.add_argument('--no-similarity', action='store_true', help='Skip similarity matrix computation')
|
||||
args = parser.parse_args()
|
||||
|
||||
detector = RetinaFace()
|
||||
recognizer = ArcFace()
|
||||
age_gender = AgeGender()
|
||||
analyzer = FaceAnalyzer(detector, recognizer, age_gender)
|
||||
|
||||
source_type = get_source_type(args.source)
|
||||
|
||||
if source_type == 'camera':
|
||||
run_camera(analyzer, int(args.source))
|
||||
elif source_type == 'image':
|
||||
if not os.path.exists(args.source):
|
||||
print(f'Error: Image not found: {args.source}')
|
||||
return
|
||||
process_image(analyzer, args.source, args.save_dir, show_similarity=not args.no_similarity)
|
||||
elif source_type == 'video':
|
||||
if not os.path.exists(args.source):
|
||||
print(f'Error: Video not found: {args.source}')
|
||||
return
|
||||
process_video(analyzer, args.source, args.save_dir)
|
||||
else:
|
||||
print(f"Error: Unknown source type for '{args.source}'")
|
||||
print('Supported formats: images (.jpg, .png, ...), videos (.mp4, .avi, ...), or camera ID (0, 1, ...)')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
265
tools/anonymize.py
Normal file
@@ -0,0 +1,265 @@
|
||||
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Face anonymization/blurring for privacy.
|
||||
|
||||
Usage:
|
||||
python tools/anonymize.py --source path/to/image.jpg --method pixelate
|
||||
python tools/anonymize.py --source path/to/video.mp4 --method gaussian
|
||||
python tools/anonymize.py --source 0 --method pixelate # webcam
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
from _common import get_source_type
|
||||
import cv2
|
||||
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.privacy import BlurFace
|
||||
|
||||
|
||||
def process_image(
|
||||
detector,
|
||||
blurrer: BlurFace,
|
||||
image_path: str,
|
||||
save_dir: str = 'outputs',
|
||||
show_detections: bool = False,
|
||||
):
|
||||
"""Process a single image."""
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
print(f"Error: Failed to load image from '{image_path}'")
|
||||
return
|
||||
|
||||
faces = detector.detect(image)
|
||||
print(f'Detected {len(faces)} face(s)')
|
||||
|
||||
if show_detections and faces:
|
||||
from uniface.draw import draw_detections
|
||||
|
||||
preview = image.copy()
|
||||
bboxes = [face.bbox for face in faces]
|
||||
scores = [face.confidence for face in faces]
|
||||
landmarks = [face.landmarks for face in faces]
|
||||
draw_detections(preview, bboxes, scores, landmarks)
|
||||
|
||||
cv2.imshow('Detections (Press any key to continue)', preview)
|
||||
cv2.waitKey(0)
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
if faces:
|
||||
anonymized = blurrer.anonymize(image, faces)
|
||||
else:
|
||||
anonymized = image
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
basename = os.path.splitext(os.path.basename(image_path))[0]
|
||||
output_path = os.path.join(save_dir, f'{basename}_anonymized.jpg')
|
||||
cv2.imwrite(output_path, anonymized)
|
||||
print(f'Output saved: {output_path}')
|
||||
|
||||
|
||||
def process_video(
|
||||
detector,
|
||||
blurrer: BlurFace,
|
||||
video_path: str,
|
||||
save_dir: str = 'outputs',
|
||||
):
|
||||
"""Process a video file."""
|
||||
cap = cv2.VideoCapture(video_path)
|
||||
if not cap.isOpened():
|
||||
print(f"Error: Cannot open video file '{video_path}'")
|
||||
return
|
||||
|
||||
fps = cap.get(cv2.CAP_PROP_FPS)
|
||||
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
||||
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
||||
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{Path(video_path).stem}_anonymized.mp4')
|
||||
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
|
||||
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
|
||||
|
||||
print(f'Processing video: {video_path} ({total_frames} frames)')
|
||||
frame_count = 0
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
frame_count += 1
|
||||
faces = detector.detect(frame)
|
||||
|
||||
if faces:
|
||||
frame = blurrer.anonymize(frame, faces, inplace=True)
|
||||
|
||||
out.write(frame)
|
||||
|
||||
if frame_count % 100 == 0:
|
||||
print(f' Processed {frame_count}/{total_frames} frames...')
|
||||
|
||||
cap.release()
|
||||
out.release()
|
||||
print(f'Done! Output saved: {output_path}')
|
||||
|
||||
|
||||
def run_camera(detector, blurrer: BlurFace, camera_id: int = 0):
|
||||
"""Run real-time anonymization on webcam."""
|
||||
cap = cv2.VideoCapture(camera_id)
|
||||
if not cap.isOpened():
|
||||
print(f'Cannot open camera {camera_id}')
|
||||
return
|
||||
|
||||
print("Press 'q' to quit")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
frame = cv2.flip(frame, 1)
|
||||
if not ret:
|
||||
break
|
||||
|
||||
faces = detector.detect(frame)
|
||||
if faces:
|
||||
frame = blurrer.anonymize(frame, faces, inplace=True)
|
||||
|
||||
cv2.putText(
|
||||
frame,
|
||||
f'Faces blurred: {len(faces)} | Method: {blurrer.method}',
|
||||
(10, 30),
|
||||
cv2.FONT_HERSHEY_SIMPLEX,
|
||||
0.7,
|
||||
(0, 255, 0),
|
||||
2,
|
||||
)
|
||||
|
||||
cv2.imshow('Face Anonymization (Press q to quit)', frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(
|
||||
description='Face anonymization using various blur methods',
|
||||
formatter_class=argparse.RawDescriptionHelpFormatter,
|
||||
epilog="""
|
||||
Examples:
|
||||
# Anonymize image with pixelation (default)
|
||||
python tools/anonymize.py --source photo.jpg
|
||||
|
||||
# Use Gaussian blur with custom strength
|
||||
python tools/anonymize.py --source photo.jpg --method gaussian --blur-strength 5.0
|
||||
|
||||
# Real-time webcam anonymization
|
||||
python tools/anonymize.py --source 0 --method pixelate
|
||||
|
||||
# Black boxes for maximum privacy
|
||||
python tools/anonymize.py --source photo.jpg --method blackout
|
||||
|
||||
# Custom pixelation intensity
|
||||
python tools/anonymize.py --source photo.jpg --method pixelate --pixel-blocks 5
|
||||
""",
|
||||
)
|
||||
|
||||
# Input/output
|
||||
parser.add_argument('--source', type=str, required=True, help='Image/video path or camera ID (0, 1, ...)')
|
||||
parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
|
||||
|
||||
# Blur method
|
||||
parser.add_argument(
|
||||
'--method',
|
||||
type=str,
|
||||
default='pixelate',
|
||||
choices=['gaussian', 'pixelate', 'blackout', 'elliptical', 'median'],
|
||||
help='Blur method (default: pixelate)',
|
||||
)
|
||||
|
||||
# Method-specific parameters
|
||||
parser.add_argument(
|
||||
'--blur-strength',
|
||||
type=float,
|
||||
default=3.0,
|
||||
help='Blur strength for gaussian/elliptical/median (default: 3.0)',
|
||||
)
|
||||
parser.add_argument(
|
||||
'--pixel-blocks',
|
||||
type=int,
|
||||
default=20,
|
||||
help='Number of pixel blocks for pixelate (default: 20, lower=more pixelated)',
|
||||
)
|
||||
parser.add_argument(
|
||||
'--color',
|
||||
type=str,
|
||||
default='0,0,0',
|
||||
help='Fill color for blackout as R,G,B (default: 0,0,0 for black)',
|
||||
)
|
||||
parser.add_argument('--margin', type=int, default=20, help='Margin for elliptical blur (default: 20)')
|
||||
|
||||
# Detection
|
||||
parser.add_argument(
|
||||
'--confidence-threshold',
|
||||
type=float,
|
||||
default=0.5,
|
||||
help='Detection confidence threshold (default: 0.5)',
|
||||
)
|
||||
|
||||
# Visualization
|
||||
parser.add_argument(
|
||||
'--show-detections',
|
||||
action='store_true',
|
||||
help='Show detection boxes before blurring (image mode only)',
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
# Parse color
|
||||
color_values = [int(x) for x in args.color.split(',')]
|
||||
if len(color_values) != 3:
|
||||
parser.error('--color must be in format R,G,B (e.g., 0,0,0)')
|
||||
color = tuple(color_values)
|
||||
|
||||
# Initialize detector
|
||||
print(f'Initializing face detector (confidence_threshold={args.confidence_threshold})...')
|
||||
detector = RetinaFace(confidence_threshold=args.confidence_threshold)
|
||||
|
||||
# Initialize blurrer
|
||||
print(f'Initializing blur method: {args.method}')
|
||||
blurrer = BlurFace(
|
||||
method=args.method,
|
||||
blur_strength=args.blur_strength,
|
||||
pixel_blocks=args.pixel_blocks,
|
||||
color=color,
|
||||
margin=args.margin,
|
||||
)
|
||||
|
||||
source_type = get_source_type(args.source)
|
||||
|
||||
if source_type == 'camera':
|
||||
run_camera(detector, blurrer, int(args.source))
|
||||
elif source_type == 'image':
|
||||
if not os.path.exists(args.source):
|
||||
print(f'Error: Image not found: {args.source}')
|
||||
return
|
||||
process_image(detector, blurrer, args.source, args.save_dir, args.show_detections)
|
||||
elif source_type == 'video':
|
||||
if not os.path.exists(args.source):
|
||||
print(f'Error: Video not found: {args.source}')
|
||||
return
|
||||
process_video(detector, blurrer, args.source, args.save_dir)
|
||||
else:
|
||||
print(f"Error: Unknown source type for '{args.source}'")
|
||||
print('Supported formats: images (.jpg, .png, ...), videos (.mp4, .avi, ...), or camera ID (0, 1, ...)')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
198
tools/attribute.py
Normal file
@@ -0,0 +1,198 @@
|
||||
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Age and gender prediction on detected faces.
|
||||
|
||||
Usage:
|
||||
python tools/attribute.py --source path/to/image.jpg
|
||||
python tools/attribute.py --source path/to/video.mp4
|
||||
python tools/attribute.py --source 0 # webcam
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
from _common import get_source_type
|
||||
import cv2
|
||||
|
||||
from uniface.attribute import AgeGender
|
||||
from uniface.detection import SCRFD, RetinaFace
|
||||
from uniface.draw import draw_detections
|
||||
|
||||
|
||||
def draw_age_gender_label(image, bbox, sex: str, age: int):
|
||||
"""Draw age/gender label above the bounding box."""
|
||||
x1, y1 = int(bbox[0]), int(bbox[1])
|
||||
text = f'{sex}, {age}y'
|
||||
(tw, th), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
|
||||
cv2.rectangle(image, (x1, y1 - th - 10), (x1 + tw + 10, y1), (0, 255, 0), -1)
|
||||
cv2.putText(image, text, (x1 + 5, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 2)
|
||||
|
||||
|
||||
def process_image(
|
||||
detector,
|
||||
age_gender,
|
||||
image_path: str,
|
||||
save_dir: str = 'outputs',
|
||||
threshold: float = 0.6,
|
||||
):
|
||||
"""Process a single image."""
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
print(f"Error: Failed to load image from '{image_path}'")
|
||||
return
|
||||
|
||||
faces = detector.detect(image)
|
||||
print(f'Detected {len(faces)} face(s)')
|
||||
|
||||
if not faces:
|
||||
return
|
||||
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
draw_detections(
|
||||
image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, corner_bbox=True
|
||||
)
|
||||
|
||||
for i, face in enumerate(faces):
|
||||
result = age_gender.predict(image, face.bbox)
|
||||
print(f' Face {i + 1}: {result.sex}, {result.age} years old')
|
||||
draw_age_gender_label(image, face.bbox, result.sex, result.age)
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{Path(image_path).stem}_age_gender.jpg')
|
||||
cv2.imwrite(output_path, image)
|
||||
print(f'Output saved: {output_path}')
|
||||
|
||||
|
||||
def process_video(
|
||||
detector,
|
||||
age_gender,
|
||||
video_path: str,
|
||||
save_dir: str = 'outputs',
|
||||
threshold: float = 0.6,
|
||||
):
|
||||
"""Process a video file."""
|
||||
cap = cv2.VideoCapture(video_path)
|
||||
if not cap.isOpened():
|
||||
print(f"Error: Cannot open video file '{video_path}'")
|
||||
return
|
||||
|
||||
fps = cap.get(cv2.CAP_PROP_FPS)
|
||||
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
||||
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
||||
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{Path(video_path).stem}_age_gender.mp4')
|
||||
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
|
||||
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
|
||||
|
||||
print(f'Processing video: {video_path} ({total_frames} frames)')
|
||||
frame_count = 0
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
frame_count += 1
|
||||
faces = detector.detect(frame)
|
||||
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
draw_detections(
|
||||
image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, corner_bbox=True
|
||||
)
|
||||
|
||||
for face in faces:
|
||||
result = age_gender.predict(frame, face.bbox)
|
||||
draw_age_gender_label(frame, face.bbox, result.sex, result.age)
|
||||
|
||||
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
out.write(frame)
|
||||
|
||||
if frame_count % 100 == 0:
|
||||
print(f' Processed {frame_count}/{total_frames} frames...')
|
||||
|
||||
cap.release()
|
||||
out.release()
|
||||
print(f'Done! Output saved: {output_path}')
|
||||
|
||||
|
||||
def run_camera(detector, age_gender, camera_id: int = 0, threshold: float = 0.6):
|
||||
"""Run real-time detection on webcam."""
|
||||
cap = cv2.VideoCapture(camera_id)
|
||||
if not cap.isOpened():
|
||||
print(f'Cannot open camera {camera_id}')
|
||||
return
|
||||
|
||||
print("Press 'q' to quit")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
frame = cv2.flip(frame, 1)
|
||||
if not ret:
|
||||
break
|
||||
|
||||
faces = detector.detect(frame)
|
||||
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
draw_detections(
|
||||
image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, corner_bbox=True
|
||||
)
|
||||
|
||||
for face in faces:
|
||||
result = age_gender.predict(frame, face.bbox)
|
||||
draw_age_gender_label(frame, face.bbox, result.sex, result.age)
|
||||
|
||||
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
cv2.imshow('Age & Gender Detection', frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Run age and gender detection')
|
||||
parser.add_argument('--source', type=str, required=True, help='Image/video path or camera ID (0, 1, ...)')
|
||||
parser.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
|
||||
parser.add_argument('--threshold', type=float, default=0.6, help='Visualization threshold')
|
||||
parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
|
||||
args = parser.parse_args()
|
||||
|
||||
detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
|
||||
age_gender = AgeGender()
|
||||
|
||||
source_type = get_source_type(args.source)
|
||||
|
||||
if source_type == 'camera':
|
||||
run_camera(detector, age_gender, int(args.source), args.threshold)
|
||||
elif source_type == 'image':
|
||||
if not os.path.exists(args.source):
|
||||
print(f'Error: Image not found: {args.source}')
|
||||
return
|
||||
process_image(detector, age_gender, args.source, args.save_dir, args.threshold)
|
||||
elif source_type == 'video':
|
||||
if not os.path.exists(args.source):
|
||||
print(f'Error: Video not found: {args.source}')
|
||||
return
|
||||
process_video(detector, age_gender, args.source, args.save_dir, args.threshold)
|
||||
else:
|
||||
print(f"Error: Unknown source type for '{args.source}'")
|
||||
print('Supported formats: images (.jpg, .png, ...), videos (.mp4, .avi, ...), or camera ID (0, 1, ...)')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
105
tools/batch_process.py
Normal file
@@ -0,0 +1,105 @@
|
||||
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Batch face detection on a folder of images.
|
||||
|
||||
Usage:
|
||||
python tools/batch_process.py --input images/ --output results/
|
||||
"""
|
||||
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
from tqdm import tqdm
|
||||
|
||||
from uniface.detection import SCRFD, RetinaFace
|
||||
from uniface.draw import draw_detections
|
||||
|
||||
|
||||
def get_image_files(input_dir: Path, extensions: tuple) -> list:
|
||||
files = []
|
||||
for ext in extensions:
|
||||
files.extend(input_dir.glob(f'*.{ext}'))
|
||||
files.extend(input_dir.glob(f'*.{ext.upper()}'))
|
||||
return sorted(files)
|
||||
|
||||
|
||||
def process_image(detector, image_path: Path, output_path: Path, threshold: float) -> int:
|
||||
"""Process single image. Returns face count or -1 on error."""
|
||||
image = cv2.imread(str(image_path))
|
||||
if image is None:
|
||||
return -1
|
||||
|
||||
faces = detector.detect(image)
|
||||
|
||||
# unpack face data for visualization
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
draw_detections(
|
||||
image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, corner_bbox=True
|
||||
)
|
||||
|
||||
cv2.putText(
|
||||
image,
|
||||
f'Faces: {len(faces)}',
|
||||
(10, 30),
|
||||
cv2.FONT_HERSHEY_SIMPLEX,
|
||||
1,
|
||||
(0, 255, 0),
|
||||
2,
|
||||
)
|
||||
cv2.imwrite(str(output_path), image)
|
||||
|
||||
return len(faces)
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Batch process images with face detection')
|
||||
parser.add_argument('--input', type=str, required=True, help='Input directory')
|
||||
parser.add_argument('--output', type=str, required=True, help='Output directory')
|
||||
parser.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
|
||||
parser.add_argument('--threshold', type=float, default=0.6, help='Visualization threshold')
|
||||
parser.add_argument('--extensions', type=str, default='jpg,jpeg,png,bmp', help='Image extensions')
|
||||
args = parser.parse_args()
|
||||
|
||||
input_path = Path(args.input)
|
||||
output_path = Path(args.output)
|
||||
|
||||
if not input_path.exists():
|
||||
print(f"Error: Input directory '{args.input}' does not exist")
|
||||
return
|
||||
|
||||
output_path.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
extensions = tuple(ext.strip() for ext in args.extensions.split(','))
|
||||
image_files = get_image_files(input_path, extensions)
|
||||
|
||||
if not image_files:
|
||||
print(f'No images found with extensions {extensions}')
|
||||
return
|
||||
|
||||
print(f'Found {len(image_files)} images')
|
||||
|
||||
detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
|
||||
|
||||
success, errors, total_faces = 0, 0, 0
|
||||
|
||||
for img_path in tqdm(image_files, desc='Processing', unit='img'):
|
||||
out_path = output_path / f'{img_path.stem}_detected{img_path.suffix}'
|
||||
result = process_image(detector, img_path, out_path, args.threshold)
|
||||
|
||||
if result >= 0:
|
||||
success += 1
|
||||
total_faces += result
|
||||
else:
|
||||
errors += 1
|
||||
print(f'\nFailed: {img_path.name}')
|
||||
|
||||
print(f'\nDone! {success} processed, {errors} errors, {total_faces} faces total')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
226
tools/detect.py
Normal file
@@ -0,0 +1,226 @@
|
||||
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Face detection on image, video, or webcam.
|
||||
|
||||
Usage:
|
||||
python tools/detect.py --source path/to/image.jpg
|
||||
python tools/detect.py --source path/to/video.mp4
|
||||
python tools/detect.py --source 0 # webcam
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
import time
|
||||
|
||||
from _common import get_source_type
|
||||
import cv2
|
||||
from tqdm import tqdm
|
||||
|
||||
from uniface.detection import SCRFD, RetinaFace, YOLOv5Face, YOLOv8Face
|
||||
from uniface.draw import draw_detections
|
||||
|
||||
|
||||
def process_image(detector, image_path: str, threshold: float = 0.6, save_dir: str = 'outputs'):
|
||||
"""Process a single image."""
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
print(f"Error: Failed to load image from '{image_path}'")
|
||||
return
|
||||
|
||||
faces = detector.detect(image)
|
||||
|
||||
if faces:
|
||||
bboxes = [face.bbox for face in faces]
|
||||
scores = [face.confidence for face in faces]
|
||||
landmarks = [face.landmarks for face in faces]
|
||||
draw_detections(image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold)
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{os.path.splitext(os.path.basename(image_path))[0]}_out.jpg')
|
||||
cv2.imwrite(output_path, image)
|
||||
print(f'Detected {len(faces)} face(s). Output saved: {output_path}')
|
||||
|
||||
|
||||
def process_video(
|
||||
detector,
|
||||
input_path: str,
|
||||
output_path: str,
|
||||
threshold: float = 0.6,
|
||||
show_preview: bool = False,
|
||||
):
|
||||
"""Process a video file with progress bar."""
|
||||
cap = cv2.VideoCapture(input_path)
|
||||
if not cap.isOpened():
|
||||
print(f"Error: Cannot open video file '{input_path}'")
|
||||
return
|
||||
|
||||
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
||||
fps = cap.get(cv2.CAP_PROP_FPS)
|
||||
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
||||
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
||||
|
||||
print(f'Input: {input_path} ({width}x{height}, {fps:.1f} fps, {total_frames} frames)')
|
||||
print(f'Output: {output_path}')
|
||||
|
||||
Path(output_path).parent.mkdir(parents=True, exist_ok=True)
|
||||
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
|
||||
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
|
||||
|
||||
if not out.isOpened():
|
||||
print(f"Error: Cannot create output video '{output_path}'")
|
||||
cap.release()
|
||||
return
|
||||
|
||||
frame_count = 0
|
||||
total_faces = 0
|
||||
|
||||
for _ in tqdm(range(total_frames), desc='Processing', unit='frames'):
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
t0 = time.perf_counter()
|
||||
frame_count += 1
|
||||
faces = detector.detect(frame)
|
||||
total_faces += len(faces)
|
||||
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
draw_detections(
|
||||
image=frame,
|
||||
bboxes=bboxes,
|
||||
scores=scores,
|
||||
landmarks=landmarks,
|
||||
vis_threshold=threshold,
|
||||
draw_score=True,
|
||||
corner_bbox=True,
|
||||
)
|
||||
|
||||
inference_fps = 1.0 / max(time.perf_counter() - t0, 1e-9)
|
||||
cv2.putText(frame, f'FPS: {inference_fps:.1f}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
cv2.putText(frame, f'Faces: {len(faces)}', (10, 65), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
out.write(frame)
|
||||
|
||||
if show_preview:
|
||||
cv2.imshow("Processing - Press 'q' to cancel", frame)
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
print('\nCancelled by user')
|
||||
break
|
||||
|
||||
cap.release()
|
||||
out.release()
|
||||
if show_preview:
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
avg_faces = total_faces / frame_count if frame_count > 0 else 0
|
||||
print(f'\nDone! {frame_count} frames, {total_faces} faces ({avg_faces:.1f} avg/frame)')
|
||||
print(f'Saved: {output_path}')
|
||||
|
||||
|
||||
def run_camera(detector, camera_id: int = 0, threshold: float = 0.6):
|
||||
"""Run real-time detection on webcam."""
|
||||
cap = cv2.VideoCapture(camera_id)
|
||||
if not cap.isOpened():
|
||||
print(f'Cannot open camera {camera_id}')
|
||||
return
|
||||
|
||||
print("Press 'q' to quit")
|
||||
|
||||
prev_time = time.perf_counter()
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
frame = cv2.flip(frame, 1)
|
||||
if not ret:
|
||||
break
|
||||
|
||||
faces = detector.detect(frame)
|
||||
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
draw_detections(
|
||||
image=frame,
|
||||
bboxes=bboxes,
|
||||
scores=scores,
|
||||
landmarks=landmarks,
|
||||
vis_threshold=threshold,
|
||||
draw_score=True,
|
||||
corner_bbox=True,
|
||||
)
|
||||
|
||||
curr_time = time.perf_counter()
|
||||
fps = 1.0 / max(curr_time - prev_time, 1e-9)
|
||||
prev_time = curr_time
|
||||
cv2.putText(frame, f'FPS: {fps:.1f}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
cv2.putText(frame, f'Faces: {len(faces)}', (10, 65), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
cv2.imshow('Face Detection', frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Run face detection')
|
||||
parser.add_argument('--source', type=str, required=True, help='Image/video path or camera ID (0, 1, ...)')
|
||||
parser.add_argument(
|
||||
'--detector',
|
||||
'--method',
|
||||
type=str,
|
||||
default='retinaface',
|
||||
choices=['retinaface', 'scrfd', 'yolov5face', 'yolov8face'],
|
||||
)
|
||||
parser.add_argument('--threshold', type=float, default=0.25, help='Visualization threshold')
|
||||
parser.add_argument('--preview', action='store_true', help='Show live preview during video processing')
|
||||
parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
|
||||
parser.add_argument('--output', type=str, default=None, help='Output video path (auto-generated if not specified)')
|
||||
args = parser.parse_args()
|
||||
|
||||
# Initialize detector
|
||||
if args.detector == 'retinaface':
|
||||
detector = RetinaFace()
|
||||
elif args.detector == 'scrfd':
|
||||
detector = SCRFD()
|
||||
elif args.detector == 'yolov5face':
|
||||
from uniface.constants import YOLOv5FaceWeights
|
||||
|
||||
detector = YOLOv5Face(model_name=YOLOv5FaceWeights.YOLOV5M)
|
||||
else: # yolov8face
|
||||
from uniface.constants import YOLOv8FaceWeights
|
||||
|
||||
detector = YOLOv8Face(model_name=YOLOv8FaceWeights.YOLOV8N)
|
||||
|
||||
source_type = get_source_type(args.source)
|
||||
|
||||
if source_type == 'camera':
|
||||
run_camera(detector, int(args.source), args.threshold)
|
||||
elif source_type == 'image':
|
||||
if not os.path.exists(args.source):
|
||||
print(f'Error: Image not found: {args.source}')
|
||||
return
|
||||
process_image(detector, args.source, args.threshold, args.save_dir)
|
||||
elif source_type == 'video':
|
||||
if not os.path.exists(args.source):
|
||||
print(f'Error: Video not found: {args.source}')
|
||||
return
|
||||
if args.output:
|
||||
output_path = args.output
|
||||
else:
|
||||
os.makedirs(args.save_dir, exist_ok=True)
|
||||
output_path = os.path.join(args.save_dir, f'{Path(args.source).stem}_detected.mp4')
|
||||
process_video(detector, args.source, output_path, args.threshold, args.preview)
|
||||
else:
|
||||
print(f"Error: Unknown source type for '{args.source}'")
|
||||
print('Supported formats: images (.jpg, .png, ...), videos (.mp4, .avi, ...), or camera ID (0, 1, ...)')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
@@ -13,48 +13,48 @@ from uniface.constants import (
|
||||
from uniface.model_store import verify_model_weights
|
||||
|
||||
MODEL_TYPES = {
|
||||
"retinaface": RetinaFaceWeights,
|
||||
"sphereface": SphereFaceWeights,
|
||||
"mobileface": MobileFaceWeights,
|
||||
"arcface": ArcFaceWeights,
|
||||
"scrfd": SCRFDWeights,
|
||||
"ddamfn": DDAMFNWeights,
|
||||
"agegender": AgeGenderWeights,
|
||||
"landmark": LandmarkWeights,
|
||||
'retinaface': RetinaFaceWeights,
|
||||
'sphereface': SphereFaceWeights,
|
||||
'mobileface': MobileFaceWeights,
|
||||
'arcface': ArcFaceWeights,
|
||||
'scrfd': SCRFDWeights,
|
||||
'ddamfn': DDAMFNWeights,
|
||||
'agegender': AgeGenderWeights,
|
||||
'landmark': LandmarkWeights,
|
||||
}
|
||||
|
||||
|
||||
def download_models(model_enum):
|
||||
for weight in model_enum:
|
||||
print(f"Downloading: {weight.value}")
|
||||
print(f'Downloading: {weight.value}')
|
||||
try:
|
||||
verify_model_weights(weight)
|
||||
print(f" Done: {weight.value}")
|
||||
print(f' Done: {weight.value}')
|
||||
except Exception as e:
|
||||
print(f" Failed: {e}")
|
||||
print(f' Failed: {e}')
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="Download model weights")
|
||||
parser = argparse.ArgumentParser(description='Download model weights')
|
||||
parser.add_argument(
|
||||
"--model-type",
|
||||
'--model-type',
|
||||
type=str,
|
||||
choices=list(MODEL_TYPES.keys()),
|
||||
help="Model type to download. If not specified, downloads all.",
|
||||
help='Model type to download. If not specified, downloads all.',
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.model_type:
|
||||
print(f"Downloading {args.model_type} models...")
|
||||
print(f'Downloading {args.model_type} models...')
|
||||
download_models(MODEL_TYPES[args.model_type])
|
||||
else:
|
||||
print("Downloading all models...")
|
||||
print('Downloading all models...')
|
||||
for name, model_enum in MODEL_TYPES.items():
|
||||
print(f"\n{name}:")
|
||||
print(f'\n{name}:')
|
||||
download_models(model_enum)
|
||||
|
||||
print("\nDone!")
|
||||
print('\nDone!')
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
198
tools/emotion.py
Normal file
@@ -0,0 +1,198 @@
|
||||
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Emotion detection on detected faces.
|
||||
|
||||
Usage:
|
||||
python tools/emotion.py --source path/to/image.jpg
|
||||
python tools/emotion.py --source path/to/video.mp4
|
||||
python tools/emotion.py --source 0 # webcam
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
from _common import get_source_type
|
||||
import cv2
|
||||
|
||||
from uniface.attribute import Emotion
|
||||
from uniface.detection import SCRFD, RetinaFace
|
||||
from uniface.draw import draw_detections
|
||||
|
||||
|
||||
def draw_emotion_label(image, bbox, emotion: str, confidence: float):
|
||||
"""Draw emotion label above the bounding box."""
|
||||
x1, y1 = int(bbox[0]), int(bbox[1])
|
||||
text = f'{emotion} ({confidence:.2f})'
|
||||
(tw, th), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
|
||||
cv2.rectangle(image, (x1, y1 - th - 10), (x1 + tw + 10, y1), (255, 0, 0), -1)
|
||||
cv2.putText(image, text, (x1 + 5, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
|
||||
|
||||
|
||||
def process_image(
|
||||
detector,
|
||||
emotion_predictor,
|
||||
image_path: str,
|
||||
save_dir: str = 'outputs',
|
||||
threshold: float = 0.6,
|
||||
):
|
||||
"""Process a single image."""
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
print(f"Error: Failed to load image from '{image_path}'")
|
||||
return
|
||||
|
||||
faces = detector.detect(image)
|
||||
print(f'Detected {len(faces)} face(s)')
|
||||
|
||||
if not faces:
|
||||
return
|
||||
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
draw_detections(
|
||||
image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, corner_bbox=True
|
||||
)
|
||||
|
||||
for i, face in enumerate(faces):
|
||||
result = emotion_predictor.predict(image, face.landmarks)
|
||||
print(f' Face {i + 1}: {result.emotion} (confidence: {result.confidence:.3f})')
|
||||
draw_emotion_label(image, face.bbox, result.emotion, result.confidence)
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{Path(image_path).stem}_emotion.jpg')
|
||||
cv2.imwrite(output_path, image)
|
||||
print(f'Output saved: {output_path}')
|
||||
|
||||
|
||||
def process_video(
|
||||
detector,
|
||||
emotion_predictor,
|
||||
video_path: str,
|
||||
save_dir: str = 'outputs',
|
||||
threshold: float = 0.6,
|
||||
):
|
||||
"""Process a video file."""
|
||||
cap = cv2.VideoCapture(video_path)
|
||||
if not cap.isOpened():
|
||||
print(f"Error: Cannot open video file '{video_path}'")
|
||||
return
|
||||
|
||||
fps = cap.get(cv2.CAP_PROP_FPS)
|
||||
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
||||
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
||||
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{Path(video_path).stem}_emotion.mp4')
|
||||
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
|
||||
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
|
||||
|
||||
print(f'Processing video: {video_path} ({total_frames} frames)')
|
||||
frame_count = 0
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
frame_count += 1
|
||||
faces = detector.detect(frame)
|
||||
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
draw_detections(
|
||||
image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, corner_bbox=True
|
||||
)
|
||||
|
||||
for face in faces:
|
||||
result = emotion_predictor.predict(frame, face.landmarks)
|
||||
draw_emotion_label(frame, face.bbox, result.emotion, result.confidence)
|
||||
|
||||
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
out.write(frame)
|
||||
|
||||
if frame_count % 100 == 0:
|
||||
print(f' Processed {frame_count}/{total_frames} frames...')
|
||||
|
||||
cap.release()
|
||||
out.release()
|
||||
print(f'Done! Output saved: {output_path}')
|
||||
|
||||
|
||||
def run_camera(detector, emotion_predictor, camera_id: int = 0, threshold: float = 0.6):
|
||||
"""Run real-time detection on webcam."""
|
||||
cap = cv2.VideoCapture(camera_id)
|
||||
if not cap.isOpened():
|
||||
print(f'Cannot open camera {camera_id}')
|
||||
return
|
||||
|
||||
print("Press 'q' to quit")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
frame = cv2.flip(frame, 1)
|
||||
if not ret:
|
||||
break
|
||||
|
||||
faces = detector.detect(frame)
|
||||
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
draw_detections(
|
||||
image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, corner_bbox=True
|
||||
)
|
||||
|
||||
for face in faces:
|
||||
result = emotion_predictor.predict(frame, face.landmarks)
|
||||
draw_emotion_label(frame, face.bbox, result.emotion, result.confidence)
|
||||
|
||||
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
cv2.imshow('Emotion Detection', frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Run emotion detection')
|
||||
parser.add_argument('--source', type=str, required=True, help='Image/video path or camera ID (0, 1, ...)')
|
||||
parser.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
|
||||
parser.add_argument('--threshold', type=float, default=0.6, help='Visualization threshold')
|
||||
parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
|
||||
args = parser.parse_args()
|
||||
|
||||
detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
|
||||
emotion_predictor = Emotion()
|
||||
|
||||
source_type = get_source_type(args.source)
|
||||
|
||||
if source_type == 'camera':
|
||||
run_camera(detector, emotion_predictor, int(args.source), args.threshold)
|
||||
elif source_type == 'image':
|
||||
if not os.path.exists(args.source):
|
||||
print(f'Error: Image not found: {args.source}')
|
||||
return
|
||||
process_image(detector, emotion_predictor, args.source, args.save_dir, args.threshold)
|
||||
elif source_type == 'video':
|
||||
if not os.path.exists(args.source):
|
||||
print(f'Error: Video not found: {args.source}')
|
||||
return
|
||||
process_video(detector, emotion_predictor, args.source, args.save_dir, args.threshold)
|
||||
else:
|
||||
print(f"Error: Unknown source type for '{args.source}'")
|
||||
print('Supported formats: images (.jpg, .png, ...), videos (.mp4, .avi, ...), or camera ID (0, 1, ...)')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
198
tools/fairface.py
Normal file
@@ -0,0 +1,198 @@
|
||||
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""FairFace attribute prediction (race, gender, age) on detected faces.
|
||||
|
||||
Usage:
|
||||
python tools/fairface.py --source path/to/image.jpg
|
||||
python tools/fairface.py --source path/to/video.mp4
|
||||
python tools/fairface.py --source 0 # webcam
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
from _common import get_source_type
|
||||
import cv2
|
||||
|
||||
from uniface.attribute import FairFace
|
||||
from uniface.detection import SCRFD, RetinaFace
|
||||
from uniface.draw import draw_detections
|
||||
|
||||
|
||||
def draw_fairface_label(image, bbox, sex: str, age_group: str, race: str):
|
||||
"""Draw FairFace attributes above the bounding box."""
|
||||
x1, y1 = int(bbox[0]), int(bbox[1])
|
||||
text = f'{sex}, {age_group}, {race}'
|
||||
(tw, th), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)
|
||||
cv2.rectangle(image, (x1, y1 - th - 10), (x1 + tw + 10, y1), (0, 255, 0), -1)
|
||||
cv2.putText(image, text, (x1 + 5, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 2)
|
||||
|
||||
|
||||
def process_image(
|
||||
detector,
|
||||
fairface,
|
||||
image_path: str,
|
||||
save_dir: str = 'outputs',
|
||||
threshold: float = 0.6,
|
||||
):
|
||||
"""Process a single image."""
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
print(f"Error: Failed to load image from '{image_path}'")
|
||||
return
|
||||
|
||||
faces = detector.detect(image)
|
||||
print(f'Detected {len(faces)} face(s)')
|
||||
|
||||
if not faces:
|
||||
return
|
||||
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
draw_detections(
|
||||
image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, corner_bbox=True
|
||||
)
|
||||
|
||||
for i, face in enumerate(faces):
|
||||
result = fairface.predict(image, face.bbox)
|
||||
print(f' Face {i + 1}: {result.sex}, {result.age_group}, {result.race}')
|
||||
draw_fairface_label(image, face.bbox, result.sex, result.age_group, result.race)
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{Path(image_path).stem}_fairface.jpg')
|
||||
cv2.imwrite(output_path, image)
|
||||
print(f'Output saved: {output_path}')
|
||||
|
||||
|
||||
def process_video(
|
||||
detector,
|
||||
fairface,
|
||||
video_path: str,
|
||||
save_dir: str = 'outputs',
|
||||
threshold: float = 0.6,
|
||||
):
|
||||
"""Process a video file."""
|
||||
cap = cv2.VideoCapture(video_path)
|
||||
if not cap.isOpened():
|
||||
print(f"Error: Cannot open video file '{video_path}'")
|
||||
return
|
||||
|
||||
fps = cap.get(cv2.CAP_PROP_FPS)
|
||||
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
||||
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
||||
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{Path(video_path).stem}_fairface.mp4')
|
||||
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
|
||||
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
|
||||
|
||||
print(f'Processing video: {video_path} ({total_frames} frames)')
|
||||
frame_count = 0
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
frame_count += 1
|
||||
faces = detector.detect(frame)
|
||||
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
draw_detections(
|
||||
image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, corner_bbox=True
|
||||
)
|
||||
|
||||
for face in faces:
|
||||
result = fairface.predict(frame, face.bbox)
|
||||
draw_fairface_label(frame, face.bbox, result.sex, result.age_group, result.race)
|
||||
|
||||
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
out.write(frame)
|
||||
|
||||
if frame_count % 100 == 0:
|
||||
print(f' Processed {frame_count}/{total_frames} frames...')
|
||||
|
||||
cap.release()
|
||||
out.release()
|
||||
print(f'Done! Output saved: {output_path}')
|
||||
|
||||
|
||||
def run_camera(detector, fairface, camera_id: int = 0, threshold: float = 0.6):
|
||||
"""Run real-time detection on webcam."""
|
||||
cap = cv2.VideoCapture(camera_id)
|
||||
if not cap.isOpened():
|
||||
print(f'Cannot open camera {camera_id}')
|
||||
return
|
||||
|
||||
print("Press 'q' to quit")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
frame = cv2.flip(frame, 1)
|
||||
if not ret:
|
||||
break
|
||||
|
||||
faces = detector.detect(frame)
|
||||
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
draw_detections(
|
||||
image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, corner_bbox=True
|
||||
)
|
||||
|
||||
for face in faces:
|
||||
result = fairface.predict(frame, face.bbox)
|
||||
draw_fairface_label(frame, face.bbox, result.sex, result.age_group, result.race)
|
||||
|
||||
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
cv2.imshow('FairFace Detection', frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Run FairFace attribute prediction (race, gender, age)')
|
||||
parser.add_argument('--source', type=str, required=True, help='Image/video path or camera ID (0, 1, ...)')
|
||||
parser.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
|
||||
parser.add_argument('--threshold', type=float, default=0.6, help='Visualization threshold')
|
||||
parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
|
||||
args = parser.parse_args()
|
||||
|
||||
detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
|
||||
fairface = FairFace()
|
||||
|
||||
source_type = get_source_type(args.source)
|
||||
|
||||
if source_type == 'camera':
|
||||
run_camera(detector, fairface, int(args.source), args.threshold)
|
||||
elif source_type == 'image':
|
||||
if not os.path.exists(args.source):
|
||||
print(f'Error: Image not found: {args.source}')
|
||||
return
|
||||
process_image(detector, fairface, args.source, args.save_dir, args.threshold)
|
||||
elif source_type == 'video':
|
||||
if not os.path.exists(args.source):
|
||||
print(f'Error: Video not found: {args.source}')
|
||||
return
|
||||
process_video(detector, fairface, args.source, args.save_dir, args.threshold)
|
||||
else:
|
||||
print(f"Error: Unknown source type for '{args.source}'")
|
||||
print('Supported formats: images (.jpg, .png, ...), videos (.mp4, .avi, ...), or camera ID (0, 1, ...)')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
208
tools/faiss_search.py
Normal file
@@ -0,0 +1,208 @@
|
||||
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""FAISS index build and multi-identity face search.
|
||||
|
||||
Build a vector index from a directory of person sub-folders, then search
|
||||
against it in a video or webcam stream.
|
||||
|
||||
Usage:
|
||||
python tools/faiss_search.py build --faces-dir dataset/ --db-path ./vector_index
|
||||
python tools/faiss_search.py run --db-path ./vector_index --source video.mp4
|
||||
python tools/faiss_search.py run --db-path ./vector_index --source 0 # webcam
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
from _common import IMAGE_EXTENSIONS, get_source_type
|
||||
import cv2
|
||||
|
||||
from uniface import create_detector, create_recognizer
|
||||
from uniface.draw import draw_corner_bbox, draw_text_label
|
||||
from uniface.indexing import FAISS
|
||||
|
||||
|
||||
def _draw_face(image, bbox, text: str, color: tuple[int, int, int]) -> None:
|
||||
x1, y1, x2, y2 = map(int, bbox[:4])
|
||||
thickness = max(round(sum(image.shape[:2]) / 2 * 0.003), 2)
|
||||
font_scale = max(0.4, min(0.7, (y2 - y1) / 200))
|
||||
draw_corner_bbox(image, (x1, y1, x2, y2), color=color, thickness=thickness)
|
||||
draw_text_label(image, text, x1, y1, bg_color=color, font_scale=font_scale)
|
||||
|
||||
|
||||
def process_frame(frame, detector, recognizer, store: FAISS, threshold: float = 0.4):
|
||||
faces = detector.detect(frame)
|
||||
if not faces:
|
||||
return frame
|
||||
|
||||
for face in faces:
|
||||
embedding = recognizer.get_normalized_embedding(frame, face.landmarks)
|
||||
result, sim = store.search(embedding, threshold=threshold)
|
||||
|
||||
text = f'{result["person_id"]} ({sim:.2f})' if result else f'Unknown ({sim:.2f})'
|
||||
color = (0, 255, 0) if result else (0, 0, 255)
|
||||
_draw_face(frame, face.bbox, text, color)
|
||||
|
||||
return frame
|
||||
|
||||
|
||||
def process_video(detector, recognizer, store: FAISS, video_path: str, save_dir: str, threshold: float = 0.4):
|
||||
cap = cv2.VideoCapture(video_path)
|
||||
if not cap.isOpened():
|
||||
print(f"Error: Cannot open video file '{video_path}'")
|
||||
return
|
||||
|
||||
fps = cap.get(cv2.CAP_PROP_FPS)
|
||||
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
||||
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
||||
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{Path(video_path).stem}_faiss_search.mp4')
|
||||
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
|
||||
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
|
||||
|
||||
print(f'Processing video: {video_path} ({total_frames} frames)')
|
||||
frame_count = 0
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
frame_count += 1
|
||||
frame = process_frame(frame, detector, recognizer, store, threshold)
|
||||
out.write(frame)
|
||||
|
||||
if frame_count % 100 == 0:
|
||||
print(f' Processed {frame_count}/{total_frames} frames...')
|
||||
|
||||
cap.release()
|
||||
out.release()
|
||||
print(f'Done! Output saved: {output_path}')
|
||||
|
||||
|
||||
def run_camera(detector, recognizer, store: FAISS, camera_id: int = 0, threshold: float = 0.4):
|
||||
cap = cv2.VideoCapture(camera_id)
|
||||
if not cap.isOpened():
|
||||
print(f'Cannot open camera {camera_id}')
|
||||
return
|
||||
|
||||
print("Press 'q' to quit")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
frame = cv2.flip(frame, 1)
|
||||
if not ret:
|
||||
break
|
||||
|
||||
frame = process_frame(frame, detector, recognizer, store, threshold)
|
||||
|
||||
cv2.imshow('Vector Search', frame)
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def build(args: argparse.Namespace) -> None:
|
||||
faces_dir = Path(args.faces_dir)
|
||||
if not faces_dir.is_dir():
|
||||
print(f"Error: '{faces_dir}' is not a directory")
|
||||
return
|
||||
|
||||
detector = create_detector()
|
||||
recognizer = create_recognizer()
|
||||
store = FAISS(db_path=args.db_path)
|
||||
|
||||
persons = sorted(p.name for p in faces_dir.iterdir() if p.is_dir())
|
||||
if not persons:
|
||||
print(f"Error: No sub-folders found in '{faces_dir}'")
|
||||
return
|
||||
|
||||
print(f'Found {len(persons)} persons: {", ".join(persons)}')
|
||||
|
||||
total_added = 0
|
||||
for person_id in persons:
|
||||
person_dir = faces_dir / person_id
|
||||
images = [f for f in person_dir.iterdir() if f.suffix.lower() in IMAGE_EXTENSIONS]
|
||||
|
||||
added = 0
|
||||
for img_path in images:
|
||||
image = cv2.imread(str(img_path))
|
||||
if image is None:
|
||||
print(f' Warning: Failed to read {img_path}, skipping')
|
||||
continue
|
||||
|
||||
faces = detector.detect(image)
|
||||
if not faces:
|
||||
print(f' Warning: No face detected in {img_path}, skipping')
|
||||
continue
|
||||
|
||||
embedding = recognizer.get_normalized_embedding(image, faces[0].landmarks)
|
||||
store.add(embedding, {'person_id': person_id, 'source': str(img_path)})
|
||||
added += 1
|
||||
|
||||
total_added += added
|
||||
if added:
|
||||
print(f' {person_id}: {added} embeddings added')
|
||||
else:
|
||||
print(f' {person_id}: no valid faces found')
|
||||
|
||||
store.save()
|
||||
print(f'\nIndex saved to {args.db_path} ({total_added} vectors, {len(persons)} persons)')
|
||||
|
||||
|
||||
def run(args: argparse.Namespace) -> None:
|
||||
detector = create_detector()
|
||||
recognizer = create_recognizer()
|
||||
|
||||
store = FAISS(db_path=args.db_path)
|
||||
if not store.load():
|
||||
print(f"Error: No index found at '{args.db_path}'")
|
||||
return
|
||||
print(f'Loaded FAISS index: {store}')
|
||||
|
||||
source_type = get_source_type(args.source)
|
||||
|
||||
if source_type == 'camera':
|
||||
run_camera(detector, recognizer, store, int(args.source), args.threshold)
|
||||
elif source_type == 'video':
|
||||
if not os.path.exists(args.source):
|
||||
print(f'Error: Video not found: {args.source}')
|
||||
return
|
||||
process_video(detector, recognizer, store, args.source, args.save_dir, args.threshold)
|
||||
else:
|
||||
print(f"Error: Source must be a video file or camera ID, not '{args.source}'")
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='FAISS vector search')
|
||||
sub = parser.add_subparsers(dest='command', required=True)
|
||||
|
||||
build_p = sub.add_parser('build', help='Build a FAISS index from person sub-folders')
|
||||
build_p.add_argument('--faces-dir', type=str, required=True, help='Directory with person sub-folders')
|
||||
build_p.add_argument('--db-path', type=str, default='./vector_index', help='Where to save the index')
|
||||
|
||||
run_p = sub.add_parser('run', help='Search faces against a FAISS index')
|
||||
run_p.add_argument('--db-path', type=str, required=True, help='Path to saved FAISS index')
|
||||
run_p.add_argument('--source', type=str, required=True, help='Video path or camera ID')
|
||||
run_p.add_argument('--threshold', type=float, default=0.4, help='Similarity threshold')
|
||||
run_p.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.command == 'build':
|
||||
build(args)
|
||||
elif args.command == 'run':
|
||||
run(args)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
174
tools/gaze.py
Normal file
@@ -0,0 +1,174 @@
|
||||
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Gaze estimation on detected faces.
|
||||
|
||||
Usage:
|
||||
python tools/gaze.py --source path/to/image.jpg
|
||||
python tools/gaze.py --source path/to/video.mp4
|
||||
python tools/gaze.py --source 0 # webcam
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
from _common import get_source_type
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.draw import draw_gaze
|
||||
from uniface.gaze import MobileGaze
|
||||
|
||||
|
||||
def process_image(detector, gaze_estimator, image_path: str, save_dir: str = 'outputs'):
|
||||
"""Process a single image."""
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
print(f"Error: Failed to load image from '{image_path}'")
|
||||
return
|
||||
|
||||
faces = detector.detect(image)
|
||||
print(f'Detected {len(faces)} face(s)')
|
||||
|
||||
for i, face in enumerate(faces):
|
||||
bbox = face.bbox
|
||||
x1, y1, x2, y2 = map(int, bbox[:4])
|
||||
face_crop = image[y1:y2, x1:x2]
|
||||
|
||||
if face_crop.size == 0:
|
||||
continue
|
||||
|
||||
result = gaze_estimator.estimate(face_crop)
|
||||
print(f' Face {i + 1}: pitch={np.degrees(result.pitch):.1f}°, yaw={np.degrees(result.yaw):.1f}°')
|
||||
|
||||
draw_gaze(image, bbox, result.pitch, result.yaw, draw_angles=True)
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{Path(image_path).stem}_gaze.jpg')
|
||||
cv2.imwrite(output_path, image)
|
||||
print(f'Output saved: {output_path}')
|
||||
|
||||
|
||||
def process_video(detector, gaze_estimator, video_path: str, save_dir: str = 'outputs'):
|
||||
"""Process a video file."""
|
||||
cap = cv2.VideoCapture(video_path)
|
||||
if not cap.isOpened():
|
||||
print(f"Error: Cannot open video file '{video_path}'")
|
||||
return
|
||||
|
||||
fps = cap.get(cv2.CAP_PROP_FPS)
|
||||
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
||||
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
||||
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{Path(video_path).stem}_gaze.mp4')
|
||||
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
|
||||
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
|
||||
|
||||
print(f'Processing video: {video_path} ({total_frames} frames)')
|
||||
frame_count = 0
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
frame_count += 1
|
||||
faces = detector.detect(frame)
|
||||
|
||||
for face in faces:
|
||||
bbox = face.bbox
|
||||
x1, y1, x2, y2 = map(int, bbox[:4])
|
||||
face_crop = frame[y1:y2, x1:x2]
|
||||
|
||||
if face_crop.size == 0:
|
||||
continue
|
||||
|
||||
result = gaze_estimator.estimate(face_crop)
|
||||
draw_gaze(frame, bbox, result.pitch, result.yaw)
|
||||
|
||||
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
out.write(frame)
|
||||
|
||||
if frame_count % 100 == 0:
|
||||
print(f' Processed {frame_count}/{total_frames} frames...')
|
||||
|
||||
cap.release()
|
||||
out.release()
|
||||
print(f'Done! Output saved: {output_path}')
|
||||
|
||||
|
||||
def run_camera(detector, gaze_estimator, camera_id: int = 0):
|
||||
"""Run real-time detection on webcam."""
|
||||
cap = cv2.VideoCapture(camera_id)
|
||||
if not cap.isOpened():
|
||||
print(f'Cannot open camera {camera_id}')
|
||||
return
|
||||
|
||||
print("Press 'q' to quit")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
frame = cv2.flip(frame, 1)
|
||||
faces = detector.detect(frame)
|
||||
|
||||
for face in faces:
|
||||
bbox = face.bbox
|
||||
x1, y1, x2, y2 = map(int, bbox[:4])
|
||||
face_crop = frame[y1:y2, x1:x2]
|
||||
|
||||
if face_crop.size == 0:
|
||||
continue
|
||||
|
||||
result = gaze_estimator.estimate(face_crop)
|
||||
draw_gaze(frame, bbox, result.pitch, result.yaw)
|
||||
|
||||
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
cv2.imshow('Gaze Estimation', frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Run gaze estimation')
|
||||
parser.add_argument('--source', type=str, required=True, help='Image/video path or camera ID (0, 1, ...)')
|
||||
parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
|
||||
args = parser.parse_args()
|
||||
|
||||
detector = RetinaFace()
|
||||
gaze_estimator = MobileGaze()
|
||||
|
||||
source_type = get_source_type(args.source)
|
||||
|
||||
if source_type == 'camera':
|
||||
run_camera(detector, gaze_estimator, int(args.source))
|
||||
elif source_type == 'image':
|
||||
if not os.path.exists(args.source):
|
||||
print(f'Error: Image not found: {args.source}')
|
||||
return
|
||||
process_image(detector, gaze_estimator, args.source, args.save_dir)
|
||||
elif source_type == 'video':
|
||||
if not os.path.exists(args.source):
|
||||
print(f'Error: Video not found: {args.source}')
|
||||
return
|
||||
process_video(detector, gaze_estimator, args.source, args.save_dir)
|
||||
else:
|
||||
print(f"Error: Unknown source type for '{args.source}'")
|
||||
print('Supported formats: images (.jpg, .png, ...), videos (.mp4, .avi, ...), or camera ID (0, 1, ...)')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
172
tools/landmarks.py
Normal file
@@ -0,0 +1,172 @@
|
||||
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""106-point facial landmark detection.
|
||||
|
||||
Usage:
|
||||
python tools/landmarks.py --source path/to/image.jpg
|
||||
python tools/landmarks.py --source path/to/video.mp4
|
||||
python tools/landmarks.py --source 0 # webcam
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
from _common import get_source_type
|
||||
import cv2
|
||||
|
||||
from uniface.detection import SCRFD, RetinaFace
|
||||
from uniface.landmark import Landmark106
|
||||
|
||||
|
||||
def process_image(detector, landmarker, image_path: str, save_dir: str = 'outputs'):
|
||||
"""Process a single image."""
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
print(f"Error: Failed to load image from '{image_path}'")
|
||||
return
|
||||
|
||||
faces = detector.detect(image)
|
||||
print(f'Detected {len(faces)} face(s)')
|
||||
|
||||
if not faces:
|
||||
return
|
||||
|
||||
for i, face in enumerate(faces):
|
||||
bbox = face.bbox
|
||||
x1, y1, x2, y2 = map(int, bbox)
|
||||
cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
|
||||
|
||||
landmarks = landmarker.get_landmarks(image, bbox)
|
||||
print(f' Face {i + 1}: {len(landmarks)} landmarks')
|
||||
|
||||
for x, y in landmarks.astype(int):
|
||||
cv2.circle(image, (x, y), 1, (0, 255, 0), -1)
|
||||
|
||||
cv2.putText(image, f'Face {i + 1}', (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{Path(image_path).stem}_landmarks.jpg')
|
||||
cv2.imwrite(output_path, image)
|
||||
print(f'Output saved: {output_path}')
|
||||
|
||||
|
||||
def process_video(detector, landmarker, video_path: str, save_dir: str = 'outputs'):
|
||||
"""Process a video file."""
|
||||
cap = cv2.VideoCapture(video_path)
|
||||
if not cap.isOpened():
|
||||
print(f"Error: Cannot open video file '{video_path}'")
|
||||
return
|
||||
|
||||
fps = cap.get(cv2.CAP_PROP_FPS)
|
||||
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
||||
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
||||
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{Path(video_path).stem}_landmarks.mp4')
|
||||
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
|
||||
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
|
||||
|
||||
print(f'Processing video: {video_path} ({total_frames} frames)')
|
||||
frame_count = 0
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
frame_count += 1
|
||||
faces = detector.detect(frame)
|
||||
|
||||
for face in faces:
|
||||
bbox = face.bbox
|
||||
x1, y1, x2, y2 = map(int, bbox)
|
||||
cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
|
||||
|
||||
landmarks = landmarker.get_landmarks(frame, bbox)
|
||||
for x, y in landmarks.astype(int):
|
||||
cv2.circle(frame, (x, y), 1, (0, 255, 0), -1)
|
||||
|
||||
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
out.write(frame)
|
||||
|
||||
if frame_count % 100 == 0:
|
||||
print(f' Processed {frame_count}/{total_frames} frames...')
|
||||
|
||||
cap.release()
|
||||
out.release()
|
||||
print(f'Done! Output saved: {output_path}')
|
||||
|
||||
|
||||
def run_camera(detector, landmarker, camera_id: int = 0):
|
||||
"""Run real-time detection on webcam."""
|
||||
cap = cv2.VideoCapture(camera_id)
|
||||
if not cap.isOpened():
|
||||
print(f'Cannot open camera {camera_id}')
|
||||
return
|
||||
|
||||
print("Press 'q' to quit")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
frame = cv2.flip(frame, 1)
|
||||
if not ret:
|
||||
break
|
||||
|
||||
faces = detector.detect(frame)
|
||||
|
||||
for face in faces:
|
||||
bbox = face.bbox
|
||||
x1, y1, x2, y2 = map(int, bbox)
|
||||
cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
|
||||
|
||||
landmarks = landmarker.get_landmarks(frame, bbox)
|
||||
for x, y in landmarks.astype(int):
|
||||
cv2.circle(frame, (x, y), 1, (0, 255, 0), -1)
|
||||
|
||||
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
cv2.imshow('106-Point Landmarks', frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Run facial landmark detection')
|
||||
parser.add_argument('--source', type=str, required=True, help='Image/video path or camera ID (0, 1, ...)')
|
||||
parser.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
|
||||
parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
|
||||
args = parser.parse_args()
|
||||
|
||||
detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
|
||||
landmarker = Landmark106()
|
||||
|
||||
source_type = get_source_type(args.source)
|
||||
|
||||
if source_type == 'camera':
|
||||
run_camera(detector, landmarker, int(args.source))
|
||||
elif source_type == 'image':
|
||||
if not os.path.exists(args.source):
|
||||
print(f'Error: Image not found: {args.source}')
|
||||
return
|
||||
process_image(detector, landmarker, args.source, args.save_dir)
|
||||
elif source_type == 'video':
|
||||
if not os.path.exists(args.source):
|
||||
print(f'Error: Video not found: {args.source}')
|
||||
return
|
||||
process_video(detector, landmarker, args.source, args.save_dir)
|
||||
else:
|
||||
print(f"Error: Unknown source type for '{args.source}'")
|
||||
print('Supported formats: images (.jpg, .png, ...), videos (.mp4, .avi, ...), or camera ID (0, 1, ...)')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||