Mirror of https://github.com/yakhyo/uniface.git (synced 2026-01-06 14:32:30 +00:00)
Compare commits
28 Commits
| SHA1 |
|---|
| 07c8bd7b24 |
| 68179d1e2d |
| 99b35dddb4 |
| 3b6d0a35a9 |
| 0bd808bcef |
| 9edf8b6b3d |
| efb40f2e91 |
| 376e7bc488 |
| cbcd89b167 |
| 50226041c9 |
| 64ad0d2f53 |
| 7c98a60d26 |
| d97a3b2cb2 |
| 2200ba063c |
| 9bcbfa65c2 |
| 96306a0910 |
| 3389aa3e4c |
| b282e6ccc1 |
| d085c6a822 |
| 13b518e96d |
| 1b877bc9fc |
| bb1d209f3b |
| 54b769c0f1 |
| 4d1921e531 |
| da8a5cf35b |
| 3982d677a9 |
| f4458f0550 |
| 637316f077 |
BIN .github/logos/gaze_crop.png (vendored, new file) - binary file not shown (716 KiB)
BIN .github/logos/gaze_org.png (vendored, new file) - binary file not shown (673 KiB)
38 .github/workflows/ci.yml (vendored)

@@ -10,14 +10,31 @@ on:
      - main
      - develop

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  test:
  lint:
    runs-on: ubuntu-latest
    timeout-minutes: 5
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: '3.11'
      - uses: pre-commit/action@v3.0.1

  test:
    runs-on: ${{ matrix.os }}
    timeout-minutes: 15
    needs: lint

    strategy:
      fail-fast: false
      matrix:
        python-version: ["3.10", "3.11", "3.12", "3.13"]
        os: [ubuntu-latest, macos-latest, windows-latest]
        python-version: ["3.11", "3.13"]

    steps:
      - name: Checkout code

@@ -27,7 +44,7 @@ jobs:
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
          cache: 'pip'
          cache: "pip"

      - name: Install dependencies
        run: |

@@ -38,21 +55,15 @@ jobs:
        run: |
          python -c "import onnxruntime as ort; print('Available providers:', ort.get_available_providers())"

      - name: Lint with ruff (if available)
        run: |
          pip install ruff || true
          ruff check . --exit-zero || true
        continue-on-error: true

      - name: Run tests
        run: pytest -v --tb=short

      - name: Test package imports
        run: |
          python -c "from uniface import RetinaFace, ArcFace, Landmark106, AgeGender; print('All imports successful')"
        run: python -c "import uniface; print(f'uniface {uniface.__version__} loaded with {len(uniface.__all__)} exports')"

  build:
    runs-on: ubuntu-latest
    timeout-minutes: 10
    needs: test

    steps:

@@ -62,8 +73,8 @@ jobs:
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"
          cache: 'pip'
          python-version: "3.11"
          cache: "pip"

      - name: Install build tools
        run: |

@@ -84,4 +95,3 @@ jobs:
          name: dist-python-${{ github.sha }}
          path: dist/
          retention-days: 7
38 .github/workflows/docs.yml (vendored, new file)

@@ -0,0 +1,38 @@
name: Deploy docs

on:
  push:
    branches: [main]
  workflow_dispatch:

permissions:
  contents: write

jobs:
  deploy:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0 # Fetch full history for git-committers and git-revision-date plugins

      - uses: actions/setup-python@v5
        with:
          python-version: "3.11"

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install mkdocs-material pymdown-extensions mkdocs-git-committers-plugin-2 mkdocs-git-revision-date-localized-plugin

      - name: Build docs
        env:
          MKDOCS_GIT_COMMITTERS_APIKEY: ${{ secrets.MKDOCS_GIT_COMMITTERS_APIKEY }}
        run: mkdocs build --strict

      - name: Deploy to GitHub Pages
        uses: peaceiris/actions-gh-pages@v4
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
          publish_dir: ./site
          destination_dir: docs
17 .github/workflows/publish.yml (vendored)

@@ -5,9 +5,14 @@ on:
    tags:
      - "v*.*.*" # Trigger only on version tags like v0.1.9

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  validate:
    runs-on: ubuntu-latest
    timeout-minutes: 5
    outputs:
      version: ${{ steps.get_version.outputs.version }}
      tag_version: ${{ steps.get_version.outputs.tag_version }}

@@ -16,13 +21,18 @@ jobs:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"

      - name: Get version from tag and pyproject.toml
        id: get_version
        run: |
          TAG_VERSION=${GITHUB_REF#refs/tags/v}
          echo "tag_version=$TAG_VERSION" >> $GITHUB_OUTPUT

          PYPROJECT_VERSION=$(grep -Po '(?<=^version = ")[^"]*' pyproject.toml)
          PYPROJECT_VERSION=$(python -c "import tomllib; print(tomllib.load(open('pyproject.toml','rb'))['project']['version'])")
          echo "version=$PYPROJECT_VERSION" >> $GITHUB_OUTPUT

          echo "Tag version: v$TAG_VERSION"

@@ -38,12 +48,13 @@ jobs:
  test:
    runs-on: ubuntu-latest
    timeout-minutes: 15
    needs: validate

    strategy:
      fail-fast: false
      matrix:
        python-version: ["3.10", "3.11", "3.12", "3.13"]
        python-version: ["3.11", "3.13"]

    steps:
      - name: Checkout code

@@ -65,6 +76,7 @@ jobs:
  publish:
    runs-on: ubuntu-latest
    timeout-minutes: 10
    needs: [validate, test]
    permissions:
      contents: write

@@ -105,4 +117,3 @@ jobs:
        with:
          files: dist/*
          generate_release_notes: true
41 .pre-commit-config.yaml (new file)

@@ -0,0 +1,41 @@
# Pre-commit configuration for UniFace
# See https://pre-commit.com for more information
# See https://pre-commit.com/hooks.html for more hooks

repos:
  # General file checks
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v6.0.0
    hooks:
      - id: trailing-whitespace
      - id: end-of-file-fixer
      - id: check-yaml
        exclude: ^mkdocs.yml$
      - id: check-toml
      - id: check-added-large-files
        args: ['--maxkb=1000']
      - id: check-merge-conflict
      - id: debug-statements
      - id: check-ast

  # Ruff - Fast Python linter and formatter
  - repo: https://github.com/astral-sh/ruff-pre-commit
    rev: v0.14.10
    hooks:
      - id: ruff
        args: [--fix, --unsafe-fixes, --exit-non-zero-on-fix]
      - id: ruff-format

  # Security checks
  - repo: https://github.com/PyCQA/bandit
    rev: 1.9.2
    hooks:
      - id: bandit
        args: [-c, pyproject.toml]
        additional_dependencies: ['bandit[toml]']
        exclude: ^tests/

# Configuration
ci:
  autofix_commit_msg: 'style: auto-fix by pre-commit hooks'
  autoupdate_commit_msg: 'chore: update pre-commit hooks'
190 CONTRIBUTING.md (new file)

@@ -0,0 +1,190 @@
# Contributing to UniFace

Thank you for considering contributing to UniFace! We welcome contributions of all kinds.

## How to Contribute

### Reporting Issues

- Use GitHub Issues to report bugs or suggest features
- Include clear descriptions and reproducible examples
- Check existing issues before creating new ones

### Pull Requests

1. Fork the repository
2. Create a new branch for your feature
3. Write clear, documented code with type hints
4. Add tests for new functionality
5. Ensure all tests pass and pre-commit hooks are satisfied
6. Submit a pull request with a clear description

## Development Setup

```bash
git clone https://github.com/yakhyo/uniface.git
cd uniface
pip install -e ".[dev]"
```

### Setting Up Pre-commit Hooks

We use [pre-commit](https://pre-commit.com/) to ensure code quality and consistency. Install and configure it:

```bash
# Install pre-commit
pip install pre-commit

# Install the git hooks
pre-commit install

# (Optional) Run against all files
pre-commit run --all-files
```

Once installed, pre-commit will automatically run on every commit to check:

- Code formatting and linting (Ruff)
- Security issues (Bandit)
- General file hygiene (trailing whitespace, YAML/TOML validity, etc.)

**Note:** All PRs are automatically checked by CI. The merge button will only be available after all checks pass.

## Code Style

This project uses [Ruff](https://docs.astral.sh/ruff/) for linting and formatting, following modern Python best practices. Pre-commit handles all formatting automatically.

### Style Guidelines

#### General Rules

- **Line length:** 120 characters maximum
- **Python version:** 3.11+ (use modern syntax)
- **Quote style:** Single quotes for strings, double quotes for docstrings

#### Type Hints

Use modern Python 3.11+ type hints (PEP 585 and PEP 604):

```python
# Preferred (modern)
def process(items: list[str], config: dict[str, int] | None = None) -> tuple[int, str]:
    ...

# Avoid (legacy)
from typing import List, Dict, Optional, Tuple
def process(items: List[str], config: Optional[Dict[str, int]] = None) -> Tuple[int, str]:
    ...
```

#### Docstrings

Use [Google-style docstrings](https://google.github.io/styleguide/pyguide.html#38-comments-and-docstrings) for all public APIs:

```python
def detect_faces(image: np.ndarray, threshold: float = 0.5) -> list[Face]:
    """Detect faces in an image.

    Args:
        image: Input image as a numpy array with shape (H, W, C) in BGR format.
        threshold: Confidence threshold for filtering detections. Defaults to 0.5.

    Returns:
        List of Face objects containing bounding boxes, confidence scores,
        and facial landmarks.

    Raises:
        ValueError: If the input image has invalid dimensions.

    Example:
        >>> from uniface import detect_faces
        >>> faces = detect_faces(image, threshold=0.8)
        >>> print(f"Found {len(faces)} faces")
    """
```

#### Import Order

Imports are automatically sorted by Ruff with the following order:

1. **Future** imports (`from __future__ import annotations`)
2. **Standard library** (`os`, `sys`, `typing`, etc.)
3. **Third-party** (`numpy`, `cv2`, `onnxruntime`, etc.)
4. **First-party** (`uniface.*`)
5. **Local** (relative imports like `.base`, `.models`)

```python
from __future__ import annotations

import os
from typing import Any

import cv2
import numpy as np

from uniface.constants import RetinaFaceWeights
from uniface.log import Logger

from .base import BaseDetector
```

#### Code Comments

- Add comments for complex logic, magic numbers, and non-obvious behavior
- Avoid comments that merely restate the code
- Use `# TODO:` with issue links for planned improvements

```python
# RetinaFace FPN strides and corresponding anchor sizes per level
steps = [8, 16, 32]
min_sizes = [[16, 32], [64, 128], [256, 512]]

# Add small epsilon to prevent division by zero
similarity = np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b) + 1e-5)
```

## Running Tests

```bash
# Run all tests
pytest tests/

# Run with verbose output
pytest tests/ -v

# Run specific test file
pytest tests/test_factory.py

# Run with coverage
pytest tests/ --cov=uniface --cov-report=html
```

## Adding New Features

When adding a new model or feature:

1. **Create the model class** in the appropriate submodule (e.g., `uniface/detection/`)
2. **Add weight constants** to `uniface/constants.py` with URLs and SHA256 hashes (see the sketch below)
3. **Export in `__init__.py`** files at both module and package levels
4. **Write tests** in `tests/` directory
5. **Add example usage** in `tools/` or update existing notebooks
6. **Update documentation** if needed
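To make steps 2 and 3 concrete, here is a minimal, hypothetical sketch of registering weights for a new detector and exporting its class. The class name, URL, and hash below are placeholders rather than real project entries; mirror the existing patterns in `uniface/constants.py` and the package `__init__.py` files when doing this for real.

```python
# Hypothetical sketch for steps 2-3 only; names, URL, and hash are placeholders.
from enum import Enum


class MyDetectorWeights(str, Enum):
    """Weight identifier for the new model (placeholder name)."""

    DEFAULT = 'mydetector_default'


# Download metadata keyed by the weight enum (illustrative values only).
MODEL_URLS = {
    MyDetectorWeights.DEFAULT: 'https://example.com/weights/mydetector_default.onnx',
}
MODEL_SHA256 = {
    MyDetectorWeights.DEFAULT: '0' * 64,  # replace with the real SHA256 of the ONNX file
}

# In uniface/detection/__init__.py and uniface/__init__.py the new class would then
# be imported and appended to __all__, for example:
#   from .my_detector import MyDetector
#   __all__ += ['MyDetector']
```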
## Examples

Example notebooks demonstrating library usage:

| Example | Notebook |
|---------|----------|
| Face Detection | [01_face_detection.ipynb](examples/01_face_detection.ipynb) |
| Face Alignment | [02_face_alignment.ipynb](examples/02_face_alignment.ipynb) |
| Face Verification | [03_face_verification.ipynb](examples/03_face_verification.ipynb) |
| Face Search | [04_face_search.ipynb](examples/04_face_search.ipynb) |
| Face Analyzer | [05_face_analyzer.ipynb](examples/05_face_analyzer.ipynb) |
| Face Parsing | [06_face_parsing.ipynb](examples/06_face_parsing.ipynb) |
| Face Anonymization | [07_face_anonymization.ipynb](examples/07_face_anonymization.ipynb) |
| Gaze Estimation | [08_gaze_estimation.ipynb](examples/08_gaze_estimation.ipynb) |

## Questions?

Open an issue or start a discussion on GitHub.
333 MODELS.md

@@ -1,333 +0,0 @@
# UniFace Model Zoo

Complete guide to all available models, their performance characteristics, and selection criteria.

---

## Face Detection Models

### RetinaFace Family

RetinaFace models are trained on the WIDER FACE dataset and provide excellent accuracy-speed tradeoffs.

| Model Name | Params | Size | Easy | Medium | Hard | Use Case |
| ---------- | ------ | ----- | ------ | ------ | ------ | -------- |
| `MNET_025` | 0.4M | 1.7MB | 88.48% | 87.02% | 80.61% | Mobile/Edge devices |
| `MNET_050` | 1.0M | 2.6MB | 89.42% | 87.97% | 82.40% | Mobile/Edge devices |
| `MNET_V1` | 3.5M | 3.8MB | 90.59% | 89.14% | 84.13% | Balanced mobile |
| `MNET_V2` ⭐ | 3.2M | 3.5MB | 91.70% | 91.03% | 86.60% | **Recommended default** |
| `RESNET18` | 11.7M | 27MB | 92.50% | 91.02% | 86.63% | Server/High accuracy |
| `RESNET34` | 24.8M | 56MB | 94.16% | 93.12% | 88.90% | Maximum accuracy |

**Accuracy**: WIDER FACE validation set (Easy/Medium/Hard subsets) - from [RetinaFace paper](https://arxiv.org/abs/1905.00641)
**Speed**: Benchmark on your own hardware using `scripts/run_detection.py --iterations 100`

#### Usage

```python
from uniface import RetinaFace
from uniface.constants import RetinaFaceWeights

# Default (recommended)
detector = RetinaFace()  # Uses MNET_V2

# Specific model
detector = RetinaFace(
    model_name=RetinaFaceWeights.MNET_025,  # Fastest
    conf_thresh=0.5,
    nms_thresh=0.4,
    input_size=(640, 640)
)
```

---

### SCRFD Family

SCRFD (Sample and Computation Redistribution for Efficient Face Detection) models offer state-of-the-art speed-accuracy tradeoffs.

| Model Name | Params | Size | Easy | Medium | Hard | Use Case |
| ---------- | ------ | ----- | ------ | ------ | ------ | -------- |
| `SCRFD_500M` | 0.6M | 2.5MB | 90.57% | 88.12% | 68.51% | Real-time applications |
| `SCRFD_10G` ⭐ | 4.2M | 17MB | 95.16% | 93.87% | 83.05% | **High accuracy + speed** |

**Accuracy**: WIDER FACE validation set - from [SCRFD paper](https://arxiv.org/abs/2105.04714)
**Speed**: Benchmark on your own hardware using `scripts/run_detection.py --iterations 100`

#### Usage

```python
from uniface import SCRFD
from uniface.constants import SCRFDWeights

# Fast real-time detection
detector = SCRFD(
    model_name=SCRFDWeights.SCRFD_500M_KPS,
    conf_thresh=0.5,
    input_size=(640, 640)
)

# High accuracy
detector = SCRFD(
    model_name=SCRFDWeights.SCRFD_10G_KPS,
    conf_thresh=0.5
)
```

---

### YOLOv5-Face Family

YOLOv5-Face models provide excellent detection accuracy with 5-point facial landmarks, optimized for real-time applications.

| Model Name | Params | Size | Easy | Medium | Hard | FLOPs (G) | Use Case |
| ---------- | ------ | ---- | ------ | ------ | ------ | --------- | -------- |
| `YOLOV5S` ⭐ | 7.1M | 28MB | 94.33% | 92.61% | 83.15% | 5.751 | **Real-time + accuracy** |
| `YOLOV5M` | 21.1M | 84MB | 95.30% | 93.76% | 85.28% | 18.146 | High accuracy |

**Accuracy**: WIDER FACE validation set - from [YOLOv5-Face paper](https://arxiv.org/abs/2105.12931)
**Speed**: Benchmark on your own hardware using `scripts/run_detection.py --iterations 100`
**Note**: Fixed input size of 640×640. Models exported to ONNX from [deepcam-cn/yolov5-face](https://github.com/deepcam-cn/yolov5-face)

#### Usage

```python
from uniface import YOLOv5Face
from uniface.constants import YOLOv5FaceWeights

# Real-time detection (recommended)
detector = YOLOv5Face(
    model_name=YOLOv5FaceWeights.YOLOV5S,
    conf_thresh=0.6,
    nms_thresh=0.5
)

# High accuracy
detector = YOLOv5Face(
    model_name=YOLOv5FaceWeights.YOLOV5M,
    conf_thresh=0.6
)

# Detect faces with landmarks
faces = detector.detect(image)
for face in faces:
    bbox = face['bbox']  # [x1, y1, x2, y2]
    confidence = face['confidence']
    landmarks = face['landmarks']  # 5-point landmarks (5, 2)
```

---

## Face Recognition Models

### ArcFace

State-of-the-art face recognition using additive angular margin loss.

| Model Name | Backbone | Params | Size | Use Case |
| ---------- | -------- | ------ | ----- | -------- |
| `MNET` ⭐ | MobileNet | 2.0M | 8MB | **Balanced (recommended)** |
| `RESNET` | ResNet50 | 43.6M | 166MB | Maximum accuracy |

**Dataset**: Trained on MS1M-V2 (5.8M images, 85K identities)
**Accuracy**: Benchmark on your own dataset or use standard face verification benchmarks

#### Usage

```python
from uniface import ArcFace
from uniface.constants import ArcFaceWeights

# Default (MobileNet backbone)
recognizer = ArcFace()

# High accuracy (ResNet50 backbone)
recognizer = ArcFace(model_name=ArcFaceWeights.RESNET)

# Extract embedding
embedding = recognizer.get_normalized_embedding(image, landmarks)
# Returns: (1, 512) normalized embedding vector
```

---

### MobileFace

Lightweight face recognition optimized for mobile devices.

| Model Name | Backbone | Params | Size | LFW | CALFW | CPLFW | AgeDB-30 | Use Case |
| ---------- | -------- | ------ | ---- | --- | ----- | ----- | -------- | -------- |
| `MNET_025` | MobileNetV1 0.25 | 0.36M | 1MB | 98.76% | 92.02% | 82.37% | 90.02% | Ultra-lightweight |
| `MNET_V2` ⭐ | MobileNetV2 | 2.29M | 4MB | 99.55% | 94.87% | 86.89% | 95.16% | **Mobile/Edge** |
| `MNET_V3_SMALL` | MobileNetV3-S | 1.25M | 3MB | 99.30% | 93.77% | 85.29% | 92.79% | Mobile optimized |
| `MNET_V3_LARGE` | MobileNetV3-L | 3.52M | 10MB | 99.53% | 94.56% | 86.79% | 95.13% | Balanced mobile |

**Dataset**: Trained on MS1M-V2 (5.8M images, 85K identities)
**Accuracy**: Evaluated on LFW, CALFW, CPLFW, and AgeDB-30 benchmarks
**Note**: These models are lightweight alternatives to ArcFace for resource-constrained environments

#### Usage

```python
from uniface import MobileFace
from uniface.constants import MobileFaceWeights

# Lightweight
recognizer = MobileFace(model_name=MobileFaceWeights.MNET_V2)
```

---

### SphereFace

Face recognition using angular softmax loss.

| Model Name | Backbone | Params | Size | LFW | CALFW | CPLFW | AgeDB-30 | Use Case |
| ---------- | -------- | ------ | ---- | --- | ----- | ----- | -------- | -------- |
| `SPHERE20` | Sphere20 | 24.5M | 50MB | 99.67% | 95.61% | 88.75% | 96.58% | Research/Comparison |
| `SPHERE36` | Sphere36 | 34.6M | 92MB | 99.72% | 95.64% | 89.92% | 96.83% | Research/Comparison |

**Dataset**: Trained on MS1M-V2 (5.8M images, 85K identities)
**Accuracy**: Evaluated on LFW, CALFW, CPLFW, and AgeDB-30 benchmarks
**Note**: SphereFace uses angular softmax loss, an earlier approach before ArcFace. These models provide good accuracy with moderate resource requirements.

#### Usage

```python
from uniface import SphereFace
from uniface.constants import SphereFaceWeights

recognizer = SphereFace(model_name=SphereFaceWeights.SPHERE20)
```

---

## Facial Landmark Models

### 106-Point Landmark Detection

High-precision facial landmark localization.

| Model Name | Points | Params | Size | Use Case |
| ---------- | ------ | ------ | ---- | -------- |
| `2D106` | 106 | 3.7M | 14MB | Face alignment, analysis |

**Note**: Provides 106 facial keypoints for detailed face analysis and alignment

#### Usage

```python
from uniface import Landmark106

landmarker = Landmark106()
landmarks = landmarker.get_landmarks(image, bbox)
# Returns: (106, 2) array of (x, y) coordinates
```

**Landmark Groups:**

- Face contour: 0-32 (33 points)
- Eyebrows: 33-50 (18 points)
- Nose: 51-62 (12 points)
- Eyes: 63-86 (24 points)
- Mouth: 87-105 (19 points)

---

## Attribute Analysis Models

### Age & Gender Detection

| Model Name | Attributes | Params | Size | Use Case |
| ---------- | ---------- | ------ | ---- | -------- |
| `DEFAULT` | Age, Gender | 2.1M | 8MB | General purpose |

**Dataset**: Trained on CelebA
**Note**: Accuracy varies by demographic and image quality. Test on your specific use case.

#### Usage

```python
from uniface import AgeGender

predictor = AgeGender()
gender_id, age = predictor.predict(image, bbox)
# Returns: (gender_id, age_in_years)
# gender_id: 0 for Female, 1 for Male
```

---

### Emotion Detection

| Model Name | Classes | Params | Size | Use Case |
| ---------- | ------- | ------ | ---- | -------- |
| `AFFECNET7` | 7 | 0.5M | 2MB | 7-class emotion |
| `AFFECNET8` | 8 | 0.5M | 2MB | 8-class emotion |

**Classes (7)**: Neutral, Happy, Sad, Surprise, Fear, Disgust, Anger
**Classes (8)**: Above + Contempt

**Dataset**: Trained on AffectNet
**Note**: Emotion detection accuracy depends heavily on facial expression clarity and cultural context

#### Usage

```python
from uniface import Emotion
from uniface.constants import DDAMFNWeights

predictor = Emotion(model_name=DDAMFNWeights.AFFECNET7)
emotion, confidence = predictor.predict(image, landmarks)
```

---

## Model Updates

Models are automatically downloaded and cached on first use. Cache location: `~/.uniface/models/`

### Manual Model Management

```python
from uniface.model_store import verify_model_weights
from uniface.constants import RetinaFaceWeights

# Download specific model
model_path = verify_model_weights(
    RetinaFaceWeights.MNET_V2,
    root='./custom_cache'
)

# Models are verified with SHA-256 checksums
```

### Download All Models

```bash
# Using the provided script
python scripts/download_model.py

# Download specific model
python scripts/download_model.py --model MNET_V2
```

---

## References

### Model Training & Architectures

- **RetinaFace Training**: [yakhyo/retinaface-pytorch](https://github.com/yakhyo/retinaface-pytorch) - PyTorch implementation and training code
- **YOLOv5-Face Original**: [deepcam-cn/yolov5-face](https://github.com/deepcam-cn/yolov5-face) - Original PyTorch implementation
- **YOLOv5-Face ONNX**: [yakhyo/yolov5-face-onnx-inference](https://github.com/yakhyo/yolov5-face-onnx-inference) - ONNX inference implementation
- **Face Recognition Training**: [yakhyo/face-recognition](https://github.com/yakhyo/face-recognition) - ArcFace, MobileFace, SphereFace training code
- **InsightFace**: [deepinsight/insightface](https://github.com/deepinsight/insightface) - Model architectures and pretrained weights

### Papers

- **RetinaFace**: [Single-Shot Multi-Level Face Localisation in the Wild](https://arxiv.org/abs/1905.00641)
- **SCRFD**: [Sample and Computation Redistribution for Efficient Face Detection](https://arxiv.org/abs/2105.04714)
- **YOLOv5-Face**: [YOLO5Face: Why Reinventing a Face Detector](https://arxiv.org/abs/2105.12931)
- **ArcFace**: [Additive Angular Margin Loss for Deep Face Recognition](https://arxiv.org/abs/1801.07698)
- **SphereFace**: [Deep Hypersphere Embedding for Face Recognition](https://arxiv.org/abs/1704.08063)
380 QUICKSTART.md

@@ -1,380 +0,0 @@
# UniFace Quick Start Guide

Get up and running with UniFace in 5 minutes! This guide covers the most common use cases.

---

## Installation

```bash
# macOS (Apple Silicon) - automatically includes ARM64 optimizations
pip install uniface

# Linux/Windows with NVIDIA GPU
pip install uniface[gpu]

# CPU-only (all platforms)
pip install uniface
```

---

## 1. Face Detection (30 seconds)

Detect faces in an image:

```python
import cv2
from uniface import RetinaFace

# Load image
image = cv2.imread("photo.jpg")

# Initialize detector (models auto-download on first use)
detector = RetinaFace()

# Detect faces
faces = detector.detect(image)

# Print results
for i, face in enumerate(faces):
    print(f"Face {i+1}:")
    print(f"  Confidence: {face['confidence']:.2f}")
    print(f"  BBox: {face['bbox']}")
    print(f"  Landmarks: {len(face['landmarks'])} points")
```

**Output:**

```
Face 1:
  Confidence: 0.99
  BBox: [120.5, 85.3, 245.8, 210.6]
  Landmarks: 5 points
```

---

## 2. Visualize Detections (1 minute)

Draw bounding boxes and landmarks:

```python
import cv2
from uniface import RetinaFace
from uniface.visualization import draw_detections

# Detect faces
detector = RetinaFace()
image = cv2.imread("photo.jpg")
faces = detector.detect(image)

# Extract visualization data
bboxes = [f['bbox'] for f in faces]
scores = [f['confidence'] for f in faces]
landmarks = [f['landmarks'] for f in faces]

# Draw on image
draw_detections(image, bboxes, scores, landmarks, vis_threshold=0.6)

# Save result
cv2.imwrite("output.jpg", image)
print("Saved output.jpg")
```

---

## 3. Face Recognition (2 minutes)

Compare two faces:

```python
import cv2
import numpy as np
from uniface import RetinaFace, ArcFace

# Initialize models
detector = RetinaFace()
recognizer = ArcFace()

# Load two images
image1 = cv2.imread("person1.jpg")
image2 = cv2.imread("person2.jpg")

# Detect faces
faces1 = detector.detect(image1)
faces2 = detector.detect(image2)

if faces1 and faces2:
    # Extract embeddings
    emb1 = recognizer.get_normalized_embedding(image1, faces1[0]['landmarks'])
    emb2 = recognizer.get_normalized_embedding(image2, faces2[0]['landmarks'])

    # Compute similarity (cosine similarity)
    similarity = np.dot(emb1, emb2.T)[0][0]

    # Interpret result
    if similarity > 0.6:
        print(f"Same person (similarity: {similarity:.3f})")
    else:
        print(f"Different people (similarity: {similarity:.3f})")
else:
    print("No faces detected")
```

**Similarity thresholds:**

- `> 0.6`: Same person (high confidence)
- `0.4 - 0.6`: Uncertain (manual review)
- `< 0.4`: Different people

---

## 4. Webcam Demo (2 minutes)

Real-time face detection:

```python
import cv2
from uniface import RetinaFace
from uniface.visualization import draw_detections

detector = RetinaFace()
cap = cv2.VideoCapture(0)

print("Press 'q' to quit")

while True:
    ret, frame = cap.read()
    if not ret:
        break

    # Detect faces
    faces = detector.detect(frame)

    # Draw results
    bboxes = [f['bbox'] for f in faces]
    scores = [f['confidence'] for f in faces]
    landmarks = [f['landmarks'] for f in faces]
    draw_detections(frame, bboxes, scores, landmarks)

    # Show frame
    cv2.imshow("UniFace - Press 'q' to quit", frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
```

---

## 5. Age & Gender Detection (2 minutes)

Detect age and gender:

```python
import cv2
from uniface import RetinaFace, AgeGender

# Initialize models
detector = RetinaFace()
age_gender = AgeGender()

# Load image
image = cv2.imread("photo.jpg")
faces = detector.detect(image)

# Predict attributes
for i, face in enumerate(faces):
    gender_id, age = age_gender.predict(image, face['bbox'])
    gender = 'Female' if gender_id == 0 else 'Male'
    print(f"Face {i+1}: {gender}, {age} years old")
```

**Output:**

```
Face 1: Male, 32 years old
Face 2: Female, 28 years old
```

---

## 6. Facial Landmarks (2 minutes)

Detect 106 facial landmarks:

```python
import cv2
from uniface import RetinaFace, Landmark106

# Initialize models
detector = RetinaFace()
landmarker = Landmark106()

# Detect face and landmarks
image = cv2.imread("photo.jpg")
faces = detector.detect(image)

if faces:
    landmarks = landmarker.get_landmarks(image, faces[0]['bbox'])
    print(f"Detected {len(landmarks)} landmarks")

    # Draw landmarks
    for x, y in landmarks.astype(int):
        cv2.circle(image, (x, y), 2, (0, 255, 0), -1)

    cv2.imwrite("landmarks.jpg", image)
```

---

## 7. Batch Processing (3 minutes)

Process multiple images:

```python
import cv2
from pathlib import Path
from uniface import RetinaFace

detector = RetinaFace()

# Process all images in a folder
image_dir = Path("images/")
output_dir = Path("output/")
output_dir.mkdir(exist_ok=True)

for image_path in image_dir.glob("*.jpg"):
    print(f"Processing {image_path.name}...")

    image = cv2.imread(str(image_path))
    faces = detector.detect(image)

    print(f"  Found {len(faces)} face(s)")

    # Save results
    output_path = output_dir / image_path.name
    # ... draw and save ...

print("Done!")
```

---

## 8. Model Selection

Choose the right model for your use case:

### Detection Models

```python
from uniface.detection import RetinaFace, SCRFD, YOLOv5Face
from uniface.constants import RetinaFaceWeights, SCRFDWeights, YOLOv5FaceWeights

# Fast detection (mobile/edge devices)
detector = RetinaFace(
    model_name=RetinaFaceWeights.MNET_025,
    conf_thresh=0.7
)

# Balanced (recommended)
detector = RetinaFace(
    model_name=RetinaFaceWeights.MNET_V2
)

# Real-time with high accuracy
detector = YOLOv5Face(
    model_name=YOLOv5FaceWeights.YOLOV5S,
    conf_thresh=0.6,
    nms_thresh=0.5
)

# High accuracy (server/GPU)
detector = SCRFD(
    model_name=SCRFDWeights.SCRFD_10G_KPS,
    conf_thresh=0.5
)
```

### Recognition Models

```python
from uniface import ArcFace, MobileFace, SphereFace
from uniface.constants import MobileFaceWeights, SphereFaceWeights

# ArcFace (recommended for most use cases)
recognizer = ArcFace()  # Best accuracy

# MobileFace (lightweight for mobile/edge)
recognizer = MobileFace(model_name=MobileFaceWeights.MNET_V2)  # Fast, small size

# SphereFace (angular margin approach)
recognizer = SphereFace(model_name=SphereFaceWeights.SPHERE20)  # Alternative method
```

---

## Common Issues

### 1. Models Not Downloading

```python
# Manually download a model
from uniface.model_store import verify_model_weights
from uniface.constants import RetinaFaceWeights

model_path = verify_model_weights(RetinaFaceWeights.MNET_V2)
print(f"Model downloaded to: {model_path}")
```

### 2. Check Hardware Acceleration

```python
import onnxruntime as ort
print("Available providers:", ort.get_available_providers())

# macOS M-series should show: ['CoreMLExecutionProvider', ...]
# NVIDIA GPU should show: ['CUDAExecutionProvider', ...]
```

### 3. Slow Performance on Mac

The standard installation includes ARM64 optimizations for Apple Silicon. If performance is slow, verify you're using the ARM64 build of Python:

```bash
python -c "import platform; print(platform.machine())"
# Should show: arm64 (not x86_64)
```

### 4. Import Errors

```python
# Correct imports
from uniface.detection import RetinaFace
from uniface.recognition import ArcFace
from uniface.landmark import Landmark106

# Wrong imports
from uniface import retinaface  # Module, not class
```

---

## Next Steps

- **Detailed Examples**: Check the [examples/](examples/) folder for Jupyter notebooks
- **Model Benchmarks**: See [MODELS.md](MODELS.md) for performance comparisons
- **Full Documentation**: Read [README.md](README.md) for complete API reference

---

## References

- **RetinaFace Training**: [yakhyo/retinaface-pytorch](https://github.com/yakhyo/retinaface-pytorch)
- **YOLOv5-Face Original**: [deepcam-cn/yolov5-face](https://github.com/deepcam-cn/yolov5-face)
- **YOLOv5-Face ONNX**: [yakhyo/yolov5-face-onnx-inference](https://github.com/yakhyo/yolov5-face-onnx-inference)
- **Face Recognition Training**: [yakhyo/face-recognition](https://github.com/yakhyo/face-recognition)
- **InsightFace**: [deepinsight/insightface](https://github.com/deepinsight/insightface)
476
README.md
476
README.md
@@ -1,462 +1,126 @@
|
||||
# UniFace: All-in-One Face Analysis Library
|
||||
|
||||
[](https://opensource.org/licenses/MIT)
|
||||
[](https://www.python.org/)
|
||||
[](https://pypi.org/project/uniface/)
|
||||
[](https://github.com/yakhyo/uniface/actions)
|
||||
[](https://pepy.tech/project/uniface)
|
||||
[](https://deepwiki.com/yakhyo/uniface)
|
||||
|
||||
<div align="center">
|
||||
<img src=".github/logos/logo_web.webp" width=75%>
|
||||
|
||||
[](https://pypi.org/project/uniface/)
|
||||
[](https://www.python.org/)
|
||||
[](https://opensource.org/licenses/MIT)
|
||||
[](https://github.com/yakhyo/uniface/actions)
|
||||
[](https://pepy.tech/project/uniface)
|
||||
[](https://yakhyo.github.io/uniface/)
|
||||
|
||||
</div>
|
||||
|
||||
**UniFace** is a lightweight, production-ready face analysis library built on ONNX Runtime. It provides high-performance face detection, recognition, landmark detection, and attribute analysis with hardware acceleration support across platforms.
|
||||
<div align="center">
|
||||
<img src=".github/logos/logo_web.webp" width=80%>
|
||||
</div>
|
||||
|
||||
**UniFace** is a lightweight, production-ready face analysis library built on ONNX Runtime. It provides high-performance face detection, recognition, landmark detection, face parsing, gaze estimation, and attribute analysis with hardware acceleration support across platforms.
|
||||
|
||||
> 💬 **Have questions?** [Chat with this codebase on DeepWiki](https://deepwiki.com/yakhyo/uniface) - AI-powered docs that let you ask anything about UniFace.
|
||||
|
||||
---
|
||||
|
||||
## Features
|
||||
|
||||
- **High-Speed Face Detection**: ONNX-optimized RetinaFace and SCRFD models
|
||||
- **Facial Landmark Detection**: Accurate 106-point landmark localization
|
||||
- **Face Recognition**: ArcFace, MobileFace, and SphereFace embeddings
|
||||
- **Attribute Analysis**: Age, gender, and emotion detection
|
||||
- **Face Alignment**: Precise alignment for downstream tasks
|
||||
- **Hardware Acceleration**: ARM64 optimizations (Apple Silicon), CUDA (NVIDIA), CPU fallback
|
||||
- **Simple API**: Intuitive factory functions and clean interfaces
|
||||
- **Production-Ready**: Type hints, comprehensive logging, PEP8 compliant
|
||||
- **Face Detection** — RetinaFace, SCRFD, YOLOv5-Face, and YOLOv8-Face with 5-point landmarks
|
||||
- **Face Recognition** — ArcFace, MobileFace, and SphereFace embeddings
|
||||
- **Facial Landmarks** — 106-point landmark localization
|
||||
- **Face Parsing** — BiSeNet semantic segmentation (19 classes)
|
||||
- **Gaze Estimation** — Real-time gaze direction with MobileGaze
|
||||
- **Attribute Analysis** — Age, gender, race (FairFace), and emotion
|
||||
- **Anti-Spoofing** — Face liveness detection with MiniFASNet
|
||||
- **Face Anonymization** — 5 blur methods for privacy protection
|
||||
- **Hardware Acceleration** — ARM64 (Apple Silicon), CUDA (NVIDIA), CPU
|
||||
|
||||
---
|
||||
|
||||
## Installation
|
||||
|
||||
### Quick Install (All Platforms)
|
||||
|
||||
```bash
|
||||
# Standard installation
|
||||
pip install uniface
|
||||
```
|
||||
|
||||
### Platform-Specific Installation
|
||||
|
||||
#### macOS (Apple Silicon - M1/M2/M3/M4)
|
||||
|
||||
For Apple Silicon Macs, the standard installation automatically includes optimized ARM64 support:
|
||||
|
||||
```bash
|
||||
pip install uniface
|
||||
```
|
||||
|
||||
The base `onnxruntime` package (included with uniface) has native Apple Silicon support with ARM64 optimizations built-in since version 1.13+.
|
||||
|
||||
#### Linux/Windows with NVIDIA GPU
|
||||
|
||||
For CUDA acceleration on NVIDIA GPUs:
|
||||
|
||||
```bash
|
||||
# GPU support (CUDA)
|
||||
pip install uniface[gpu]
|
||||
```
|
||||
|
||||
**Requirements:**
|
||||
|
||||
- CUDA 11.x or 12.x
|
||||
- cuDNN 8.x
|
||||
- See [ONNX Runtime GPU requirements](https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html)
|
||||
|
||||
#### CPU-Only (All Platforms)
|
||||
|
||||
```bash
|
||||
pip install uniface
|
||||
```
|
||||
|
||||
### Install from Source
|
||||
|
||||
```bash
|
||||
# From source
|
||||
git clone https://github.com/yakhyo/uniface.git
|
||||
cd uniface
|
||||
pip install -e .
|
||||
cd uniface && pip install -e .
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Quick Start
|
||||
|
||||
### Face Detection
|
||||
## Quick Example
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace
|
||||
|
||||
# Initialize detector
|
||||
# Initialize detector (models auto-download on first use)
|
||||
detector = RetinaFace()
|
||||
|
||||
# Load image
|
||||
image = cv2.imread("image.jpg")
|
||||
|
||||
# Detect faces
|
||||
image = cv2.imread("photo.jpg")
|
||||
faces = detector.detect(image)
|
||||
|
||||
# Process results
|
||||
for face in faces:
|
||||
bbox = face['bbox'] # [x1, y1, x2, y2]
|
||||
confidence = face['confidence']
|
||||
landmarks = face['landmarks'] # 5-point landmarks
|
||||
print(f"Face detected with confidence: {confidence:.2f}")
|
||||
print(f"Confidence: {face.confidence:.2f}")
|
||||
print(f"BBox: {face.bbox}")
|
||||
print(f"Landmarks: {face.landmarks.shape}")
|
||||
```
|
||||
|
||||
### Face Recognition
|
||||
|
||||
```python
|
||||
from uniface import ArcFace, RetinaFace
|
||||
from uniface import compute_similarity
|
||||
|
||||
# Initialize models
|
||||
detector = RetinaFace()
|
||||
recognizer = ArcFace()
|
||||
|
||||
# Detect and extract embeddings
|
||||
faces1 = detector.detect(image1)
|
||||
faces2 = detector.detect(image2)
|
||||
|
||||
embedding1 = recognizer.get_normalized_embedding(image1, faces1[0]['landmarks'])
|
||||
embedding2 = recognizer.get_normalized_embedding(image2, faces2[0]['landmarks'])
|
||||
|
||||
# Compare faces
|
||||
similarity = compute_similarity(embedding1, embedding2)
|
||||
print(f"Similarity: {similarity:.4f}")
|
||||
```
|
||||
|
||||
### Facial Landmarks
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace, Landmark106
|
||||
|
||||
detector = RetinaFace()
|
||||
landmarker = Landmark106()
|
||||
|
||||
faces = detector.detect(image)
|
||||
landmarks = landmarker.get_landmarks(image, faces[0]['bbox'])
|
||||
# Returns 106 (x, y) landmark points
|
||||
```
|
||||
|
||||
### Age & Gender Detection
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace, AgeGender
|
||||
|
||||
detector = RetinaFace()
|
||||
age_gender = AgeGender()
|
||||
|
||||
faces = detector.detect(image)
|
||||
gender_id, age = age_gender.predict(image, faces[0]['bbox'])
|
||||
gender = 'Female' if gender_id == 0 else 'Male'
|
||||
print(f"{gender}, {age} years old")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Documentation
|
||||
|
||||
- [**QUICKSTART.md**](QUICKSTART.md) - 5-minute getting started guide
|
||||
- [**MODELS.md**](MODELS.md) - Model zoo, benchmarks, and selection guide
|
||||
- [**Examples**](examples/) - Jupyter notebooks with detailed examples
|
||||
|
||||
---
|
||||
|
||||
## API Overview
|
||||
|
||||
### Factory Functions (Recommended)
|
||||
|
||||
```python
|
||||
from uniface.detection import RetinaFace, SCRFD
|
||||
from uniface.recognition import ArcFace
|
||||
from uniface.landmark import Landmark106
|
||||
|
||||
# Create detector with default settings
|
||||
detector = RetinaFace()
|
||||
|
||||
# Create with custom config
|
||||
detector = SCRFD(
|
||||
model_name='scrfd_10g_kps',
|
||||
conf_thresh=0.8,
|
||||
input_size=(640, 640)
|
||||
)
|
||||
|
||||
# Recognition and landmarks
|
||||
recognizer = ArcFace()
|
||||
landmarker = Landmark106()
|
||||
```
|
||||
|
||||
### Direct Model Instantiation
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace, SCRFD, YOLOv5Face, ArcFace, MobileFace, SphereFace
|
||||
from uniface.constants import RetinaFaceWeights, YOLOv5FaceWeights
|
||||
|
||||
# Detection
|
||||
detector = RetinaFace(
|
||||
model_name=RetinaFaceWeights.MNET_V2,
|
||||
conf_thresh=0.5,
|
||||
nms_thresh=0.4
|
||||
)
|
||||
|
||||
# YOLOv5-Face detection
|
||||
detector = YOLOv5Face(
|
||||
model_name=YOLOv5FaceWeights.YOLOV5S,
|
||||
conf_thresh=0.6,
|
||||
nms_thresh=0.5
|
||||
)
|
||||
|
||||
# Recognition
|
||||
recognizer = ArcFace() # Uses default weights
|
||||
recognizer = MobileFace() # Lightweight alternative
|
||||
recognizer = SphereFace() # Angular softmax alternative
|
||||
```
|
||||
|
||||
### High-Level Detection API
|
||||
|
||||
```python
|
||||
from uniface import detect_faces
|
||||
|
||||
# One-line face detection
|
||||
faces = detect_faces(image, method='retinaface', conf_thresh=0.8)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Model Performance
|
||||
|
||||
### Face Detection (WIDER FACE Dataset)
|
||||
|
||||
| Model | Easy | Medium | Hard | Use Case |
|
||||
| ------------------ | ------ | ------ | ------ | ---------------------- |
|
||||
| retinaface_mnet025 | 88.48% | 87.02% | 80.61% | Mobile/Edge devices |
|
||||
| retinaface_mnet_v2 | 91.70% | 91.03% | 86.60% | Balanced (recommended) |
|
||||
| retinaface_r34 | 94.16% | 93.12% | 88.90% | High accuracy |
|
||||
| scrfd_500m | 90.57% | 88.12% | 68.51% | Real-time applications |
|
||||
| scrfd_10g | 95.16% | 93.87% | 83.05% | Best accuracy/speed |
|
||||
| yolov5s_face | 94.33% | 92.61% | 83.15% | Real-time + accuracy |
|
||||
| yolov5m_face | 95.30% | 93.76% | 85.28% | High accuracy |
|
||||
|
||||
_Accuracy values from original papers: [RetinaFace](https://arxiv.org/abs/1905.00641), [SCRFD](https://arxiv.org/abs/2105.04714), [YOLOv5-Face](https://arxiv.org/abs/2105.12931)_
|
||||
|
||||
**Benchmark on your hardware:**
|
||||
|
||||
```bash
|
||||
python scripts/run_detection.py --image assets/test.jpg --iterations 100
|
||||
```
|
||||
|
||||
See [MODELS.md](MODELS.md) for detailed model information and selection guide.
|
||||
|
||||
<div align="center">
|
||||
<img src="assets/test_result.png">
|
||||
</div>
|
||||
|
||||
---
|
||||
|
||||
## Examples
|
||||
## Documentation
|
||||
|
||||
### Webcam Face Detection
|
||||
📚 **Full documentation**: [yakhyo.github.io/uniface](https://yakhyo.github.io/uniface/)
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace
|
||||
from uniface.visualization import draw_detections
|
||||
| Resource | Description |
|
||||
|----------|-------------|
|
||||
| [Quickstart](https://yakhyo.github.io/uniface/quickstart/) | Get up and running in 5 minutes |
|
||||
| [Model Zoo](https://yakhyo.github.io/uniface/models/) | All models, benchmarks, and selection guide |
|
||||
| [API Reference](https://yakhyo.github.io/uniface/modules/detection/) | Detailed module documentation |
|
||||
| [Tutorials](https://yakhyo.github.io/uniface/recipes/image-pipeline/) | Step-by-step workflow examples |
|
||||
| [Guides](https://yakhyo.github.io/uniface/concepts/overview/) | Architecture and design principles |
|
||||
|
||||
detector = RetinaFace()
|
||||
cap = cv2.VideoCapture(0)
|
||||
### Jupyter Notebooks
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
faces = detector.detect(frame)
|
||||
|
||||
# Extract data for visualization
|
||||
bboxes = [f['bbox'] for f in faces]
|
||||
scores = [f['confidence'] for f in faces]
|
||||
landmarks = [f['landmarks'] for f in faces]
|
||||
|
||||
draw_detections(frame, bboxes, scores, landmarks, vis_threshold=0.6)
|
||||
|
||||
cv2.imshow("Face Detection", frame)
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
```
|
||||
|
||||
### Face Search System
|
||||
|
||||
```python
|
||||
import numpy as np
|
||||
from uniface import RetinaFace, ArcFace
|
||||
|
||||
detector = RetinaFace()
|
||||
recognizer = ArcFace()
|
||||
|
||||
# Build face database
|
||||
database = {}
|
||||
for person_id, image_path in person_images.items():
|
||||
image = cv2.imread(image_path)
|
||||
faces = detector.detect(image)
|
||||
if faces:
|
||||
embedding = recognizer.get_normalized_embedding(
|
||||
image, faces[0]['landmarks']
|
||||
)
|
||||
database[person_id] = embedding
|
||||
|
||||
# Search for a face
|
||||
query_image = cv2.imread("query.jpg")
|
||||
query_faces = detector.detect(query_image)
|
||||
if query_faces:
|
||||
query_embedding = recognizer.get_normalized_embedding(
|
||||
query_image, query_faces[0]['landmarks']
|
||||
)
|
||||
|
||||
# Find best match
|
||||
best_match = None
|
||||
best_similarity = -1
|
||||
|
||||
for person_id, db_embedding in database.items():
|
||||
similarity = np.dot(query_embedding, db_embedding.T)[0][0]
|
||||
if similarity > best_similarity:
|
||||
best_similarity = similarity
|
||||
best_match = person_id
|
||||
|
||||
print(f"Best match: {best_match} (similarity: {best_similarity:.4f})")
|
||||
```
|
||||
|
||||
More examples in the [examples/](examples/) directory.
|
||||
|
||||
---
|
||||
|
||||
## Advanced Configuration
|
||||
|
||||
### Custom ONNX Runtime Providers
|
||||
|
||||
```python
|
||||
from uniface.onnx_utils import get_available_providers, create_onnx_session
|
||||
|
||||
# Check available providers
|
||||
providers = get_available_providers()
|
||||
print(f"Available: {providers}")
|
||||
|
||||
# Force CPU-only execution
|
||||
from uniface import RetinaFace
|
||||
detector = RetinaFace()
|
||||
# Internally uses create_onnx_session() which auto-selects best provider
|
||||
```
|
||||
|
||||
### Model Download and Caching
|
||||
|
||||
Models are automatically downloaded on first use and cached in `~/.uniface/models/`.
|
||||
|
||||
```python
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.constants import RetinaFaceWeights
|
||||
|
||||
# Manually download and verify a model
|
||||
model_path = verify_model_weights(
|
||||
RetinaFaceWeights.MNET_V2,
|
||||
root='./custom_models' # Custom cache directory
|
||||
)
|
||||
```
|
||||
|
||||
### Logging Configuration
|
||||
|
||||
```python
|
||||
from uniface import Logger
|
||||
import logging
|
||||
|
||||
# Set logging level
|
||||
Logger.setLevel(logging.DEBUG) # DEBUG, INFO, WARNING, ERROR
|
||||
|
||||
# Disable logging
|
||||
Logger.setLevel(logging.CRITICAL)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Testing
|
||||
|
||||
```bash
|
||||
# Run all tests
|
||||
pytest
|
||||
|
||||
# Run with coverage
|
||||
pytest --cov=uniface --cov-report=html
|
||||
|
||||
# Run specific test file
|
||||
pytest tests/test_retinaface.py -v
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Development
|
||||
|
||||
### Setup Development Environment
|
||||
|
||||
```bash
|
||||
git clone https://github.com/yakhyo/uniface.git
|
||||
cd uniface
|
||||
|
||||
# Install in editable mode with dev dependencies
|
||||
pip install -e ".[dev]"
|
||||
|
||||
# Run tests
|
||||
pytest
|
||||
```
|
||||
|
||||
### Code Formatting
|
||||
|
||||
This project uses [Ruff](https://docs.astral.sh/ruff/) for linting and formatting.
|
||||
|
||||
```bash
|
||||
# Format code
|
||||
ruff format .
|
||||
|
||||
# Check for linting errors
|
||||
ruff check .
|
||||
|
||||
# Auto-fix linting errors
|
||||
ruff check . --fix
|
||||
```
|
||||
|
||||
Ruff configuration is in `pyproject.toml`. Key settings:
|
||||
|
||||
- Line length: 120
|
||||
- Python target: 3.10+
|
||||
- Import sorting: `uniface` as first-party
|
||||
|
||||
### Project Structure
|
||||
|
||||
```
|
||||
uniface/
|
||||
├── uniface/
|
||||
│ ├── detection/ # Face detection models
|
||||
│ ├── recognition/ # Face recognition models
|
||||
│ ├── landmark/ # Landmark detection
|
||||
│ ├── attribute/ # Age, gender, emotion
|
||||
│ ├── onnx_utils.py # ONNX Runtime utilities
|
||||
│ ├── model_store.py # Model download & caching
|
||||
│ └── visualization.py # Drawing utilities
|
||||
├── tests/ # Unit tests
|
||||
├── examples/ # Example notebooks
|
||||
└── scripts/ # Utility scripts
|
||||
```
|
||||
| Example | Colab | Description |
|
||||
|---------|:-----:|-------------|
|
||||
| [01_face_detection.ipynb](examples/01_face_detection.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/01_face_detection.ipynb) | Face detection and landmarks |
|
||||
| [02_face_alignment.ipynb](examples/02_face_alignment.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/02_face_alignment.ipynb) | Face alignment for recognition |
|
||||
| [03_face_verification.ipynb](examples/03_face_verification.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/03_face_verification.ipynb) | Compare faces for identity |
|
||||
| [04_face_search.ipynb](examples/04_face_search.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/04_face_search.ipynb) | Find a person in group photos |
|
||||
| [05_face_analyzer.ipynb](examples/05_face_analyzer.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/05_face_analyzer.ipynb) | All-in-one analysis |
|
||||
| [06_face_parsing.ipynb](examples/06_face_parsing.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/06_face_parsing.ipynb) | Semantic face segmentation |
|
||||
| [07_face_anonymization.ipynb](examples/07_face_anonymization.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/07_face_anonymization.ipynb) | Privacy-preserving blur |
|
||||
| [08_gaze_estimation.ipynb](examples/08_gaze_estimation.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/08_gaze_estimation.ipynb) | Gaze direction estimation |
|
||||
|
||||
---

## References

- **RetinaFace Training**: [yakhyo/retinaface-pytorch](https://github.com/yakhyo/retinaface-pytorch) - PyTorch implementation and training code
- **YOLOv5-Face Original**: [deepcam-cn/yolov5-face](https://github.com/deepcam-cn/yolov5-face) - Original PyTorch implementation
- **YOLOv5-Face ONNX**: [yakhyo/yolov5-face-onnx-inference](https://github.com/yakhyo/yolov5-face-onnx-inference) - ONNX inference implementation
- **YOLOv8-Face ONNX**: [yakhyo/yolov8-face-onnx-inference](https://github.com/yakhyo/yolov8-face-onnx-inference) - ONNX inference implementation
- **Face Recognition Training**: [yakhyo/face-recognition](https://github.com/yakhyo/face-recognition) - ArcFace, MobileFace, SphereFace training code
- **Face Parsing**: [yakhyo/face-parsing](https://github.com/yakhyo/face-parsing) - BiSeNet face parsing
- **Gaze Estimation**: [yakhyo/gaze-estimation](https://github.com/yakhyo/gaze-estimation) - MobileGaze training code
- **Anti-Spoofing**: [yakhyo/face-anti-spoofing](https://github.com/yakhyo/face-anti-spoofing) - MiniFASNet inference
- **FairFace Attributes**: [yakhyo/fairface-onnx](https://github.com/yakhyo/fairface-onnx) - FairFace attribute models
- **InsightFace**: [deepinsight/insightface](https://github.com/deepinsight/insightface) - Model architectures and pretrained weights

---

## Contributing

Contributions are welcome! Please see [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines, or open an issue or pull request on [GitHub](https://github.com/yakhyo/uniface).

## License

This project is licensed under the [MIT License](LICENSE).

BIN
assets/einstien.png
Normal file
BIN
assets/einstien.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 1.3 MiB |
BIN
assets/scientists.png
Normal file
BIN
assets/scientists.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 1.9 MiB |
BIN
docs/assets/logo.png
Normal file
BIN
docs/assets/logo.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 33 KiB |
BIN
docs/assets/logo.webp
Normal file
BIN
docs/assets/logo.webp
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 33 KiB |
191
docs/concepts/coordinate-systems.md
Normal file
191
docs/concepts/coordinate-systems.md
Normal file
@@ -0,0 +1,191 @@
# Coordinate Systems

This page explains the coordinate formats used in UniFace.

---

## Image Coordinates

All coordinates use **pixel-based, top-left origin**:

```
(0, 0) ────────────────► x (width)
  │
  │      Image
  │
  ▼
y (height)
```

---

## Bounding Box Format

Bounding boxes use `[x1, y1, x2, y2]` format (top-left and bottom-right corners):

```
(x1, y1) ─────────────────┐
   │                      │
   │         Face         │
   │                      │
   └──────────────────────┘ (x2, y2)
```

### Accessing Coordinates

```python
face = faces[0]

# Direct access
x1, y1, x2, y2 = face.bbox

# As properties
bbox_xyxy = face.bbox_xyxy  # [x1, y1, x2, y2]
bbox_xywh = face.bbox_xywh  # [x1, y1, width, height]
```

### Conversion

```python
import numpy as np

# xyxy → xywh
def xyxy_to_xywh(bbox):
    x1, y1, x2, y2 = bbox
    return np.array([x1, y1, x2 - x1, y2 - y1])

# xywh → xyxy
def xywh_to_xyxy(bbox):
    x, y, w, h = bbox
    return np.array([x, y, x + w, y + h])
```

---

## Landmarks

### 5-Point Landmarks (Detection)

Returned by all detection models:

```python
landmarks = face.landmarks  # Shape: (5, 2)
```

| Index | Point |
|-------|-------|
| 0 | Left Eye |
| 1 | Right Eye |
| 2 | Nose Tip |
| 3 | Left Mouth Corner |
| 4 | Right Mouth Corner |

```
0 ●         ● 1

      ● 2

  3 ●     ● 4
```
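
These indices make basic face geometry easy to compute. A minimal sketch (assuming `face` comes from a detector call as above) that derives the eye centers and inter-ocular distance from the 5-point landmarks:

```python
import numpy as np

landmarks = face.landmarks  # shape (5, 2): eyes, nose, mouth corners

left_eye, right_eye = landmarks[0], landmarks[1]

# Inter-ocular distance: a common reference length for normalizing measurements
inter_ocular = float(np.linalg.norm(right_eye - left_eye))

# Midpoint between the eyes, a useful rough face center
eye_center = (left_eye + right_eye) / 2.0

print(f"Inter-ocular distance: {inter_ocular:.1f}px, eye center: {eye_center}")
```
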

### 106-Point Landmarks

Returned by `Landmark106`:

```python
from uniface import Landmark106

landmarker = Landmark106()
landmarks = landmarker.get_landmarks(image, face.bbox)
# Shape: (106, 2)
```

**Landmark Groups:**

| Range | Group | Points |
|-------|-------|--------|
| 0-32 | Face Contour | 33 |
| 33-50 | Eyebrows | 18 |
| 51-62 | Nose | 12 |
| 63-86 | Eyes | 24 |
| 87-105 | Mouth | 19 |

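Because each group occupies a contiguous index range, a region can be pulled out with a plain slice. A small illustrative sketch using the boundaries from the table above (the `LANDMARK_GROUPS` dict is just a convenience defined here, not part of the library):

```python
# Group boundaries from the table above (start index, end index exclusive)
LANDMARK_GROUPS = {
    "contour": (0, 33),
    "eyebrows": (33, 51),
    "nose": (51, 63),
    "eyes": (63, 87),
    "mouth": (87, 106),
}

# landmarks has shape (106, 2) as returned by Landmark106
mouth_pts = landmarks[slice(*LANDMARK_GROUPS["mouth"])]

# Axis-aligned bounding box around the mouth region
x1, y1 = mouth_pts.min(axis=0)
x2, y2 = mouth_pts.max(axis=0)
print(f"Mouth box: ({x1:.0f}, {y1:.0f}) - ({x2:.0f}, {y2:.0f})")
```
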
---

## Face Crop

To crop a face from an image:

```python
def crop_face(image, bbox, margin=0):
    """Crop face with optional margin."""
    h, w = image.shape[:2]
    x1, y1, x2, y2 = map(int, bbox)

    # Add margin
    if margin > 0:
        bw, bh = x2 - x1, y2 - y1
        x1 = max(0, x1 - int(bw * margin))
        y1 = max(0, y1 - int(bh * margin))
        x2 = min(w, x2 + int(bw * margin))
        y2 = min(h, y2 + int(bh * margin))

    return image[y1:y2, x1:x2]

# Usage
face_crop = crop_face(image, face.bbox, margin=0.1)
```

---

## Gaze Angles

Gaze estimation returns pitch and yaw in **radians**:

```python
result = gaze_estimator.estimate(face_crop)

# Angles in radians
pitch = result.pitch  # Vertical: + = up, - = down
yaw = result.yaw      # Horizontal: + = right, - = left

# Convert to degrees
import numpy as np
pitch_deg = np.degrees(pitch)
yaw_deg = np.degrees(yaw)
```

**Angle Reference:**

```
           pitch = +90° (up)
                 │
                 │
yaw = -90° ──────┼────── yaw = +90°
  (left)         │         (right)
                 │
           pitch = -90° (down)
```

---

## Face Alignment

Face alignment uses 5-point landmarks to normalize face orientation:

```python
from uniface import face_alignment

# Align face to standard template
aligned_face = face_alignment(image, face.landmarks)
# Output: 112x112 aligned face image
```

The alignment transforms faces to a canonical pose for better recognition accuracy.

---

## Next Steps

- [Inputs & Outputs](inputs-outputs.md) - Data types reference
- [Recognition Module](../modules/recognition.md) - Face recognition details
204
docs/concepts/execution-providers.md
Normal file
204
docs/concepts/execution-providers.md
Normal file
@@ -0,0 +1,204 @@
# Execution Providers

UniFace uses ONNX Runtime for model inference, which supports multiple hardware acceleration backends.

---

## Automatic Provider Selection

UniFace automatically selects the optimal execution provider based on available hardware:

```python
from uniface import RetinaFace

# Automatically uses best available provider
detector = RetinaFace()
```

**Priority order:**

1. **CUDAExecutionProvider** - NVIDIA GPU
2. **CoreMLExecutionProvider** - Apple Silicon
3. **CPUExecutionProvider** - Fallback

---

## Check Available Providers

```python
import onnxruntime as ort

providers = ort.get_available_providers()
print("Available providers:", providers)
```

**Example outputs:**

=== "macOS (Apple Silicon)"

    ```
    ['CoreMLExecutionProvider', 'CPUExecutionProvider']
    ```

=== "Linux (NVIDIA GPU)"

    ```
    ['CUDAExecutionProvider', 'CPUExecutionProvider']
    ```

=== "Windows (CPU)"

    ```
    ['CPUExecutionProvider']
    ```

---

## Platform-Specific Setup

### Apple Silicon (M1/M2/M3/M4)

No additional setup required. ARM64 optimizations are built into `onnxruntime`:

```bash
pip install uniface
```

Verify ARM64:

```bash
python -c "import platform; print(platform.machine())"
# Should show: arm64
```

!!! tip "Performance"
    Apple Silicon Macs use CoreML acceleration automatically, providing excellent performance for face analysis tasks.

---

### NVIDIA GPU (CUDA)

Install with GPU support:

```bash
pip install uniface[gpu]
```

**Requirements:**

- CUDA 11.x or 12.x
- cuDNN 8.x
- Compatible NVIDIA driver

Verify CUDA:

```python
import onnxruntime as ort

if 'CUDAExecutionProvider' in ort.get_available_providers():
    print("CUDA is available!")
else:
    print("CUDA not available, using CPU")
```

---

### CPU Fallback

CPU execution is always available:

```bash
pip install uniface
```

Works on all platforms without additional configuration.

---

## Internal API

For advanced use cases, you can access the provider utilities:

```python
from uniface.onnx_utils import get_available_providers, create_onnx_session

# Check available providers
providers = get_available_providers()
print(f"Available: {providers}")

# Models use create_onnx_session() internally
# which auto-selects the best provider
```

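If you want to pin the priority order yourself instead of relying on automatic selection, you can build the ONNX Runtime session directly. A minimal sketch (the model path is only an example of a cached UniFace model; adjust it to your setup):

```python
import os

import onnxruntime as ort

# Example path to a cached model; any .onnx file works here.
model_path = os.path.expanduser("~/.uniface/models/retinaface_mv2.onnx")

# ONNX Runtime tries providers in order and falls back to the next one
# if a provider is not available on this machine.
session = ort.InferenceSession(
    model_path,
    providers=["CUDAExecutionProvider", "CoreMLExecutionProvider", "CPUExecutionProvider"],
)

# Reports the providers actually attached to this session.
print(session.get_providers())
```
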
---

## Performance Tips

### 1. Use GPU When Available

For batch processing or real-time applications, GPU acceleration provides significant speedups:

```bash
pip install uniface[gpu]
```

### 2. Optimize Input Size

Smaller input sizes are faster but may reduce accuracy:

```python
from uniface import RetinaFace

# Faster, lower accuracy
detector = RetinaFace(input_size=(320, 320))

# Balanced (default)
detector = RetinaFace(input_size=(640, 640))
```

### 3. Batch Processing

Process multiple images to maximize GPU utilization:

```python
# Process images in batch (GPU-efficient)
for image_path in image_paths:
    image = cv2.imread(image_path)
    faces = detector.detect(image)
    # ...
```

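To see what these settings buy you on your own hardware, time the detector directly. A rough benchmarking sketch (the image path is a placeholder; the first call is excluded because it pays one-time model loading and provider initialization costs):

```python
import time

import cv2
from uniface import RetinaFace

detector = RetinaFace(input_size=(640, 640))
image = cv2.imread("photo.jpg")

# Warm-up: the first call includes one-time initialization
detector.detect(image)

runs = 20
start = time.perf_counter()
for _ in range(runs):
    detector.detect(image)
elapsed = time.perf_counter() - start

print(f"Average latency: {1000 * elapsed / runs:.1f} ms per image")
```
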
---

## Troubleshooting

### CUDA Not Detected

1. Verify CUDA installation:

    ```bash
    nvidia-smi
    ```

2. Check CUDA version compatibility with ONNX Runtime

3. Reinstall with GPU support:

    ```bash
    pip uninstall onnxruntime onnxruntime-gpu
    pip install uniface[gpu]
    ```

### Slow Performance on Mac

Verify you're using ARM64 Python (not Rosetta):

```bash
python -c "import platform; print(platform.machine())"
# Should show: arm64 (not x86_64)
```

---

## Next Steps

- [Model Cache & Offline](model-cache-offline.md) - Model management
- [Thresholds & Calibration](thresholds-calibration.md) - Tuning parameters
218
docs/concepts/inputs-outputs.md
Normal file
218
docs/concepts/inputs-outputs.md
Normal file
@@ -0,0 +1,218 @@
|
||||
# Inputs & Outputs
|
||||
|
||||
This page describes the data types used throughout UniFace.
|
||||
|
||||
---
|
||||
|
||||
## Input: Images
|
||||
|
||||
All models accept NumPy arrays in **BGR format** (OpenCV default):
|
||||
|
||||
```python
|
||||
import cv2
|
||||
|
||||
# Load image (BGR format)
|
||||
image = cv2.imread("photo.jpg")
|
||||
print(f"Shape: {image.shape}") # (H, W, 3)
|
||||
print(f"Dtype: {image.dtype}") # uint8
|
||||
```
|
||||
|
||||
!!! warning "Color Format"
|
||||
UniFace expects **BGR** format (OpenCV default). If using PIL or other libraries, convert first:
|
||||
|
||||
```python
|
||||
from PIL import Image
|
||||
import numpy as np
|
||||
|
||||
pil_image = Image.open("photo.jpg")
|
||||
bgr_image = np.array(pil_image)[:, :, ::-1] # RGB → BGR
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Output: Face Dataclass
|
||||
|
||||
Detection returns a list of `Face` objects:
|
||||
|
||||
```python
|
||||
from dataclasses import dataclass
|
||||
import numpy as np
|
||||
|
||||
@dataclass
|
||||
class Face:
|
||||
# Required (from detection)
|
||||
bbox: np.ndarray # [x1, y1, x2, y2]
|
||||
confidence: float # 0.0 to 1.0
|
||||
landmarks: np.ndarray # (5, 2) or (106, 2)
|
||||
|
||||
# Optional (enriched by analyzers)
|
||||
embedding: np.ndarray | None = None
|
||||
gender: int | None = None # 0=Female, 1=Male
|
||||
age: int | None = None # Years
|
||||
age_group: str | None = None # "20-29", etc.
|
||||
race: str | None = None # "East Asian", etc.
|
||||
emotion: str | None = None # "Happy", etc.
|
||||
emotion_confidence: float | None = None
|
||||
```
|
||||
|
||||
### Properties
|
||||
|
||||
```python
|
||||
face = faces[0]
|
||||
|
||||
# Bounding box formats
|
||||
face.bbox_xyxy # [x1, y1, x2, y2] - same as bbox
|
||||
face.bbox_xywh # [x1, y1, width, height]
|
||||
|
||||
# Gender as string
|
||||
face.sex # "Female" or "Male" (None if not predicted)
|
||||
```
|
||||
|
||||
### Methods
|
||||
|
||||
```python
|
||||
# Compute similarity with another face
|
||||
similarity = face1.compute_similarity(face2)
|
||||
|
||||
# Convert to dictionary
|
||||
face_dict = face.to_dict()
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Result Types
|
||||
|
||||
### GazeResult
|
||||
|
||||
```python
|
||||
from dataclasses import dataclass
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class GazeResult:
|
||||
pitch: float # Vertical angle (radians), + = up
|
||||
yaw: float # Horizontal angle (radians), + = right
|
||||
```
|
||||
|
||||
**Usage:**
|
||||
|
||||
```python
|
||||
import numpy as np
|
||||
|
||||
result = gaze_estimator.estimate(face_crop)
|
||||
print(f"Pitch: {np.degrees(result.pitch):.1f}°")
|
||||
print(f"Yaw: {np.degrees(result.yaw):.1f}°")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### SpoofingResult
|
||||
|
||||
```python
|
||||
@dataclass(frozen=True)
|
||||
class SpoofingResult:
|
||||
is_real: bool # True = real, False = fake
|
||||
confidence: float # 0.0 to 1.0
|
||||
```
|
||||
|
||||
**Usage:**
|
||||
|
||||
```python
|
||||
result = spoofer.predict(image, face.bbox)
|
||||
label = "Real" if result.is_real else "Fake"
|
||||
print(f"{label}: {result.confidence:.1%}")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### AttributeResult
|
||||
|
||||
```python
|
||||
@dataclass(frozen=True)
|
||||
class AttributeResult:
|
||||
gender: int # 0=Female, 1=Male
|
||||
age: int | None # Years (AgeGender model)
|
||||
age_group: str | None # "20-29" (FairFace model)
|
||||
race: str | None # Race label (FairFace model)
|
||||
|
||||
@property
|
||||
def sex(self) -> str:
|
||||
return "Female" if self.gender == 0 else "Male"
|
||||
```
|
||||
|
||||
**Usage:**
|
||||
|
||||
```python
|
||||
# AgeGender model
|
||||
result = age_gender.predict(image, face.bbox)
|
||||
print(f"{result.sex}, {result.age} years old")
|
||||
|
||||
# FairFace model
|
||||
result = fairface.predict(image, face.bbox)
|
||||
print(f"{result.sex}, {result.age_group}, {result.race}")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### EmotionResult
|
||||
|
||||
```python
|
||||
@dataclass(frozen=True)
|
||||
class EmotionResult:
|
||||
emotion: str # "Happy", "Sad", etc.
|
||||
confidence: float # 0.0 to 1.0
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Embeddings
|
||||
|
||||
Face recognition models return normalized 512-dimensional embeddings:
|
||||
|
||||
```python
|
||||
embedding = recognizer.get_normalized_embedding(image, landmarks)
|
||||
print(f"Shape: {embedding.shape}") # (1, 512)
|
||||
print(f"Norm: {np.linalg.norm(embedding):.4f}") # ~1.0
|
||||
```
|
||||
|
||||
### Similarity Computation
|
||||
|
||||
```python
|
||||
from uniface import compute_similarity
|
||||
|
||||
similarity = compute_similarity(embedding1, embedding2)
|
||||
# Returns: float between -1 and 1 (cosine similarity)
|
||||
```
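
Because the embeddings are L2-normalized, cosine similarity reduces to a dot product. A quick sketch of the equivalent computation in plain NumPy, using the `(1, 512)` embeddings shown above:

```python
import numpy as np

# embedding1 and embedding2 have shape (1, 512) and unit norm,
# so the dot product of the two rows is exactly their cosine similarity.
manual_similarity = (embedding1 @ embedding2.T).item()

# For unnormalized vectors, divide by the norms explicitly:
# cos = (a @ b.T) / (np.linalg.norm(a) * np.linalg.norm(b))
print(f"Cosine similarity: {manual_similarity:.4f}")
```
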
|
||||
|
||||
---
|
||||
|
||||
## Parsing Masks
|
||||
|
||||
Face parsing returns a segmentation mask:
|
||||
|
||||
```python
|
||||
mask = parser.parse(face_image)
|
||||
print(f"Shape: {mask.shape}") # (H, W)
|
||||
print(f"Classes: {np.unique(mask)}") # [0, 1, 2, ...]
|
||||
```
|
||||
|
||||
**19 Classes:**
|
||||
|
||||
| ID | Class | ID | Class |
|
||||
|----|-------|----|-------|
|
||||
| 0 | Background | 10 | Ear Ring |
|
||||
| 1 | Skin | 11 | Nose |
|
||||
| 2 | Left Eyebrow | 12 | Mouth |
|
||||
| 3 | Right Eyebrow | 13 | Upper Lip |
|
||||
| 4 | Left Eye | 14 | Lower Lip |
|
||||
| 5 | Right Eye | 15 | Neck |
|
||||
| 6 | Eye Glasses | 16 | Neck Lace |
|
||||
| 7 | Left Ear | 17 | Cloth |
|
||||
| 8 | Right Ear | 18 | Hair |
|
||||
| 9 | Hat | | |
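
Each pixel holds one of these class IDs, so regions fall out of simple comparisons. A small sketch (class IDs taken from the table above) that isolates skin and hair:

```python
import numpy as np

# mask is the (H, W) array of class IDs returned by parser.parse(...)
skin = mask == 1
hair = mask == 18

print(f"Skin covers {skin.mean():.1%} of the crop, hair {hair.mean():.1%}")

# Combine several regions into one boolean foreground mask if needed
foreground = np.isin(mask, [1, 11, 12, 13, 14])  # skin, nose, mouth, lips
```
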
|
||||
|
||||
---
|
||||
|
||||
## Next Steps
|
||||
|
||||
- [Coordinate Systems](coordinate-systems.md) - Bbox and landmark formats
|
||||
- [Thresholds & Calibration](thresholds-calibration.md) - Tuning confidence thresholds
|
||||
220
docs/concepts/model-cache-offline.md
Normal file
220
docs/concepts/model-cache-offline.md
Normal file
@@ -0,0 +1,220 @@
|
||||
# Model Cache & Offline Use
|
||||
|
||||
UniFace automatically downloads and caches models. This page explains how model management works.
|
||||
|
||||
---
|
||||
|
||||
## Automatic Download
|
||||
|
||||
Models are downloaded on first use:
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace
|
||||
|
||||
# First run: downloads model to cache
|
||||
detector = RetinaFace() # ~3.5 MB download
|
||||
|
||||
# Subsequent runs: loads from cache
|
||||
detector = RetinaFace() # Instant
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Cache Location
|
||||
|
||||
Default cache directory:
|
||||
|
||||
```
|
||||
~/.uniface/models/
|
||||
```
|
||||
|
||||
**Example structure:**
|
||||
|
||||
```
|
||||
~/.uniface/models/
|
||||
├── retinaface_mv2.onnx
|
||||
├── w600k_mbf.onnx
|
||||
├── 2d106det.onnx
|
||||
├── gaze_resnet34.onnx
|
||||
├── parsing_resnet18.onnx
|
||||
└── ...
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Custom Cache Directory
|
||||
|
||||
Specify a custom cache location:
|
||||
|
||||
```python
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.constants import RetinaFaceWeights
|
||||
|
||||
# Download to custom directory
|
||||
model_path = verify_model_weights(
|
||||
RetinaFaceWeights.MNET_V2,
|
||||
root='./my_models'
|
||||
)
|
||||
print(f"Model at: {model_path}")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Pre-Download Models
|
||||
|
||||
Download models before deployment:
|
||||
|
||||
```python
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.constants import (
|
||||
RetinaFaceWeights,
|
||||
ArcFaceWeights,
|
||||
AgeGenderWeights,
|
||||
)
|
||||
|
||||
# Download all needed models
|
||||
models = [
|
||||
RetinaFaceWeights.MNET_V2,
|
||||
ArcFaceWeights.MNET,
|
||||
AgeGenderWeights.DEFAULT,
|
||||
]
|
||||
|
||||
for model in models:
|
||||
path = verify_model_weights(model)
|
||||
print(f"Downloaded: {path}")
|
||||
```
|
||||
|
||||
Or use the CLI tool:
|
||||
|
||||
```bash
|
||||
python tools/download_model.py
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Offline Use
|
||||
|
||||
For air-gapped or offline environments:
|
||||
|
||||
### 1. Pre-download models
|
||||
|
||||
On a connected machine:
|
||||
|
||||
```python
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.constants import RetinaFaceWeights
|
||||
|
||||
path = verify_model_weights(RetinaFaceWeights.MNET_V2)
|
||||
print(f"Copy from: {path}")
|
||||
```
|
||||
|
||||
### 2. Copy to target machine
|
||||
|
||||
```bash
|
||||
# Copy the entire cache directory
|
||||
scp -r ~/.uniface/models/ user@offline-machine:~/.uniface/models/
|
||||
```
|
||||
|
||||
### 3. Use normally
|
||||
|
||||
```python
|
||||
# Models load from local cache
|
||||
from uniface import RetinaFace
|
||||
detector = RetinaFace() # No network required
|
||||
```
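
Before relying on an air-gapped machine, it can help to confirm that every required file is actually in the cache. A defensive sketch; the filenames are the example cache entries shown earlier, so substitute the models you actually use:

```python
from pathlib import Path

cache_dir = Path.home() / ".uniface" / "models"

# Example filenames from the cache layout shown above
required = ["retinaface_mv2.onnx", "w600k_mbf.onnx", "2d106det.onnx"]

missing = [name for name in required if not (cache_dir / name).exists()]
if missing:
    raise SystemExit(f"Missing cached models: {missing} - copy them before going offline")
print("All required models are cached.")
```
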
|
||||
|
||||
---
|
||||
|
||||
## Model Verification
|
||||
|
||||
Models are verified with SHA-256 checksums:
|
||||
|
||||
```python
|
||||
from uniface.constants import MODEL_SHA256, RetinaFaceWeights
|
||||
|
||||
# Check expected checksum
|
||||
expected = MODEL_SHA256[RetinaFaceWeights.MNET_V2]
|
||||
print(f"Expected SHA256: {expected}")
|
||||
```
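
To check a file you copied manually against that table, hash it with `hashlib` and compare. A minimal sketch (the path follows the default cache layout described on this page):

```python
import hashlib
from pathlib import Path

from uniface.constants import MODEL_SHA256, RetinaFaceWeights

model_path = Path.home() / ".uniface" / "models" / "retinaface_mv2.onnx"

# Stream the file in chunks so large models do not need to fit in memory
digest = hashlib.sha256()
with open(model_path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        digest.update(chunk)

print("Checksum OK:", digest.hexdigest() == MODEL_SHA256[RetinaFaceWeights.MNET_V2])
```
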
|
||||
|
||||
If a model fails verification, it's re-downloaded automatically.
|
||||
|
||||
---
|
||||
|
||||
## Available Models
|
||||
|
||||
### Detection Models
|
||||
|
||||
| Model | Size | Download |
|
||||
|-------|------|----------|
|
||||
| RetinaFace MNET_025 | 1.7 MB | ✅ |
|
||||
| RetinaFace MNET_V2 | 3.5 MB | ✅ |
|
||||
| RetinaFace RESNET34 | 56 MB | ✅ |
|
||||
| SCRFD 500M | 2.5 MB | ✅ |
|
||||
| SCRFD 10G | 17 MB | ✅ |
|
||||
| YOLOv5n-Face | 11 MB | ✅ |
|
||||
| YOLOv5s-Face | 28 MB | ✅ |
|
||||
| YOLOv5m-Face | 82 MB | ✅ |
|
||||
| YOLOv8-Lite-S | 7.4 MB | ✅ |
|
||||
| YOLOv8n-Face | 12 MB | ✅ |
|
||||
|
||||
### Recognition Models
|
||||
|
||||
| Model | Size | Download |
|
||||
|-------|------|----------|
|
||||
| ArcFace MNET | 8 MB | ✅ |
|
||||
| ArcFace RESNET | 166 MB | ✅ |
|
||||
| MobileFace MNET_V2 | 4 MB | ✅ |
|
||||
| SphereFace SPHERE20 | 50 MB | ✅ |
|
||||
|
||||
### Other Models
|
||||
|
||||
| Model | Size | Download |
|
||||
|-------|------|----------|
|
||||
| Landmark106 | 14 MB | ✅ |
|
||||
| AgeGender | 8 MB | ✅ |
|
||||
| FairFace | 44 MB | ✅ |
|
||||
| Gaze ResNet34 | 82 MB | ✅ |
|
||||
| BiSeNet ResNet18 | 51 MB | ✅ |
|
||||
| MiniFASNet V2 | 1.2 MB | ✅ |
|
||||
|
||||
---
|
||||
|
||||
## Clear Cache
|
||||
|
||||
Remove cached models:
|
||||
|
||||
```bash
|
||||
# Remove all cached models
|
||||
rm -rf ~/.uniface/models/
|
||||
|
||||
# Remove specific model
|
||||
rm ~/.uniface/models/retinaface_mv2.onnx
|
||||
```
|
||||
|
||||
Models will be re-downloaded on next use.
|
||||
|
||||
---
|
||||
|
||||
## Environment Variables
|
||||
|
||||
Set custom cache location via environment variable:
|
||||
|
||||
```bash
|
||||
export UNIFACE_CACHE_DIR=/path/to/custom/cache
|
||||
```
|
||||
|
||||
```python
|
||||
import os
|
||||
os.environ['UNIFACE_CACHE_DIR'] = '/path/to/custom/cache'
|
||||
|
||||
from uniface import RetinaFace
|
||||
detector = RetinaFace() # Uses custom cache
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Next Steps
|
||||
|
||||
- [Thresholds & Calibration](thresholds-calibration.md) - Tune model parameters
|
||||
- [Detection Module](../modules/detection.md) - Detection model details
|
||||
196
docs/concepts/overview.md
Normal file
196
docs/concepts/overview.md
Normal file
@@ -0,0 +1,196 @@
|
||||
# Overview
|
||||
|
||||
UniFace is designed as a modular, production-ready face analysis library. This page explains the architecture and design principles.
|
||||
|
||||
---
|
||||
|
||||
## Architecture
|
||||
|
||||
UniFace follows a modular architecture where each face analysis task is handled by a dedicated module:
|
||||
|
||||
```mermaid
|
||||
graph TB
|
||||
subgraph Input
|
||||
IMG[Image/Frame]
|
||||
end
|
||||
|
||||
subgraph Detection
|
||||
DET[RetinaFace / SCRFD / YOLOv5Face / YOLOv8Face]
|
||||
end
|
||||
|
||||
subgraph Analysis
|
||||
REC[Recognition]
|
||||
LMK[Landmarks]
|
||||
ATTR[Attributes]
|
||||
GAZE[Gaze]
|
||||
PARSE[Parsing]
|
||||
SPOOF[Anti-Spoofing]
|
||||
PRIV[Privacy]
|
||||
end
|
||||
|
||||
subgraph Output
|
||||
FACE[Face Objects]
|
||||
end
|
||||
|
||||
IMG --> DET
|
||||
DET --> REC
|
||||
DET --> LMK
|
||||
DET --> ATTR
|
||||
DET --> GAZE
|
||||
DET --> PARSE
|
||||
DET --> SPOOF
|
||||
DET --> PRIV
|
||||
REC --> FACE
|
||||
LMK --> FACE
|
||||
ATTR --> FACE
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Design Principles
|
||||
|
||||
### 1. ONNX-First
|
||||
|
||||
All models use ONNX Runtime for inference:
|
||||
|
||||
- **Cross-platform**: Same models work on macOS, Linux, Windows
|
||||
- **Hardware acceleration**: Automatic selection of optimal provider
|
||||
- **Production-ready**: No Python-only dependencies for inference
|
||||
|
||||
### 2. Minimal Dependencies
|
||||
|
||||
Core dependencies are kept minimal:
|
||||
|
||||
```
|
||||
numpy # Array operations
|
||||
opencv-python # Image processing
|
||||
onnxruntime # Model inference
|
||||
requests # Model download
|
||||
tqdm # Progress bars
|
||||
```
|
||||
|
||||
### 3. Simple API
|
||||
|
||||
Factory functions and direct instantiation:
|
||||
|
||||
```python
|
||||
# Factory function
|
||||
detector = create_detector('retinaface')
|
||||
|
||||
# Direct instantiation (recommended)
|
||||
from uniface import RetinaFace
|
||||
detector = RetinaFace()
|
||||
```
|
||||
|
||||
### 4. Type Safety
|
||||
|
||||
Full type hints throughout:
|
||||
|
||||
```python
|
||||
def detect(self, image: np.ndarray) -> list[Face]:
|
||||
...
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Module Structure
|
||||
|
||||
```
|
||||
uniface/
|
||||
├── detection/ # Face detection (RetinaFace, SCRFD, YOLOv5Face, YOLOv8Face)
|
||||
├── recognition/ # Face recognition (AdaFace, ArcFace, MobileFace, SphereFace)
|
||||
├── landmark/ # 106-point landmarks
|
||||
├── attribute/ # Age, gender, emotion, race
|
||||
├── parsing/ # Face semantic segmentation
|
||||
├── gaze/ # Gaze estimation
|
||||
├── spoofing/ # Anti-spoofing
|
||||
├── privacy/ # Face anonymization
|
||||
├── types.py # Dataclasses (Face, GazeResult, etc.)
|
||||
├── constants.py # Model weights and URLs
|
||||
├── model_store.py # Model download and caching
|
||||
├── onnx_utils.py # ONNX Runtime utilities
|
||||
└── visualization.py # Drawing utilities
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Workflow
|
||||
|
||||
A typical face analysis workflow:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace, ArcFace, AgeGender
|
||||
|
||||
# 1. Initialize models
|
||||
detector = RetinaFace()
|
||||
recognizer = ArcFace()
|
||||
age_gender = AgeGender()
|
||||
|
||||
# 2. Load image
|
||||
image = cv2.imread("photo.jpg")
|
||||
|
||||
# 3. Detect faces
|
||||
faces = detector.detect(image)
|
||||
|
||||
# 4. Analyze each face
|
||||
for face in faces:
|
||||
# Recognition embedding
|
||||
embedding = recognizer.get_normalized_embedding(image, face.landmarks)
|
||||
|
||||
# Attributes
|
||||
attrs = age_gender.predict(image, face.bbox)
|
||||
|
||||
print(f"Face: {attrs.sex}, {attrs.age} years")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## FaceAnalyzer
|
||||
|
||||
For convenience, `FaceAnalyzer` combines multiple modules:
|
||||
|
||||
```python
|
||||
from uniface import FaceAnalyzer
|
||||
|
||||
analyzer = FaceAnalyzer(
|
||||
detect=True,
|
||||
recognize=True,
|
||||
attributes=True
|
||||
)
|
||||
|
||||
faces = analyzer.analyze(image)
|
||||
for face in faces:
|
||||
print(f"Age: {face.age}, Gender: {face.sex}")
|
||||
print(f"Embedding: {face.embedding.shape}")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Model Lifecycle
|
||||
|
||||
1. **First use**: Model is downloaded from GitHub releases
|
||||
2. **Cached**: Stored in `~/.uniface/models/`
|
||||
3. **Verified**: SHA-256 checksum validation
|
||||
4. **Loaded**: ONNX Runtime session created
|
||||
5. **Inference**: Hardware-accelerated execution
|
||||
|
||||
```python
|
||||
# Models auto-download on first use
|
||||
detector = RetinaFace() # Downloads if not cached
|
||||
|
||||
# Or manually pre-download
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.constants import RetinaFaceWeights
|
||||
|
||||
path = verify_model_weights(RetinaFaceWeights.MNET_V2)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Next Steps
|
||||
|
||||
- [Inputs & Outputs](inputs-outputs.md) - Understand data types
|
||||
- [Execution Providers](execution-providers.md) - Hardware acceleration
|
||||
- [Detection Module](../modules/detection.md) - Start with face detection
|
||||
- [Image Pipeline Recipe](../recipes/image-pipeline.md) - Complete workflow
|
||||
234
docs/concepts/thresholds-calibration.md
Normal file
234
docs/concepts/thresholds-calibration.md
Normal file
@@ -0,0 +1,234 @@
|
||||
# Thresholds & Calibration
|
||||
|
||||
This page explains how to tune detection and recognition thresholds for your use case.
|
||||
|
||||
---
|
||||
|
||||
## Detection Thresholds
|
||||
|
||||
### Confidence Threshold
|
||||
|
||||
Controls minimum confidence for face detection:
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace
|
||||
|
||||
# Default (balanced)
|
||||
detector = RetinaFace(confidence_threshold=0.5)
|
||||
|
||||
# High precision (fewer false positives)
|
||||
detector = RetinaFace(confidence_threshold=0.8)
|
||||
|
||||
# High recall (catch more faces)
|
||||
detector = RetinaFace(confidence_threshold=0.3)
|
||||
```
|
||||
|
||||
**Guidelines:**
|
||||
|
||||
| Threshold | Use Case |
|
||||
|-----------|----------|
|
||||
| 0.3 - 0.4 | Maximum recall (research, analysis) |
|
||||
| 0.5 - 0.6 | Balanced (default, general use) |
|
||||
| 0.7 - 0.9 | High precision (production, security) |
|
||||
|
||||
---
|
||||
|
||||
### NMS Threshold
|
||||
|
||||
Non-Maximum Suppression removes overlapping detections:
|
||||
|
||||
```python
|
||||
# Default
|
||||
detector = RetinaFace(nms_threshold=0.4)
|
||||
|
||||
# Stricter (fewer overlapping boxes)
|
||||
detector = RetinaFace(nms_threshold=0.3)
|
||||
|
||||
# Looser (for crowded scenes)
|
||||
detector = RetinaFace(nms_threshold=0.5)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Input Size
|
||||
|
||||
Affects detection accuracy and speed:
|
||||
|
||||
```python
|
||||
# Faster, lower accuracy
|
||||
detector = RetinaFace(input_size=(320, 320))
|
||||
|
||||
# Balanced (default)
|
||||
detector = RetinaFace(input_size=(640, 640))
|
||||
|
||||
# Higher accuracy, slower
|
||||
detector = RetinaFace(input_size=(1280, 1280))
|
||||
```
|
||||
|
||||
!!! tip "Dynamic Size"
|
||||
For RetinaFace, enable dynamic input for variable image sizes:
|
||||
```python
|
||||
detector = RetinaFace(dynamic_size=True)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Recognition Thresholds
|
||||
|
||||
### Similarity Threshold
|
||||
|
||||
For identity verification (same person check):
|
||||
|
||||
```python
|
||||
import numpy as np
|
||||
from uniface import compute_similarity
|
||||
|
||||
similarity = compute_similarity(embedding1, embedding2)
|
||||
|
||||
# Threshold interpretation
|
||||
if similarity > 0.6:
|
||||
print("Same person (high confidence)")
|
||||
elif similarity > 0.4:
|
||||
print("Uncertain (manual review)")
|
||||
else:
|
||||
print("Different people")
|
||||
```
|
||||
|
||||
**Recommended thresholds:**
|
||||
|
||||
| Threshold | Decision | False Accept Rate |
|
||||
|-----------|----------|-------------------|
|
||||
| 0.4 | Low security | Higher FAR |
|
||||
| 0.5 | Balanced | Moderate FAR |
|
||||
| 0.6 | High security | Lower FAR |
|
||||
| 0.7 | Very strict | Very low FAR |
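
Wrapping your chosen operating point in a tiny helper keeps the decision logic in one place. A minimal sketch built on `compute_similarity`; the 0.6 default simply mirrors the high-security row above:

```python
from uniface import compute_similarity

def is_same_person(embedding1, embedding2, threshold=0.6):
    """Return True if two embeddings are judged to belong to the same person."""
    return compute_similarity(embedding1, embedding2) >= threshold

# Tighten the threshold for higher-security applications
same = is_same_person(embedding1, embedding2, threshold=0.7)
```
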
|
||||
|
||||
---
|
||||
|
||||
### Calibration for Your Dataset
|
||||
|
||||
Test on your data to find optimal thresholds:
|
||||
|
||||
```python
import cv2
import numpy as np

def calibrate_threshold(same_pairs, diff_pairs, recognizer, detector):
    """Find optimal threshold for your dataset."""
    same_scores = []
    diff_scores = []

    def pair_similarity(img1_path, img2_path):
        """Similarity of the first detected face in each image, or None if detection fails."""
        img1 = cv2.imread(img1_path)
        img2 = cv2.imread(img2_path)

        faces1 = detector.detect(img1)
        faces2 = detector.detect(img2)
        if not (faces1 and faces2):
            return None

        emb1 = recognizer.get_normalized_embedding(img1, faces1[0].landmarks)
        emb2 = recognizer.get_normalized_embedding(img2, faces2[0].landmarks)
        return float(np.dot(emb1, emb2.T)[0][0])

    # Compute similarities for same-person pairs
    for img1_path, img2_path in same_pairs:
        similarity = pair_similarity(img1_path, img2_path)
        if similarity is not None:
            same_scores.append(similarity)

    # Compute similarities for different-person pairs (same process)
    for img1_path, img2_path in diff_pairs:
        similarity = pair_similarity(img1_path, img2_path)
        if similarity is not None:
            diff_scores.append(similarity)

    # Find optimal threshold
    thresholds = np.arange(0.3, 0.8, 0.05)
    best_threshold = 0.5
    best_accuracy = 0.0

    for thresh in thresholds:
        tp = sum(1 for s in same_scores if s >= thresh)
        tn = sum(1 for s in diff_scores if s < thresh)
        accuracy = (tp + tn) / (len(same_scores) + len(diff_scores))

        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_threshold = thresh

    return best_threshold, best_accuracy
```
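
Called with lists of image-path pairs, the helper returns the best operating point it found on your data. A hypothetical usage sketch (paths and pair lists are placeholders):

```python
# detector / recognizer created earlier, e.g. RetinaFace() and ArcFace()
same_pairs = [("alice_1.jpg", "alice_2.jpg"), ("bob_1.jpg", "bob_2.jpg")]
diff_pairs = [("alice_1.jpg", "bob_1.jpg")]

best_threshold, best_accuracy = calibrate_threshold(same_pairs, diff_pairs, recognizer, detector)
print(f"Best threshold: {best_threshold:.2f} (accuracy {best_accuracy:.1%})")
```
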
|
||||
|
||||
---
|
||||
|
||||
## Anti-Spoofing Thresholds
|
||||
|
||||
The MiniFASNet model returns a confidence score:
|
||||
|
||||
```python
|
||||
from uniface.spoofing import MiniFASNet
|
||||
|
||||
spoofer = MiniFASNet()
|
||||
result = spoofer.predict(image, face.bbox)
|
||||
|
||||
# Default threshold (0.5)
|
||||
if result.is_real: # confidence > 0.5
|
||||
print("Real face")
|
||||
|
||||
# Custom threshold for high security
|
||||
SPOOF_THRESHOLD = 0.7
|
||||
if result.confidence > SPOOF_THRESHOLD:
|
||||
print("Real face (high confidence)")
|
||||
else:
|
||||
print("Potentially fake")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Attribute Model Confidence
|
||||
|
||||
### Emotion
|
||||
|
||||
```python
|
||||
result = emotion_predictor.predict(image, landmarks)
|
||||
|
||||
# Filter low-confidence predictions
|
||||
if result.confidence > 0.6:
|
||||
print(f"Emotion: {result.emotion}")
|
||||
else:
|
||||
print("Uncertain emotion")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Visualization Threshold
|
||||
|
||||
For drawing detections, filter by confidence:
|
||||
|
||||
```python
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
# Only draw high-confidence detections
|
||||
bboxes = [f.bbox for f in faces if f.confidence > 0.7]
|
||||
scores = [f.confidence for f in faces if f.confidence > 0.7]
|
||||
landmarks = [f.landmarks for f in faces if f.confidence > 0.7]
|
||||
|
||||
draw_detections(
|
||||
image=image,
|
||||
bboxes=bboxes,
|
||||
scores=scores,
|
||||
landmarks=landmarks,
|
||||
vis_threshold=0.6 # Additional visualization filter
|
||||
)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Summary
|
||||
|
||||
| Parameter | Default | Range | Lower = | Higher = |
|
||||
|-----------|---------|-------|---------|----------|
|
||||
| `confidence_threshold` | 0.5 | 0.1-0.9 | More detections | Fewer false positives |
|
||||
| `nms_threshold` | 0.4 | 0.1-0.7 | Fewer overlaps | More overlapping boxes |
|
||||
| Similarity threshold | 0.6 | 0.3-0.8 | More matches (FAR↑) | Fewer matches (FRR↑) |
|
||||
| Spoof confidence | 0.5 | 0.3-0.9 | More "real" | Stricter liveness |
|
||||
|
||||
---
|
||||
|
||||
## Next Steps
|
||||
|
||||
- [Detection Module](../modules/detection.md) - Detection model options
|
||||
- [Recognition Module](../modules/recognition.md) - Recognition model options
|
||||
72
docs/contributing.md
Normal file
72
docs/contributing.md
Normal file
@@ -0,0 +1,72 @@
|
||||
# Contributing
|
||||
|
||||
Thank you for contributing to UniFace!
|
||||
|
||||
---
|
||||
|
||||
## Quick Start
|
||||
|
||||
```bash
|
||||
# Clone
|
||||
git clone https://github.com/yakhyo/uniface.git
|
||||
cd uniface
|
||||
|
||||
# Install dev dependencies
|
||||
pip install -e ".[dev]"
|
||||
|
||||
# Run tests
|
||||
pytest
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Code Style
|
||||
|
||||
We use [Ruff](https://docs.astral.sh/ruff/) for formatting:
|
||||
|
||||
```bash
|
||||
ruff format .
|
||||
ruff check . --fix
|
||||
```
|
||||
|
||||
**Guidelines:**
|
||||
|
||||
- Line length: 120
|
||||
- Python 3.11+ type hints
|
||||
- Google-style docstrings
|
||||
|
||||
---
|
||||
|
||||
## Pre-commit Hooks
|
||||
|
||||
```bash
|
||||
pip install pre-commit
|
||||
pre-commit install
|
||||
pre-commit run --all-files
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Pull Request Process
|
||||
|
||||
1. Fork the repository
|
||||
2. Create a feature branch
|
||||
3. Write tests for new features
|
||||
4. Ensure tests pass
|
||||
5. Submit PR with clear description
|
||||
|
||||
---
|
||||
|
||||
## Adding New Models
|
||||
|
||||
1. Create model class in appropriate submodule
|
||||
2. Add weight constants to `uniface/constants.py`
|
||||
3. Export in `__init__.py` files
|
||||
4. Write tests in `tests/`
|
||||
5. Add example in `tools/` or notebooks
|
||||
|
||||
---
|
||||
|
||||
## Questions?
|
||||
|
||||
Open an issue on [GitHub](https://github.com/yakhyo/uniface/issues).
|
||||
133
docs/index.md
Normal file
133
docs/index.md
Normal file
@@ -0,0 +1,133 @@
|
||||
---
|
||||
hide:
|
||||
- toc
|
||||
- navigation
|
||||
- edit
|
||||
template: home.html
|
||||
---
|
||||
|
||||
<div class="hero" markdown>
|
||||
|
||||
# UniFace { .hero-title }
|
||||
|
||||
<p class="hero-subtitle">A lightweight, production-ready face analysis library built on ONNX Runtime</p>
|
||||
|
||||
[](https://pypi.org/project/uniface/)
|
||||
[](https://www.python.org/)
|
||||
[](https://opensource.org/licenses/MIT)
|
||||
[](https://pepy.tech/project/uniface)
|
||||
|
||||
[Get Started](quickstart.md){ .md-button .md-button--primary }
|
||||
[View on GitHub](https://github.com/yakhyo/uniface){ .md-button }
|
||||
|
||||
</div>
|
||||
|
||||
<div class="feature-grid" markdown>
|
||||
|
||||
<div class="feature-card" markdown>
|
||||
### :material-face-recognition: Face Detection
|
||||
ONNX-optimized detectors (RetinaFace, SCRFD, YOLO) with 5-point landmarks.
|
||||
</div>
|
||||
|
||||
<div class="feature-card" markdown>
|
||||
### :material-account-check: Face Recognition
|
||||
AdaFace, ArcFace, MobileFace, and SphereFace embeddings for identity verification.
|
||||
</div>
|
||||
|
||||
<div class="feature-card" markdown>
|
||||
### :material-map-marker: Landmarks
|
||||
Accurate 106-point facial landmark localization for detailed face analysis.
|
||||
</div>
|
||||
|
||||
<div class="feature-card" markdown>
|
||||
### :material-account-details: Attributes
|
||||
Age, gender, race (FairFace), and emotion detection from faces.
|
||||
</div>
|
||||
|
||||
<div class="feature-card" markdown>
|
||||
### :material-face-man-shimmer: Face Parsing
|
||||
BiSeNet semantic segmentation with 19 facial component classes.
|
||||
</div>
|
||||
|
||||
<div class="feature-card" markdown>
|
||||
### :material-eye: Gaze Estimation
|
||||
Real-time gaze direction prediction with MobileGaze models.
|
||||
</div>
|
||||
|
||||
<div class="feature-card" markdown>
|
||||
### :material-shield-check: Anti-Spoofing
|
||||
Face liveness detection with MiniFASNet to prevent fraud.
|
||||
</div>
|
||||
|
||||
<div class="feature-card" markdown>
|
||||
### :material-blur: Privacy
|
||||
Face anonymization with 5 blur methods for privacy protection.
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
---
|
||||
|
||||
## Installation
|
||||
|
||||
=== "Standard"
|
||||
|
||||
```bash
|
||||
pip install uniface
|
||||
```
|
||||
|
||||
=== "GPU (CUDA)"
|
||||
|
||||
```bash
|
||||
pip install uniface[gpu]
|
||||
```
|
||||
|
||||
=== "From Source"
|
||||
|
||||
```bash
|
||||
git clone https://github.com/yakhyo/uniface.git
|
||||
cd uniface
|
||||
pip install -e .
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Next Steps
|
||||
|
||||
<div class="next-steps-grid" markdown>
|
||||
|
||||
<div class="feature-card" markdown>
|
||||
### :material-rocket-launch: Quickstart
|
||||
Get up and running in 5 minutes with common use cases.
|
||||
|
||||
[Quickstart Guide →](quickstart.md)
|
||||
</div>
|
||||
|
||||
<div class="feature-card" markdown>
|
||||
### :material-school: Tutorials
|
||||
Step-by-step examples for common workflows.
|
||||
|
||||
[View Tutorials →](recipes/image-pipeline.md)
|
||||
</div>
|
||||
|
||||
<div class="feature-card" markdown>
|
||||
### :material-api: API Reference
|
||||
Explore individual modules and their APIs.
|
||||
|
||||
[Browse API →](modules/detection.md)
|
||||
</div>
|
||||
|
||||
<div class="feature-card" markdown>
|
||||
### :material-book-open-variant: Guides
|
||||
Learn about the architecture and design principles.
|
||||
|
||||
[Read Guides →](concepts/overview.md)
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
---
|
||||
|
||||
## License
|
||||
|
||||
UniFace is released under the [MIT License](https://opensource.org/licenses/MIT).
|
||||
174
docs/installation.md
Normal file
174
docs/installation.md
Normal file
@@ -0,0 +1,174 @@
|
||||
# Installation
|
||||
|
||||
This guide covers all installation options for UniFace.
|
||||
|
||||
---
|
||||
|
||||
## Requirements
|
||||
|
||||
- **Python**: 3.11 or higher
|
||||
- **Operating Systems**: macOS, Linux, Windows
|
||||
|
||||
---
|
||||
|
||||
## Quick Install
|
||||
|
||||
The simplest way to install UniFace:
|
||||
|
||||
```bash
|
||||
pip install uniface
|
||||
```
|
||||
|
||||
This installs the CPU version with all core dependencies.
|
||||
|
||||
---
|
||||
|
||||
## Platform-Specific Installation
|
||||
|
||||
### macOS (Apple Silicon - M1/M2/M3/M4)
|
||||
|
||||
For Apple Silicon Macs, the standard installation automatically includes ARM64 optimizations:
|
||||
|
||||
```bash
|
||||
pip install uniface
|
||||
```
|
||||
|
||||
!!! tip "Native Performance"
|
||||
The base `onnxruntime` package has native Apple Silicon support with ARM64 optimizations built-in since version 1.13+. No additional configuration needed.
|
||||
|
||||
Verify ARM64 installation:
|
||||
|
||||
```bash
|
||||
python -c "import platform; print(platform.machine())"
|
||||
# Should show: arm64
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Linux/Windows with NVIDIA GPU
|
||||
|
||||
For CUDA acceleration on NVIDIA GPUs:
|
||||
|
||||
```bash
|
||||
pip install uniface[gpu]
|
||||
```
|
||||
|
||||
**Requirements:**
|
||||
|
||||
- CUDA 11.x or 12.x
|
||||
- cuDNN 8.x
|
||||
|
||||
!!! info "CUDA Compatibility"
|
||||
See [ONNX Runtime GPU requirements](https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html) for detailed compatibility matrix.
|
||||
|
||||
Verify GPU installation:
|
||||
|
||||
```python
|
||||
import onnxruntime as ort
|
||||
print("Available providers:", ort.get_available_providers())
|
||||
# Should include: 'CUDAExecutionProvider'
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### CPU-Only (All Platforms)
|
||||
|
||||
```bash
|
||||
pip install uniface
|
||||
```
|
||||
|
||||
Works on all platforms with automatic CPU fallback.
|
||||
|
||||
---
|
||||
|
||||
## Install from Source
|
||||
|
||||
For development or the latest features:
|
||||
|
||||
```bash
|
||||
git clone https://github.com/yakhyo/uniface.git
|
||||
cd uniface
|
||||
pip install -e .
|
||||
```
|
||||
|
||||
With development dependencies:
|
||||
|
||||
```bash
|
||||
pip install -e ".[dev]"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Dependencies
|
||||
|
||||
UniFace has minimal dependencies:
|
||||
|
||||
| Package | Purpose |
|
||||
|---------|---------|
|
||||
| `numpy` | Array operations |
|
||||
| `opencv-python` | Image processing |
|
||||
| `onnxruntime` | Model inference |
|
||||
| `requests` | Model download |
|
||||
| `tqdm` | Progress bars |
|
||||
|
||||
---
|
||||
|
||||
## Verify Installation
|
||||
|
||||
Test your installation:
|
||||
|
||||
```python
|
||||
import uniface
|
||||
print(f"UniFace version: {uniface.__version__}")
|
||||
|
||||
# Check available ONNX providers
|
||||
import onnxruntime as ort
|
||||
print(f"Available providers: {ort.get_available_providers()}")
|
||||
|
||||
# Quick test
|
||||
from uniface import RetinaFace
|
||||
detector = RetinaFace()
|
||||
print("Installation successful!")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Import Errors
|
||||
|
||||
If you encounter import errors, ensure you're using Python 3.11+:
|
||||
|
||||
```bash
|
||||
python --version
|
||||
# Should show: Python 3.11.x or higher
|
||||
```
|
||||
|
||||
### Model Download Issues
|
||||
|
||||
Models are automatically downloaded on first use. If downloads fail:
|
||||
|
||||
```python
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.constants import RetinaFaceWeights
|
||||
|
||||
# Manually download a model
|
||||
model_path = verify_model_weights(RetinaFaceWeights.MNET_V2)
|
||||
print(f"Model downloaded to: {model_path}")
|
||||
```
|
||||
|
||||
### Performance Issues on Mac
|
||||
|
||||
Verify you're using the ARM64 build (not x86_64 via Rosetta):
|
||||
|
||||
```bash
|
||||
python -c "import platform; print(platform.machine())"
|
||||
# Should show: arm64 (not x86_64)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Next Steps
|
||||
|
||||
- [Quickstart Guide](quickstart.md) - Get started in 5 minutes
|
||||
- [Execution Providers](concepts/execution-providers.md) - Hardware acceleration setup
|
||||
24
docs/license-attribution.md
Normal file
24
docs/license-attribution.md
Normal file
@@ -0,0 +1,24 @@
|
||||
# Licenses & Attribution
|
||||
|
||||
## UniFace License
|
||||
|
||||
UniFace is released under the [MIT License](https://opensource.org/licenses/MIT).
|
||||
|
||||
---
|
||||
|
||||
## Model Credits
|
||||
|
||||
| Model | Source | License |
|
||||
|-------|--------|---------|
|
||||
| RetinaFace | [yakhyo/retinaface-pytorch](https://github.com/yakhyo/retinaface-pytorch) | MIT |
|
||||
| SCRFD | [InsightFace](https://github.com/deepinsight/insightface) | MIT |
|
||||
| YOLOv5-Face | [yakhyo/yolov5-face-onnx-inference](https://github.com/yakhyo/yolov5-face-onnx-inference) | GPL-3.0 |
|
||||
| YOLOv8-Face | [yakhyo/yolov8-face-onnx-inference](https://github.com/yakhyo/yolov8-face-onnx-inference) | GPL-3.0 |
|
||||
| AdaFace | [yakhyo/adaface-onnx](https://github.com/yakhyo/adaface-onnx) | MIT |
|
||||
| ArcFace | [InsightFace](https://github.com/deepinsight/insightface) | MIT |
|
||||
| MobileFace | [yakhyo/face-recognition](https://github.com/yakhyo/face-recognition) | MIT |
|
||||
| SphereFace | [yakhyo/face-recognition](https://github.com/yakhyo/face-recognition) | MIT |
|
||||
| BiSeNet | [yakhyo/face-parsing](https://github.com/yakhyo/face-parsing) | MIT |
|
||||
| MobileGaze | [yakhyo/gaze-estimation](https://github.com/yakhyo/gaze-estimation) | MIT |
|
||||
| MiniFASNet | [yakhyo/face-anti-spoofing](https://github.com/yakhyo/face-anti-spoofing) | Apache-2.0 |
|
||||
| FairFace | [yakhyo/fairface-onnx](https://github.com/yakhyo/fairface-onnx) | CC BY 4.0 |
|
||||
358
docs/models.md
Normal file
358
docs/models.md
Normal file
@@ -0,0 +1,358 @@
|
||||
# Model Zoo
|
||||
|
||||
Complete guide to all available models and their performance characteristics.
|
||||
|
||||
---
|
||||
|
||||
## Face Detection Models
|
||||
|
||||
### RetinaFace Family
|
||||
|
||||
RetinaFace models are trained on the WIDER FACE dataset.
|
||||
|
||||
| Model Name | Params | Size | Easy | Medium | Hard |
|
||||
| -------------- | ------ | ----- | ------ | ------ | ------ |
|
||||
| `MNET_025` | 0.4M | 1.7MB | 88.48% | 87.02% | 80.61% |
|
||||
| `MNET_050` | 1.0M | 2.6MB | 89.42% | 87.97% | 82.40% |
|
||||
| `MNET_V1` | 3.5M | 3.8MB | 90.59% | 89.14% | 84.13% |
|
||||
| `MNET_V2` :material-check-circle: | 3.2M | 3.5MB | 91.70% | 91.03% | 86.60% |
|
||||
| `RESNET18` | 11.7M | 27MB | 92.50% | 91.02% | 86.63% |
|
||||
| `RESNET34` | 24.8M | 56MB | 94.16% | 93.12% | 88.90% |
|
||||
|
||||
!!! info "Accuracy & Benchmarks"
|
||||
**Accuracy**: WIDER FACE validation set (Easy/Medium/Hard subsets) - from [RetinaFace paper](https://arxiv.org/abs/1905.00641)
|
||||
|
||||
**Speed**: Benchmark on your own hardware using `python tools/detection.py --source <image> --iterations 100`
|
||||
|
||||
---
|
||||
|
||||
### SCRFD Family
|
||||
|
||||
SCRFD (Sample and Computation Redistribution for Efficient Face Detection) models trained on WIDER FACE dataset.
|
||||
|
||||
| Model Name | Params | Size | Easy | Medium | Hard |
|
||||
| ---------------- | ------ | ----- | ------ | ------ | ------ |
|
||||
| `SCRFD_500M` | 0.6M | 2.5MB | 90.57% | 88.12% | 68.51% |
|
||||
| `SCRFD_10G` :material-check-circle: | 4.2M | 17MB | 95.16% | 93.87% | 83.05% |
|
||||
|
||||
!!! info "Accuracy & Benchmarks"
|
||||
**Accuracy**: WIDER FACE validation set - from [SCRFD paper](https://arxiv.org/abs/2105.04714)
|
||||
|
||||
**Speed**: Benchmark on your own hardware using `python tools/detection.py --source <image> --iterations 100`
|
||||
|
||||
---
|
||||
|
||||
### YOLOv5-Face Family
|
||||
|
||||
YOLOv5-Face models provide detection with 5-point facial landmarks, trained on WIDER FACE dataset.
|
||||
|
||||
| Model Name | Size | Easy | Medium | Hard |
|
||||
| -------------- | ---- | ------ | ------ | ------ |
|
||||
| `YOLOV5N` | 11MB | 93.61% | 91.52% | 80.53% |
|
||||
| `YOLOV5S` :material-check-circle: | 28MB | 94.33% | 92.61% | 83.15% |
|
||||
| `YOLOV5M` | 82MB | 95.30% | 93.76% | 85.28% |
|
||||
|
||||
!!! info "Accuracy & Benchmarks"
|
||||
**Accuracy**: WIDER FACE validation set - from [YOLOv5-Face paper](https://arxiv.org/abs/2105.12931)
|
||||
|
||||
**Speed**: Benchmark on your own hardware using `python tools/detection.py --source <image> --iterations 100`
|
||||
|
||||
!!! note "Fixed Input Size"
|
||||
All YOLOv5-Face models use a fixed input size of 640×640.
|
||||
|
||||
---
|
||||
|
||||
### YOLOv8-Face Family
|
||||
|
||||
YOLOv8-Face models use anchor-free design with DFL (Distribution Focal Loss) for bbox regression. Provides detection with 5-point facial landmarks.
|
||||
|
||||
| Model Name | Size | Easy | Medium | Hard |
|
||||
| ---------------- | ------ | ------ | ------ | ------ |
|
||||
| `YOLOV8_LITE_S`| 7.4MB | 93.4% | 91.2% | 78.6% |
|
||||
| `YOLOV8N` :material-check-circle: | 12MB | 94.6% | 92.3% | 79.6% |
|
||||
|
||||
!!! info "Accuracy & Benchmarks"
|
||||
**Accuracy**: WIDER FACE validation set (Easy/Medium/Hard subsets)
|
||||
|
||||
**Speed**: Benchmark on your own hardware using `python tools/detection.py --source <image> --method yolov8face`
|
||||
|
||||
!!! note "Fixed Input Size"
|
||||
All YOLOv8-Face models use a fixed input size of 640×640.
|
||||
|
||||
---
|
||||
|
||||
## Face Recognition Models
|
||||
|
||||
### AdaFace
|
||||
|
||||
Face recognition using adaptive margin based on image quality.
|
||||
|
||||
| Model Name | Backbone | Dataset | Size | IJB-B TAR | IJB-C TAR |
|
||||
| ----------- | -------- | ----------- | ------ | --------- | --------- |
|
||||
| `IR_18` :material-check-circle: | IR-18 | WebFace4M | 92 MB | 93.03% | 94.99% |
|
||||
| `IR_101` | IR-101 | WebFace12M | 249 MB | - | 97.66% |
|
||||
|
||||
!!! info "Training Data & Accuracy"
|
||||
**Dataset**: WebFace4M (4M images) / WebFace12M (12M images)
|
||||
|
||||
**Accuracy**: IJB-B and IJB-C benchmarks, TAR@FAR=0.01%
|
||||
|
||||
!!! tip "Key Innovation"
|
||||
AdaFace introduces adaptive margin that adjusts based on image quality, providing better performance on low-quality images compared to fixed-margin approaches.
|
||||
|
||||
|
||||
---
|
||||
|
||||
### ArcFace
|
||||
|
||||
Face recognition using additive angular margin loss.
|
||||
|
||||
| Model Name | Backbone | Params | Size | LFW | CFP-FP | AgeDB-30 | IJB-C |
|
||||
| ----------- | --------- | ------ | ----- | ------ | ------ | -------- | ----- |
|
||||
| `MNET` :material-check-circle: | MobileNet | 2.0M | 8MB | 99.70% | 98.00% | 96.58% | 95.02% |
|
||||
| `RESNET` | ResNet50 | 43.6M | 166MB | 99.83% | 99.33% | 98.23% | 97.25% |
|
||||
|
||||
!!! info "Training Data"
|
||||
**Dataset**: Trained on WebFace600K (600K images)
|
||||
|
||||
**Accuracy**: IJB-C accuracy reported as TAR@FAR=1e-4
|
||||
|
||||
---
|
||||
|
||||
### MobileFace
|
||||
|
||||
Lightweight face recognition models with MobileNet backbones.
|
||||
|
||||
| Model Name | Backbone | Params | Size | LFW | CALFW | CPLFW | AgeDB-30 |
|
||||
| ----------------- | ---------------- | ------ | ---- | ------ | ------ | ------ | -------- |
|
||||
| `MNET_025` | MobileNetV1 0.25 | 0.36M | 1MB | 98.76% | 92.02% | 82.37% | 90.02% |
|
||||
| `MNET_V2` :material-check-circle: | MobileNetV2 | 2.29M | 4MB | 99.55% | 94.87% | 86.89% | 95.16% |
|
||||
| `MNET_V3_SMALL` | MobileNetV3-S | 1.25M | 3MB | 99.30% | 93.77% | 85.29% | 92.79% |
|
||||
| `MNET_V3_LARGE` | MobileNetV3-L | 3.52M | 10MB | 99.53% | 94.56% | 86.79% | 95.13% |
|
||||
|
||||
!!! info "Training Data"
|
||||
**Dataset**: Trained on MS1M-V2 (5.8M images, 85K identities)
|
||||
|
||||
**Accuracy**: Evaluated on LFW, CALFW, CPLFW, and AgeDB-30 benchmarks
|
||||
|
||||
---
|
||||
|
||||
### SphereFace
|
||||
|
||||
Face recognition using angular softmax loss.
|
||||
|
||||
| Model Name | Backbone | Params | Size | LFW | CALFW | CPLFW | AgeDB-30 |
|
||||
| ------------ | -------- | ------ | ---- | ------ | ------ | ------ | -------- |
|
||||
| `SPHERE20` | Sphere20 | 24.5M | 50MB | 99.67% | 95.61% | 88.75% | 96.58% |
|
||||
| `SPHERE36` | Sphere36 | 34.6M | 92MB | 99.72% | 95.64% | 89.92% | 96.83% |
|
||||
|
||||
!!! info "Training Data"
|
||||
**Dataset**: Trained on MS1M-V2 (5.8M images, 85K identities)
|
||||
|
||||
**Accuracy**: Evaluated on LFW, CALFW, CPLFW, and AgeDB-30 benchmarks
|
||||
|
||||
!!! note "Architecture"
|
||||
SphereFace uses angular softmax loss, an earlier approach before ArcFace. These models provide good accuracy with moderate resource requirements.
|
||||
|
||||
---
|
||||
|
||||
## Facial Landmark Models
|
||||
|
||||
### 106-Point Landmark Detection
|
||||
|
||||
Facial landmark localization model.
|
||||
|
||||
| Model Name | Points | Params | Size |
|
||||
| ---------- | ------ | ------ | ---- |
|
||||
| `2D106` | 106 | 3.7M | 14MB |
|
||||
|
||||
**Landmark Groups:**
|
||||
|
||||
| Group | Points | Count |
|
||||
|-------|--------|-------|
|
||||
| Face contour | 0-32 | 33 points |
|
||||
| Eyebrows | 33-50 | 18 points |
|
||||
| Nose | 51-62 | 12 points |
|
||||
| Eyes | 63-86 | 24 points |
|
||||
| Mouth | 87-105 | 19 points |
|
||||
|
||||
---
|
||||
|
||||
## Attribute Analysis Models
|
||||
|
||||
### Age & Gender Detection
|
||||
|
||||
| Model Name | Attributes | Params | Size |
|
||||
| ----------- | ----------- | ------ | ---- |
|
||||
| `AgeGender` | Age, Gender | 2.1M | 8MB |
|
||||
|
||||
!!! info "Training Data"
|
||||
**Dataset**: Trained on CelebA
|
||||
|
||||
!!! warning "Accuracy Note"
|
||||
Accuracy varies by demographic and image quality. Test on your specific use case.
|
||||
|
||||
---
|
||||
|
||||
### FairFace Attributes
|
||||
|
||||
| Model Name | Attributes | Params | Size |
|
||||
| ----------- | --------------------- | ------ | ----- |
|
||||
| `FairFace` | Race, Gender, Age Group | - | 44MB |
|
||||
|
||||
!!! info "Training Data"
|
||||
**Dataset**: Trained on FairFace dataset with balanced demographics
|
||||
|
||||
!!! tip "Equitable Predictions"
|
||||
FairFace provides more equitable predictions across different racial and gender groups.
|
||||
|
||||
**Race Categories (7):** White, Black, Latino Hispanic, East Asian, Southeast Asian, Indian, Middle Eastern
|
||||
|
||||
**Age Groups (9):** 0-2, 3-9, 10-19, 20-29, 30-39, 40-49, 50-59, 60-69, 70+
|
||||
|
||||
---
|
||||
|
||||
### Emotion Detection
|
||||
|
||||
| Model Name | Classes | Params | Size |
|
||||
| ------------- | ------- | ------ | ---- |
|
||||
| `AFFECNET7` | 7 | 0.5M | 2MB |
|
||||
| `AFFECNET8` | 8 | 0.5M | 2MB |
|
||||
|
||||
**Classes (7)**: Neutral, Happy, Sad, Surprise, Fear, Disgust, Anger
|
||||
|
||||
**Classes (8)**: Above + Contempt
|
||||
|
||||
!!! info "Training Data"
|
||||
**Dataset**: Trained on AffectNet
|
||||
|
||||
!!! note "Accuracy Note"
|
||||
Emotion detection accuracy depends heavily on facial expression clarity and cultural context.
|
||||
|
||||
---
|
||||
|
||||
## Gaze Estimation Models
|
||||
|
||||
### MobileGaze Family
|
||||
|
||||
Gaze direction prediction models trained on Gaze360 dataset. Returns pitch (vertical) and yaw (horizontal) angles in radians.
|
||||
|
||||
| Model Name | Params | Size | MAE* |
|
||||
| -------------- | ------ | ------- | ----- |
|
||||
| `RESNET18` | 11.7M | 43 MB | 12.84 |
|
||||
| `RESNET34` :material-check-circle: | 24.8M | 81.6 MB | 11.33 |
|
||||
| `RESNET50` | 25.6M | 91.3 MB | 11.34 |
|
||||
| `MOBILENET_V2` | 3.5M | 9.59 MB | 13.07 |
|
||||
| `MOBILEONE_S0` | 2.1M | 4.8 MB | 12.58 |
|
||||
|
||||
*MAE (Mean Absolute Error) in degrees on Gaze360 test set - lower is better
|
||||
|
||||
!!! info "Training Data"
|
||||
**Dataset**: Trained on Gaze360 (indoor/outdoor scenes with diverse head poses)
|
||||
|
||||
**Training**: 200 epochs with classification-based approach (binned angles)
|
||||
|
||||
!!! note "Input Requirements"
|
||||
Requires face crop as input. Use face detection first to obtain bounding boxes.
|
||||
|
||||
---

## Face Parsing Models

### BiSeNet Family

BiSeNet (Bilateral Segmentation Network) models for semantic face parsing. Segments face images into 19 facial component classes.

| Model Name | Params | Size    | Classes |
| ---------- | ------ | ------- | ------- |
| `RESNET18` :material-check-circle: | 13.3M | 50.7 MB | 19 |
| `RESNET34` | 24.1M  | 89.2 MB | 19      |

!!! info "Training Data"
    **Dataset**: Trained on CelebAMask-HQ

    **Architecture**: BiSeNet with ResNet backbone

    **Input Size**: 512×512 (automatically resized)

**19 Facial Component Classes:**

| # | Class         | #  | Class     | #  | Class     |
|---|---------------|----|-----------|----|-----------|
| 1 | Background    | 8  | Left Ear  | 15 | Neck      |
| 2 | Skin          | 9  | Right Ear | 16 | Neck Lace |
| 3 | Left Eyebrow  | 10 | Ear Ring  | 17 | Cloth     |
| 4 | Right Eyebrow | 11 | Nose      | 18 | Hair      |
| 5 | Left Eye      | 12 | Mouth     | 19 | Hat       |
| 6 | Right Eye     | 13 | Upper Lip |    |           |
| 7 | Eye Glasses   | 14 | Lower Lip |    |           |

**Applications:**

- Face makeup and beauty applications
- Virtual try-on systems
- Face editing and manipulation
- Facial feature extraction
- Portrait segmentation

!!! note "Input Requirements"
    Input should be a cropped face image. For the full pipeline, use face detection first to obtain face crops.

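A minimal parsing sketch, assuming `face_crop` is a cropped BGR face image obtained from a detector:

```python
import numpy as np

from uniface.parsing import BiSeNet

parser = BiSeNet()
mask = parser.parse(face_crop)  # assumes `face_crop` is a cropped BGR face image; returns an (H, W) class map

# Count pixels per predicted class ID (0 = background, 1 = skin, ..., 18 = hair)
ids, counts = np.unique(mask, return_counts=True)
print(dict(zip(ids.tolist(), counts.tolist())))
```
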
---

## Anti-Spoofing Models

### MiniFASNet Family

Face anti-spoofing models for liveness detection. They detect whether a face is real (live) or fake (photo, video replay, mask).

| Model Name | Size   | Scale |
| ---------- | ------ | ----- |
| `V1SE`     | 1.2 MB | 4.0   |
| `V2` :material-check-circle: | 1.2 MB | 2.7 |

!!! info "Output Format"
    **Output**: Returns `SpoofingResult(is_real, confidence)`, where `is_real` is `True` for a live face and `False` for a spoof

!!! note "Input Requirements"
    Requires a face bounding box from a detector.

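A minimal liveness-check sketch, assuming `image` is a loaded BGR `numpy` array:

```python
from uniface import RetinaFace
from uniface.spoofing import MiniFASNet

detector = RetinaFace()
spoofer = MiniFASNet()

faces = detector.detect(image)  # assumes `image` is a loaded BGR numpy array
for face in faces:
    result = spoofer.predict(image, face.bbox)
    label = "Real" if result.is_real else "Fake"
    print(f"{label} ({result.confidence:.1%})")
```
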
---

## Model Management

Models are automatically downloaded and cached on first use.

- **Cache location**: `~/.uniface/models/`
- **Verification**: Models are verified with SHA-256 checksums
- **Manual download**: Use `python tools/download_model.py` to pre-download models (see the sketch below for inspecting the cache)

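The following sketch is one way to inspect what is already cached. It assumes the downloaded models sit directly under `~/.uniface/models/` as `.onnx` files, which may differ from the actual layout on your machine:

```python
from pathlib import Path

cache_dir = Path.home() / ".uniface" / "models"

# List cached model files and their sizes (assumes a flat directory of .onnx files)
for model_file in sorted(cache_dir.glob("*.onnx")):
    print(f"{model_file.name}: {model_file.stat().st_size / 1e6:.1f} MB")
```
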
---

## References

### Model Training & Architectures

- **RetinaFace Training**: [yakhyo/retinaface-pytorch](https://github.com/yakhyo/retinaface-pytorch) - PyTorch implementation and training code
- **YOLOv5-Face Original**: [deepcam-cn/yolov5-face](https://github.com/deepcam-cn/yolov5-face) - Original PyTorch implementation
- **YOLOv5-Face ONNX**: [yakhyo/yolov5-face-onnx-inference](https://github.com/yakhyo/yolov5-face-onnx-inference) - ONNX inference implementation
- **YOLOv8-Face Original**: [derronqi/yolov8-face](https://github.com/derronqi/yolov8-face) - Original PyTorch implementation
- **YOLOv8-Face ONNX**: [yakhyo/yolov8-face-onnx-inference](https://github.com/yakhyo/yolov8-face-onnx-inference) - ONNX inference implementation
- **AdaFace Original**: [mk-minchul/AdaFace](https://github.com/mk-minchul/AdaFace) - Original PyTorch implementation
- **AdaFace ONNX**: [yakhyo/adaface-onnx](https://github.com/yakhyo/adaface-onnx) - ONNX export and inference
- **Face Recognition Training**: [yakhyo/face-recognition](https://github.com/yakhyo/face-recognition) - ArcFace, MobileFace, SphereFace training code
- **Gaze Estimation Training**: [yakhyo/gaze-estimation](https://github.com/yakhyo/gaze-estimation) - MobileGaze training code and pretrained weights
- **Face Parsing Training**: [yakhyo/face-parsing](https://github.com/yakhyo/face-parsing) - BiSeNet training code and pretrained weights
- **Face Anti-Spoofing**: [yakhyo/face-anti-spoofing](https://github.com/yakhyo/face-anti-spoofing) - MiniFASNet ONNX inference (weights from [minivision-ai/Silent-Face-Anti-Spoofing](https://github.com/minivision-ai/Silent-Face-Anti-Spoofing))
- **FairFace**: [yakhyo/fairface-onnx](https://github.com/yakhyo/fairface-onnx) - FairFace ONNX inference for race, gender, age prediction
- **InsightFace**: [deepinsight/insightface](https://github.com/deepinsight/insightface) - Model architectures and pretrained weights

### Papers

- **RetinaFace**: [Single-Shot Multi-Level Face Localisation in the Wild](https://arxiv.org/abs/1905.00641)
- **SCRFD**: [Sample and Computation Redistribution for Efficient Face Detection](https://arxiv.org/abs/2105.04714)
- **YOLOv5-Face**: [YOLO5Face: Why Reinventing a Face Detector](https://arxiv.org/abs/2105.12931)
- **AdaFace**: [AdaFace: Quality Adaptive Margin for Face Recognition](https://arxiv.org/abs/2204.00964)
- **ArcFace**: [Additive Angular Margin Loss for Deep Face Recognition](https://arxiv.org/abs/1801.07698)
- **SphereFace**: [Deep Hypersphere Embedding for Face Recognition](https://arxiv.org/abs/1704.08063)
- **BiSeNet**: [Bilateral Segmentation Network for Real-time Semantic Segmentation](https://arxiv.org/abs/1808.00897)

docs/modules/attributes.md (new file, 279 lines)
|
||||
# Attributes
|
||||
|
||||
Facial attribute analysis for age, gender, race, and emotion detection.
|
||||
|
||||
---
|
||||
|
||||
## Available Models
|
||||
|
||||
| Model | Attributes | Size | Notes |
|
||||
|-------|------------|------|-------|
|
||||
| **AgeGender** | Age, Gender | 8 MB | Exact age prediction |
|
||||
| **FairFace** | Gender, Age Group, Race | 44 MB | Balanced demographics |
|
||||
| **Emotion** | 7-8 emotions | 2 MB | Requires PyTorch |
|
||||
|
||||
---
|
||||
|
||||
## AgeGender
|
||||
|
||||
Predicts exact age and binary gender.
|
||||
|
||||
### Basic Usage
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace, AgeGender
|
||||
|
||||
detector = RetinaFace()
|
||||
age_gender = AgeGender()
|
||||
|
||||
faces = detector.detect(image)
|
||||
|
||||
for face in faces:
|
||||
result = age_gender.predict(image, face.bbox)
|
||||
print(f"Gender: {result.sex}") # "Female" or "Male"
|
||||
print(f"Age: {result.age} years")
|
||||
```
|
||||
|
||||
### Output
|
||||
|
||||
```python
|
||||
# AttributeResult fields
|
||||
result.gender # 0=Female, 1=Male
|
||||
result.sex # "Female" or "Male" (property)
|
||||
result.age # int, age in years
|
||||
result.age_group # None (not provided by this model)
|
||||
result.race # None (not provided by this model)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## FairFace
|
||||
|
||||
Predicts gender, age group, and race with balanced demographics.
|
||||
|
||||
### Basic Usage
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace, FairFace
|
||||
|
||||
detector = RetinaFace()
|
||||
fairface = FairFace()
|
||||
|
||||
faces = detector.detect(image)
|
||||
|
||||
for face in faces:
|
||||
result = fairface.predict(image, face.bbox)
|
||||
print(f"Gender: {result.sex}")
|
||||
print(f"Age Group: {result.age_group}")
|
||||
print(f"Race: {result.race}")
|
||||
```
|
||||
|
||||
### Output
|
||||
|
||||
```python
|
||||
# AttributeResult fields
|
||||
result.gender # 0=Female, 1=Male
|
||||
result.sex # "Female" or "Male"
|
||||
result.age # None (not provided by this model)
|
||||
result.age_group # "20-29", "30-39", etc.
|
||||
result.race # Race/ethnicity label
|
||||
```
|
||||
|
||||
### Race Categories
|
||||
|
||||
| Label |
|
||||
|-------|
|
||||
| White |
|
||||
| Black |
|
||||
| Latino Hispanic |
|
||||
| East Asian |
|
||||
| Southeast Asian |
|
||||
| Indian |
|
||||
| Middle Eastern |
|
||||
|
||||
### Age Groups
|
||||
|
||||
| Group |
|
||||
|-------|
|
||||
| 0-2 |
|
||||
| 3-9 |
|
||||
| 10-19 |
|
||||
| 20-29 |
|
||||
| 30-39 |
|
||||
| 40-49 |
|
||||
| 50-59 |
|
||||
| 60-69 |
|
||||
| 70+ |
|
||||
|
||||
---
|
||||
|
||||
## Emotion
|
||||
|
||||
Predicts facial emotions. Requires PyTorch.
|
||||
|
||||
!!! warning "Optional Dependency"
|
||||
Emotion detection requires PyTorch. Install with:
|
||||
```bash
|
||||
pip install torch
|
||||
```
|
||||
|
||||
### Basic Usage
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace
|
||||
from uniface.attribute import Emotion
|
||||
from uniface.constants import DDAMFNWeights
|
||||
|
||||
detector = RetinaFace()
|
||||
emotion = Emotion(model_name=DDAMFNWeights.AFFECNET7)
|
||||
|
||||
faces = detector.detect(image)
|
||||
|
||||
for face in faces:
|
||||
result = emotion.predict(image, face.landmarks)
|
||||
print(f"Emotion: {result.emotion}")
|
||||
print(f"Confidence: {result.confidence:.2%}")
|
||||
```
|
||||
|
||||
### Emotion Classes
|
||||
|
||||
=== "7-Class (AFFECNET7)"
|
||||
|
||||
| Label |
|
||||
|-------|
|
||||
| Neutral |
|
||||
| Happy |
|
||||
| Sad |
|
||||
| Surprise |
|
||||
| Fear |
|
||||
| Disgust |
|
||||
| Anger |
|
||||
|
||||
=== "8-Class (AFFECNET8)"
|
||||
|
||||
| Label |
|
||||
|-------|
|
||||
| Neutral |
|
||||
| Happy |
|
||||
| Sad |
|
||||
| Surprise |
|
||||
| Fear |
|
||||
| Disgust |
|
||||
| Anger |
|
||||
| Contempt |
|
||||
|
||||
### Model Variants
|
||||
|
||||
```python
|
||||
from uniface.attribute import Emotion
|
||||
from uniface.constants import DDAMFNWeights
|
||||
|
||||
# 7-class emotion
|
||||
emotion = Emotion(model_name=DDAMFNWeights.AFFECNET7)
|
||||
|
||||
# 8-class emotion
|
||||
emotion = Emotion(model_name=DDAMFNWeights.AFFECNET8)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Combining Models
|
||||
|
||||
### Full Attribute Analysis
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace, AgeGender, FairFace
|
||||
|
||||
detector = RetinaFace()
|
||||
age_gender = AgeGender()
|
||||
fairface = FairFace()
|
||||
|
||||
faces = detector.detect(image)
|
||||
|
||||
for face in faces:
|
||||
# Get exact age from AgeGender
|
||||
ag_result = age_gender.predict(image, face.bbox)
|
||||
|
||||
# Get race from FairFace
|
||||
ff_result = fairface.predict(image, face.bbox)
|
||||
|
||||
print(f"Gender: {ag_result.sex}")
|
||||
print(f"Exact Age: {ag_result.age}")
|
||||
print(f"Age Group: {ff_result.age_group}")
|
||||
print(f"Race: {ff_result.race}")
|
||||
```
|
||||
|
||||
### Using FaceAnalyzer
|
||||
|
||||
```python
|
||||
from uniface import FaceAnalyzer
|
||||
|
||||
analyzer = FaceAnalyzer(
|
||||
detect=True,
|
||||
recognize=False,
|
||||
attributes=True # Uses AgeGender
|
||||
)
|
||||
|
||||
faces = analyzer.analyze(image)
|
||||
|
||||
for face in faces:
|
||||
print(f"Age: {face.age}, Gender: {face.sex}")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Visualization
|
||||
|
||||
```python
|
||||
import cv2
|
||||
|
||||
def draw_attributes(image, face, result):
|
||||
"""Draw attributes on image."""
|
||||
x1, y1, x2, y2 = map(int, face.bbox)
|
||||
|
||||
# Draw bounding box
|
||||
cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
|
||||
|
||||
# Build label
|
||||
label = f"{result.sex}"
|
||||
    if result.age is not None:
|
||||
label += f", {result.age}y"
|
||||
if result.age_group:
|
||||
label += f", {result.age_group}"
|
||||
if result.race:
|
||||
label += f", {result.race}"
|
||||
|
||||
# Draw label
|
||||
cv2.putText(
|
||||
image, label, (x1, y1 - 10),
|
||||
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2
|
||||
)
|
||||
|
||||
return image
|
||||
|
||||
# Usage
|
||||
for face in faces:
|
||||
result = age_gender.predict(image, face.bbox)
|
||||
image = draw_attributes(image, face, result)
|
||||
|
||||
cv2.imwrite("attributes.jpg", image)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Accuracy Notes
|
||||
|
||||
!!! note "Model Limitations"
|
||||
- **AgeGender**: Trained on CelebA; accuracy varies by demographic
|
||||
- **FairFace**: Trained for balanced demographics; better cross-racial accuracy
|
||||
- **Emotion**: Accuracy depends on facial expression clarity
|
||||
|
||||
Always test on your specific use case and consider cultural context.
|
||||
|
||||
---
|
||||
|
||||
## Next Steps
|
||||
|
||||
- [Parsing](parsing.md) - Face semantic segmentation
|
||||
- [Gaze](gaze.md) - Gaze estimation
|
||||
- [Image Pipeline Recipe](../recipes/image-pipeline.md) - Complete workflow
|
||||
docs/modules/detection.md (new file, 305 lines)
|
||||
# Detection
|
||||
|
||||
Face detection is the first step in any face analysis pipeline. UniFace provides four detection models.
|
||||
|
||||
---
|
||||
|
||||
## Available Models
|
||||
|
||||
| Model | Backbone | Size | Easy | Medium | Hard | Landmarks |
|
||||
|-------|----------|------|------|--------|------|:---------:|
|
||||
| **RetinaFace** | MobileNet V2 | 3.5 MB | 91.7% | 91.0% | 86.6% | :material-check: |
|
||||
| **SCRFD** | SCRFD-10G | 17 MB | 95.2% | 93.9% | 83.1% | :material-check: |
|
||||
| **YOLOv5-Face** | YOLOv5s | 28 MB | 94.3% | 92.6% | 83.2% | :material-check: |
|
||||
| **YOLOv8-Face** | YOLOv8n | 12 MB | 94.6% | 92.3% | 79.6% | :material-check: |
|
||||
|
||||
!!! note "Dataset"
|
||||
All models trained on WIDERFACE dataset.
|
||||
---
|
||||
|
||||
## RetinaFace
|
||||
|
||||
Single-shot face detector with multi-scale feature pyramid.
|
||||
|
||||
### Basic Usage
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace
|
||||
|
||||
detector = RetinaFace()
|
||||
faces = detector.detect(image)
|
||||
|
||||
for face in faces:
|
||||
print(f"Confidence: {face.confidence:.2f}")
|
||||
print(f"BBox: {face.bbox}")
|
||||
print(f"Landmarks: {face.landmarks.shape}") # (5, 2)
|
||||
```
|
||||
|
||||
### Model Variants
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace
|
||||
from uniface.constants import RetinaFaceWeights
|
||||
|
||||
# Lightweight (mobile/edge)
|
||||
detector = RetinaFace(model_name=RetinaFaceWeights.MNET_025)
|
||||
|
||||
# Balanced (default)
|
||||
detector = RetinaFace(model_name=RetinaFaceWeights.MNET_V2)
|
||||
|
||||
# High accuracy
|
||||
detector = RetinaFace(model_name=RetinaFaceWeights.RESNET34)
|
||||
```
|
||||
|
||||
| Variant | Params | Size | Easy | Medium | Hard |
|
||||
|---------|--------|------|------|--------|------|
|
||||
| MNET_025 | 0.4M | 1.7 MB | 88.5% | 87.0% | 80.6% |
|
||||
| MNET_050 | 1.0M | 2.6 MB | 89.4% | 88.0% | 82.4% |
|
||||
| MNET_V1 | 3.5M | 3.8 MB | 90.6% | 89.1% | 84.1% |
|
||||
| **MNET_V2** :material-check-circle: | 3.2M | 3.5 MB | 91.7% | 91.0% | 86.6% |
|
||||
| RESNET18 | 11.7M | 27 MB | 92.5% | 91.0% | 86.6% |
|
||||
| RESNET34 | 24.8M | 56 MB | 94.2% | 93.1% | 88.9% |
|
||||
|
||||
### Configuration
|
||||
|
||||
```python
|
||||
detector = RetinaFace(
|
||||
model_name=RetinaFaceWeights.MNET_V2,
|
||||
confidence_threshold=0.5, # Min confidence
|
||||
nms_threshold=0.4, # NMS IoU threshold
|
||||
input_size=(640, 640), # Input resolution
|
||||
dynamic_size=False # Enable dynamic input size
|
||||
)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## SCRFD
|
||||
|
||||
State-of-the-art detection with excellent accuracy-speed tradeoff.
|
||||
|
||||
### Basic Usage
|
||||
|
||||
```python
|
||||
from uniface import SCRFD
|
||||
|
||||
detector = SCRFD()
|
||||
faces = detector.detect(image)
|
||||
```
|
||||
|
||||
### Model Variants
|
||||
|
||||
```python
|
||||
from uniface import SCRFD
|
||||
from uniface.constants import SCRFDWeights
|
||||
|
||||
# Real-time (lightweight)
|
||||
detector = SCRFD(model_name=SCRFDWeights.SCRFD_500M_KPS)
|
||||
|
||||
# High accuracy (default)
|
||||
detector = SCRFD(model_name=SCRFDWeights.SCRFD_10G_KPS)
|
||||
```
|
||||
|
||||
| Variant | Params | Size | Easy | Medium | Hard |
|
||||
|---------|--------|------|------|--------|------|
|
||||
| SCRFD_500M_KPS | 0.6M | 2.5 MB | 90.6% | 88.1% | 68.5% |
|
||||
| **SCRFD_10G_KPS** :material-check-circle: | 4.2M | 17 MB | 95.2% | 93.9% | 83.1% |
|
||||
|
||||
### Configuration
|
||||
|
||||
```python
|
||||
detector = SCRFD(
|
||||
model_name=SCRFDWeights.SCRFD_10G_KPS,
|
||||
confidence_threshold=0.5,
|
||||
nms_threshold=0.4,
|
||||
input_size=(640, 640)
|
||||
)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## YOLOv5-Face
|
||||
|
||||
YOLO-based detection optimized for faces.
|
||||
|
||||
### Basic Usage
|
||||
|
||||
```python
|
||||
from uniface import YOLOv5Face
|
||||
|
||||
detector = YOLOv5Face()
|
||||
faces = detector.detect(image)
|
||||
```
|
||||
|
||||
### Model Variants
|
||||
|
||||
```python
|
||||
from uniface import YOLOv5Face
|
||||
from uniface.constants import YOLOv5FaceWeights
|
||||
|
||||
# Lightweight
|
||||
detector = YOLOv5Face(model_name=YOLOv5FaceWeights.YOLOV5N)
|
||||
|
||||
# Balanced (default)
|
||||
detector = YOLOv5Face(model_name=YOLOv5FaceWeights.YOLOV5S)
|
||||
|
||||
# High accuracy
|
||||
detector = YOLOv5Face(model_name=YOLOv5FaceWeights.YOLOV5M)
|
||||
```
|
||||
|
||||
| Variant | Size | Easy | Medium | Hard |
|
||||
|---------|------|------|--------|------|
|
||||
| YOLOV5N | 11 MB | 93.6% | 91.5% | 80.5% |
|
||||
| **YOLOV5S** :material-check-circle: | 28 MB | 94.3% | 92.6% | 83.2% |
|
||||
| YOLOV5M | 82 MB | 95.3% | 93.8% | 85.3% |
|
||||
|
||||
!!! note "Fixed Input Size"
|
||||
YOLOv5-Face uses a fixed input size of 640×640.
|
||||
|
||||
### Configuration
|
||||
|
||||
```python
|
||||
detector = YOLOv5Face(
|
||||
model_name=YOLOv5FaceWeights.YOLOV5S,
|
||||
confidence_threshold=0.6,
|
||||
nms_threshold=0.5,
|
||||
nms_mode='numpy' # or 'torchvision' for faster NMS
|
||||
)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## YOLOv8-Face
|
||||
|
||||
Anchor-free detection with DFL (Distribution Focal Loss) for accurate bbox regression.
|
||||
|
||||
### Basic Usage
|
||||
|
||||
```python
|
||||
from uniface import YOLOv8Face
|
||||
|
||||
detector = YOLOv8Face()
|
||||
faces = detector.detect(image)
|
||||
```
|
||||
|
||||
### Model Variants
|
||||
|
||||
```python
|
||||
from uniface import YOLOv8Face
|
||||
from uniface.constants import YOLOv8FaceWeights
|
||||
|
||||
# Lightweight
|
||||
detector = YOLOv8Face(model_name=YOLOv8FaceWeights.YOLOV8_LITE_S)
|
||||
|
||||
# Recommended (default)
|
||||
detector = YOLOv8Face(model_name=YOLOv8FaceWeights.YOLOV8N)
|
||||
```
|
||||
|
||||
| Variant | Size | Easy | Medium | Hard |
|
||||
|---------|------|------|--------|------|
|
||||
| YOLOV8_LITE_S | 7.4 MB | 93.4% | 91.2% | 78.6% |
|
||||
| **YOLOV8N** :material-check-circle: | 12 MB | 94.6% | 92.3% | 79.6% |
|
||||
|
||||
!!! note "Fixed Input Size"
|
||||
YOLOv8-Face uses a fixed input size of 640×640.
|
||||
|
||||
### Configuration
|
||||
|
||||
```python
|
||||
detector = YOLOv8Face(
|
||||
model_name=YOLOv8FaceWeights.YOLOV8N,
|
||||
confidence_threshold=0.5,
|
||||
nms_threshold=0.45,
|
||||
nms_mode='numpy' # or 'torchvision' for faster NMS
|
||||
)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Factory Function
|
||||
|
||||
Create detectors dynamically:
|
||||
|
||||
```python
|
||||
from uniface import create_detector
|
||||
|
||||
detector = create_detector('retinaface')
|
||||
# or
|
||||
detector = create_detector('scrfd')
|
||||
# or
|
||||
detector = create_detector('yolov5face')
|
||||
# or
|
||||
detector = create_detector('yolov8face')
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## High-Level API
|
||||
|
||||
One-line detection:
|
||||
|
||||
```python
|
||||
from uniface import detect_faces
|
||||
|
||||
# Using RetinaFace (default)
|
||||
faces = detect_faces(image, method='retinaface', confidence_threshold=0.5)
|
||||
|
||||
# Using YOLOv8-Face
|
||||
faces = detect_faces(image, method='yolov8face', confidence_threshold=0.5)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Output Format
|
||||
|
||||
All detectors return `list[Face]`:
|
||||
|
||||
```python
|
||||
for face in faces:
|
||||
# Bounding box [x1, y1, x2, y2]
|
||||
bbox = face.bbox
|
||||
|
||||
# Detection confidence (0-1)
|
||||
confidence = face.confidence
|
||||
|
||||
# 5-point landmarks (5, 2)
|
||||
landmarks = face.landmarks
|
||||
# [left_eye, right_eye, nose, left_mouth, right_mouth]
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Visualization
|
||||
|
||||
```python
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
draw_detections(
|
||||
image=image,
|
||||
bboxes=[f.bbox for f in faces],
|
||||
scores=[f.confidence for f in faces],
|
||||
landmarks=[f.landmarks for f in faces],
|
||||
vis_threshold=0.6
|
||||
)
|
||||
|
||||
cv2.imwrite("result.jpg", image)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Performance Comparison
|
||||
|
||||
Benchmark on your hardware:
|
||||
|
||||
```bash
|
||||
python tools/detection.py --source image.jpg --iterations 100
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## See Also
|
||||
|
||||
- [Recognition Module](recognition.md) - Extract embeddings from detected faces
|
||||
- [Landmarks Module](landmarks.md) - Get 106-point landmarks
|
||||
- [Image Pipeline Recipe](../recipes/image-pipeline.md) - Complete detection workflow
|
||||
- [Concepts: Thresholds](../concepts/thresholds-calibration.md) - Tuning detection parameters
|
||||
docs/modules/gaze.md (new file, 270 lines)
|
||||
# Gaze Estimation
|
||||
|
||||
Gaze estimation predicts where a person is looking (pitch and yaw angles).
|
||||
|
||||
---
|
||||
|
||||
## Available Models
|
||||
|
||||
| Model | Backbone | Size | MAE* |
|
||||
|-------|----------|------|------|
|
||||
| ResNet18 | ResNet18 | 43 MB | 12.84° |
|
||||
| **ResNet34** :material-check-circle: | ResNet34 | 82 MB | 11.33° |
|
||||
| ResNet50 | ResNet50 | 91 MB | 11.34° |
|
||||
| MobileNetV2 | MobileNetV2 | 9.6 MB | 13.07° |
|
||||
| MobileOne-S0 | MobileOne | 4.8 MB | 12.58° |
|
||||
|
||||
*MAE = Mean Absolute Error on Gaze360 test set (lower is better)
|
||||
|
||||
---
|
||||
|
||||
## Basic Usage
|
||||
|
||||
```python
|
||||
import cv2
|
||||
import numpy as np
|
||||
from uniface import RetinaFace, MobileGaze
|
||||
|
||||
detector = RetinaFace()
|
||||
gaze_estimator = MobileGaze()
|
||||
|
||||
image = cv2.imread("photo.jpg")
|
||||
faces = detector.detect(image)
|
||||
|
||||
for face in faces:
|
||||
# Crop face
|
||||
x1, y1, x2, y2 = map(int, face.bbox)
|
||||
face_crop = image[y1:y2, x1:x2]
|
||||
|
||||
if face_crop.size > 0:
|
||||
# Estimate gaze
|
||||
result = gaze_estimator.estimate(face_crop)
|
||||
|
||||
# Convert to degrees
|
||||
pitch_deg = np.degrees(result.pitch)
|
||||
yaw_deg = np.degrees(result.yaw)
|
||||
|
||||
print(f"Pitch: {pitch_deg:.1f}°, Yaw: {yaw_deg:.1f}°")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Model Variants
|
||||
|
||||
```python
|
||||
from uniface import MobileGaze
|
||||
from uniface.constants import GazeWeights
|
||||
|
||||
# Default (ResNet34, recommended)
|
||||
gaze = MobileGaze()
|
||||
|
||||
# Lightweight for mobile/edge
|
||||
gaze = MobileGaze(model_name=GazeWeights.MOBILEONE_S0)
|
||||
|
||||
# Higher accuracy
|
||||
gaze = MobileGaze(model_name=GazeWeights.RESNET50)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Output Format
|
||||
|
||||
```python
|
||||
result = gaze_estimator.estimate(face_crop)
|
||||
|
||||
# GazeResult dataclass
|
||||
result.pitch # Vertical angle in radians
|
||||
result.yaw # Horizontal angle in radians
|
||||
```
|
||||
|
||||
### Angle Convention
|
||||
|
||||
```
|
||||
pitch = +90° (looking up)
|
||||
│
|
||||
│
|
||||
yaw = -90° ────┼──── yaw = +90°
|
||||
(looking left) │ (looking right)
|
||||
│
|
||||
pitch = -90° (looking down)
|
||||
```
|
||||
|
||||
- **Pitch**: Vertical gaze angle
|
||||
- Positive = looking up
|
||||
- Negative = looking down
|
||||
|
||||
- **Yaw**: Horizontal gaze angle
|
||||
- Positive = looking right
|
||||
- Negative = looking left
|
||||
|
||||
---
|
||||
|
||||
## Visualization
|
||||
|
||||
```python
|
||||
from uniface.visualization import draw_gaze
|
||||
|
||||
# Detect faces
|
||||
faces = detector.detect(image)
|
||||
|
||||
for face in faces:
|
||||
x1, y1, x2, y2 = map(int, face.bbox)
|
||||
face_crop = image[y1:y2, x1:x2]
|
||||
|
||||
if face_crop.size > 0:
|
||||
result = gaze_estimator.estimate(face_crop)
|
||||
|
||||
# Draw gaze arrow on image
|
||||
draw_gaze(image, face.bbox, result.pitch, result.yaw)
|
||||
|
||||
cv2.imwrite("gaze_output.jpg", image)
|
||||
```
|
||||
|
||||
### Custom Visualization
|
||||
|
||||
```python
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
def draw_gaze_custom(image, bbox, pitch, yaw, length=100, color=(0, 255, 0)):
|
||||
"""Draw custom gaze arrow."""
|
||||
x1, y1, x2, y2 = map(int, bbox)
|
||||
|
||||
# Face center
|
||||
cx = (x1 + x2) // 2
|
||||
cy = (y1 + y2) // 2
|
||||
|
||||
# Calculate endpoint
|
||||
dx = -length * np.sin(yaw) * np.cos(pitch)
|
||||
dy = -length * np.sin(pitch)
|
||||
|
||||
# Draw arrow
|
||||
end_x = int(cx + dx)
|
||||
end_y = int(cy + dy)
|
||||
|
||||
cv2.arrowedLine(image, (cx, cy), (end_x, end_y), color, 2, tipLength=0.3)
|
||||
|
||||
return image
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Real-Time Gaze Tracking
|
||||
|
||||
```python
|
||||
import cv2
|
||||
import numpy as np
|
||||
from uniface import RetinaFace, MobileGaze
|
||||
from uniface.visualization import draw_gaze
|
||||
|
||||
detector = RetinaFace()
|
||||
gaze_estimator = MobileGaze()
|
||||
|
||||
cap = cv2.VideoCapture(0)
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
faces = detector.detect(frame)
|
||||
|
||||
for face in faces:
|
||||
x1, y1, x2, y2 = map(int, face.bbox)
|
||||
face_crop = frame[y1:y2, x1:x2]
|
||||
|
||||
if face_crop.size > 0:
|
||||
result = gaze_estimator.estimate(face_crop)
|
||||
|
||||
# Draw bounding box
|
||||
cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
|
||||
|
||||
# Draw gaze
|
||||
draw_gaze(frame, face.bbox, result.pitch, result.yaw)
|
||||
|
||||
# Display angles
|
||||
pitch_deg = np.degrees(result.pitch)
|
||||
yaw_deg = np.degrees(result.yaw)
|
||||
label = f"P:{pitch_deg:.0f} Y:{yaw_deg:.0f}"
|
||||
cv2.putText(frame, label, (x1, y1 - 10),
|
||||
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
|
||||
|
||||
cv2.imshow("Gaze Estimation", frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Use Cases
|
||||
|
||||
### Attention Detection
|
||||
|
||||
```python
|
||||
def is_looking_at_camera(result, threshold=15):
|
||||
"""Check if person is looking at camera."""
|
||||
pitch_deg = abs(np.degrees(result.pitch))
|
||||
yaw_deg = abs(np.degrees(result.yaw))
|
||||
|
||||
return pitch_deg < threshold and yaw_deg < threshold
|
||||
|
||||
# Usage
|
||||
result = gaze_estimator.estimate(face_crop)
|
||||
if is_looking_at_camera(result):
|
||||
print("Looking at camera")
|
||||
else:
|
||||
print("Looking away")
|
||||
```
|
||||
|
||||
### Gaze Direction Classification
|
||||
|
||||
```python
|
||||
def classify_gaze_direction(result, threshold=20):
|
||||
"""Classify gaze into directions."""
|
||||
pitch_deg = np.degrees(result.pitch)
|
||||
yaw_deg = np.degrees(result.yaw)
|
||||
|
||||
directions = []
|
||||
|
||||
if pitch_deg > threshold:
|
||||
directions.append("up")
|
||||
elif pitch_deg < -threshold:
|
||||
directions.append("down")
|
||||
|
||||
if yaw_deg > threshold:
|
||||
directions.append("right")
|
||||
elif yaw_deg < -threshold:
|
||||
directions.append("left")
|
||||
|
||||
if not directions:
|
||||
return "center"
|
||||
|
||||
return " ".join(directions)
|
||||
|
||||
# Usage
|
||||
result = gaze_estimator.estimate(face_crop)
|
||||
direction = classify_gaze_direction(result)
|
||||
print(f"Looking: {direction}")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Factory Function
|
||||
|
||||
```python
|
||||
from uniface import create_gaze_estimator
|
||||
|
||||
gaze = create_gaze_estimator() # Returns MobileGaze
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Next Steps
|
||||
|
||||
- [Anti-Spoofing](spoofing.md) - Face liveness detection
|
||||
- [Privacy](privacy.md) - Face anonymization
|
||||
- [Video Recipe](../recipes/video-webcam.md) - Real-time processing
|
||||
docs/modules/landmarks.md (new file, 251 lines)
|
||||
# Landmarks
|
||||
|
||||
Facial landmark detection provides precise localization of facial features.
|
||||
|
||||
---
|
||||
|
||||
## Available Models
|
||||
|
||||
| Model | Points | Size |
|
||||
|-------|--------|------|
|
||||
| **Landmark106** | 106 | 14 MB |
|
||||
|
||||
!!! info "5-Point Landmarks"
|
||||
Basic 5-point landmarks are included with all detection models (RetinaFace, SCRFD, YOLOv5-Face, YOLOv8-Face).
|
||||
|
||||
---
|
||||
|
||||
## 106-Point Landmarks
|
||||
|
||||
### Basic Usage
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace, Landmark106
|
||||
|
||||
detector = RetinaFace()
|
||||
landmarker = Landmark106()
|
||||
|
||||
# Detect face
|
||||
faces = detector.detect(image)
|
||||
|
||||
# Get detailed landmarks
|
||||
if faces:
|
||||
landmarks = landmarker.get_landmarks(image, faces[0].bbox)
|
||||
print(f"Landmarks shape: {landmarks.shape}") # (106, 2)
|
||||
```
|
||||
|
||||
### Landmark Groups
|
||||
|
||||
| Range | Group | Points |
|
||||
|-------|-------|--------|
|
||||
| 0-32 | Face Contour | 33 |
|
||||
| 33-50 | Eyebrows | 18 |
|
||||
| 51-62 | Nose | 12 |
|
||||
| 63-86 | Eyes | 24 |
|
||||
| 87-105 | Mouth | 19 |
|
||||
|
||||
### Extract Specific Features
|
||||
|
||||
```python
|
||||
landmarks = landmarker.get_landmarks(image, face.bbox)
|
||||
|
||||
# Face contour
|
||||
contour = landmarks[0:33]
|
||||
|
||||
# Left eyebrow
|
||||
left_eyebrow = landmarks[33:42]
|
||||
|
||||
# Right eyebrow
|
||||
right_eyebrow = landmarks[42:51]
|
||||
|
||||
# Nose
|
||||
nose = landmarks[51:63]
|
||||
|
||||
# Left eye
|
||||
left_eye = landmarks[63:72]
|
||||
|
||||
# Right eye
|
||||
right_eye = landmarks[76:84]
|
||||
|
||||
# Mouth
|
||||
mouth = landmarks[87:106]
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 5-Point Landmarks (Detection)
|
||||
|
||||
All detection models provide 5-point landmarks:
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace
|
||||
|
||||
detector = RetinaFace()
|
||||
faces = detector.detect(image)
|
||||
|
||||
if faces:
|
||||
landmarks_5 = faces[0].landmarks
|
||||
print(f"Shape: {landmarks_5.shape}") # (5, 2)
|
||||
|
||||
left_eye = landmarks_5[0]
|
||||
right_eye = landmarks_5[1]
|
||||
nose = landmarks_5[2]
|
||||
left_mouth = landmarks_5[3]
|
||||
right_mouth = landmarks_5[4]
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Visualization
|
||||
|
||||
### Draw 106 Landmarks
|
||||
|
||||
```python
|
||||
import cv2
|
||||
|
||||
def draw_landmarks(image, landmarks, color=(0, 255, 0), radius=2):
|
||||
"""Draw landmarks on image."""
|
||||
for x, y in landmarks.astype(int):
|
||||
cv2.circle(image, (x, y), radius, color, -1)
|
||||
return image
|
||||
|
||||
# Usage
|
||||
landmarks = landmarker.get_landmarks(image, face.bbox)
|
||||
image_with_landmarks = draw_landmarks(image.copy(), landmarks)
|
||||
cv2.imwrite("landmarks.jpg", image_with_landmarks)
|
||||
```
|
||||
|
||||
### Draw with Connections
|
||||
|
||||
```python
|
||||
def draw_landmarks_with_connections(image, landmarks):
|
||||
"""Draw landmarks with facial feature connections."""
|
||||
landmarks = landmarks.astype(int)
|
||||
|
||||
# Face contour (0-32)
|
||||
for i in range(32):
|
||||
cv2.line(image, tuple(landmarks[i]), tuple(landmarks[i+1]), (255, 255, 0), 1)
|
||||
|
||||
# Left eyebrow (33-41)
|
||||
for i in range(33, 41):
|
||||
cv2.line(image, tuple(landmarks[i]), tuple(landmarks[i+1]), (0, 255, 0), 1)
|
||||
|
||||
# Right eyebrow (42-50)
|
||||
for i in range(42, 50):
|
||||
cv2.line(image, tuple(landmarks[i]), tuple(landmarks[i+1]), (0, 255, 0), 1)
|
||||
|
||||
# Nose (51-62)
|
||||
for i in range(51, 62):
|
||||
cv2.line(image, tuple(landmarks[i]), tuple(landmarks[i+1]), (0, 0, 255), 1)
|
||||
|
||||
# Draw points
|
||||
for x, y in landmarks:
|
||||
cv2.circle(image, (x, y), 2, (0, 255, 255), -1)
|
||||
|
||||
return image
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Use Cases
|
||||
|
||||
### Face Alignment
|
||||
|
||||
```python
|
||||
from uniface import face_alignment
|
||||
|
||||
# Align face using 5-point landmarks
|
||||
aligned = face_alignment(image, faces[0].landmarks)
|
||||
# Returns: 112x112 aligned face
|
||||
```
|
||||
|
||||
### Eye Aspect Ratio (Blink Detection)
|
||||
|
||||
```python
|
||||
import numpy as np
|
||||
|
||||
def eye_aspect_ratio(eye_landmarks):
|
||||
"""Calculate eye aspect ratio for blink detection."""
|
||||
# Vertical distances
|
||||
v1 = np.linalg.norm(eye_landmarks[1] - eye_landmarks[5])
|
||||
v2 = np.linalg.norm(eye_landmarks[2] - eye_landmarks[4])
|
||||
|
||||
# Horizontal distance
|
||||
h = np.linalg.norm(eye_landmarks[0] - eye_landmarks[3])
|
||||
|
||||
ear = (v1 + v2) / (2.0 * h)
|
||||
return ear
|
||||
|
||||
# Usage with 106-point landmarks
|
||||
left_eye = landmarks[63:72] # Approximate eye points
|
||||
ear = eye_aspect_ratio(left_eye)
|
||||
|
||||
if ear < 0.2:
|
||||
print("Eye closed (blink detected)")
|
||||
```
|
||||
|
||||
### Head Pose Estimation
|
||||
|
||||
```python
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
def estimate_head_pose(landmarks, image_shape):
|
||||
"""Estimate head pose from facial landmarks."""
|
||||
# 3D model points (generic face model)
|
||||
model_points = np.array([
|
||||
(0.0, 0.0, 0.0), # Nose tip
|
||||
(0.0, -330.0, -65.0), # Chin
|
||||
(-225.0, 170.0, -135.0), # Left eye corner
|
||||
(225.0, 170.0, -135.0), # Right eye corner
|
||||
(-150.0, -150.0, -125.0), # Left mouth corner
|
||||
(150.0, -150.0, -125.0) # Right mouth corner
|
||||
], dtype=np.float64)
|
||||
|
||||
# 2D image points (from 106 landmarks)
|
||||
image_points = np.array([
|
||||
landmarks[51], # Nose tip
|
||||
landmarks[16], # Chin
|
||||
landmarks[63], # Left eye corner
|
||||
landmarks[76], # Right eye corner
|
||||
landmarks[87], # Left mouth corner
|
||||
landmarks[93] # Right mouth corner
|
||||
], dtype=np.float64)
|
||||
|
||||
# Camera matrix
|
||||
h, w = image_shape[:2]
|
||||
focal_length = w
|
||||
center = (w / 2, h / 2)
|
||||
camera_matrix = np.array([
|
||||
[focal_length, 0, center[0]],
|
||||
[0, focal_length, center[1]],
|
||||
[0, 0, 1]
|
||||
], dtype=np.float64)
|
||||
|
||||
# Solve PnP
|
||||
dist_coeffs = np.zeros((4, 1))
|
||||
success, rotation_vector, translation_vector = cv2.solvePnP(
|
||||
model_points, image_points, camera_matrix, dist_coeffs
|
||||
)
|
||||
|
||||
return rotation_vector, translation_vector
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Factory Function
|
||||
|
||||
```python
|
||||
from uniface import create_landmarker
|
||||
|
||||
landmarker = create_landmarker() # Returns Landmark106
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## See Also
|
||||
|
||||
- [Detection Module](detection.md) - Face detection with 5-point landmarks
|
||||
- [Attributes Module](attributes.md) - Age, gender, emotion
|
||||
- [Gaze Module](gaze.md) - Gaze estimation
|
||||
- [Concepts: Coordinate Systems](../concepts/coordinate-systems.md) - Landmark formats
|
||||
docs/modules/parsing.md (new file, 265 lines)
|
||||
# Parsing
|
||||
|
||||
Face parsing segments faces into semantic components (skin, eyes, nose, mouth, hair, etc.).
|
||||
|
||||
---
|
||||
|
||||
## Available Models
|
||||
|
||||
| Model | Backbone | Size | Classes |
|
||||
|-------|----------|------|---------|
|
||||
| **BiSeNet ResNet18** :material-check-circle: | ResNet18 | 51 MB | 19 |
|
||||
| BiSeNet ResNet34 | ResNet34 | 89 MB | 19 |
|
||||
|
||||
---
|
||||
|
||||
## Basic Usage
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface.parsing import BiSeNet
|
||||
from uniface.visualization import vis_parsing_maps
|
||||
|
||||
# Initialize parser
|
||||
parser = BiSeNet()
|
||||
|
||||
# Load face image (cropped)
|
||||
face_image = cv2.imread("face.jpg")
|
||||
|
||||
# Parse face
|
||||
mask = parser.parse(face_image)
|
||||
print(f"Mask shape: {mask.shape}") # (H, W)
|
||||
|
||||
# Visualize
|
||||
face_rgb = cv2.cvtColor(face_image, cv2.COLOR_BGR2RGB)
|
||||
vis_result = vis_parsing_maps(face_rgb, mask, save_image=False)
|
||||
|
||||
# Save result
|
||||
vis_bgr = cv2.cvtColor(vis_result, cv2.COLOR_RGB2BGR)
|
||||
cv2.imwrite("parsed.jpg", vis_bgr)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 19 Facial Component Classes
|
||||
|
||||
| ID | Class | ID | Class |
|
||||
|----|-------|----|-------|
|
||||
| 0 | Background | 10 | Ear Ring |
|
||||
| 1 | Skin | 11 | Nose |
|
||||
| 2 | Left Eyebrow | 12 | Mouth |
|
||||
| 3 | Right Eyebrow | 13 | Upper Lip |
|
||||
| 4 | Left Eye | 14 | Lower Lip |
|
||||
| 5 | Right Eye | 15 | Neck |
|
||||
| 6 | Eye Glasses | 16 | Neck Lace |
|
||||
| 7 | Left Ear | 17 | Cloth |
|
||||
| 8 | Right Ear | 18 | Hair |
|
||||
| 9 | Hat | | |
|
||||
|
||||
---
|
||||
|
||||
## Model Variants
|
||||
|
||||
```python
|
||||
from uniface.parsing import BiSeNet
|
||||
from uniface.constants import ParsingWeights
|
||||
|
||||
# Default (ResNet18)
|
||||
parser = BiSeNet()
|
||||
|
||||
# Higher accuracy (ResNet34)
|
||||
parser = BiSeNet(model_name=ParsingWeights.RESNET34)
|
||||
```
|
||||
|
||||
| Variant | Params | Size |
|
||||
|---------|--------|------|
|
||||
| **RESNET18** :material-check-circle: | 13.3M | 51 MB |
|
||||
| RESNET34 | 24.1M | 89 MB |
|
||||
|
||||
---
|
||||
|
||||
## Full Pipeline
|
||||
|
||||
### With Face Detection
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace
|
||||
from uniface.parsing import BiSeNet
|
||||
from uniface.visualization import vis_parsing_maps
|
||||
|
||||
detector = RetinaFace()
|
||||
parser = BiSeNet()
|
||||
|
||||
image = cv2.imread("photo.jpg")
|
||||
faces = detector.detect(image)
|
||||
|
||||
for i, face in enumerate(faces):
|
||||
# Crop face
|
||||
x1, y1, x2, y2 = map(int, face.bbox)
|
||||
face_crop = image[y1:y2, x1:x2]
|
||||
|
||||
# Parse
|
||||
mask = parser.parse(face_crop)
|
||||
|
||||
# Visualize
|
||||
face_rgb = cv2.cvtColor(face_crop, cv2.COLOR_BGR2RGB)
|
||||
vis_result = vis_parsing_maps(face_rgb, mask, save_image=False)
|
||||
|
||||
# Save
|
||||
vis_bgr = cv2.cvtColor(vis_result, cv2.COLOR_RGB2BGR)
|
||||
cv2.imwrite(f"face_{i}_parsed.jpg", vis_bgr)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Extract Specific Components
|
||||
|
||||
### Get Single Component Mask
|
||||
|
||||
```python
|
||||
import numpy as np
|
||||
|
||||
# Parse face
|
||||
mask = parser.parse(face_image)
|
||||
|
||||
# Extract specific component
|
||||
SKIN = 1
|
||||
HAIR = 18
|
||||
LEFT_EYE = 4
|
||||
RIGHT_EYE = 5
|
||||
|
||||
# Binary mask for skin
|
||||
skin_mask = (mask == SKIN).astype(np.uint8) * 255
|
||||
|
||||
# Binary mask for hair
|
||||
hair_mask = (mask == HAIR).astype(np.uint8) * 255
|
||||
|
||||
# Binary mask for eyes
|
||||
eyes_mask = ((mask == LEFT_EYE) | (mask == RIGHT_EYE)).astype(np.uint8) * 255
|
||||
```
|
||||
|
||||
### Count Pixels per Component
|
||||
|
||||
```python
|
||||
import numpy as np
|
||||
|
||||
mask = parser.parse(face_image)
|
||||
|
||||
component_names = {
|
||||
0: 'Background', 1: 'Skin', 2: 'L-Eyebrow', 3: 'R-Eyebrow',
|
||||
4: 'L-Eye', 5: 'R-Eye', 6: 'Glasses', 7: 'L-Ear', 8: 'R-Ear',
|
||||
9: 'Hat', 10: 'Earring', 11: 'Nose', 12: 'Mouth',
|
||||
13: 'U-Lip', 14: 'L-Lip', 15: 'Neck', 16: 'Necklace',
|
||||
17: 'Cloth', 18: 'Hair'
|
||||
}
|
||||
|
||||
for class_id in np.unique(mask):
|
||||
pixel_count = np.sum(mask == class_id)
|
||||
name = component_names.get(class_id, f'Class {class_id}')
|
||||
print(f"{name}: {pixel_count} pixels")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Applications
|
||||
|
||||
### Face Makeup
|
||||
|
||||
Apply virtual makeup using component masks:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
def apply_lip_color(image, mask, color=(180, 50, 50)):
    """Apply lip color using the parsing mask."""
    result = image.copy().astype(np.float32)

    # Lip mask (upper + lower lip) as a float alpha map in [0, 1]
    lip_mask = ((mask == 13) | (mask == 14)).astype(np.float32)

    # Solid color overlay (BGR)
    overlay = np.zeros_like(result)
    overlay[:] = color

    # Alpha-blend the overlay onto the lip region only
    alpha = 0.4
    blend = alpha * lip_mask[:, :, np.newaxis]
    result = result * (1.0 - blend) + overlay * blend

    return result.astype(np.uint8)
|
||||
```
|
||||
|
||||
### Background Replacement
|
||||
|
||||
```python
|
||||
def replace_background(image, mask, background):
|
||||
"""Replace background using parsing mask."""
|
||||
# Create foreground mask (everything except background)
|
||||
foreground_mask = (mask != 0).astype(np.uint8)
|
||||
|
||||
# Resize background to match image
|
||||
background = cv2.resize(background, (image.shape[1], image.shape[0]))
|
||||
|
||||
# Combine
|
||||
result = image.copy()
|
||||
result[foreground_mask == 0] = background[foreground_mask == 0]
|
||||
|
||||
return result
|
||||
```
|
||||
|
||||
### Hair Segmentation
|
||||
|
||||
```python
|
||||
def get_hair_mask(mask):
|
||||
"""Extract clean hair mask."""
|
||||
hair_mask = (mask == 18).astype(np.uint8) * 255
|
||||
|
||||
# Clean up with morphological operations
|
||||
kernel = np.ones((5, 5), np.uint8)
|
||||
hair_mask = cv2.morphologyEx(hair_mask, cv2.MORPH_CLOSE, kernel)
|
||||
hair_mask = cv2.morphologyEx(hair_mask, cv2.MORPH_OPEN, kernel)
|
||||
|
||||
return hair_mask
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Visualization Options
|
||||
|
||||
```python
|
||||
from uniface.visualization import vis_parsing_maps
|
||||
|
||||
# Default visualization
|
||||
vis_result = vis_parsing_maps(face_rgb, mask)
|
||||
|
||||
# With different parameters
|
||||
vis_result = vis_parsing_maps(
|
||||
face_rgb,
|
||||
mask,
|
||||
save_image=False, # Don't save to file
|
||||
)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Factory Function
|
||||
|
||||
```python
|
||||
from uniface import create_face_parser
|
||||
|
||||
parser = create_face_parser() # Returns BiSeNet
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Next Steps
|
||||
|
||||
- [Gaze](gaze.md) - Gaze estimation
|
||||
- [Privacy](privacy.md) - Face anonymization
|
||||
- [Detection](detection.md) - Face detection
|
||||
docs/modules/privacy.md (new file, 277 lines)
|
||||
# Privacy
|
||||
|
||||
Face anonymization protects privacy by blurring or obscuring faces in images and videos.
|
||||
|
||||
---
|
||||
|
||||
## Available Methods
|
||||
|
||||
| Method | Description |
|
||||
|--------|-------------|
|
||||
| **pixelate** | Blocky pixelation |
|
||||
| **gaussian** | Smooth blur |
|
||||
| **blackout** | Solid color fill |
|
||||
| **elliptical** | Oval-shaped blur |
|
||||
| **median** | Edge-preserving blur |
|
||||
|
||||
---
|
||||
|
||||
## Quick Start
|
||||
|
||||
### One-Line Anonymization
|
||||
|
||||
```python
|
||||
from uniface.privacy import anonymize_faces
|
||||
import cv2
|
||||
|
||||
image = cv2.imread("group_photo.jpg")
|
||||
anonymized = anonymize_faces(image, method='pixelate')
|
||||
cv2.imwrite("anonymized.jpg", anonymized)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## BlurFace Class
|
||||
|
||||
For more control, use the `BlurFace` class:
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace
|
||||
from uniface.privacy import BlurFace
|
||||
import cv2
|
||||
|
||||
detector = RetinaFace()
|
||||
blurrer = BlurFace(method='gaussian', blur_strength=5.0)
|
||||
|
||||
image = cv2.imread("photo.jpg")
|
||||
faces = detector.detect(image)
|
||||
anonymized = blurrer.anonymize(image, faces)
|
||||
|
||||
cv2.imwrite("anonymized.jpg", anonymized)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Blur Methods
|
||||
|
||||
### Pixelate
|
||||
|
||||
Blocky pixelation effect (common in news media):
|
||||
|
||||
```python
|
||||
blurrer = BlurFace(method='pixelate', pixel_blocks=10)
|
||||
```
|
||||
|
||||
| Parameter | Default | Description |
|
||||
|-----------|---------|-------------|
|
||||
| `pixel_blocks` | 10 | Number of blocks (lower = more pixelated) |
|
||||
|
||||
### Gaussian
|
||||
|
||||
Smooth, natural-looking blur:
|
||||
|
||||
```python
|
||||
blurrer = BlurFace(method='gaussian', blur_strength=3.0)
|
||||
```
|
||||
|
||||
| Parameter | Default | Description |
|
||||
|-----------|---------|-------------|
|
||||
| `blur_strength` | 3.0 | Blur intensity (higher = more blur) |
|
||||
|
||||
### Blackout
|
||||
|
||||
Solid color fill for maximum privacy:
|
||||
|
||||
```python
|
||||
blurrer = BlurFace(method='blackout', color=(0, 0, 0))
|
||||
```
|
||||
|
||||
| Parameter | Default | Description |
|
||||
|-----------|---------|-------------|
|
||||
| `color` | (0, 0, 0) | Fill color (BGR format) |
|
||||
|
||||
### Elliptical
|
||||
|
||||
Oval-shaped blur matching natural face shape:
|
||||
|
||||
```python
|
||||
blurrer = BlurFace(method='elliptical', blur_strength=3.0, margin=20)
|
||||
```
|
||||
|
||||
| Parameter | Default | Description |
|
||||
|-----------|---------|-------------|
|
||||
| `blur_strength` | 3.0 | Blur intensity |
|
||||
| `margin` | 20 | Margin around face |
|
||||
|
||||
### Median
|
||||
|
||||
Edge-preserving blur with artistic effect:
|
||||
|
||||
```python
|
||||
blurrer = BlurFace(method='median', blur_strength=3.0)
|
||||
```
|
||||
|
||||
| Parameter | Default | Description |
|
||||
|-----------|---------|-------------|
|
||||
| `blur_strength` | 3.0 | Blur intensity |
|
||||
|
||||
---
|
||||
|
||||
## In-Place Processing
|
||||
|
||||
Modify image directly (faster, saves memory):
|
||||
|
||||
```python
|
||||
blurrer = BlurFace(method='pixelate')
|
||||
|
||||
# In-place modification
|
||||
result = blurrer.anonymize(image, faces, inplace=True)
|
||||
# 'image' and 'result' point to the same array
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Real-Time Anonymization
|
||||
|
||||
### Webcam
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace
|
||||
from uniface.privacy import BlurFace
|
||||
|
||||
detector = RetinaFace()
|
||||
blurrer = BlurFace(method='pixelate')
|
||||
|
||||
cap = cv2.VideoCapture(0)
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
faces = detector.detect(frame)
|
||||
frame = blurrer.anonymize(frame, faces, inplace=True)
|
||||
|
||||
cv2.imshow('Anonymized', frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
```
|
||||
|
||||
### Video File
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace
|
||||
from uniface.privacy import BlurFace
|
||||
|
||||
detector = RetinaFace()
|
||||
blurrer = BlurFace(method='gaussian')
|
||||
|
||||
cap = cv2.VideoCapture("input_video.mp4")
|
||||
fps = cap.get(cv2.CAP_PROP_FPS)
|
||||
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
||||
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
||||
|
||||
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
|
||||
out = cv2.VideoWriter('output_video.mp4', fourcc, fps, (width, height))
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
faces = detector.detect(frame)
|
||||
frame = blurrer.anonymize(frame, faces, inplace=True)
|
||||
out.write(frame)
|
||||
|
||||
cap.release()
|
||||
out.release()
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Selective Anonymization
|
||||
|
||||
### Exclude Specific Faces
|
||||
|
||||
```python
|
||||
import numpy as np

def anonymize_except(image, all_faces, exclude_embeddings, recognizer, blurrer, threshold=0.6):
    """Anonymize all faces except those matching exclude_embeddings."""
    faces_to_blur = []

    for face in all_faces:
        # Get embedding for this face
        embedding = recognizer.get_normalized_embedding(image, face.landmarks)

        # Check whether it matches any excluded (reference) embedding
        should_exclude = False
        for ref_emb in exclude_embeddings:
            similarity = np.dot(embedding, ref_emb.T)[0][0]
            if similarity > threshold:
                should_exclude = True
                break

        if not should_exclude:
            faces_to_blur.append(face)

    # Blur the remaining faces
    return blurrer.anonymize(image, faces_to_blur)
|
||||
```
|
||||
|
||||
### Confidence-Based
|
||||
|
||||
```python
|
||||
def anonymize_low_confidence(image, faces, blurrer, confidence_threshold=0.8):
|
||||
"""Anonymize faces below confidence threshold."""
|
||||
faces_to_blur = [f for f in faces if f.confidence < confidence_threshold]
|
||||
return blurrer.anonymize(image, faces_to_blur)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Comparison
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace
|
||||
from uniface.privacy import BlurFace
|
||||
|
||||
detector = RetinaFace()
|
||||
image = cv2.imread("photo.jpg")
|
||||
faces = detector.detect(image)
|
||||
|
||||
methods = ['pixelate', 'gaussian', 'blackout', 'elliptical', 'median']
|
||||
|
||||
for method in methods:
|
||||
blurrer = BlurFace(method=method)
|
||||
result = blurrer.anonymize(image.copy(), faces)
|
||||
cv2.imwrite(f"anonymized_{method}.jpg", result)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Command-Line Tool
|
||||
|
||||
```bash
|
||||
# Anonymize image with pixelation
|
||||
python tools/face_anonymize.py --source photo.jpg
|
||||
|
||||
# Real-time webcam
|
||||
python tools/face_anonymize.py --source 0 --method gaussian
|
||||
|
||||
# Custom blur strength
|
||||
python tools/face_anonymize.py --source photo.jpg --method gaussian --blur-strength 5.0
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Next Steps
|
||||
|
||||
- [Anonymize Stream Recipe](../recipes/anonymize-stream.md) - Video pipeline
|
||||
- [Detection](detection.md) - Face detection options
|
||||
- [Batch Processing Recipe](../recipes/batch-processing.md) - Process multiple files
|
||||
docs/modules/recognition.md (new file, 292 lines)
|
||||
# Recognition
|
||||
|
||||
Face recognition extracts embeddings for identity verification and face search.
|
||||
|
||||
---
|
||||
|
||||
## Available Models
|
||||
|
||||
| Model | Backbone | Size | Embedding Dim |
|
||||
|-------|----------|------|---------------|
|
||||
| **AdaFace** | IR-18/IR-101 | 92-249 MB | 512 |
|
||||
| **ArcFace** | MobileNet/ResNet | 8-166 MB | 512 |
|
||||
| **MobileFace** | MobileNet V2/V3 | 1-10 MB | 512 |
|
||||
| **SphereFace** | Sphere20/36 | 50-92 MB | 512 |
|
||||
|
||||
---
|
||||
|
||||
## AdaFace
|
||||
|
||||
Face recognition using adaptive margin based on image quality.
|
||||
|
||||
### Basic Usage
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace, AdaFace
|
||||
|
||||
detector = RetinaFace()
|
||||
recognizer = AdaFace()
|
||||
|
||||
# Detect face
|
||||
faces = detector.detect(image)
|
||||
|
||||
# Extract embedding
|
||||
if faces:
|
||||
embedding = recognizer.get_normalized_embedding(image, faces[0].landmarks)
|
||||
print(f"Embedding shape: {embedding.shape}") # (1, 512)
|
||||
```
|
||||
|
||||
### Model Variants
|
||||
|
||||
```python
|
||||
from uniface import AdaFace
|
||||
from uniface.constants import AdaFaceWeights
|
||||
|
||||
# Lightweight (default)
|
||||
recognizer = AdaFace(model_name=AdaFaceWeights.IR_18)
|
||||
|
||||
# High accuracy
|
||||
recognizer = AdaFace(model_name=AdaFaceWeights.IR_101)
|
||||
```
|
||||
|
||||
| Variant | Dataset | Size | IJB-B | IJB-C |
|
||||
|---------|---------|------|-------|-------|
|
||||
| **IR_18** :material-check-circle: | WebFace4M | 92 MB | 93.03% | 94.99% |
|
||||
| IR_101 | WebFace12M | 249 MB | - | 97.66% |
|
||||
|
||||
!!! info "Benchmark Metrics"
|
||||
IJB-B and IJB-C accuracy reported as TAR@FAR=0.01%
|
||||
|
||||
---
|
||||
|
||||
## ArcFace
|
||||
|
||||
Face recognition using additive angular margin loss.
|
||||
|
||||
### Basic Usage
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace, ArcFace
|
||||
|
||||
detector = RetinaFace()
|
||||
recognizer = ArcFace()
|
||||
|
||||
# Detect face
|
||||
faces = detector.detect(image)
|
||||
|
||||
# Extract embedding
|
||||
if faces:
|
||||
embedding = recognizer.get_normalized_embedding(image, faces[0].landmarks)
|
||||
print(f"Embedding shape: {embedding.shape}") # (1, 512)
|
||||
```
|
||||
|
||||
### Model Variants
|
||||
|
||||
```python
|
||||
from uniface import ArcFace
|
||||
from uniface.constants import ArcFaceWeights
|
||||
|
||||
# Lightweight (default)
|
||||
recognizer = ArcFace(model_name=ArcFaceWeights.MNET)
|
||||
|
||||
# High accuracy
|
||||
recognizer = ArcFace(model_name=ArcFaceWeights.RESNET)
|
||||
```
|
||||
|
||||
| Variant | Backbone | Size | LFW | CFP-FP | AgeDB-30 | IJB-C |
|
||||
|---------|----------|------|-----|--------|----------|-------|
|
||||
| **MNET** :material-check-circle: | MobileNet | 8 MB | 99.70% | 98.00% | 96.58% | 95.02% |
|
||||
| RESNET | ResNet50 | 166 MB | 99.83% | 99.33% | 98.23% | 97.25% |
|
||||
|
||||
!!! info "Training Data & Metrics"
|
||||
**Dataset**: Trained on WebFace600K (600K images)
|
||||
|
||||
**Accuracy**: IJB-C reported as TAR@FAR=1e-4
|
||||
|
||||
---
|
||||
|
||||
## MobileFace
|
||||
|
||||
Lightweight face recognition models with MobileNet backbones.
|
||||
|
||||
### Basic Usage
|
||||
|
||||
```python
|
||||
from uniface import MobileFace
|
||||
|
||||
recognizer = MobileFace()
|
||||
embedding = recognizer.get_normalized_embedding(image, landmarks)
|
||||
```
|
||||
|
||||
### Model Variants
|
||||
|
||||
```python
|
||||
from uniface import MobileFace
|
||||
from uniface.constants import MobileFaceWeights
|
||||
|
||||
# Ultra-lightweight
|
||||
recognizer = MobileFace(model_name=MobileFaceWeights.MNET_025)
|
||||
|
||||
# Balanced (default)
|
||||
recognizer = MobileFace(model_name=MobileFaceWeights.MNET_V2)
|
||||
|
||||
# Higher accuracy
|
||||
recognizer = MobileFace(model_name=MobileFaceWeights.MNET_V3_LARGE)
|
||||
```
|
||||
|
||||
| Variant | Params | Size | LFW | CALFW | CPLFW | AgeDB-30 |
|
||||
|---------|--------|------|-----|-------|-------|----------|
|
||||
| MNET_025 | 0.36M | 1 MB | 98.76% | 92.02% | 82.37% | 90.02% |
|
||||
| **MNET_V2** :material-check-circle: | 2.29M | 4 MB | 99.55% | 94.87% | 86.89% | 95.16% |
|
||||
| MNET_V3_SMALL | 1.25M | 3 MB | 99.30% | 93.77% | 85.29% | 92.79% |
|
||||
| MNET_V3_LARGE | 3.52M | 10 MB | 99.53% | 94.56% | 86.79% | 95.13% |
|
||||
|
||||
---
|
||||
|
||||
## SphereFace
|
||||
|
||||
Face recognition using angular softmax loss (A-Softmax).
|
||||
|
||||
### Basic Usage
|
||||
|
||||
```python
|
||||
from uniface import SphereFace
|
||||
from uniface.constants import SphereFaceWeights
|
||||
|
||||
recognizer = SphereFace(model_name=SphereFaceWeights.SPHERE20)
|
||||
embedding = recognizer.get_normalized_embedding(image, landmarks)
|
||||
```
|
||||
|
||||
| Variant | Params | Size | LFW | CALFW | CPLFW | AgeDB-30 |
|
||||
|---------|--------|------|-----|-------|-------|----------|
|
||||
| SPHERE20 | 24.5M | 50 MB | 99.67% | 95.61% | 88.75% | 96.58% |
|
||||
| SPHERE36 | 34.6M | 92 MB | 99.72% | 95.64% | 89.92% | 96.83% |
|
||||
|
||||
---
|
||||
|
||||
## Face Comparison
|
||||
|
||||
### Compute Similarity
|
||||
|
||||
```python
|
||||
from uniface import compute_similarity
|
||||
import numpy as np
|
||||
|
||||
# Extract embeddings
|
||||
emb1 = recognizer.get_normalized_embedding(image1, landmarks1)
|
||||
emb2 = recognizer.get_normalized_embedding(image2, landmarks2)
|
||||
|
||||
# Method 1: Using utility function
|
||||
similarity = compute_similarity(emb1, emb2)
|
||||
|
||||
# Method 2: Direct computation
|
||||
similarity = np.dot(emb1, emb2.T)[0][0]
|
||||
|
||||
print(f"Similarity: {similarity:.4f}")
|
||||
```
|
||||
|
||||
### Threshold Guidelines
|
||||
|
||||
| Threshold | Decision | Use Case |
|
||||
|-----------|----------|----------|
|
||||
| > 0.7 | Very high confidence | Security-critical |
|
||||
| > 0.6 | Same person | General verification |
|
||||
| 0.4 - 0.6 | Uncertain | Manual review needed |
|
||||
| < 0.4 | Different people | Rejection |
|
||||
|
||||
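As a quick illustration of applying these thresholds, here is a small hypothetical `verify` helper. It uses the same dot-product similarity as Method 2 above, with the general-verification threshold of 0.6 as the default:

```python
import numpy as np

def verify(emb1, emb2, threshold=0.6):
    """Return (is_same_person, similarity) for two normalized (1, 512) embeddings."""
    similarity = float(np.dot(emb1, emb2.T)[0][0])
    return similarity > threshold, similarity

is_same, score = verify(emb1, emb2)  # embeddings from get_normalized_embedding above
print(f"Same person: {is_same} (similarity {score:.4f})")
```
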
---
|
||||
|
||||
## Face Alignment
|
||||
|
||||
Recognition models require aligned faces. UniFace handles this internally:
|
||||
|
||||
```python
|
||||
# Alignment is done automatically
|
||||
embedding = recognizer.get_normalized_embedding(image, landmarks)
|
||||
|
||||
# Or manually align
|
||||
from uniface import face_alignment
|
||||
|
||||
aligned_face = face_alignment(image, landmarks)
|
||||
# Returns: 112x112 aligned face image
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Building a Face Database
|
||||
|
||||
```python
|
||||
import cv2
import numpy as np
from uniface import RetinaFace, ArcFace
|
||||
|
||||
detector = RetinaFace()
|
||||
recognizer = ArcFace()
|
||||
|
||||
# Build database (person_images: dict mapping person_id -> image file path)
|
||||
database = {}
|
||||
for person_id, image_path in person_images.items():
|
||||
image = cv2.imread(image_path)
|
||||
faces = detector.detect(image)
|
||||
|
||||
if faces:
|
||||
embedding = recognizer.get_normalized_embedding(image, faces[0].landmarks)
|
||||
database[person_id] = embedding
|
||||
|
||||
# Save for later use
|
||||
np.savez('face_database.npz', **database)
|
||||
|
||||
# Load database
|
||||
data = np.load('face_database.npz')
|
||||
database = {key: data[key] for key in data.files}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Face Search
|
||||
|
||||
Find a person in a database:
|
||||
|
||||
```python
|
||||
def search_face(query_embedding, database, threshold=0.6):
|
||||
"""Find best match in database."""
|
||||
best_match = None
|
||||
best_similarity = -1
|
||||
|
||||
for person_id, db_embedding in database.items():
|
||||
similarity = np.dot(query_embedding, db_embedding.T)[0][0]
|
||||
|
||||
if similarity > best_similarity and similarity > threshold:
|
||||
best_similarity = similarity
|
||||
best_match = person_id
|
||||
|
||||
return best_match, best_similarity
|
||||
|
||||
# Usage
|
||||
query_embedding = recognizer.get_normalized_embedding(query_image, landmarks)
|
||||
match, similarity = search_face(query_embedding, database)
|
||||
|
||||
if match:
|
||||
print(f"Found: {match} (similarity: {similarity:.4f})")
|
||||
else:
|
||||
print("No match found")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Factory Function
|
||||
|
||||
```python
|
||||
from uniface import create_recognizer
|
||||
|
||||
# Available methods: 'arcface', 'adaface', 'mobileface', 'sphereface'
|
||||
recognizer = create_recognizer('arcface')
|
||||
recognizer = create_recognizer('adaface')
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## See Also
|
||||
|
||||
- [Detection Module](detection.md) - Detect faces first
|
||||
- [Face Search Recipe](../recipes/face-search.md) - Complete search system
|
||||
- [Thresholds](../concepts/thresholds-calibration.md) - Calibration guide
|
||||
266
docs/modules/spoofing.md
Normal file
@@ -0,0 +1,266 @@
|
||||
# Anti-Spoofing
|
||||
|
||||
Face anti-spoofing detects whether a face is real (live) or fake (photo, video replay, mask).
|
||||
|
||||
---
|
||||
|
||||
## Available Models
|
||||
|
||||
| Model | Size |
|
||||
|-------|------|
|
||||
| MiniFASNet V1SE | 1.2 MB |
|
||||
| **MiniFASNet V2** :material-check-circle: | 1.2 MB |
|
||||
|
||||
---
|
||||
|
||||
## Basic Usage
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace
|
||||
from uniface.spoofing import MiniFASNet
|
||||
|
||||
detector = RetinaFace()
|
||||
spoofer = MiniFASNet()
|
||||
|
||||
image = cv2.imread("photo.jpg")
|
||||
faces = detector.detect(image)
|
||||
|
||||
for face in faces:
|
||||
result = spoofer.predict(image, face.bbox)
|
||||
|
||||
label = "Real" if result.is_real else "Fake"
|
||||
print(f"{label}: {result.confidence:.1%}")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Output Format
|
||||
|
||||
```python
|
||||
result = spoofer.predict(image, face.bbox)
|
||||
|
||||
# SpoofingResult dataclass
|
||||
result.is_real # True = real, False = fake
|
||||
result.confidence # 0.0 to 1.0
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Model Variants
|
||||
|
||||
```python
|
||||
from uniface.spoofing import MiniFASNet
|
||||
from uniface.constants import MiniFASNetWeights
|
||||
|
||||
# Default (V2, recommended)
|
||||
spoofer = MiniFASNet()
|
||||
|
||||
# V1SE variant
|
||||
spoofer = MiniFASNet(model_name=MiniFASNetWeights.V1SE)
|
||||
```
|
||||
|
||||
| Variant | Size | Scale Factor |
|
||||
|---------|------|--------------|
|
||||
| V1SE | 1.2 MB | 4.0 |
|
||||
| **V2** :material-check-circle: | 1.2 MB | 2.7 |
|
||||
|
||||
---
|
||||
|
||||
## Confidence Thresholds
|
||||
|
||||
The default threshold is 0.5. Adjust for your use case:
|
||||
|
||||
```python
|
||||
result = spoofer.predict(image, face.bbox)
|
||||
|
||||
# High security (fewer false accepts)
|
||||
HIGH_THRESHOLD = 0.7
|
||||
if result.confidence > HIGH_THRESHOLD:
|
||||
print("Real (high confidence)")
|
||||
else:
|
||||
print("Suspicious")
|
||||
|
||||
# Balanced
|
||||
if result.is_real: # Uses default 0.5 threshold
|
||||
print("Real")
|
||||
else:
|
||||
print("Fake")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Visualization
|
||||
|
||||
```python
|
||||
import cv2
|
||||
|
||||
def draw_spoofing_result(image, face, result):
|
||||
"""Draw spoofing result on image."""
|
||||
x1, y1, x2, y2 = map(int, face.bbox)
|
||||
|
||||
# Color based on result
|
||||
color = (0, 255, 0) if result.is_real else (0, 0, 255)
|
||||
label = "Real" if result.is_real else "Fake"
|
||||
|
||||
# Draw bounding box
|
||||
cv2.rectangle(image, (x1, y1), (x2, y2), color, 2)
|
||||
|
||||
# Draw label
|
||||
text = f"{label}: {result.confidence:.1%}"
|
||||
cv2.putText(image, text, (x1, y1 - 10),
|
||||
cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)
|
||||
|
||||
return image
|
||||
|
||||
# Usage
|
||||
for face in faces:
|
||||
result = spoofer.predict(image, face.bbox)
|
||||
image = draw_spoofing_result(image, face, result)
|
||||
|
||||
cv2.imwrite("spoofing_result.jpg", image)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Real-Time Liveness Detection
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace
|
||||
from uniface.spoofing import MiniFASNet
|
||||
|
||||
detector = RetinaFace()
|
||||
spoofer = MiniFASNet()
|
||||
|
||||
cap = cv2.VideoCapture(0)
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
faces = detector.detect(frame)
|
||||
|
||||
for face in faces:
|
||||
result = spoofer.predict(frame, face.bbox)
|
||||
|
||||
# Draw result
|
||||
x1, y1, x2, y2 = map(int, face.bbox)
|
||||
color = (0, 255, 0) if result.is_real else (0, 0, 255)
|
||||
label = f"{'Real' if result.is_real else 'Fake'}: {result.confidence:.0%}"
|
||||
|
||||
cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
|
||||
cv2.putText(frame, label, (x1, y1 - 10),
|
||||
cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)
|
||||
|
||||
cv2.imshow("Liveness Detection", frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Use Cases
|
||||
|
||||
### Access Control
|
||||
|
||||
```python
|
||||
def verify_liveness(image, face, spoofer, threshold=0.6):
|
||||
"""Verify face is real for access control."""
|
||||
result = spoofer.predict(image, face.bbox)
|
||||
|
||||
if result.is_real and result.confidence > threshold:
|
||||
return True, result.confidence
|
||||
return False, result.confidence
|
||||
|
||||
# Usage
|
||||
is_live, confidence = verify_liveness(image, face, spoofer)
|
||||
if is_live:
|
||||
print(f"Access granted (confidence: {confidence:.1%})")
|
||||
else:
|
||||
print(f"Access denied - possible spoof attempt")
|
||||
```
|
||||
|
||||
### Multi-Frame Verification
|
||||
|
||||
For higher security, verify across multiple frames:
|
||||
|
||||
```python
|
||||
def verify_liveness_multiframe(frames, detector, spoofer, min_real=3):
|
||||
"""Verify liveness across multiple frames."""
|
||||
real_count = 0
|
||||
|
||||
for frame in frames:
|
||||
faces = detector.detect(frame)
|
||||
if not faces:
|
||||
continue
|
||||
|
||||
result = spoofer.predict(frame, faces[0].bbox)
|
||||
if result.is_real:
|
||||
real_count += 1
|
||||
|
||||
return real_count >= min_real
|
||||
|
||||
# Collect frames and verify
|
||||
frames = []
|
||||
for _ in range(5):
|
||||
ret, frame = cap.read()
|
||||
if ret:
|
||||
frames.append(frame)
|
||||
|
||||
is_verified = verify_liveness_multiframe(frames, detector, spoofer)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Attack Types Detected
|
||||
|
||||
MiniFASNet can detect various spoof attacks:
|
||||
|
||||
| Attack Type | Detection |
|-------------|-----------|
| Printed photos | ✅ |
| Screen replay | ✅ |
| Video replay | ✅ |
| Paper masks | ✅ |
| 3D masks | Limited |
|
||||
|
||||
!!! warning "Limitations"

    - High-quality 3D masks may not be detected
    - Performance varies with lighting and image quality
    - Always combine with other verification methods for high-security applications (see the sketch below)
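
One way to combine checks is to gate recognition on a passing liveness result. A minimal sketch (assumes the `detector`, `spoofer`, and `image` from the examples above, plus an `ArcFace` recognizer and your own enrolled-embedding comparison):

```python
from uniface import ArcFace

recognizer = ArcFace()

faces = detector.detect(image)
for face in faces:
    spoof = spoofer.predict(image, face.bbox)
    if not spoof.is_real:
        print("Possible spoof attempt - skipping recognition")
        continue

    # Only extract an embedding for faces that pass the liveness check
    embedding = recognizer.get_normalized_embedding(image, face.landmarks)
    # ... compare `embedding` against your enrolled database here ...
```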
|
||||
|
||||
---
|
||||
|
||||
## Command-Line Tool
|
||||
|
||||
```bash
|
||||
# Image
|
||||
python tools/spoofing.py --source photo.jpg
|
||||
|
||||
# Webcam
|
||||
python tools/spoofing.py --source 0
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Factory Function
|
||||
|
||||
```python
|
||||
from uniface import create_spoofer
|
||||
|
||||
spoofer = create_spoofer() # Returns MiniFASNet
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Next Steps
|
||||
|
||||
- [Privacy](privacy.md) - Face anonymization
|
||||
- [Detection](detection.md) - Face detection
|
||||
- [Recognition](recognition.md) - Face recognition
|
||||
57
docs/notebooks.md
Normal file
@@ -0,0 +1,57 @@
|
||||
# Interactive Notebooks
|
||||
|
||||
Run UniFace examples directly in your browser with Google Colab, or download and run locally with Jupyter.
|
||||
|
||||
---
|
||||
|
||||
## Available Notebooks
|
||||
|
||||
| Notebook | Colab | Description |
|----------|:-----:|-------------|
| [Face Detection](https://github.com/yakhyo/uniface/blob/main/examples/01_face_detection.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/01_face_detection.ipynb) | Detect faces and 5-point landmarks |
| [Face Alignment](https://github.com/yakhyo/uniface/blob/main/examples/02_face_alignment.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/02_face_alignment.ipynb) | Align faces for recognition |
| [Face Verification](https://github.com/yakhyo/uniface/blob/main/examples/03_face_verification.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/03_face_verification.ipynb) | Compare faces for identity |
| [Face Search](https://github.com/yakhyo/uniface/blob/main/examples/04_face_search.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/04_face_search.ipynb) | Find a person in group photos |
| [Face Analyzer](https://github.com/yakhyo/uniface/blob/main/examples/05_face_analyzer.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/05_face_analyzer.ipynb) | All-in-one face analysis |
| [Face Parsing](https://github.com/yakhyo/uniface/blob/main/examples/06_face_parsing.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/06_face_parsing.ipynb) | Semantic face segmentation |
| [Face Anonymization](https://github.com/yakhyo/uniface/blob/main/examples/07_face_anonymization.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/07_face_anonymization.ipynb) | Privacy-preserving blur |
| [Gaze Estimation](https://github.com/yakhyo/uniface/blob/main/examples/08_gaze_estimation.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/08_gaze_estimation.ipynb) | Gaze direction estimation |
|
||||
|
||||
---
|
||||
|
||||
## Running Locally
|
||||
|
||||
Download and run notebooks on your machine:
|
||||
|
||||
```bash
|
||||
# Clone the repository
|
||||
git clone https://github.com/yakhyo/uniface.git
|
||||
cd uniface
|
||||
|
||||
# Install dependencies
|
||||
pip install uniface jupyter
|
||||
|
||||
# Launch Jupyter
|
||||
jupyter notebook examples/
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Running on Google Colab
|
||||
|
||||
Click any **"Open in Colab"** badge above. The notebooks automatically:
|
||||
|
||||
1. Install UniFace via pip
|
||||
2. Clone the repository to access test images
|
||||
3. Set up the correct working directory
|
||||
|
||||
!!! tip "GPU Acceleration"
|
||||
In Colab, go to **Runtime → Change runtime type → GPU** for faster inference.
|
||||
|
||||
---
|
||||
|
||||
## Next Steps
|
||||
|
||||
- [Quickstart](quickstart.md) - Code snippets for common use cases
|
||||
- [Tutorials](recipes/image-pipeline.md) - Step-by-step workflow guides
|
||||
- [API Reference](modules/detection.md) - Detailed module documentation
|
||||
5
docs/overrides/home.html
Normal file
@@ -0,0 +1,5 @@
|
||||
{% extends "main.html" %}
|
||||
|
||||
{% block source %}
|
||||
<!-- Hide edit/view source on home page -->
|
||||
{% endblock %}
|
||||
426
docs/quickstart.md
Normal file
@@ -0,0 +1,426 @@
|
||||
# Quickstart
|
||||
|
||||
Get up and running with UniFace in 5 minutes. This guide covers the most common use cases.
|
||||
|
||||
---
|
||||
|
||||
## Face Detection
|
||||
|
||||
Detect faces in an image:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace
|
||||
|
||||
# Load image
|
||||
image = cv2.imread("photo.jpg")
|
||||
|
||||
# Initialize detector (models auto-download on first use)
|
||||
detector = RetinaFace()
|
||||
|
||||
# Detect faces
|
||||
faces = detector.detect(image)
|
||||
|
||||
# Print results
|
||||
for i, face in enumerate(faces):
|
||||
print(f"Face {i+1}:")
|
||||
print(f" Confidence: {face.confidence:.2f}")
|
||||
print(f" BBox: {face.bbox}")
|
||||
print(f" Landmarks: {len(face.landmarks)} points")
|
||||
```
|
||||
|
||||
**Output:**
|
||||
|
||||
```
|
||||
Face 1:
|
||||
Confidence: 0.99
|
||||
BBox: [120.5, 85.3, 245.8, 210.6]
|
||||
Landmarks: 5 points
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Visualize Detections
|
||||
|
||||
Draw bounding boxes and landmarks:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
# Detect faces
|
||||
detector = RetinaFace()
|
||||
image = cv2.imread("photo.jpg")
|
||||
faces = detector.detect(image)
|
||||
|
||||
# Extract visualization data
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
|
||||
# Draw on image
|
||||
draw_detections(
|
||||
image=image,
|
||||
bboxes=bboxes,
|
||||
scores=scores,
|
||||
landmarks=landmarks,
|
||||
vis_threshold=0.6,
|
||||
)
|
||||
|
||||
# Save result
|
||||
cv2.imwrite("output.jpg", image)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Face Recognition
|
||||
|
||||
Compare two faces:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
import numpy as np
|
||||
from uniface import RetinaFace, ArcFace
|
||||
|
||||
# Initialize models
|
||||
detector = RetinaFace()
|
||||
recognizer = ArcFace()
|
||||
|
||||
# Load two images
|
||||
image1 = cv2.imread("person1.jpg")
|
||||
image2 = cv2.imread("person2.jpg")
|
||||
|
||||
# Detect faces
|
||||
faces1 = detector.detect(image1)
|
||||
faces2 = detector.detect(image2)
|
||||
|
||||
if faces1 and faces2:
|
||||
# Extract embeddings
|
||||
emb1 = recognizer.get_normalized_embedding(image1, faces1[0].landmarks)
|
||||
emb2 = recognizer.get_normalized_embedding(image2, faces2[0].landmarks)
|
||||
|
||||
# Compute similarity (cosine similarity)
|
||||
similarity = np.dot(emb1, emb2.T)[0][0]
|
||||
|
||||
# Interpret result
|
||||
if similarity > 0.6:
|
||||
print(f"Same person (similarity: {similarity:.3f})")
|
||||
else:
|
||||
print(f"Different people (similarity: {similarity:.3f})")
|
||||
```
|
||||
|
||||
!!! tip "Similarity Thresholds"
|
||||
- `> 0.6`: Same person (high confidence)
|
||||
- `0.4 - 0.6`: Uncertain (manual review)
|
||||
- `< 0.4`: Different people
|
||||
|
||||
---
|
||||
|
||||
## Age & Gender Detection
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace, AgeGender
|
||||
|
||||
# Initialize models
|
||||
detector = RetinaFace()
|
||||
age_gender = AgeGender()
|
||||
|
||||
# Load image
|
||||
image = cv2.imread("photo.jpg")
|
||||
faces = detector.detect(image)
|
||||
|
||||
# Predict attributes
|
||||
for i, face in enumerate(faces):
|
||||
result = age_gender.predict(image, face.bbox)
|
||||
print(f"Face {i+1}: {result.sex}, {result.age} years old")
|
||||
```
|
||||
|
||||
**Output:**
|
||||
|
||||
```
|
||||
Face 1: Male, 32 years old
|
||||
Face 2: Female, 28 years old
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## FairFace Attributes
|
||||
|
||||
Detect race, gender, and age group:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace, FairFace
|
||||
|
||||
detector = RetinaFace()
|
||||
fairface = FairFace()
|
||||
|
||||
image = cv2.imread("photo.jpg")
|
||||
faces = detector.detect(image)
|
||||
|
||||
for i, face in enumerate(faces):
|
||||
result = fairface.predict(image, face.bbox)
|
||||
print(f"Face {i+1}: {result.sex}, {result.age_group}, {result.race}")
|
||||
```
|
||||
|
||||
**Output:**
|
||||
|
||||
```
|
||||
Face 1: Male, 30-39, East Asian
|
||||
Face 2: Female, 20-29, White
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Facial Landmarks (106 Points)
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace, Landmark106
|
||||
|
||||
detector = RetinaFace()
|
||||
landmarker = Landmark106()
|
||||
|
||||
image = cv2.imread("photo.jpg")
|
||||
faces = detector.detect(image)
|
||||
|
||||
if faces:
|
||||
landmarks = landmarker.get_landmarks(image, faces[0].bbox)
|
||||
print(f"Detected {len(landmarks)} landmarks")
|
||||
|
||||
# Draw landmarks
|
||||
for x, y in landmarks.astype(int):
|
||||
cv2.circle(image, (x, y), 2, (0, 255, 0), -1)
|
||||
|
||||
cv2.imwrite("landmarks.jpg", image)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Gaze Estimation
|
||||
|
||||
```python
|
||||
import cv2
|
||||
import numpy as np
|
||||
from uniface import RetinaFace, MobileGaze
|
||||
from uniface.visualization import draw_gaze
|
||||
|
||||
detector = RetinaFace()
|
||||
gaze_estimator = MobileGaze()
|
||||
|
||||
image = cv2.imread("photo.jpg")
|
||||
faces = detector.detect(image)
|
||||
|
||||
for i, face in enumerate(faces):
|
||||
x1, y1, x2, y2 = map(int, face.bbox[:4])
|
||||
face_crop = image[y1:y2, x1:x2]
|
||||
|
||||
if face_crop.size > 0:
|
||||
result = gaze_estimator.estimate(face_crop)
|
||||
print(f"Face {i+1}: pitch={np.degrees(result.pitch):.1f}°, yaw={np.degrees(result.yaw):.1f}°")
|
||||
|
||||
# Draw gaze direction
|
||||
draw_gaze(image, face.bbox, result.pitch, result.yaw)
|
||||
|
||||
cv2.imwrite("gaze_output.jpg", image)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Face Parsing
|
||||
|
||||
Segment face into semantic components:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
import numpy as np
|
||||
from uniface.parsing import BiSeNet
|
||||
from uniface.visualization import vis_parsing_maps
|
||||
|
||||
parser = BiSeNet()
|
||||
|
||||
# Load face image (already cropped)
|
||||
face_image = cv2.imread("face.jpg")
|
||||
|
||||
# Parse face into 19 components
|
||||
mask = parser.parse(face_image)
|
||||
|
||||
# Visualize with overlay
|
||||
face_rgb = cv2.cvtColor(face_image, cv2.COLOR_BGR2RGB)
|
||||
vis_result = vis_parsing_maps(face_rgb, mask, save_image=False)
|
||||
|
||||
print(f"Detected {len(np.unique(mask))} facial components")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Face Anonymization
|
||||
|
||||
Blur faces for privacy protection:
|
||||
|
||||
```python
|
||||
from uniface.privacy import anonymize_faces
|
||||
import cv2
|
||||
|
||||
# One-liner: automatic detection and blurring
|
||||
image = cv2.imread("group_photo.jpg")
|
||||
anonymized = anonymize_faces(image, method='pixelate')
|
||||
cv2.imwrite("anonymized.jpg", anonymized)
|
||||
```
|
||||
|
||||
**Manual control:**
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace
|
||||
from uniface.privacy import BlurFace
|
||||
|
||||
detector = RetinaFace()
|
||||
blurrer = BlurFace(method='gaussian', blur_strength=5.0)
|
||||
|
||||
faces = detector.detect(image)
|
||||
anonymized = blurrer.anonymize(image, faces)
|
||||
```
|
||||
|
||||
**Available methods:**
|
||||
|
||||
| Method | Description |
|--------|-------------|
| `pixelate` | Blocky effect (news media standard) |
| `gaussian` | Smooth, natural blur |
| `blackout` | Solid color boxes (maximum privacy) |
| `elliptical` | Soft oval blur (natural face shape) |
| `median` | Edge-preserving blur |
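
Any of these values can be passed to the same one-liner shown above. For example, to use solid boxes instead of pixelation:

```python
anonymized = anonymize_faces(image, method='blackout')
```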
|
||||
|
||||
---
|
||||
|
||||
## Face Anti-Spoofing
|
||||
|
||||
Detect real vs. fake faces:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace
|
||||
from uniface.spoofing import MiniFASNet
|
||||
|
||||
detector = RetinaFace()
|
||||
spoofer = MiniFASNet()
|
||||
|
||||
image = cv2.imread("photo.jpg")
|
||||
faces = detector.detect(image)
|
||||
|
||||
for i, face in enumerate(faces):
|
||||
result = spoofer.predict(image, face.bbox)
|
||||
label = 'Real' if result.is_real else 'Fake'
|
||||
print(f"Face {i+1}: {label} ({result.confidence:.1%})")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Webcam Demo
|
||||
|
||||
Real-time face detection:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
detector = RetinaFace()
|
||||
cap = cv2.VideoCapture(0)
|
||||
|
||||
print("Press 'q' to quit")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
faces = detector.detect(frame)
|
||||
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
draw_detections(image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks)
|
||||
|
||||
cv2.imshow("UniFace - Press 'q' to quit", frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Model Selection
|
||||
|
||||
For detailed model comparisons and benchmarks, see the [Model Zoo](models.md).
|
||||
|
||||
**Available models by task:**
|
||||
|
||||
| Task | Available Models |
|------|------------------|
| Detection | `RetinaFace`, `SCRFD`, `YOLOv5Face`, `YOLOv8Face` |
| Recognition | `ArcFace`, `AdaFace`, `MobileFace`, `SphereFace` |
| Gaze | `MobileGaze` (ResNet18/34/50, MobileNetV2, MobileOneS0) |
| Parsing | `BiSeNet` (ResNet18/34) |
| Attributes | `AgeGender`, `FairFace`, `Emotion` |
| Anti-Spoofing | `MiniFASNet` (V1SE, V2) |
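
Swapping a model is just a different import with the same call pattern used above. A hedged sketch (assuming these classes are re-exported at the package level like `RetinaFace` and `ArcFace`):

```python
from uniface import SCRFD, AdaFace

detector = SCRFD()      # alternative detector
recognizer = AdaFace()  # alternative recognizer

faces = detector.detect(image)
if faces:
    embedding = recognizer.get_normalized_embedding(image, faces[0].landmarks)
```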
|
||||
|
||||
---
|
||||
|
||||
## Common Issues
|
||||
|
||||
### Models Not Downloading
|
||||
|
||||
```python
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.constants import RetinaFaceWeights
|
||||
|
||||
# Manually download a model
|
||||
model_path = verify_model_weights(RetinaFaceWeights.MNET_V2)
|
||||
print(f"Model downloaded to: {model_path}")
|
||||
```
|
||||
|
||||
### Check Hardware Acceleration
|
||||
|
||||
```python
|
||||
import onnxruntime as ort
|
||||
print("Available providers:", ort.get_available_providers())
|
||||
|
||||
# macOS M-series should show: ['CoreMLExecutionProvider', ...]
|
||||
# NVIDIA GPU should show: ['CUDAExecutionProvider', ...]
|
||||
```
|
||||
|
||||
### Slow Performance on Mac
|
||||
|
||||
Verify you're using the ARM64 build of Python:
|
||||
|
||||
```bash
|
||||
python -c "import platform; print(platform.machine())"
|
||||
# Should show: arm64 (not x86_64)
|
||||
```
|
||||
|
||||
### Import Errors
|
||||
|
||||
```python
|
||||
# Correct imports
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.recognition import ArcFace
|
||||
from uniface.landmark import Landmark106
|
||||
|
||||
# Also works (re-exported at package level)
|
||||
from uniface import RetinaFace, ArcFace, Landmark106
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Next Steps
|
||||
|
||||
- [Model Zoo](models.md) - All models, benchmarks, and selection guide
|
||||
- [API Reference](modules/detection.md) - Explore individual modules and their APIs
|
||||
- [Tutorials](recipes/image-pipeline.md) - Step-by-step examples for common workflows
|
||||
- [Guides](concepts/overview.md) - Learn about the architecture and design principles
|
||||
99
docs/recipes/anonymize-stream.md
Normal file
@@ -0,0 +1,99 @@
|
||||
# Anonymize Stream
|
||||
|
||||
Blur faces in real-time video streams for privacy protection.
|
||||
|
||||
!!! note "Work in Progress"

    This page contains example code patterns. Test thoroughly before using in production.
|
||||
|
||||
---
|
||||
|
||||
## Webcam Anonymization
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace
|
||||
from uniface.privacy import BlurFace
|
||||
|
||||
detector = RetinaFace()
|
||||
blurrer = BlurFace(method='pixelate')
|
||||
cap = cv2.VideoCapture(0)
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
faces = detector.detect(frame)
|
||||
frame = blurrer.anonymize(frame, faces, inplace=True)
|
||||
|
||||
cv2.imshow('Anonymized', frame)
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Video File Anonymization
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace
|
||||
from uniface.privacy import BlurFace
|
||||
|
||||
detector = RetinaFace()
|
||||
blurrer = BlurFace(method='gaussian')
|
||||
|
||||
cap = cv2.VideoCapture("input.mp4")
|
||||
fps = cap.get(cv2.CAP_PROP_FPS)
|
||||
w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
||||
|
||||
out = cv2.VideoWriter('output.mp4', cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
faces = detector.detect(frame)
|
||||
blurrer.anonymize(frame, faces, inplace=True)
|
||||
out.write(frame)
|
||||
|
||||
cap.release()
|
||||
out.release()
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## One-Liner for Images
|
||||
|
||||
```python
|
||||
from uniface.privacy import anonymize_faces
|
||||
import cv2
|
||||
|
||||
image = cv2.imread("photo.jpg")
|
||||
result = anonymize_faces(image, method='pixelate')
|
||||
cv2.imwrite("anonymized.jpg", result)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Available Blur Methods
|
||||
|
||||
| Method | Usage |
|
||||
|--------|-------|
|
||||
| Pixelate | `BlurFace(method='pixelate', pixel_blocks=10)` |
|
||||
| Gaussian | `BlurFace(method='gaussian', blur_strength=3.0)` |
|
||||
| Blackout | `BlurFace(method='blackout', color=(0,0,0))` |
|
||||
| Elliptical | `BlurFace(method='elliptical', margin=20)` |
|
||||
| Median | `BlurFace(method='median', blur_strength=3.0)` |
|
||||
|
||||
---
|
||||
|
||||
## See Also
|
||||
|
||||
- [Privacy Module](../modules/privacy.md) - Privacy protection details
|
||||
- [Video & Webcam](video-webcam.md) - Real-time processing
|
||||
- [Detection Module](../modules/detection.md) - Face detection
|
||||
83
docs/recipes/batch-processing.md
Normal file
@@ -0,0 +1,83 @@
|
||||
# Batch Processing
|
||||
|
||||
Process multiple images efficiently.
|
||||
|
||||
!!! note "Work in Progress"

    This page contains example code patterns. Test thoroughly before using in production.
|
||||
|
||||
---
|
||||
|
||||
## Basic Batch Processing
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from pathlib import Path
|
||||
from uniface import RetinaFace
|
||||
|
||||
detector = RetinaFace()
|
||||
|
||||
def process_directory(input_dir, output_dir):
|
||||
"""Process all images in a directory."""
|
||||
input_path = Path(input_dir)
|
||||
output_path = Path(output_dir)
|
||||
output_path.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
for image_path in input_path.glob("*.jpg"):
|
||||
print(f"Processing {image_path.name}...")
|
||||
|
||||
image = cv2.imread(str(image_path))
|
||||
faces = detector.detect(image)
|
||||
|
||||
print(f" Found {len(faces)} face(s)")
|
||||
|
||||
# Process and save results
|
||||
# ... your code here ...
|
||||
|
||||
# Usage
|
||||
process_directory("input_images/", "output_images/")
|
||||
```
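
One possible way to fill in the placeholder above is to draw the detections and save the annotated image (a sketch reusing `draw_detections` from the visualization module, placed inside `process_directory` after detection):

```python
from uniface.visualization import draw_detections

# Draw boxes, scores, and landmarks on the loaded image
draw_detections(
    image=image,
    bboxes=[f.bbox for f in faces],
    scores=[f.confidence for f in faces],
    landmarks=[f.landmarks for f in faces],
)

# Save next to the other outputs
cv2.imwrite(str(output_path / image_path.name), image)
```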
|
||||
|
||||
---
|
||||
|
||||
## With Progress Bar
|
||||
|
||||
```python
|
||||
from pathlib import Path

from tqdm import tqdm

# Collect the images to process (same directory as the example above)
image_files = sorted(Path("input_images/").glob("*.jpg"))

for image_path in tqdm(image_files, desc="Processing"):
    # ... process image ...
    pass
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Extract Embeddings
|
||||
|
||||
```python
|
||||
import cv2
import numpy as np
from pathlib import Path

from uniface import RetinaFace, ArcFace
|
||||
|
||||
detector = RetinaFace()
|
||||
recognizer = ArcFace()
|
||||
|
||||
embeddings = {}
|
||||
for image_path in Path("faces/").glob("*.jpg"):
|
||||
image = cv2.imread(str(image_path))
|
||||
faces = detector.detect(image)
|
||||
|
||||
if faces:
|
||||
embedding = recognizer.get_normalized_embedding(image, faces[0].landmarks)
|
||||
embeddings[image_path.stem] = embedding
|
||||
|
||||
# Save embeddings
|
||||
np.savez("embeddings.npz", **embeddings)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## See Also
|
||||
|
||||
- [Video & Webcam](video-webcam.md) - Real-time processing
|
||||
- [Face Search](face-search.md) - Search through embeddings
|
||||
- [Image Pipeline](image-pipeline.md) - Full analysis pipeline
|
||||
- [Detection Module](../modules/detection.md) - Detection options
|
||||
114
docs/recipes/custom-models.md
Normal file
@@ -0,0 +1,114 @@
|
||||
# Custom Models
|
||||
|
||||
Add your own ONNX models to UniFace.
|
||||
|
||||
!!! note "Work in Progress"

    This page contains example code patterns for advanced users. Test thoroughly before using in production.
|
||||
|
||||
---
|
||||
|
||||
## Overview
|
||||
|
||||
UniFace is designed to be extensible. You can add custom ONNX models by:
|
||||
|
||||
1. Creating a class that inherits from the appropriate base class
|
||||
2. Implementing required methods
|
||||
3. Using the ONNX Runtime utilities provided by UniFace
|
||||
|
||||
---
|
||||
|
||||
## Add Custom Detection Model
|
||||
|
||||
```python
|
||||
from uniface.detection.base import BaseDetector
|
||||
from uniface.onnx_utils import create_onnx_session
|
||||
from uniface.types import Face
|
||||
import numpy as np
|
||||
|
||||
class MyDetector(BaseDetector):
|
||||
def __init__(self, model_path: str, confidence_threshold: float = 0.5):
|
||||
self.session = create_onnx_session(model_path)
|
||||
self.threshold = confidence_threshold
|
||||
|
||||
def detect(self, image: np.ndarray) -> list[Face]:
|
||||
# 1. Preprocess image
|
||||
input_tensor = self._preprocess(image)
|
||||
|
||||
# 2. Run inference
|
||||
outputs = self.session.run(None, {'input': input_tensor})
|
||||
|
||||
# 3. Postprocess outputs to Face objects
|
||||
faces = self._postprocess(outputs, image.shape)
|
||||
return faces
|
||||
|
||||
def _preprocess(self, image):
|
||||
# Your preprocessing logic
|
||||
# e.g., resize, normalize, transpose
|
||||
pass
|
||||
|
||||
def _postprocess(self, outputs, shape):
|
||||
# Your postprocessing logic
|
||||
# e.g., decode boxes, apply NMS, create Face objects
|
||||
pass
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Add Custom Recognition Model
|
||||
|
||||
```python
|
||||
from uniface.recognition.base import BaseRecognizer
|
||||
from uniface.onnx_utils import create_onnx_session
|
||||
from uniface import face_alignment
|
||||
import numpy as np
|
||||
|
||||
class MyRecognizer(BaseRecognizer):
|
||||
def __init__(self, model_path: str):
|
||||
self.session = create_onnx_session(model_path)
|
||||
|
||||
def get_normalized_embedding(
|
||||
self,
|
||||
image: np.ndarray,
|
||||
landmarks: np.ndarray
|
||||
) -> np.ndarray:
|
||||
# 1. Align face
|
||||
aligned = face_alignment(image, landmarks)
|
||||
|
||||
# 2. Preprocess
|
||||
input_tensor = self._preprocess(aligned)
|
||||
|
||||
# 3. Run inference
|
||||
embedding = self.session.run(None, {'input': input_tensor})[0]
|
||||
|
||||
# 4. Normalize
|
||||
embedding = embedding / np.linalg.norm(embedding)
|
||||
return embedding
|
||||
|
||||
def _preprocess(self, image):
|
||||
# Your preprocessing logic
|
||||
pass
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Usage
|
||||
|
||||
```python
|
||||
from my_module import MyDetector, MyRecognizer
|
||||
|
||||
# Use custom models
|
||||
detector = MyDetector("path/to/detection_model.onnx")
|
||||
recognizer = MyRecognizer("path/to/recognition_model.onnx")
|
||||
|
||||
# Use like built-in models
|
||||
faces = detector.detect(image)
|
||||
embedding = recognizer.get_normalized_embedding(image, faces[0].landmarks)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## See Also
|
||||
|
||||
- [Detection Module](../modules/detection.md) - Built-in detection models
|
||||
- [Recognition Module](../modules/recognition.md) - Built-in recognition models
|
||||
- [Concepts: Overview](../concepts/overview.md) - Architecture overview
|
||||
178
docs/recipes/face-search.md
Normal file
@@ -0,0 +1,178 @@
|
||||
# Face Search
|
||||
|
||||
Build a face search system for finding people in images.
|
||||
|
||||
!!! note "Work in Progress"

    This page contains example code patterns. Test thoroughly before using in production.
|
||||
|
||||
---
|
||||
|
||||
## Basic Face Database
|
||||
|
||||
```python
|
||||
import numpy as np
|
||||
import cv2
|
||||
from pathlib import Path
|
||||
from uniface import RetinaFace, ArcFace
|
||||
|
||||
class FaceDatabase:
|
||||
def __init__(self):
|
||||
self.detector = RetinaFace()
|
||||
self.recognizer = ArcFace()
|
||||
self.embeddings = {}
|
||||
|
||||
def add_face(self, person_id, image):
|
||||
"""Add a face to the database."""
|
||||
faces = self.detector.detect(image)
|
||||
if not faces:
|
||||
raise ValueError(f"No face found for {person_id}")
|
||||
|
||||
face = max(faces, key=lambda f: f.confidence)
|
||||
embedding = self.recognizer.get_normalized_embedding(image, face.landmarks)
|
||||
self.embeddings[person_id] = embedding
|
||||
return True
|
||||
|
||||
def search(self, image, threshold=0.6):
|
||||
"""Search for faces in an image."""
|
||||
faces = self.detector.detect(image)
|
||||
results = []
|
||||
|
||||
for face in faces:
|
||||
embedding = self.recognizer.get_normalized_embedding(image, face.landmarks)
|
||||
|
||||
best_match = None
|
||||
best_similarity = -1
|
||||
|
||||
for person_id, db_embedding in self.embeddings.items():
|
||||
similarity = np.dot(embedding, db_embedding.T)[0][0]
|
||||
if similarity > best_similarity:
|
||||
best_similarity = similarity
|
||||
best_match = person_id
|
||||
|
||||
results.append({
|
||||
'bbox': face.bbox,
|
||||
'match': best_match if best_similarity >= threshold else None,
|
||||
'similarity': best_similarity
|
||||
})
|
||||
|
||||
return results
|
||||
|
||||
def save(self, path):
|
||||
"""Save database to file."""
|
||||
np.savez(path, embeddings=dict(self.embeddings))
|
||||
|
||||
def load(self, path):
|
||||
"""Load database from file."""
|
||||
data = np.load(path, allow_pickle=True)
|
||||
self.embeddings = data['embeddings'].item()
|
||||
|
||||
# Usage
|
||||
db = FaceDatabase()
|
||||
|
||||
# Add faces
|
||||
for image_path in Path("known_faces/").glob("*.jpg"):
|
||||
person_id = image_path.stem
|
||||
image = cv2.imread(str(image_path))
|
||||
try:
|
||||
db.add_face(person_id, image)
|
||||
print(f"Added: {person_id}")
|
||||
except ValueError as e:
|
||||
print(f"Skipped: {e}")
|
||||
|
||||
# Save database
|
||||
db.save("face_database.npz")
|
||||
|
||||
# Search
|
||||
query_image = cv2.imread("group_photo.jpg")
|
||||
results = db.search(query_image)
|
||||
|
||||
for r in results:
|
||||
if r['match']:
|
||||
print(f"Found: {r['match']} (similarity: {r['similarity']:.3f})")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Visualization
|
||||
|
||||
```python
|
||||
import cv2
|
||||
|
||||
def visualize_search_results(image, results):
|
||||
"""Draw search results on image."""
|
||||
for r in results:
|
||||
x1, y1, x2, y2 = map(int, r['bbox'])
|
||||
|
||||
if r['match']:
|
||||
color = (0, 255, 0) # Green for match
|
||||
label = f"{r['match']} ({r['similarity']:.2f})"
|
||||
else:
|
||||
color = (0, 0, 255) # Red for unknown
|
||||
label = f"Unknown ({r['similarity']:.2f})"
|
||||
|
||||
cv2.rectangle(image, (x1, y1), (x2, y2), color, 2)
|
||||
cv2.putText(image, label, (x1, y1 - 10),
|
||||
cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)
|
||||
|
||||
return image
|
||||
|
||||
# Usage
|
||||
results = db.search(image)
|
||||
annotated = visualize_search_results(image.copy(), results)
|
||||
cv2.imwrite("search_result.jpg", annotated)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Real-Time Search
|
||||
|
||||
```python
|
||||
import cv2
|
||||
|
||||
def realtime_search(db):
|
||||
"""Real-time face search from webcam."""
|
||||
cap = cv2.VideoCapture(0)
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
results = db.search(frame, threshold=0.5)
|
||||
|
||||
for r in results:
|
||||
x1, y1, x2, y2 = map(int, r['bbox'])
|
||||
|
||||
if r['match']:
|
||||
color = (0, 255, 0)
|
||||
label = r['match']
|
||||
else:
|
||||
color = (0, 0, 255)
|
||||
label = "Unknown"
|
||||
|
||||
cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
|
||||
cv2.putText(frame, label, (x1, y1 - 10),
|
||||
cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)
|
||||
|
||||
cv2.imshow("Face Search", frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
# Usage
|
||||
db = FaceDatabase()
|
||||
db.load("face_database.npz")
|
||||
realtime_search(db)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## See Also
|
||||
|
||||
- [Recognition Module](../modules/recognition.md) - Face recognition details
|
||||
- [Batch Processing](batch-processing.md) - Process multiple files
|
||||
- [Video & Webcam](video-webcam.md) - Real-time processing
|
||||
- [Concepts: Thresholds](../concepts/thresholds-calibration.md) - Tuning similarity thresholds
|
||||
281
docs/recipes/image-pipeline.md
Normal file
@@ -0,0 +1,281 @@
|
||||
# Image Pipeline
|
||||
|
||||
A complete pipeline for processing images with detection, recognition, and attribute analysis.
|
||||
|
||||
---
|
||||
|
||||
## Basic Pipeline
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace, ArcFace, AgeGender
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
# Initialize models
|
||||
detector = RetinaFace()
|
||||
recognizer = ArcFace()
|
||||
age_gender = AgeGender()
|
||||
|
||||
def process_image(image_path):
|
||||
"""Process a single image through the full pipeline."""
|
||||
# Load image
|
||||
image = cv2.imread(image_path)
|
||||
|
||||
# Step 1: Detect faces
|
||||
faces = detector.detect(image)
|
||||
print(f"Found {len(faces)} face(s)")
|
||||
|
||||
results = []
|
||||
|
||||
for i, face in enumerate(faces):
|
||||
# Step 2: Extract embedding
|
||||
embedding = recognizer.get_normalized_embedding(image, face.landmarks)
|
||||
|
||||
# Step 3: Predict attributes
|
||||
attrs = age_gender.predict(image, face.bbox)
|
||||
|
||||
results.append({
|
||||
'face_id': i,
|
||||
'bbox': face.bbox,
|
||||
'confidence': face.confidence,
|
||||
'embedding': embedding,
|
||||
'gender': attrs.sex,
|
||||
'age': attrs.age
|
||||
})
|
||||
|
||||
print(f" Face {i+1}: {attrs.sex}, {attrs.age} years old")
|
||||
|
||||
# Visualize
|
||||
draw_detections(
|
||||
image=image,
|
||||
bboxes=[f.bbox for f in faces],
|
||||
scores=[f.confidence for f in faces],
|
||||
landmarks=[f.landmarks for f in faces]
|
||||
)
|
||||
|
||||
return image, results
|
||||
|
||||
# Usage
|
||||
result_image, results = process_image("photo.jpg")
|
||||
cv2.imwrite("result.jpg", result_image)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Using FaceAnalyzer
|
||||
|
||||
For convenience, use the built-in `FaceAnalyzer`:
|
||||
|
||||
```python
|
||||
from uniface import FaceAnalyzer
|
||||
import cv2
|
||||
|
||||
# Initialize with desired modules
|
||||
analyzer = FaceAnalyzer(
|
||||
detect=True,
|
||||
recognize=True,
|
||||
attributes=True
|
||||
)
|
||||
|
||||
# Process image
|
||||
image = cv2.imread("photo.jpg")
|
||||
faces = analyzer.analyze(image)
|
||||
|
||||
# Access enriched Face objects
|
||||
for face in faces:
|
||||
print(f"Confidence: {face.confidence:.2f}")
|
||||
print(f"Embedding: {face.embedding.shape}")
|
||||
print(f"Age: {face.age}, Gender: {face.sex}")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Full Analysis Pipeline
|
||||
|
||||
Complete pipeline with all modules:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
import numpy as np
|
||||
from uniface import (
|
||||
RetinaFace, ArcFace, AgeGender, FairFace,
|
||||
Landmark106, MobileGaze
|
||||
)
|
||||
from uniface.parsing import BiSeNet
|
||||
from uniface.spoofing import MiniFASNet
|
||||
from uniface.visualization import draw_detections, draw_gaze
|
||||
|
||||
class FaceAnalysisPipeline:
|
||||
def __init__(self):
|
||||
# Initialize all models
|
||||
self.detector = RetinaFace()
|
||||
self.recognizer = ArcFace()
|
||||
self.age_gender = AgeGender()
|
||||
self.fairface = FairFace()
|
||||
self.landmarker = Landmark106()
|
||||
self.gaze = MobileGaze()
|
||||
self.parser = BiSeNet()
|
||||
self.spoofer = MiniFASNet()
|
||||
|
||||
def analyze(self, image):
|
||||
"""Run full analysis pipeline."""
|
||||
faces = self.detector.detect(image)
|
||||
results = []
|
||||
|
||||
for face in faces:
|
||||
result = {
|
||||
'bbox': face.bbox,
|
||||
'confidence': face.confidence,
|
||||
'landmarks_5': face.landmarks
|
||||
}
|
||||
|
||||
# Recognition embedding
|
||||
result['embedding'] = self.recognizer.get_normalized_embedding(
|
||||
image, face.landmarks
|
||||
)
|
||||
|
||||
# Attributes
|
||||
ag_result = self.age_gender.predict(image, face.bbox)
|
||||
result['age'] = ag_result.age
|
||||
result['gender'] = ag_result.sex
|
||||
|
||||
# FairFace attributes
|
||||
ff_result = self.fairface.predict(image, face.bbox)
|
||||
result['age_group'] = ff_result.age_group
|
||||
result['race'] = ff_result.race
|
||||
|
||||
# 106-point landmarks
|
||||
result['landmarks_106'] = self.landmarker.get_landmarks(
|
||||
image, face.bbox
|
||||
)
|
||||
|
||||
# Gaze estimation
|
||||
x1, y1, x2, y2 = map(int, face.bbox)
|
||||
face_crop = image[y1:y2, x1:x2]
|
||||
if face_crop.size > 0:
|
||||
gaze_result = self.gaze.estimate(face_crop)
|
||||
result['gaze_pitch'] = gaze_result.pitch
|
||||
result['gaze_yaw'] = gaze_result.yaw
|
||||
|
||||
# Face parsing
|
||||
if face_crop.size > 0:
|
||||
result['parsing_mask'] = self.parser.parse(face_crop)
|
||||
|
||||
# Anti-spoofing
|
||||
spoof_result = self.spoofer.predict(image, face.bbox)
|
||||
result['is_real'] = spoof_result.is_real
|
||||
result['spoof_confidence'] = spoof_result.confidence
|
||||
|
||||
results.append(result)
|
||||
|
||||
return results
|
||||
|
||||
# Usage
|
||||
pipeline = FaceAnalysisPipeline()
|
||||
results = pipeline.analyze(cv2.imread("photo.jpg"))
|
||||
|
||||
for i, r in enumerate(results):
|
||||
print(f"\nFace {i+1}:")
|
||||
print(f" Gender: {r['gender']}, Age: {r['age']}")
|
||||
print(f" Race: {r['race']}, Age Group: {r['age_group']}")
|
||||
print(f" Gaze: pitch={np.degrees(r['gaze_pitch']):.1f}°")
|
||||
print(f" Real: {r['is_real']} ({r['spoof_confidence']:.1%})")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Visualization Pipeline
|
||||
|
||||
```python
|
||||
import cv2
|
||||
import numpy as np
|
||||
from uniface import RetinaFace, AgeGender, MobileGaze
|
||||
from uniface.visualization import draw_detections, draw_gaze
|
||||
|
||||
def visualize_analysis(image_path, output_path):
|
||||
"""Create annotated visualization of face analysis."""
|
||||
detector = RetinaFace()
|
||||
age_gender = AgeGender()
|
||||
gaze = MobileGaze()
|
||||
|
||||
image = cv2.imread(image_path)
|
||||
faces = detector.detect(image)
|
||||
|
||||
for face in faces:
|
||||
x1, y1, x2, y2 = map(int, face.bbox)
|
||||
|
||||
# Draw bounding box
|
||||
cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
|
||||
|
||||
# Age and gender
|
||||
attrs = age_gender.predict(image, face.bbox)
|
||||
label = f"{attrs.sex}, {attrs.age}y"
|
||||
cv2.putText(image, label, (x1, y1 - 10),
|
||||
cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
|
||||
|
||||
# Gaze
|
||||
face_crop = image[y1:y2, x1:x2]
|
||||
if face_crop.size > 0:
|
||||
gaze_result = gaze.estimate(face_crop)
|
||||
draw_gaze(image, face.bbox, gaze_result.pitch, gaze_result.yaw)
|
||||
|
||||
# Confidence
|
||||
conf_label = f"{face.confidence:.0%}"
|
||||
cv2.putText(image, conf_label, (x1, y2 + 20),
|
||||
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
|
||||
|
||||
cv2.imwrite(output_path, image)
|
||||
print(f"Saved to {output_path}")
|
||||
|
||||
# Usage
|
||||
visualize_analysis("input.jpg", "output.jpg")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## JSON Output
|
||||
|
||||
Export results to JSON:
|
||||
|
||||
```python
|
||||
import json
|
||||
import numpy as np
|
||||
|
||||
def results_to_json(results):
|
||||
"""Convert analysis results to JSON-serializable format."""
|
||||
output = []
|
||||
|
||||
for r in results:
|
||||
item = {
|
||||
'bbox': r['bbox'].tolist(),
|
||||
'confidence': float(r['confidence']),
|
||||
'age': int(r['age']) if r.get('age') else None,
|
||||
'gender': r.get('gender'),
|
||||
'race': r.get('race'),
|
||||
'is_real': r.get('is_real'),
|
||||
'gaze': {
|
||||
'pitch_deg': float(np.degrees(r['gaze_pitch'])) if 'gaze_pitch' in r else None,
|
||||
'yaw_deg': float(np.degrees(r['gaze_yaw'])) if 'gaze_yaw' in r else None
|
||||
}
|
||||
}
|
||||
output.append(item)
|
||||
|
||||
return output
|
||||
|
||||
# Usage
|
||||
results = pipeline.analyze(image)
|
||||
json_data = results_to_json(results)
|
||||
|
||||
with open('results.json', 'w') as f:
|
||||
json.dump(json_data, f, indent=2)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Next Steps
|
||||
|
||||
- [Batch Processing](batch-processing.md) - Process multiple images
|
||||
- [Video & Webcam](video-webcam.md) - Real-time processing
|
||||
- [Face Search](face-search.md) - Build a search system
|
||||
- [Detection Module](../modules/detection.md) - Detection options
|
||||
- [Recognition Module](../modules/recognition.md) - Recognition details
|
||||
125
docs/recipes/video-webcam.md
Normal file
@@ -0,0 +1,125 @@
|
||||
# Video & Webcam
|
||||
|
||||
Real-time face analysis for video streams.
|
||||
|
||||
!!! note "Work in Progress"

    This page contains example code patterns. Test thoroughly before using in production.
|
||||
|
||||
---
|
||||
|
||||
## Webcam Detection
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
detector = RetinaFace()
|
||||
cap = cv2.VideoCapture(0)
|
||||
|
||||
print("Press 'q' to quit")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
faces = detector.detect(frame)
|
||||
|
||||
draw_detections(
|
||||
image=frame,
|
||||
bboxes=[f.bbox for f in faces],
|
||||
scores=[f.confidence for f in faces],
|
||||
landmarks=[f.landmarks for f in faces]
|
||||
)
|
||||
|
||||
cv2.imshow("Face Detection", frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Video File Processing
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace
|
||||
|
||||
def process_video(input_path, output_path):
|
||||
"""Process a video file."""
|
||||
detector = RetinaFace()
|
||||
cap = cv2.VideoCapture(input_path)
|
||||
|
||||
# Get video properties
|
||||
fps = cap.get(cv2.CAP_PROP_FPS)
|
||||
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
||||
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
||||
|
||||
# Setup output
|
||||
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
|
||||
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
faces = detector.detect(frame)
|
||||
# ... process and draw ...
|
||||
|
||||
out.write(frame)
|
||||
|
||||
cap.release()
|
||||
out.release()
|
||||
|
||||
# Usage
|
||||
process_video("input.mp4", "output.mp4")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Performance Tips
|
||||
|
||||
### Skip Frames
|
||||
|
||||
```python
|
||||
PROCESS_EVERY_N = 3 # Process every 3rd frame
|
||||
frame_count = 0
|
||||
last_faces = []
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if frame_count % PROCESS_EVERY_N == 0:
|
||||
last_faces = detector.detect(frame)
|
||||
frame_count += 1
|
||||
# Draw last_faces...
|
||||
```
|
||||
|
||||
### FPS Counter
|
||||
|
||||
```python
|
||||
import time
|
||||
|
||||
prev_time = time.time()
|
||||
while True:
|
||||
curr_time = time.time()
|
||||
fps = 1 / (curr_time - prev_time)
|
||||
prev_time = curr_time
|
||||
|
||||
cv2.putText(frame, f"FPS: {fps:.1f}", (10, 30),
|
||||
cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## See Also
|
||||
|
||||
- [Anonymize Stream](anonymize-stream.md) - Privacy protection in video
|
||||
- [Batch Processing](batch-processing.md) - Process multiple files
|
||||
- [Detection Module](../modules/detection.md) - Detection options
|
||||
- [Gaze Module](../modules/gaze.md) - Gaze tracking
|
||||
225
docs/stylesheets/extra.css
Normal file
@@ -0,0 +1,225 @@
|
||||
/* UniFace Documentation - Custom Styles */
|
||||
|
||||
/* ===== Hero Section ===== */
|
||||
|
||||
.md-content .hero {
|
||||
text-align: center;
|
||||
padding: 3rem 1rem 2rem;
|
||||
margin: 0 auto;
|
||||
max-width: 900px;
|
||||
}
|
||||
|
||||
.hero-title {
|
||||
font-size: 3.5rem !important;
|
||||
font-weight: 800 !important;
|
||||
margin-bottom: 0.5rem !important;
|
||||
background: linear-gradient(135deg, var(--md-primary-fg-color) 0%, #7c4dff 100%);
|
||||
-webkit-background-clip: text;
|
||||
-webkit-text-fill-color: transparent;
|
||||
background-clip: text;
|
||||
}
|
||||
|
||||
.hero-tagline {
|
||||
font-size: 1.5rem;
|
||||
color: var(--md-default-fg-color);
|
||||
margin-bottom: 0.5rem !important;
|
||||
font-weight: 500;
|
||||
}
|
||||
|
||||
.hero-subtitle {
|
||||
font-size: 1rem;
|
||||
color: var(--md-default-fg-color--light);
|
||||
margin-bottom: 1.5rem !important;
|
||||
font-weight: 400;
|
||||
letter-spacing: 0.5px;
|
||||
}
|
||||
|
||||
.hero .md-button {
|
||||
margin: 0.5rem 0.25rem;
|
||||
padding: 0.7rem 1.5rem;
|
||||
font-weight: 600;
|
||||
border-radius: 8px;
|
||||
transition: all 0.2s ease;
|
||||
}
|
||||
|
||||
.hero .md-button--primary {
|
||||
background: linear-gradient(135deg, var(--md-primary-fg-color) 0%, #5c6bc0 100%);
|
||||
border: none;
|
||||
box-shadow: 0 4px 14px rgba(63, 81, 181, 0.4);
|
||||
}
|
||||
|
||||
.hero .md-button--primary:hover {
|
||||
transform: translateY(-2px);
|
||||
box-shadow: 0 6px 20px rgba(63, 81, 181, 0.5);
|
||||
}
|
||||
|
||||
.hero .md-button:not(.md-button--primary) {
|
||||
border: 2px solid var(--md-primary-fg-color);
|
||||
background: transparent;
|
||||
color: var(--md-primary-fg-color);
|
||||
}
|
||||
|
||||
.hero .md-button:not(.md-button--primary):hover {
|
||||
background: var(--md-primary-fg-color);
|
||||
border-color: var(--md-primary-fg-color);
|
||||
color: white;
|
||||
transform: translateY(-2px);
|
||||
}
|
||||
|
||||
/* Badge styling in hero */
|
||||
.hero p a img {
|
||||
margin: 0 3px;
|
||||
height: 24px !important;
|
||||
}
|
||||
|
||||
/* ===== Feature Grid ===== */
|
||||
.feature-grid {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(4, 1fr);
|
||||
gap: 1.25rem;
|
||||
margin: 2rem 0;
|
||||
}
|
||||
|
||||
.feature-card {
|
||||
padding: 1.5rem;
|
||||
border-radius: 12px;
|
||||
background: var(--md-code-bg-color);
|
||||
border: 1px solid var(--md-default-fg-color--lightest);
|
||||
transition: all 0.3s ease;
|
||||
position: relative;
|
||||
overflow: hidden;
|
||||
}
|
||||
|
||||
.feature-card::before {
|
||||
content: '';
|
||||
position: absolute;
|
||||
top: 0;
|
||||
left: 0;
|
||||
right: 0;
|
||||
height: 3px;
|
||||
background: linear-gradient(90deg, var(--md-primary-fg-color), #7c4dff);
|
||||
opacity: 0;
|
||||
transition: opacity 0.3s ease;
|
||||
}
|
||||
|
||||
.feature-card:hover {
|
||||
transform: translateY(-4px);
|
||||
box-shadow: 0 12px 24px rgba(0, 0, 0, 0.1);
|
||||
border-color: var(--md-primary-fg-color--light);
|
||||
}
|
||||
|
||||
.feature-card:hover::before {
|
||||
opacity: 1;
|
||||
}
|
||||
|
||||
.feature-card h3 {
|
||||
margin-top: 0 !important;
|
||||
margin-bottom: 0.75rem !important;
|
||||
font-size: 1rem !important;
|
||||
font-weight: 600;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.5rem;
|
||||
}
|
||||
|
||||
.feature-card p {
|
||||
margin: 0;
|
||||
font-size: 0.875rem;
|
||||
color: var(--md-default-fg-color--light);
|
||||
line-height: 1.5;
|
||||
}
|
||||
|
||||
.feature-card a {
|
||||
display: inline-block;
|
||||
margin-top: 0.75rem;
|
||||
font-weight: 500;
|
||||
font-size: 0.875rem;
|
||||
}
|
||||
|
||||
/* ===== Next Steps Grid (2 columns) ===== */
|
||||
.next-steps-grid {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(2, 1fr);
|
||||
gap: 1.25rem;
|
||||
margin: 2rem 0;
|
||||
}
|
||||
|
||||
.next-steps-grid .feature-card {
|
||||
padding: 2rem;
|
||||
}
|
||||
|
||||
.next-steps-grid .feature-card h3 {
|
||||
font-size: 1.1rem !important;
|
||||
}
|
||||
|
||||
/* ===== Dark Mode Adjustments ===== */
|
||||
[data-md-color-scheme="slate"] .hero-title {
|
||||
background: linear-gradient(135deg, #7c4dff 0%, #b388ff 100%);
|
||||
-webkit-background-clip: text;
|
||||
-webkit-text-fill-color: transparent;
|
||||
background-clip: text;
|
||||
}
|
||||
|
||||
[data-md-color-scheme="slate"] .feature-card:hover {
|
||||
box-shadow: 0 12px 24px rgba(0, 0, 0, 0.3);
|
||||
}
|
||||
|
||||
[data-md-color-scheme="slate"] .hero .md-button--primary {
|
||||
background: linear-gradient(135deg, #7c4dff 0%, #b388ff 100%);
|
||||
box-shadow: 0 4px 14px rgba(124, 77, 255, 0.4);
|
||||
}
|
||||
|
||||
[data-md-color-scheme="slate"] .hero .md-button--primary:hover {
|
||||
box-shadow: 0 6px 20px rgba(124, 77, 255, 0.5);
|
||||
}
|
||||
|
||||
[data-md-color-scheme="slate"] .hero .md-button:not(.md-button--primary) {
|
||||
border: 2px solid rgba(255, 255, 255, 0.3);
|
||||
background: rgba(255, 255, 255, 0.05);
|
||||
color: rgba(255, 255, 255, 0.9);
|
||||
}
|
||||
|
||||
[data-md-color-scheme="slate"] .hero .md-button:not(.md-button--primary):hover {
|
||||
background: rgba(255, 255, 255, 0.1);
|
||||
border-color: rgba(255, 255, 255, 0.5);
|
||||
color: white;
|
||||
transform: translateY(-2px);
|
||||
}
|
||||
|
||||
/* ===== Responsive Design ===== */
|
||||
@media (max-width: 1200px) {
|
||||
.feature-grid {
|
||||
grid-template-columns: repeat(2, 1fr);
|
||||
}
|
||||
}
|
||||
|
||||
@media (max-width: 768px) {
|
||||
.hero-title {
|
||||
font-size: 2.5rem !important;
|
||||
}
|
||||
|
||||
.hero-subtitle {
|
||||
font-size: 1.1rem;
|
||||
}
|
||||
|
||||
.feature-grid,
|
||||
.next-steps-grid {
|
||||
grid-template-columns: 1fr;
|
||||
}
|
||||
|
||||
.hero .md-button {
|
||||
display: block;
|
||||
margin: 0.5rem auto;
|
||||
max-width: 200px;
|
||||
}
|
||||
}
|
||||
|
||||
@media (max-width: 480px) {
|
||||
.hero-title {
|
||||
font-size: 2rem !important;
|
||||
}
|
||||
|
||||
.feature-card {
|
||||
padding: 1.25rem;
|
||||
}
|
||||
}
|
||||
File diff suppressed because one or more lines are too long
239
examples/02_face_alignment.ipynb
Normal file
File diff suppressed because one or more lines are too long
271
examples/03_face_verification.ipynb
Normal file
File diff suppressed because one or more lines are too long
375
examples/04_face_search.ipynb
Normal file
File diff suppressed because one or more lines are too long
324
examples/05_face_analyzer.ipynb
Normal file
324
examples/05_face_analyzer.ipynb
Normal file
File diff suppressed because one or more lines are too long
394
examples/06_face_parsing.ipynb
Normal file
394
examples/06_face_parsing.ipynb
Normal file
File diff suppressed because one or more lines are too long
324
examples/07_face_anonymization.ipynb
Normal file
324
examples/07_face_anonymization.ipynb
Normal file
File diff suppressed because one or more lines are too long
268
examples/08_gaze_estimation.ipynb
Normal file
268
examples/08_gaze_estimation.ipynb
Normal file
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
164
mkdocs.yml
Normal file
164
mkdocs.yml
Normal file
@@ -0,0 +1,164 @@
site_name: UniFace
site_description: All-in-One Face Analysis Library with ONNX Runtime
site_author: Yakhyokhuja Valikhujaev
site_url: https://yakhyo.github.io/uniface

repo_name: yakhyo/uniface
repo_url: https://github.com/yakhyo/uniface
edit_uri: edit/main/docs/

copyright: Copyright © 2025 Yakhyokhuja Valikhujaev

theme:
  name: material
  custom_dir: docs/overrides
  palette:
    - media: "(prefers-color-scheme)"
      toggle:
        icon: material/link
        name: Switch to light mode
    - media: "(prefers-color-scheme: light)"
      scheme: default
      primary: indigo
      accent: indigo
      toggle:
        icon: material/toggle-switch
        name: Switch to dark mode
    - media: "(prefers-color-scheme: dark)"
      scheme: slate
      primary: black
      accent: indigo
      toggle:
        icon: material/toggle-switch-off-outline
        name: Switch to system preference
  font:
    text: Roboto
    code: Roboto Mono
  features:
    - navigation.tabs
    - navigation.top
    - navigation.footer
    - navigation.indexes
    - navigation.instant
    - navigation.tracking
    - search.suggest
    - search.highlight
    - content.code.copy
    - content.code.annotate
    - content.action.edit
    - content.action.view
    - content.tabs.link
    - toc.follow

  icon:
    logo: material/book-open-page-variant
    repo: fontawesome/brands/git-alt
    admonition:
      note: octicons/tag-16
      abstract: octicons/checklist-16
      info: octicons/info-16
      tip: octicons/squirrel-16
      success: octicons/check-16
      question: octicons/question-16
      warning: octicons/alert-16
      failure: octicons/x-circle-16
      danger: octicons/zap-16
      bug: octicons/bug-16
      example: octicons/beaker-16
      quote: octicons/quote-16

extra:
  social:
    - icon: fontawesome/brands/github
      link: https://github.com/yakhyo
    - icon: fontawesome/brands/python
      link: https://pypi.org/project/uniface/
    - icon: fontawesome/brands/x-twitter
      link: https://x.com/y_valikhujaev
  analytics:
    provider: google
    property: G-FGEHR2K5ZE

extra_css:
  - stylesheets/extra.css

markdown_extensions:
  - admonition
  - footnotes
  - attr_list
  - md_in_html
  - def_list
  - tables
  - toc:
      permalink: false
      toc_depth: 3
  - pymdownx.superfences:
      custom_fences:
        - name: mermaid
          class: mermaid
          format: !!python/name:pymdownx.superfences.fence_code_format
  - pymdownx.details
  - pymdownx.highlight:
      anchor_linenums: true
      line_spans: __span
      pygments_lang_class: true
  - pymdownx.inlinehilite
  - pymdownx.snippets
  - pymdownx.tabbed:
      alternate_style: true
  - pymdownx.emoji:
      emoji_index: !!python/name:material.extensions.emoji.twemoji
      emoji_generator: !!python/name:material.extensions.emoji.to_svg
  - pymdownx.tasklist:
      custom_checkbox: true
  - pymdownx.keys
  - pymdownx.mark
  - pymdownx.critic
  - pymdownx.caret
  - pymdownx.tilde

plugins:
  - search
  - git-committers:
      repository: yakhyo/uniface
      branch: main
      token: !ENV MKDOCS_GIT_COMMITTERS_APIKEY
  - git-revision-date-localized:
      enable_creation_date: true
      type: timeago

nav:
  - Home: index.md
  - Getting Started:
      - Installation: installation.md
      - Quickstart: quickstart.md
      - Notebooks: notebooks.md
  - Model Zoo: models.md
  - Tutorials:
      - Image Pipeline: recipes/image-pipeline.md
      - Video & Webcam: recipes/video-webcam.md
      - Face Search: recipes/face-search.md
      - Batch Processing: recipes/batch-processing.md
      - Anonymize Stream: recipes/anonymize-stream.md
      - Custom Models: recipes/custom-models.md
  - API Reference:
      - Detection: modules/detection.md
      - Recognition: modules/recognition.md
      - Landmarks: modules/landmarks.md
      - Attributes: modules/attributes.md
      - Parsing: modules/parsing.md
      - Gaze: modules/gaze.md
      - Anti-Spoofing: modules/spoofing.md
      - Privacy: modules/privacy.md
  - Guides:
      - Overview: concepts/overview.md
      - Inputs & Outputs: concepts/inputs-outputs.md
      - Coordinate Systems: concepts/coordinate-systems.md
      - Execution Providers: concepts/execution-providers.md
      - Model Cache: concepts/model-cache-offline.md
      - Thresholds: concepts/thresholds-calibration.md
  - Resources:
      - Contributing: contributing.md
      - License: license-attribution.md
      - Releases: https://github.com/yakhyo/uniface/releases
      - Discussions: https://github.com/yakhyo/uniface/discussions
(pyproject.toml)
@@ -1,19 +1,22 @@
[project]
name = "uniface"
version = "1.2.0"
description = "UniFace: A Comprehensive Library for Face Detection, Recognition, Landmark Analysis, Age, and Gender Detection"
version = "2.1.0"
description = "UniFace: A Comprehensive Library for Face Detection, Recognition, Landmark Analysis, Face Parsing, Gaze Estimation, Age, and Gender Detection"
readme = "README.md"
license = { text = "MIT" }
license = "MIT"
authors = [{ name = "Yakhyokhuja Valikhujaev", email = "yakhyo9696@gmail.com" }]
maintainers = [
    { name = "Yakhyokhuja Valikhujaev", email = "yakhyo9696@gmail.com" },
]

requires-python = ">=3.10,<3.14"
requires-python = ">=3.11,<3.14"
keywords = [
    "face-detection",
    "face-recognition",
    "facial-landmarks",
    "face-parsing",
    "face-segmentation",
    "gaze-estimation",
    "age-detection",
    "gender-detection",
    "computer-vision",
@@ -21,16 +24,15 @@ keywords = [
    "onnx",
    "onnxruntime",
    "face-analysis",
    "bisenet",
]

classifiers = [
    "Development Status :: 4 - Beta",
    "Intended Audience :: Developers",
    "Intended Audience :: Science/Research",
    "License :: OSI Approved :: MIT License",
    "Operating System :: OS Independent",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Programming Language :: Python :: 3.13",
@@ -69,7 +71,7 @@ uniface = ["py.typed"]

[tool.ruff]
line-length = 120
target-version = "py310"
target-version = "py311"
exclude = [
    ".git",
    ".ruff_cache",
@@ -86,13 +88,60 @@ exclude = [

[tool.ruff.format]
quote-style = "single"
docstring-code-format = true

[tool.ruff.lint]
select = ["E", "F", "I", "W"]
select = [
    "E",    # pycodestyle errors
    "F",    # pyflakes
    "I",    # isort
    "W",    # pycodestyle warnings
    "UP",   # pyupgrade (modern Python syntax)
    "B",    # flake8-bugbear
    "C4",   # flake8-comprehensions
    "SIM",  # flake8-simplify
    "RUF",  # Ruff-specific rules
]
ignore = [
    "E501",    # Line too long (handled by formatter)
    "B008",    # Function call in default argument (common in FastAPI/Click)
    "SIM108",  # Use ternary operator (can reduce readability)
    "RUF022",  # Allow logical grouping in __all__ instead of alphabetical sorting
]

[tool.ruff.lint.flake8-quotes]
docstring-quotes = "double"

[tool.ruff.lint.isort]
force-single-line = false
force-sort-within-sections = true
known-first-party = ["uniface"]
section-order = [
    "future",
    "standard-library",
    "third-party",
    "first-party",
    "local-folder",
]

[tool.ruff.lint.pydocstyle]
convention = "google"

[tool.mypy]
python_version = "3.11"
warn_return_any = false
warn_unused_ignores = true
ignore_missing_imports = true
exclude = ["tests/", "scripts/", "examples/"]
# Disable strict return type checking for numpy operations
disable_error_code = ["no-any-return"]

[tool.bandit]
exclude_dirs = ["tests", "scripts", "examples"]
skips = ["B101", "B614"]  # B101: assert, B614: torch.jit.load (models are SHA256 verified)

[tool.pytest.ini_options]
testpaths = ["tests"]
python_files = ["test_*.py"]
python_functions = ["test_*"]
addopts = "-v --tb=short"
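The metadata changes above bump the package from 1.2.0 to 2.1.0 and raise the Python floor to 3.11. A minimal post-install sanity check is sketched below; it assumes the package was installed from this source tree (for example with `pip install -e .`) and relies only on standard-library metadata lookup.

```python
# Sketch: verify the installed uniface build matches this pyproject.toml.
from importlib.metadata import version
import sys

assert sys.version_info >= (3, 11), 'uniface 2.x requires Python 3.11+'
print(version('uniface'))  # expected to print 2.1.0 for this release
```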
(deleted file: scripts/README.md)
@@ -1,74 +0,0 @@
# Scripts

Scripts for testing UniFace features.

## Available Scripts

| Script | Description |
|--------|-------------|
| `run_detection.py` | Face detection on image or webcam |
| `run_age_gender.py` | Age and gender prediction |
| `run_emotion.py` | Emotion detection (7 or 8 emotions) |
| `run_landmarks.py` | 106-point facial landmark detection |
| `run_recognition.py` | Face embedding extraction and comparison |
| `run_face_analyzer.py` | Complete face analysis (detection + recognition + attributes) |
| `run_face_search.py` | Real-time face matching against reference |
| `run_video_detection.py` | Face detection on video files |
| `batch_process.py` | Batch process folder of images |
| `download_model.py` | Download model weights |
| `sha256_generate.py` | Generate SHA256 hash for model files |

## Usage Examples

```bash
# Face detection
python scripts/run_detection.py --image assets/test.jpg
python scripts/run_detection.py --webcam

# Age and gender
python scripts/run_age_gender.py --image assets/test.jpg
python scripts/run_age_gender.py --webcam

# Emotion detection
python scripts/run_emotion.py --image assets/test.jpg
python scripts/run_emotion.py --webcam

# Landmarks
python scripts/run_landmarks.py --image assets/test.jpg
python scripts/run_landmarks.py --webcam

# Face recognition (extract embedding)
python scripts/run_recognition.py --image assets/test.jpg

# Face comparison
python scripts/run_recognition.py --image1 face1.jpg --image2 face2.jpg

# Face search (match webcam against reference)
python scripts/run_face_search.py --image reference.jpg

# Video processing
python scripts/run_video_detection.py --input video.mp4 --output output.mp4

# Batch processing
python scripts/batch_process.py --input images/ --output results/

# Download models
python scripts/download_model.py --model-type retinaface
python scripts/download_model.py  # downloads all
```

## Common Options

| Option | Description |
|--------|-------------|
| `--image` | Path to input image |
| `--webcam` | Use webcam instead of image |
| `--detector` | Choose detector: `retinaface` or `scrfd` |
| `--threshold` | Visualization confidence threshold (default: 0.6) |
| `--save_dir` | Output directory (default: `outputs`) |

## Quick Test

```bash
python scripts/run_detection.py --image assets/test.jpg
```
(deleted file: scripts/run_age_gender.py)
@@ -1,126 +0,0 @@
# Age and gender prediction on detected faces
# Usage: python run_age_gender.py --image path/to/image.jpg
#        python run_age_gender.py --webcam

import argparse
import os
from pathlib import Path

import cv2

from uniface import SCRFD, AgeGender, RetinaFace
from uniface.visualization import draw_detections


def draw_age_gender_label(image, bbox, gender_id: int, age: int):
    """Draw age/gender label above the bounding box."""
    x1, y1 = int(bbox[0]), int(bbox[1])
    gender_str = 'Female' if gender_id == 0 else 'Male'
    text = f'{gender_str}, {age}y'
    (tw, th), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
    cv2.rectangle(image, (x1, y1 - th - 10), (x1 + tw + 10, y1), (0, 255, 0), -1)
    cv2.putText(image, text, (x1 + 5, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 2)


def process_image(
    detector,
    age_gender,
    image_path: str,
    save_dir: str = 'outputs',
    threshold: float = 0.6,
):
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Failed to load image from '{image_path}'")
        return

    faces = detector.detect(image)
    print(f'Detected {len(faces)} face(s)')

    if not faces:
        return

    bboxes = [f['bbox'] for f in faces]
    scores = [f['confidence'] for f in faces]
    landmarks = [f['landmarks'] for f in faces]
    draw_detections(image, bboxes, scores, landmarks, vis_threshold=threshold)

    for i, face in enumerate(faces):
        gender_id, age = age_gender.predict(image, face['bbox'])
        gender_str = 'Female' if gender_id == 0 else 'Male'
        print(f'  Face {i + 1}: {gender_str}, {age} years old')
        draw_age_gender_label(image, face['bbox'], gender_id, age)

    os.makedirs(save_dir, exist_ok=True)
    output_path = os.path.join(save_dir, f'{Path(image_path).stem}_age_gender.jpg')
    cv2.imwrite(output_path, image)
    print(f'Output saved: {output_path}')


def run_webcam(detector, age_gender, threshold: float = 0.6):
    cap = cv2.VideoCapture(0)  # 0 = default webcam
    if not cap.isOpened():
        print('Cannot open webcam')
        return

    print("Press 'q' to quit")

    while True:
        ret, frame = cap.read()
        frame = cv2.flip(frame, 1)  # mirror for natural interaction
        if not ret:
            break

        faces = detector.detect(frame)

        # unpack face data for visualization
        bboxes = [f['bbox'] for f in faces]
        scores = [f['confidence'] for f in faces]
        landmarks = [f['landmarks'] for f in faces]
        draw_detections(frame, bboxes, scores, landmarks, vis_threshold=threshold)

        for face in faces:
            gender_id, age = age_gender.predict(frame, face['bbox'])  # predict per face
            draw_age_gender_label(frame, face['bbox'], gender_id, age)

        cv2.putText(
            frame,
            f'Faces: {len(faces)}',
            (10, 30),
            cv2.FONT_HERSHEY_SIMPLEX,
            1,
            (0, 255, 0),
            2,
        )
        cv2.imshow('Age & Gender Detection', frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()


def main():
    parser = argparse.ArgumentParser(description='Run age and gender detection')
    parser.add_argument('--image', type=str, help='Path to input image')
    parser.add_argument('--webcam', action='store_true', help='Use webcam')
    parser.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
    parser.add_argument('--threshold', type=float, default=0.6, help='Visualization threshold')
    parser.add_argument('--save_dir', type=str, default='outputs')
    args = parser.parse_args()

    if not args.image and not args.webcam:
        parser.error('Either --image or --webcam must be specified')

    detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
    age_gender = AgeGender()

    if args.webcam:
        run_webcam(detector, age_gender, args.threshold)
    else:
        process_image(detector, age_gender, args.image, args.save_dir, args.threshold)


if __name__ == '__main__':
    main()
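The deleted script above is written against the pre-2.x tuple API, where `AgeGender.predict()` returned `(gender_id, age)`. The updated tests later in this diff show that `predict()` now returns an `AttributeResult`. A minimal sketch of the same flow against the 2.x API follows; the default-constructed models and the attribute-style `face.bbox` access are assumptions taken from the updated test modules, not from this deleted script.

```python
# Sketch of the equivalent 2.x flow (AttributeResult instead of a tuple).
import cv2
from uniface import AgeGender, RetinaFace

detector = RetinaFace()
age_gender = AgeGender()

image = cv2.imread('assets/test.jpg')
for face in detector.detect(image):
    result = age_gender.predict(image, face.bbox)  # AttributeResult, not (gender_id, age)
    print(f'{result.sex}, {result.age}y (gender id {result.gender})')
```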
(deleted file: scripts/run_detection.py)
@@ -1,101 +0,0 @@
# Face detection on image or webcam
# Usage: python run_detection.py --image path/to/image.jpg
#        python run_detection.py --webcam

import argparse
import os

import cv2

from uniface.detection import SCRFD, RetinaFace, YOLOv5Face
from uniface.visualization import draw_detections


def process_image(detector, image_path: str, threshold: float = 0.6, save_dir: str = 'outputs'):
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Failed to load image from '{image_path}'")
        return

    faces = detector.detect(image)

    if faces:
        bboxes = [face['bbox'] for face in faces]
        scores = [face['confidence'] for face in faces]
        landmarks = [face['landmarks'] for face in faces]
        draw_detections(image, bboxes, scores, landmarks, vis_threshold=threshold)

    os.makedirs(save_dir, exist_ok=True)
    output_path = os.path.join(save_dir, f'{os.path.splitext(os.path.basename(image_path))[0]}_out.jpg')
    cv2.imwrite(output_path, image)
    print(f'Output saved: {output_path}')


def run_webcam(detector, threshold: float = 0.6):
    cap = cv2.VideoCapture(0)  # 0 = default webcam
    if not cap.isOpened():
        print('Cannot open webcam')
        return

    print("Press 'q' to quit")

    while True:
        ret, frame = cap.read()
        frame = cv2.flip(frame, 1)  # mirror for natural interaction
        if not ret:
            break

        faces = detector.detect(frame)

        # unpack face data for visualization
        bboxes = [f['bbox'] for f in faces]
        scores = [f['confidence'] for f in faces]
        landmarks = [f['landmarks'] for f in faces]
        draw_detections(frame, bboxes, scores, landmarks, vis_threshold=threshold)

        cv2.putText(
            frame,
            f'Faces: {len(faces)}',
            (10, 30),
            cv2.FONT_HERSHEY_SIMPLEX,
            1,
            (0, 255, 0),
            2,
        )
        cv2.imshow('Face Detection', frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()


def main():
    parser = argparse.ArgumentParser(description='Run face detection')
    parser.add_argument('--image', type=str, help='Path to input image')
    parser.add_argument('--webcam', action='store_true', help='Use webcam')
    parser.add_argument('--method', type=str, default='retinaface', choices=['retinaface', 'scrfd', 'yolov5face'])
    parser.add_argument('--threshold', type=float, default=0.25, help='Visualization threshold')
    parser.add_argument('--save_dir', type=str, default='outputs')
    args = parser.parse_args()

    if not args.image and not args.webcam:
        parser.error('Either --image or --webcam must be specified')

    if args.method == 'retinaface':
        detector = RetinaFace()
    elif args.method == 'scrfd':
        detector = SCRFD()
    else:
        from uniface.constants import YOLOv5FaceWeights
        detector = YOLOv5Face(model_name=YOLOv5FaceWeights.YOLOV5M)

    if args.webcam:
        run_webcam(detector, args.threshold)
    else:
        process_image(detector, args.image, args.threshold, args.save_dir)


if __name__ == '__main__':
    main()
(deleted file: scripts/run_emotion.py)
@@ -1,124 +0,0 @@
# Emotion detection on detected faces
# Usage: python run_emotion.py --image path/to/image.jpg
#        python run_emotion.py --webcam

import argparse
import os
from pathlib import Path

import cv2

from uniface import SCRFD, Emotion, RetinaFace
from uniface.visualization import draw_detections


def draw_emotion_label(image, bbox, emotion: str, confidence: float):
    """Draw emotion label above the bounding box."""
    x1, y1 = int(bbox[0]), int(bbox[1])
    text = f'{emotion} ({confidence:.2f})'
    (tw, th), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
    cv2.rectangle(image, (x1, y1 - th - 10), (x1 + tw + 10, y1), (255, 0, 0), -1)
    cv2.putText(image, text, (x1 + 5, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)


def process_image(
    detector,
    emotion_predictor,
    image_path: str,
    save_dir: str = 'outputs',
    threshold: float = 0.6,
):
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Failed to load image from '{image_path}'")
        return

    faces = detector.detect(image)
    print(f'Detected {len(faces)} face(s)')

    if not faces:
        return

    bboxes = [f['bbox'] for f in faces]
    scores = [f['confidence'] for f in faces]
    landmarks = [f['landmarks'] for f in faces]
    draw_detections(image, bboxes, scores, landmarks, vis_threshold=threshold)

    for i, face in enumerate(faces):
        emotion, confidence = emotion_predictor.predict(image, face['landmarks'])
        print(f'  Face {i + 1}: {emotion} (confidence: {confidence:.3f})')
        draw_emotion_label(image, face['bbox'], emotion, confidence)

    os.makedirs(save_dir, exist_ok=True)
    output_path = os.path.join(save_dir, f'{Path(image_path).stem}_emotion.jpg')
    cv2.imwrite(output_path, image)
    print(f'Output saved: {output_path}')


def run_webcam(detector, emotion_predictor, threshold: float = 0.6):
    cap = cv2.VideoCapture(0)  # 0 = default webcam
    if not cap.isOpened():
        print('Cannot open webcam')
        return

    print("Press 'q' to quit")

    while True:
        ret, frame = cap.read()
        frame = cv2.flip(frame, 1)  # mirror for natural interaction
        if not ret:
            break

        faces = detector.detect(frame)

        # unpack face data for visualization
        bboxes = [f['bbox'] for f in faces]
        scores = [f['confidence'] for f in faces]
        landmarks = [f['landmarks'] for f in faces]
        draw_detections(frame, bboxes, scores, landmarks, vis_threshold=threshold)

        for face in faces:
            emotion, confidence = emotion_predictor.predict(frame, face['landmarks'])
            draw_emotion_label(frame, face['bbox'], emotion, confidence)

        cv2.putText(
            frame,
            f'Faces: {len(faces)}',
            (10, 30),
            cv2.FONT_HERSHEY_SIMPLEX,
            1,
            (0, 255, 0),
            2,
        )
        cv2.imshow('Emotion Detection', frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()


def main():
    parser = argparse.ArgumentParser(description='Run emotion detection')
    parser.add_argument('--image', type=str, help='Path to input image')
    parser.add_argument('--webcam', action='store_true', help='Use webcam')
    parser.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
    parser.add_argument('--threshold', type=float, default=0.6, help='Visualization threshold')
    parser.add_argument('--save_dir', type=str, default='outputs')
    args = parser.parse_args()

    if not args.image and not args.webcam:
        parser.error('Either --image or --webcam must be specified')

    detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
    emotion_predictor = Emotion()

    if args.webcam:
        run_webcam(detector, emotion_predictor, args.threshold)
    else:
        process_image(detector, emotion_predictor, args.image, args.save_dir, args.threshold)


if __name__ == '__main__':
    main()
(deleted file: scripts/run_face_analyzer.py)
@@ -1,116 +0,0 @@
# Face analysis using FaceAnalyzer
# Usage: python run_face_analyzer.py --image path/to/image.jpg

import argparse
import os
from pathlib import Path

import cv2
import numpy as np

from uniface import AgeGender, ArcFace, FaceAnalyzer, RetinaFace
from uniface.visualization import draw_detections


def draw_face_info(image, face, face_id):
    """Draw face ID and attributes above bounding box."""
    x1, y1, x2, y2 = map(int, face.bbox)
    lines = [f'ID: {face_id}', f'Conf: {face.confidence:.2f}']
    if face.age and face.gender:
        lines.append(f'{face.gender}, {face.age}y')

    for i, line in enumerate(lines):
        y_pos = y1 - 10 - (len(lines) - 1 - i) * 25
        if y_pos < 20:
            y_pos = y2 + 20 + i * 25
        (tw, th), _ = cv2.getTextSize(line, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
        cv2.rectangle(image, (x1, y_pos - th - 5), (x1 + tw + 10, y_pos + 5), (0, 255, 0), -1)
        cv2.putText(image, line, (x1 + 5, y_pos), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 2)


def process_image(analyzer, image_path: str, save_dir: str = 'outputs', show_similarity: bool = True):
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Failed to load image from '{image_path}'")
        return

    faces = analyzer.analyze(image)
    print(f'Detected {len(faces)} face(s)')

    if not faces:
        return

    for i, face in enumerate(faces, 1):
        info = f'  Face {i}: {face.gender}, {face.age}y' if face.age and face.gender else f'  Face {i}'
        if face.embedding is not None:
            info += f' (embedding: {face.embedding.shape})'
        print(info)

    if show_similarity and len(faces) >= 2:
        print('\nSimilarity Matrix:')
        n = len(faces)
        sim_matrix = np.zeros((n, n))

        for i in range(n):
            for j in range(i, n):
                if i == j:
                    sim_matrix[i][j] = 1.0
                else:
                    sim = faces[i].compute_similarity(faces[j])
                    sim_matrix[i][j] = sim
                    sim_matrix[j][i] = sim

        print(' ', end='')
        for i in range(n):
            print(f' F{i + 1:2d} ', end='')
        print('\n ' + '-' * (7 * n))

        for i in range(n):
            print(f'F{i + 1:2d} | ', end='')
            for j in range(n):
                print(f'{sim_matrix[i][j]:6.3f} ', end='')
            print()

        pairs = [(i, j, sim_matrix[i][j]) for i in range(n) for j in range(i + 1, n)]
        pairs.sort(key=lambda x: x[2], reverse=True)

        print('\nTop matches (>0.4 = same person):')
        for i, j, sim in pairs[:3]:
            status = 'Same' if sim > 0.4 else 'Different'
            print(f'  Face {i + 1} ↔ Face {j + 1}: {sim:.3f} ({status})')

    bboxes = [f.bbox for f in faces]
    scores = [f.confidence for f in faces]
    landmarks = [f.landmarks for f in faces]
    draw_detections(image, bboxes, scores, landmarks)

    for i, face in enumerate(faces, 1):
        draw_face_info(image, face, i)

    os.makedirs(save_dir, exist_ok=True)
    output_path = os.path.join(save_dir, f'{Path(image_path).stem}_analysis.jpg')
    cv2.imwrite(output_path, image)
    print(f'Output saved: {output_path}')


def main():
    parser = argparse.ArgumentParser(description='Face analysis with detection, recognition, and attributes')
    parser.add_argument('--image', type=str, required=True, help='Path to input image')
    parser.add_argument('--save_dir', type=str, default='outputs', help='Output directory')
    parser.add_argument('--no-similarity', action='store_true', help='Skip similarity matrix computation')
    args = parser.parse_args()

    if not os.path.exists(args.image):
        print(f'Error: Image not found: {args.image}')
        return

    detector = RetinaFace()
    recognizer = ArcFace()
    age_gender = AgeGender()
    analyzer = FaceAnalyzer(detector, recognizer, age_gender)

    process_image(analyzer, args.image, args.save_dir, show_similarity=not args.no_similarity)


if __name__ == '__main__':
    main()
(deleted file: scripts/run_face_search.py)
@@ -1,97 +0,0 @@
# Real-time face search: match webcam faces against a reference image
# Usage: python run_face_search.py --image reference.jpg

import argparse

import cv2
import numpy as np

from uniface.detection import SCRFD, RetinaFace
from uniface.face_utils import compute_similarity
from uniface.recognition import ArcFace, MobileFace, SphereFace


def get_recognizer(name: str):
    if name == 'arcface':
        return ArcFace()
    elif name == 'mobileface':
        return MobileFace()
    else:
        return SphereFace()


def extract_reference_embedding(detector, recognizer, image_path: str) -> np.ndarray:
    image = cv2.imread(image_path)
    if image is None:
        raise RuntimeError(f'Failed to load image: {image_path}')

    faces = detector.detect(image)
    if not faces:
        raise RuntimeError('No faces found in reference image.')

    landmarks = faces[0]['landmarks']
    return recognizer.get_normalized_embedding(image, landmarks)


def run_webcam(detector, recognizer, ref_embedding: np.ndarray, threshold: float = 0.4):
    cap = cv2.VideoCapture(0)  # 0 = default webcam
    if not cap.isOpened():
        raise RuntimeError('Webcam could not be opened.')

    print("Press 'q' to quit")

    while True:
        ret, frame = cap.read()
        frame = cv2.flip(frame, 1)  # mirror for natural interaction
        if not ret:
            break

        faces = detector.detect(frame)

        for face in faces:
            bbox = face['bbox']
            landmarks = face['landmarks']
            x1, y1, x2, y2 = map(int, bbox)

            embedding = recognizer.get_normalized_embedding(frame, landmarks)
            sim = compute_similarity(ref_embedding, embedding)  # compare with reference

            # green = match, red = unknown
            label = f'Match ({sim:.2f})' if sim > threshold else f'Unknown ({sim:.2f})'
            color = (0, 255, 0) if sim > threshold else (0, 0, 255)

            cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
            cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)

        cv2.imshow('Face Recognition', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()


def main():
    parser = argparse.ArgumentParser(description='Face search using a reference image')
    parser.add_argument('--image', type=str, required=True, help='Reference face image')
    parser.add_argument('--threshold', type=float, default=0.4, help='Match threshold')
    parser.add_argument('--detector', type=str, default='scrfd', choices=['retinaface', 'scrfd'])
    parser.add_argument(
        '--recognizer',
        type=str,
        default='arcface',
        choices=['arcface', 'mobileface', 'sphereface'],
    )
    args = parser.parse_args()

    detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
    recognizer = get_recognizer(args.recognizer)

    print(f'Loading reference: {args.image}')
    ref_embedding = extract_reference_embedding(detector, recognizer, args.image)

    run_webcam(detector, recognizer, ref_embedding, args.threshold)


if __name__ == '__main__':
    main()
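The same reference-matching idea works on two still images instead of a webcam stream. The sketch below reuses the helpers imported by the deleted script above; the attribute-style `faces[0].landmarks` access is an assumption based on the 2.x Face dataclass shown in the updated detector tests, and the 0.4 threshold simply mirrors the script's default rather than a calibrated value.

```python
# Sketch: one-shot verification of two images with the same building blocks.
import cv2
from uniface.detection import SCRFD
from uniface.face_utils import compute_similarity
from uniface.recognition import ArcFace

detector, recognizer = SCRFD(), ArcFace()

def embed(path: str):
    image = cv2.imread(path)
    faces = detector.detect(image)
    if not faces:
        raise RuntimeError(f'No face found in {path}')
    return recognizer.get_normalized_embedding(image, faces[0].landmarks)

similarity = compute_similarity(embed('reference.jpg'), embed('candidate.jpg'))
print('Same person' if similarity > 0.4 else 'Different person', similarity)
```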
(deleted file: scripts/run_landmarks.py)
@@ -1,117 +0,0 @@
# 106-point facial landmark detection
# Usage: python run_landmarks.py --image path/to/image.jpg
#        python run_landmarks.py --webcam

import argparse
import os
from pathlib import Path

import cv2

from uniface import SCRFD, Landmark106, RetinaFace


def process_image(detector, landmarker, image_path: str, save_dir: str = 'outputs'):
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Failed to load image from '{image_path}'")
        return

    faces = detector.detect(image)
    print(f'Detected {len(faces)} face(s)')

    if not faces:
        return

    for i, face in enumerate(faces):
        bbox = face['bbox']
        x1, y1, x2, y2 = map(int, bbox)
        cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)

        landmarks = landmarker.get_landmarks(image, bbox)
        print(f'  Face {i + 1}: {len(landmarks)} landmarks')

        for x, y in landmarks.astype(int):
            cv2.circle(image, (x, y), 1, (0, 255, 0), -1)

        cv2.putText(
            image,
            f'Face {i + 1}',
            (x1, y1 - 10),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.5,
            (0, 255, 0),
            2,
        )

    os.makedirs(save_dir, exist_ok=True)
    output_path = os.path.join(save_dir, f'{Path(image_path).stem}_landmarks.jpg')
    cv2.imwrite(output_path, image)
    print(f'Output saved: {output_path}')


def run_webcam(detector, landmarker):
    cap = cv2.VideoCapture(0)  # 0 = default webcam
    if not cap.isOpened():
        print('Cannot open webcam')
        return

    print("Press 'q' to quit")

    while True:
        ret, frame = cap.read()
        frame = cv2.flip(frame, 1)  # mirror for natural interaction
        if not ret:
            break

        faces = detector.detect(frame)

        for face in faces:
            bbox = face['bbox']
            x1, y1, x2, y2 = map(int, bbox)
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

            landmarks = landmarker.get_landmarks(frame, bbox)  # 106 points
            for x, y in landmarks.astype(int):
                cv2.circle(frame, (x, y), 1, (0, 255, 0), -1)

        cv2.putText(
            frame,
            f'Faces: {len(faces)}',
            (10, 30),
            cv2.FONT_HERSHEY_SIMPLEX,
            1,
            (0, 255, 0),
            2,
        )
        cv2.imshow('106-Point Landmarks', frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()


def main():
    parser = argparse.ArgumentParser(description='Run facial landmark detection')
    parser.add_argument('--image', type=str, help='Path to input image')
    parser.add_argument('--webcam', action='store_true', help='Use webcam')
    parser.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
    parser.add_argument('--save_dir', type=str, default='outputs')
    args = parser.parse_args()

    if not args.image and not args.webcam:
        parser.error('Either --image or --webcam must be specified')

    detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
    landmarker = Landmark106()

    if args.webcam:
        run_webcam(detector, landmarker)
    else:
        process_image(detector, landmarker, args.image, args.save_dir)


if __name__ == '__main__':
    main()
(deleted file: scripts/run_video_detection.py)
@@ -1,107 +0,0 @@
# Face detection on video files
# Usage: python run_video_detection.py --input video.mp4 --output output.mp4

import argparse
from pathlib import Path

import cv2
from tqdm import tqdm

from uniface import SCRFD, RetinaFace
from uniface.visualization import draw_detections


def process_video(
    detector,
    input_path: str,
    output_path: str,
    threshold: float = 0.6,
    show_preview: bool = False,
):
    cap = cv2.VideoCapture(input_path)
    if not cap.isOpened():
        print(f"Error: Cannot open video file '{input_path}'")
        return

    # get video properties
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    print(f'Input: {input_path} ({width}x{height}, {fps:.1f} fps, {total_frames} frames)')
    print(f'Output: {output_path}')

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # codec for .mp4
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    if not out.isOpened():
        print(f"Error: Cannot create output video '{output_path}'")
        cap.release()
        return

    frame_count = 0
    total_faces = 0

    for _ in tqdm(range(total_frames), desc='Processing', unit='frames'):
        ret, frame = cap.read()
        if not ret:
            break

        frame_count += 1
        faces = detector.detect(frame)
        total_faces += len(faces)

        bboxes = [f['bbox'] for f in faces]
        scores = [f['confidence'] for f in faces]
        landmarks = [f['landmarks'] for f in faces]
        draw_detections(frame, bboxes, scores, landmarks, vis_threshold=threshold)

        cv2.putText(
            frame,
            f'Faces: {len(faces)}',
            (10, 30),
            cv2.FONT_HERSHEY_SIMPLEX,
            1,
            (0, 255, 0),
            2,
        )
        out.write(frame)

        if show_preview:
            cv2.imshow("Processing - Press 'q' to cancel", frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                print('\nCancelled by user')
                break

    cap.release()
    out.release()
    if show_preview:
        cv2.destroyAllWindows()

    avg_faces = total_faces / frame_count if frame_count > 0 else 0
    print(f'\nDone! {frame_count} frames, {total_faces} faces ({avg_faces:.1f} avg/frame)')
    print(f'Saved: {output_path}')


def main():
    parser = argparse.ArgumentParser(description='Process video with face detection')
    parser.add_argument('--input', type=str, required=True, help='Input video path')
    parser.add_argument('--output', type=str, required=True, help='Output video path')
    parser.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
    parser.add_argument('--threshold', type=float, default=0.6, help='Visualization threshold')
    parser.add_argument('--preview', action='store_true', help='Show live preview')
    args = parser.parse_args()

    if not Path(args.input).exists():
        print(f"Error: Input file '{args.input}' does not exist")
        return

    Path(args.output).parent.mkdir(parents=True, exist_ok=True)

    detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
    process_video(detector, args.input, args.output, args.threshold, args.preview)


if __name__ == '__main__':
    main()
(tests/test_attribute.py)
@@ -1,7 +1,15 @@
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

"""Tests for AgeGender attribute predictor."""

from __future__ import annotations

import numpy as np
import pytest

from uniface.attribute import AgeGender
from uniface.attribute import AgeGender, AttributeResult


@pytest.fixture
@@ -24,19 +32,22 @@ def test_model_initialization(age_gender_model):


def test_prediction_output_format(age_gender_model, mock_image, mock_bbox):
    gender_id, age = age_gender_model.predict(mock_image, mock_bbox)
    assert isinstance(gender_id, int), f'Gender ID should be int, got {type(gender_id)}'
    assert isinstance(age, int), f'Age should be int, got {type(age)}'
    result = age_gender_model.predict(mock_image, mock_bbox)
    assert isinstance(result, AttributeResult), f'Result should be AttributeResult, got {type(result)}'
    assert isinstance(result.gender, int), f'Gender should be int, got {type(result.gender)}'
    assert isinstance(result.age, int), f'Age should be int, got {type(result.age)}'
    assert isinstance(result.sex, str), f'Sex should be str, got {type(result.sex)}'


def test_gender_values(age_gender_model, mock_image, mock_bbox):
    gender_id, age = age_gender_model.predict(mock_image, mock_bbox)
    assert gender_id in [0, 1], f'Gender ID should be 0 (Female) or 1 (Male), got {gender_id}'
    result = age_gender_model.predict(mock_image, mock_bbox)
    assert result.gender in [0, 1], f'Gender should be 0 (Female) or 1 (Male), got {result.gender}'
    assert result.sex in ['Female', 'Male'], f'Sex should be Female or Male, got {result.sex}'


def test_age_range(age_gender_model, mock_image, mock_bbox):
    gender_id, age = age_gender_model.predict(mock_image, mock_bbox)
    assert 0 <= age <= 120, f'Age should be between 0 and 120, got {age}'
    result = age_gender_model.predict(mock_image, mock_bbox)
    assert 0 <= result.age <= 120, f'Age should be between 0 and 120, got {result.age}'


def test_different_bbox_sizes(age_gender_model, mock_image):
@@ -47,9 +58,9 @@ def test_different_bbox_sizes(age_gender_model, mock_image):
    ]

    for bbox in test_bboxes:
        gender_id, age = age_gender_model.predict(mock_image, bbox)
        assert gender_id in [0, 1], f'Failed for bbox {bbox}'
        assert 0 <= age <= 120, f'Age out of range for bbox {bbox}'
        result = age_gender_model.predict(mock_image, bbox)
        assert result.gender in [0, 1], f'Failed for bbox {bbox}'
        assert 0 <= result.age <= 120, f'Age out of range for bbox {bbox}'


def test_different_image_sizes(age_gender_model, mock_bbox):
@@ -57,31 +68,31 @@ def test_different_image_sizes(age_gender_model, mock_bbox):

    for size in test_sizes:
        mock_image = np.random.randint(0, 255, size, dtype=np.uint8)
        gender_id, age = age_gender_model.predict(mock_image, mock_bbox)
        assert gender_id in [0, 1], f'Failed for image size {size}'
        assert 0 <= age <= 120, f'Age out of range for image size {size}'
        result = age_gender_model.predict(mock_image, mock_bbox)
        assert result.gender in [0, 1], f'Failed for image size {size}'
        assert 0 <= result.age <= 120, f'Age out of range for image size {size}'


def test_consistency(age_gender_model, mock_image, mock_bbox):
    gender_id1, age1 = age_gender_model.predict(mock_image, mock_bbox)
    gender_id2, age2 = age_gender_model.predict(mock_image, mock_bbox)
    result1 = age_gender_model.predict(mock_image, mock_bbox)
    result2 = age_gender_model.predict(mock_image, mock_bbox)

    assert gender_id1 == gender_id2, 'Same input should produce same gender prediction'
    assert age1 == age2, 'Same input should produce same age prediction'
    assert result1.gender == result2.gender, 'Same input should produce same gender prediction'
    assert result1.age == result2.age, 'Same input should produce same age prediction'


def test_bbox_list_format(age_gender_model, mock_image):
    bbox_list = [100, 100, 300, 300]
    gender_id, age = age_gender_model.predict(mock_image, bbox_list)
    assert gender_id in [0, 1], 'Should work with bbox as list'
    assert 0 <= age <= 120, 'Age should be in valid range'
    result = age_gender_model.predict(mock_image, bbox_list)
    assert result.gender in [0, 1], 'Should work with bbox as list'
    assert 0 <= result.age <= 120, 'Age should be in valid range'


def test_bbox_array_format(age_gender_model, mock_image):
    bbox_array = np.array([100, 100, 300, 300])
    gender_id, age = age_gender_model.predict(mock_image, bbox_array)
    assert gender_id in [0, 1], 'Should work with bbox as numpy array'
    assert 0 <= age <= 120, 'Age should be in valid range'
    result = age_gender_model.predict(mock_image, bbox_array)
    assert result.gender in [0, 1], 'Should work with bbox as numpy array'
    assert 0 <= result.age <= 120, 'Age should be in valid range'


def test_multiple_predictions(age_gender_model, mock_image):
@@ -93,25 +104,37 @@ def test_multiple_predictions(age_gender_model, mock_image):

    results = []
    for bbox in bboxes:
        gender_id, age = age_gender_model.predict(mock_image, bbox)
        results.append((gender_id, age))
        result = age_gender_model.predict(mock_image, bbox)
        results.append(result)

    assert len(results) == 3, 'Should have 3 predictions'
    for gender_id, age in results:
        assert gender_id in [0, 1]
        assert 0 <= age <= 120
    for result in results:
        assert result.gender in [0, 1]
        assert 0 <= result.age <= 120


def test_age_is_positive(age_gender_model, mock_image, mock_bbox):
    for _ in range(5):
        gender_id, age = age_gender_model.predict(mock_image, mock_bbox)
        assert age >= 0, f'Age should be non-negative, got {age}'
        result = age_gender_model.predict(mock_image, mock_bbox)
        assert result.age >= 0, f'Age should be non-negative, got {result.age}'


def test_output_format_for_visualization(age_gender_model, mock_image, mock_bbox):
    gender_id, age = age_gender_model.predict(mock_image, mock_bbox)
    gender_str = 'Female' if gender_id == 0 else 'Male'
    text = f'{gender_str}, {age}y'
    result = age_gender_model.predict(mock_image, mock_bbox)
    text = f'{result.sex}, {result.age}y'
    assert isinstance(text, str), 'Should be able to format as string'
    assert 'Male' in text or 'Female' in text, 'Text should contain gender'
    assert 'y' in text, "Text should contain 'y' for years"


def test_attribute_result_fields(age_gender_model, mock_image, mock_bbox):
    """Test that AttributeResult has correct fields for AgeGender model."""
    result = age_gender_model.predict(mock_image, mock_bbox)

    # AgeGender should set gender and age
    assert result.gender is not None
    assert result.age is not None

    # AgeGender should NOT set race and age_group (FairFace only)
    assert result.race is None
    assert result.age_group is None
(tests/test_factory.py)
@@ -1,3 +1,11 @@
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

"""Tests for factory functions (create_detector, create_recognizer, etc.)."""

from __future__ import annotations

import numpy as np
import pytest

@@ -35,8 +43,8 @@ def test_create_detector_with_config():
    detector = create_detector(
        'retinaface',
        model_name=RetinaFaceWeights.MNET_V2,
        conf_thresh=0.8,
        nms_thresh=0.3,
        confidence_threshold=0.8,
        nms_threshold=0.3,
    )
    assert detector is not None, 'Failed to create detector with custom config'

@@ -53,7 +61,7 @@ def test_create_detector_scrfd_with_model():
    """
    Test creating SCRFD detector with specific model.
    """
    detector = create_detector('scrfd', model_name=SCRFDWeights.SCRFD_10G_KPS, conf_thresh=0.5)
    detector = create_detector('scrfd', model_name=SCRFDWeights.SCRFD_10G_KPS, confidence_threshold=0.5)
    assert detector is not None, 'Failed to create SCRFD with specific model'


@@ -141,13 +149,13 @@ def test_detect_faces_with_threshold():
    Test detect_faces with custom confidence threshold.
    """
    mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
    faces = detect_faces(mock_image, method='retinaface', conf_thresh=0.8)
    faces = detect_faces(mock_image, method='retinaface', confidence_threshold=0.8)

    assert isinstance(faces, list), 'detect_faces should return a list'

    # All detections should respect threshold
    for face in faces:
        assert face['confidence'] >= 0.8, 'All detections should meet confidence threshold'
        assert face.confidence >= 0.8, 'All detections should meet confidence threshold'


def test_detect_faces_default_method():
@@ -246,8 +254,8 @@ def test_detector_with_different_configs():
    """
    Test creating multiple detectors with different configurations.
    """
    detector_high_thresh = create_detector('retinaface', conf_thresh=0.9)
    detector_low_thresh = create_detector('retinaface', conf_thresh=0.3)
    detector_high_thresh = create_detector('retinaface', confidence_threshold=0.9)
    detector_low_thresh = create_detector('retinaface', confidence_threshold=0.3)

    mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)

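The factory-test changes above boil down to two keyword renames: `conf_thresh` becomes `confidence_threshold` and `nms_thresh` becomes `nms_threshold`. A minimal usage sketch with the new names follows; it assumes `create_detector` and `detect_faces` are importable from the package root, as the test module uses them.

```python
# Sketch: calling the factory helpers with the renamed keyword arguments.
import numpy as np
from uniface import create_detector, detect_faces

detector = create_detector('scrfd', confidence_threshold=0.6, nms_threshold=0.4)
image = np.zeros((640, 640, 3), dtype=np.uint8)  # stand-in frame
faces = detect_faces(image, method='retinaface', confidence_threshold=0.8)
print(len(detector.detect(image)), len(faces))
```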
(tests/test_landmark.py)
@@ -1,3 +1,11 @@
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

"""Tests for 106-point facial landmark detector."""

from __future__ import annotations

import numpy as np
import pytest

122 lines  tests/test_parsing.py  (new file)
@@ -0,0 +1,122 @@
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

"""Tests for BiSeNet face parsing model."""

from __future__ import annotations

import numpy as np
import pytest

from uniface.constants import ParsingWeights
from uniface.parsing import BiSeNet, create_face_parser


def test_bisenet_initialization():
    """Test BiSeNet initialization."""
    parser = BiSeNet()
    assert parser is not None
    assert parser.input_size == (512, 512)


def test_bisenet_with_different_models():
    """Test BiSeNet with different model weights."""
    parser_resnet18 = BiSeNet(model_name=ParsingWeights.RESNET18)
    parser_resnet34 = BiSeNet(model_name=ParsingWeights.RESNET34)

    assert parser_resnet18 is not None
    assert parser_resnet34 is not None


def test_bisenet_preprocess():
    """Test preprocessing."""
    parser = BiSeNet()

    # Create a dummy face image
    face_image = np.random.randint(0, 255, (256, 256, 3), dtype=np.uint8)

    # Preprocess
    preprocessed = parser.preprocess(face_image)

    assert preprocessed.shape == (1, 3, 512, 512)
    assert preprocessed.dtype == np.float32


def test_bisenet_postprocess():
    """Test postprocessing."""
    parser = BiSeNet()

    # Create dummy model output (batch_size=1, num_classes=19, H=512, W=512)
    dummy_output = np.random.randn(1, 19, 512, 512).astype(np.float32)

    # Postprocess
    mask = parser.postprocess(dummy_output, original_size=(256, 256))

    assert mask.shape == (256, 256)
    assert mask.dtype == np.uint8
    assert mask.min() >= 0
    assert mask.max() < 19  # 19 classes (0-18)


def test_bisenet_parse():
    """Test end-to-end parsing."""
    parser = BiSeNet()

    # Create a dummy face image
    face_image = np.random.randint(0, 255, (256, 256, 3), dtype=np.uint8)

    # Parse
    mask = parser.parse(face_image)

    assert mask.shape == (256, 256)
    assert mask.dtype == np.uint8
    assert mask.min() >= 0
    assert mask.max() < 19


def test_bisenet_callable():
    """Test that BiSeNet is callable."""
    parser = BiSeNet()
    face_image = np.random.randint(0, 255, (256, 256, 3), dtype=np.uint8)

    # Should work as callable
    mask = parser(face_image)

    assert mask.shape == (256, 256)
    assert mask.dtype == np.uint8


def test_create_face_parser_with_enum():
    """Test factory function with enum."""
    parser = create_face_parser(ParsingWeights.RESNET18)
    assert parser is not None
    assert isinstance(parser, BiSeNet)


def test_create_face_parser_with_string():
    """Test factory function with string."""
    parser = create_face_parser('parsing_resnet18')
    assert parser is not None
    assert isinstance(parser, BiSeNet)


def test_create_face_parser_invalid_model():
    """Test factory function with invalid model name."""
    with pytest.raises(ValueError, match='Unknown face parsing model'):
        create_face_parser('invalid_model')


def test_bisenet_different_input_sizes():
    """Test parsing with different input image sizes."""
    parser = BiSeNet()

    # Test with different sizes
    sizes = [(128, 128), (256, 256), (512, 512), (640, 480)]

    for h, w in sizes:
        face_image = np.random.randint(0, 255, (h, w, 3), dtype=np.uint8)
        mask = parser.parse(face_image)

        assert mask.shape == (h, w), f'Failed for size {h}x{w}'
        assert mask.dtype == np.uint8
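As the new test module shows, `BiSeNet.parse()` takes an HxWx3 uint8 face crop and returns an HxW uint8 mask whose values are class ids in the range 0 to 18. A small usage sketch built on exactly that contract:

```python
# Sketch: run face parsing on a crop and count pixels per parsing class.
# The random array stands in for a real aligned face crop.
import numpy as np
from uniface.parsing import BiSeNet

parser = BiSeNet()  # defaults to a 512x512 model input, per the tests above
face_crop = np.random.randint(0, 255, (256, 256, 3), dtype=np.uint8)
mask = parser.parse(face_crop)  # (256, 256) uint8, class ids 0-18

ids, counts = np.unique(mask, return_counts=True)
print(dict(zip(ids.tolist(), counts.tolist())))  # pixel count per class id
```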
(tests/test_recognition.py)
@@ -1,3 +1,11 @@
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

"""Tests for face recognition models (ArcFace, MobileFace, SphereFace)."""

from __future__ import annotations

import numpy as np
import pytest

(tests/test_retinaface.py)
@@ -1,3 +1,11 @@
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

"""Tests for RetinaFace detector."""

from __future__ import annotations

import numpy as np
import pytest

@@ -9,9 +17,9 @@ from uniface.detection import RetinaFace
def retinaface_model():
    return RetinaFace(
        model_name=RetinaFaceWeights.MNET_V2,
        conf_thresh=0.5,
        confidence_threshold=0.5,
        pre_nms_topk=5000,
        nms_thresh=0.4,
        nms_threshold=0.4,
        post_nms_topk=750,
    )

@@ -27,15 +35,15 @@ def test_inference_on_640x640_image(retinaface_model):
    assert isinstance(faces, list), 'Detections should be a list.'

    for face in faces:
        assert isinstance(face, dict), 'Each detection should be a dictionary.'
        assert 'bbox' in face, "Each detection should have a 'bbox' key."
        assert 'confidence' in face, "Each detection should have a 'confidence' key."
        assert 'landmarks' in face, "Each detection should have a 'landmarks' key."
        # Face is a dataclass, check attributes exist
        assert hasattr(face, 'bbox'), "Each detection should have a 'bbox' attribute."
        assert hasattr(face, 'confidence'), "Each detection should have a 'confidence' attribute."
        assert hasattr(face, 'landmarks'), "Each detection should have a 'landmarks' attribute."

        bbox = face['bbox']
        bbox = face.bbox
        assert len(bbox) == 4, 'BBox should have 4 values (x1, y1, x2, y2).'

        landmarks = face['landmarks']
        landmarks = face.landmarks
        assert len(landmarks) == 5, 'Should have 5 landmark points.'
        assert all(len(pt) == 2 for pt in landmarks), 'Each landmark should be (x, y).'

@@ -45,7 +53,7 @@ def test_confidence_threshold(retinaface_model):
    faces = retinaface_model.detect(mock_image)

    for face in faces:
        confidence = face['confidence']
        confidence = face.confidence
        assert confidence >= 0.5, f'Detection has confidence {confidence} below threshold 0.5'

||||
@@ -1,3 +1,11 @@
|
||||
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Tests for SCRFD detector."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
@@ -9,8 +17,8 @@ from uniface.detection import SCRFD
|
||||
def scrfd_model():
|
||||
return SCRFD(
|
||||
model_name=SCRFDWeights.SCRFD_500M_KPS,
|
||||
conf_thresh=0.5,
|
||||
nms_thresh=0.4,
|
||||
confidence_threshold=0.5,
|
||||
nms_threshold=0.4,
|
||||
)
|
||||
|
||||
|
||||
@@ -25,15 +33,15 @@ def test_inference_on_640x640_image(scrfd_model):
|
||||
assert isinstance(faces, list), 'Detections should be a list.'
|
||||
|
||||
for face in faces:
|
||||
assert isinstance(face, dict), 'Each detection should be a dictionary.'
|
||||
assert 'bbox' in face, "Each detection should have a 'bbox' key."
|
||||
assert 'confidence' in face, "Each detection should have a 'confidence' key."
|
||||
assert 'landmarks' in face, "Each detection should have a 'landmarks' key."
|
||||
# Face is a dataclass, check attributes exist
|
||||
assert hasattr(face, 'bbox'), "Each detection should have a 'bbox' attribute."
|
||||
assert hasattr(face, 'confidence'), "Each detection should have a 'confidence' attribute."
|
||||
assert hasattr(face, 'landmarks'), "Each detection should have a 'landmarks' attribute."
|
||||
|
||||
bbox = face['bbox']
|
||||
bbox = face.bbox
|
||||
assert len(bbox) == 4, 'BBox should have 4 values (x1, y1, x2, y2).'
|
||||
|
||||
landmarks = face['landmarks']
|
||||
landmarks = face.landmarks
|
||||
assert len(landmarks) == 5, 'Should have 5 landmark points.'
|
||||
assert all(len(pt) == 2 for pt in landmarks), 'Each landmark should be (x, y).'
|
||||
|
||||
@@ -43,7 +51,7 @@ def test_confidence_threshold(scrfd_model):
|
||||
faces = scrfd_model.detect(mock_image)
|
||||
|
||||
for face in faces:
|
||||
confidence = face['confidence']
|
||||
confidence = face.confidence
|
||||
assert confidence >= 0.5, f'Detection has confidence {confidence} below threshold 0.5'
|
||||
|
||||
|
||||
@@ -63,7 +71,7 @@ def test_different_input_sizes(scrfd_model):
|
||||
|
||||
|
||||
def test_scrfd_10g_model():
|
||||
model = SCRFD(model_name=SCRFDWeights.SCRFD_10G_KPS, conf_thresh=0.5)
|
||||
model = SCRFD(model_name=SCRFDWeights.SCRFD_10G_KPS, confidence_threshold=0.5)
|
||||
assert model is not None, 'SCRFD 10G model initialization failed.'
|
||||
|
||||
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
|
||||
282
tests/test_types.py
Normal file
282
tests/test_types.py
Normal file
@@ -0,0 +1,282 @@
|
||||
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Tests for UniFace type definitions (dataclasses)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from uniface.types import AttributeResult, EmotionResult, Face, GazeResult, SpoofingResult
|
||||
|
||||
|
||||
class TestGazeResult:
|
||||
"""Tests for GazeResult dataclass."""
|
||||
|
||||
def test_creation(self):
|
||||
result = GazeResult(pitch=0.1, yaw=-0.2)
|
||||
assert result.pitch == 0.1
|
||||
assert result.yaw == -0.2
|
||||
|
||||
def test_immutability(self):
|
||||
result = GazeResult(pitch=0.1, yaw=-0.2)
|
||||
with pytest.raises(AttributeError):
|
||||
result.pitch = 0.5 # type: ignore
|
||||
|
||||
def test_repr(self):
|
||||
result = GazeResult(pitch=0.1234, yaw=-0.5678)
|
||||
repr_str = repr(result)
|
||||
assert 'GazeResult' in repr_str
|
||||
assert '0.1234' in repr_str
|
||||
assert '-0.5678' in repr_str
|
||||
|
||||
def test_equality(self):
|
||||
result1 = GazeResult(pitch=0.1, yaw=-0.2)
|
||||
result2 = GazeResult(pitch=0.1, yaw=-0.2)
|
||||
assert result1 == result2
|
||||
|
||||
def test_hashable(self):
|
||||
"""Frozen dataclasses should be hashable."""
|
||||
result = GazeResult(pitch=0.1, yaw=-0.2)
|
||||
# Should not raise
|
||||
hash(result)
|
||||
# Can be used in sets/dicts
|
||||
result_set = {result}
|
||||
assert result in result_set
|
||||
|
||||
|
||||
class TestSpoofingResult:
|
||||
"""Tests for SpoofingResult dataclass."""
|
||||
|
||||
def test_creation_real(self):
|
||||
result = SpoofingResult(is_real=True, confidence=0.95)
|
||||
assert result.is_real is True
|
||||
assert result.confidence == 0.95
|
||||
|
||||
def test_creation_fake(self):
|
||||
result = SpoofingResult(is_real=False, confidence=0.87)
|
||||
assert result.is_real is False
|
||||
assert result.confidence == 0.87
|
||||
|
||||
def test_immutability(self):
|
||||
result = SpoofingResult(is_real=True, confidence=0.95)
|
||||
with pytest.raises(AttributeError):
|
||||
result.is_real = False # type: ignore
|
||||
|
||||
def test_repr_real(self):
|
||||
result = SpoofingResult(is_real=True, confidence=0.9512)
|
||||
repr_str = repr(result)
|
||||
assert 'SpoofingResult' in repr_str
|
||||
assert 'Real' in repr_str
|
||||
assert '0.9512' in repr_str
|
||||
|
||||
def test_repr_fake(self):
|
||||
result = SpoofingResult(is_real=False, confidence=0.8765)
|
||||
repr_str = repr(result)
|
||||
assert 'Fake' in repr_str
|
||||
|
||||
def test_hashable(self):
|
||||
result = SpoofingResult(is_real=True, confidence=0.95)
|
||||
hash(result)
|
||||
|
||||
|
||||
class TestEmotionResult:
|
||||
"""Tests for EmotionResult dataclass."""
|
||||
|
||||
def test_creation(self):
|
||||
result = EmotionResult(emotion='Happy', confidence=0.92)
|
||||
assert result.emotion == 'Happy'
|
||||
assert result.confidence == 0.92
|
||||
|
||||
def test_immutability(self):
|
||||
result = EmotionResult(emotion='Sad', confidence=0.75)
|
||||
with pytest.raises(AttributeError):
|
||||
result.emotion = 'Happy' # type: ignore
|
||||
|
||||
def test_repr(self):
|
||||
result = EmotionResult(emotion='Angry', confidence=0.8123)
|
||||
repr_str = repr(result)
|
||||
assert 'EmotionResult' in repr_str
|
||||
assert 'Angry' in repr_str
|
||||
assert '0.8123' in repr_str
|
||||
|
||||
def test_various_emotions(self):
|
||||
emotions = ['Neutral', 'Happy', 'Sad', 'Surprise', 'Fear', 'Disgust', 'Angry']
|
||||
for emotion in emotions:
|
||||
result = EmotionResult(emotion=emotion, confidence=0.5)
|
||||
assert result.emotion == emotion
|
||||
|
||||
def test_hashable(self):
|
||||
result = EmotionResult(emotion='Happy', confidence=0.92)
|
||||
hash(result)
|
||||
|
||||
|
||||
class TestAttributeResult:
|
||||
"""Tests for AttributeResult dataclass."""
|
||||
|
||||
def test_age_gender_result(self):
|
||||
result = AttributeResult(gender=1, age=25)
|
||||
assert result.gender == 1
|
||||
assert result.age == 25
|
||||
assert result.age_group is None
|
||||
assert result.race is None
|
||||
assert result.sex == 'Male'
|
||||
|
||||
def test_fairface_result(self):
|
||||
result = AttributeResult(gender=0, age_group='20-29', race='East Asian')
|
||||
assert result.gender == 0
|
||||
assert result.age is None
|
||||
assert result.age_group == '20-29'
|
||||
assert result.race == 'East Asian'
|
||||
assert result.sex == 'Female'
|
||||
|
||||
def test_sex_property_female(self):
|
||||
result = AttributeResult(gender=0)
|
||||
assert result.sex == 'Female'
|
||||
|
||||
def test_sex_property_male(self):
|
||||
result = AttributeResult(gender=1)
|
||||
assert result.sex == 'Male'
|
||||
|
||||
def test_immutability(self):
|
||||
result = AttributeResult(gender=1, age=30)
|
||||
with pytest.raises(AttributeError):
|
||||
result.age = 31 # type: ignore
|
||||
|
||||
def test_repr_age_gender(self):
|
||||
result = AttributeResult(gender=1, age=25)
|
||||
repr_str = repr(result)
|
||||
assert 'AttributeResult' in repr_str
|
||||
assert 'Male' in repr_str
|
||||
assert 'age=25' in repr_str
|
||||
|
||||
def test_repr_fairface(self):
|
||||
result = AttributeResult(gender=0, age_group='30-39', race='White')
|
||||
repr_str = repr(result)
|
||||
assert 'Female' in repr_str
|
||||
assert 'age_group=30-39' in repr_str
|
||||
assert 'race=White' in repr_str
|
||||
|
||||
def test_hashable(self):
|
||||
result = AttributeResult(gender=1, age=25)
|
||||
hash(result)
|
||||
|
||||
|
||||
class TestFace:
|
||||
"""Tests for Face dataclass."""
|
||||
|
||||
@pytest.fixture
|
||||
def sample_face(self):
|
||||
return Face(
|
||||
bbox=np.array([100, 100, 200, 200]),
|
||||
confidence=0.95,
|
||||
landmarks=np.array([[120, 130], [180, 130], [150, 160], [130, 180], [170, 180]]),
|
||||
)
|
||||
|
||||
def test_creation(self, sample_face):
|
||||
assert sample_face.confidence == 0.95
|
||||
assert sample_face.bbox.shape == (4,)
|
||||
assert sample_face.landmarks.shape == (5, 2)
|
||||
|
||||
def test_optional_attributes_default_none(self, sample_face):
|
||||
assert sample_face.embedding is None
|
||||
assert sample_face.gender is None
|
||||
assert sample_face.age is None
|
||||
assert sample_face.age_group is None
|
||||
assert sample_face.race is None
|
||||
assert sample_face.emotion is None
|
||||
assert sample_face.emotion_confidence is None
|
||||
|
||||
def test_mutability(self, sample_face):
|
||||
"""Face should be mutable for FaceAnalyzer enrichment."""
|
||||
sample_face.gender = 1
|
||||
sample_face.age = 25
|
||||
sample_face.embedding = np.random.randn(512)
|
||||
|
||||
assert sample_face.gender == 1
|
||||
assert sample_face.age == 25
|
||||
assert sample_face.embedding.shape == (512,)
|
||||
|
||||
def test_sex_property_none(self, sample_face):
|
||||
assert sample_face.sex is None
|
||||
|
||||
def test_sex_property_female(self, sample_face):
|
||||
sample_face.gender = 0
|
||||
assert sample_face.sex == 'Female'
|
||||
|
||||
def test_sex_property_male(self, sample_face):
|
||||
sample_face.gender = 1
|
||||
assert sample_face.sex == 'Male'
|
||||
|
||||
def test_bbox_xyxy(self, sample_face):
|
||||
bbox_xyxy = sample_face.bbox_xyxy
|
||||
np.testing.assert_array_equal(bbox_xyxy, [100, 100, 200, 200])
|
||||
|
||||
def test_bbox_xywh(self, sample_face):
|
||||
bbox_xywh = sample_face.bbox_xywh
|
||||
np.testing.assert_array_equal(bbox_xywh, [100, 100, 100, 100])
|
||||
|
||||
def test_to_dict(self, sample_face):
|
||||
result = sample_face.to_dict()
|
||||
assert isinstance(result, dict)
|
||||
assert 'bbox' in result
|
||||
assert 'confidence' in result
|
||||
assert 'landmarks' in result
|
||||
|
||||
def test_repr_minimal(self, sample_face):
|
||||
repr_str = repr(sample_face)
|
||||
assert 'Face' in repr_str
|
||||
assert 'confidence=0.950' in repr_str
|
||||
|
||||
def test_repr_with_attributes(self, sample_face):
|
||||
sample_face.gender = 1
|
||||
sample_face.age = 30
|
||||
sample_face.emotion = 'Happy'
|
||||
|
||||
repr_str = repr(sample_face)
|
||||
assert 'age=30' in repr_str
|
||||
assert 'sex=Male' in repr_str
|
||||
assert 'emotion=Happy' in repr_str
|
||||
|
||||
def test_compute_similarity_no_embeddings(self, sample_face):
|
||||
other_face = Face(
|
||||
bbox=np.array([50, 50, 150, 150]),
|
||||
confidence=0.90,
|
||||
landmarks=np.random.randn(5, 2),
|
||||
)
|
||||
with pytest.raises(ValueError, match='Both faces must have embeddings'):
|
||||
sample_face.compute_similarity(other_face)
|
||||
|
||||
def test_compute_similarity_with_embeddings(self, sample_face):
|
||||
# Create normalized embeddings
|
||||
sample_face.embedding = np.random.randn(512)
|
||||
sample_face.embedding /= np.linalg.norm(sample_face.embedding)
|
||||
|
||||
other_face = Face(
|
||||
bbox=np.array([50, 50, 150, 150]),
|
||||
confidence=0.90,
|
||||
landmarks=np.random.randn(5, 2),
|
||||
)
|
||||
other_face.embedding = np.random.randn(512)
|
||||
other_face.embedding /= np.linalg.norm(other_face.embedding)
|
||||
|
||||
similarity = sample_face.compute_similarity(other_face)
|
||||
assert isinstance(similarity, float)
|
||||
assert -1 <= similarity <= 1
|
||||
|
||||
def test_compute_similarity_same_embedding(self, sample_face):
|
||||
embedding = np.random.randn(512)
|
||||
embedding /= np.linalg.norm(embedding)
|
||||
sample_face.embedding = embedding.copy()
|
||||
|
||||
other_face = Face(
|
||||
bbox=np.array([50, 50, 150, 150]),
|
||||
confidence=0.90,
|
||||
landmarks=np.random.randn(5, 2),
|
||||
embedding=embedding.copy(),
|
||||
)
|
||||
|
||||
similarity = sample_face.compute_similarity(other_face)
|
||||
assert similarity == pytest.approx(1.0, abs=1e-5)
|
||||
@@ -1,3 +1,11 @@
|
||||
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Tests for utility functions (compute_similarity, face_alignment, etc.)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
@@ -116,7 +124,7 @@ def test_compute_similarity_dtype():
|
||||
emb2 = emb2 / np.linalg.norm(emb2)
|
||||
|
||||
similarity = compute_similarity(emb1, emb2)
|
||||
assert isinstance(similarity, (float, np.floating)), f'Similarity should be float, got {type(similarity)}'
|
||||
assert isinstance(similarity, float | np.floating), f'Similarity should be float, got {type(similarity)}'
|
||||
|
||||
|
||||
# face_alignment tests
|
||||
@@ -259,4 +267,4 @@ def test_compute_similarity_with_recognition_embeddings():
|
||||
|
||||
# Should be a valid similarity score
|
||||
assert -1.0 <= similarity <= 1.0
|
||||
assert isinstance(similarity, (float, np.floating))
|
||||
assert isinstance(similarity, float | np.floating)
|
||||
|
||||
121
tools/README.md
Normal file
121
tools/README.md
Normal file
@@ -0,0 +1,121 @@
|
||||
# Tools
|
||||
|
||||
CLI utilities for testing and running UniFace features.
|
||||
|
||||
## Available Tools
|
||||
|
||||
| Tool | Description |
|
||||
|------|-------------|
|
||||
| `detection.py` | Face detection on image, video, or webcam |
|
||||
| `face_anonymize.py` | Face anonymization/blurring for privacy |
|
||||
| `age_gender.py` | Age and gender prediction |
|
||||
| `face_emotion.py` | Emotion detection (7 or 8 emotions) |
|
||||
| `gaze_estimation.py` | Gaze direction estimation |
|
||||
| `landmarks.py` | 106-point facial landmark detection |
|
||||
| `recognition.py` | Face embedding extraction and comparison |
|
||||
| `face_analyzer.py` | Complete face analysis (detection + recognition + attributes) |
|
||||
| `face_search.py` | Real-time face matching against reference |
|
||||
| `fairface.py` | FairFace attribute prediction (race, gender, age) |
|
||||
| `spoofing.py` | Face anti-spoofing detection |
|
||||
| `face_parsing.py` | Face semantic segmentation |
|
||||
| `video_detection.py` | Face detection on video files with progress bar |
|
||||
| `batch_process.py` | Batch process folder of images |
|
||||
| `download_model.py` | Download model weights |
|
||||
| `sha256_generate.py` | Generate SHA256 hash for model files |
|
||||
|
||||
## Unified `--source` Pattern
|
||||
|
||||
All tools use a unified `--source` argument that accepts:
|
||||
- **Image path**: `--source photo.jpg`
|
||||
- **Video path**: `--source video.mp4`
|
||||
- **Camera ID**: `--source 0` (default webcam), `--source 1` (external camera)
|
||||
|
||||
## Usage Examples
|
||||
|
||||
```bash
|
||||
# Face detection
|
||||
python tools/detection.py --source assets/test.jpg # image
|
||||
python tools/detection.py --source video.mp4 # video
|
||||
python tools/detection.py --source 0 # webcam
|
||||
|
||||
# Face anonymization
|
||||
python tools/face_anonymize.py --source assets/test.jpg --method pixelate
|
||||
python tools/face_anonymize.py --source video.mp4 --method gaussian
|
||||
python tools/face_anonymize.py --source 0 --method pixelate
|
||||
|
||||
# Age and gender
|
||||
python tools/age_gender.py --source assets/test.jpg
|
||||
python tools/age_gender.py --source 0
|
||||
|
||||
# Emotion detection
|
||||
python tools/face_emotion.py --source assets/test.jpg
|
||||
python tools/face_emotion.py --source 0
|
||||
|
||||
# Gaze estimation
|
||||
python tools/gaze_estimation.py --source assets/test.jpg
|
||||
python tools/gaze_estimation.py --source 0
|
||||
|
||||
# Landmarks
|
||||
python tools/landmarks.py --source assets/test.jpg
|
||||
python tools/landmarks.py --source 0
|
||||
|
||||
# FairFace attributes
|
||||
python tools/fairface.py --source assets/test.jpg
|
||||
python tools/fairface.py --source 0
|
||||
|
||||
# Face parsing
|
||||
python tools/face_parsing.py --source assets/test.jpg
|
||||
python tools/face_parsing.py --source 0
|
||||
|
||||
# Face anti-spoofing
|
||||
python tools/spoofing.py --source assets/test.jpg
|
||||
python tools/spoofing.py --source 0
|
||||
|
||||
# Face analyzer
|
||||
python tools/face_analyzer.py --source assets/test.jpg
|
||||
python tools/face_analyzer.py --source 0
|
||||
|
||||
# Face recognition (extract embedding)
|
||||
python tools/recognition.py --image assets/test.jpg
|
||||
|
||||
# Face comparison
|
||||
python tools/recognition.py --image1 face1.jpg --image2 face2.jpg
|
||||
|
||||
# Face search (match against reference)
|
||||
python tools/face_search.py --reference person.jpg --source 0
|
||||
python tools/face_search.py --reference person.jpg --source video.mp4
|
||||
|
||||
# Video processing with progress bar
|
||||
python tools/video_detection.py --source video.mp4
|
||||
python tools/video_detection.py --source video.mp4 --output output.mp4
|
||||
|
||||
# Batch processing
|
||||
python tools/batch_process.py --input images/ --output results/
|
||||
|
||||
# Download models
|
||||
python tools/download_model.py --model-type retinaface
|
||||
python tools/download_model.py # downloads all
|
||||
```
|
||||
|
||||
## Common Options
|
||||
|
||||
| Option | Description |
|
||||
|--------|-------------|
|
||||
| `--source` | Input source: image/video path or camera ID (0, 1, ...) |
|
||||
| `--detector` | Choose detector: `retinaface`, `scrfd`, `yolov5face` |
|
||||
| `--threshold` | Visualization confidence threshold (default: varies) |
|
||||
| `--save-dir` | Output directory (default: `outputs`) |
|
||||
|
||||
## Supported Formats
|
||||
|
||||
**Images:** `.jpg`, `.jpeg`, `.png`, `.bmp`, `.webp`, `.tiff`
|
||||
|
||||
**Videos:** `.mp4`, `.avi`, `.mov`, `.mkv`, `.webm`, `.flv`
|
||||
|
||||
**Camera:** Use integer IDs (`0`, `1`, `2`, ...)
|
||||
|
||||
## Quick Test
|
||||
|
||||
```bash
|
||||
python tools/detection.py --source assets/test.jpg
|
||||
```
|
||||
213
tools/age_gender.py
Normal file
213
tools/age_gender.py
Normal file
@@ -0,0 +1,213 @@
|
||||
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Age and gender prediction on detected faces.
|
||||
|
||||
Usage:
|
||||
python tools/age_gender.py --source path/to/image.jpg
|
||||
python tools/age_gender.py --source path/to/video.mp4
|
||||
python tools/age_gender.py --source 0 # webcam
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
|
||||
from uniface import SCRFD, AgeGender, RetinaFace
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
|
||||
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}
|
||||
|
||||
|
||||
def get_source_type(source: str) -> str:
|
||||
"""Determine if source is image, video, or camera."""
|
||||
if source.isdigit():
|
||||
return 'camera'
|
||||
path = Path(source)
|
||||
suffix = path.suffix.lower()
|
||||
if suffix in IMAGE_EXTENSIONS:
|
||||
return 'image'
|
||||
elif suffix in VIDEO_EXTENSIONS:
|
||||
return 'video'
|
||||
else:
|
||||
return 'unknown'
|
||||
|
||||
|
||||
def draw_age_gender_label(image, bbox, sex: str, age: int):
|
||||
"""Draw age/gender label above the bounding box."""
|
||||
x1, y1 = int(bbox[0]), int(bbox[1])
|
||||
text = f'{sex}, {age}y'
|
||||
(tw, th), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
|
||||
cv2.rectangle(image, (x1, y1 - th - 10), (x1 + tw + 10, y1), (0, 255, 0), -1)
|
||||
cv2.putText(image, text, (x1 + 5, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 2)
|
||||
|
||||
|
||||
def process_image(
|
||||
detector,
|
||||
age_gender,
|
||||
image_path: str,
|
||||
save_dir: str = 'outputs',
|
||||
threshold: float = 0.6,
|
||||
):
|
||||
"""Process a single image."""
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
print(f"Error: Failed to load image from '{image_path}'")
|
||||
return
|
||||
|
||||
faces = detector.detect(image)
|
||||
print(f'Detected {len(faces)} face(s)')
|
||||
|
||||
if not faces:
|
||||
return
|
||||
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
draw_detections(
|
||||
image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
|
||||
)
|
||||
|
||||
for i, face in enumerate(faces):
|
||||
result = age_gender.predict(image, face.bbox)
|
||||
print(f' Face {i + 1}: {result.sex}, {result.age} years old')
|
||||
draw_age_gender_label(image, face.bbox, result.sex, result.age)
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{Path(image_path).stem}_age_gender.jpg')
|
||||
cv2.imwrite(output_path, image)
|
||||
print(f'Output saved: {output_path}')
|
||||
|
||||
|
||||
def process_video(
|
||||
detector,
|
||||
age_gender,
|
||||
video_path: str,
|
||||
save_dir: str = 'outputs',
|
||||
threshold: float = 0.6,
|
||||
):
|
||||
"""Process a video file."""
|
||||
cap = cv2.VideoCapture(video_path)
|
||||
if not cap.isOpened():
|
||||
print(f"Error: Cannot open video file '{video_path}'")
|
||||
return
|
||||
|
||||
fps = cap.get(cv2.CAP_PROP_FPS)
|
||||
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
||||
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
||||
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{Path(video_path).stem}_age_gender.mp4')
|
||||
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
|
||||
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
|
||||
|
||||
print(f'Processing video: {video_path} ({total_frames} frames)')
|
||||
frame_count = 0
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
frame_count += 1
|
||||
faces = detector.detect(frame)
|
||||
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
draw_detections(
|
||||
image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
|
||||
)
|
||||
|
||||
for face in faces:
|
||||
result = age_gender.predict(frame, face.bbox)
|
||||
draw_age_gender_label(frame, face.bbox, result.sex, result.age)
|
||||
|
||||
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
out.write(frame)
|
||||
|
||||
if frame_count % 100 == 0:
|
||||
print(f' Processed {frame_count}/{total_frames} frames...')
|
||||
|
||||
cap.release()
|
||||
out.release()
|
||||
print(f'Done! Output saved: {output_path}')
|
||||
|
||||
|
||||
def run_camera(detector, age_gender, camera_id: int = 0, threshold: float = 0.6):
|
||||
"""Run real-time detection on webcam."""
|
||||
cap = cv2.VideoCapture(camera_id)
|
||||
if not cap.isOpened():
|
||||
print(f'Cannot open camera {camera_id}')
|
||||
return
|
||||
|
||||
print("Press 'q' to quit")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
frame = cv2.flip(frame, 1)
|
||||
if not ret:
|
||||
break
|
||||
|
||||
faces = detector.detect(frame)
|
||||
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
draw_detections(
|
||||
image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
|
||||
)
|
||||
|
||||
for face in faces:
|
||||
result = age_gender.predict(frame, face.bbox)
|
||||
draw_age_gender_label(frame, face.bbox, result.sex, result.age)
|
||||
|
||||
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
cv2.imshow('Age & Gender Detection', frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Run age and gender detection')
|
||||
parser.add_argument('--source', type=str, required=True, help='Image/video path or camera ID (0, 1, ...)')
|
||||
parser.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
|
||||
parser.add_argument('--threshold', type=float, default=0.6, help='Visualization threshold')
|
||||
parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
|
||||
args = parser.parse_args()
|
||||
|
||||
detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
|
||||
age_gender = AgeGender()
|
||||
|
||||
source_type = get_source_type(args.source)
|
||||
|
||||
if source_type == 'camera':
|
||||
run_camera(detector, age_gender, int(args.source), args.threshold)
|
||||
elif source_type == 'image':
|
||||
if not os.path.exists(args.source):
|
||||
print(f'Error: Image not found: {args.source}')
|
||||
return
|
||||
process_image(detector, age_gender, args.source, args.save_dir, args.threshold)
|
||||
elif source_type == 'video':
|
||||
if not os.path.exists(args.source):
|
||||
print(f'Error: Video not found: {args.source}')
|
||||
return
|
||||
process_video(detector, age_gender, args.source, args.save_dir, args.threshold)
|
||||
else:
|
||||
print(f"Error: Unknown source type for '{args.source}'")
|
||||
print('Supported formats: images (.jpg, .png, ...), videos (.mp4, .avi, ...), or camera ID (0, 1, ...)')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
@@ -1,5 +1,12 @@
|
||||
# Batch face detection on a folder of images
|
||||
# Usage: python batch_process.py --input images/ --output results/
|
||||
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Batch face detection on a folder of images.
|
||||
|
||||
Usage:
|
||||
python tools/batch_process.py --input images/ --output results/
|
||||
"""
|
||||
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
@@ -28,10 +35,12 @@ def process_image(detector, image_path: Path, output_path: Path, threshold: floa
|
||||
faces = detector.detect(image)
|
||||
|
||||
# unpack face data for visualization
|
||||
bboxes = [f['bbox'] for f in faces]
|
||||
scores = [f['confidence'] for f in faces]
|
||||
landmarks = [f['landmarks'] for f in faces]
|
||||
draw_detections(image, bboxes, scores, landmarks, vis_threshold=threshold)
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
draw_detections(
|
||||
image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
|
||||
)
|
||||
|
||||
cv2.putText(
|
||||
image,
|
||||
202
tools/detection.py
Normal file
202
tools/detection.py
Normal file
@@ -0,0 +1,202 @@
|
||||
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Face detection on image, video, or webcam.
|
||||
|
||||
Usage:
|
||||
python tools/detection.py --source path/to/image.jpg
|
||||
python tools/detection.py --source path/to/video.mp4
|
||||
python tools/detection.py --source 0 # webcam
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
|
||||
from uniface.detection import SCRFD, RetinaFace, YOLOv5Face, YOLOv8Face
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
|
||||
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}
|
||||
|
||||
|
||||
def get_source_type(source: str) -> str:
|
||||
"""Determine if source is image, video, or camera."""
|
||||
if source.isdigit():
|
||||
return 'camera'
|
||||
path = Path(source)
|
||||
suffix = path.suffix.lower()
|
||||
if suffix in IMAGE_EXTENSIONS:
|
||||
return 'image'
|
||||
elif suffix in VIDEO_EXTENSIONS:
|
||||
return 'video'
|
||||
else:
|
||||
return 'unknown'
|
||||
|
||||
|
||||
def process_image(detector, image_path: str, threshold: float = 0.6, save_dir: str = 'outputs'):
|
||||
"""Process a single image."""
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
print(f"Error: Failed to load image from '{image_path}'")
|
||||
return
|
||||
|
||||
faces = detector.detect(image)
|
||||
|
||||
if faces:
|
||||
bboxes = [face.bbox for face in faces]
|
||||
scores = [face.confidence for face in faces]
|
||||
landmarks = [face.landmarks for face in faces]
|
||||
draw_detections(image, bboxes, scores, landmarks, vis_threshold=threshold)
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{os.path.splitext(os.path.basename(image_path))[0]}_out.jpg')
|
||||
cv2.imwrite(output_path, image)
|
||||
print(f'Detected {len(faces)} face(s). Output saved: {output_path}')
|
||||
|
||||
|
||||
def process_video(detector, video_path: str, threshold: float = 0.6, save_dir: str = 'outputs'):
|
||||
"""Process a video file."""
|
||||
cap = cv2.VideoCapture(video_path)
|
||||
if not cap.isOpened():
|
||||
print(f"Error: Cannot open video file '{video_path}'")
|
||||
return
|
||||
|
||||
# Get video properties
|
||||
fps = cap.get(cv2.CAP_PROP_FPS)
|
||||
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
||||
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
||||
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{Path(video_path).stem}_out.mp4')
|
||||
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
|
||||
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
|
||||
|
||||
print(f'Processing video: {video_path} ({total_frames} frames)')
|
||||
frame_count = 0
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
frame_count += 1
|
||||
faces = detector.detect(frame)
|
||||
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
draw_detections(
|
||||
image=frame,
|
||||
bboxes=bboxes,
|
||||
scores=scores,
|
||||
landmarks=landmarks,
|
||||
vis_threshold=threshold,
|
||||
draw_score=True,
|
||||
fancy_bbox=True,
|
||||
)
|
||||
|
||||
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
out.write(frame)
|
||||
|
||||
# Show progress
|
||||
if frame_count % 100 == 0:
|
||||
print(f' Processed {frame_count}/{total_frames} frames...')
|
||||
|
||||
cap.release()
|
||||
out.release()
|
||||
print(f'Done! Output saved: {output_path}')
|
||||
|
||||
|
||||
def run_camera(detector, camera_id: int = 0, threshold: float = 0.6):
|
||||
"""Run real-time detection on webcam."""
|
||||
cap = cv2.VideoCapture(camera_id)
|
||||
if not cap.isOpened():
|
||||
print(f'Cannot open camera {camera_id}')
|
||||
return
|
||||
|
||||
print("Press 'q' to quit")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
frame = cv2.flip(frame, 1) # mirror for natural interaction
|
||||
if not ret:
|
||||
break
|
||||
|
||||
faces = detector.detect(frame)
|
||||
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
draw_detections(
|
||||
image=frame,
|
||||
bboxes=bboxes,
|
||||
scores=scores,
|
||||
landmarks=landmarks,
|
||||
vis_threshold=threshold,
|
||||
draw_score=True,
|
||||
fancy_bbox=True,
|
||||
)
|
||||
|
||||
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
cv2.imshow('Face Detection', frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Run face detection')
|
||||
parser.add_argument('--source', type=str, required=True, help='Image/video path or camera ID (0, 1, ...)')
|
||||
parser.add_argument(
|
||||
'--method', type=str, default='retinaface', choices=['retinaface', 'scrfd', 'yolov5face', 'yolov8face']
|
||||
)
|
||||
parser.add_argument('--threshold', type=float, default=0.25, help='Visualization threshold')
|
||||
parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
|
||||
args = parser.parse_args()
|
||||
|
||||
# Initialize detector
|
||||
if args.method == 'retinaface':
|
||||
detector = RetinaFace()
|
||||
elif args.method == 'scrfd':
|
||||
detector = SCRFD()
|
||||
elif args.method == 'yolov5face':
|
||||
from uniface.constants import YOLOv5FaceWeights
|
||||
|
||||
detector = YOLOv5Face(model_name=YOLOv5FaceWeights.YOLOV5M)
|
||||
else: # yolov8face
|
||||
from uniface.constants import YOLOv8FaceWeights
|
||||
|
||||
detector = YOLOv8Face(model_name=YOLOv8FaceWeights.YOLOV8N)
|
||||
|
||||
# Determine source type and process
|
||||
source_type = get_source_type(args.source)
|
||||
|
||||
if source_type == 'camera':
|
||||
run_camera(detector, int(args.source), args.threshold)
|
||||
elif source_type == 'image':
|
||||
if not os.path.exists(args.source):
|
||||
print(f'Error: Image not found: {args.source}')
|
||||
return
|
||||
process_image(detector, args.source, args.threshold, args.save_dir)
|
||||
elif source_type == 'video':
|
||||
if not os.path.exists(args.source):
|
||||
print(f'Error: Video not found: {args.source}')
|
||||
return
|
||||
process_video(detector, args.source, args.threshold, args.save_dir)
|
||||
else:
|
||||
print(f"Error: Unknown source type for '{args.source}'")
|
||||
print('Supported formats: images (.jpg, .png, ...), videos (.mp4, .avi, ...), or camera ID (0, 1, ...)')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
239
tools/face_analyzer.py
Normal file
239
tools/face_analyzer.py
Normal file
@@ -0,0 +1,239 @@
|
||||
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Face analysis using FaceAnalyzer.
|
||||
|
||||
Usage:
|
||||
python tools/face_analyzer.py --source path/to/image.jpg
|
||||
python tools/face_analyzer.py --source path/to/video.mp4
|
||||
python tools/face_analyzer.py --source 0 # webcam
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from uniface import AgeGender, ArcFace, FaceAnalyzer, RetinaFace
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
|
||||
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}
|
||||
|
||||
|
||||
def get_source_type(source: str) -> str:
|
||||
"""Determine if source is image, video, or camera."""
|
||||
if source.isdigit():
|
||||
return 'camera'
|
||||
path = Path(source)
|
||||
suffix = path.suffix.lower()
|
||||
if suffix in IMAGE_EXTENSIONS:
|
||||
return 'image'
|
||||
elif suffix in VIDEO_EXTENSIONS:
|
||||
return 'video'
|
||||
else:
|
||||
return 'unknown'
|
||||
|
||||
|
||||
def draw_face_info(image, face, face_id):
|
||||
"""Draw face ID and attributes above bounding box."""
|
||||
x1, y1, _x2, y2 = map(int, face.bbox)
|
||||
lines = [f'ID: {face_id}', f'Conf: {face.confidence:.2f}']
|
||||
if face.age and face.sex:
|
||||
lines.append(f'{face.sex}, {face.age}y')
|
||||
|
||||
for i, line in enumerate(lines):
|
||||
y_pos = y1 - 10 - (len(lines) - 1 - i) * 25
|
||||
if y_pos < 20:
|
||||
y_pos = y2 + 20 + i * 25
|
||||
(tw, th), _ = cv2.getTextSize(line, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
|
||||
cv2.rectangle(image, (x1, y_pos - th - 5), (x1 + tw + 10, y_pos + 5), (0, 255, 0), -1)
|
||||
cv2.putText(image, line, (x1 + 5, y_pos), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 2)
|
||||
|
||||
|
||||
def process_image(analyzer, image_path: str, save_dir: str = 'outputs', show_similarity: bool = True):
|
||||
"""Process a single image."""
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
print(f"Error: Failed to load image from '{image_path}'")
|
||||
return
|
||||
|
||||
faces = analyzer.analyze(image)
|
||||
print(f'Detected {len(faces)} face(s)')
|
||||
|
||||
if not faces:
|
||||
return
|
||||
|
||||
for i, face in enumerate(faces, 1):
|
||||
info = f' Face {i}: {face.sex}, {face.age}y' if face.age and face.sex else f' Face {i}'
|
||||
if face.embedding is not None:
|
||||
info += f' (embedding: {face.embedding.shape})'
|
||||
print(info)
|
||||
|
||||
if show_similarity and len(faces) >= 2:
|
||||
print('\nSimilarity Matrix:')
|
||||
n = len(faces)
|
||||
sim_matrix = np.zeros((n, n))
|
||||
|
||||
for i in range(n):
|
||||
for j in range(i, n):
|
||||
if i == j:
|
||||
sim_matrix[i][j] = 1.0
|
||||
else:
|
||||
sim = faces[i].compute_similarity(faces[j])
|
||||
sim_matrix[i][j] = sim
|
||||
sim_matrix[j][i] = sim
|
||||
|
||||
print(' ', end='')
|
||||
for i in range(n):
|
||||
print(f' F{i + 1:2d} ', end='')
|
||||
print('\n ' + '-' * (7 * n))
|
||||
|
||||
for i in range(n):
|
||||
print(f'F{i + 1:2d} | ', end='')
|
||||
for j in range(n):
|
||||
print(f'{sim_matrix[i][j]:6.3f} ', end='')
|
||||
print()
|
||||
|
||||
pairs = [(i, j, sim_matrix[i][j]) for i in range(n) for j in range(i + 1, n)]
|
||||
pairs.sort(key=lambda x: x[2], reverse=True)
|
||||
|
||||
print('\nTop matches (>0.4 = same person):')
|
||||
for i, j, sim in pairs[:3]:
|
||||
status = 'Same' if sim > 0.4 else 'Different'
|
||||
print(f' Face {i + 1} ↔ Face {j + 1}: {sim:.3f} ({status})')
|
||||
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
draw_detections(image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, fancy_bbox=True)
|
||||
|
||||
for i, face in enumerate(faces, 1):
|
||||
draw_face_info(image, face, i)
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{Path(image_path).stem}_analysis.jpg')
|
||||
cv2.imwrite(output_path, image)
|
||||
print(f'Output saved: {output_path}')
|
||||
|
||||
|
||||
def process_video(analyzer, video_path: str, save_dir: str = 'outputs'):
|
||||
"""Process a video file."""
|
||||
cap = cv2.VideoCapture(video_path)
|
||||
if not cap.isOpened():
|
||||
print(f"Error: Cannot open video file '{video_path}'")
|
||||
return
|
||||
|
||||
fps = cap.get(cv2.CAP_PROP_FPS)
|
||||
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
||||
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
||||
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{Path(video_path).stem}_analysis.mp4')
|
||||
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
|
||||
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
|
||||
|
||||
print(f'Processing video: {video_path} ({total_frames} frames)')
|
||||
frame_count = 0
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
frame_count += 1
|
||||
faces = analyzer.analyze(frame)
|
||||
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
draw_detections(image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, fancy_bbox=True)
|
||||
|
||||
for i, face in enumerate(faces, 1):
|
||||
draw_face_info(frame, face, i)
|
||||
|
||||
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
out.write(frame)
|
||||
|
||||
if frame_count % 100 == 0:
|
||||
print(f' Processed {frame_count}/{total_frames} frames...')
|
||||
|
||||
cap.release()
|
||||
out.release()
|
||||
print(f'Done! Output saved: {output_path}')
|
||||
|
||||
|
||||
def run_camera(analyzer, camera_id: int = 0):
|
||||
"""Run real-time analysis on webcam."""
|
||||
cap = cv2.VideoCapture(camera_id)
|
||||
if not cap.isOpened():
|
||||
print(f'Cannot open camera {camera_id}')
|
||||
return
|
||||
|
||||
print("Press 'q' to quit")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
frame = cv2.flip(frame, 1)
|
||||
if not ret:
|
||||
break
|
||||
|
||||
faces = analyzer.analyze(frame)
|
||||
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
draw_detections(image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, fancy_bbox=True)
|
||||
|
||||
for i, face in enumerate(faces, 1):
|
||||
draw_face_info(frame, face, i)
|
||||
|
||||
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
cv2.imshow('Face Analyzer', frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Face analysis with detection, recognition, and attributes')
|
||||
parser.add_argument('--source', type=str, required=True, help='Image/video path or camera ID (0, 1, ...)')
|
||||
parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
|
||||
parser.add_argument('--no-similarity', action='store_true', help='Skip similarity matrix computation')
|
||||
args = parser.parse_args()
|
||||
|
||||
detector = RetinaFace()
|
||||
recognizer = ArcFace()
|
||||
age_gender = AgeGender()
|
||||
analyzer = FaceAnalyzer(detector, recognizer, age_gender)
|
||||
|
||||
source_type = get_source_type(args.source)
|
||||
|
||||
if source_type == 'camera':
|
||||
run_camera(analyzer, int(args.source))
|
||||
elif source_type == 'image':
|
||||
if not os.path.exists(args.source):
|
||||
print(f'Error: Image not found: {args.source}')
|
||||
return
|
||||
process_image(analyzer, args.source, args.save_dir, show_similarity=not args.no_similarity)
|
||||
elif source_type == 'video':
|
||||
if not os.path.exists(args.source):
|
||||
print(f'Error: Video not found: {args.source}')
|
||||
return
|
||||
process_video(analyzer, args.source, args.save_dir)
|
||||
else:
|
||||
print(f"Error: Unknown source type for '{args.source}'")
|
||||
print('Supported formats: images (.jpg, .png, ...), videos (.mp4, .avi, ...), or camera ID (0, 1, ...)')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
281
tools/face_anonymize.py
Normal file
281
tools/face_anonymize.py
Normal file
@@ -0,0 +1,281 @@
|
||||
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Face anonymization/blurring for privacy.
|
||||
|
||||
Usage:
|
||||
python tools/face_anonymize.py --source path/to/image.jpg --method pixelate
|
||||
python tools/face_anonymize.py --source path/to/video.mp4 --method gaussian
|
||||
python tools/face_anonymize.py --source 0 --method pixelate # webcam
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
|
||||
from uniface import RetinaFace
|
||||
from uniface.privacy import BlurFace
|
||||
|
||||
IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
|
||||
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}
|
||||
|
||||
|
||||
def get_source_type(source: str) -> str:
|
||||
"""Determine if source is image, video, or camera."""
|
||||
if source.isdigit():
|
||||
return 'camera'
|
||||
path = Path(source)
|
||||
suffix = path.suffix.lower()
|
||||
if suffix in IMAGE_EXTENSIONS:
|
||||
return 'image'
|
||||
elif suffix in VIDEO_EXTENSIONS:
|
||||
return 'video'
|
||||
else:
|
||||
return 'unknown'
|
||||
|
||||
|
||||
def process_image(
|
||||
detector,
|
||||
blurrer: BlurFace,
|
||||
image_path: str,
|
||||
save_dir: str = 'outputs',
|
||||
show_detections: bool = False,
|
||||
):
|
||||
"""Process a single image."""
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
print(f"Error: Failed to load image from '{image_path}'")
|
||||
return
|
||||
|
||||
faces = detector.detect(image)
|
||||
print(f'Detected {len(faces)} face(s)')
|
||||
|
||||
if show_detections and faces:
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
preview = image.copy()
|
||||
bboxes = [face.bbox for face in faces]
|
||||
scores = [face.confidence for face in faces]
|
||||
landmarks = [face.landmarks for face in faces]
|
||||
draw_detections(preview, bboxes, scores, landmarks)
|
||||
|
||||
cv2.imshow('Detections (Press any key to continue)', preview)
|
||||
cv2.waitKey(0)
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
if faces:
|
||||
anonymized = blurrer.anonymize(image, faces)
|
||||
else:
|
||||
anonymized = image
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
basename = os.path.splitext(os.path.basename(image_path))[0]
|
||||
output_path = os.path.join(save_dir, f'{basename}_anonymized.jpg')
|
||||
cv2.imwrite(output_path, anonymized)
|
||||
print(f'Output saved: {output_path}')
|
||||
|
||||
|
||||
def process_video(
|
||||
detector,
|
||||
blurrer: BlurFace,
|
||||
video_path: str,
|
||||
save_dir: str = 'outputs',
|
||||
):
|
||||
"""Process a video file."""
|
||||
cap = cv2.VideoCapture(video_path)
|
||||
if not cap.isOpened():
|
||||
print(f"Error: Cannot open video file '{video_path}'")
|
||||
return
|
||||
|
||||
fps = cap.get(cv2.CAP_PROP_FPS)
|
||||
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
||||
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
||||
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{Path(video_path).stem}_anonymized.mp4')
|
||||
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
|
||||
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
|
||||
|
||||
print(f'Processing video: {video_path} ({total_frames} frames)')
|
||||
frame_count = 0
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
frame_count += 1
|
||||
faces = detector.detect(frame)
|
||||
|
||||
if faces:
|
||||
frame = blurrer.anonymize(frame, faces, inplace=True)
|
||||
|
||||
out.write(frame)
|
||||
|
||||
if frame_count % 100 == 0:
|
||||
print(f' Processed {frame_count}/{total_frames} frames...')
|
||||
|
||||
cap.release()
|
||||
out.release()
|
||||
print(f'Done! Output saved: {output_path}')
|
||||
|
||||
|
||||
def run_camera(detector, blurrer: BlurFace, camera_id: int = 0):
|
||||
"""Run real-time anonymization on webcam."""
|
||||
cap = cv2.VideoCapture(camera_id)
|
||||
if not cap.isOpened():
|
||||
print(f'Cannot open camera {camera_id}')
|
||||
return
|
||||
|
||||
print("Press 'q' to quit")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
frame = cv2.flip(frame, 1)
|
||||
if not ret:
|
||||
break
|
||||
|
||||
faces = detector.detect(frame)
|
||||
if faces:
|
||||
frame = blurrer.anonymize(frame, faces, inplace=True)
|
||||
|
||||
cv2.putText(
|
||||
frame,
|
||||
f'Faces blurred: {len(faces)} | Method: {blurrer.method}',
|
||||
(10, 30),
|
||||
cv2.FONT_HERSHEY_SIMPLEX,
|
||||
0.7,
|
||||
(0, 255, 0),
|
||||
2,
|
||||
)
|
||||
|
||||
cv2.imshow('Face Anonymization (Press q to quit)', frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(
|
||||
description='Face anonymization using various blur methods',
|
||||
formatter_class=argparse.RawDescriptionHelpFormatter,
|
||||
epilog="""
|
||||
Examples:
|
||||
# Anonymize image with pixelation (default)
|
||||
python run_anonymization.py --source photo.jpg
|
||||
|
||||
# Use Gaussian blur with custom strength
|
||||
python run_anonymization.py --source photo.jpg --method gaussian --blur-strength 5.0
|
||||
|
||||
# Real-time webcam anonymization
|
||||
python run_anonymization.py --source 0 --method pixelate
|
||||
|
||||
# Black boxes for maximum privacy
|
||||
python run_anonymization.py --source photo.jpg --method blackout
|
||||
|
||||
# Custom pixelation intensity
|
||||
python run_anonymization.py --source photo.jpg --method pixelate --pixel-blocks 5
|
||||
""",
|
||||
)
|
||||
|
||||
# Input/output
|
||||
parser.add_argument('--source', type=str, required=True, help='Image/video path or camera ID (0, 1, ...)')
|
||||
parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
|
||||
|
||||
# Blur method
|
||||
parser.add_argument(
|
||||
'--method',
|
||||
type=str,
|
||||
default='pixelate',
|
||||
choices=['gaussian', 'pixelate', 'blackout', 'elliptical', 'median'],
|
||||
help='Blur method (default: pixelate)',
|
||||
)
|
||||
|
||||
# Method-specific parameters
|
||||
parser.add_argument(
|
||||
'--blur-strength',
|
||||
type=float,
|
||||
default=3.0,
|
||||
help='Blur strength for gaussian/elliptical/median (default: 3.0)',
|
||||
)
|
||||
parser.add_argument(
|
||||
'--pixel-blocks',
|
||||
type=int,
|
||||
default=20,
|
||||
help='Number of pixel blocks for pixelate (default: 20, lower=more pixelated)',
|
||||
)
|
||||
parser.add_argument(
|
||||
'--color',
|
||||
type=str,
|
||||
default='0,0,0',
|
||||
help='Fill color for blackout as R,G,B (default: 0,0,0 for black)',
|
||||
)
|
||||
parser.add_argument('--margin', type=int, default=20, help='Margin for elliptical blur (default: 20)')
|
||||
|
||||
# Detection
|
||||
parser.add_argument(
|
||||
'--confidence-threshold',
|
||||
type=float,
|
||||
default=0.5,
|
||||
help='Detection confidence threshold (default: 0.5)',
|
||||
)
|
||||
|
||||
# Visualization
|
||||
parser.add_argument(
|
||||
'--show-detections',
|
||||
action='store_true',
|
||||
help='Show detection boxes before blurring (image mode only)',
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
# Parse color
|
||||
color_values = [int(x) for x in args.color.split(',')]
|
||||
if len(color_values) != 3:
|
||||
parser.error('--color must be in format R,G,B (e.g., 0,0,0)')
|
||||
color = tuple(color_values)
|
||||
|
||||
# Initialize detector
|
||||
print(f'Initializing face detector (confidence_threshold={args.confidence_threshold})...')
|
||||
detector = RetinaFace(confidence_threshold=args.confidence_threshold)
|
||||
|
||||
# Initialize blurrer
|
||||
print(f'Initializing blur method: {args.method}')
|
||||
blurrer = BlurFace(
|
||||
method=args.method,
|
||||
blur_strength=args.blur_strength,
|
||||
pixel_blocks=args.pixel_blocks,
|
||||
color=color,
|
||||
margin=args.margin,
|
||||
)
|
||||
|
||||
source_type = get_source_type(args.source)
|
||||
|
||||
if source_type == 'camera':
|
||||
run_camera(detector, blurrer, int(args.source))
|
||||
elif source_type == 'image':
|
||||
if not os.path.exists(args.source):
|
||||
print(f'Error: Image not found: {args.source}')
|
||||
return
|
||||
process_image(detector, blurrer, args.source, args.save_dir, args.show_detections)
|
||||
elif source_type == 'video':
|
||||
if not os.path.exists(args.source):
|
||||
print(f'Error: Video not found: {args.source}')
|
||||
return
|
||||
process_video(detector, blurrer, args.source, args.save_dir)
|
||||
else:
|
||||
print(f"Error: Unknown source type for '{args.source}'")
|
||||
print('Supported formats: images (.jpg, .png, ...), videos (.mp4, .avi, ...), or camera ID (0, 1, ...)')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
213
tools/face_emotion.py
Normal file
213
tools/face_emotion.py
Normal file
@@ -0,0 +1,213 @@
|
||||
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Emotion detection on detected faces.
|
||||
|
||||
Usage:
|
||||
python tools/face_emotion.py --source path/to/image.jpg
|
||||
python tools/face_emotion.py --source path/to/video.mp4
|
||||
python tools/face_emotion.py --source 0 # webcam
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
|
||||
from uniface import SCRFD, Emotion, RetinaFace
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
|
||||
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}
|
||||
|
||||
|
||||
def get_source_type(source: str) -> str:
|
||||
"""Determine if source is image, video, or camera."""
|
||||
if source.isdigit():
|
||||
return 'camera'
|
||||
path = Path(source)
|
||||
suffix = path.suffix.lower()
|
||||
if suffix in IMAGE_EXTENSIONS:
|
||||
return 'image'
|
||||
elif suffix in VIDEO_EXTENSIONS:
|
||||
return 'video'
|
||||
else:
|
||||
return 'unknown'
|
||||
|
||||
|
||||
def draw_emotion_label(image, bbox, emotion: str, confidence: float):
|
||||
"""Draw emotion label above the bounding box."""
|
||||
x1, y1 = int(bbox[0]), int(bbox[1])
|
||||
text = f'{emotion} ({confidence:.2f})'
|
||||
(tw, th), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
|
||||
cv2.rectangle(image, (x1, y1 - th - 10), (x1 + tw + 10, y1), (255, 0, 0), -1)
|
||||
cv2.putText(image, text, (x1 + 5, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
|
||||
|
||||
|
||||
def process_image(
|
||||
detector,
|
||||
emotion_predictor,
|
||||
image_path: str,
|
||||
save_dir: str = 'outputs',
|
||||
threshold: float = 0.6,
|
||||
):
|
||||
"""Process a single image."""
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
print(f"Error: Failed to load image from '{image_path}'")
|
||||
return
|
||||
|
||||
faces = detector.detect(image)
|
||||
print(f'Detected {len(faces)} face(s)')
|
||||
|
||||
if not faces:
|
||||
return
|
||||
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
draw_detections(
|
||||
image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
|
||||
)
|
||||
|
||||
for i, face in enumerate(faces):
|
||||
result = emotion_predictor.predict(image, face.landmarks)
|
||||
print(f' Face {i + 1}: {result.emotion} (confidence: {result.confidence:.3f})')
|
||||
draw_emotion_label(image, face.bbox, result.emotion, result.confidence)
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{Path(image_path).stem}_emotion.jpg')
|
||||
cv2.imwrite(output_path, image)
|
||||
print(f'Output saved: {output_path}')
|
||||
|
||||
|
||||
def process_video(
|
||||
detector,
|
||||
emotion_predictor,
|
||||
video_path: str,
|
||||
save_dir: str = 'outputs',
|
||||
threshold: float = 0.6,
|
||||
):
|
||||
"""Process a video file."""
|
||||
cap = cv2.VideoCapture(video_path)
|
||||
if not cap.isOpened():
|
||||
print(f"Error: Cannot open video file '{video_path}'")
|
||||
return
|
||||
|
||||
fps = cap.get(cv2.CAP_PROP_FPS)
|
||||
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
||||
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
||||
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{Path(video_path).stem}_emotion.mp4')
|
||||
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
|
||||
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
|
||||
|
||||
print(f'Processing video: {video_path} ({total_frames} frames)')
|
||||
frame_count = 0
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
frame_count += 1
|
||||
faces = detector.detect(frame)
|
||||
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
draw_detections(
|
||||
image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
|
||||
)
|
||||
|
||||
for face in faces:
|
||||
result = emotion_predictor.predict(frame, face.landmarks)
|
||||
draw_emotion_label(frame, face.bbox, result.emotion, result.confidence)
|
||||
|
||||
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
out.write(frame)
|
||||
|
||||
if frame_count % 100 == 0:
|
||||
print(f' Processed {frame_count}/{total_frames} frames...')
|
||||
|
||||
cap.release()
|
||||
out.release()
|
||||
print(f'Done! Output saved: {output_path}')
|
||||
|
||||
|
||||
def run_camera(detector, emotion_predictor, camera_id: int = 0, threshold: float = 0.6):
|
||||
"""Run real-time detection on webcam."""
|
||||
cap = cv2.VideoCapture(camera_id)
|
||||
if not cap.isOpened():
|
||||
print(f'Cannot open camera {camera_id}')
|
||||
return
|
||||
|
||||
print("Press 'q' to quit")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
if not ret:  # check the read result before flipping so a failed grab never reaches cv2.flip
    break
frame = cv2.flip(frame, 1)
|
||||
|
||||
faces = detector.detect(frame)
|
||||
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
draw_detections(
|
||||
image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
|
||||
)
|
||||
|
||||
for face in faces:
|
||||
result = emotion_predictor.predict(frame, face.landmarks)
|
||||
draw_emotion_label(frame, face.bbox, result.emotion, result.confidence)
|
||||
|
||||
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
cv2.imshow('Emotion Detection', frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Run emotion detection')
|
||||
parser.add_argument('--source', type=str, required=True, help='Image/video path or camera ID (0, 1, ...)')
|
||||
parser.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
|
||||
parser.add_argument('--threshold', type=float, default=0.6, help='Visualization threshold')
|
||||
parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
|
||||
args = parser.parse_args()
|
||||
|
||||
detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
|
||||
emotion_predictor = Emotion()
|
||||
|
||||
source_type = get_source_type(args.source)
|
||||
|
||||
if source_type == 'camera':
|
||||
run_camera(detector, emotion_predictor, int(args.source), args.threshold)
|
||||
elif source_type == 'image':
|
||||
if not os.path.exists(args.source):
|
||||
print(f'Error: Image not found: {args.source}')
|
||||
return
|
||||
process_image(detector, emotion_predictor, args.source, args.save_dir, args.threshold)
|
||||
elif source_type == 'video':
|
||||
if not os.path.exists(args.source):
|
||||
print(f'Error: Video not found: {args.source}')
|
||||
return
|
||||
process_video(detector, emotion_predictor, args.source, args.save_dir, args.threshold)
|
||||
else:
|
||||
print(f"Error: Unknown source type for '{args.source}'")
|
||||
print('Supported formats: images (.jpg, .png, ...), videos (.mp4, .avi, ...), or camera ID (0, 1, ...)')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
250  tools/emotion_detection-style tool listing ends; new file below.
250  tools/face_parsing.py  Normal file
@@ -0,0 +1,250 @@
|
||||
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Face parsing on detected faces.
|
||||
|
||||
Usage:
|
||||
python tools/face_parsing.py --source path/to/image.jpg
|
||||
python tools/face_parsing.py --source path/to/video.mp4
|
||||
python tools/face_parsing.py --source 0 # webcam
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from uniface import RetinaFace
|
||||
from uniface.constants import ParsingWeights
|
||||
from uniface.parsing import BiSeNet
|
||||
from uniface.visualization import vis_parsing_maps
|
||||
|
||||
IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
|
||||
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}
|
||||
|
||||
|
||||
def get_source_type(source: str) -> str:
|
||||
"""Determine if source is image, video, or camera."""
|
||||
if source.isdigit():
|
||||
return 'camera'
|
||||
path = Path(source)
|
||||
suffix = path.suffix.lower()
|
||||
if suffix in IMAGE_EXTENSIONS:
|
||||
return 'image'
|
||||
elif suffix in VIDEO_EXTENSIONS:
|
||||
return 'video'
|
||||
else:
|
||||
return 'unknown'
|
||||
|
||||
|
||||
def expand_bbox(
|
||||
bbox: np.ndarray,
|
||||
image_shape: tuple[int, int],
|
||||
expand_ratio: float = 0.2,
|
||||
expand_top_ratio: float = 0.4,
|
||||
) -> tuple[int, int, int, int]:
|
||||
"""
|
||||
Expand bounding box to include full head region for face parsing.
|
||||
|
||||
Face detection typically returns tight face boxes, but face parsing
|
||||
requires the full head including hair, ears, and neck.
|
||||
|
||||
Args:
|
||||
bbox: Original bounding box [x1, y1, x2, y2].
|
||||
image_shape: Image dimensions as (height, width).
|
||||
expand_ratio: Expansion ratio for left, right, and bottom (default: 0.2 = 20%).
|
||||
expand_top_ratio: Expansion ratio for top to capture hair/forehead (default: 0.4 = 40%).
|
||||
|
||||
Returns:
|
||||
Tuple[int, int, int, int]: Expanded bbox (x1, y1, x2, y2) clamped to image bounds.
|
||||
"""
|
||||
x1, y1, x2, y2 = map(int, bbox[:4])
|
||||
height, width = image_shape[:2]
|
||||
|
||||
face_width = x2 - x1
|
||||
face_height = y2 - y1
|
||||
|
||||
expand_x = int(face_width * expand_ratio)
|
||||
expand_y_bottom = int(face_height * expand_ratio)
|
||||
expand_y_top = int(face_height * expand_top_ratio)
|
||||
|
||||
new_x1 = max(0, x1 - expand_x)
|
||||
new_y1 = max(0, y1 - expand_y_top)
|
||||
new_x2 = min(width, x2 + expand_x)
|
||||
new_y2 = min(height, y2 + expand_y_bottom)
|
||||
|
||||
return new_x1, new_y1, new_x2, new_y2
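(Not part of the diff: a worked example of the expansion with the default ratios, assuming a hypothetical 100x150 face box well inside a 1000x1000 image.)

# Hypothetical numbers with expand_ratio=0.2 and expand_top_ratio=0.4:
# bbox = [400, 300, 500, 450] -> face_width = 100, face_height = 150
# expand_x = 20, expand_y_bottom = 30, expand_y_top = 60
# expand_bbox(...) returns (380, 240, 520, 480): 20 px wider on each side,
# 60 px taller at the top (hair/forehead) and 30 px lower at the bottom (chin/neck).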
|
||||
|
||||
|
||||
def process_image(detector, parser, image_path: str, save_dir: str = 'outputs', expand_ratio: float = 0.2):
|
||||
"""Process a single image."""
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
print(f"Error: Failed to load image from '{image_path}'")
|
||||
return
|
||||
|
||||
faces = detector.detect(image)
|
||||
print(f'Detected {len(faces)} face(s)')
|
||||
|
||||
result_image = image.copy()
|
||||
|
||||
for i, face in enumerate(faces):
|
||||
x1, y1, x2, y2 = expand_bbox(face.bbox, image.shape, expand_ratio=expand_ratio)
|
||||
face_crop = image[y1:y2, x1:x2]
|
||||
|
||||
if face_crop.size == 0:
|
||||
continue
|
||||
|
||||
mask = parser.parse(face_crop)
|
||||
print(f' Face {i + 1}: parsed with {len(set(mask.flatten()))} unique classes')
|
||||
|
||||
face_crop_rgb = cv2.cvtColor(face_crop, cv2.COLOR_BGR2RGB)
|
||||
vis_result = vis_parsing_maps(face_crop_rgb, mask, save_image=False)
|
||||
|
||||
result_image[y1:y2, x1:x2] = vis_result
|
||||
cv2.rectangle(result_image, (x1, y1), (x2, y2), (0, 255, 0), 2)
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{Path(image_path).stem}_parsing.jpg')
|
||||
cv2.imwrite(output_path, result_image)
|
||||
print(f'Output saved: {output_path}')
|
||||
|
||||
|
||||
def process_video(detector, parser, video_path: str, save_dir: str = 'outputs', expand_ratio: float = 0.2):
|
||||
"""Process a video file."""
|
||||
cap = cv2.VideoCapture(video_path)
|
||||
if not cap.isOpened():
|
||||
print(f"Error: Cannot open video file '{video_path}'")
|
||||
return
|
||||
|
||||
fps = cap.get(cv2.CAP_PROP_FPS)
|
||||
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
||||
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
||||
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{Path(video_path).stem}_parsing.mp4')
|
||||
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
|
||||
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
|
||||
|
||||
print(f'Processing video: {video_path} ({total_frames} frames)')
|
||||
frame_count = 0
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
frame_count += 1
|
||||
faces = detector.detect(frame)
|
||||
|
||||
for face in faces:
|
||||
x1, y1, x2, y2 = expand_bbox(face.bbox, frame.shape, expand_ratio=expand_ratio)
|
||||
face_crop = frame[y1:y2, x1:x2]
|
||||
|
||||
if face_crop.size == 0:
|
||||
continue
|
||||
|
||||
mask = parser.parse(face_crop)
|
||||
face_crop_rgb = cv2.cvtColor(face_crop, cv2.COLOR_BGR2RGB)
|
||||
vis_result = vis_parsing_maps(face_crop_rgb, mask, save_image=False)
|
||||
|
||||
frame[y1:y2, x1:x2] = vis_result
|
||||
cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
|
||||
|
||||
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
out.write(frame)
|
||||
|
||||
if frame_count % 100 == 0:
|
||||
print(f' Processed {frame_count}/{total_frames} frames...')
|
||||
|
||||
cap.release()
|
||||
out.release()
|
||||
print(f'Done! Output saved: {output_path}')
|
||||
|
||||
|
||||
def run_camera(detector, parser, camera_id: int = 0, expand_ratio: float = 0.2):
|
||||
"""Run real-time detection on webcam."""
|
||||
cap = cv2.VideoCapture(camera_id)
|
||||
if not cap.isOpened():
|
||||
print(f'Cannot open camera {camera_id}')
|
||||
return
|
||||
|
||||
print("Press 'q' to quit")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
frame = cv2.flip(frame, 1)
|
||||
faces = detector.detect(frame)
|
||||
|
||||
for face in faces:
|
||||
x1, y1, x2, y2 = expand_bbox(face.bbox, frame.shape, expand_ratio=expand_ratio)
|
||||
face_crop = frame[y1:y2, x1:x2]
|
||||
|
||||
if face_crop.size == 0:
|
||||
continue
|
||||
|
||||
mask = parser.parse(face_crop)
|
||||
face_crop_rgb = cv2.cvtColor(face_crop, cv2.COLOR_BGR2RGB)
|
||||
vis_result = vis_parsing_maps(face_crop_rgb, mask, save_image=False)
|
||||
|
||||
frame[y1:y2, x1:x2] = vis_result
|
||||
cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
|
||||
|
||||
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
cv2.imshow('Face Parsing', frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
|
||||
parser_arg = argparse.ArgumentParser(description='Run face parsing')
|
||||
parser_arg.add_argument('--source', type=str, required=True, help='Image/video path or camera ID (0, 1, ...)')
|
||||
parser_arg.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
|
||||
parser_arg.add_argument(
|
||||
'--model', type=str, default=ParsingWeights.RESNET18, choices=[ParsingWeights.RESNET18, ParsingWeights.RESNET34]
|
||||
)
|
||||
parser_arg.add_argument(
|
||||
'--expand-ratio',
|
||||
type=float,
|
||||
default=0.2,
|
||||
help='Bbox expansion ratio for full head coverage (default: 0.2 = 20%%)',
|
||||
)
|
||||
args = parser_arg.parse_args()
|
||||
|
||||
detector = RetinaFace()
|
||||
parser = BiSeNet(model_name=args.model)  # honour the --model flag instead of hard-coding RESNET34
|
||||
|
||||
source_type = get_source_type(args.source)
|
||||
|
||||
if source_type == 'camera':
|
||||
run_camera(detector, parser, int(args.source), expand_ratio=args.expand_ratio)
|
||||
elif source_type == 'image':
|
||||
if not os.path.exists(args.source):
|
||||
print(f'Error: Image not found: {args.source}')
|
||||
return
|
||||
process_image(detector, parser, args.source, args.save_dir, expand_ratio=args.expand_ratio)
|
||||
elif source_type == 'video':
|
||||
if not os.path.exists(args.source):
|
||||
print(f'Error: Video not found: {args.source}')
|
||||
return
|
||||
process_video(detector, parser, args.source, args.save_dir, expand_ratio=args.expand_ratio)
|
||||
else:
|
||||
print(f"Error: Unknown source type for '{args.source}'")
|
||||
print('Supported formats: images (.jpg, .png, ...), videos (.mp4, .avi, ...), or camera ID (0, 1, ...)')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
190  tools/face_search.py  Normal file
@@ -0,0 +1,190 @@
|
||||
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Real-time face search: match faces against a reference image.
|
||||
|
||||
Usage:
|
||||
python tools/face_search.py --reference person.jpg --source 0 # webcam
|
||||
python tools/face_search.py --reference person.jpg --source video.mp4
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from uniface.detection import SCRFD, RetinaFace
|
||||
from uniface.face_utils import compute_similarity
|
||||
from uniface.recognition import ArcFace, MobileFace, SphereFace
|
||||
|
||||
IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
|
||||
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}
|
||||
|
||||
|
||||
def get_source_type(source: str) -> str:
|
||||
"""Determine if source is image, video, or camera."""
|
||||
if source.isdigit():
|
||||
return 'camera'
|
||||
path = Path(source)
|
||||
suffix = path.suffix.lower()
|
||||
if suffix in IMAGE_EXTENSIONS:
|
||||
return 'image'
|
||||
elif suffix in VIDEO_EXTENSIONS:
|
||||
return 'video'
|
||||
else:
|
||||
return 'unknown'
|
||||
|
||||
|
||||
def get_recognizer(name: str):
|
||||
"""Get recognizer by name."""
|
||||
if name == 'arcface':
|
||||
return ArcFace()
|
||||
elif name == 'mobileface':
|
||||
return MobileFace()
|
||||
else:
|
||||
return SphereFace()
|
||||
|
||||
|
||||
def extract_reference_embedding(detector, recognizer, image_path: str) -> np.ndarray:
|
||||
"""Extract embedding from reference image."""
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
raise RuntimeError(f'Failed to load image: {image_path}')
|
||||
|
||||
faces = detector.detect(image)
|
||||
if not faces:
|
||||
raise RuntimeError('No faces found in reference image.')
|
||||
|
||||
landmarks = faces[0].landmarks
|
||||
return recognizer.get_normalized_embedding(image, landmarks)
|
||||
|
||||
|
||||
def process_frame(frame, detector, recognizer, ref_embedding: np.ndarray, threshold: float = 0.4):
|
||||
"""Process a single frame and return annotated frame."""
|
||||
faces = detector.detect(frame)
|
||||
|
||||
for face in faces:
|
||||
bbox = face.bbox
|
||||
landmarks = face.landmarks
|
||||
x1, y1, x2, y2 = map(int, bbox)
|
||||
|
||||
embedding = recognizer.get_normalized_embedding(frame, landmarks)
|
||||
sim = compute_similarity(ref_embedding, embedding)
|
||||
|
||||
label = f'Match ({sim:.2f})' if sim > threshold else f'Unknown ({sim:.2f})'
|
||||
color = (0, 255, 0) if sim > threshold else (0, 0, 255)
|
||||
|
||||
cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
|
||||
cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)
|
||||
|
||||
return frame
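(Not part of the diff: a minimal offline sketch of the same matching logic applied to two still images; the file names are placeholders and 0.4 mirrors this script's default threshold.)

# Sketch only -- 'person_a.jpg' and 'person_b.jpg' are placeholder paths.
detector, recognizer = SCRFD(), ArcFace()
emb_a = extract_reference_embedding(detector, recognizer, 'person_a.jpg')
emb_b = extract_reference_embedding(detector, recognizer, 'person_b.jpg')
sim = compute_similarity(emb_a, emb_b)
print('match' if sim > 0.4 else 'no match', f'(similarity {sim:.2f})')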
|
||||
|
||||
|
||||
def process_video(detector, recognizer, ref_embedding: np.ndarray, video_path: str, save_dir: str, threshold: float):
|
||||
"""Process a video file."""
|
||||
cap = cv2.VideoCapture(video_path)
|
||||
if not cap.isOpened():
|
||||
print(f"Error: Cannot open video file '{video_path}'")
|
||||
return
|
||||
|
||||
fps = cap.get(cv2.CAP_PROP_FPS)
|
||||
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
||||
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
||||
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{Path(video_path).stem}_search.mp4')
|
||||
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
|
||||
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
|
||||
|
||||
print(f'Processing video: {video_path} ({total_frames} frames)')
|
||||
frame_count = 0
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
frame_count += 1
|
||||
frame = process_frame(frame, detector, recognizer, ref_embedding, threshold)
|
||||
out.write(frame)
|
||||
|
||||
if frame_count % 100 == 0:
|
||||
print(f' Processed {frame_count}/{total_frames} frames...')
|
||||
|
||||
cap.release()
|
||||
out.release()
|
||||
print(f'Done! Output saved: {output_path}')
|
||||
|
||||
|
||||
def run_camera(detector, recognizer, ref_embedding: np.ndarray, camera_id: int = 0, threshold: float = 0.4):
|
||||
"""Run real-time face search on webcam."""
|
||||
cap = cv2.VideoCapture(camera_id)
|
||||
if not cap.isOpened():
|
||||
print(f'Cannot open camera {camera_id}')
|
||||
return
|
||||
|
||||
print("Press 'q' to quit")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
if not ret:  # check the read result before flipping so a failed grab never reaches cv2.flip
    break
frame = cv2.flip(frame, 1)
|
||||
|
||||
frame = process_frame(frame, detector, recognizer, ref_embedding, threshold)
|
||||
|
||||
cv2.imshow('Face Recognition', frame)
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Face search using a reference image')
|
||||
parser.add_argument('--reference', type=str, required=True, help='Reference face image')
|
||||
parser.add_argument('--source', type=str, required=True, help='Video path or camera ID (0, 1, ...)')
|
||||
parser.add_argument('--threshold', type=float, default=0.4, help='Match threshold')
|
||||
parser.add_argument('--detector', type=str, default='scrfd', choices=['retinaface', 'scrfd'])
|
||||
parser.add_argument(
|
||||
'--recognizer',
|
||||
type=str,
|
||||
default='arcface',
|
||||
choices=['arcface', 'mobileface', 'sphereface'],
|
||||
)
|
||||
parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
|
||||
args = parser.parse_args()
|
||||
|
||||
if not os.path.exists(args.reference):
|
||||
print(f'Error: Reference image not found: {args.reference}')
|
||||
return
|
||||
|
||||
detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
|
||||
recognizer = get_recognizer(args.recognizer)
|
||||
|
||||
print(f'Loading reference: {args.reference}')
|
||||
ref_embedding = extract_reference_embedding(detector, recognizer, args.reference)
|
||||
|
||||
source_type = get_source_type(args.source)
|
||||
|
||||
if source_type == 'camera':
|
||||
run_camera(detector, recognizer, ref_embedding, int(args.source), args.threshold)
|
||||
elif source_type == 'video':
|
||||
if not os.path.exists(args.source):
|
||||
print(f'Error: Video not found: {args.source}')
|
||||
return
|
||||
process_video(detector, recognizer, ref_embedding, args.source, args.save_dir, args.threshold)
|
||||
else:
|
||||
print(f"Error: Source must be a video file or camera ID, not '{args.source}'")
|
||||
print('Supported formats: videos (.mp4, .avi, ...) or camera ID (0, 1, ...)')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
214  tools/fairface.py  Normal file
@@ -0,0 +1,214 @@
|
||||
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""FairFace attribute prediction (race, gender, age) on detected faces.
|
||||
|
||||
Usage:
|
||||
python tools/fairface.py --source path/to/image.jpg
|
||||
python tools/fairface.py --source path/to/video.mp4
|
||||
python tools/fairface.py --source 0 # webcam
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
|
||||
from uniface import SCRFD, RetinaFace
|
||||
from uniface.attribute import FairFace
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
|
||||
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}
|
||||
|
||||
|
||||
def get_source_type(source: str) -> str:
|
||||
"""Determine if source is image, video, or camera."""
|
||||
if source.isdigit():
|
||||
return 'camera'
|
||||
path = Path(source)
|
||||
suffix = path.suffix.lower()
|
||||
if suffix in IMAGE_EXTENSIONS:
|
||||
return 'image'
|
||||
elif suffix in VIDEO_EXTENSIONS:
|
||||
return 'video'
|
||||
else:
|
||||
return 'unknown'
|
||||
|
||||
|
||||
def draw_fairface_label(image, bbox, sex: str, age_group: str, race: str):
|
||||
"""Draw FairFace attributes above the bounding box."""
|
||||
x1, y1 = int(bbox[0]), int(bbox[1])
|
||||
text = f'{sex}, {age_group}, {race}'
|
||||
(tw, th), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)
|
||||
cv2.rectangle(image, (x1, y1 - th - 10), (x1 + tw + 10, y1), (0, 255, 0), -1)
|
||||
cv2.putText(image, text, (x1 + 5, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 2)
|
||||
|
||||
|
||||
def process_image(
|
||||
detector,
|
||||
fairface,
|
||||
image_path: str,
|
||||
save_dir: str = 'outputs',
|
||||
threshold: float = 0.6,
|
||||
):
|
||||
"""Process a single image."""
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
print(f"Error: Failed to load image from '{image_path}'")
|
||||
return
|
||||
|
||||
faces = detector.detect(image)
|
||||
print(f'Detected {len(faces)} face(s)')
|
||||
|
||||
if not faces:
|
||||
return
|
||||
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
draw_detections(
|
||||
image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
|
||||
)
|
||||
|
||||
for i, face in enumerate(faces):
|
||||
result = fairface.predict(image, face.bbox)
|
||||
print(f' Face {i + 1}: {result.sex}, {result.age_group}, {result.race}')
|
||||
draw_fairface_label(image, face.bbox, result.sex, result.age_group, result.race)
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{Path(image_path).stem}_fairface.jpg')
|
||||
cv2.imwrite(output_path, image)
|
||||
print(f'Output saved: {output_path}')
|
||||
|
||||
|
||||
def process_video(
|
||||
detector,
|
||||
fairface,
|
||||
video_path: str,
|
||||
save_dir: str = 'outputs',
|
||||
threshold: float = 0.6,
|
||||
):
|
||||
"""Process a video file."""
|
||||
cap = cv2.VideoCapture(video_path)
|
||||
if not cap.isOpened():
|
||||
print(f"Error: Cannot open video file '{video_path}'")
|
||||
return
|
||||
|
||||
fps = cap.get(cv2.CAP_PROP_FPS)
|
||||
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
||||
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
||||
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{Path(video_path).stem}_fairface.mp4')
|
||||
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
|
||||
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
|
||||
|
||||
print(f'Processing video: {video_path} ({total_frames} frames)')
|
||||
frame_count = 0
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
frame_count += 1
|
||||
faces = detector.detect(frame)
|
||||
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
draw_detections(
|
||||
image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
|
||||
)
|
||||
|
||||
for face in faces:
|
||||
result = fairface.predict(frame, face.bbox)
|
||||
draw_fairface_label(frame, face.bbox, result.sex, result.age_group, result.race)
|
||||
|
||||
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
out.write(frame)
|
||||
|
||||
if frame_count % 100 == 0:
|
||||
print(f' Processed {frame_count}/{total_frames} frames...')
|
||||
|
||||
cap.release()
|
||||
out.release()
|
||||
print(f'Done! Output saved: {output_path}')
|
||||
|
||||
|
||||
def run_camera(detector, fairface, camera_id: int = 0, threshold: float = 0.6):
|
||||
"""Run real-time detection on webcam."""
|
||||
cap = cv2.VideoCapture(camera_id)
|
||||
if not cap.isOpened():
|
||||
print(f'Cannot open camera {camera_id}')
|
||||
return
|
||||
|
||||
print("Press 'q' to quit")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
if not ret:  # check the read result before flipping so a failed grab never reaches cv2.flip
    break
frame = cv2.flip(frame, 1)
|
||||
|
||||
faces = detector.detect(frame)
|
||||
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
draw_detections(
|
||||
image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
|
||||
)
|
||||
|
||||
for face in faces:
|
||||
result = fairface.predict(frame, face.bbox)
|
||||
draw_fairface_label(frame, face.bbox, result.sex, result.age_group, result.race)
|
||||
|
||||
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
cv2.imshow('FairFace Detection', frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Run FairFace attribute prediction (race, gender, age)')
|
||||
parser.add_argument('--source', type=str, required=True, help='Image/video path or camera ID (0, 1, ...)')
|
||||
parser.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
|
||||
parser.add_argument('--threshold', type=float, default=0.6, help='Visualization threshold')
|
||||
parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
|
||||
args = parser.parse_args()
|
||||
|
||||
detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
|
||||
fairface = FairFace()
|
||||
|
||||
source_type = get_source_type(args.source)
|
||||
|
||||
if source_type == 'camera':
|
||||
run_camera(detector, fairface, int(args.source), args.threshold)
|
||||
elif source_type == 'image':
|
||||
if not os.path.exists(args.source):
|
||||
print(f'Error: Image not found: {args.source}')
|
||||
return
|
||||
process_image(detector, fairface, args.source, args.save_dir, args.threshold)
|
||||
elif source_type == 'video':
|
||||
if not os.path.exists(args.source):
|
||||
print(f'Error: Video not found: {args.source}')
|
||||
return
|
||||
process_video(detector, fairface, args.source, args.save_dir, args.threshold)
|
||||
else:
|
||||
print(f"Error: Unknown source type for '{args.source}'")
|
||||
print('Supported formats: images (.jpg, .png, ...), videos (.mp4, .avi, ...), or camera ID (0, 1, ...)')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
190  tools/gaze_estimation.py  Normal file
@@ -0,0 +1,190 @@
|
||||
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Gaze estimation on detected faces.
|
||||
|
||||
Usage:
|
||||
python tools/gaze_estimation.py --source path/to/image.jpg
|
||||
python tools/gaze_estimation.py --source path/to/video.mp4
|
||||
python tools/gaze_estimation.py --source 0 # webcam
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from uniface import RetinaFace
|
||||
from uniface.gaze import MobileGaze
|
||||
from uniface.visualization import draw_gaze
|
||||
|
||||
IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
|
||||
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}
|
||||
|
||||
|
||||
def get_source_type(source: str) -> str:
|
||||
"""Determine if source is image, video, or camera."""
|
||||
if source.isdigit():
|
||||
return 'camera'
|
||||
path = Path(source)
|
||||
suffix = path.suffix.lower()
|
||||
if suffix in IMAGE_EXTENSIONS:
|
||||
return 'image'
|
||||
elif suffix in VIDEO_EXTENSIONS:
|
||||
return 'video'
|
||||
else:
|
||||
return 'unknown'
|
||||
|
||||
|
||||
def process_image(detector, gaze_estimator, image_path: str, save_dir: str = 'outputs'):
|
||||
"""Process a single image."""
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
print(f"Error: Failed to load image from '{image_path}'")
|
||||
return
|
||||
|
||||
faces = detector.detect(image)
|
||||
print(f'Detected {len(faces)} face(s)')
|
||||
|
||||
for i, face in enumerate(faces):
|
||||
bbox = face.bbox
|
||||
x1, y1, x2, y2 = map(int, bbox[:4])
|
||||
face_crop = image[y1:y2, x1:x2]
|
||||
|
||||
if face_crop.size == 0:
|
||||
continue
|
||||
|
||||
result = gaze_estimator.estimate(face_crop)
|
||||
print(f' Face {i + 1}: pitch={np.degrees(result.pitch):.1f}°, yaw={np.degrees(result.yaw):.1f}°')
|
||||
|
||||
draw_gaze(image, bbox, result.pitch, result.yaw, draw_angles=True)
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{Path(image_path).stem}_gaze.jpg')
|
||||
cv2.imwrite(output_path, image)
|
||||
print(f'Output saved: {output_path}')
|
||||
|
||||
|
||||
def process_video(detector, gaze_estimator, video_path: str, save_dir: str = 'outputs'):
|
||||
"""Process a video file."""
|
||||
cap = cv2.VideoCapture(video_path)
|
||||
if not cap.isOpened():
|
||||
print(f"Error: Cannot open video file '{video_path}'")
|
||||
return
|
||||
|
||||
fps = cap.get(cv2.CAP_PROP_FPS)
|
||||
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
||||
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
||||
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{Path(video_path).stem}_gaze.mp4')
|
||||
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
|
||||
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
|
||||
|
||||
print(f'Processing video: {video_path} ({total_frames} frames)')
|
||||
frame_count = 0
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
frame_count += 1
|
||||
faces = detector.detect(frame)
|
||||
|
||||
for face in faces:
|
||||
bbox = face.bbox
|
||||
x1, y1, x2, y2 = map(int, bbox[:4])
|
||||
face_crop = frame[y1:y2, x1:x2]
|
||||
|
||||
if face_crop.size == 0:
|
||||
continue
|
||||
|
||||
result = gaze_estimator.estimate(face_crop)
|
||||
draw_gaze(frame, bbox, result.pitch, result.yaw)
|
||||
|
||||
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
out.write(frame)
|
||||
|
||||
if frame_count % 100 == 0:
|
||||
print(f' Processed {frame_count}/{total_frames} frames...')
|
||||
|
||||
cap.release()
|
||||
out.release()
|
||||
print(f'Done! Output saved: {output_path}')
|
||||
|
||||
|
||||
def run_camera(detector, gaze_estimator, camera_id: int = 0):
|
||||
"""Run real-time detection on webcam."""
|
||||
cap = cv2.VideoCapture(camera_id)
|
||||
if not cap.isOpened():
|
||||
print(f'Cannot open camera {camera_id}')
|
||||
return
|
||||
|
||||
print("Press 'q' to quit")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
frame = cv2.flip(frame, 1)
|
||||
faces = detector.detect(frame)
|
||||
|
||||
for face in faces:
|
||||
bbox = face.bbox
|
||||
x1, y1, x2, y2 = map(int, bbox[:4])
|
||||
face_crop = frame[y1:y2, x1:x2]
|
||||
|
||||
if face_crop.size == 0:
|
||||
continue
|
||||
|
||||
result = gaze_estimator.estimate(face_crop)
|
||||
draw_gaze(frame, bbox, result.pitch, result.yaw)
|
||||
|
||||
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
cv2.imshow('Gaze Estimation', frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Run gaze estimation')
|
||||
parser.add_argument('--source', type=str, required=True, help='Image/video path or camera ID (0, 1, ...)')
|
||||
parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
|
||||
args = parser.parse_args()
|
||||
|
||||
detector = RetinaFace()
|
||||
gaze_estimator = MobileGaze()
|
||||
|
||||
source_type = get_source_type(args.source)
|
||||
|
||||
if source_type == 'camera':
|
||||
run_camera(detector, gaze_estimator, int(args.source))
|
||||
elif source_type == 'image':
|
||||
if not os.path.exists(args.source):
|
||||
print(f'Error: Image not found: {args.source}')
|
||||
return
|
||||
process_image(detector, gaze_estimator, args.source, args.save_dir)
|
||||
elif source_type == 'video':
|
||||
if not os.path.exists(args.source):
|
||||
print(f'Error: Video not found: {args.source}')
|
||||
return
|
||||
process_video(detector, gaze_estimator, args.source, args.save_dir)
|
||||
else:
|
||||
print(f"Error: Unknown source type for '{args.source}'")
|
||||
print('Supported formats: images (.jpg, .png, ...), videos (.mp4, .avi, ...), or camera ID (0, 1, ...)')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
187  tools/landmarks.py  Normal file
@@ -0,0 +1,187 @@
|
||||
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""106-point facial landmark detection.
|
||||
|
||||
Usage:
|
||||
python tools/landmarks.py --source path/to/image.jpg
|
||||
python tools/landmarks.py --source path/to/video.mp4
|
||||
python tools/landmarks.py --source 0 # webcam
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
|
||||
from uniface import SCRFD, Landmark106, RetinaFace
|
||||
|
||||
IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
|
||||
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}
|
||||
|
||||
|
||||
def get_source_type(source: str) -> str:
|
||||
"""Determine if source is image, video, or camera."""
|
||||
if source.isdigit():
|
||||
return 'camera'
|
||||
path = Path(source)
|
||||
suffix = path.suffix.lower()
|
||||
if suffix in IMAGE_EXTENSIONS:
|
||||
return 'image'
|
||||
elif suffix in VIDEO_EXTENSIONS:
|
||||
return 'video'
|
||||
else:
|
||||
return 'unknown'
|
||||
|
||||
|
||||
def process_image(detector, landmarker, image_path: str, save_dir: str = 'outputs'):
|
||||
"""Process a single image."""
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
print(f"Error: Failed to load image from '{image_path}'")
|
||||
return
|
||||
|
||||
faces = detector.detect(image)
|
||||
print(f'Detected {len(faces)} face(s)')
|
||||
|
||||
if not faces:
|
||||
return
|
||||
|
||||
for i, face in enumerate(faces):
|
||||
bbox = face.bbox
|
||||
x1, y1, x2, y2 = map(int, bbox)
|
||||
cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
|
||||
|
||||
landmarks = landmarker.get_landmarks(image, bbox)
|
||||
print(f' Face {i + 1}: {len(landmarks)} landmarks')
|
||||
|
||||
for x, y in landmarks.astype(int):
|
||||
cv2.circle(image, (x, y), 1, (0, 255, 0), -1)
|
||||
|
||||
cv2.putText(image, f'Face {i + 1}', (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{Path(image_path).stem}_landmarks.jpg')
|
||||
cv2.imwrite(output_path, image)
|
||||
print(f'Output saved: {output_path}')
|
||||
|
||||
|
||||
def process_video(detector, landmarker, video_path: str, save_dir: str = 'outputs'):
|
||||
"""Process a video file."""
|
||||
cap = cv2.VideoCapture(video_path)
|
||||
if not cap.isOpened():
|
||||
print(f"Error: Cannot open video file '{video_path}'")
|
||||
return
|
||||
|
||||
fps = cap.get(cv2.CAP_PROP_FPS)
|
||||
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
||||
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
||||
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{Path(video_path).stem}_landmarks.mp4')
|
||||
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
|
||||
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
|
||||
|
||||
print(f'Processing video: {video_path} ({total_frames} frames)')
|
||||
frame_count = 0
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
frame_count += 1
|
||||
faces = detector.detect(frame)
|
||||
|
||||
for face in faces:
|
||||
bbox = face.bbox
|
||||
x1, y1, x2, y2 = map(int, bbox)
|
||||
cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
|
||||
|
||||
landmarks = landmarker.get_landmarks(frame, bbox)
|
||||
for x, y in landmarks.astype(int):
|
||||
cv2.circle(frame, (x, y), 1, (0, 255, 0), -1)
|
||||
|
||||
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
out.write(frame)
|
||||
|
||||
if frame_count % 100 == 0:
|
||||
print(f' Processed {frame_count}/{total_frames} frames...')
|
||||
|
||||
cap.release()
|
||||
out.release()
|
||||
print(f'Done! Output saved: {output_path}')
|
||||
|
||||
|
||||
def run_camera(detector, landmarker, camera_id: int = 0):
|
||||
"""Run real-time detection on webcam."""
|
||||
cap = cv2.VideoCapture(camera_id)
|
||||
if not cap.isOpened():
|
||||
print(f'Cannot open camera {camera_id}')
|
||||
return
|
||||
|
||||
print("Press 'q' to quit")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
if not ret:  # check the read result before flipping so a failed grab never reaches cv2.flip
    break
frame = cv2.flip(frame, 1)
|
||||
|
||||
faces = detector.detect(frame)
|
||||
|
||||
for face in faces:
|
||||
bbox = face.bbox
|
||||
x1, y1, x2, y2 = map(int, bbox)
|
||||
cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
|
||||
|
||||
landmarks = landmarker.get_landmarks(frame, bbox)
|
||||
for x, y in landmarks.astype(int):
|
||||
cv2.circle(frame, (x, y), 1, (0, 255, 0), -1)
|
||||
|
||||
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
cv2.imshow('106-Point Landmarks', frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Run facial landmark detection')
|
||||
parser.add_argument('--source', type=str, required=True, help='Image/video path or camera ID (0, 1, ...)')
|
||||
parser.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
|
||||
parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
|
||||
args = parser.parse_args()
|
||||
|
||||
detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
|
||||
landmarker = Landmark106()
|
||||
|
||||
source_type = get_source_type(args.source)
|
||||
|
||||
if source_type == 'camera':
|
||||
run_camera(detector, landmarker, int(args.source))
|
||||
elif source_type == 'image':
|
||||
if not os.path.exists(args.source):
|
||||
print(f'Error: Image not found: {args.source}')
|
||||
return
|
||||
process_image(detector, landmarker, args.source, args.save_dir)
|
||||
elif source_type == 'video':
|
||||
if not os.path.exists(args.source):
|
||||
print(f'Error: Video not found: {args.source}')
|
||||
return
|
||||
process_video(detector, landmarker, args.source, args.save_dir)
|
||||
else:
|
||||
print(f"Error: Unknown source type for '{args.source}'")
|
||||
print('Supported formats: images (.jpg, .png, ...), videos (.mp4, .avi, ...), or camera ID (0, 1, ...)')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
@@ -1,6 +1,13 @@
|
||||
# Face recognition: extract embeddings or compare two faces
|
||||
# Usage: python run_recognition.py --image path/to/image.jpg
|
||||
# python run_recognition.py --image1 face1.jpg --image2 face2.jpg
|
||||
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Face recognition: extract embeddings or compare two faces.
|
||||
|
||||
Usage:
|
||||
python tools/recognition.py --image path/to/image.jpg
|
||||
python tools/recognition.py --image1 face1.jpg --image2 face2.jpg
|
||||
"""
|
||||
|
||||
import argparse
|
||||
|
||||
214  tools/spoofing.py  Normal file
@@ -0,0 +1,214 @@
|
||||
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Face Anti-Spoofing Detection.
|
||||
|
||||
Usage:
|
||||
python tools/spoofing.py --source path/to/image.jpg
|
||||
python tools/spoofing.py --source path/to/video.mp4
|
||||
python tools/spoofing.py --source 0 # webcam
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from uniface import RetinaFace
|
||||
from uniface.constants import MiniFASNetWeights
|
||||
from uniface.spoofing import create_spoofer
|
||||
|
||||
IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
|
||||
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}
|
||||
|
||||
|
||||
def get_source_type(source: str) -> str:
|
||||
"""Determine if source is image, video, or camera."""
|
||||
if source.isdigit():
|
||||
return 'camera'
|
||||
path = Path(source)
|
||||
suffix = path.suffix.lower()
|
||||
if suffix in IMAGE_EXTENSIONS:
|
||||
return 'image'
|
||||
elif suffix in VIDEO_EXTENSIONS:
|
||||
return 'video'
|
||||
else:
|
||||
return 'unknown'
|
||||
|
||||
|
||||
def draw_spoofing_result(
|
||||
image: np.ndarray,
|
||||
bbox: list,
|
||||
is_real: bool,
|
||||
confidence: float,
|
||||
thickness: int = 2,
|
||||
) -> None:
|
||||
"""Draw bounding box with anti-spoofing result.
|
||||
|
||||
Args:
|
||||
image: Input image to draw on.
|
||||
bbox: Bounding box in [x1, y1, x2, y2] format.
|
||||
is_real: True if real face, False if fake.
|
||||
confidence: Confidence score (0.0 to 1.0).
|
||||
thickness: Line thickness for bounding box.
|
||||
"""
|
||||
x1, y1, x2, y2 = map(int, bbox[:4])
|
||||
|
||||
color = (0, 255, 0) if is_real else (0, 0, 255)
|
||||
|
||||
cv2.rectangle(image, (x1, y1), (x2, y2), color, thickness)
|
||||
|
||||
label = 'Real' if is_real else 'Fake'
|
||||
text = f'{label}: {confidence:.1%}'
|
||||
|
||||
(tw, th), _baseline = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2)
|
||||
cv2.rectangle(image, (x1, y1 - th - 10), (x1 + tw + 10, y1), color, -1)
|
||||
cv2.putText(image, text, (x1 + 5, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
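(Not part of the diff: the .1% format specifier above renders the confidence as a percentage.)

# Illustrative value only: f'Real: {0.873:.1%}' -> 'Real: 87.3%'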
|
||||
|
||||
|
||||
def process_image(detector, spoofer, image_path: str, save_dir: str = 'outputs') -> None:
|
||||
"""Process a single image for face anti-spoofing detection."""
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
print(f"Error: Failed to load image from '{image_path}'")
|
||||
return
|
||||
|
||||
faces = detector.detect(image)
|
||||
print(f'Detected {len(faces)} face(s)')
|
||||
|
||||
if not faces:
|
||||
print('No faces detected in the image.')
|
||||
return
|
||||
|
||||
for i, face in enumerate(faces, 1):
|
||||
result = spoofer.predict(image, face.bbox)
|
||||
label = 'Real' if result.is_real else 'Fake'
|
||||
print(f' Face {i}: {label} ({result.confidence:.1%})')
|
||||
|
||||
draw_spoofing_result(image, face.bbox, result.is_real, result.confidence)
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{Path(image_path).stem}_spoofing.jpg')
|
||||
cv2.imwrite(output_path, image)
|
||||
print(f'Output saved: {output_path}')
|
||||
|
||||
|
||||
def process_video(detector, spoofer, video_path: str, save_dir: str = 'outputs') -> None:
|
||||
"""Process a video file for face anti-spoofing detection."""
|
||||
cap = cv2.VideoCapture(video_path)
|
||||
if not cap.isOpened():
|
||||
print(f"Error: Cannot open video file '{video_path}'")
|
||||
return
|
||||
|
||||
fps = cap.get(cv2.CAP_PROP_FPS)
|
||||
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
||||
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
||||
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{Path(video_path).stem}_spoofing.mp4')
|
||||
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
|
||||
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
|
||||
|
||||
print(f'Processing video: {video_path} ({total_frames} frames)')
|
||||
frame_count = 0
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
frame_count += 1
|
||||
faces = detector.detect(frame)
|
||||
|
||||
for face in faces:
|
||||
result = spoofer.predict(frame, face.bbox)
|
||||
draw_spoofing_result(frame, face.bbox, result.is_real, result.confidence)
|
||||
|
||||
out.write(frame)
|
||||
|
||||
if frame_count % 100 == 0:
|
||||
print(f' Processed {frame_count}/{total_frames} frames...')
|
||||
|
||||
cap.release()
|
||||
out.release()
|
||||
print(f'Done! Output saved: {output_path}')
|
||||
|
||||
|
||||
def run_camera(detector, spoofer, camera_id: int = 0) -> None:
|
||||
"""Run real-time anti-spoofing detection on webcam."""
|
||||
cap = cv2.VideoCapture(camera_id)
|
||||
if not cap.isOpened():
|
||||
print(f'Cannot open camera {camera_id}')
|
||||
return
|
||||
|
||||
print("Press 'q' to quit")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
frame = cv2.flip(frame, 1)
|
||||
faces = detector.detect(frame)
|
||||
|
||||
for face in faces:
|
||||
result = spoofer.predict(frame, face.bbox)
|
||||
draw_spoofing_result(frame, face.bbox, result.is_real, result.confidence)
|
||||
|
||||
cv2.imshow('Face Anti-Spoofing', frame)
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Face Anti-Spoofing Detection')
|
||||
parser.add_argument('--source', type=str, required=True, help='Image/video path or camera ID (0, 1, ...)')
|
||||
parser.add_argument(
|
||||
'--model',
|
||||
type=str,
|
||||
default='v2',
|
||||
choices=['v1se', 'v2'],
|
||||
help='Model variant: v1se or v2 (default: v2)',
|
||||
)
|
||||
parser.add_argument('--scale', type=float, default=None, help='Custom crop scale (default: auto)')
|
||||
parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
|
||||
args = parser.parse_args()
|
||||
|
||||
# Select model variant
|
||||
model_name = MiniFASNetWeights.V1SE if args.model == 'v1se' else MiniFASNetWeights.V2
|
||||
|
||||
# Initialize models
|
||||
print(f'Initializing models (MiniFASNet {args.model.upper()})...')
|
||||
detector = RetinaFace()
|
||||
spoofer = create_spoofer(model_name=model_name, scale=args.scale)
|
||||
|
||||
source_type = get_source_type(args.source)
|
||||
|
||||
if source_type == 'camera':
|
||||
run_camera(detector, spoofer, int(args.source))
|
||||
elif source_type == 'image':
|
||||
if not os.path.exists(args.source):
|
||||
print(f'Error: Image not found: {args.source}')
|
||||
return
|
||||
process_image(detector, spoofer, args.source, args.save_dir)
|
||||
elif source_type == 'video':
|
||||
if not os.path.exists(args.source):
|
||||
print(f'Error: Video not found: {args.source}')
|
||||
return
|
||||
process_video(detector, spoofer, args.source, args.save_dir)
|
||||
else:
|
||||
print(f"Error: Unknown source type for '{args.source}'")
|
||||
print('Supported formats: images (.jpg, .png, ...), videos (.mp4, .avi, ...), or camera ID (0, 1, ...)')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
180  tools/video_detection.py  Normal file
@@ -0,0 +1,180 @@
|
||||
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Face detection on video files with progress tracking.
|
||||
|
||||
Usage:
|
||||
python tools/video_detection.py --source video.mp4
|
||||
python tools/video_detection.py --source video.mp4 --output output.mp4
|
||||
python tools/video_detection.py --source 0 # webcam
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
from tqdm import tqdm
|
||||
|
||||
from uniface import SCRFD, RetinaFace
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
|
||||
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}
|
||||
|
||||
|
||||
def get_source_type(source: str) -> str:
|
||||
"""Determine if source is image, video, or camera."""
|
||||
if source.isdigit():
|
||||
return 'camera'
|
||||
path = Path(source)
|
||||
suffix = path.suffix.lower()
|
||||
if suffix in IMAGE_EXTENSIONS:
|
||||
return 'image'
|
||||
elif suffix in VIDEO_EXTENSIONS:
|
||||
return 'video'
|
||||
else:
|
||||
return 'unknown'
|
||||
|
||||
|
||||
def process_video(
|
||||
detector,
|
||||
input_path: str,
|
||||
output_path: str,
|
||||
threshold: float = 0.6,
|
||||
show_preview: bool = False,
|
||||
):
|
||||
"""Process a video file with progress bar."""
|
||||
cap = cv2.VideoCapture(input_path)
|
||||
if not cap.isOpened():
|
||||
print(f"Error: Cannot open video file '{input_path}'")
|
||||
return
|
||||
|
||||
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
||||
fps = cap.get(cv2.CAP_PROP_FPS)
|
||||
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
||||
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
||||
|
||||
print(f'Input: {input_path} ({width}x{height}, {fps:.1f} fps, {total_frames} frames)')
|
||||
print(f'Output: {output_path}')
|
||||
|
||||
Path(output_path).parent.mkdir(parents=True, exist_ok=True)
|
||||
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
|
||||
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
|
||||
|
||||
if not out.isOpened():
|
||||
print(f"Error: Cannot create output video '{output_path}'")
|
||||
cap.release()
|
||||
return
|
||||
|
||||
frame_count = 0
|
||||
total_faces = 0
|
||||
|
||||
for _ in tqdm(range(total_frames), desc='Processing', unit='frames'):
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
frame_count += 1
|
||||
faces = detector.detect(frame)
|
||||
total_faces += len(faces)
|
||||
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
draw_detections(
|
||||
image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
|
||||
)
|
||||
|
||||
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
out.write(frame)
|
||||
|
||||
if show_preview:
|
||||
cv2.imshow("Processing - Press 'q' to cancel", frame)
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
print('\nCancelled by user')
|
||||
break
|
||||
|
||||
cap.release()
|
||||
out.release()
|
||||
if show_preview:
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
avg_faces = total_faces / frame_count if frame_count > 0 else 0
|
||||
print(f'\nDone! {frame_count} frames, {total_faces} faces ({avg_faces:.1f} avg/frame)')
|
||||
print(f'Saved: {output_path}')
|
||||
|
||||
|
||||
def run_camera(detector, camera_id: int = 0, threshold: float = 0.6):
|
||||
"""Run real-time detection on webcam."""
|
||||
cap = cv2.VideoCapture(camera_id)
|
||||
if not cap.isOpened():
|
||||
print(f'Cannot open camera {camera_id}')
|
||||
return
|
||||
|
||||
print("Press 'q' to quit")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
if not ret:  # check the read result before flipping so a failed grab never reaches cv2.flip
    break
frame = cv2.flip(frame, 1)
|
||||
|
||||
faces = detector.detect(frame)
|
||||
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
draw_detections(
|
||||
image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
|
||||
)
|
||||
|
||||
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
cv2.imshow('Face Detection', frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Process video with face detection')
|
||||
parser.add_argument('--source', type=str, required=True, help='Video path or camera ID (0, 1, ...)')
|
||||
parser.add_argument('--output', type=str, default=None, help='Output video path (auto-generated if not specified)')
|
||||
parser.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
|
||||
parser.add_argument('--threshold', type=float, default=0.6, help='Visualization threshold')
|
||||
parser.add_argument('--preview', action='store_true', help='Show live preview')
|
||||
parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory (if --output not specified)')
|
||||
args = parser.parse_args()
|
||||
|
||||
detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
|
||||
|
||||
source_type = get_source_type(args.source)
|
||||
|
||||
if source_type == 'camera':
|
||||
run_camera(detector, int(args.source), args.threshold)
|
||||
elif source_type == 'video':
|
||||
if not os.path.exists(args.source):
|
||||
print(f'Error: Video not found: {args.source}')
|
||||
return
|
||||
|
||||
# Determine output path
|
||||
if args.output:
|
||||
output_path = args.output
|
||||
else:
|
||||
os.makedirs(args.save_dir, exist_ok=True)
|
||||
output_path = os.path.join(args.save_dir, f'{Path(args.source).stem}_detected.mp4')
|
||||
|
||||
process_video(detector, args.source, output_path, args.threshold, args.preview)
|
||||
else:
|
||||
print(f"Error: Unknown source type for '{args.source}'")
|
||||
print('Supported formats: videos (.mp4, .avi, ...) or camera ID (0, 1, ...)')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
@@ -1,4 +1,4 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
|
||||
#
|
||||
# Licensed under the MIT License.
|
||||
# You may obtain a copy of the License at
|
||||
@@ -11,36 +11,58 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""UniFace: A comprehensive library for face analysis.
|
||||
|
||||
This library provides unified APIs for:
|
||||
- Face detection (RetinaFace, SCRFD, YOLOv5Face, YOLOv8Face)
|
||||
- Face recognition (AdaFace, ArcFace, MobileFace, SphereFace)
|
||||
- Facial landmarks (106-point detection)
|
||||
- Face parsing (semantic segmentation)
|
||||
- Gaze estimation
|
||||
- Age, gender, and emotion prediction
|
||||
- Face anti-spoofing
|
||||
- Privacy/anonymization
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
__license__ = 'MIT'
|
||||
__author__ = 'Yakhyokhuja Valikhujaev'
|
||||
__version__ = '1.2.0'
|
||||
|
||||
__version__ = '2.1.0'
|
||||
|
||||
from uniface.face_utils import compute_similarity, face_alignment
|
||||
from uniface.log import Logger, enable_logging
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.visualization import draw_detections
|
||||
from uniface.visualization import draw_detections, vis_parsing_maps
|
||||
|
||||
from .analyzer import FaceAnalyzer
|
||||
from .attribute import AgeGender
|
||||
from .face import Face
|
||||
|
||||
try:
|
||||
from .attribute import Emotion
|
||||
except ImportError:
|
||||
Emotion = None # PyTorch not installed
|
||||
from .attribute import AgeGender, FairFace
|
||||
from .detection import (
|
||||
SCRFD,
|
||||
RetinaFace,
|
||||
YOLOv5Face,
|
||||
YOLOv8Face,
|
||||
create_detector,
|
||||
detect_faces,
|
||||
list_available_detectors,
|
||||
)
|
||||
from .gaze import MobileGaze, create_gaze_estimator
|
||||
from .landmark import Landmark106, create_landmarker
|
||||
from .recognition import ArcFace, MobileFace, SphereFace, create_recognizer
|
||||
from .parsing import BiSeNet, create_face_parser
|
||||
from .privacy import BlurFace, anonymize_faces
|
||||
from .recognition import AdaFace, ArcFace, MobileFace, SphereFace, create_recognizer
|
||||
from .spoofing import MiniFASNet, create_spoofer
|
||||
from .types import AttributeResult, EmotionResult, Face, GazeResult, SpoofingResult
|
||||
|
||||
# Optional: Emotion requires PyTorch
|
||||
Emotion: type | None
|
||||
try:
|
||||
from .attribute import Emotion
|
||||
except ImportError:
|
||||
Emotion = None
|
||||
|
||||
__all__ = [
|
||||
# Metadata
|
||||
'__author__',
|
||||
'__license__',
|
||||
'__version__',
|
||||
@@ -49,28 +71,48 @@ __all__ = [
|
||||
'FaceAnalyzer',
|
||||
# Factory functions
|
||||
'create_detector',
|
||||
'create_face_parser',
|
||||
'create_gaze_estimator',
|
||||
'create_landmarker',
|
||||
'create_recognizer',
|
||||
'create_spoofer',
|
||||
'detect_faces',
|
||||
'list_available_detectors',
|
||||
# Detection models
|
||||
'RetinaFace',
|
||||
'SCRFD',
|
||||
'YOLOv5Face',
|
||||
'YOLOv8Face',
|
||||
# Recognition models
|
||||
'AdaFace',
|
||||
'ArcFace',
|
||||
'MobileFace',
|
||||
'SphereFace',
|
||||
# Landmark models
|
||||
'Landmark106',
|
||||
# Gaze models
|
||||
'GazeResult',
|
||||
'MobileGaze',
|
||||
# Parsing models
|
||||
'BiSeNet',
|
||||
# Attribute models
|
||||
'AgeGender',
|
||||
'AttributeResult',
|
||||
'Emotion',
|
||||
'EmotionResult',
|
||||
'FairFace',
|
||||
# Spoofing models
|
||||
'MiniFASNet',
|
||||
'SpoofingResult',
|
||||
# Privacy
|
||||
'BlurFace',
|
||||
'anonymize_faces',
|
||||
# Utilities
|
||||
'Logger',
|
||||
'compute_similarity',
|
||||
'draw_detections',
|
||||
'enable_logging',
|
||||
'face_alignment',
|
||||
'verify_model_weights',
|
||||
'Logger',
|
||||
'enable_logging',
|
||||
'vis_parsing_maps',
|
||||
]
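Taken together, the expanded 2.1.0 surface can be exercised in a few lines. The sketch below uses only names exported above; the image path is a placeholder, and Emotion is skipped because it needs PyTorch:

import cv2

import uniface

print(uniface.__version__)                         # '2.1.0'
print(sorted(uniface.list_available_detectors()))  # registered detector names

image = cv2.imread('selfie.jpg')  # placeholder path
for face in uniface.detect_faces(image, method='yolov8face'):
    print(face.bbox, face.confidence)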
|
||||
|
||||
@@ -1,76 +1,103 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from typing import List, Optional
|
||||
from __future__ import annotations
|
||||
|
||||
import numpy as np
|
||||
|
||||
from uniface.attribute.age_gender import AgeGender
|
||||
from uniface.attribute.fairface import FairFace
|
||||
from uniface.detection.base import BaseDetector
|
||||
from uniface.face import Face
|
||||
from uniface.log import Logger
|
||||
from uniface.recognition.base import BaseRecognizer
|
||||
from uniface.types import Face
|
||||
|
||||
__all__ = ['FaceAnalyzer']
|
||||
|
||||
|
||||
class FaceAnalyzer:
|
||||
"""Unified face analyzer combining detection, recognition, and attributes."""
|
||||
"""Unified face analyzer combining detection, recognition, and attributes.
|
||||
|
||||
This class provides a high-level interface for face analysis by combining
|
||||
multiple components: face detection, recognition (embedding extraction),
|
||||
and attribute prediction (age, gender, race).
|
||||
|
||||
Args:
|
||||
detector: Face detector instance for detecting faces in images.
|
||||
recognizer: Optional face recognizer for extracting embeddings.
|
||||
age_gender: Optional age/gender predictor.
|
||||
fairface: Optional FairFace predictor for demographics.
|
||||
|
||||
Example:
|
||||
>>> from uniface import RetinaFace, ArcFace, FaceAnalyzer
|
||||
>>> detector = RetinaFace()
|
||||
>>> recognizer = ArcFace()
|
||||
>>> analyzer = FaceAnalyzer(detector, recognizer=recognizer)
|
||||
>>> faces = analyzer.analyze(image)
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
detector: BaseDetector,
|
||||
recognizer: Optional[BaseRecognizer] = None,
|
||||
age_gender: Optional[AgeGender] = None,
|
||||
recognizer: BaseRecognizer | None = None,
|
||||
age_gender: AgeGender | None = None,
|
||||
fairface: FairFace | None = None,
|
||||
) -> None:
|
||||
self.detector = detector
|
||||
self.recognizer = recognizer
|
||||
self.age_gender = age_gender
|
||||
self.fairface = fairface
|
||||
|
||||
Logger.info(f'Initialized FaceAnalyzer with detector={detector.__class__.__name__}')
|
||||
if recognizer:
|
||||
Logger.info(f' - Recognition enabled: {recognizer.__class__.__name__}')
|
||||
if age_gender:
|
||||
Logger.info(f' - Age/Gender enabled: {age_gender.__class__.__name__}')
|
||||
if fairface:
|
||||
Logger.info(f' - FairFace enabled: {fairface.__class__.__name__}')
|
||||
|
||||
def analyze(self, image: np.ndarray) -> List[Face]:
|
||||
"""Analyze faces in an image."""
|
||||
detections = self.detector.detect(image)
|
||||
Logger.debug(f'Detected {len(detections)} face(s)')
|
||||
def analyze(self, image: np.ndarray) -> list[Face]:
|
||||
"""Analyze faces in an image.
|
||||
|
||||
faces = []
|
||||
for idx, detection in enumerate(detections):
|
||||
bbox = detection['bbox']
|
||||
confidence = detection['confidence']
|
||||
landmarks = detection['landmarks']
|
||||
Performs face detection and optionally extracts embeddings and
|
||||
predicts attributes for each detected face.
|
||||
|
||||
embedding = None
|
||||
Args:
|
||||
image: Input image as numpy array with shape (H, W, C) in BGR format.
|
||||
|
||||
Returns:
|
||||
List of Face objects with detection results and any predicted attributes.
|
||||
"""
|
||||
faces = self.detector.detect(image)
|
||||
Logger.debug(f'Detected {len(faces)} face(s)')
|
||||
|
||||
for idx, face in enumerate(faces):
|
||||
if self.recognizer is not None:
|
||||
try:
|
||||
embedding = self.recognizer.get_normalized_embedding(image, landmarks)
|
||||
Logger.debug(f' Face {idx + 1}: Extracted embedding with shape {embedding.shape}')
|
||||
face.embedding = self.recognizer.get_normalized_embedding(image, face.landmarks)
|
||||
Logger.debug(f' Face {idx + 1}: Extracted embedding with shape {face.embedding.shape}')
|
||||
except Exception as e:
|
||||
Logger.warning(f' Face {idx + 1}: Failed to extract embedding: {e}')
|
||||
|
||||
age, gender_id = None, None
|
||||
if self.age_gender is not None:
|
||||
try:
|
||||
gender_id, age = self.age_gender.predict(image, bbox)
|
||||
gender_str = 'Female' if gender_id == 0 else 'Male'
|
||||
Logger.debug(f' Face {idx + 1}: Age={age}, Gender={gender_str}')
|
||||
result = self.age_gender.predict(image, face.bbox)
|
||||
face.gender = result.gender
|
||||
face.age = result.age
|
||||
Logger.debug(f' Face {idx + 1}: Age={face.age}, Gender={face.sex}')
|
||||
except Exception as e:
|
||||
Logger.warning(f' Face {idx + 1}: Failed to predict age/gender: {e}')
|
||||
|
||||
face = Face(
|
||||
bbox=bbox,
|
||||
confidence=confidence,
|
||||
landmarks=landmarks,
|
||||
embedding=embedding,
|
||||
age=age,
|
||||
gender_id=gender_id,
|
||||
)
|
||||
faces.append(face)
|
||||
if self.fairface is not None:
|
||||
try:
|
||||
result = self.fairface.predict(image, face.bbox)
|
||||
face.gender = result.gender
|
||||
face.age_group = result.age_group
|
||||
face.race = result.race
|
||||
Logger.debug(f' Face {idx + 1}: AgeGroup={face.age_group}, Gender={face.sex}, Race={face.race}')
|
||||
except Exception as e:
|
||||
Logger.warning(f' Face {idx + 1}: Failed to predict FairFace attributes: {e}')
|
||||
|
||||
Logger.info(f'Analysis complete: {len(faces)} face(s) processed')
|
||||
return faces
|
||||
@@ -81,4 +108,6 @@ class FaceAnalyzer:
|
||||
parts.append(f'recognizer={self.recognizer.__class__.__name__}')
|
||||
if self.age_gender:
|
||||
parts.append(f'age_gender={self.age_gender.__class__.__name__}')
|
||||
if self.fairface:
|
||||
parts.append(f'fairface={self.fairface.__class__.__name__}')
|
||||
return ', '.join(parts) + ')'
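To make the new Face-object flow concrete, here is a minimal sketch of driving the refactored analyzer with the optional predictors; the image path is a placeholder, not part of the diff:

import cv2

from uniface import ArcFace, FaceAnalyzer, FairFace, RetinaFace

# detect() now returns Face objects; the optional predictors fill in
# embedding, age_group and race on each one.
analyzer = FaceAnalyzer(RetinaFace(), recognizer=ArcFace(), fairface=FairFace())
image = cv2.imread('group_photo.jpg')  # placeholder path
for face in analyzer.analyze(image):
    print(face.bbox, face.confidence, face.age_group, face.race)
    embedding = getattr(face, 'embedding', None)  # set by the recognizer when one is provided
    if embedding is not None:
        print('embedding shape:', embedding.shape)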
|
||||
|
||||
@@ -1,14 +1,18 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from typing import Any, Dict, List, Union
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
import numpy as np
|
||||
|
||||
from uniface.attribute.age_gender import AgeGender
|
||||
from uniface.attribute.base import Attribute
|
||||
from uniface.constants import AgeGenderWeights, DDAMFNWeights
|
||||
from uniface.attribute.fairface import FairFace
|
||||
from uniface.constants import AgeGenderWeights, DDAMFNWeights, FairFaceWeights
|
||||
from uniface.types import AttributeResult, EmotionResult, Face
|
||||
|
||||
# Emotion requires PyTorch - make it optional
|
||||
try:
|
||||
@@ -20,19 +24,30 @@ except ImportError:
|
||||
_EMOTION_AVAILABLE = False
|
||||
|
||||
# Public API for the attribute module
|
||||
__all__ = ['AgeGender', 'Emotion', 'create_attribute_predictor', 'predict_attributes']
|
||||
__all__ = [
|
||||
'AgeGender',
|
||||
'AttributeResult',
|
||||
'Emotion',
|
||||
'EmotionResult',
|
||||
'FairFace',
|
||||
'create_attribute_predictor',
|
||||
'predict_attributes',
|
||||
]
|
||||
|
||||
# A mapping from model enums to their corresponding attribute classes
|
||||
_ATTRIBUTE_MODELS = {
|
||||
**{model: AgeGender for model in AgeGenderWeights},
|
||||
**dict.fromkeys(AgeGenderWeights, AgeGender),
|
||||
**dict.fromkeys(FairFaceWeights, FairFace),
|
||||
}
|
||||
|
||||
# Add Emotion models only if PyTorch is available
|
||||
if _EMOTION_AVAILABLE:
|
||||
_ATTRIBUTE_MODELS.update({model: Emotion for model in DDAMFNWeights})
|
||||
_ATTRIBUTE_MODELS.update(dict.fromkeys(DDAMFNWeights, Emotion))
|
||||
|
||||
|
||||
def create_attribute_predictor(model_name: Union[AgeGenderWeights, DDAMFNWeights], **kwargs: Any) -> Attribute:
|
||||
def create_attribute_predictor(
|
||||
model_name: AgeGenderWeights | DDAMFNWeights | FairFaceWeights, **kwargs: Any
|
||||
) -> Attribute:
|
||||
"""
|
||||
Factory function to create an attribute predictor instance.
|
||||
|
||||
@@ -41,11 +56,13 @@ def create_attribute_predictor(model_name: Union[AgeGenderWeights, DDAMFNWeights
|
||||
|
||||
Args:
|
||||
model_name: The enum corresponding to the desired attribute model
|
||||
(e.g., AgeGenderWeights.DEFAULT or DDAMFNWeights.AFFECNET7).
|
||||
(e.g., AgeGenderWeights.DEFAULT, DDAMFNWeights.AFFECNET7,
|
||||
or FairFaceWeights.DEFAULT).
|
||||
**kwargs: Additional keyword arguments to pass to the model's constructor.
|
||||
|
||||
Returns:
|
||||
An initialized instance of an Attribute predictor class (e.g., AgeGender).
|
||||
An initialized instance of an Attribute predictor class
|
||||
(e.g., AgeGender, FairFace, or Emotion).
|
||||
|
||||
Raises:
|
||||
ValueError: If the provided model_name is not a supported enum.
|
||||
@@ -54,46 +71,44 @@ def create_attribute_predictor(model_name: Union[AgeGenderWeights, DDAMFNWeights
|
||||
|
||||
if model_class is None:
|
||||
raise ValueError(
|
||||
f'Unsupported attribute model: {model_name}. Please choose from AgeGenderWeights or DDAMFNWeights.'
|
||||
f'Unsupported attribute model: {model_name}. '
|
||||
f'Please choose from AgeGenderWeights, FairFaceWeights, or DDAMFNWeights.'
|
||||
)
|
||||
|
||||
# Pass model_name to the constructor, as some classes might need it
|
||||
return model_class(model_name=model_name, **kwargs)
|
||||
|
||||
|
||||
def predict_attributes(
|
||||
image: np.ndarray, detections: List[Dict[str, np.ndarray]], predictor: Attribute
|
||||
) -> List[Dict[str, Any]]:
|
||||
def predict_attributes(image: np.ndarray, faces: list[Face], predictor: Attribute) -> list[Face]:
|
||||
"""
|
||||
High-level API to predict attributes for multiple detected faces.
|
||||
|
||||
This function iterates through a list of face detections, runs the
|
||||
specified attribute predictor on each one, and appends the results back
|
||||
into the detection dictionary.
|
||||
This function iterates through a list of Face objects, runs the
|
||||
specified attribute predictor on each one, and updates the Face
|
||||
objects with the predicted attributes.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): The full input image in BGR format.
|
||||
detections (List[Dict]): A list of detection results, where each dict
|
||||
must contain a 'bbox' and optionally 'landmark'.
|
||||
faces (List[Face]): A list of Face objects from face detection.
|
||||
predictor (Attribute): An initialized attribute predictor instance,
|
||||
created by `create_attribute_predictor`.
|
||||
|
||||
Returns:
|
||||
The list of detections, where each dictionary is updated with a new
|
||||
'attributes' key containing the prediction result.
|
||||
List[Face]: The list of Face objects with updated attribute fields.
|
||||
"""
|
||||
for face in detections:
|
||||
# Initialize attributes dict if it doesn't exist
|
||||
if 'attributes' not in face:
|
||||
face['attributes'] = {}
|
||||
|
||||
for face in faces:
|
||||
if isinstance(predictor, AgeGender):
|
||||
gender_id, age = predictor(image, face['bbox'])
|
||||
face['attributes']['gender_id'] = gender_id
|
||||
face['attributes']['age'] = age
|
||||
result = predictor(image, face.bbox)
|
||||
face.gender = result.gender
|
||||
face.age = result.age
|
||||
elif isinstance(predictor, FairFace):
|
||||
result = predictor(image, face.bbox)
|
||||
face.gender = result.gender
|
||||
face.age_group = result.age_group
|
||||
face.race = result.race
|
||||
elif isinstance(predictor, Emotion):
|
||||
emotion, confidence = predictor(image, face['landmark'])
|
||||
face['attributes']['emotion'] = emotion
|
||||
face['attributes']['confidence'] = confidence
|
||||
result = predictor(image, face.landmarks)
|
||||
face.emotion = result.emotion
|
||||
face.emotion_confidence = result.confidence
|
||||
|
||||
return detections
|
||||
return faces
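A short sketch of the refactored attribute flow, assuming a FairFace predictor is wanted; the image path is a placeholder:

import cv2

from uniface import detect_faces
from uniface.attribute import create_attribute_predictor, predict_attributes
from uniface.constants import FairFaceWeights

image = cv2.imread('portrait.jpg')  # placeholder path
faces = detect_faces(image, method='retinaface')
predictor = create_attribute_predictor(FairFaceWeights.DEFAULT)

# predict_attributes() now writes results onto the Face objects instead of
# appending an 'attributes' dict.
for face in predict_attributes(image, faces, predictor):
    print(face.gender, face.age_group, face.race)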
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from typing import List, Tuple, Union
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
@@ -13,6 +12,7 @@ from uniface.face_utils import bbox_center_alignment
|
||||
from uniface.log import Logger
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.onnx_utils import create_onnx_session
|
||||
from uniface.types import AttributeResult
|
||||
|
||||
__all__ = ['AgeGender']
|
||||
|
||||
@@ -24,18 +24,30 @@ class AgeGender(Attribute):
|
||||
This class inherits from the base `Attribute` class and implements the
|
||||
functionality for predicting age (in years) and gender ID (0 for Female,
|
||||
1 for Male) from a face image. It requires a bounding box to locate the face.
|
||||
|
||||
Args:
|
||||
model_name (AgeGenderWeights): The enum specifying the model weights to load.
|
||||
Defaults to `AgeGenderWeights.DEFAULT`.
|
||||
input_size (Optional[Tuple[int, int]]): Input size (height, width).
|
||||
If None, automatically detected from model metadata. Defaults to None.
|
||||
"""
|
||||
|
||||
def __init__(self, model_name: AgeGenderWeights = AgeGenderWeights.DEFAULT) -> None:
|
||||
def __init__(
|
||||
self,
|
||||
model_name: AgeGenderWeights = AgeGenderWeights.DEFAULT,
|
||||
input_size: tuple[int, int] | None = None,
|
||||
) -> None:
|
||||
"""
|
||||
Initializes the AgeGender prediction model.
|
||||
|
||||
Args:
|
||||
model_name (AgeGenderWeights): The enum specifying the model weights
|
||||
to load.
|
||||
model_name (AgeGenderWeights): The enum specifying the model weights to load.
|
||||
input_size (Optional[Tuple[int, int]]): Input size (height, width).
|
||||
If None, automatically detected from model metadata. Defaults to None.
|
||||
"""
|
||||
Logger.info(f'Initializing AgeGender with model={model_name.name}')
|
||||
self.model_path = verify_model_weights(model_name)
|
||||
self._user_input_size = input_size # Store user preference
|
||||
self._initialize_model()
|
||||
|
||||
def _initialize_model(self) -> None:
|
||||
@@ -47,7 +59,19 @@ class AgeGender(Attribute):
|
||||
# Get model input details from the loaded model
|
||||
input_meta = self.session.get_inputs()[0]
|
||||
self.input_name = input_meta.name
|
||||
self.input_size = tuple(input_meta.shape[2:4]) # (height, width)
|
||||
|
||||
# Use user-provided size if given, otherwise auto-detect from model
|
||||
model_input_size = tuple(input_meta.shape[2:4]) # (height, width)
|
||||
if self._user_input_size is not None:
|
||||
self.input_size = self._user_input_size
|
||||
if self._user_input_size != model_input_size:
|
||||
Logger.warning(
|
||||
f'Using custom input_size {self.input_size}, '
|
||||
f'but model expects {model_input_size}. This may affect accuracy.'
|
||||
)
|
||||
else:
|
||||
self.input_size = model_input_size
|
||||
|
||||
self.output_names = [output.name for output in self.session.get_outputs()]
|
||||
Logger.info(f'Successfully initialized AgeGender model with input size {self.input_size}')
|
||||
except Exception as e:
|
||||
@@ -57,7 +81,7 @@ class AgeGender(Attribute):
|
||||
)
|
||||
raise RuntimeError(f'Failed to initialize AgeGender model: {e}') from e
|
||||
|
||||
def preprocess(self, image: np.ndarray, bbox: Union[List, np.ndarray]) -> np.ndarray:
|
||||
def preprocess(self, image: np.ndarray, bbox: list | np.ndarray) -> np.ndarray:
|
||||
"""
|
||||
Aligns the face based on the bounding box and preprocesses it for inference.
|
||||
|
||||
@@ -87,7 +111,7 @@ class AgeGender(Attribute):
|
||||
)
|
||||
return blob
|
||||
|
||||
def postprocess(self, prediction: np.ndarray) -> Tuple[int, int]:
|
||||
def postprocess(self, prediction: np.ndarray) -> AttributeResult:
|
||||
"""
|
||||
Processes the raw model output to extract gender and age.
|
||||
|
||||
@@ -95,16 +119,15 @@ class AgeGender(Attribute):
|
||||
prediction (np.ndarray): The raw output from the model inference.
|
||||
|
||||
Returns:
|
||||
Tuple[int, int]: A tuple containing the predicted gender ID (0 for Female, 1 for Male)
|
||||
and age (in years).
|
||||
AttributeResult: Result containing gender (0=Female, 1=Male) and age (in years).
|
||||
"""
|
||||
# First two values are gender logits
|
||||
gender_id = int(np.argmax(prediction[:2]))
|
||||
gender = int(np.argmax(prediction[:2]))
|
||||
# Third value is normalized age, scaled by 100
|
||||
age = int(np.round(prediction[2] * 100))
|
||||
return gender_id, age
|
||||
return AttributeResult(gender=gender, age=age)
|
||||
|
||||
def predict(self, image: np.ndarray, bbox: Union[List, np.ndarray]) -> Tuple[int, int]:
|
||||
def predict(self, image: np.ndarray, bbox: list | np.ndarray) -> AttributeResult:
|
||||
"""
|
||||
Predicts age and gender for a single face specified by a bounding box.
|
||||
|
||||
@@ -113,75 +136,8 @@ class AgeGender(Attribute):
|
||||
bbox (Union[List, np.ndarray]): The face bounding box coordinates [x1, y1, x2, y2].
|
||||
|
||||
Returns:
|
||||
Tuple[int, int]: A tuple containing the predicted gender ID (0 for Female, 1 for Male) and age.
|
||||
AttributeResult: Result containing gender (0=Female, 1=Male) and age (in years).
|
||||
"""
|
||||
face_blob = self.preprocess(image, bbox)
|
||||
prediction = self.session.run(self.output_names, {self.input_name: face_blob})[0][0]
|
||||
gender_id, age = self.postprocess(prediction)
|
||||
return gender_id, age
|
||||
|
||||
|
||||
# TODO: below is only for testing, remove it later
|
||||
if __name__ == '__main__':
|
||||
# To run this script, you need to have uniface.detection installed
|
||||
# or available in your path.
|
||||
from uniface.constants import RetinaFaceWeights
|
||||
from uniface.detection import create_detector
|
||||
|
||||
print('Initializing models for live inference...')
|
||||
# 1. Initialize the face detector
|
||||
# Using a smaller model for faster real-time performance
|
||||
detector = create_detector(model_name=RetinaFaceWeights.MNET_V2)
|
||||
|
||||
# 2. Initialize the attribute predictor
|
||||
age_gender_predictor = AgeGender()
|
||||
|
||||
# 3. Start webcam capture
|
||||
cap = cv2.VideoCapture(0)
|
||||
if not cap.isOpened():
|
||||
print('Error: Could not open webcam.')
|
||||
exit()
|
||||
|
||||
print("Starting webcam feed. Press 'q' to quit.")
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
print('Error: Failed to capture frame.')
|
||||
break
|
||||
|
||||
# Detect faces in the current frame
|
||||
detections = detector.detect(frame)
|
||||
|
||||
# For each detected face, predict age and gender
|
||||
for detection in detections:
|
||||
box = detection['bbox']
|
||||
x1, y1, x2, y2 = map(int, box)
|
||||
|
||||
# Predict attributes
|
||||
gender_id, age = age_gender_predictor.predict(frame, box)
|
||||
gender_str = 'Female' if gender_id == 0 else 'Male'
|
||||
|
||||
# Prepare text and draw on the frame
|
||||
label = f'{gender_str}, {age}'
|
||||
cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
|
||||
cv2.putText(
|
||||
frame,
|
||||
label,
|
||||
(x1, y1 - 10),
|
||||
cv2.FONT_HERSHEY_SIMPLEX,
|
||||
0.8,
|
||||
(0, 255, 0),
|
||||
2,
|
||||
)
|
||||
|
||||
# Display the resulting frame
|
||||
cv2.imshow("Age and Gender Inference (Press 'q' to quit)", frame)
|
||||
|
||||
# Break the loop if 'q' is pressed
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
# Release resources
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
print('Inference stopped.')
|
||||
return self.postprocess(prediction)
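Because predict() now returns an AttributeResult rather than a (gender, age) tuple, callers read fields instead of unpacking. A minimal sketch of the new call pattern, with a placeholder image path:

import cv2

from uniface import AgeGender, RetinaFace

detector = RetinaFace()
age_gender = AgeGender()
image = cv2.imread('face.jpg')  # placeholder path
for face in detector.detect(image):
    result = age_gender.predict(image, face.bbox)
    # result.gender: 0 = Female, 1 = Male; result.age in years
    print(face.bbox, 'Female' if result.gender == 0 else 'Male', result.age)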
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
@@ -7,6 +7,10 @@ from typing import Any
|
||||
|
||||
import numpy as np
|
||||
|
||||
from uniface.types import AttributeResult, EmotionResult
|
||||
|
||||
__all__ = ['Attribute', 'AttributeResult', 'EmotionResult']
|
||||
|
||||
|
||||
class Attribute(ABC):
|
||||
"""
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from typing import List, Tuple, Union
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
@@ -13,6 +12,7 @@ from uniface.constants import DDAMFNWeights
|
||||
from uniface.face_utils import face_alignment
|
||||
from uniface.log import Logger
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.types import EmotionResult
|
||||
|
||||
__all__ = ['Emotion']
|
||||
|
||||
@@ -29,7 +29,7 @@ class Emotion(Attribute):
|
||||
def __init__(
|
||||
self,
|
||||
model_weights: DDAMFNWeights = DDAMFNWeights.AFFECNET7,
|
||||
input_size: Tuple[int, int] = (112, 112),
|
||||
input_size: tuple[int, int] = (112, 112),
|
||||
) -> None:
|
||||
"""
|
||||
Initializes the emotion recognition model.
|
||||
@@ -81,7 +81,7 @@ class Emotion(Attribute):
|
||||
Logger.error(f"Failed to load Emotion model from '{self.model_path}'", exc_info=True)
|
||||
raise RuntimeError(f'Failed to initialize Emotion model: {e}') from e
|
||||
|
||||
def preprocess(self, image: np.ndarray, landmark: Union[List, np.ndarray]) -> torch.Tensor:
|
||||
def preprocess(self, image: np.ndarray, landmark: list | np.ndarray) -> torch.Tensor:
|
||||
"""
|
||||
Aligns the face using landmarks and preprocesses it into a tensor.
|
||||
|
||||
@@ -106,7 +106,7 @@ class Emotion(Attribute):
|
||||
|
||||
return torch.from_numpy(transposed_image).unsqueeze(0).to(self.device)
|
||||
|
||||
def postprocess(self, prediction: torch.Tensor) -> Tuple[str, float]:
|
||||
def postprocess(self, prediction: torch.Tensor) -> EmotionResult:
|
||||
"""
|
||||
Processes the raw model output to get the emotion label and confidence score.
|
||||
"""
|
||||
@@ -114,9 +114,9 @@ class Emotion(Attribute):
|
||||
pred_index = np.argmax(probabilities)
|
||||
emotion_label = self.emotion_labels[pred_index]
|
||||
confidence = float(probabilities[pred_index])
|
||||
return emotion_label, confidence
|
||||
return EmotionResult(emotion=emotion_label, confidence=confidence)
|
||||
|
||||
def predict(self, image: np.ndarray, landmark: Union[List, np.ndarray]) -> Tuple[str, float]:
|
||||
def predict(self, image: np.ndarray, landmark: list | np.ndarray) -> EmotionResult:
|
||||
"""
|
||||
Predicts the emotion from a single face specified by its landmarks.
|
||||
"""
|
||||
@@ -127,68 +127,3 @@ class Emotion(Attribute):
|
||||
output = output[0]
|
||||
|
||||
return self.postprocess(output)
|
||||
|
||||
|
||||
# TODO: below is only for testing, remove it later
|
||||
if __name__ == '__main__':
|
||||
from uniface.constants import RetinaFaceWeights
|
||||
from uniface.detection import create_detector
|
||||
|
||||
print('Initializing models for live inference...')
|
||||
# 1. Initialize the face detector
|
||||
# Using a smaller model for faster real-time performance
|
||||
detector = create_detector(model_name=RetinaFaceWeights.MNET_V2)
|
||||
|
||||
# 2. Initialize the attribute predictor
|
||||
emotion_predictor = Emotion()
|
||||
|
||||
# 3. Start webcam capture
|
||||
cap = cv2.VideoCapture(0)
|
||||
if not cap.isOpened():
|
||||
print('Error: Could not open webcam.')
|
||||
exit()
|
||||
|
||||
print("Starting webcam feed. Press 'q' to quit.")
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
print('Error: Failed to capture frame.')
|
||||
break
|
||||
|
||||
# Detect faces in the current frame.
|
||||
# This method returns a list of dictionaries for each detected face.
|
||||
detections = detector.detect(frame)
|
||||
|
||||
# For each detected face, predict the emotion
|
||||
for detection in detections:
|
||||
box = detection['bbox']
|
||||
landmark = detection['landmarks']
|
||||
x1, y1, x2, y2 = map(int, box)
|
||||
|
||||
# Predict attributes using the landmark
|
||||
emotion, confidence = emotion_predictor.predict(frame, landmark)
|
||||
|
||||
# Prepare text and draw on the frame
|
||||
label = f'{emotion} ({confidence:.2f})'
|
||||
cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)
|
||||
cv2.putText(
|
||||
frame,
|
||||
label,
|
||||
(x1, y1 - 10),
|
||||
cv2.FONT_HERSHEY_SIMPLEX,
|
||||
0.8,
|
||||
(255, 0, 0),
|
||||
2,
|
||||
)
|
||||
|
||||
# Display the resulting frame
|
||||
cv2.imshow("Emotion Inference (Press 'q' to quit)", frame)
|
||||
|
||||
# Break the loop if 'q' is pressed
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
# Release resources
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
print('Inference stopped.')
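With the inline webcam demo removed and postprocess() returning an EmotionResult, the equivalent minimal usage becomes the sketch below (Emotion requires PyTorch; the image path is a placeholder):

import cv2

from uniface import Emotion, RetinaFace

detector = RetinaFace()
emotion_predictor = Emotion()
image = cv2.imread('face.jpg')  # placeholder path
for face in detector.detect(image):
    result = emotion_predictor.predict(image, face.landmarks)
    print(f'{result.emotion} ({result.confidence:.2f})')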
|
||||
|
||||
uniface/attribute/fairface.py (new file, 193 lines)
@@ -0,0 +1,193 @@
|
||||
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from uniface.attribute.base import Attribute
|
||||
from uniface.constants import FairFaceWeights
|
||||
from uniface.log import Logger
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.onnx_utils import create_onnx_session
|
||||
from uniface.types import AttributeResult
|
||||
|
||||
__all__ = ['AGE_LABELS', 'RACE_LABELS', 'FairFace']
|
||||
|
||||
# Label definitions
|
||||
RACE_LABELS = [
|
||||
'White',
|
||||
'Black',
|
||||
'Latino Hispanic',
|
||||
'East Asian',
|
||||
'Southeast Asian',
|
||||
'Indian',
|
||||
'Middle Eastern',
|
||||
]
|
||||
AGE_LABELS = ['0-2', '3-9', '10-19', '20-29', '30-39', '40-49', '50-59', '60-69', '70+']
|
||||
|
||||
|
||||
class FairFace(Attribute):
|
||||
"""
|
||||
FairFace attribute prediction model using ONNX Runtime.
|
||||
|
||||
This class inherits from the base `Attribute` class and implements the
|
||||
functionality for predicting race (7 categories), gender (2 categories),
|
||||
and age (9 groups) from a face image. It requires a bounding box to locate the face.
|
||||
|
||||
The model is trained on the FairFace dataset, which provides balanced demographics
|
||||
for more equitable predictions across different racial and gender groups.
|
||||
|
||||
Args:
|
||||
model_name (FairFaceWeights): The enum specifying the model weights to load.
|
||||
Defaults to `FairFaceWeights.DEFAULT`.
|
||||
input_size (Optional[Tuple[int, int]]): Input size (height, width).
|
||||
If None, defaults to (224, 224). Defaults to None.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
model_name: FairFaceWeights = FairFaceWeights.DEFAULT,
|
||||
input_size: tuple[int, int] | None = None,
|
||||
) -> None:
|
||||
"""
|
||||
Initializes the FairFace prediction model.
|
||||
|
||||
Args:
|
||||
model_name (FairFaceWeights): The enum specifying the model weights to load.
|
||||
input_size (Optional[Tuple[int, int]]): Input size (height, width).
|
||||
If None, defaults to (224, 224).
|
||||
"""
|
||||
Logger.info(f'Initializing FairFace with model={model_name.name}')
|
||||
self.model_path = verify_model_weights(model_name)
|
||||
self.input_size = input_size if input_size is not None else (224, 224)
|
||||
self._initialize_model()
|
||||
|
||||
def _initialize_model(self) -> None:
|
||||
"""
|
||||
Initializes the ONNX model and creates an inference session.
|
||||
"""
|
||||
try:
|
||||
self.session = create_onnx_session(self.model_path)
|
||||
# Get model input details from the loaded model
|
||||
input_meta = self.session.get_inputs()[0]
|
||||
self.input_name = input_meta.name
|
||||
self.output_names = [output.name for output in self.session.get_outputs()]
|
||||
Logger.info(f'Successfully initialized FairFace model with input size {self.input_size}')
|
||||
except Exception as e:
|
||||
Logger.error(
|
||||
f"Failed to load FairFace model from '{self.model_path}'",
|
||||
exc_info=True,
|
||||
)
|
||||
raise RuntimeError(f'Failed to initialize FairFace model: {e}') from e
|
||||
|
||||
def preprocess(self, image: np.ndarray, bbox: list | np.ndarray | None = None) -> np.ndarray:
|
||||
"""
|
||||
Preprocesses the face image for inference.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): The input image in BGR format.
|
||||
bbox (Optional[Union[List, np.ndarray]]): Face bounding box [x1, y1, x2, y2].
|
||||
If None, uses the entire image.
|
||||
|
||||
Returns:
|
||||
np.ndarray: The preprocessed image blob ready for inference.
|
||||
"""
|
||||
# Crop face if bbox provided
|
||||
if bbox is not None:
|
||||
bbox = np.asarray(bbox, dtype=int)
|
||||
x1, y1, x2, y2 = bbox[:4]
|
||||
|
||||
# Add padding (25% of face size)
|
||||
w, h = x2 - x1, y2 - y1
|
||||
padding = 0.25
|
||||
x_pad = int(w * padding)
|
||||
y_pad = int(h * padding)
|
||||
|
||||
x1 = max(0, x1 - x_pad)
|
||||
y1 = max(0, y1 - y_pad)
|
||||
x2 = min(image.shape[1], x2 + x_pad)
|
||||
y2 = min(image.shape[0], y2 + y_pad)
|
||||
|
||||
image = image[y1:y2, x1:x2]
|
||||
|
||||
# Resize to input size (width, height for cv2.resize)
|
||||
image = cv2.resize(image, self.input_size[::-1])
|
||||
|
||||
# Convert BGR to RGB
|
||||
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
|
||||
|
||||
# Normalize with ImageNet mean and std
|
||||
image = image.astype(np.float32) / 255.0
|
||||
mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
|
||||
std = np.array([0.229, 0.224, 0.225], dtype=np.float32)
|
||||
image = (image - mean) / std
|
||||
|
||||
# Transpose to CHW format and add batch dimension
|
||||
image = np.transpose(image, (2, 0, 1))
|
||||
image = np.expand_dims(image, axis=0)
|
||||
|
||||
return image
|
||||
|
||||
def postprocess(self, prediction: tuple[np.ndarray, np.ndarray, np.ndarray]) -> AttributeResult:
|
||||
"""
|
||||
Processes the raw model output to extract race, gender, and age.
|
||||
|
||||
Args:
|
||||
prediction (Tuple[np.ndarray, np.ndarray, np.ndarray]): Raw outputs from model
|
||||
(race_logits, gender_logits, age_logits).
|
||||
|
||||
Returns:
|
||||
AttributeResult: Result containing gender (0=Female, 1=Male), age_group, and race.
|
||||
"""
|
||||
race_logits, gender_logits, age_logits = prediction
|
||||
|
||||
# Apply softmax
|
||||
race_probs = self._softmax(race_logits[0])
|
||||
gender_probs = self._softmax(gender_logits[0])
|
||||
age_probs = self._softmax(age_logits[0])
|
||||
|
||||
# Get predictions
|
||||
race_idx = int(np.argmax(race_probs))
|
||||
raw_gender_idx = int(np.argmax(gender_probs))
|
||||
age_idx = int(np.argmax(age_probs))
|
||||
|
||||
# Normalize gender: model outputs 0=Male, 1=Female → standard 0=Female, 1=Male
|
||||
gender = 1 - raw_gender_idx
|
||||
|
||||
return AttributeResult(
|
||||
gender=gender,
|
||||
age_group=AGE_LABELS[age_idx],
|
||||
race=RACE_LABELS[race_idx],
|
||||
)
|
||||
|
||||
def predict(self, image: np.ndarray, bbox: list | np.ndarray | None = None) -> AttributeResult:
|
||||
"""
|
||||
Predicts race, gender, and age for a face.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): The input image in BGR format.
|
||||
bbox (Optional[Union[List, np.ndarray]]): Face bounding box [x1, y1, x2, y2].
|
||||
If None, uses the entire image.
|
||||
|
||||
Returns:
|
||||
AttributeResult: Result containing:
|
||||
- gender: 0=Female, 1=Male
|
||||
- age_group: Age range string like "20-29"
|
||||
- race: Race/ethnicity label
|
||||
"""
|
||||
# Preprocess
|
||||
input_blob = self.preprocess(image, bbox)
|
||||
|
||||
# Inference
|
||||
outputs = self.session.run(self.output_names, {self.input_name: input_blob})
|
||||
|
||||
# Postprocess
|
||||
return self.postprocess(outputs)
|
||||
|
||||
@staticmethod
|
||||
def _softmax(x: np.ndarray) -> np.ndarray:
|
||||
"""Compute softmax values for numerical stability."""
|
||||
exp_x = np.exp(x - np.max(x))
|
||||
return exp_x / np.sum(exp_x)
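A direct call to the new predictor, shown both for a whole image (bbox=None) and per detected face; the path is a placeholder and the detector choice is only illustrative:

import cv2

from uniface import FairFace, RetinaFace

fairface = FairFace()
image = cv2.imread('portrait.jpg')  # placeholder path

# Whole-image prediction, e.g. for an already-cropped face chip.
print(fairface.predict(image))

# Per-face prediction; gender is normalized to 0 = Female, 1 = Male.
for face in RetinaFace().detect(image):
    result = fairface.predict(image, face.bbox)
    print(result.race, result.age_group, 'Female' if result.gender == 0 else 'Male')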
|
||||
@@ -1,35 +1,43 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import itertools
|
||||
import math
|
||||
from typing import List, Optional, Tuple
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
__all__ = [
|
||||
'resize_image',
|
||||
'generate_anchors',
|
||||
'non_max_suppression',
|
||||
'decode_boxes',
|
||||
'decode_landmarks',
|
||||
'distance2bbox',
|
||||
'distance2kps',
|
||||
'generate_anchors',
|
||||
'non_max_suppression',
|
||||
'resize_image',
|
||||
]
|
||||
|
||||
|
||||
def resize_image(frame, target_shape: Tuple[int, int] = (640, 640)) -> Tuple[np.ndarray, float]:
|
||||
"""
|
||||
Resize an image to fit within a target shape while keeping its aspect ratio.
|
||||
def resize_image(
|
||||
frame: np.ndarray,
|
||||
target_shape: tuple[int, int] = (640, 640),
|
||||
) -> tuple[np.ndarray, float]:
|
||||
"""Resize an image to fit within a target shape while keeping its aspect ratio.
|
||||
|
||||
The image is resized to fit within the target dimensions and placed on a
|
||||
blank canvas (zero-padded to target size).
|
||||
|
||||
Args:
|
||||
frame (np.ndarray): Input image.
|
||||
target_shape (Tuple[int, int]): Target size (width, height). Defaults to (640, 640).
|
||||
frame: Input image with shape (H, W, C).
|
||||
target_shape: Target size as (width, height). Defaults to (640, 640).
|
||||
|
||||
Returns:
|
||||
Tuple[np.ndarray, float]: Resized image on a blank canvas and the resize factor.
|
||||
A tuple containing:
|
||||
- Resized image on a blank canvas with shape (height, width, 3).
|
||||
- The resize factor as a float.
|
||||
"""
|
||||
width, height = target_shape
|
||||
|
||||
@@ -53,16 +61,16 @@ def resize_image(frame, target_shape: Tuple[int, int] = (640, 640)) -> Tuple[np.
|
||||
return image, resize_factor
|
||||
|
||||
|
||||
def generate_anchors(image_size: Tuple[int, int] = (640, 640)) -> np.ndarray:
|
||||
"""
|
||||
Generate anchor boxes for a given image size (RetinaFace specific).
|
||||
def generate_anchors(image_size: tuple[int, int] = (640, 640)) -> np.ndarray:
|
||||
"""Generate anchor boxes for a given image size (RetinaFace specific).
|
||||
|
||||
Args:
|
||||
image_size (Tuple[int, int]): Input image size (width, height). Defaults to (640, 640).
|
||||
image_size: Input image size as (width, height). Defaults to (640, 640).
|
||||
|
||||
Returns:
|
||||
np.ndarray: Anchor box coordinates as a NumPy array with shape (num_anchors, 4).
|
||||
Anchor box coordinates as a numpy array with shape (num_anchors, 4).
|
||||
"""
|
||||
# RetinaFace FPN strides and corresponding anchor sizes per level
|
||||
steps = [8, 16, 32]
|
||||
min_sizes = [[16, 32], [64, 128], [256, 512]]
|
||||
|
||||
@@ -85,16 +93,15 @@ def generate_anchors(image_size: Tuple[int, int] = (640, 640)) -> np.ndarray:
|
||||
return output
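With these strides and two anchor sizes per feature-map cell, the anchor count for the default 640x640 input works out as below (arithmetic only, shown for reference):

# 640/8 = 80, 640/16 = 40, 640/32 = 20 cells per side, 2 anchors per cell:
num_anchors = sum(2 * (640 // stride) ** 2 for stride in (8, 16, 32))
print(num_anchors)  # 16800, matching the (num_anchors, 4) output shape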
|
||||
|
||||
|
||||
def non_max_suppression(dets: np.ndarray, threshold: float) -> List[int]:
|
||||
"""
|
||||
Apply Non-Maximum Suppression (NMS) to reduce overlapping bounding boxes based on a threshold.
|
||||
def non_max_suppression(dets: np.ndarray, threshold: float) -> list[int]:
|
||||
"""Apply Non-Maximum Suppression (NMS) to reduce overlapping bounding boxes.
|
||||
|
||||
Args:
|
||||
dets (np.ndarray): Array of detections with each row as [x1, y1, x2, y2, score].
|
||||
threshold (float): IoU threshold for suppression.
|
||||
dets: Array of detections with each row as [x1, y1, x2, y2, score].
|
||||
threshold: IoU threshold for suppression.
|
||||
|
||||
Returns:
|
||||
List[int]: Indices of bounding boxes retained after suppression.
|
||||
Indices of bounding boxes retained after suppression.
|
||||
"""
|
||||
x1 = dets[:, 0]
|
||||
y1 = dets[:, 1]
|
||||
@@ -125,18 +132,22 @@ def non_max_suppression(dets: np.ndarray, threshold: float) -> List[int]:
|
||||
return keep
|
||||
|
||||
|
||||
def decode_boxes(loc: np.ndarray, priors: np.ndarray, variances: Optional[List[float]] = None) -> np.ndarray:
|
||||
"""
|
||||
Decode locations from predictions using priors to undo
|
||||
the encoding done for offset regression at train time (RetinaFace specific).
|
||||
def decode_boxes(
|
||||
loc: np.ndarray,
|
||||
priors: np.ndarray,
|
||||
variances: list[float] | None = None,
|
||||
) -> np.ndarray:
|
||||
"""Decode locations from predictions using priors (RetinaFace specific).
|
||||
|
||||
Undoes the encoding done for offset regression at train time.
|
||||
|
||||
Args:
|
||||
loc (np.ndarray): Location predictions for loc layers, shape: [num_priors, 4]
|
||||
priors (np.ndarray): Prior boxes in center-offset form, shape: [num_priors, 4]
|
||||
variances (Optional[List[float]]): Variances of prior boxes. Defaults to [0.1, 0.2].
|
||||
loc: Location predictions for loc layers, shape: [num_priors, 4].
|
||||
priors: Prior boxes in center-offset form, shape: [num_priors, 4].
|
||||
variances: Variances of prior boxes. Defaults to [0.1, 0.2].
|
||||
|
||||
Returns:
|
||||
np.ndarray: Decoded bounding box predictions with shape [num_priors, 4]
|
||||
Decoded bounding box predictions with shape [num_priors, 4].
|
||||
"""
|
||||
if variances is None:
|
||||
variances = [0.1, 0.2]
|
||||
@@ -155,18 +166,19 @@ def decode_boxes(loc: np.ndarray, priors: np.ndarray, variances: Optional[List[f
|
||||
|
||||
|
||||
def decode_landmarks(
|
||||
predictions: np.ndarray, priors: np.ndarray, variances: Optional[List[float]] = None
|
||||
predictions: np.ndarray,
|
||||
priors: np.ndarray,
|
||||
variances: list[float] | None = None,
|
||||
) -> np.ndarray:
|
||||
"""
|
||||
Decode landmark predictions using prior boxes (RetinaFace specific).
|
||||
"""Decode landmark predictions using prior boxes (RetinaFace specific).
|
||||
|
||||
Args:
|
||||
predictions (np.ndarray): Landmark predictions, shape: [num_priors, 10]
|
||||
priors (np.ndarray): Prior boxes, shape: [num_priors, 4]
|
||||
variances (Optional[List[float]]): Scaling factors for landmark offsets. Defaults to [0.1, 0.2].
|
||||
predictions: Landmark predictions, shape: [num_priors, 10].
|
||||
priors: Prior boxes, shape: [num_priors, 4].
|
||||
variances: Scaling factors for landmark offsets. Defaults to [0.1, 0.2].
|
||||
|
||||
Returns:
|
||||
np.ndarray: Decoded landmarks, shape: [num_priors, 10]
|
||||
Decoded landmarks, shape: [num_priors, 10].
|
||||
"""
|
||||
if variances is None:
|
||||
variances = [0.1, 0.2]
|
||||
@@ -187,18 +199,21 @@ def decode_landmarks(
|
||||
return landmarks
|
||||
|
||||
|
||||
def distance2bbox(points: np.ndarray, distance: np.ndarray, max_shape: Optional[Tuple[int, int]] = None) -> np.ndarray:
|
||||
"""
|
||||
Decode distance prediction to bounding box (SCRFD specific).
|
||||
def distance2bbox(
|
||||
points: np.ndarray,
|
||||
distance: np.ndarray,
|
||||
max_shape: tuple[int, int] | None = None,
|
||||
) -> np.ndarray:
|
||||
"""Decode distance prediction to bounding box (SCRFD specific).
|
||||
|
||||
Args:
|
||||
points (np.ndarray): Anchor points with shape (n, 2), [x, y].
|
||||
distance (np.ndarray): Distance from the given point to 4
|
||||
boundaries (left, top, right, bottom) with shape (n, 4).
|
||||
max_shape (Optional[Tuple[int, int]]): Shape of the image (height, width) for clipping.
|
||||
points: Anchor points with shape (n, 2), [x, y].
|
||||
distance: Distance from the given point to 4 boundaries
|
||||
(left, top, right, bottom) with shape (n, 4).
|
||||
max_shape: Shape of the image (height, width) for clipping.
|
||||
|
||||
Returns:
|
||||
np.ndarray: Decoded bounding boxes with shape (n, 4) as [x1, y1, x2, y2].
|
||||
Decoded bounding boxes with shape (n, 4) as [x1, y1, x2, y2].
|
||||
"""
|
||||
x1 = points[:, 0] - distance[:, 0]
|
||||
y1 = points[:, 1] - distance[:, 1]
|
||||
@@ -219,17 +234,20 @@ def distance2bbox(points: np.ndarray, distance: np.ndarray, max_shape: Optional[
|
||||
return np.stack([x1, y1, x2, y2], axis=-1)
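A quick worked check of the decoding rule above, using made-up numbers:

import numpy as np

points = np.array([[100.0, 100.0]])              # one anchor point (x, y)
distance = np.array([[10.0, 20.0, 30.0, 40.0]])  # left, top, right, bottom
# x1 = 100 - 10, y1 = 100 - 20, x2 = 100 + 30, y2 = 100 + 40
box = np.stack(
    [points[:, 0] - distance[:, 0], points[:, 1] - distance[:, 1],
     points[:, 0] + distance[:, 2], points[:, 1] + distance[:, 3]],
    axis=-1,
)
print(box)  # [[ 90.  80. 130. 140.]]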
|
||||
|
||||
|
||||
def distance2kps(points: np.ndarray, distance: np.ndarray, max_shape: Optional[Tuple[int, int]] = None) -> np.ndarray:
|
||||
"""
|
||||
Decode distance prediction to keypoints (SCRFD specific).
|
||||
def distance2kps(
|
||||
points: np.ndarray,
|
||||
distance: np.ndarray,
|
||||
max_shape: tuple[int, int] | None = None,
|
||||
) -> np.ndarray:
|
||||
"""Decode distance prediction to keypoints (SCRFD specific).
|
||||
|
||||
Args:
|
||||
points (np.ndarray): Anchor points with shape (n, 2), [x, y].
|
||||
distance (np.ndarray): Distance from the given point to keypoints with shape (n, 2k).
|
||||
max_shape (Optional[Tuple[int, int]]): Shape of the image (height, width) for clipping.
|
||||
points: Anchor points with shape (n, 2), [x, y].
|
||||
distance: Distance from the given point to keypoints with shape (n, 2k).
|
||||
max_shape: Shape of the image (height, width) for clipping.
|
||||
|
||||
Returns:
|
||||
np.ndarray: Decoded keypoints with shape (n, 2k).
|
||||
Decoded keypoints with shape (n, 2k).
|
||||
"""
|
||||
preds = []
|
||||
for i in range(0, distance.shape[1], 2):
|
||||
|
||||
@@ -1,9 +1,8 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from enum import Enum
|
||||
from typing import Dict
|
||||
|
||||
|
||||
# fmt: off
|
||||
@@ -33,6 +32,15 @@ class ArcFaceWeights(str, Enum):
|
||||
MNET = "arcface_mnet"
|
||||
RESNET = "arcface_resnet"
|
||||
|
||||
|
||||
class AdaFaceWeights(str, Enum):
|
||||
"""
|
||||
AdaFace model weights trained on WebFace datasets.
|
||||
https://github.com/yakhyo/adaface-onnx
|
||||
"""
|
||||
IR_18 = "adaface_ir_18"
|
||||
IR_101 = "adaface_ir_101"
|
||||
|
||||
class RetinaFaceWeights(str, Enum):
|
||||
"""
|
||||
Trained on WIDER FACE dataset.
|
||||
@@ -62,11 +70,27 @@ class YOLOv5FaceWeights(str, Enum):
|
||||
Exported to ONNX from: https://github.com/yakhyo/yolov5-face-onnx-inference
|
||||
|
||||
Model Performance (WIDER FACE):
|
||||
- YOLOV5S: 7.1M params, 28MB, 94.33% Easy / 92.61% Medium / 83.15% Hard
|
||||
- YOLOV5M: 21.1M params, 84MB, 95.30% Easy / 93.76% Medium / 85.28% Hard
|
||||
- YOLOV5N: 11MB, 93.61% Easy / 91.52% Medium / 80.53% Hard
|
||||
- YOLOV5S: 28MB, 94.33% Easy / 92.61% Medium / 83.15% Hard
|
||||
- YOLOV5M: 82MB, 95.30% Easy / 93.76% Medium / 85.28% Hard
|
||||
"""
|
||||
YOLOV5S = "yolov5s_face"
|
||||
YOLOV5M = "yolov5m_face"
|
||||
YOLOV5N = "yolov5n"
|
||||
YOLOV5S = "yolov5s"
|
||||
YOLOV5M = "yolov5m"
|
||||
|
||||
|
||||
class YOLOv8FaceWeights(str, Enum):
|
||||
"""
|
||||
YOLOv8-Face models trained on WIDER FACE dataset.
|
||||
Uses anchor-free design with DFL (Distribution Focal Loss) for bbox regression.
|
||||
Exported to ONNX from: https://github.com/yakhyo/yolov8-face-onnx-inference
|
||||
|
||||
Model Performance (WIDER FACE):
|
||||
- YOLOV8_LITE_S: 7.4MB, 93.4% Easy / 91.2% Medium / 78.6% Hard (lightweight)
|
||||
- YOLOV8N: 12MB, 94.6% Easy / 92.3% Medium / 79.6% Hard (recommended)
|
||||
"""
|
||||
YOLOV8_LITE_S = "yolov8_lite_s"
|
||||
YOLOV8N = "yolov8n_face"
|
||||
|
||||
|
||||
class DDAMFNWeights(str, Enum):
|
||||
@@ -86,6 +110,15 @@ class AgeGenderWeights(str, Enum):
|
||||
DEFAULT = "age_gender"
|
||||
|
||||
|
||||
class FairFaceWeights(str, Enum):
|
||||
"""
|
||||
FairFace attribute prediction (race, gender, age).
|
||||
Trained on FairFace dataset with balanced demographics.
|
||||
https://github.com/yakhyo/fairface-onnx
|
||||
"""
|
||||
DEFAULT = "fairface"
|
||||
|
||||
|
||||
class LandmarkWeights(str, Enum):
|
||||
"""
|
||||
MobileNet 0.5 from Insightface
|
||||
@@ -94,7 +127,44 @@ class LandmarkWeights(str, Enum):
|
||||
DEFAULT = "2d_106"
|
||||
|
||||
|
||||
MODEL_URLS: Dict[Enum, str] = {
|
||||
class GazeWeights(str, Enum):
|
||||
"""
|
||||
MobileGaze: Real-Time Gaze Estimation models.
|
||||
Trained on Gaze360 dataset.
|
||||
https://github.com/yakhyo/gaze-estimation
|
||||
"""
|
||||
RESNET18 = "gaze_resnet18"
|
||||
RESNET34 = "gaze_resnet34"
|
||||
RESNET50 = "gaze_resnet50"
|
||||
MOBILENET_V2 = "gaze_mobilenetv2"
|
||||
MOBILEONE_S0 = "gaze_mobileone_s0"
|
||||
|
||||
|
||||
class ParsingWeights(str, Enum):
|
||||
"""
|
||||
Face Parsing: Semantic Segmentation of Facial Components.
|
||||
Trained on CelebAMask-HQ dataset.
|
||||
https://github.com/yakhyo/face-parsing
|
||||
"""
|
||||
RESNET18 = "parsing_resnet18"
|
||||
RESNET34 = "parsing_resnet34"
|
||||
|
||||
|
||||
class MiniFASNetWeights(str, Enum):
|
||||
"""
|
||||
MiniFASNet: Lightweight Face Anti-Spoofing models.
|
||||
Trained on face anti-spoofing datasets.
|
||||
https://github.com/yakhyo/face-anti-spoofing
|
||||
|
||||
Model Variants:
|
||||
- V1SE: Uses scale=4.0 for face crop (squeeze-and-excitation version)
|
||||
- V2: Uses scale=2.7 for face crop (improved version)
|
||||
"""
|
||||
V1SE = "minifasnet_v1se"
|
||||
V2 = "minifasnet_v2"
|
||||
|
||||
|
||||
MODEL_URLS: dict[Enum, str] = {
|
||||
# RetinaFace
|
||||
RetinaFaceWeights.MNET_025: 'https://github.com/yakhyo/uniface/releases/download/weights/retinaface_mv1_0.25.onnx',
|
||||
RetinaFaceWeights.MNET_050: 'https://github.com/yakhyo/uniface/releases/download/weights/retinaface_mv1_0.50.onnx',
|
||||
@@ -113,22 +183,43 @@ MODEL_URLS: Dict[Enum, str] = {
|
||||
# ArcFace
|
||||
ArcFaceWeights.MNET: 'https://github.com/yakhyo/uniface/releases/download/weights/w600k_mbf.onnx',
|
||||
ArcFaceWeights.RESNET: 'https://github.com/yakhyo/uniface/releases/download/weights/w600k_r50.onnx',
|
||||
# AdaFace
|
||||
AdaFaceWeights.IR_18: 'https://github.com/yakhyo/adaface-onnx/releases/download/weights/adaface_ir_18.onnx',
|
||||
AdaFaceWeights.IR_101: 'https://github.com/yakhyo/adaface-onnx/releases/download/weights/adaface_ir_101.onnx',
|
||||
# SCRFD
|
||||
SCRFDWeights.SCRFD_10G_KPS: 'https://github.com/yakhyo/uniface/releases/download/weights/scrfd_10g_kps.onnx',
|
||||
SCRFDWeights.SCRFD_500M_KPS: 'https://github.com/yakhyo/uniface/releases/download/weights/scrfd_500m_kps.onnx',
|
||||
# YOLOv5-Face
|
||||
YOLOv5FaceWeights.YOLOV5N: 'https://github.com/yakhyo/yolov5-face-onnx-inference/releases/download/weights/yolov5n_face.onnx',
|
||||
YOLOv5FaceWeights.YOLOV5S: 'https://github.com/yakhyo/yolov5-face-onnx-inference/releases/download/weights/yolov5s_face.onnx',
|
||||
YOLOv5FaceWeights.YOLOV5M: 'https://github.com/yakhyo/yolov5-face-onnx-inference/releases/download/weights/yolov5m_face.onnx',
|
||||
# YOLOv8-Face
|
||||
YOLOv8FaceWeights.YOLOV8_LITE_S: 'https://github.com/yakhyo/yolov8-face-onnx-inference/releases/download/weights/yolov8-lite-s.onnx',
|
||||
YOLOv8FaceWeights.YOLOV8N: 'https://github.com/yakhyo/yolov8-face-onnx-inference/releases/download/weights/yolov8n-face.onnx',
|
||||
# DDAMFN
|
||||
DDAMFNWeights.AFFECNET7: 'https://github.com/yakhyo/uniface/releases/download/weights/affecnet7.script',
|
||||
DDAMFNWeights.AFFECNET8: 'https://github.com/yakhyo/uniface/releases/download/weights/affecnet8.script',
|
||||
# AgeGender
|
||||
AgeGenderWeights.DEFAULT: 'https://github.com/yakhyo/uniface/releases/download/weights/genderage.onnx',
|
||||
# FairFace
|
||||
FairFaceWeights.DEFAULT: 'https://github.com/yakhyo/fairface-onnx/releases/download/weights/fairface.onnx',
|
||||
# Landmarks
|
||||
LandmarkWeights.DEFAULT: 'https://github.com/yakhyo/uniface/releases/download/weights/2d106det.onnx',
|
||||
# Gaze (MobileGaze)
|
||||
GazeWeights.RESNET18: 'https://github.com/yakhyo/gaze-estimation/releases/download/weights/resnet18_gaze.onnx',
|
||||
GazeWeights.RESNET34: 'https://github.com/yakhyo/gaze-estimation/releases/download/weights/resnet34_gaze.onnx',
|
||||
GazeWeights.RESNET50: 'https://github.com/yakhyo/gaze-estimation/releases/download/weights/resnet50_gaze.onnx',
|
||||
GazeWeights.MOBILENET_V2: 'https://github.com/yakhyo/gaze-estimation/releases/download/weights/mobilenetv2_gaze.onnx',
|
||||
GazeWeights.MOBILEONE_S0: 'https://github.com/yakhyo/gaze-estimation/releases/download/weights/mobileone_s0_gaze.onnx',
|
||||
# Parsing
|
||||
ParsingWeights.RESNET18: 'https://github.com/yakhyo/face-parsing/releases/download/weights/resnet18.onnx',
|
||||
ParsingWeights.RESNET34: 'https://github.com/yakhyo/face-parsing/releases/download/weights/resnet34.onnx',
|
||||
# Anti-Spoofing (MiniFASNet)
|
||||
MiniFASNetWeights.V1SE: 'https://github.com/yakhyo/face-anti-spoofing/releases/download/weights/MiniFASNetV1SE.onnx',
|
||||
MiniFASNetWeights.V2: 'https://github.com/yakhyo/face-anti-spoofing/releases/download/weights/MiniFASNetV2.onnx',
|
||||
}
|
||||
|
||||
MODEL_SHA256: Dict[Enum, str] = {
|
||||
MODEL_SHA256: dict[Enum, str] = {
|
||||
# RetinaFace
|
||||
RetinaFaceWeights.MNET_025: 'b7a7acab55e104dce6f32cdfff929bd83946da5cd869b9e2e9bdffafd1b7e4a5',
|
||||
RetinaFaceWeights.MNET_050: 'd8977186f6037999af5b4113d42ba77a84a6ab0c996b17c713cc3d53b88bfc37',
|
||||
@@ -147,19 +238,40 @@ MODEL_SHA256: Dict[Enum, str] = {
|
||||
# ArcFace
|
||||
ArcFaceWeights.MNET: '9cc6e4a75f0e2bf0b1aed94578f144d15175f357bdc05e815e5c4a02b319eb4f',
|
||||
ArcFaceWeights.RESNET: '4c06341c33c2ca1f86781dab0e829f88ad5b64be9fba56e56bc9ebdefc619e43',
|
||||
# AdaFace
|
||||
AdaFaceWeights.IR_18: '6b6a35772fb636cdd4fa86520c1a259d0c41472a76f70f802b351837a00d9870',
|
||||
AdaFaceWeights.IR_101: 'f2eb07d03de0af560a82e1214df799fec5e09375d43521e2868f9dc387e5a43e',
|
||||
# SCRFD
|
||||
SCRFDWeights.SCRFD_10G_KPS: '5838f7fe053675b1c7a08b633df49e7af5495cee0493c7dcf6697200b85b5b91',
|
||||
SCRFDWeights.SCRFD_500M_KPS: '5e4447f50245bbd7966bd6c0fa52938c61474a04ec7def48753668a9d8b4ea3a',
|
||||
# YOLOv5-Face
|
||||
YOLOv5FaceWeights.YOLOV5N: 'eb244a06e36999db732b317c2b30fa113cd6cfc1a397eaf738f2d6f33c01f640',
|
||||
YOLOv5FaceWeights.YOLOV5S: 'fc682801cd5880e1e296184a14aea0035486b5146ec1a1389d2e7149cb134bb2',
|
||||
YOLOv5FaceWeights.YOLOV5M: '04302ce27a15bde3e20945691b688e2dd018a10e92dd8932146bede6a49207b2',
|
||||
# YOLOv8-Face
|
||||
YOLOv8FaceWeights.YOLOV8_LITE_S: '11bc496be01356d2d960085bfd8abb8f103199900a034f239a8a1705a1b31dba',
|
||||
YOLOv8FaceWeights.YOLOV8N: '33f3951af7fc0c4d9b321b29cdcd8c9a59d0a29a8d4bdc01fcb5507d5c714809',
|
||||
# DDAMFN
|
||||
DDAMFNWeights.AFFECNET7: '10535bf8b6afe8e9d6ae26cea6c3add9a93036e9addb6adebfd4a972171d015d',
|
||||
DDAMFNWeights.AFFECNET8: '8c66963bc71db42796a14dfcbfcd181b268b65a3fc16e87147d6a3a3d7e0f487',
|
||||
# AgeGender
|
||||
AgeGenderWeights.DEFAULT: '4fde69b1c810857b88c64a335084f1c3fe8f01246c9a191b48c7bb756d6652fb',
|
||||
# FairFace
|
||||
FairFaceWeights.DEFAULT: '9c8c47d437cd310538d233f2465f9ed0524cb7fb51882a37f74e8bc22437fdbf',
|
||||
# Landmark
|
||||
LandmarkWeights.DEFAULT: 'f001b856447c413801ef5c42091ed0cd516fcd21f2d6b79635b1e733a7109dbf',
|
||||
# MobileGaze (trained on Gaze360)
|
||||
GazeWeights.RESNET18: '23d5d7e4f6f40dce8c35274ce9d08b45b9e22cbaaf5af73182f473229d713d31',
|
||||
GazeWeights.RESNET34: '4457ee5f7acd1a5ab02da4b61f02fc3a0b17adbf3844dd0ba3cd4288f2b5e1de',
|
||||
GazeWeights.RESNET50: 'e1eaf98f5ec7c89c6abe7cfe39f7be83e747163f98d1ff945c0603b3c521be22',
|
||||
GazeWeights.MOBILENET_V2: 'fdcdb84e3e6421b5a79e8f95139f249fc258d7f387eed5ddac2b80a9a15ce076',
|
||||
GazeWeights.MOBILEONE_S0: 'c0b5a4f4a0ffd24f76ab3c1452354bb2f60110899fd9a88b464c75bafec0fde8',
|
||||
# Face Parsing
|
||||
ParsingWeights.RESNET18: '0d9bd318e46987c3bdbfacae9e2c0f461cae1c6ac6ea6d43bbe541a91727e33f',
|
||||
ParsingWeights.RESNET34: '5b805bba7b5660ab7070b5a381dcf75e5b3e04199f1e9387232a77a00095102e',
|
||||
# Anti-Spoofing (MiniFASNet)
|
||||
MiniFASNetWeights.V1SE: 'ebab7f90c7833fbccd46d3a555410e78d969db5438e169b6524be444862b3676',
|
||||
MiniFASNetWeights.V2: 'b32929adc2d9c34b9486f8c4c7bc97c1b69bc0ea9befefc380e4faae4e463907',
|
||||
}
|
||||
|
||||
CHUNK_SIZE = 8192
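The new enums plug into the existing download-and-verify flow. A small sketch (weights are fetched on first use, so network access and cache location depend on the environment):

from uniface import verify_model_weights
from uniface.constants import AdaFaceWeights, FairFaceWeights, GazeWeights

# Resolves each enum to a local file, downloading and SHA256-checking it if needed.
for weight in (AdaFaceWeights.IR_18, FairFaceWeights.DEFAULT, GazeWeights.MOBILENET_V2):
    print(weight.value, '->', verify_model_weights(weight))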
|
||||
|
||||
@@ -1,48 +1,55 @@
# Copyright 2025 Yakhyokhuja Valikhujaev
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

from __future__ import annotations

from typing import Any, Dict, List
from typing import Any

import numpy as np

from uniface.types import Face

from .base import BaseDetector
from .retinaface import RetinaFace
from .scrfd import SCRFD
from .yolov5 import YOLOv5Face
from .yolov8 import YOLOv8Face

# Global cache for detector instances
_detector_cache: Dict[str, BaseDetector] = {}
# Global cache for detector instances (keyed by method name + config hash)
_detector_cache: dict[str, BaseDetector] = {}


def detect_faces(image: np.ndarray, method: str = 'retinaface', **kwargs) -> List[Dict[str, Any]]:
    """
    High-level face detection function.
def detect_faces(image: np.ndarray, method: str = 'retinaface', **kwargs: Any) -> list[Face]:
    """High-level face detection function.

    Detects faces in an image using the specified detection method.
    Results are cached for repeated calls with the same configuration.

    Args:
        image (np.ndarray): Input image as numpy array.
        method (str): Detection method to use. Options: 'retinaface', 'scrfd'.
        image: Input image as numpy array with shape (H, W, C) in BGR format.
        method: Detection method to use. Options: 'retinaface', 'scrfd', 'yolov5face', 'yolov8face'.
        **kwargs: Additional arguments passed to the detector.

    Returns:
        List[Dict[str, Any]]: A list of dictionaries, where each dictionary represents a detected face and contains:
            - 'bbox' (List[float]): [x1, y1, x2, y2] bounding box coordinates.
            - 'confidence' (float): The confidence score of the detection.
            - 'landmarks' (List[List[float]]): 5-point facial landmarks.
        A list of Face objects, each containing:
            - bbox: [x1, y1, x2, y2] bounding box coordinates.
            - confidence: The confidence score of the detection.
            - landmarks: 5-point facial landmarks with shape (5, 2).

    Example:
        >>> from uniface import detect_faces
        >>> image = cv2.imread("your_image.jpg")
        >>> faces = detect_faces(image, method='retinaface', conf_thresh=0.8)
        >>> import cv2
        >>> image = cv2.imread('your_image.jpg')
        >>> faces = detect_faces(image, method='retinaface', confidence_threshold=0.8)
        >>> for face in faces:
        ...     print(f"Found face with confidence: {face['confidence']}")
        ...     print(f"BBox: {face['bbox']}")
        ...     print(f'Found face with confidence: {face.confidence}')
        ...     print(f'BBox: {face.bbox}')
    """
    method_name = method.lower()

    sorted_kwargs = sorted(kwargs.items())
    cache_key = f'{method_name}_{str(sorted_kwargs)}'
    cache_key = f'{method_name}_{sorted_kwargs!s}'

    if cache_key not in _detector_cache:
        # Pass kwargs to create the correctly configured detector
@@ -52,50 +59,36 @@ def detect_faces(image: np.ndarray, method: str = 'retinaface', **kwargs) -> Lis
    return detector.detect(image)


def create_detector(method: str = 'retinaface', **kwargs) -> BaseDetector:
    """
    Factory function to create face detectors.
def create_detector(method: str = 'retinaface', **kwargs: Any) -> BaseDetector:
    """Factory function to create face detectors.

    Args:
        method (str): Detection method. Options:
        method: Detection method. Options:
            - 'retinaface': RetinaFace detector (default)
            - 'scrfd': SCRFD detector (fast and accurate)
            - 'yolov5face': YOLOv5-Face detector (accurate with landmarks)
        **kwargs: Detector-specific parameters
            - 'yolov8face': YOLOv8-Face detector (anchor-free, accurate)
        **kwargs: Detector-specific parameters.

    Returns:
        BaseDetector: Initialized detector instance
        Initialized detector instance.

    Raises:
        ValueError: If method is not supported
        ValueError: If method is not supported.

    Examples:
    Example:
        >>> # Basic usage
        >>> detector = create_detector('retinaface')

        >>> # SCRFD detector with custom parameters
        >>> from uniface.constants import SCRFDWeights
        >>> detector = create_detector(
        ...     'scrfd',
        ...     model_name=SCRFDWeights.SCRFD_10G_KPS,
        ...     conf_thresh=0.8,
        ...     input_size=(640, 640)
        ...     'scrfd', model_name=SCRFDWeights.SCRFD_10G_KPS, confidence_threshold=0.8, input_size=(640, 640)
        ... )

        >>> # RetinaFace detector
        >>> detector = create_detector(
        ...     'retinaface',
        ...     model_name=RetinaFaceWeights.MNET_V2,
        ...     conf_thresh=0.8,
        ...     nms_thresh=0.4
        ... )

        >>> # YOLOv5-Face detector
        >>> detector = create_detector(
        ...     'yolov5face',
        ...     model_name=YOLOv5FaceWeights.YOLOV5S,
        ...     conf_thresh=0.25,
        ...     nms_thresh=0.45
        ... )
        >>> # YOLOv8-Face detector
        >>> from uniface.constants import YOLOv8FaceWeights
        >>> detector = create_detector('yolov8face', model_name=YOLOv8FaceWeights.YOLOV8N, confidence_threshold=0.5)
    """
    method = method.lower()

@@ -108,17 +101,20 @@ def create_detector(method: str = 'retinaface', **kwargs) -> BaseDetector:
    elif method == 'yolov5face':
        return YOLOv5Face(**kwargs)

    elif method == 'yolov8face':
        return YOLOv8Face(**kwargs)

    else:
        available_methods = ['retinaface', 'scrfd', 'yolov5face']
        available_methods = ['retinaface', 'scrfd', 'yolov5face', 'yolov8face']
        raise ValueError(f"Unsupported detection method: '{method}'. Available methods: {available_methods}")


def list_available_detectors() -> Dict[str, Dict[str, Any]]:
    """
    List all available detection methods with their descriptions and parameters.
def list_available_detectors() -> dict[str, dict[str, Any]]:
    """List all available detection methods with their descriptions and parameters.

    Returns:
        Dict[str, Dict[str, Any]]: Dictionary of detector information
        Dictionary mapping detector names to their information including
        description, landmark support, paper reference, and default parameters.
    """
    return {
        'retinaface': {
@@ -127,8 +123,8 @@ def list_available_detectors() -> Dict[str, Dict[str, Any]]:
            'paper': 'https://arxiv.org/abs/1905.00641',
            'default_params': {
                'model_name': 'mnet_v2',
                'conf_thresh': 0.5,
                'nms_thresh': 0.4,
                'confidence_threshold': 0.5,
                'nms_threshold': 0.4,
                'input_size': (640, 640),
            },
        },
@@ -138,8 +134,8 @@ def list_available_detectors() -> Dict[str, Dict[str, Any]]:
            'paper': 'https://arxiv.org/abs/2105.04714',
            'default_params': {
                'model_name': 'scrfd_10g_kps',
                'conf_thresh': 0.5,
                'nms_thresh': 0.4,
                'confidence_threshold': 0.5,
                'nms_threshold': 0.4,
                'input_size': (640, 640),
            },
        },
@@ -149,8 +145,19 @@ def list_available_detectors() -> Dict[str, Dict[str, Any]]:
            'paper': 'https://arxiv.org/abs/2105.12931',
            'default_params': {
                'model_name': 'yolov5s_face',
                'conf_thresh': 0.25,
                'nms_thresh': 0.45,
                'confidence_threshold': 0.25,
                'nms_threshold': 0.45,
                'input_size': 640,
            },
        },
        'yolov8face': {
            'description': 'YOLOv8-Face detector - anchor-free design with DFL for accurate detection',
            'supports_landmarks': True,
            'paper': 'https://github.com/derronqi/yolov8-face',
            'default_params': {
                'model_name': 'yolov8n_face',
                'confidence_threshold': 0.5,
                'nms_threshold': 0.45,
                'input_size': 640,
            },
        },
@@ -158,11 +165,12 @@ def list_available_detectors() -> Dict[str, Dict[str, Any]]:


__all__ = [
    'detect_faces',
    'create_detector',
    'list_available_detectors',
    'SCRFD',
    'BaseDetector',
    'RetinaFace',
    'YOLOv5Face',
    'BaseDetector',
    'YOLOv8Face',
    'create_detector',
    'detect_faces',
    'list_available_detectors',
]

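Taken together, the module above exposes a small functional API: detect_faces for one-shot detection backed by the detector cache, create_detector for an explicitly configured instance, and list_available_detectors for discovery. A minimal usage sketch based on the docstrings in this diff; it assumes create_detector and list_available_detectors are re-exported at the package level the way detect_faces is, and 'your_image.jpg' is only a placeholder path:

import cv2

from uniface import create_detector, detect_faces, list_available_detectors

# Discover the registered detectors and their default parameters.
for name, info in list_available_detectors().items():
    print(f"{name}: {info['description']}")

image = cv2.imread('your_image.jpg')  # placeholder path

# One-shot helper: the underlying detector is built once and cached per method/kwargs.
faces = detect_faces(image, method='retinaface', confidence_threshold=0.8)
for face in faces:
    print(f'confidence={face.confidence:.2f}, bbox={face.bbox}')

# Explicit instance when the detector should be constructed and reused directly.
detector = create_detector('yolov8face', confidence_threshold=0.5)
faces = detector.detect(image)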
Some files were not shown because too many files have changed in this diff.