Mirror of https://github.com/yakhyo/uniface.git (synced 2026-05-15 12:57:55 +00:00)

Compare commits (30 commits):

f897482d26, f3d81eb201, ea0b56f7e0, edbab5f7bf, cd8077e460, 452b3381a2,
07c8bd7b24, 68179d1e2d, 99b35dddb4, 3b6d0a35a9, 0bd808bcef, 9edf8b6b3d,
efb40f2e91, 376e7bc488, cbcd89b167, 50226041c9, 64ad0d2f53, 7c98a60d26,
d97a3b2cb2, 2200ba063c, 9bcbfa65c2, 96306a0910, 3389aa3e4c, b282e6ccc1,
d085c6a822, 13b518e96d, 1b877bc9fc, bb1d209f3b, 54b769c0f1, 4d1921e531
BIN .github/logos/gaze_crop.png (vendored, new file). Binary file not shown. Size: 716 KiB.

BIN .github/logos/gaze_org.png (vendored, new file). Binary file not shown. Size: 673 KiB.
48 changes: .github/workflows/ci.yml (vendored)

```diff
@@ -4,20 +4,43 @@ on:
  push:
    branches:
      - main
      - develop
  pull_request:
    branches:
      - main
      - develop

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  test:
  lint:
    runs-on: ubuntu-latest
    timeout-minutes: 5
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: "3.10"
      - uses: pre-commit/action@v3.0.1

  test:
    runs-on: ${{ matrix.os }}
    timeout-minutes: 15
    needs: lint

    strategy:
      fail-fast: false
      matrix:
        python-version: ["3.10", "3.11", "3.12", "3.13"]
        include:
          # Full Python range on Linux (fastest runner)
          - os: ubuntu-latest
            python-version: "3.10"
          - os: ubuntu-latest
            python-version: "3.13"
          - os: macos-latest
            python-version: "3.13"
          - os: windows-latest
            python-version: "3.13"

    steps:
      - name: Checkout code
@@ -27,7 +50,7 @@ jobs:
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
          cache: 'pip'
          cache: "pip"

      - name: Install dependencies
        run: |
@@ -38,21 +61,15 @@ jobs:
        run: |
          python -c "import onnxruntime as ort; print('Available providers:', ort.get_available_providers())"

      - name: Lint with ruff (if available)
        run: |
          pip install ruff || true
          ruff check . --exit-zero || true
        continue-on-error: true

      - name: Run tests
        run: pytest -v --tb=short

      - name: Test package imports
        run: |
          python -c "from uniface import RetinaFace, ArcFace, Landmark106, AgeGender; print('All imports successful')"
        run: python -c "import uniface; print(f'uniface {uniface.__version__} loaded with {len(uniface.__all__)} exports')"

  build:
    runs-on: ubuntu-latest
    timeout-minutes: 10
    needs: test

    steps:
@@ -62,8 +79,8 @@ jobs:
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"
          cache: 'pip'
          python-version: "3.11"
          cache: "pip"

      - name: Install build tools
        run: |
@@ -84,4 +101,3 @@ jobs:
          name: dist-python-${{ github.sha }}
          path: dist/
          retention-days: 7
```
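The CI checks above can be reproduced locally before pushing; the commands below are the same ones the workflow runs (the import smoke-test line is taken verbatim from the diff):

```bash
# Lint gate (same hooks the lint job runs via pre-commit/action)
pre-commit run --all-files

# Test job steps: provider check, test suite, and import smoke test
python -c "import onnxruntime as ort; print('Available providers:', ort.get_available_providers())"
pytest -v --tb=short
python -c "import uniface; print(f'uniface {uniface.__version__} loaded with {len(uniface.__all__)} exports')"
```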
38 changes: .github/workflows/docs.yml (vendored, new file)

```yaml
name: Deploy docs

on:
  push:
    branches: [main]
  workflow_dispatch:

permissions:
  contents: write

jobs:
  deploy:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0 # Fetch full history for git-committers and git-revision-date plugins

      - uses: actions/setup-python@v5
        with:
          python-version: "3.11"

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install mkdocs-material pymdown-extensions mkdocs-git-committers-plugin-2 mkdocs-git-revision-date-localized-plugin

      - name: Build docs
        env:
          MKDOCS_GIT_COMMITTERS_APIKEY: ${{ secrets.MKDOCS_GIT_COMMITTERS_APIKEY }}
        run: mkdocs build --strict

      - name: Deploy to GitHub Pages
        uses: peaceiris/actions-gh-pages@v4
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
          publish_dir: ./site
          destination_dir: docs
```
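To preview the same docs build locally, the workflow's dependency list can be reused; `mkdocs serve` is the standard MkDocs preview command (not part of the workflow, and the git-committers plugin may warn when `MKDOCS_GIT_COMMITTERS_APIKEY` is unset):

```bash
pip install mkdocs-material pymdown-extensions mkdocs-git-committers-plugin-2 mkdocs-git-revision-date-localized-plugin
mkdocs serve  # live-reloading preview, typically at http://127.0.0.1:8000
```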
17 changes: .github/workflows/publish.yml (vendored)

```diff
@@ -5,9 +5,14 @@ on:
    tags:
      - "v*.*.*" # Trigger only on version tags like v0.1.9

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  validate:
    runs-on: ubuntu-latest
    timeout-minutes: 5
    outputs:
      version: ${{ steps.get_version.outputs.version }}
      tag_version: ${{ steps.get_version.outputs.tag_version }}
@@ -16,13 +21,18 @@ jobs:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.11" # Needs 3.11+ for tomllib

      - name: Get version from tag and pyproject.toml
        id: get_version
        run: |
          TAG_VERSION=${GITHUB_REF#refs/tags/v}
          echo "tag_version=$TAG_VERSION" >> $GITHUB_OUTPUT

          PYPROJECT_VERSION=$(grep -Po '(?<=^version = ")[^"]*' pyproject.toml)
          PYPROJECT_VERSION=$(python -c "import tomllib; print(tomllib.load(open('pyproject.toml','rb'))['project']['version'])")
          echo "version=$PYPROJECT_VERSION" >> $GITHUB_OUTPUT

          echo "Tag version: v$TAG_VERSION"
@@ -38,12 +48,13 @@ jobs:

  test:
    runs-on: ubuntu-latest
    timeout-minutes: 15
    needs: validate

    strategy:
      fail-fast: false
      matrix:
        python-version: ["3.10", "3.11", "3.12", "3.13"]
        python-version: ["3.10", "3.13"]

    steps:
      - name: Checkout code
@@ -65,6 +76,7 @@ jobs:

  publish:
    runs-on: ubuntu-latest
    timeout-minutes: 10
    needs: [validate, test]
    permissions:
      contents: write
@@ -105,4 +117,3 @@ jobs:
        with:
          files: dist/*
          generate_release_notes: true
```
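The validate hunk ends at the first echo line, before any comparison between the two versions is shown. A minimal sketch of the check such a job typically finishes with; this continuation is assumed, not part of the diff:

```bash
# Hypothetical continuation of the get_version step: fail when tag and pyproject disagree
if [ "$TAG_VERSION" != "$PYPROJECT_VERSION" ]; then
  echo "Version mismatch: tag v$TAG_VERSION vs pyproject.toml $PYPROJECT_VERSION" >&2
  exit 1
fi
```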
41 changes: .pre-commit-config.yaml (new file)

```yaml
# Pre-commit configuration for UniFace
# See https://pre-commit.com for more information
# See https://pre-commit.com/hooks.html for more hooks

repos:
  # General file checks
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v6.0.0
    hooks:
      - id: trailing-whitespace
      - id: end-of-file-fixer
      - id: check-yaml
        exclude: ^mkdocs.yml$
      - id: check-toml
      - id: check-added-large-files
        args: ['--maxkb=1000']
      - id: check-merge-conflict
      - id: debug-statements
      - id: check-ast

  # Ruff - Fast Python linter and formatter
  - repo: https://github.com/astral-sh/ruff-pre-commit
    rev: v0.14.10
    hooks:
      - id: ruff
        args: [--fix, --unsafe-fixes, --exit-non-zero-on-fix]
      - id: ruff-format

  # Security checks
  - repo: https://github.com/PyCQA/bandit
    rev: 1.9.2
    hooks:
      - id: bandit
        args: [-c, pyproject.toml]
        additional_dependencies: ['bandit[toml]']
        exclude: ^tests/

# Configuration
ci:
  autofix_commit_msg: 'style: auto-fix by pre-commit hooks'
  autoupdate_commit_msg: 'chore: update pre-commit hooks'
```
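Two standard pre-commit commands pair naturally with this config, beyond the install/run commands shown in CONTRIBUTING.md below:

```bash
# Run a single hook from this config on demand (e.g., only the Ruff linter)
pre-commit run ruff --all-files

# Bump the pinned hook revs in this file; the ci: block's autoupdate_commit_msg
# is the message pre-commit.ci uses for its own scheduled update commits
pre-commit autoupdate
```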
162 changes: CONTRIBUTING.md

@@ -16,16 +16,9 @@ Thank you for considering contributing to UniFace! We welcome contributions of a

2. Create a new branch for your feature
3. Write clear, documented code with type hints
4. Add tests for new functionality
5. Ensure all tests pass
5. Ensure all tests pass and pre-commit hooks are satisfied
6. Submit a pull request with a clear description

### Code Style

- Follow PEP8 guidelines
- Use type hints (Python 3.10+)
- Write docstrings for public APIs
- Keep code simple and readable

## Development Setup

@@ -34,27 +27,164 @@ cd uniface

```bash
pip install -e ".[dev]"
```

### Setting Up Pre-commit Hooks

We use [pre-commit](https://pre-commit.com/) to ensure code quality and consistency. Install and configure it:

```bash
# Install pre-commit
pip install pre-commit

# Install the git hooks
pre-commit install

# (Optional) Run against all files
pre-commit run --all-files
```

Once installed, pre-commit will automatically run on every commit to check:

- Code formatting and linting (Ruff)
- Security issues (Bandit)
- General file hygiene (trailing whitespace, YAML/TOML validity, etc.)

**Note:** All PRs are automatically checked by CI. The merge button will only be available after all checks pass.

## Code Style

This project uses [Ruff](https://docs.astral.sh/ruff/) for linting and formatting, following modern Python best practices. Pre-commit handles all formatting automatically.

### Style Guidelines

#### General Rules

- **Line length:** 120 characters maximum
- **Python version:** 3.10+ (use modern syntax)
- **Quote style:** Single quotes for strings, double quotes for docstrings

#### Type Hints

Use modern Python 3.10+ type hints (PEP 585 and PEP 604):

```python
# Preferred (modern)
def process(items: list[str], config: dict[str, int] | None = None) -> tuple[int, str]:
    ...

# Avoid (legacy)
from typing import List, Dict, Optional, Tuple

def process(items: List[str], config: Optional[Dict[str, int]] = None) -> Tuple[int, str]:
    ...
```

#### Docstrings

Use [Google-style docstrings](https://google.github.io/styleguide/pyguide.html#38-comments-and-docstrings) for all public APIs:

```python
def detect_faces(image: np.ndarray, threshold: float = 0.5) -> list[Face]:
    """Detect faces in an image.

    Args:
        image: Input image as a numpy array with shape (H, W, C) in BGR format.
        threshold: Confidence threshold for filtering detections. Defaults to 0.5.

    Returns:
        List of Face objects containing bounding boxes, confidence scores,
        and facial landmarks.

    Raises:
        ValueError: If the input image has invalid dimensions.

    Example:
        >>> from uniface import detect_faces
        >>> faces = detect_faces(image, threshold=0.8)
        >>> print(f"Found {len(faces)} faces")
    """
```

#### Import Order

Imports are automatically sorted by Ruff in the following order:

1. **Future** imports (`from __future__ import annotations`)
2. **Standard library** (`os`, `sys`, `typing`, etc.)
3. **Third-party** (`numpy`, `cv2`, `onnxruntime`, etc.)
4. **First-party** (`uniface.*`)
5. **Local** (relative imports like `.base`, `.models`)

```python
from __future__ import annotations

import os
from typing import Any

import cv2
import numpy as np

from uniface.constants import RetinaFaceWeights
from uniface.log import Logger

from .base import BaseDetector
```

#### Code Comments

- Add comments for complex logic, magic numbers, and non-obvious behavior
- Avoid comments that merely restate the code
- Use `# TODO:` with issue links for planned improvements

```python
# RetinaFace FPN strides and corresponding anchor sizes per level
steps = [8, 16, 32]
min_sizes = [[16, 32], [64, 128], [256, 512]]

# Add small epsilon to prevent division by zero
similarity = np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b) + 1e-5)
```

## Running Tests

```bash
# Run all tests
pytest tests/

# Run with verbose output
pytest tests/ -v

# Run specific test file
pytest tests/test_factory.py

# Run with coverage
pytest tests/ --cov=uniface --cov-report=html
```

## Adding New Features

When adding a new model or feature:

1. **Create the model class** in the appropriate submodule (e.g., `uniface/detection/`)
2. **Add weight constants** to `uniface/constants.py` with URLs and SHA256 hashes (see the sketch after this list)
3. **Export in `__init__.py`** files at both module and package levels
4. **Write tests** in `tests/` directory
5. **Add example usage** in `tools/` or update existing notebooks
6. **Update documentation** if needed
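For step 2, a rough sketch of what a weight-constant entry can look like; the enum name, base classes, filename, URL, and checksum below are hypothetical placeholders, not values from `uniface/constants.py`:

```python
# Hypothetical sketch only: mirror the real style used in uniface/constants.py.
from enum import Enum


class MyDetectorWeights(str, Enum):
    """Weight files for a hypothetical MyDetector model."""

    # model_store downloads the file and verifies it against a SHA-256 hash
    SMALL = 'mydetector_small.onnx'


# Placeholder download URL and checksum (both hypothetical)
MYDETECTOR_URLS = {MyDetectorWeights.SMALL: 'https://example.com/mydetector_small.onnx'}
MYDETECTOR_SHA256 = {MyDetectorWeights.SMALL: '0' * 64}
```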
## Examples

Example notebooks demonstrating library usage:

| Example | Notebook |
|---------|----------|
| Face Detection | [face_detection.ipynb](examples/face_detection.ipynb) |
| Face Alignment | [face_alignment.ipynb](examples/face_alignment.ipynb) |
| Face Recognition | [face_analyzer.ipynb](examples/face_analyzer.ipynb) |
| Face Verification | [face_verification.ipynb](examples/face_verification.ipynb) |
| Face Search | [face_search.ipynb](examples/face_search.ipynb) |
| Face Detection | [01_face_detection.ipynb](examples/01_face_detection.ipynb) |
| Face Alignment | [02_face_alignment.ipynb](examples/02_face_alignment.ipynb) |
| Face Verification | [03_face_verification.ipynb](examples/03_face_verification.ipynb) |
| Face Search | [04_face_search.ipynb](examples/04_face_search.ipynb) |
| Face Analyzer | [05_face_analyzer.ipynb](examples/05_face_analyzer.ipynb) |
| Face Parsing | [06_face_parsing.ipynb](examples/06_face_parsing.ipynb) |
| Face Anonymization | [07_face_anonymization.ipynb](examples/07_face_anonymization.ipynb) |
| Gaze Estimation | [08_gaze_estimation.ipynb](examples/08_gaze_estimation.ipynb) |

## Questions?

Open an issue or start a discussion on GitHub.
341 changes: MODELS.md (deleted)

@@ -1,341 +0,0 @@

# UniFace Model Zoo

Complete guide to all available models, their performance characteristics, and selection criteria.

---

## Face Detection Models

### RetinaFace Family

RetinaFace models are trained on the WIDER FACE dataset and provide excellent accuracy-speed tradeoffs.

| Model Name | Params | Size | Easy | Medium | Hard | Use Case |
| ------------ | ------ | ----- | ------ | ------ | ------ | ----------------------- |
| `MNET_025` | 0.4M | 1.7MB | 88.48% | 87.02% | 80.61% | Mobile/Edge devices |
| `MNET_050` | 1.0M | 2.6MB | 89.42% | 87.97% | 82.40% | Mobile/Edge devices |
| `MNET_V1` | 3.5M | 3.8MB | 90.59% | 89.14% | 84.13% | Balanced mobile |
| `MNET_V2` ⭐ | 3.2M | 3.5MB | 91.70% | 91.03% | 86.60% | **Recommended default** |
| `RESNET18` | 11.7M | 27MB | 92.50% | 91.02% | 86.63% | Server/High accuracy |
| `RESNET34` | 24.8M | 56MB | 94.16% | 93.12% | 88.90% | Maximum accuracy |

**Accuracy**: WIDER FACE validation set (Easy/Medium/Hard subsets), from the [RetinaFace paper](https://arxiv.org/abs/1905.00641)
**Speed**: Benchmark on your own hardware using `scripts/run_detection.py --iterations 100`

#### Usage

```python
from uniface import RetinaFace
from uniface.constants import RetinaFaceWeights

# Default (recommended)
detector = RetinaFace()  # Uses MNET_V2

# Specific model
detector = RetinaFace(
    model_name=RetinaFaceWeights.MNET_025,  # Fastest
    conf_thresh=0.5,
    nms_thresh=0.4,
    input_size=(640, 640)
)
```

---

### SCRFD Family

SCRFD (Sample and Computation Redistribution for Efficient Face Detection) models offer state-of-the-art speed-accuracy tradeoffs.

| Model Name | Params | Size | Easy | Medium | Hard | Use Case |
| -------------- | ------ | ----- | ------ | ------ | ------ | ------------------------- |
| `SCRFD_500M` | 0.6M | 2.5MB | 90.57% | 88.12% | 68.51% | Real-time applications |
| `SCRFD_10G` ⭐ | 4.2M | 17MB | 95.16% | 93.87% | 83.05% | **High accuracy + speed** |

**Accuracy**: WIDER FACE validation set, from the [SCRFD paper](https://arxiv.org/abs/2105.04714)
**Speed**: Benchmark on your own hardware using `scripts/run_detection.py --iterations 100`

#### Usage

```python
from uniface import SCRFD
from uniface.constants import SCRFDWeights

# Fast real-time detection
detector = SCRFD(
    model_name=SCRFDWeights.SCRFD_500M_KPS,
    conf_thresh=0.5,
    input_size=(640, 640)
)

# High accuracy
detector = SCRFD(
    model_name=SCRFDWeights.SCRFD_10G_KPS,
    conf_thresh=0.5
)
```

---

### YOLOv5-Face Family

YOLOv5-Face models provide excellent detection accuracy with 5-point facial landmarks, optimized for real-time applications.

| Model Name | Size | Easy | Medium | Hard | Use Case |
| ----------- | ---- | ------ | ------ | ------ | ------------------------ |
| `YOLOV5N` | 11MB | 93.61% | 91.52% | 80.53% | Lightweight/Mobile |
| `YOLOV5S` ⭐ | 28MB | 94.33% | 92.61% | 83.15% | **Real-time + accuracy** |
| `YOLOV5M` | 82MB | 95.30% | 93.76% | 85.28% | High accuracy |

**Accuracy**: WIDER FACE validation set, from the [YOLOv5-Face paper](https://arxiv.org/abs/2105.12931)
**Speed**: Benchmark on your own hardware using `scripts/run_detection.py --iterations 100`
**Note**: Fixed input size of 640×640. Models exported to ONNX from [deepcam-cn/yolov5-face](https://github.com/deepcam-cn/yolov5-face)

#### Usage

```python
from uniface import YOLOv5Face
from uniface.constants import YOLOv5FaceWeights

# Lightweight/Mobile
detector = YOLOv5Face(
    model_name=YOLOv5FaceWeights.YOLOV5N,
    conf_thresh=0.6,
    nms_thresh=0.5
)

# Real-time detection (recommended)
detector = YOLOv5Face(
    model_name=YOLOv5FaceWeights.YOLOV5S,
    conf_thresh=0.6,
    nms_thresh=0.5
)

# High accuracy
detector = YOLOv5Face(
    model_name=YOLOv5FaceWeights.YOLOV5M,
    conf_thresh=0.6
)

# Detect faces with landmarks
faces = detector.detect(image)
for face in faces:
    bbox = face['bbox']  # [x1, y1, x2, y2]
    confidence = face['confidence']
    landmarks = face['landmarks']  # 5-point landmarks (5, 2)
```

---

## Face Recognition Models

### ArcFace

State-of-the-art face recognition using additive angular margin loss.

| Model Name | Backbone | Params | Size | Use Case |
| ---------- | --------- | ------ | ----- | -------------------------- |
| `MNET` ⭐ | MobileNet | 2.0M | 8MB | **Balanced (recommended)** |
| `RESNET` | ResNet50 | 43.6M | 166MB | Maximum accuracy |

**Dataset**: Trained on MS1M-V2 (5.8M images, 85K identities)
**Accuracy**: Benchmark on your own dataset or use standard face verification benchmarks

#### Usage

```python
from uniface import ArcFace
from uniface.constants import ArcFaceWeights

# Default (MobileNet backbone)
recognizer = ArcFace()

# High accuracy (ResNet50 backbone)
recognizer = ArcFace(model_name=ArcFaceWeights.RESNET)

# Extract embedding
embedding = recognizer.get_normalized_embedding(image, landmarks)
# Returns: (1, 512) normalized embedding vector
```

---

### MobileFace

Lightweight face recognition optimized for mobile devices.

| Model Name | Backbone | Params | Size | LFW | CALFW | CPLFW | AgeDB-30 | Use Case |
| --------------- | ---------------- | ------ | ---- | ------ | ------ | ------ | -------- | ----------------- |
| `MNET_025` | MobileNetV1 0.25 | 0.36M | 1MB | 98.76% | 92.02% | 82.37% | 90.02% | Ultra-lightweight |
| `MNET_V2` ⭐ | MobileNetV2 | 2.29M | 4MB | 99.55% | 94.87% | 86.89% | 95.16% | **Mobile/Edge** |
| `MNET_V3_SMALL` | MobileNetV3-S | 1.25M | 3MB | 99.30% | 93.77% | 85.29% | 92.79% | Mobile optimized |
| `MNET_V3_LARGE` | MobileNetV3-L | 3.52M | 10MB | 99.53% | 94.56% | 86.79% | 95.13% | Balanced mobile |

**Dataset**: Trained on MS1M-V2 (5.8M images, 85K identities)
**Accuracy**: Evaluated on LFW, CALFW, CPLFW, and AgeDB-30 benchmarks
**Note**: These models are lightweight alternatives to ArcFace for resource-constrained environments

#### Usage

```python
from uniface import MobileFace
from uniface.constants import MobileFaceWeights

# Lightweight
recognizer = MobileFace(model_name=MobileFaceWeights.MNET_V2)
```

---

### SphereFace

Face recognition using angular softmax loss.

| Model Name | Backbone | Params | Size | LFW | CALFW | CPLFW | AgeDB-30 | Use Case |
| ---------- | -------- | ------ | ---- | ------ | ------ | ------ | -------- | ------------------- |
| `SPHERE20` | Sphere20 | 24.5M | 50MB | 99.67% | 95.61% | 88.75% | 96.58% | Research/Comparison |
| `SPHERE36` | Sphere36 | 34.6M | 92MB | 99.72% | 95.64% | 89.92% | 96.83% | Research/Comparison |

**Dataset**: Trained on MS1M-V2 (5.8M images, 85K identities)
**Accuracy**: Evaluated on LFW, CALFW, CPLFW, and AgeDB-30 benchmarks
**Note**: SphereFace uses angular softmax loss, an earlier approach that predates ArcFace. These models provide good accuracy with moderate resource requirements.

#### Usage

```python
from uniface import SphereFace
from uniface.constants import SphereFaceWeights

recognizer = SphereFace(model_name=SphereFaceWeights.SPHERE20)
```

---

## Facial Landmark Models

### 106-Point Landmark Detection

High-precision facial landmark localization.

| Model Name | Points | Params | Size | Use Case |
| ---------- | ------ | ------ | ---- | ------------------------ |
| `2D106` | 106 | 3.7M | 14MB | Face alignment, analysis |

**Note**: Provides 106 facial keypoints for detailed face analysis and alignment

#### Usage

```python
from uniface import Landmark106

landmarker = Landmark106()
landmarks = landmarker.get_landmarks(image, bbox)
# Returns: (106, 2) array of (x, y) coordinates
```

**Landmark Groups:**

- Face contour: 0-32 (33 points)
- Eyebrows: 33-50 (18 points)
- Nose: 51-62 (12 points)
- Eyes: 63-86 (24 points)
- Mouth: 87-105 (19 points)
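A small sketch of how these index ranges can be used to slice the (106, 2) array returned above; the group boundaries are exactly the ones listed, only the helper itself is illustrative:

```python
import numpy as np

# Index ranges for the 106-point layout listed above (end-exclusive)
LANDMARK_GROUPS = {
    'contour': (0, 33),
    'eyebrows': (33, 51),
    'nose': (51, 63),
    'eyes': (63, 87),
    'mouth': (87, 106),
}


def split_landmark_groups(landmarks: np.ndarray) -> dict[str, np.ndarray]:
    """Slice a (106, 2) landmark array into the named groups above."""
    return {name: landmarks[start:end] for name, (start, end) in LANDMARK_GROUPS.items()}
```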
---

## Attribute Analysis Models

### Age & Gender Detection

| Model Name | Attributes | Params | Size | Use Case |
| ---------- | ----------- | ------ | ---- | --------------- |
| `DEFAULT` | Age, Gender | 2.1M | 8MB | General purpose |

**Dataset**: Trained on CelebA
**Note**: Accuracy varies by demographic and image quality. Test on your specific use case.

#### Usage

```python
from uniface import AgeGender

predictor = AgeGender()
gender, age = predictor.predict(image, bbox)
# Returns: (gender, age_in_years)
# gender: 0 for Female, 1 for Male
```

---

### Emotion Detection

| Model Name | Classes | Params | Size | Use Case |
| ----------- | ------- | ------ | ---- | --------------- |
| `AFFECNET7` | 7 | 0.5M | 2MB | 7-class emotion |
| `AFFECNET8` | 8 | 0.5M | 2MB | 8-class emotion |

**Classes (7)**: Neutral, Happy, Sad, Surprise, Fear, Disgust, Anger
**Classes (8)**: Above + Contempt

**Dataset**: Trained on AffectNet
**Note**: Emotion detection accuracy depends heavily on facial expression clarity and cultural context

#### Usage

```python
from uniface import Emotion
from uniface.constants import DDAMFNWeights

predictor = Emotion(model_name=DDAMFNWeights.AFFECNET7)
emotion, confidence = predictor.predict(image, landmarks)
```

---

## Model Updates

Models are automatically downloaded and cached on first use. Cache location: `~/.uniface/models/`

### Manual Model Management

```python
from uniface.model_store import verify_model_weights
from uniface.constants import RetinaFaceWeights

# Download specific model
model_path = verify_model_weights(
    RetinaFaceWeights.MNET_V2,
    root='./custom_cache'
)

# Models are verified with SHA-256 checksums
```

### Download All Models

```bash
# Using the provided script
python scripts/download_model.py

# Download specific model
python scripts/download_model.py --model MNET_V2
```

---

## References

### Model Training & Architectures

- **RetinaFace Training**: [yakhyo/retinaface-pytorch](https://github.com/yakhyo/retinaface-pytorch) - PyTorch implementation and training code
- **YOLOv5-Face Original**: [deepcam-cn/yolov5-face](https://github.com/deepcam-cn/yolov5-face) - Original PyTorch implementation
- **YOLOv5-Face ONNX**: [yakhyo/yolov5-face-onnx-inference](https://github.com/yakhyo/yolov5-face-onnx-inference) - ONNX inference implementation
- **Face Recognition Training**: [yakhyo/face-recognition](https://github.com/yakhyo/face-recognition) - ArcFace, MobileFace, SphereFace training code
- **InsightFace**: [deepinsight/insightface](https://github.com/deepinsight/insightface) - Model architectures and pretrained weights

### Papers

- **RetinaFace**: [Single-Shot Multi-Level Face Localisation in the Wild](https://arxiv.org/abs/1905.00641)
- **SCRFD**: [Sample and Computation Redistribution for Efficient Face Detection](https://arxiv.org/abs/2105.04714)
- **YOLOv5-Face**: [YOLO5Face: Why Reinventing a Face Detector](https://arxiv.org/abs/2105.12931)
- **ArcFace**: [Additive Angular Margin Loss for Deep Face Recognition](https://arxiv.org/abs/1801.07698)
- **SphereFace**: [Deep Hypersphere Embedding for Face Recognition](https://arxiv.org/abs/1704.08063)
403 changes: QUICKSTART.md (deleted)

@@ -1,403 +0,0 @@

# UniFace Quick Start Guide

Get up and running with UniFace in 5 minutes! This guide covers the most common use cases.

---

## Installation

```bash
# macOS (Apple Silicon) - automatically includes ARM64 optimizations
pip install uniface

# Linux/Windows with NVIDIA GPU
pip install uniface[gpu]

# CPU-only (all platforms)
pip install uniface
```

---

## 1. Face Detection (30 seconds)

Detect faces in an image:

```python
import cv2
from uniface import RetinaFace

# Load image
image = cv2.imread("photo.jpg")

# Initialize detector (models auto-download on first use)
detector = RetinaFace()

# Detect faces
faces = detector.detect(image)

# Print results
for i, face in enumerate(faces):
    print(f"Face {i+1}:")
    print(f"  Confidence: {face['confidence']:.2f}")
    print(f"  BBox: {face['bbox']}")
    print(f"  Landmarks: {len(face['landmarks'])} points")
```

**Output:**

```
Face 1:
  Confidence: 0.99
  BBox: [120.5, 85.3, 245.8, 210.6]
  Landmarks: 5 points
```

---

## 2. Visualize Detections (1 minute)

Draw bounding boxes and landmarks:

```python
import cv2
from uniface import RetinaFace
from uniface.visualization import draw_detections

# Detect faces
detector = RetinaFace()
image = cv2.imread("photo.jpg")
faces = detector.detect(image)

# Extract visualization data
bboxes = [f['bbox'] for f in faces]
scores = [f['confidence'] for f in faces]
landmarks = [f['landmarks'] for f in faces]

# Draw on image
draw_detections(
    image=image,
    bboxes=bboxes,
    scores=scores,
    landmarks=landmarks,
    vis_threshold=0.6,
)

# Save result
cv2.imwrite("output.jpg", image)
print("Saved output.jpg")
```

---

## 3. Face Recognition (2 minutes)

Compare two faces:

```python
import cv2
import numpy as np
from uniface import RetinaFace, ArcFace

# Initialize models
detector = RetinaFace()
recognizer = ArcFace()

# Load two images
image1 = cv2.imread("person1.jpg")
image2 = cv2.imread("person2.jpg")

# Detect faces
faces1 = detector.detect(image1)
faces2 = detector.detect(image2)

if faces1 and faces2:
    # Extract embeddings
    emb1 = recognizer.get_normalized_embedding(image1, faces1[0]['landmarks'])
    emb2 = recognizer.get_normalized_embedding(image2, faces2[0]['landmarks'])

    # Compute similarity (cosine similarity)
    similarity = np.dot(emb1, emb2.T)[0][0]

    # Interpret result
    if similarity > 0.6:
        print(f"Same person (similarity: {similarity:.3f})")
    else:
        print(f"Different people (similarity: {similarity:.3f})")
else:
    print("No faces detected")
```

**Similarity thresholds:**

- `> 0.6`: Same person (high confidence)
- `0.4 - 0.6`: Uncertain (manual review)
- `< 0.4`: Different people
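A tiny helper encoding these bands; the cutoffs are exactly the ones above, only the function itself is illustrative:

```python
def interpret_similarity(similarity: float) -> str:
    """Map a cosine similarity score to the decision bands listed above."""
    if similarity > 0.6:
        return 'same person (high confidence)'
    if similarity >= 0.4:
        return 'uncertain (manual review)'
    return 'different people'
```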
---

## 4. Webcam Demo (2 minutes)

Real-time face detection:

```python
import cv2
from uniface import RetinaFace
from uniface.visualization import draw_detections

detector = RetinaFace()
cap = cv2.VideoCapture(0)

print("Press 'q' to quit")

while True:
    ret, frame = cap.read()
    if not ret:
        break

    # Detect faces
    faces = detector.detect(frame)

    # Draw results
    bboxes = [f['bbox'] for f in faces]
    scores = [f['confidence'] for f in faces]
    landmarks = [f['landmarks'] for f in faces]
    draw_detections(
        image=frame,
        bboxes=bboxes,
        scores=scores,
        landmarks=landmarks,
    )

    # Show frame
    cv2.imshow("UniFace - Press 'q' to quit", frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
```

---

## 5. Age & Gender Detection (2 minutes)

Detect age and gender:

```python
import cv2
from uniface import RetinaFace, AgeGender

# Initialize models
detector = RetinaFace()
age_gender = AgeGender()

# Load image
image = cv2.imread("photo.jpg")
faces = detector.detect(image)

# Predict attributes
for i, face in enumerate(faces):
    gender, age = age_gender.predict(image, face['bbox'])
    gender_str = 'Female' if gender == 0 else 'Male'
    print(f"Face {i+1}: {gender_str}, {age} years old")
```

**Output:**

```
Face 1: Male, 32 years old
Face 2: Female, 28 years old
```

---

## 6. Facial Landmarks (2 minutes)

Detect 106 facial landmarks:

```python
import cv2
from uniface import RetinaFace, Landmark106

# Initialize models
detector = RetinaFace()
landmarker = Landmark106()

# Detect face and landmarks
image = cv2.imread("photo.jpg")
faces = detector.detect(image)

if faces:
    landmarks = landmarker.get_landmarks(image, faces[0]['bbox'])
    print(f"Detected {len(landmarks)} landmarks")

    # Draw landmarks
    for x, y in landmarks.astype(int):
        cv2.circle(image, (x, y), 2, (0, 255, 0), -1)

    cv2.imwrite("landmarks.jpg", image)
```

---

## 7. Batch Processing (3 minutes)

Process multiple images:

```python
import cv2
from pathlib import Path
from uniface import RetinaFace

detector = RetinaFace()

# Process all images in a folder
image_dir = Path("images/")
output_dir = Path("output/")
output_dir.mkdir(exist_ok=True)

for image_path in image_dir.glob("*.jpg"):
    print(f"Processing {image_path.name}...")

    image = cv2.imread(str(image_path))
    faces = detector.detect(image)

    print(f"  Found {len(faces)} face(s)")

    # Save results
    output_path = output_dir / image_path.name
    # ... draw and save ...

print("Done!")
```

---

## 8. Model Selection

Choose the right model for your use case:

### Detection Models

```python
from uniface.detection import RetinaFace, SCRFD, YOLOv5Face
from uniface.constants import RetinaFaceWeights, SCRFDWeights, YOLOv5FaceWeights

# Fast detection (mobile/edge devices)
detector = RetinaFace(
    model_name=RetinaFaceWeights.MNET_025,
    conf_thresh=0.7
)

# Balanced (recommended)
detector = RetinaFace(
    model_name=RetinaFaceWeights.MNET_V2
)

# Real-time with high accuracy
detector = YOLOv5Face(
    model_name=YOLOv5FaceWeights.YOLOV5S,
    conf_thresh=0.6,
    nms_thresh=0.5
)

# High accuracy (server/GPU)
detector = SCRFD(
    model_name=SCRFDWeights.SCRFD_10G_KPS,
    conf_thresh=0.5
)
```

### Recognition Models

```python
from uniface import ArcFace, MobileFace, SphereFace
from uniface.constants import MobileFaceWeights, SphereFaceWeights

# ArcFace (recommended for most use cases)
recognizer = ArcFace()  # Best accuracy

# MobileFace (lightweight for mobile/edge)
recognizer = MobileFace(model_name=MobileFaceWeights.MNET_V2)  # Fast, small size

# SphereFace (angular margin approach)
recognizer = SphereFace(model_name=SphereFaceWeights.SPHERE20)  # Alternative method
```

---

## Common Issues

### 1. Models Not Downloading

```python
# Manually download a model
from uniface.model_store import verify_model_weights
from uniface.constants import RetinaFaceWeights

model_path = verify_model_weights(RetinaFaceWeights.MNET_V2)
print(f"Model downloaded to: {model_path}")
```

### 2. Check Hardware Acceleration

```python
import onnxruntime as ort

print("Available providers:", ort.get_available_providers())

# macOS M-series should show: ['CoreMLExecutionProvider', ...]
# NVIDIA GPU should show: ['CUDAExecutionProvider', ...]
```

### 3. Slow Performance on Mac

The standard installation includes ARM64 optimizations for Apple Silicon. If performance is slow, verify you're using the ARM64 build of Python:

```bash
python -c "import platform; print(platform.machine())"
# Should show: arm64 (not x86_64)
```

### 4. Import Errors

```python
# Correct imports
from uniface.detection import RetinaFace
from uniface.recognition import ArcFace
from uniface.landmark import Landmark106

# Wrong imports
from uniface import retinaface  # Module, not class
```

---

## Next Steps

### Jupyter Notebook Examples

Explore interactive examples for common tasks:

| Example | Description | Notebook |
|---------|-------------|----------|
| **Face Detection** | Detect faces and facial landmarks | [face_detection.ipynb](examples/face_detection.ipynb) |
| **Face Alignment** | Align and crop faces for recognition | [face_alignment.ipynb](examples/face_alignment.ipynb) |
| **Face Recognition** | Extract face embeddings and compare faces | [face_analyzer.ipynb](examples/face_analyzer.ipynb) |
| **Face Verification** | Compare two faces to verify identity | [face_verification.ipynb](examples/face_verification.ipynb) |
| **Face Search** | Find a person in a group photo | [face_search.ipynb](examples/face_search.ipynb) |

### Additional Resources

- **Model Benchmarks**: See [MODELS.md](MODELS.md) for performance comparisons
- **Full Documentation**: Read [README.md](README.md) for the complete API reference

---

## References

- **RetinaFace Training**: [yakhyo/retinaface-pytorch](https://github.com/yakhyo/retinaface-pytorch)
- **YOLOv5-Face ONNX**: [yakhyo/yolov5-face-onnx-inference](https://github.com/yakhyo/yolov5-face-onnx-inference)
- **Face Recognition Training**: [yakhyo/face-recognition](https://github.com/yakhyo/face-recognition)
- **InsightFace**: [deepinsight/insightface](https://github.com/deepinsight/insightface)
530 changes: README.md

@@ -1,511 +1,129 @@

# UniFace: All-in-One Face Analysis Library

[License: MIT](https://opensource.org/licenses/MIT)
[Python](https://www.python.org/)
[PyPI](https://pypi.org/project/uniface/)
[CI](https://github.com/yakhyo/uniface/actions)
[Downloads](https://pepy.tech/project/uniface)
[DeepWiki](https://deepwiki.com/yakhyo/uniface)

<div align="center">
  <img src=".github/logos/logo_web.webp" width=75%>

[PyPI](https://pypi.org/project/uniface/)
[Python](https://www.python.org/)
[License: MIT](https://opensource.org/licenses/MIT)
[CI](https://github.com/yakhyo/uniface/actions)
[Downloads](https://pepy.tech/projects/uniface)
[Docs](https://yakhyo.github.io/uniface/)

</div>

**UniFace** is a lightweight, production-ready face analysis library built on ONNX Runtime. It provides high-performance face detection, recognition, landmark detection, and attribute analysis with hardware acceleration support across platforms.

<div align="center">
  <img src=".github/logos/logo_web.webp" width=80%>
</div>

**UniFace** is a lightweight, production-ready face analysis library built on ONNX Runtime. It provides high-performance face detection, recognition, landmark detection, face parsing, gaze estimation, and attribute analysis with hardware acceleration support across platforms.

> 💬 **Have questions?** [Chat with this codebase on DeepWiki](https://deepwiki.com/yakhyo/uniface) - AI-powered docs that let you ask anything about UniFace.

---

## Features

- **High-Speed Face Detection**: ONNX-optimized RetinaFace, SCRFD, and YOLOv5-Face models
- **Facial Landmark Detection**: Accurate 106-point landmark localization
- **Face Recognition**: ArcFace, MobileFace, and SphereFace embeddings
- **Attribute Analysis**: Age, gender, and emotion detection
- **Face Alignment**: Precise alignment for downstream tasks
- **Hardware Acceleration**: ARM64 optimizations (Apple Silicon), CUDA (NVIDIA), CPU fallback
- **Simple API**: Intuitive factory functions and clean interfaces
- **Production-Ready**: Type hints, comprehensive logging, PEP8 compliant
- **Face Detection** — RetinaFace, SCRFD, YOLOv5-Face, and YOLOv8-Face with 5-point landmarks
- **Face Recognition** — ArcFace, MobileFace, and SphereFace embeddings
- **Facial Landmarks** — 106-point landmark localization
- **Face Parsing** — BiSeNet semantic segmentation (19 classes)
- **Gaze Estimation** — Real-time gaze direction with MobileGaze
- **Attribute Analysis** — Age, gender, race (FairFace), and emotion
- **Anti-Spoofing** — Face liveness detection with MiniFASNet
- **Face Anonymization** — 5 blur methods for privacy protection
- **Hardware Acceleration** — ARM64 (Apple Silicon), CUDA (NVIDIA), CPU

---

## Installation

### Quick Install (All Platforms)

```bash
# Standard installation
pip install uniface
```

### Platform-Specific Installation

#### macOS (Apple Silicon - M1/M2/M3/M4)

For Apple Silicon Macs, the standard installation automatically includes optimized ARM64 support:

```bash
pip install uniface
```

The base `onnxruntime` package (included with uniface) has native Apple Silicon support, with ARM64 optimizations built in since version 1.13.

#### Linux/Windows with NVIDIA GPU

For CUDA acceleration on NVIDIA GPUs:

```bash
# GPU support (CUDA)
pip install uniface[gpu]
```

**Requirements:**

- CUDA 11.x or 12.x
- cuDNN 8.x
- See [ONNX Runtime GPU requirements](https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html)

#### CPU-Only (All Platforms)

```bash
pip install uniface
```

### Install from Source

```bash
# From source
git clone https://github.com/yakhyo/uniface.git
cd uniface
pip install -e .
cd uniface && pip install -e .
```

---

## Quick Start

### Face Detection
## Quick Example

```python
import cv2
from uniface import RetinaFace

# Initialize detector
# Initialize detector (models auto-download on first use)
detector = RetinaFace()

# Load image
image = cv2.imread("image.jpg")

# Detect faces
image = cv2.imread("photo.jpg")
faces = detector.detect(image)

# Process results
for face in faces:
    bbox = face['bbox']  # [x1, y1, x2, y2]
    confidence = face['confidence']
    landmarks = face['landmarks']  # 5-point landmarks
    print(f"Face detected with confidence: {confidence:.2f}")
    print(f"Confidence: {face.confidence:.2f}")
    print(f"BBox: {face.bbox}")
    print(f"Landmarks: {face.landmarks.shape}")
```

### Face Recognition

```python
from uniface import ArcFace, RetinaFace
from uniface import compute_similarity

# Initialize models
detector = RetinaFace()
recognizer = ArcFace()

# Detect and extract embeddings
faces1 = detector.detect(image1)
faces2 = detector.detect(image2)

embedding1 = recognizer.get_normalized_embedding(image1, faces1[0]['landmarks'])
embedding2 = recognizer.get_normalized_embedding(image2, faces2[0]['landmarks'])

# Compare faces
similarity = compute_similarity(embedding1, embedding2)
print(f"Similarity: {similarity:.4f}")
```

### Facial Landmarks

```python
from uniface import RetinaFace, Landmark106

detector = RetinaFace()
landmarker = Landmark106()

faces = detector.detect(image)
landmarks = landmarker.get_landmarks(image, faces[0]['bbox'])
# Returns 106 (x, y) landmark points
```

### Age & Gender Detection

```python
from uniface import RetinaFace, AgeGender

detector = RetinaFace()
age_gender = AgeGender()

faces = detector.detect(image)
gender, age = age_gender.predict(image, faces[0]['bbox'])
gender_str = 'Female' if gender == 0 else 'Male'
print(f"{gender_str}, {age} years old")
```

---

## Documentation

- [**QUICKSTART.md**](QUICKSTART.md) - 5-minute getting started guide
- [**MODELS.md**](MODELS.md) - Model zoo, benchmarks, and selection guide
- [**Examples**](examples/) - Jupyter notebooks with detailed examples

---

## API Overview

### Factory Functions (Recommended)

```python
from uniface.detection import RetinaFace, SCRFD
from uniface.recognition import ArcFace
from uniface.landmark import Landmark106

from uniface.constants import SCRFDWeights

# Create detector with default settings
detector = RetinaFace()

# Create with custom config
detector = SCRFD(
    model_name=SCRFDWeights.SCRFD_10G_KPS,  # or SCRFDWeights.SCRFD_500M_KPS
    conf_thresh=0.4,
    input_size=(640, 640)
)
# Or with default settings: detector = SCRFD()

# Recognition and landmarks
recognizer = ArcFace()
landmarker = Landmark106()
```

### Direct Model Instantiation

```python
from uniface import RetinaFace, SCRFD, YOLOv5Face, ArcFace, MobileFace, SphereFace
from uniface.constants import RetinaFaceWeights, YOLOv5FaceWeights

# Detection
detector = RetinaFace(
    model_name=RetinaFaceWeights.MNET_V2,
    conf_thresh=0.5,
    nms_thresh=0.4
)
# Or detector = RetinaFace()

# YOLOv5-Face detection
detector = YOLOv5Face(
    model_name=YOLOv5FaceWeights.YOLOV5S,
    conf_thresh=0.6,
    nms_thresh=0.5
)
# Or detector = YOLOv5Face()

# Recognition
recognizer = ArcFace()  # Uses default weights
recognizer = MobileFace()  # Lightweight alternative
recognizer = SphereFace()  # Angular softmax alternative
```

### High-Level Detection API

```python
from uniface import detect_faces

# One-line face detection
faces = detect_faces(image, method='retinaface', conf_thresh=0.8)  # methods: retinaface, scrfd, yolov5face
```

### Key Parameters (quick reference)

**Detection**

| Class | Key params (defaults) | Notes |
| ------------ | --------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------ |
| `RetinaFace` | `model_name=RetinaFaceWeights.MNET_V2`, `conf_thresh=0.5`, `nms_thresh=0.4`, `input_size=(640, 640)`, `dynamic_size=False` | Supports 5-point landmarks |
| `SCRFD` | `model_name=SCRFDWeights.SCRFD_10G_KPS`, `conf_thresh=0.5`, `nms_thresh=0.4`, `input_size=(640, 640)` | Supports 5-point landmarks |
| `YOLOv5Face` | `model_name=YOLOv5FaceWeights.YOLOV5S`, `conf_thresh=0.6`, `nms_thresh=0.5`, `input_size=640` (fixed) | Supports 5-point landmarks; models: YOLOV5N/S/M; `input_size` must be 640 |

**Recognition**

| Class | Key params (defaults) | Notes |
| ------------ | ---------------------------------------- | ------------------------------------- |
| `ArcFace` | `model_name=ArcFaceWeights.MNET` | Returns 512-dim normalized embeddings |
| `MobileFace` | `model_name=MobileFaceWeights.MNET_V2` | Lightweight embeddings |
| `SphereFace` | `model_name=SphereFaceWeights.SPHERE20` | Angular softmax variant |

**Landmark & Attributes**

| Class | Key params (defaults) | Notes |
| ------------- | ------------------------------------------------------------------ | --------------------------------------- |
| `Landmark106` | No required params | 106-point landmarks |
| `AgeGender` | `model_name=AgeGenderWeights.DEFAULT`; `input_size` auto-detected | Requires bbox; ONNXRuntime |
| `Emotion` | `model_weights=DDAMFNWeights.AFFECNET7`, `input_size=(112, 112)` | Requires 5-point landmarks; TorchScript |
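Putting the attribute rows together, a short sketch mirroring the AgeGender and Emotion usage shown in MODELS.md above; `photo.jpg` is a placeholder, dict-style face access follows the older examples on this page, and the Emotion keyword follows the `model_name` spelling from MODELS.md (the table above spells it `model_weights`):

```python
import cv2
from uniface import AgeGender, Emotion, RetinaFace
from uniface.constants import DDAMFNWeights

detector = RetinaFace()
age_gender = AgeGender()                               # predicts from a bbox
emotion = Emotion(model_name=DDAMFNWeights.AFFECNET7)  # predicts from 5-point landmarks

image = cv2.imread('photo.jpg')
for face in detector.detect(image):
    gender, age = age_gender.predict(image, face['bbox'])
    emo, conf = emotion.predict(image, face['landmarks'])
    print('Female' if gender == 0 else 'Male', age, emo, f'{conf:.2f}')
```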
|
||||
---
|
||||
|
||||
## Model Performance
|
||||
|
||||
### Face Detection (WIDER FACE Dataset)
|
||||
|
||||
| Model | Easy | Medium | Hard | Use Case |
|
||||
| ------------------ | ------ | ------ | ------ | ---------------------- |
|
||||
| retinaface_mnet025 | 88.48% | 87.02% | 80.61% | Mobile/Edge devices |
|
||||
| retinaface_mnet_v2 | 91.70% | 91.03% | 86.60% | Balanced (recommended) |
|
||||
| retinaface_r34 | 94.16% | 93.12% | 88.90% | High accuracy |
|
||||
| scrfd_500m | 90.57% | 88.12% | 68.51% | Real-time applications |
|
||||
| scrfd_10g | 95.16% | 93.87% | 83.05% | Best accuracy/speed |
|
||||
| yolov5n_face | 93.61% | 91.52% | 80.53% | Lightweight/Mobile |
|
||||
| yolov5s_face | 94.33% | 92.61% | 83.15% | Real-time + accuracy |
|
||||
| yolov5m_face | 95.30% | 93.76% | 85.28% | High accuracy |
|
||||
|
||||
_Accuracy values from original papers: [RetinaFace](https://arxiv.org/abs/1905.00641), [SCRFD](https://arxiv.org/abs/2105.04714), [YOLOv5-Face](https://arxiv.org/abs/2105.12931)_
|
||||
|
||||
**Benchmark on your hardware:**
|
||||
|
||||
```bash
|
||||
python scripts/run_detection.py --image assets/test.jpg --iterations 100
|
||||
```
|
||||
|
||||
See [MODELS.md](MODELS.md) for detailed model information and selection guide.
|
||||
|
||||
<div align="center">
|
||||
<img src="assets/test_result.png">
|
||||
</div>
|
||||
|
||||
---
|
||||
|
||||
## Examples
|
||||
## Documentation
|
||||
|
||||
📚 **Full documentation**: [yakhyo.github.io/uniface](https://yakhyo.github.io/uniface/)
|
||||
|
||||
| Resource | Description |
|
||||
|----------|-------------|
|
||||
| [Quickstart](https://yakhyo.github.io/uniface/quickstart/) | Get up and running in 5 minutes |
|
||||
| [Model Zoo](https://yakhyo.github.io/uniface/models/) | All models, benchmarks, and selection guide |
|
||||
| [API Reference](https://yakhyo.github.io/uniface/modules/detection/) | Detailed module documentation |
|
||||
| [Tutorials](https://yakhyo.github.io/uniface/recipes/image-pipeline/) | Step-by-step workflow examples |
|
||||
| [Guides](https://yakhyo.github.io/uniface/concepts/overview/) | Architecture and design principles |
|
||||
|
||||
### Jupyter Notebooks
|
||||
|
||||
Interactive examples covering common face analysis tasks:
|
||||
|
||||
| Example | Description | Notebook |
|
||||
|---------|-------------|----------|
|
||||
| **Face Detection** | Detect faces and facial landmarks | [face_detection.ipynb](examples/face_detection.ipynb) |
|
||||
| **Face Alignment** | Align and crop faces for recognition | [face_alignment.ipynb](examples/face_alignment.ipynb) |
|
||||
| **Face Recognition** | Extract face embeddings and compare faces | [face_analyzer.ipynb](examples/face_analyzer.ipynb) |
|
||||
| **Face Verification** | Compare two faces to verify identity | [face_verification.ipynb](examples/face_verification.ipynb) |
|
||||
| **Face Search** | Find a person in a group photo | [face_search.ipynb](examples/face_search.ipynb) |
|
||||
|
||||
### Webcam Face Detection
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
detector = RetinaFace()
|
||||
cap = cv2.VideoCapture(0)
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
faces = detector.detect(frame)
|
||||
|
||||
# Extract data for visualization
|
||||
bboxes = [f['bbox'] for f in faces]
|
||||
scores = [f['confidence'] for f in faces]
|
||||
landmarks = [f['landmarks'] for f in faces]
|
||||
|
||||
draw_detections(
|
||||
image=frame,
|
||||
bboxes=bboxes,
|
||||
scores=scores,
|
||||
landmarks=landmarks,
|
||||
vis_threshold=0.6,
|
||||
)
|
||||
|
||||
cv2.imshow("Face Detection", frame)
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
```
|
||||
|
||||
### Face Search System
|
||||
|
||||
```python
|
||||
import numpy as np
|
||||
from uniface import RetinaFace, ArcFace
|
||||
|
||||
detector = RetinaFace()
|
||||
recognizer = ArcFace()
|
||||
|
||||
# Build face database
|
||||
database = {}
|
||||
for person_id, image_path in person_images.items():
|
||||
image = cv2.imread(image_path)
|
||||
faces = detector.detect(image)
|
||||
if faces:
|
||||
embedding = recognizer.get_normalized_embedding(
|
||||
image, faces[0]['landmarks']
|
||||
)
|
||||
database[person_id] = embedding
|
||||
|
||||
# Search for a face
|
||||
query_image = cv2.imread("query.jpg")
|
||||
query_faces = detector.detect(query_image)
|
||||
if query_faces:
|
||||
query_embedding = recognizer.get_normalized_embedding(
|
||||
query_image, query_faces[0]['landmarks']
|
||||
)
|
||||
|
||||
# Find best match
|
||||
best_match = None
|
||||
best_similarity = -1
|
||||
|
||||
for person_id, db_embedding in database.items():
|
||||
similarity = np.dot(query_embedding, db_embedding.T)[0][0]
|
||||
if similarity > best_similarity:
|
||||
best_similarity = similarity
|
||||
best_match = person_id
|
||||
|
||||
print(f"Best match: {best_match} (similarity: {best_similarity:.4f})")
|
||||
```
|
||||
|
||||
More examples in the [examples/](examples/) directory.
|
||||
|
||||
---
|
||||
|
||||
## Advanced Configuration
|
||||
|
||||
### Custom ONNX Runtime Providers
|
||||
|
||||
```python
|
||||
from uniface.onnx_utils import get_available_providers, create_onnx_session
|
||||
|
||||
# Check available providers
|
||||
providers = get_available_providers()
|
||||
print(f"Available: {providers}")
|
||||
|
||||
# Force CPU-only execution
|
||||
from uniface import RetinaFace
|
||||
detector = RetinaFace()
|
||||
# Internally uses create_onnx_session() which auto-selects best provider
|
||||
```
|
||||
|
||||
### Model Download and Caching
|
||||
|
||||
Models are automatically downloaded on first use and cached in `~/.uniface/models/`.
|
||||
|
||||
```python
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.constants import RetinaFaceWeights
|
||||
|
||||
# Manually download and verify a model
|
||||
model_path = verify_model_weights(
|
||||
RetinaFaceWeights.MNET_V2,
|
||||
root='./custom_models' # Custom cache directory
|
||||
)
|
||||
```

### Logging Configuration

```python
from uniface import Logger
import logging

# Set logging level
Logger.setLevel(logging.DEBUG)  # DEBUG, INFO, WARNING, ERROR

# Disable logging
Logger.setLevel(logging.CRITICAL)
```

---

## Testing

```bash
# Run all tests
pytest

# Run with coverage
pytest --cov=uniface --cov-report=html

# Run specific test file
pytest tests/test_retinaface.py -v
```

---

## Development

### Setup Development Environment

```bash
git clone https://github.com/yakhyo/uniface.git
cd uniface

# Install in editable mode with dev dependencies
pip install -e ".[dev]"

# Run tests
pytest
```

### Code Formatting

This project uses [Ruff](https://docs.astral.sh/ruff/) for linting and formatting.

```bash
# Format code
ruff format .

# Check for linting errors
ruff check .

# Auto-fix linting errors
ruff check . --fix
```

Ruff configuration is in `pyproject.toml`. Key settings:

- Line length: 120
- Python target: 3.10+
- Import sorting: `uniface` as first-party

### Project Structure

```
uniface/
├── uniface/
│   ├── detection/       # Face detection models
│   ├── recognition/     # Face recognition models
│   ├── landmark/        # Landmark detection
│   ├── attribute/       # Age, gender, emotion
│   ├── onnx_utils.py    # ONNX Runtime utilities
│   ├── model_store.py   # Model download & caching
│   └── visualization.py # Drawing utilities
├── tests/               # Unit tests
├── examples/            # Example notebooks
└── scripts/             # Utility scripts
```

| Example | Colab | Description |
|---------|:-----:|-------------|
| [01_face_detection.ipynb](examples/01_face_detection.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/01_face_detection.ipynb) | Face detection and landmarks |
| [02_face_alignment.ipynb](examples/02_face_alignment.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/02_face_alignment.ipynb) | Face alignment for recognition |
| [03_face_verification.ipynb](examples/03_face_verification.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/03_face_verification.ipynb) | Compare faces for identity |
| [04_face_search.ipynb](examples/04_face_search.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/04_face_search.ipynb) | Find a person in group photos |
| [05_face_analyzer.ipynb](examples/05_face_analyzer.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/05_face_analyzer.ipynb) | All-in-one analysis |
| [06_face_parsing.ipynb](examples/06_face_parsing.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/06_face_parsing.ipynb) | Semantic face segmentation |
| [07_face_anonymization.ipynb](examples/07_face_anonymization.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/07_face_anonymization.ipynb) | Privacy-preserving blur |
| [08_gaze_estimation.ipynb](examples/08_gaze_estimation.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/08_gaze_estimation.ipynb) | Gaze direction estimation |

---

## References

- **RetinaFace Training**: [yakhyo/retinaface-pytorch](https://github.com/yakhyo/retinaface-pytorch) - PyTorch implementation and training code
- **YOLOv5-Face ONNX**: [yakhyo/yolov5-face-onnx-inference](https://github.com/yakhyo/yolov5-face-onnx-inference) - ONNX inference implementation
- **Face Recognition Training**: [yakhyo/face-recognition](https://github.com/yakhyo/face-recognition) - ArcFace, MobileFace, SphereFace training code
- **InsightFace**: [deepinsight/insightface](https://github.com/deepinsight/insightface) - Model architectures and pretrained weights

| Feature | Repository | Training | Description |
|---------|------------|:--------:|-------------|
| Detection | [retinaface-pytorch](https://github.com/yakhyo/retinaface-pytorch) | [x] | RetinaFace PyTorch Training & Export |
| Detection | [yolov5-face-onnx-inference](https://github.com/yakhyo/yolov5-face-onnx-inference) | - | YOLOv5-Face ONNX Inference |
| Detection | [yolov8-face-onnx-inference](https://github.com/yakhyo/yolov8-face-onnx-inference) | - | YOLOv8-Face ONNX Inference |
| Recognition | [face-recognition](https://github.com/yakhyo/face-recognition) | [x] | MobileFace, SphereFace Training |
| Parsing | [face-parsing](https://github.com/yakhyo/face-parsing) | [x] | BiSeNet Face Parsing |
| Gaze | [gaze-estimation](https://github.com/yakhyo/gaze-estimation) | [x] | MobileGaze Training |
| Anti-Spoofing | [face-anti-spoofing](https://github.com/yakhyo/face-anti-spoofing) | - | MiniFASNet Inference |
| Attributes | [fairface-onnx](https://github.com/yakhyo/fairface-onnx) | - | FairFace ONNX Inference |

*SCRFD and ArcFace models are from [InsightFace](https://github.com/deepinsight/insightface).

---

## Contributing

Contributions are welcome! Please open an issue or submit a pull request on [GitHub](https://github.com/yakhyo/uniface), and see [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines.

## License

This project is licensed under the [MIT License](LICENSE).

BIN  docs/assets/logo.png  Normal file
Binary file not shown. | After Width: | Height: | Size: 33 KiB |

BIN  docs/assets/logo.webp  Normal file
Binary file not shown. | After Width: | Height: | Size: 33 KiB |

191  docs/concepts/coordinate-systems.md  Normal file
@@ -0,0 +1,191 @@

# Coordinate Systems

This page explains the coordinate formats used in UniFace.

---

## Image Coordinates

All coordinates use **pixel-based, top-left origin**:

```
(0, 0) ────────────────► x (width)
  │
  │       Image
  │
  ▼
y (height)
```

---

## Bounding Box Format

Bounding boxes use `[x1, y1, x2, y2]` format (top-left and bottom-right corners):

```
(x1, y1) ┌─────────────────────┐
         │                     │
         │        Face         │
         │                     │
         └─────────────────────┘ (x2, y2)
```

### Accessing Coordinates

```python
face = faces[0]

# Direct access
x1, y1, x2, y2 = face.bbox

# As properties
bbox_xyxy = face.bbox_xyxy  # [x1, y1, x2, y2]
bbox_xywh = face.bbox_xywh  # [x1, y1, width, height]
```

### Conversion

```python
import numpy as np

# xyxy → xywh
def xyxy_to_xywh(bbox):
    x1, y1, x2, y2 = bbox
    return np.array([x1, y1, x2 - x1, y2 - y1])

# xywh → xyxy
def xywh_to_xyxy(bbox):
    x, y, w, h = bbox
    return np.array([x, y, x + w, y + h])
```
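
A quick round-trip check of the two helpers, as a usage sketch with a made-up box:

```python
bbox = np.array([120, 80, 216, 176])  # [x1, y1, x2, y2]
assert np.array_equal(xyxy_to_xywh(bbox), [120, 80, 96, 96])
assert np.array_equal(xywh_to_xyxy(xyxy_to_xywh(bbox)), bbox)
```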

---

## Landmarks

### 5-Point Landmarks (Detection)

Returned by all detection models:

```python
landmarks = face.landmarks  # Shape: (5, 2)
```

| Index | Point |
|-------|-------|
| 0 | Left Eye |
| 1 | Right Eye |
| 2 | Nose Tip |
| 3 | Left Mouth Corner |
| 4 | Right Mouth Corner |

```
0 ●         ● 1

      ● 2

3 ●         ● 4
```

### 106-Point Landmarks

Returned by `Landmark106`:

```python
from uniface import Landmark106

landmarker = Landmark106()
landmarks = landmarker.get_landmarks(image, face.bbox)
# Shape: (106, 2)
```

**Landmark Groups:**

| Range | Group | Points |
|-------|-------|--------|
| 0-32 | Face Contour | 33 |
| 33-50 | Eyebrows | 18 |
| 51-62 | Nose | 12 |
| 63-86 | Eyes | 24 |
| 87-105 | Mouth | 19 |
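
These ranges slice directly out of the `(106, 2)` array, so each group can be pulled with plain NumPy indexing. A minimal sketch based on the table above:

```python
# landmarks: (106, 2) array returned by Landmark106
contour = landmarks[0:33]     # face contour, 33 points
eyebrows = landmarks[33:51]   # eyebrows, 18 points
nose = landmarks[51:63]       # nose, 12 points
eyes = landmarks[63:87]       # eyes, 24 points
mouth = landmarks[87:106]     # mouth, 19 points
```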

---

## Face Crop

To crop a face from an image:

```python
def crop_face(image, bbox, margin=0):
    """Crop face with optional margin."""
    h, w = image.shape[:2]
    x1, y1, x2, y2 = map(int, bbox)

    # Add margin
    if margin > 0:
        bw, bh = x2 - x1, y2 - y1
        x1 = max(0, x1 - int(bw * margin))
        y1 = max(0, y1 - int(bh * margin))
        x2 = min(w, x2 + int(bw * margin))
        y2 = min(h, y2 + int(bh * margin))

    return image[y1:y2, x1:x2]


# Usage
face_crop = crop_face(image, face.bbox, margin=0.1)
```

---

## Gaze Angles

Gaze estimation returns pitch and yaw in **radians**:

```python
result = gaze_estimator.estimate(face_crop)

# Angles in radians
pitch = result.pitch  # Vertical: + = up, - = down
yaw = result.yaw      # Horizontal: + = right, - = left

# Convert to degrees
import numpy as np

pitch_deg = np.degrees(pitch)
yaw_deg = np.degrees(yaw)
```

**Angle Reference:**

```
            pitch = +90° (up)
                  │
                  │
yaw = -90° ───────┼─────── yaw = +90°
  (left)          │         (right)
                  │
            pitch = -90° (down)
```
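
The sign conventions in the diagram translate directly into a coarse direction label. A small sketch; the ±10° dead zone is an arbitrary choice for illustration:

```python
def coarse_direction(pitch_deg: float, yaw_deg: float, dead_zone: float = 10.0) -> str:
    """Map gaze angles (degrees) to a rough label using the signs above."""
    vertical = "up" if pitch_deg > dead_zone else "down" if pitch_deg < -dead_zone else "center"
    horizontal = "right" if yaw_deg > dead_zone else "left" if yaw_deg < -dead_zone else "center"
    return f"{vertical}/{horizontal}"

print(coarse_direction(pitch_deg, yaw_deg))  # e.g. "center/left"
```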

---

## Face Alignment

Face alignment uses 5-point landmarks to normalize face orientation:

```python
from uniface import face_alignment

# Align face to standard template
aligned_face = face_alignment(image, face.landmarks)
# Output: 112x112 aligned face image
```

The alignment transforms faces to a canonical pose for better recognition accuracy.

---

## Next Steps

- [Inputs & Outputs](inputs-outputs.md) - Data types reference
- [Recognition Module](../modules/recognition.md) - Face recognition details

231  docs/concepts/execution-providers.md  Normal file
@@ -0,0 +1,231 @@

# Execution Providers

UniFace uses ONNX Runtime for model inference, which supports multiple hardware acceleration backends.

---

## Automatic Provider Selection

UniFace automatically selects the optimal execution provider based on available hardware:

```python
from uniface import RetinaFace

# Automatically uses best available provider
detector = RetinaFace()
```

**Priority order:**

1. **CUDAExecutionProvider** - NVIDIA GPU
2. **CoreMLExecutionProvider** - Apple Silicon
3. **CPUExecutionProvider** - Fallback
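
The same priority logic can be reproduced with plain `onnxruntime`. A minimal sketch; the `pick_providers` helper is hypothetical, not part of the UniFace API:

```python
import onnxruntime as ort

def pick_providers() -> list[str]:
    """Order providers by the priority above, keeping only those installed."""
    priority = ['CUDAExecutionProvider', 'CoreMLExecutionProvider', 'CPUExecutionProvider']
    available = set(ort.get_available_providers())
    return [p for p in priority if p in available]
```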

---

## Explicit Provider Selection

You can specify which execution provider to use by passing the `providers` parameter:

```python
from uniface import RetinaFace, ArcFace

# Force CPU execution (even if GPU is available)
detector = RetinaFace(providers=['CPUExecutionProvider'])
recognizer = ArcFace(providers=['CPUExecutionProvider'])

# Use CUDA with CPU fallback
detector = RetinaFace(providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
```

All model classes accept the `providers` parameter:

- Detection: `RetinaFace`, `SCRFD`, `YOLOv5Face`, `YOLOv8Face`
- Recognition: `ArcFace`, `AdaFace`, `MobileFace`, `SphereFace`
- Landmarks: `Landmark106`
- Gaze: `MobileGaze`
- Parsing: `BiSeNet`
- Attributes: `AgeGender`, `FairFace`
- Anti-Spoofing: `MiniFASNet`

---

## Check Available Providers

```python
import onnxruntime as ort

providers = ort.get_available_providers()
print("Available providers:", providers)
```

**Example outputs:**

=== "macOS (Apple Silicon)"

    ```
    ['CoreMLExecutionProvider', 'CPUExecutionProvider']
    ```

=== "Linux (NVIDIA GPU)"

    ```
    ['CUDAExecutionProvider', 'CPUExecutionProvider']
    ```

=== "Windows (CPU)"

    ```
    ['CPUExecutionProvider']
    ```

---

## Platform-Specific Setup

### Apple Silicon (M1/M2/M3/M4)

No additional setup required. ARM64 optimizations are built into `onnxruntime`:

```bash
pip install uniface
```

Verify ARM64:

```bash
python -c "import platform; print(platform.machine())"
# Should show: arm64
```

!!! tip "Performance"
    Apple Silicon Macs use CoreML acceleration automatically, providing excellent performance for face analysis tasks.

---

### NVIDIA GPU (CUDA)

Install with GPU support:

```bash
pip install uniface[gpu]
```

**Requirements:**

- CUDA 11.x or 12.x
- cuDNN 8.x
- Compatible NVIDIA driver

Verify CUDA:

```python
import onnxruntime as ort

if 'CUDAExecutionProvider' in ort.get_available_providers():
    print("CUDA is available!")
else:
    print("CUDA not available, using CPU")
```

---

### CPU Fallback

CPU execution is always available:

```bash
pip install uniface
```

Works on all platforms without additional configuration.

---

## Internal API

For advanced use cases, you can access the provider utilities:

```python
from uniface.onnx_utils import get_available_providers, create_onnx_session

# Check available providers
providers = get_available_providers()
print(f"Available: {providers}")

# Models use create_onnx_session() internally,
# which auto-selects the best provider
```

---

## Performance Tips

### 1. Use GPU When Available

For batch processing or real-time applications, GPU acceleration provides significant speedups:

```bash
pip install uniface[gpu]
```

### 2. Optimize Input Size

Smaller input sizes are faster but may reduce accuracy:

```python
from uniface import RetinaFace

# Faster, lower accuracy
detector = RetinaFace(input_size=(320, 320))

# Balanced (default)
detector = RetinaFace(input_size=(640, 640))
```

### 3. Batch Processing

Process many images with one model instance to maximize GPU utilization:

```python
import cv2

# Reuse a single detector session across images (keeps the GPU busy)
for image_path in image_paths:
    image = cv2.imread(image_path)
    faces = detector.detect(image)
    # ...
```

---

## Troubleshooting

### CUDA Not Detected

1. Verify CUDA installation:

    ```bash
    nvidia-smi
    ```

2. Check CUDA version compatibility with ONNX Runtime

3. Reinstall with GPU support:

    ```bash
    pip uninstall onnxruntime onnxruntime-gpu
    pip install uniface[gpu]
    ```

### Slow Performance on Mac

Verify you're using ARM64 Python (not Rosetta):

```bash
python -c "import platform; print(platform.machine())"
# Should show: arm64 (not x86_64)
```

---

## Next Steps

- [Model Cache & Offline](model-cache-offline.md) - Model management
- [Thresholds & Calibration](thresholds-calibration.md) - Tuning parameters

218  docs/concepts/inputs-outputs.md  Normal file
@@ -0,0 +1,218 @@

# Inputs & Outputs

This page describes the data types used throughout UniFace.

---

## Input: Images

All models accept NumPy arrays in **BGR format** (OpenCV default):

```python
import cv2

# Load image (BGR format)
image = cv2.imread("photo.jpg")
print(f"Shape: {image.shape}")  # (H, W, 3)
print(f"Dtype: {image.dtype}")  # uint8
```

!!! warning "Color Format"
    UniFace expects **BGR** format (OpenCV default). If using PIL or other libraries, convert first:

    ```python
    from PIL import Image
    import numpy as np

    pil_image = Image.open("photo.jpg")
    bgr_image = np.array(pil_image)[:, :, ::-1]  # RGB → BGR
    ```

---

## Output: Face Dataclass

Detection returns a list of `Face` objects:

```python
from dataclasses import dataclass
import numpy as np

@dataclass
class Face:
    # Required (from detection)
    bbox: np.ndarray       # [x1, y1, x2, y2]
    confidence: float      # 0.0 to 1.0
    landmarks: np.ndarray  # (5, 2) or (106, 2)

    # Optional (enriched by analyzers)
    embedding: np.ndarray | None = None
    gender: int | None = None        # 0=Female, 1=Male
    age: int | None = None           # Years
    age_group: str | None = None     # "20-29", etc.
    race: str | None = None          # "East Asian", etc.
    emotion: str | None = None       # "Happy", etc.
    emotion_confidence: float | None = None
```

### Properties

```python
face = faces[0]

# Bounding box formats
face.bbox_xyxy  # [x1, y1, x2, y2] - same as bbox
face.bbox_xywh  # [x1, y1, width, height]

# Gender as string
face.sex  # "Female" or "Male" (None if not predicted)
```

### Methods

```python
# Compute similarity with another face
similarity = face1.compute_similarity(face2)

# Convert to dictionary
face_dict = face.to_dict()
```

---

## Result Types

### GazeResult

```python
from dataclasses import dataclass

@dataclass(frozen=True)
class GazeResult:
    pitch: float  # Vertical angle (radians), + = up
    yaw: float    # Horizontal angle (radians), + = right
```

**Usage:**

```python
import numpy as np

result = gaze_estimator.estimate(face_crop)
print(f"Pitch: {np.degrees(result.pitch):.1f}°")
print(f"Yaw: {np.degrees(result.yaw):.1f}°")
```

---

### SpoofingResult

```python
@dataclass(frozen=True)
class SpoofingResult:
    is_real: bool      # True = real, False = fake
    confidence: float  # 0.0 to 1.0
```

**Usage:**

```python
result = spoofer.predict(image, face.bbox)
label = "Real" if result.is_real else "Fake"
print(f"{label}: {result.confidence:.1%}")
```

---

### AttributeResult

```python
@dataclass(frozen=True)
class AttributeResult:
    gender: int            # 0=Female, 1=Male
    age: int | None        # Years (AgeGender model)
    age_group: str | None  # "20-29" (FairFace model)
    race: str | None       # Race label (FairFace model)

    @property
    def sex(self) -> str:
        return "Female" if self.gender == 0 else "Male"
```

**Usage:**

```python
# AgeGender model
result = age_gender.predict(image, face.bbox)
print(f"{result.sex}, {result.age} years old")

# FairFace model
result = fairface.predict(image, face.bbox)
print(f"{result.sex}, {result.age_group}, {result.race}")
```

---

### EmotionResult

```python
@dataclass(frozen=True)
class EmotionResult:
    emotion: str       # "Happy", "Sad", etc.
    confidence: float  # 0.0 to 1.0
```

---

## Embeddings

Face recognition models return normalized 512-dimensional embeddings:

```python
import numpy as np

embedding = recognizer.get_normalized_embedding(image, landmarks)
print(f"Shape: {embedding.shape}")               # (1, 512)
print(f"Norm: {np.linalg.norm(embedding):.4f}")  # ~1.0
```

### Similarity Computation

```python
from uniface import compute_similarity

similarity = compute_similarity(embedding1, embedding2)
# Returns: float between -1 and 1 (cosine similarity)
```

---

## Parsing Masks

Face parsing returns a segmentation mask:

```python
mask = parser.parse(face_image)
print(f"Shape: {mask.shape}")         # (H, W)
print(f"Classes: {np.unique(mask)}")  # [0, 1, 2, ...]
```

**19 Classes:**

| ID | Class | ID | Class |
|----|-------|----|-------|
| 0 | Background | 10 | Ear Ring |
| 1 | Skin | 11 | Nose |
| 2 | Left Eyebrow | 12 | Mouth |
| 3 | Right Eyebrow | 13 | Upper Lip |
| 4 | Left Eye | 14 | Lower Lip |
| 5 | Right Eye | 15 | Neck |
| 6 | Eye Glasses | 16 | Neck Lace |
| 7 | Left Ear | 17 | Cloth |
| 8 | Right Ear | 18 | Hair |
| 9 | Hat | | |
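
The class IDs make per-region masks a one-liner; for example, isolating the skin region (ID 1 in the table) as a binary mask:

```python
import numpy as np

# Binary mask for the skin region (class ID 1 above)
skin_mask = (mask == 1).astype(np.uint8) * 255
```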

---

## Next Steps

- [Coordinate Systems](coordinate-systems.md) - Bbox and landmark formats
- [Thresholds & Calibration](thresholds-calibration.md) - Tuning confidence thresholds

220  docs/concepts/model-cache-offline.md  Normal file
@@ -0,0 +1,220 @@

# Model Cache & Offline Use

UniFace automatically downloads and caches models. This page explains how model management works.

---

## Automatic Download

Models are downloaded on first use:

```python
from uniface import RetinaFace

# First run: downloads model to cache
detector = RetinaFace()  # ~3.5 MB download

# Subsequent runs: loads from cache
detector = RetinaFace()  # Instant
```

---

## Cache Location

Default cache directory:

```
~/.uniface/models/
```

**Example structure:**

```
~/.uniface/models/
├── retinaface_mv2.onnx
├── w600k_mbf.onnx
├── 2d106det.onnx
├── gaze_resnet34.onnx
├── parsing_resnet18.onnx
└── ...
```

---

## Custom Cache Directory

Specify a custom cache location:

```python
from uniface.model_store import verify_model_weights
from uniface.constants import RetinaFaceWeights

# Download to custom directory
model_path = verify_model_weights(
    RetinaFaceWeights.MNET_V2,
    root='./my_models'
)
print(f"Model at: {model_path}")
```

---

## Pre-Download Models

Download models before deployment:

```python
from uniface.model_store import verify_model_weights
from uniface.constants import (
    RetinaFaceWeights,
    ArcFaceWeights,
    AgeGenderWeights,
)

# Download all needed models
models = [
    RetinaFaceWeights.MNET_V2,
    ArcFaceWeights.MNET,
    AgeGenderWeights.DEFAULT,
]

for model in models:
    path = verify_model_weights(model)
    print(f"Downloaded: {path}")
```

Or use the CLI tool:

```bash
python tools/download_model.py
```

---

## Offline Use

For air-gapped or offline environments:

### 1. Pre-download models

On a connected machine:

```python
from uniface.model_store import verify_model_weights
from uniface.constants import RetinaFaceWeights

path = verify_model_weights(RetinaFaceWeights.MNET_V2)
print(f"Copy from: {path}")
```

### 2. Copy to target machine

```bash
# Copy the entire cache directory
scp -r ~/.uniface/models/ user@offline-machine:~/.uniface/models/
```

### 3. Use normally

```python
# Models load from local cache
from uniface import RetinaFace

detector = RetinaFace()  # No network required
```

---

## Model Verification

Models are verified with SHA-256 checksums:

```python
from uniface.constants import MODEL_SHA256, RetinaFaceWeights

# Check expected checksum
expected = MODEL_SHA256[RetinaFaceWeights.MNET_V2]
print(f"Expected SHA256: {expected}")
```

If a model fails verification, it's re-downloaded automatically.
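
The same check can be run by hand against a cached file, which is handy when copying models to offline machines. A minimal sketch using only the standard library, assuming `model_path` and `expected` from the snippets above:

```python
import hashlib
from pathlib import Path

def sha256_of(path: str) -> str:
    """Hex SHA-256 digest of a file's contents."""
    return hashlib.sha256(Path(path).read_bytes()).hexdigest()

assert sha256_of(model_path) == expected, "Checksum mismatch - re-download the model"
```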

---

## Available Models

### Detection Models

| Model | Size | Download |
|-------|------|----------|
| RetinaFace MNET_025 | 1.7 MB | ✅ |
| RetinaFace MNET_V2 | 3.5 MB | ✅ |
| RetinaFace RESNET34 | 56 MB | ✅ |
| SCRFD 500M | 2.5 MB | ✅ |
| SCRFD 10G | 17 MB | ✅ |
| YOLOv5n-Face | 11 MB | ✅ |
| YOLOv5s-Face | 28 MB | ✅ |
| YOLOv5m-Face | 82 MB | ✅ |
| YOLOv8-Lite-S | 7.4 MB | ✅ |
| YOLOv8n-Face | 12 MB | ✅ |

### Recognition Models

| Model | Size | Download |
|-------|------|----------|
| ArcFace MNET | 8 MB | ✅ |
| ArcFace RESNET | 166 MB | ✅ |
| MobileFace MNET_V2 | 4 MB | ✅ |
| SphereFace SPHERE20 | 50 MB | ✅ |

### Other Models

| Model | Size | Download |
|-------|------|----------|
| Landmark106 | 14 MB | ✅ |
| AgeGender | 8 MB | ✅ |
| FairFace | 44 MB | ✅ |
| Gaze ResNet34 | 82 MB | ✅ |
| BiSeNet ResNet18 | 51 MB | ✅ |
| MiniFASNet V2 | 1.2 MB | ✅ |

---

## Clear Cache

Remove cached models:

```bash
# Remove all cached models
rm -rf ~/.uniface/models/

# Remove specific model
rm ~/.uniface/models/retinaface_mv2.onnx
```

Models will be re-downloaded on next use.

---

## Environment Variables

Set custom cache location via environment variable:

```bash
export UNIFACE_CACHE_DIR=/path/to/custom/cache
```

```python
import os
os.environ['UNIFACE_CACHE_DIR'] = '/path/to/custom/cache'

from uniface import RetinaFace

detector = RetinaFace()  # Uses custom cache
```

---

## Next Steps

- [Thresholds & Calibration](thresholds-calibration.md) - Tune model parameters
- [Detection Module](../modules/detection.md) - Detection model details

196  docs/concepts/overview.md  Normal file
@@ -0,0 +1,196 @@

# Overview

UniFace is designed as a modular, production-ready face analysis library. This page explains the architecture and design principles.

---

## Architecture

UniFace follows a modular architecture where each face analysis task is handled by a dedicated module:

```mermaid
graph TB
    subgraph Input
        IMG[Image/Frame]
    end

    subgraph Detection
        DET[RetinaFace / SCRFD / YOLOv5Face / YOLOv8Face]
    end

    subgraph Analysis
        REC[Recognition]
        LMK[Landmarks]
        ATTR[Attributes]
        GAZE[Gaze]
        PARSE[Parsing]
        SPOOF[Anti-Spoofing]
        PRIV[Privacy]
    end

    subgraph Output
        FACE[Face Objects]
    end

    IMG --> DET
    DET --> REC
    DET --> LMK
    DET --> ATTR
    DET --> GAZE
    DET --> PARSE
    DET --> SPOOF
    DET --> PRIV
    REC --> FACE
    LMK --> FACE
    ATTR --> FACE
```

---

## Design Principles

### 1. ONNX-First

All models use ONNX Runtime for inference:

- **Cross-platform**: Same models work on macOS, Linux, Windows
- **Hardware acceleration**: Automatic selection of optimal provider
- **Production-ready**: No Python-only dependencies for inference

### 2. Minimal Dependencies

Core dependencies are kept minimal:

```
numpy          # Array operations
opencv-python  # Image processing
onnxruntime    # Model inference
requests       # Model download
tqdm           # Progress bars
```

### 3. Simple API

Factory functions and direct instantiation:

```python
# Factory function
detector = create_detector('retinaface')

# Direct instantiation (recommended)
from uniface import RetinaFace

detector = RetinaFace()
```

### 4. Type Safety

Full type hints throughout:

```python
def detect(self, image: np.ndarray) -> list[Face]:
    ...
```

---

## Module Structure

```
uniface/
├── detection/        # Face detection (RetinaFace, SCRFD, YOLOv5Face, YOLOv8Face)
├── recognition/      # Face recognition (AdaFace, ArcFace, MobileFace, SphereFace)
├── landmark/         # 106-point landmarks
├── attribute/        # Age, gender, emotion, race
├── parsing/          # Face semantic segmentation
├── gaze/             # Gaze estimation
├── spoofing/         # Anti-spoofing
├── privacy/          # Face anonymization
├── types.py          # Dataclasses (Face, GazeResult, etc.)
├── constants.py      # Model weights and URLs
├── model_store.py    # Model download and caching
├── onnx_utils.py     # ONNX Runtime utilities
└── visualization.py  # Drawing utilities
```

---

## Workflow

A typical face analysis workflow:

```python
import cv2
from uniface import RetinaFace, ArcFace, AgeGender

# 1. Initialize models
detector = RetinaFace()
recognizer = ArcFace()
age_gender = AgeGender()

# 2. Load image
image = cv2.imread("photo.jpg")

# 3. Detect faces
faces = detector.detect(image)

# 4. Analyze each face
for face in faces:
    # Recognition embedding
    embedding = recognizer.get_normalized_embedding(image, face.landmarks)

    # Attributes
    attrs = age_gender.predict(image, face.bbox)

    print(f"Face: {attrs.sex}, {attrs.age} years")
```

---

## FaceAnalyzer

For convenience, `FaceAnalyzer` combines multiple modules:

```python
from uniface import FaceAnalyzer

analyzer = FaceAnalyzer(
    detect=True,
    recognize=True,
    attributes=True
)

faces = analyzer.analyze(image)
for face in faces:
    print(f"Age: {face.age}, Gender: {face.sex}")
    print(f"Embedding: {face.embedding.shape}")
```

---

## Model Lifecycle

1. **First use**: Model is downloaded from GitHub releases
2. **Cached**: Stored in `~/.uniface/models/`
3. **Verified**: SHA-256 checksum validation
4. **Loaded**: ONNX Runtime session created
5. **Inference**: Hardware-accelerated execution

```python
# Models auto-download on first use
detector = RetinaFace()  # Downloads if not cached

# Or manually pre-download
from uniface.model_store import verify_model_weights
from uniface.constants import RetinaFaceWeights

path = verify_model_weights(RetinaFaceWeights.MNET_V2)
```

---

## Next Steps

- [Inputs & Outputs](inputs-outputs.md) - Understand data types
- [Execution Providers](execution-providers.md) - Hardware acceleration
- [Detection Module](../modules/detection.md) - Start with face detection
- [Image Pipeline Recipe](../recipes/image-pipeline.md) - Complete workflow

234  docs/concepts/thresholds-calibration.md  Normal file
@@ -0,0 +1,234 @@

# Thresholds & Calibration

This page explains how to tune detection and recognition thresholds for your use case.

---

## Detection Thresholds

### Confidence Threshold

Controls minimum confidence for face detection:

```python
from uniface import RetinaFace

# Default (balanced)
detector = RetinaFace(confidence_threshold=0.5)

# High precision (fewer false positives)
detector = RetinaFace(confidence_threshold=0.8)

# High recall (catch more faces)
detector = RetinaFace(confidence_threshold=0.3)
```

**Guidelines:**

| Threshold | Use Case |
|-----------|----------|
| 0.3 - 0.4 | Maximum recall (research, analysis) |
| 0.5 - 0.6 | Balanced (default, general use) |
| 0.7 - 0.9 | High precision (production, security) |

---

### NMS Threshold

Non-Maximum Suppression removes overlapping detections:

```python
# Default
detector = RetinaFace(nms_threshold=0.4)

# Stricter (fewer overlapping boxes)
detector = RetinaFace(nms_threshold=0.3)

# Looser (for crowded scenes)
detector = RetinaFace(nms_threshold=0.5)
```

---

### Input Size

Affects detection accuracy and speed:

```python
# Faster, lower accuracy
detector = RetinaFace(input_size=(320, 320))

# Balanced (default)
detector = RetinaFace(input_size=(640, 640))

# Higher accuracy, slower
detector = RetinaFace(input_size=(1280, 1280))
```

!!! tip "Dynamic Size"
    For RetinaFace, enable dynamic input for variable image sizes:

    ```python
    detector = RetinaFace(dynamic_size=True)
    ```

---

## Recognition Thresholds

### Similarity Threshold

For identity verification (same person check):

```python
import numpy as np
from uniface import compute_similarity

similarity = compute_similarity(embedding1, embedding2)

# Threshold interpretation
if similarity > 0.6:
    print("Same person (high confidence)")
elif similarity > 0.4:
    print("Uncertain (manual review)")
else:
    print("Different people")
```

**Recommended thresholds:**

| Threshold | Decision | False Accept Rate |
|-----------|----------|-------------------|
| 0.4 | Low security | Higher FAR |
| 0.5 | Balanced | Moderate FAR |
| 0.6 | High security | Lower FAR |
| 0.7 | Very strict | Very low FAR |

---

### Calibration for Your Dataset

Test on your data to find optimal thresholds:

```python
import cv2
import numpy as np


def pair_similarity(img1_path, img2_path, recognizer, detector):
    """Cosine similarity between the first face in each image (None if a face is missing)."""
    img1 = cv2.imread(img1_path)
    img2 = cv2.imread(img2_path)

    faces1 = detector.detect(img1)
    faces2 = detector.detect(img2)

    if not (faces1 and faces2):
        return None

    emb1 = recognizer.get_normalized_embedding(img1, faces1[0].landmarks)
    emb2 = recognizer.get_normalized_embedding(img2, faces2[0].landmarks)
    return np.dot(emb1, emb2.T)[0][0]


def calibrate_threshold(same_pairs, diff_pairs, recognizer, detector):
    """Find optimal threshold for your dataset."""
    # Compute similarities for same-person and different-person pairs
    same_scores = [s for p1, p2 in same_pairs
                   if (s := pair_similarity(p1, p2, recognizer, detector)) is not None]
    diff_scores = [s for p1, p2 in diff_pairs
                   if (s := pair_similarity(p1, p2, recognizer, detector)) is not None]

    # Find optimal threshold
    thresholds = np.arange(0.3, 0.8, 0.05)
    best_threshold = 0.5
    best_accuracy = 0

    for thresh in thresholds:
        tp = sum(1 for s in same_scores if s >= thresh)
        tn = sum(1 for s in diff_scores if s < thresh)
        accuracy = (tp + tn) / (len(same_scores) + len(diff_scores))

        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_threshold = thresh

    return best_threshold, best_accuracy
```

---

## Anti-Spoofing Thresholds

The MiniFASNet model returns a confidence score:

```python
from uniface.spoofing import MiniFASNet

spoofer = MiniFASNet()
result = spoofer.predict(image, face.bbox)

# Default threshold (0.5)
if result.is_real:  # confidence > 0.5
    print("Real face")

# Custom threshold for high security
SPOOF_THRESHOLD = 0.7
if result.confidence > SPOOF_THRESHOLD:
    print("Real face (high confidence)")
else:
    print("Potentially fake")
```

---

## Attribute Model Confidence

### Emotion

```python
result = emotion_predictor.predict(image, landmarks)

# Filter low-confidence predictions
if result.confidence > 0.6:
    print(f"Emotion: {result.emotion}")
else:
    print("Uncertain emotion")
```

---

## Visualization Threshold

For drawing detections, filter by confidence:

```python
from uniface.visualization import draw_detections

# Only draw high-confidence detections
confident = [f for f in faces if f.confidence > 0.7]
bboxes = [f.bbox for f in confident]
scores = [f.confidence for f in confident]
landmarks = [f.landmarks for f in confident]

draw_detections(
    image=image,
    bboxes=bboxes,
    scores=scores,
    landmarks=landmarks,
    vis_threshold=0.6  # Additional visualization filter
)
```

---

## Summary

| Parameter | Default | Range | Lower = | Higher = |
|-----------|---------|-------|---------|----------|
| `confidence_threshold` | 0.5 | 0.1-0.9 | More detections | Fewer false positives |
| `nms_threshold` | 0.4 | 0.1-0.7 | Fewer overlaps | More overlapping boxes |
| Similarity threshold | 0.6 | 0.3-0.8 | More matches (FAR↑) | Fewer matches (FRR↑) |
| Spoof confidence | 0.5 | 0.3-0.9 | More "real" | Stricter liveness |

---

## Next Steps

- [Detection Module](../modules/detection.md) - Detection model options
- [Recognition Module](../modules/recognition.md) - Recognition model options

72  docs/contributing.md  Normal file
@@ -0,0 +1,72 @@

# Contributing

Thank you for contributing to UniFace!

---

## Quick Start

```bash
# Clone
git clone https://github.com/yakhyo/uniface.git
cd uniface

# Install dev dependencies
pip install -e ".[dev]"

# Run tests
pytest
```

---

## Code Style

We use [Ruff](https://docs.astral.sh/ruff/) for formatting:

```bash
ruff format .
ruff check . --fix
```

**Guidelines:**

- Line length: 120
- Python 3.10+ type hints
- Google-style docstrings

---

## Pre-commit Hooks

```bash
pip install pre-commit
pre-commit install
pre-commit run --all-files
```

---

## Pull Request Process

1. Fork the repository
2. Create a feature branch
3. Write tests for new features
4. Ensure tests pass
5. Submit PR with clear description

---

## Adding New Models

1. Create model class in appropriate submodule
2. Add weight constants to `uniface/constants.py`
3. Export in `__init__.py` files
4. Write tests in `tests/`
5. Add example in `tools/` or notebooks

---

## Questions?

Open an issue on [GitHub](https://github.com/yakhyo/uniface/issues).

133  docs/index.md  Normal file
@@ -0,0 +1,133 @@

---
hide:
  - toc
  - navigation
  - edit
template: home.html
---

<div class="hero" markdown>

# UniFace { .hero-title }

<p class="hero-subtitle">A lightweight, production-ready face analysis library built on ONNX Runtime</p>

[PyPI](https://pypi.org/project/uniface/)
[Python](https://www.python.org/)
[MIT License](https://opensource.org/licenses/MIT)
[Downloads](https://pepy.tech/projects/uniface)

[Get Started](quickstart.md){ .md-button .md-button--primary }
[View on GitHub](https://github.com/yakhyo/uniface){ .md-button }

</div>

<div class="feature-grid" markdown>

<div class="feature-card" markdown>
### :material-face-recognition: Face Detection
ONNX-optimized detectors (RetinaFace, SCRFD, YOLO) with 5-point landmarks.
</div>

<div class="feature-card" markdown>
### :material-account-check: Face Recognition
AdaFace, ArcFace, MobileFace, and SphereFace embeddings for identity verification.
</div>

<div class="feature-card" markdown>
### :material-map-marker: Landmarks
Accurate 106-point facial landmark localization for detailed face analysis.
</div>

<div class="feature-card" markdown>
### :material-account-details: Attributes
Age, gender, race (FairFace), and emotion detection from faces.
</div>

<div class="feature-card" markdown>
### :material-face-man-shimmer: Face Parsing
BiSeNet semantic segmentation with 19 facial component classes.
</div>

<div class="feature-card" markdown>
### :material-eye: Gaze Estimation
Real-time gaze direction prediction with MobileGaze models.
</div>

<div class="feature-card" markdown>
### :material-shield-check: Anti-Spoofing
Face liveness detection with MiniFASNet to prevent fraud.
</div>

<div class="feature-card" markdown>
### :material-blur: Privacy
Face anonymization with 5 blur methods for privacy protection.
</div>

</div>

---

## Installation

=== "Standard"

    ```bash
    pip install uniface
    ```

=== "GPU (CUDA)"

    ```bash
    pip install uniface[gpu]
    ```

=== "From Source"

    ```bash
    git clone https://github.com/yakhyo/uniface.git
    cd uniface
    pip install -e .
    ```

---

## Next Steps

<div class="next-steps-grid" markdown>

<div class="feature-card" markdown>
### :material-rocket-launch: Quickstart
Get up and running in 5 minutes with common use cases.

[Quickstart Guide →](quickstart.md)
</div>

<div class="feature-card" markdown>
### :material-school: Tutorials
Step-by-step examples for common workflows.

[View Tutorials →](recipes/image-pipeline.md)
</div>

<div class="feature-card" markdown>
### :material-api: API Reference
Explore individual modules and their APIs.

[Browse API →](modules/detection.md)
</div>

<div class="feature-card" markdown>
### :material-book-open-variant: Guides
Learn about the architecture and design principles.

[Read Guides →](concepts/overview.md)
</div>

</div>

---

## License

UniFace is released under the [MIT License](https://opensource.org/licenses/MIT).

174  docs/installation.md  Normal file
@@ -0,0 +1,174 @@

# Installation

This guide covers all installation options for UniFace.

---

## Requirements

- **Python**: 3.10 or higher
- **Operating Systems**: macOS, Linux, Windows

---

## Quick Install

The simplest way to install UniFace:

```bash
pip install uniface
```

This installs the CPU version with all core dependencies.

---

## Platform-Specific Installation

### macOS (Apple Silicon - M1/M2/M3/M4)

For Apple Silicon Macs, the standard installation automatically includes ARM64 optimizations:

```bash
pip install uniface
```

!!! tip "Native Performance"
    The base `onnxruntime` package has native Apple Silicon support with ARM64 optimizations built in since version 1.13+. No additional configuration needed.

Verify ARM64 installation:

```bash
python -c "import platform; print(platform.machine())"
# Should show: arm64
```

---

### Linux/Windows with NVIDIA GPU

For CUDA acceleration on NVIDIA GPUs:

```bash
pip install uniface[gpu]
```

**Requirements:**

- CUDA 11.x or 12.x
- cuDNN 8.x

!!! info "CUDA Compatibility"
    See [ONNX Runtime GPU requirements](https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html) for the detailed compatibility matrix.

Verify GPU installation:

```python
import onnxruntime as ort

print("Available providers:", ort.get_available_providers())
# Should include: 'CUDAExecutionProvider'
```

---

### CPU-Only (All Platforms)

```bash
pip install uniface
```

Works on all platforms with automatic CPU fallback.

---

## Install from Source

For development or the latest features:

```bash
git clone https://github.com/yakhyo/uniface.git
cd uniface
pip install -e .
```

With development dependencies:

```bash
pip install -e ".[dev]"
```

---

## Dependencies

UniFace has minimal dependencies:

| Package | Purpose |
|---------|---------|
| `numpy` | Array operations |
| `opencv-python` | Image processing |
| `onnxruntime` | Model inference |
| `requests` | Model download |
| `tqdm` | Progress bars |

---

## Verify Installation

Test your installation:

```python
import uniface

print(f"UniFace version: {uniface.__version__}")

# Check available ONNX providers
import onnxruntime as ort

print(f"Available providers: {ort.get_available_providers()}")

# Quick test
from uniface import RetinaFace

detector = RetinaFace()
print("Installation successful!")
```

---

## Troubleshooting

### Import Errors

If you encounter import errors, ensure you're using Python 3.10+:

```bash
python --version
# Should show: Python 3.10.x or higher
```

### Model Download Issues

Models are automatically downloaded on first use. If downloads fail:

```python
from uniface.model_store import verify_model_weights
from uniface.constants import RetinaFaceWeights

# Manually download a model
model_path = verify_model_weights(RetinaFaceWeights.MNET_V2)
print(f"Model downloaded to: {model_path}")
```

### Performance Issues on Mac

Verify you're using the ARM64 build (not x86_64 via Rosetta):

```bash
python -c "import platform; print(platform.machine())"
# Should show: arm64 (not x86_64)
```

---

## Next Steps

- [Quickstart Guide](quickstart.md) - Get started in 5 minutes
- [Execution Providers](concepts/execution-providers.md) - Hardware acceleration setup

24  docs/license-attribution.md  Normal file
@@ -0,0 +1,24 @@

# Licenses & Attribution

## UniFace License

UniFace is released under the [MIT License](https://opensource.org/licenses/MIT).

---

## Model Credits

| Model | Source | License |
|-------|--------|---------|
| RetinaFace | [yakhyo/retinaface-pytorch](https://github.com/yakhyo/retinaface-pytorch) | MIT |
| SCRFD | [InsightFace](https://github.com/deepinsight/insightface) | MIT |
| YOLOv5-Face | [yakhyo/yolov5-face-onnx-inference](https://github.com/yakhyo/yolov5-face-onnx-inference) | GPL-3.0 |
| YOLOv8-Face | [yakhyo/yolov8-face-onnx-inference](https://github.com/yakhyo/yolov8-face-onnx-inference) | GPL-3.0 |
| AdaFace | [yakhyo/adaface-onnx](https://github.com/yakhyo/adaface-onnx) | MIT |
| ArcFace | [InsightFace](https://github.com/deepinsight/insightface) | MIT |
| MobileFace | [yakhyo/face-recognition](https://github.com/yakhyo/face-recognition) | MIT |
| SphereFace | [yakhyo/face-recognition](https://github.com/yakhyo/face-recognition) | MIT |
| BiSeNet | [yakhyo/face-parsing](https://github.com/yakhyo/face-parsing) | MIT |
| MobileGaze | [yakhyo/gaze-estimation](https://github.com/yakhyo/gaze-estimation) | MIT |
| MiniFASNet | [yakhyo/face-anti-spoofing](https://github.com/yakhyo/face-anti-spoofing) | Apache-2.0 |
| FairFace | [yakhyo/fairface-onnx](https://github.com/yakhyo/fairface-onnx) | CC BY 4.0 |

358  docs/models.md  Normal file
@@ -0,0 +1,358 @@

# Model Zoo

Complete guide to all available models and their performance characteristics.

---

## Face Detection Models

### RetinaFace Family

RetinaFace models are trained on the WIDER FACE dataset.

| Model Name | Params | Size | Easy | Medium | Hard |
|------------|--------|-------|--------|--------|--------|
| `MNET_025` | 0.4M | 1.7MB | 88.48% | 87.02% | 80.61% |
| `MNET_050` | 1.0M | 2.6MB | 89.42% | 87.97% | 82.40% |
| `MNET_V1` | 3.5M | 3.8MB | 90.59% | 89.14% | 84.13% |
| `MNET_V2` :material-check-circle: | 3.2M | 3.5MB | 91.70% | 91.03% | 86.60% |
| `RESNET18` | 11.7M | 27MB | 92.50% | 91.02% | 86.63% |
| `RESNET34` | 24.8M | 56MB | 94.16% | 93.12% | 88.90% |

!!! info "Accuracy & Benchmarks"
    **Accuracy**: WIDER FACE validation set (Easy/Medium/Hard subsets) - from [RetinaFace paper](https://arxiv.org/abs/1905.00641)

    **Speed**: Benchmark on your own hardware using `python tools/detection.py --source <image> --iterations 100`
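
The model names above correspond to the weight enums used by the model store, so a specific variant can be fetched ahead of time. A sketch; it assumes the enum member `RESNET34` exists alongside the `MNET_V2` member shown elsewhere in these docs:

```python
from uniface.model_store import verify_model_weights
from uniface.constants import RetinaFaceWeights

# Pre-fetch the larger, more accurate RESNET34 variant from the table above
path = verify_model_weights(RetinaFaceWeights.RESNET34)
print(f"Cached at: {path}")
```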

---

### SCRFD Family

SCRFD (Sample and Computation Redistribution for Efficient Face Detection) models trained on the WIDER FACE dataset.

| Model Name | Params | Size | Easy | Medium | Hard |
|------------|--------|-------|--------|--------|--------|
| `SCRFD_500M` | 0.6M | 2.5MB | 90.57% | 88.12% | 68.51% |
| `SCRFD_10G` :material-check-circle: | 4.2M | 17MB | 95.16% | 93.87% | 83.05% |

!!! info "Accuracy & Benchmarks"
    **Accuracy**: WIDER FACE validation set - from [SCRFD paper](https://arxiv.org/abs/2105.04714)

    **Speed**: Benchmark on your own hardware using `python tools/detection.py --source <image> --iterations 100`

---

### YOLOv5-Face Family

YOLOv5-Face models provide detection with 5-point facial landmarks, trained on the WIDER FACE dataset.

| Model Name | Size | Easy | Medium | Hard |
|------------|------|--------|--------|--------|
| `YOLOV5N` | 11MB | 93.61% | 91.52% | 80.53% |
| `YOLOV5S` :material-check-circle: | 28MB | 94.33% | 92.61% | 83.15% |
| `YOLOV5M` | 82MB | 95.30% | 93.76% | 85.28% |

!!! info "Accuracy & Benchmarks"
    **Accuracy**: WIDER FACE validation set - from [YOLOv5-Face paper](https://arxiv.org/abs/2105.12931)

    **Speed**: Benchmark on your own hardware using `python tools/detection.py --source <image> --iterations 100`

!!! note "Fixed Input Size"
    All YOLOv5-Face models use a fixed input size of 640×640.

---

### YOLOv8-Face Family

YOLOv8-Face models use an anchor-free design with DFL (Distribution Focal Loss) for bbox regression, and provide detection with 5-point facial landmarks.

| Model Name | Size | Easy | Medium | Hard |
|------------|-------|-------|--------|-------|
| `YOLOV8_LITE_S` | 7.4MB | 93.4% | 91.2% | 78.6% |
| `YOLOV8N` :material-check-circle: | 12MB | 94.6% | 92.3% | 79.6% |

!!! info "Accuracy & Benchmarks"
    **Accuracy**: WIDER FACE validation set (Easy/Medium/Hard subsets)

    **Speed**: Benchmark on your own hardware using `python tools/detection.py --source <image> --method yolov8face`

!!! note "Fixed Input Size"
    All YOLOv8-Face models use a fixed input size of 640×640.
---
|
||||
|
||||
## Face Recognition Models
|
||||
|
||||
### AdaFace
|
||||
|
||||
Face recognition using adaptive margin based on image quality.
|
||||
|
||||
| Model Name | Backbone | Dataset | Size | IJB-B TAR | IJB-C TAR |
|
||||
| ----------- | -------- | ----------- | ------ | --------- | --------- |
|
||||
| `IR_18` :material-check-circle: | IR-18 | WebFace4M | 92 MB | 93.03% | 94.99% |
|
||||
| `IR_101` | IR-101 | WebFace12M | 249 MB | - | 97.66% |
|
||||
|
||||
!!! info "Training Data & Accuracy"
|
||||
**Dataset**: WebFace4M (4M images) / WebFace12M (12M images)
|
||||
|
||||
**Accuracy**: IJB-B and IJB-C benchmarks, TAR@FAR=0.01%
|
||||
|
||||
!!! tip "Key Innovation"
|
||||
AdaFace introduces adaptive margin that adjusts based on image quality, providing better performance on low-quality images compared to fixed-margin approaches.

---

### ArcFace

Face recognition using additive angular margin loss.

| Model Name | Backbone | Params | Size | LFW | CFP-FP | AgeDB-30 | IJB-C |
| ----------- | --------- | ------ | ----- | ------ | ------ | -------- | ----- |
| `MNET` :material-check-circle: | MobileNet | 2.0M | 8MB | 99.70% | 98.00% | 96.58% | 95.02% |
| `RESNET` | ResNet50 | 43.6M | 166MB | 99.83% | 99.33% | 98.23% | 97.25% |

!!! info "Training Data"

    **Dataset**: Trained on WebFace600K (600K images)

    **Accuracy**: IJB-C accuracy reported as TAR@FAR=1e-4
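
Because the embeddings are L2-normalized, identity similarity reduces to a dot product. A sketch comparing two photos, assuming each contains exactly one face; the 0.6 threshold is just a common starting point, not a calibrated value:

```python
import cv2
import numpy as np
from uniface import RetinaFace, ArcFace

detector = RetinaFace()
recognizer = ArcFace()

def embed(path):
    image = cv2.imread(path)
    faces = detector.detect(image)
    return recognizer.get_normalized_embedding(image, faces[0].landmarks)

emb_a, emb_b = embed("a.jpg"), embed("b.jpg")
similarity = np.dot(emb_a, emb_b.T)[0][0]  # cosine similarity of normalized vectors
print(f"Same person: {similarity > 0.6} (score {similarity:.3f})")
```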

---

### MobileFace

Lightweight face recognition models with MobileNet backbones.

| Model Name | Backbone | Params | Size | LFW | CALFW | CPLFW | AgeDB-30 |
| ----------------- | ---------------- | ------ | ---- | ------ | ------ | ------ | -------- |
| `MNET_025` | MobileNetV1 0.25 | 0.36M | 1MB | 98.76% | 92.02% | 82.37% | 90.02% |
| `MNET_V2` :material-check-circle: | MobileNetV2 | 2.29M | 4MB | 99.55% | 94.87% | 86.89% | 95.16% |
| `MNET_V3_SMALL` | MobileNetV3-S | 1.25M | 3MB | 99.30% | 93.77% | 85.29% | 92.79% |
| `MNET_V3_LARGE` | MobileNetV3-L | 3.52M | 10MB | 99.53% | 94.56% | 86.79% | 95.13% |

!!! info "Training Data"

    **Dataset**: Trained on MS1M-V2 (5.8M images, 85K identities)

    **Accuracy**: Evaluated on LFW, CALFW, CPLFW, and AgeDB-30 benchmarks

---

### SphereFace

Face recognition using angular softmax loss.

| Model Name | Backbone | Params | Size | LFW | CALFW | CPLFW | AgeDB-30 |
| ------------ | -------- | ------ | ---- | ------ | ------ | ------ | -------- |
| `SPHERE20` | Sphere20 | 24.5M | 50MB | 99.67% | 95.61% | 88.75% | 96.58% |
| `SPHERE36` | Sphere36 | 34.6M | 92MB | 99.72% | 95.64% | 89.92% | 96.83% |

!!! info "Training Data"

    **Dataset**: Trained on MS1M-V2 (5.8M images, 85K identities)

    **Accuracy**: Evaluated on LFW, CALFW, CPLFW, and AgeDB-30 benchmarks

!!! note "Architecture"

    SphereFace uses angular softmax loss, an earlier approach that predates ArcFace. These models provide good accuracy with moderate resource requirements.

---

## Facial Landmark Models

### 106-Point Landmark Detection

Facial landmark localization model.

| Model Name | Points | Params | Size |
| ---------- | ------ | ------ | ---- |
| `2D106` | 106 | 3.7M | 14MB |

**Landmark Groups:**

| Group | Points | Count |
|-------|--------|-------|
| Face contour | 0-32 | 33 points |
| Eyebrows | 33-50 | 18 points |
| Nose | 51-62 | 12 points |
| Eyes | 63-86 | 24 points |
| Mouth | 87-105 | 19 points |
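
A minimal sketch pairing the landmark model with a detector, using the `Landmark106` API shown in the Landmarks module; the groups table above gives the slice ranges:

```python
import cv2
from uniface import RetinaFace, Landmark106

detector = RetinaFace()
landmarker = Landmark106()

image = cv2.imread("photo.jpg")
faces = detector.detect(image)
if faces:
    landmarks = landmarker.get_landmarks(image, faces[0].bbox)  # (106, 2)
    mouth = landmarks[87:106]  # slice any group from the table above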

---

## Attribute Analysis Models

### Age & Gender Detection

| Model Name | Attributes | Params | Size |
| ----------- | ----------- | ------ | ---- |
| `AgeGender` | Age, Gender | 2.1M | 8MB |

!!! info "Training Data"

    **Dataset**: Trained on CelebA

!!! warning "Accuracy Note"

    Accuracy varies by demographic and image quality. Test on your specific use case.
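
A minimal sketch, following the `AgeGender.predict` API from the Attributes module:

```python
import cv2
from uniface import RetinaFace, AgeGender

detector = RetinaFace()
age_gender = AgeGender()

image = cv2.imread("photo.jpg")
for face in detector.detect(image):
    result = age_gender.predict(image, face.bbox)
    print(f"{result.sex}, {result.age} years")
```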

---

### FairFace Attributes

| Model Name | Attributes | Params | Size |
| ----------- | ----------------------- | ------ | ----- |
| `FairFace` | Race, Gender, Age Group | - | 44MB |

!!! info "Training Data"

    **Dataset**: Trained on the FairFace dataset with balanced demographics

!!! tip "Equitable Predictions"

    FairFace provides more equitable predictions across different racial and gender groups.

**Race Categories (7):** White, Black, Latino Hispanic, East Asian, Southeast Asian, Indian, Middle Eastern

**Age Groups (9):** 0-2, 3-9, 10-19, 20-29, 30-39, 40-49, 50-59, 60-69, 70+
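
A minimal sketch using the `FairFace.predict` API from the Attributes module:

```python
import cv2
from uniface import RetinaFace, FairFace

detector = RetinaFace()
fairface = FairFace()

image = cv2.imread("photo.jpg")
for face in detector.detect(image):
    result = fairface.predict(image, face.bbox)
    print(result.sex, result.age_group, result.race)
```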

---

### Emotion Detection

| Model Name | Classes | Params | Size |
| ------------- | ------- | ------ | ---- |
| `AFFECNET7` | 7 | 0.5M | 2MB |
| `AFFECNET8` | 8 | 0.5M | 2MB |

**Classes (7)**: Neutral, Happy, Sad, Surprise, Fear, Disgust, Anger

**Classes (8)**: Above + Contempt

!!! info "Training Data"

    **Dataset**: Trained on AffectNet

!!! note "Accuracy Note"

    Emotion detection accuracy depends heavily on facial expression clarity and cultural context.
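
A minimal sketch using the `Emotion` class from the Attributes module (note it requires PyTorch and takes the detector's 5-point landmarks):

```python
import cv2
from uniface import RetinaFace
from uniface.attribute import Emotion
from uniface.constants import DDAMFNWeights

detector = RetinaFace()
emotion = Emotion(model_name=DDAMFNWeights.AFFECNET7)

image = cv2.imread("photo.jpg")
for face in detector.detect(image):
    result = emotion.predict(image, face.landmarks)  # uses 5-point landmarks
    print(f"{result.emotion} ({result.confidence:.2%})")
```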

---

## Gaze Estimation Models

### MobileGaze Family

Gaze direction prediction models trained on the Gaze360 dataset. They return pitch (vertical) and yaw (horizontal) angles in radians.

| Model Name | Params | Size | MAE* |
| -------------- | ------ | ------- | ----- |
| `RESNET18` | 11.7M | 43 MB | 12.84 |
| `RESNET34` :material-check-circle: | 24.8M | 81.6 MB | 11.33 |
| `RESNET50` | 25.6M | 91.3 MB | 11.34 |
| `MOBILENET_V2` | 3.5M | 9.59 MB | 13.07 |
| `MOBILEONE_S0` | 2.1M | 4.8 MB | 12.58 |

*MAE (Mean Absolute Error) in degrees on the Gaze360 test set - lower is better

!!! info "Training Data"

    **Dataset**: Trained on Gaze360 (indoor/outdoor scenes with diverse head poses)

    **Training**: 200 epochs with a classification-based approach (binned angles)

!!! note "Input Requirements"

    Requires a face crop as input. Run face detection first to obtain bounding boxes.
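
A minimal sketch following the Gaze module's API: crop the detected face, call `estimate`, and convert radians to degrees:

```python
import cv2
import numpy as np
from uniface import RetinaFace, MobileGaze

detector = RetinaFace()
gaze = MobileGaze()

image = cv2.imread("photo.jpg")
for face in detector.detect(image):
    x1, y1, x2, y2 = map(int, face.bbox)
    crop = image[y1:y2, x1:x2]  # gaze model expects a face crop
    if crop.size > 0:
        result = gaze.estimate(crop)
        print(f"pitch {np.degrees(result.pitch):.1f}, yaw {np.degrees(result.yaw):.1f}")
```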

---

## Face Parsing Models

### BiSeNet Family

BiSeNet (Bilateral Segmentation Network) models for semantic face parsing. They segment face images into 19 facial component classes.

| Model Name | Params | Size | Classes |
| -------------- | ------ | ------- | ------- |
| `RESNET18` :material-check-circle: | 13.3M | 50.7 MB | 19 |
| `RESNET34` | 24.1M | 89.2 MB | 19 |

!!! info "Training Data"

    **Dataset**: Trained on CelebAMask-HQ

    **Architecture**: BiSeNet with a ResNet backbone

    **Input Size**: 512×512 (automatically resized)

**19 Facial Component Classes:**

| # | Class | # | Class | # | Class |
|---|-------|---|-------|---|-------|
| 1 | Background | 8 | Left Ear | 15 | Neck |
| 2 | Skin | 9 | Right Ear | 16 | Neck Lace |
| 3 | Left Eyebrow | 10 | Ear Ring | 17 | Cloth |
| 4 | Right Eyebrow | 11 | Nose | 18 | Hair |
| 5 | Left Eye | 12 | Mouth | 19 | Hat |
| 6 | Right Eye | 13 | Upper Lip | | |
| 7 | Eye Glasses | 14 | Lower Lip | | |

**Applications:**

- Face makeup and beauty applications
- Virtual try-on systems
- Face editing and manipulation
- Facial feature extraction
- Portrait segmentation

!!! note "Input Requirements"

    Input should be a cropped face image. For a full pipeline, run face detection first to obtain face crops.
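
A minimal sketch chaining detection and parsing, using the `BiSeNet.parse` API from the Parsing module:

```python
import cv2
from uniface import RetinaFace
from uniface.parsing import BiSeNet

detector = RetinaFace()
parser = BiSeNet()

image = cv2.imread("photo.jpg")
faces = detector.detect(image)
if faces:
    x1, y1, x2, y2 = map(int, faces[0].bbox)
    mask = parser.parse(image[y1:y2, x1:x2])  # (H, W) of class IDs, see table above
```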

---

## Anti-Spoofing Models

### MiniFASNet Family

Face anti-spoofing models for liveness detection. They detect whether a face is real (live) or fake (photo, video replay, mask).

| Model Name | Size | Scale |
| ---------- | ------ | ----- |
| `V1SE` | 1.2 MB | 4.0 |
| `V2` :material-check-circle: | 1.2 MB | 2.7 |

!!! info "Output Format"

    **Output**: Returns `SpoofingResult(is_real, confidence)`, where `is_real` is `True` for a live face and `False` for a spoof

!!! note "Input Requirements"

    Requires a face bounding box from a detector.
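
A heavily hedged sketch of the intended flow; the import path and class name below are assumptions for illustration only (this page does not name the class), while the `SpoofingResult` fields come from the output format above:

```python
import cv2
from uniface import RetinaFace
# NOTE: the module path and class name here are assumptions; consult the
# anti-spoofing module documentation for the exact API.
from uniface.spoofing import MiniFASNet

detector = RetinaFace()
spoofer = MiniFASNet()

image = cv2.imread("photo.jpg")
for face in detector.detect(image):
    result = spoofer.predict(image, face.bbox)  # SpoofingResult(is_real, confidence)
    print("live" if result.is_real else "spoof", f"{result.confidence:.2f}")
```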

---

## Model Management

Models are automatically downloaded and cached on first use.

- **Cache location**: `~/.uniface/models/`
- **Verification**: Models are verified with SHA-256 checksums
- **Manual download**: Use `python tools/download_model.py` to pre-download models

---

## References

### Model Training & Architectures

- **RetinaFace Training**: [yakhyo/retinaface-pytorch](https://github.com/yakhyo/retinaface-pytorch) - PyTorch implementation and training code
- **YOLOv5-Face Original**: [deepcam-cn/yolov5-face](https://github.com/deepcam-cn/yolov5-face) - Original PyTorch implementation
- **YOLOv5-Face ONNX**: [yakhyo/yolov5-face-onnx-inference](https://github.com/yakhyo/yolov5-face-onnx-inference) - ONNX inference implementation
- **YOLOv8-Face Original**: [derronqi/yolov8-face](https://github.com/derronqi/yolov8-face) - Original PyTorch implementation
- **YOLOv8-Face ONNX**: [yakhyo/yolov8-face-onnx-inference](https://github.com/yakhyo/yolov8-face-onnx-inference) - ONNX inference implementation
- **AdaFace Original**: [mk-minchul/AdaFace](https://github.com/mk-minchul/AdaFace) - Original PyTorch implementation
- **AdaFace ONNX**: [yakhyo/adaface-onnx](https://github.com/yakhyo/adaface-onnx) - ONNX export and inference
- **Face Recognition Training**: [yakhyo/face-recognition](https://github.com/yakhyo/face-recognition) - ArcFace, MobileFace, SphereFace training code
- **Gaze Estimation Training**: [yakhyo/gaze-estimation](https://github.com/yakhyo/gaze-estimation) - MobileGaze training code and pretrained weights
- **Face Parsing Training**: [yakhyo/face-parsing](https://github.com/yakhyo/face-parsing) - BiSeNet training code and pretrained weights
- **Face Anti-Spoofing**: [yakhyo/face-anti-spoofing](https://github.com/yakhyo/face-anti-spoofing) - MiniFASNet ONNX inference (weights from [minivision-ai/Silent-Face-Anti-Spoofing](https://github.com/minivision-ai/Silent-Face-Anti-Spoofing))
- **FairFace**: [yakhyo/fairface-onnx](https://github.com/yakhyo/fairface-onnx) - FairFace ONNX inference for race, gender, age prediction
- **InsightFace**: [deepinsight/insightface](https://github.com/deepinsight/insightface) - Model architectures and pretrained weights

### Papers

- **RetinaFace**: [Single-Shot Multi-Level Face Localisation in the Wild](https://arxiv.org/abs/1905.00641)
- **SCRFD**: [Sample and Computation Redistribution for Efficient Face Detection](https://arxiv.org/abs/2105.04714)
- **YOLOv5-Face**: [YOLO5Face: Why Reinventing a Face Detector](https://arxiv.org/abs/2105.12931)
- **AdaFace**: [AdaFace: Quality Adaptive Margin for Face Recognition](https://arxiv.org/abs/2204.00964)
- **ArcFace**: [Additive Angular Margin Loss for Deep Face Recognition](https://arxiv.org/abs/1801.07698)
- **SphereFace**: [Deep Hypersphere Embedding for Face Recognition](https://arxiv.org/abs/1704.08063)
- **BiSeNet**: [Bilateral Segmentation Network for Real-time Semantic Segmentation](https://arxiv.org/abs/1808.00897)
279
docs/modules/attributes.md
Normal file
@@ -0,0 +1,279 @@
# Attributes

Facial attribute analysis for age, gender, race, and emotion detection.

---

## Available Models

| Model | Attributes | Size | Notes |
|-------|------------|------|-------|
| **AgeGender** | Age, Gender | 8 MB | Exact age prediction |
| **FairFace** | Gender, Age Group, Race | 44 MB | Balanced demographics |
| **Emotion** | 7-8 emotions | 2 MB | Requires PyTorch |

---

## AgeGender

Predicts exact age and binary gender.

### Basic Usage

```python
import cv2
from uniface import RetinaFace, AgeGender

detector = RetinaFace()
age_gender = AgeGender()

image = cv2.imread("photo.jpg")
faces = detector.detect(image)

for face in faces:
    result = age_gender.predict(image, face.bbox)
    print(f"Gender: {result.sex}")  # "Female" or "Male"
    print(f"Age: {result.age} years")
```

### Output

```python
# AttributeResult fields
result.gender     # 0=Female, 1=Male
result.sex        # "Female" or "Male" (property)
result.age        # int, age in years
result.age_group  # None (not provided by this model)
result.race       # None (not provided by this model)
```

---

## FairFace

Predicts gender, age group, and race with balanced demographics.

### Basic Usage

```python
from uniface import RetinaFace, FairFace

detector = RetinaFace()
fairface = FairFace()

faces = detector.detect(image)

for face in faces:
    result = fairface.predict(image, face.bbox)
    print(f"Gender: {result.sex}")
    print(f"Age Group: {result.age_group}")
    print(f"Race: {result.race}")
```

### Output

```python
# AttributeResult fields
result.gender     # 0=Female, 1=Male
result.sex        # "Female" or "Male"
result.age        # None (not provided by this model)
result.age_group  # "20-29", "30-39", etc.
result.race       # Race/ethnicity label
```

### Race Categories

| Label |
|-------|
| White |
| Black |
| Latino Hispanic |
| East Asian |
| Southeast Asian |
| Indian |
| Middle Eastern |

### Age Groups

| Group |
|-------|
| 0-2 |
| 3-9 |
| 10-19 |
| 20-29 |
| 30-39 |
| 40-49 |
| 50-59 |
| 60-69 |
| 70+ |

---

## Emotion

Predicts facial emotions. Requires PyTorch.

!!! warning "Optional Dependency"

    Emotion detection requires PyTorch. Install with:

    ```bash
    pip install torch
    ```

### Basic Usage

```python
from uniface import RetinaFace
from uniface.attribute import Emotion
from uniface.constants import DDAMFNWeights

detector = RetinaFace()
emotion = Emotion(model_name=DDAMFNWeights.AFFECNET7)

faces = detector.detect(image)

for face in faces:
    result = emotion.predict(image, face.landmarks)
    print(f"Emotion: {result.emotion}")
    print(f"Confidence: {result.confidence:.2%}")
```

### Emotion Classes

=== "7-Class (AFFECNET7)"

    | Label |
    |-------|
    | Neutral |
    | Happy |
    | Sad |
    | Surprise |
    | Fear |
    | Disgust |
    | Anger |

=== "8-Class (AFFECNET8)"

    | Label |
    |-------|
    | Neutral |
    | Happy |
    | Sad |
    | Surprise |
    | Fear |
    | Disgust |
    | Anger |
    | Contempt |

### Model Variants

```python
from uniface.attribute import Emotion
from uniface.constants import DDAMFNWeights

# 7-class emotion
emotion = Emotion(model_name=DDAMFNWeights.AFFECNET7)

# 8-class emotion
emotion = Emotion(model_name=DDAMFNWeights.AFFECNET8)
```

---

## Combining Models

### Full Attribute Analysis

```python
from uniface import RetinaFace, AgeGender, FairFace

detector = RetinaFace()
age_gender = AgeGender()
fairface = FairFace()

faces = detector.detect(image)

for face in faces:
    # Get exact age from AgeGender
    ag_result = age_gender.predict(image, face.bbox)

    # Get race from FairFace
    ff_result = fairface.predict(image, face.bbox)

    print(f"Gender: {ag_result.sex}")
    print(f"Exact Age: {ag_result.age}")
    print(f"Age Group: {ff_result.age_group}")
    print(f"Race: {ff_result.race}")
```

### Using FaceAnalyzer

```python
from uniface import FaceAnalyzer

analyzer = FaceAnalyzer(
    detect=True,
    recognize=False,
    attributes=True  # Uses AgeGender
)

faces = analyzer.analyze(image)

for face in faces:
    print(f"Age: {face.age}, Gender: {face.sex}")
```

---

## Visualization

```python
import cv2

def draw_attributes(image, face, result):
    """Draw attributes on image."""
    x1, y1, x2, y2 = map(int, face.bbox)

    # Draw bounding box
    cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)

    # Build label
    label = f"{result.sex}"
    if result.age:
        label += f", {result.age}y"
    if result.age_group:
        label += f", {result.age_group}"
    if result.race:
        label += f", {result.race}"

    # Draw label
    cv2.putText(
        image, label, (x1, y1 - 10),
        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2
    )

    return image

# Usage
for face in faces:
    result = age_gender.predict(image, face.bbox)
    image = draw_attributes(image, face, result)

cv2.imwrite("attributes.jpg", image)
```

---

## Accuracy Notes

!!! note "Model Limitations"

    - **AgeGender**: Trained on CelebA; accuracy varies by demographic
    - **FairFace**: Trained for balanced demographics; better cross-racial accuracy
    - **Emotion**: Accuracy depends on facial expression clarity

    Always test on your specific use case and consider cultural context.

---

## Next Steps

- [Parsing](parsing.md) - Face semantic segmentation
- [Gaze](gaze.md) - Gaze estimation
- [Image Pipeline Recipe](../recipes/image-pipeline.md) - Complete workflow
309
docs/modules/detection.md
Normal file
@@ -0,0 +1,309 @@
# Detection

Face detection is the first step in any face analysis pipeline. UniFace provides four detection models.

---

## Available Models

| Model | Backbone | Size | Easy | Medium | Hard | Landmarks |
|-------|----------|------|------|--------|------|:---------:|
| **RetinaFace** | MobileNet V2 | 3.5 MB | 91.7% | 91.0% | 86.6% | :material-check: |
| **SCRFD** | SCRFD-10G | 17 MB | 95.2% | 93.9% | 83.1% | :material-check: |
| **YOLOv5-Face** | YOLOv5s | 28 MB | 94.3% | 92.6% | 83.2% | :material-check: |
| **YOLOv8-Face** | YOLOv8n | 12 MB | 94.6% | 92.3% | 79.6% | :material-check: |

!!! note "Dataset"

    All models are trained on the WIDER FACE dataset.

---

## RetinaFace

Single-shot face detector with a multi-scale feature pyramid.

### Basic Usage

```python
import cv2
from uniface import RetinaFace

detector = RetinaFace()

image = cv2.imread("photo.jpg")
faces = detector.detect(image)

for face in faces:
    print(f"Confidence: {face.confidence:.2f}")
    print(f"BBox: {face.bbox}")
    print(f"Landmarks: {face.landmarks.shape}")  # (5, 2)
```

### Model Variants

```python
from uniface import RetinaFace
from uniface.constants import RetinaFaceWeights

# Lightweight (mobile/edge)
detector = RetinaFace(model_name=RetinaFaceWeights.MNET_025)

# Balanced (default)
detector = RetinaFace(model_name=RetinaFaceWeights.MNET_V2)

# High accuracy
detector = RetinaFace(model_name=RetinaFaceWeights.RESNET34)
```

| Variant | Params | Size | Easy | Medium | Hard |
|---------|--------|------|------|--------|------|
| MNET_025 | 0.4M | 1.7 MB | 88.5% | 87.0% | 80.6% |
| MNET_050 | 1.0M | 2.6 MB | 89.4% | 88.0% | 82.4% |
| MNET_V1 | 3.5M | 3.8 MB | 90.6% | 89.1% | 84.1% |
| **MNET_V2** :material-check-circle: | 3.2M | 3.5 MB | 91.7% | 91.0% | 86.6% |
| RESNET18 | 11.7M | 27 MB | 92.5% | 91.0% | 86.6% |
| RESNET34 | 24.8M | 56 MB | 94.2% | 93.1% | 88.9% |

### Configuration

```python
detector = RetinaFace(
    model_name=RetinaFaceWeights.MNET_V2,
    confidence_threshold=0.5,  # Min confidence
    nms_threshold=0.4,         # NMS IoU threshold
    input_size=(640, 640),     # Input resolution
    dynamic_size=False,        # Enable dynamic input size
    providers=None,            # Auto-detect, or ['CPUExecutionProvider']
)
```

---

## SCRFD

State-of-the-art detection with an excellent accuracy-speed tradeoff.

### Basic Usage

```python
from uniface import SCRFD

detector = SCRFD()
faces = detector.detect(image)
```

### Model Variants

```python
from uniface import SCRFD
from uniface.constants import SCRFDWeights

# Real-time (lightweight)
detector = SCRFD(model_name=SCRFDWeights.SCRFD_500M_KPS)

# High accuracy (default)
detector = SCRFD(model_name=SCRFDWeights.SCRFD_10G_KPS)
```

| Variant | Params | Size | Easy | Medium | Hard |
|---------|--------|------|------|--------|------|
| SCRFD_500M_KPS | 0.6M | 2.5 MB | 90.6% | 88.1% | 68.5% |
| **SCRFD_10G_KPS** :material-check-circle: | 4.2M | 17 MB | 95.2% | 93.9% | 83.1% |

### Configuration

```python
detector = SCRFD(
    model_name=SCRFDWeights.SCRFD_10G_KPS,
    confidence_threshold=0.5,
    nms_threshold=0.4,
    input_size=(640, 640),
    providers=None,  # Auto-detect, or ['CPUExecutionProvider']
)
```

---

## YOLOv5-Face

YOLO-based detection optimized for faces.

### Basic Usage

```python
from uniface import YOLOv5Face

detector = YOLOv5Face()
faces = detector.detect(image)
```

### Model Variants

```python
from uniface import YOLOv5Face
from uniface.constants import YOLOv5FaceWeights

# Lightweight
detector = YOLOv5Face(model_name=YOLOv5FaceWeights.YOLOV5N)

# Balanced (default)
detector = YOLOv5Face(model_name=YOLOv5FaceWeights.YOLOV5S)

# High accuracy
detector = YOLOv5Face(model_name=YOLOv5FaceWeights.YOLOV5M)
```

| Variant | Size | Easy | Medium | Hard |
|---------|------|------|--------|------|
| YOLOV5N | 11 MB | 93.6% | 91.5% | 80.5% |
| **YOLOV5S** :material-check-circle: | 28 MB | 94.3% | 92.6% | 83.2% |
| YOLOV5M | 82 MB | 95.3% | 93.8% | 85.3% |

!!! note "Fixed Input Size"

    YOLOv5-Face uses a fixed input size of 640×640.

### Configuration

```python
detector = YOLOv5Face(
    model_name=YOLOv5FaceWeights.YOLOV5S,
    confidence_threshold=0.6,
    nms_threshold=0.5,
    nms_mode='numpy',  # or 'torchvision' for faster NMS
    providers=None,    # Auto-detect, or ['CPUExecutionProvider']
)
```

---

## YOLOv8-Face

Anchor-free detection with DFL (Distribution Focal Loss) for accurate bbox regression.

### Basic Usage

```python
from uniface import YOLOv8Face

detector = YOLOv8Face()
faces = detector.detect(image)
```

### Model Variants

```python
from uniface import YOLOv8Face
from uniface.constants import YOLOv8FaceWeights

# Lightweight
detector = YOLOv8Face(model_name=YOLOv8FaceWeights.YOLOV8_LITE_S)

# Recommended (default)
detector = YOLOv8Face(model_name=YOLOv8FaceWeights.YOLOV8N)
```

| Variant | Size | Easy | Medium | Hard |
|---------|------|------|--------|------|
| YOLOV8_LITE_S | 7.4 MB | 93.4% | 91.2% | 78.6% |
| **YOLOV8N** :material-check-circle: | 12 MB | 94.6% | 92.3% | 79.6% |

!!! note "Fixed Input Size"

    YOLOv8-Face uses a fixed input size of 640×640.

### Configuration

```python
detector = YOLOv8Face(
    model_name=YOLOv8FaceWeights.YOLOV8N,
    confidence_threshold=0.5,
    nms_threshold=0.45,
    nms_mode='numpy',  # or 'torchvision' for faster NMS
    providers=None,    # Auto-detect, or ['CPUExecutionProvider']
)
```

---

## Factory Function

Create detectors dynamically:

```python
from uniface import create_detector

detector = create_detector('retinaface')
# or
detector = create_detector('scrfd')
# or
detector = create_detector('yolov5face')
# or
detector = create_detector('yolov8face')
```

---

## High-Level API

One-line detection:

```python
from uniface import detect_faces

# Using RetinaFace (default)
faces = detect_faces(image, method='retinaface', confidence_threshold=0.5)

# Using YOLOv8-Face
faces = detect_faces(image, method='yolov8face', confidence_threshold=0.5)
```

---

## Output Format

All detectors return `list[Face]`:

```python
for face in faces:
    # Bounding box [x1, y1, x2, y2]
    bbox = face.bbox

    # Detection confidence (0-1)
    confidence = face.confidence

    # 5-point landmarks (5, 2)
    landmarks = face.landmarks
    # [left_eye, right_eye, nose, left_mouth, right_mouth]
```

---

## Visualization

```python
import cv2
from uniface.visualization import draw_detections

draw_detections(
    image=image,
    bboxes=[f.bbox for f in faces],
    scores=[f.confidence for f in faces],
    landmarks=[f.landmarks for f in faces],
    vis_threshold=0.6
)

cv2.imwrite("result.jpg", image)
```

---

## Performance Comparison

Benchmark on your hardware:

```bash
python tools/detection.py --source image.jpg --iterations 100
```

---

## See Also

- [Recognition Module](recognition.md) - Extract embeddings from detected faces
- [Landmarks Module](landmarks.md) - Get 106-point landmarks
- [Image Pipeline Recipe](../recipes/image-pipeline.md) - Complete detection workflow
- [Concepts: Thresholds](../concepts/thresholds-calibration.md) - Tuning detection parameters
270
docs/modules/gaze.md
Normal file
@@ -0,0 +1,270 @@
# Gaze Estimation

Gaze estimation predicts where a person is looking (pitch and yaw angles).

---

## Available Models

| Model | Backbone | Size | MAE* |
|-------|----------|------|------|
| ResNet18 | ResNet18 | 43 MB | 12.84° |
| **ResNet34** :material-check-circle: | ResNet34 | 82 MB | 11.33° |
| ResNet50 | ResNet50 | 91 MB | 11.34° |
| MobileNetV2 | MobileNetV2 | 9.6 MB | 13.07° |
| MobileOne-S0 | MobileOne | 4.8 MB | 12.58° |

*MAE = Mean Absolute Error on the Gaze360 test set (lower is better)

---

## Basic Usage

```python
import cv2
import numpy as np
from uniface import RetinaFace, MobileGaze

detector = RetinaFace()
gaze_estimator = MobileGaze()

image = cv2.imread("photo.jpg")
faces = detector.detect(image)

for face in faces:
    # Crop face
    x1, y1, x2, y2 = map(int, face.bbox)
    face_crop = image[y1:y2, x1:x2]

    if face_crop.size > 0:
        # Estimate gaze
        result = gaze_estimator.estimate(face_crop)

        # Convert to degrees
        pitch_deg = np.degrees(result.pitch)
        yaw_deg = np.degrees(result.yaw)

        print(f"Pitch: {pitch_deg:.1f}°, Yaw: {yaw_deg:.1f}°")
```

---

## Model Variants

```python
from uniface import MobileGaze
from uniface.constants import GazeWeights

# Default (ResNet34, recommended)
gaze = MobileGaze()

# Lightweight for mobile/edge
gaze = MobileGaze(model_name=GazeWeights.MOBILEONE_S0)

# Higher accuracy
gaze = MobileGaze(model_name=GazeWeights.RESNET50)
```

---

## Output Format

```python
result = gaze_estimator.estimate(face_crop)

# GazeResult dataclass
result.pitch  # Vertical angle in radians
result.yaw    # Horizontal angle in radians
```

### Angle Convention

```
            pitch = +90° (looking up)
                   │
                   │
yaw = -90° ────────┼──────── yaw = +90°
(looking left)     │     (looking right)
                   │
            pitch = -90° (looking down)
```

- **Pitch**: Vertical gaze angle
    - Positive = looking up
    - Negative = looking down

- **Yaw**: Horizontal gaze angle
    - Positive = looking right
    - Negative = looking left

---

## Visualization

```python
from uniface.visualization import draw_gaze

# Detect faces
faces = detector.detect(image)

for face in faces:
    x1, y1, x2, y2 = map(int, face.bbox)
    face_crop = image[y1:y2, x1:x2]

    if face_crop.size > 0:
        result = gaze_estimator.estimate(face_crop)

        # Draw gaze arrow on image
        draw_gaze(image, face.bbox, result.pitch, result.yaw)

cv2.imwrite("gaze_output.jpg", image)
```

### Custom Visualization

```python
import cv2
import numpy as np

def draw_gaze_custom(image, bbox, pitch, yaw, length=100, color=(0, 255, 0)):
    """Draw a custom gaze arrow."""
    x1, y1, x2, y2 = map(int, bbox)

    # Face center
    cx = (x1 + x2) // 2
    cy = (y1 + y2) // 2

    # Calculate endpoint
    dx = -length * np.sin(yaw) * np.cos(pitch)
    dy = -length * np.sin(pitch)

    # Draw arrow
    end_x = int(cx + dx)
    end_y = int(cy + dy)

    cv2.arrowedLine(image, (cx, cy), (end_x, end_y), color, 2, tipLength=0.3)

    return image
```

---

## Real-Time Gaze Tracking

```python
import cv2
import numpy as np
from uniface import RetinaFace, MobileGaze
from uniface.visualization import draw_gaze

detector = RetinaFace()
gaze_estimator = MobileGaze()

cap = cv2.VideoCapture(0)

while True:
    ret, frame = cap.read()
    if not ret:
        break

    faces = detector.detect(frame)

    for face in faces:
        x1, y1, x2, y2 = map(int, face.bbox)
        face_crop = frame[y1:y2, x1:x2]

        if face_crop.size > 0:
            result = gaze_estimator.estimate(face_crop)

            # Draw bounding box
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

            # Draw gaze
            draw_gaze(frame, face.bbox, result.pitch, result.yaw)

            # Display angles
            pitch_deg = np.degrees(result.pitch)
            yaw_deg = np.degrees(result.yaw)
            label = f"P:{pitch_deg:.0f} Y:{yaw_deg:.0f}"
            cv2.putText(frame, label, (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    cv2.imshow("Gaze Estimation", frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
```

---

## Use Cases

### Attention Detection

```python
import numpy as np

def is_looking_at_camera(result, threshold=15):
    """Check if the person is looking at the camera."""
    pitch_deg = abs(np.degrees(result.pitch))
    yaw_deg = abs(np.degrees(result.yaw))

    return pitch_deg < threshold and yaw_deg < threshold

# Usage
result = gaze_estimator.estimate(face_crop)
if is_looking_at_camera(result):
    print("Looking at camera")
else:
    print("Looking away")
```

### Gaze Direction Classification

```python
def classify_gaze_direction(result, threshold=20):
    """Classify gaze into directions."""
    pitch_deg = np.degrees(result.pitch)
    yaw_deg = np.degrees(result.yaw)

    directions = []

    if pitch_deg > threshold:
        directions.append("up")
    elif pitch_deg < -threshold:
        directions.append("down")

    if yaw_deg > threshold:
        directions.append("right")
    elif yaw_deg < -threshold:
        directions.append("left")

    if not directions:
        return "center"

    return " ".join(directions)

# Usage
result = gaze_estimator.estimate(face_crop)
direction = classify_gaze_direction(result)
print(f"Looking: {direction}")
```

---

## Factory Function

```python
from uniface import create_gaze_estimator

gaze = create_gaze_estimator()  # Returns MobileGaze
```

---

## Next Steps

- [Anti-Spoofing](spoofing.md) - Face liveness detection
- [Privacy](privacy.md) - Face anonymization
- [Video Recipe](../recipes/video-webcam.md) - Real-time processing
251
docs/modules/landmarks.md
Normal file
@@ -0,0 +1,251 @@
# Landmarks

Facial landmark detection provides precise localization of facial features.

---

## Available Models

| Model | Points | Size |
|-------|--------|------|
| **Landmark106** | 106 | 14 MB |

!!! info "5-Point Landmarks"

    Basic 5-point landmarks are included with all detection models (RetinaFace, SCRFD, YOLOv5-Face, YOLOv8-Face).

---

## 106-Point Landmarks

### Basic Usage

```python
import cv2
from uniface import RetinaFace, Landmark106

detector = RetinaFace()
landmarker = Landmark106()

# Detect face
image = cv2.imread("photo.jpg")
faces = detector.detect(image)

# Get detailed landmarks
if faces:
    landmarks = landmarker.get_landmarks(image, faces[0].bbox)
    print(f"Landmarks shape: {landmarks.shape}")  # (106, 2)
```

### Landmark Groups

| Range | Group | Points |
|-------|-------|--------|
| 0-32 | Face Contour | 33 |
| 33-50 | Eyebrows | 18 |
| 51-62 | Nose | 12 |
| 63-86 | Eyes | 24 |
| 87-105 | Mouth | 19 |

### Extract Specific Features

```python
landmarks = landmarker.get_landmarks(image, face.bbox)

# Face contour
contour = landmarks[0:33]

# Left eyebrow
left_eyebrow = landmarks[33:42]

# Right eyebrow
right_eyebrow = landmarks[42:51]

# Nose
nose = landmarks[51:63]

# Left eye
left_eye = landmarks[63:72]

# Right eye
right_eye = landmarks[76:84]

# Mouth
mouth = landmarks[87:106]
```

---

## 5-Point Landmarks (Detection)

All detection models provide 5-point landmarks:

```python
from uniface import RetinaFace

detector = RetinaFace()
faces = detector.detect(image)

if faces:
    landmarks_5 = faces[0].landmarks
    print(f"Shape: {landmarks_5.shape}")  # (5, 2)

    left_eye = landmarks_5[0]
    right_eye = landmarks_5[1]
    nose = landmarks_5[2]
    left_mouth = landmarks_5[3]
    right_mouth = landmarks_5[4]
```

---

## Visualization

### Draw 106 Landmarks

```python
import cv2

def draw_landmarks(image, landmarks, color=(0, 255, 0), radius=2):
    """Draw landmarks on image."""
    for x, y in landmarks.astype(int):
        cv2.circle(image, (x, y), radius, color, -1)
    return image

# Usage
landmarks = landmarker.get_landmarks(image, face.bbox)
image_with_landmarks = draw_landmarks(image.copy(), landmarks)
cv2.imwrite("landmarks.jpg", image_with_landmarks)
```

### Draw with Connections

```python
def draw_landmarks_with_connections(image, landmarks):
    """Draw landmarks with facial feature connections."""
    landmarks = landmarks.astype(int)

    # Face contour (0-32)
    for i in range(32):
        cv2.line(image, tuple(landmarks[i]), tuple(landmarks[i+1]), (255, 255, 0), 1)

    # Left eyebrow (33-41)
    for i in range(33, 41):
        cv2.line(image, tuple(landmarks[i]), tuple(landmarks[i+1]), (0, 255, 0), 1)

    # Right eyebrow (42-50)
    for i in range(42, 50):
        cv2.line(image, tuple(landmarks[i]), tuple(landmarks[i+1]), (0, 255, 0), 1)

    # Nose (51-62)
    for i in range(51, 62):
        cv2.line(image, tuple(landmarks[i]), tuple(landmarks[i+1]), (0, 0, 255), 1)

    # Draw points
    for x, y in landmarks:
        cv2.circle(image, (x, y), 2, (0, 255, 255), -1)

    return image
```

---

## Use Cases

### Face Alignment

```python
from uniface import face_alignment

# Align face using 5-point landmarks
aligned = face_alignment(image, faces[0].landmarks)
# Returns: 112x112 aligned face
```

### Eye Aspect Ratio (Blink Detection)

```python
import numpy as np

def eye_aspect_ratio(eye_landmarks):
    """Calculate eye aspect ratio for blink detection."""
    # Vertical distances
    v1 = np.linalg.norm(eye_landmarks[1] - eye_landmarks[5])
    v2 = np.linalg.norm(eye_landmarks[2] - eye_landmarks[4])

    # Horizontal distance
    h = np.linalg.norm(eye_landmarks[0] - eye_landmarks[3])

    ear = (v1 + v2) / (2.0 * h)
    return ear

# Usage with 106-point landmarks
left_eye = landmarks[63:72]  # Approximate eye points
ear = eye_aspect_ratio(left_eye)

if ear < 0.2:
    print("Eye closed (blink detected)")
```

### Head Pose Estimation

```python
import cv2
import numpy as np

def estimate_head_pose(landmarks, image_shape):
    """Estimate head pose from facial landmarks."""
    # 3D model points (generic face model)
    model_points = np.array([
        (0.0, 0.0, 0.0),          # Nose tip
        (0.0, -330.0, -65.0),     # Chin
        (-225.0, 170.0, -135.0),  # Left eye corner
        (225.0, 170.0, -135.0),   # Right eye corner
        (-150.0, -150.0, -125.0), # Left mouth corner
        (150.0, -150.0, -125.0)   # Right mouth corner
    ], dtype=np.float64)

    # 2D image points (from 106 landmarks)
    image_points = np.array([
        landmarks[51],  # Nose tip
        landmarks[16],  # Chin
        landmarks[63],  # Left eye corner
        landmarks[76],  # Right eye corner
        landmarks[87],  # Left mouth corner
        landmarks[93]   # Right mouth corner
    ], dtype=np.float64)

    # Camera matrix
    h, w = image_shape[:2]
    focal_length = w
    center = (w / 2, h / 2)
    camera_matrix = np.array([
        [focal_length, 0, center[0]],
        [0, focal_length, center[1]],
        [0, 0, 1]
    ], dtype=np.float64)

    # Solve PnP
    dist_coeffs = np.zeros((4, 1))
    success, rotation_vector, translation_vector = cv2.solvePnP(
        model_points, image_points, camera_matrix, dist_coeffs
    )

    return rotation_vector, translation_vector
```

---

## Factory Function

```python
from uniface import create_landmarker

landmarker = create_landmarker()  # Returns Landmark106
```

---

## See Also

- [Detection Module](detection.md) - Face detection with 5-point landmarks
- [Attributes Module](attributes.md) - Age, gender, emotion
- [Gaze Module](gaze.md) - Gaze estimation
- [Concepts: Coordinate Systems](../concepts/coordinate-systems.md) - Landmark formats
265
docs/modules/parsing.md
Normal file
@@ -0,0 +1,265 @@
# Parsing

Face parsing segments faces into semantic components (skin, eyes, nose, mouth, hair, etc.).

---

## Available Models

| Model | Backbone | Size | Classes |
|-------|----------|------|---------|
| **BiSeNet ResNet18** :material-check-circle: | ResNet18 | 51 MB | 19 |
| BiSeNet ResNet34 | ResNet34 | 89 MB | 19 |

---

## Basic Usage

```python
import cv2
from uniface.parsing import BiSeNet
from uniface.visualization import vis_parsing_maps

# Initialize parser
parser = BiSeNet()

# Load face image (cropped)
face_image = cv2.imread("face.jpg")

# Parse face
mask = parser.parse(face_image)
print(f"Mask shape: {mask.shape}")  # (H, W)

# Visualize
face_rgb = cv2.cvtColor(face_image, cv2.COLOR_BGR2RGB)
vis_result = vis_parsing_maps(face_rgb, mask, save_image=False)

# Save result
vis_bgr = cv2.cvtColor(vis_result, cv2.COLOR_RGB2BGR)
cv2.imwrite("parsed.jpg", vis_bgr)
```

---

## 19 Facial Component Classes

| ID | Class | ID | Class |
|----|-------|----|-------|
| 0 | Background | 10 | Ear Ring |
| 1 | Skin | 11 | Nose |
| 2 | Left Eyebrow | 12 | Mouth |
| 3 | Right Eyebrow | 13 | Upper Lip |
| 4 | Left Eye | 14 | Lower Lip |
| 5 | Right Eye | 15 | Neck |
| 6 | Eye Glasses | 16 | Neck Lace |
| 7 | Left Ear | 17 | Cloth |
| 8 | Right Ear | 18 | Hair |
| 9 | Hat | | |

---

## Model Variants

```python
from uniface.parsing import BiSeNet
from uniface.constants import ParsingWeights

# Default (ResNet18)
parser = BiSeNet()

# Higher accuracy (ResNet34)
parser = BiSeNet(model_name=ParsingWeights.RESNET34)
```

| Variant | Params | Size |
|---------|--------|------|
| **RESNET18** :material-check-circle: | 13.3M | 51 MB |
| RESNET34 | 24.1M | 89 MB |

---

## Full Pipeline

### With Face Detection

```python
import cv2
from uniface import RetinaFace
from uniface.parsing import BiSeNet
from uniface.visualization import vis_parsing_maps

detector = RetinaFace()
parser = BiSeNet()

image = cv2.imread("photo.jpg")
faces = detector.detect(image)

for i, face in enumerate(faces):
    # Crop face
    x1, y1, x2, y2 = map(int, face.bbox)
    face_crop = image[y1:y2, x1:x2]

    # Parse
    mask = parser.parse(face_crop)

    # Visualize
    face_rgb = cv2.cvtColor(face_crop, cv2.COLOR_BGR2RGB)
    vis_result = vis_parsing_maps(face_rgb, mask, save_image=False)

    # Save
    vis_bgr = cv2.cvtColor(vis_result, cv2.COLOR_RGB2BGR)
    cv2.imwrite(f"face_{i}_parsed.jpg", vis_bgr)
```

---

## Extract Specific Components

### Get Single Component Mask

```python
import numpy as np

# Parse face
mask = parser.parse(face_image)

# Extract specific component
SKIN = 1
HAIR = 18
LEFT_EYE = 4
RIGHT_EYE = 5

# Binary mask for skin
skin_mask = (mask == SKIN).astype(np.uint8) * 255

# Binary mask for hair
hair_mask = (mask == HAIR).astype(np.uint8) * 255

# Binary mask for eyes
eyes_mask = ((mask == LEFT_EYE) | (mask == RIGHT_EYE)).astype(np.uint8) * 255
```

### Count Pixels per Component

```python
import numpy as np

mask = parser.parse(face_image)

component_names = {
    0: 'Background', 1: 'Skin', 2: 'L-Eyebrow', 3: 'R-Eyebrow',
    4: 'L-Eye', 5: 'R-Eye', 6: 'Glasses', 7: 'L-Ear', 8: 'R-Ear',
    9: 'Hat', 10: 'Earring', 11: 'Nose', 12: 'Mouth',
    13: 'U-Lip', 14: 'L-Lip', 15: 'Neck', 16: 'Necklace',
    17: 'Cloth', 18: 'Hair'
}

for class_id in np.unique(mask):
    pixel_count = np.sum(mask == class_id)
    name = component_names.get(class_id, f'Class {class_id}')
    print(f"{name}: {pixel_count} pixels")
```

---

## Applications

### Face Makeup

Apply virtual makeup using component masks:

```python
import cv2
import numpy as np

def apply_lip_color(image, mask, color=(180, 50, 50)):
    """Apply lip color using the parsing mask."""
    result = image.copy()

    # Get lip region (upper + lower lip) as a boolean mask
    lip_mask = (mask == 13) | (mask == 14)

    # Create solid color overlay
    overlay = np.zeros_like(image)
    overlay[:] = color

    # Alpha-blend the overlay, then copy only the lip pixels back
    alpha = 0.4
    blended = cv2.addWeighted(image, 1 - alpha, overlay, alpha, 0)
    result[lip_mask] = blended[lip_mask]

    return result
```

### Background Replacement

```python
def replace_background(image, mask, background):
    """Replace background using the parsing mask."""
    # Create foreground mask (everything except background)
    foreground_mask = (mask != 0).astype(np.uint8)

    # Resize background to match image
    background = cv2.resize(background, (image.shape[1], image.shape[0]))

    # Combine
    result = image.copy()
    result[foreground_mask == 0] = background[foreground_mask == 0]

    return result
```

### Hair Segmentation

```python
def get_hair_mask(mask):
    """Extract a clean hair mask."""
    hair_mask = (mask == 18).astype(np.uint8) * 255

    # Clean up with morphological operations
    kernel = np.ones((5, 5), np.uint8)
    hair_mask = cv2.morphologyEx(hair_mask, cv2.MORPH_CLOSE, kernel)
    hair_mask = cv2.morphologyEx(hair_mask, cv2.MORPH_OPEN, kernel)

    return hair_mask
```

---

## Visualization Options

```python
from uniface.visualization import vis_parsing_maps

# Default visualization
vis_result = vis_parsing_maps(face_rgb, mask)

# With different parameters
vis_result = vis_parsing_maps(
    face_rgb,
    mask,
    save_image=False,  # Don't save to file
)
```

---

## Factory Function

```python
from uniface import create_face_parser

parser = create_face_parser()  # Returns BiSeNet
```

---

## Next Steps

- [Gaze](gaze.md) - Gaze estimation
- [Privacy](privacy.md) - Face anonymization
- [Detection](detection.md) - Face detection
277
docs/modules/privacy.md
Normal file
@@ -0,0 +1,277 @@
# Privacy

Face anonymization protects privacy by blurring or obscuring faces in images and videos.

---

## Available Methods

| Method | Description |
|--------|-------------|
| **pixelate** | Blocky pixelation |
| **gaussian** | Smooth blur |
| **blackout** | Solid color fill |
| **elliptical** | Oval-shaped blur |
| **median** | Edge-preserving blur |

---

## Quick Start

### One-Line Anonymization

```python
import cv2
from uniface.privacy import anonymize_faces

image = cv2.imread("group_photo.jpg")
anonymized = anonymize_faces(image, method='pixelate')
cv2.imwrite("anonymized.jpg", anonymized)
```

---

## BlurFace Class

For more control, use the `BlurFace` class:

```python
import cv2
from uniface import RetinaFace
from uniface.privacy import BlurFace

detector = RetinaFace()
blurrer = BlurFace(method='gaussian', blur_strength=5.0)

image = cv2.imread("photo.jpg")
faces = detector.detect(image)
anonymized = blurrer.anonymize(image, faces)

cv2.imwrite("anonymized.jpg", anonymized)
```

---

## Blur Methods

### Pixelate

Blocky pixelation effect (common in news media):

```python
blurrer = BlurFace(method='pixelate', pixel_blocks=10)
```

| Parameter | Default | Description |
|-----------|---------|-------------|
| `pixel_blocks` | 10 | Number of blocks (lower = more pixelated) |

### Gaussian

Smooth, natural-looking blur:

```python
blurrer = BlurFace(method='gaussian', blur_strength=3.0)
```

| Parameter | Default | Description |
|-----------|---------|-------------|
| `blur_strength` | 3.0 | Blur intensity (higher = more blur) |

### Blackout

Solid color fill for maximum privacy:

```python
blurrer = BlurFace(method='blackout', color=(0, 0, 0))
```

| Parameter | Default | Description |
|-----------|---------|-------------|
| `color` | (0, 0, 0) | Fill color (BGR format) |

### Elliptical

Oval-shaped blur matching natural face shape:

```python
blurrer = BlurFace(method='elliptical', blur_strength=3.0, margin=20)
```

| Parameter | Default | Description |
|-----------|---------|-------------|
| `blur_strength` | 3.0 | Blur intensity |
| `margin` | 20 | Margin around face |

### Median

Edge-preserving blur with an artistic effect:

```python
blurrer = BlurFace(method='median', blur_strength=3.0)
```

| Parameter | Default | Description |
|-----------|---------|-------------|
| `blur_strength` | 3.0 | Blur intensity |

---

## In-Place Processing

Modify the image directly (faster, saves memory):

```python
blurrer = BlurFace(method='pixelate')

# In-place modification
result = blurrer.anonymize(image, faces, inplace=True)
# 'image' and 'result' point to the same array
```

---

## Real-Time Anonymization

### Webcam

```python
import cv2
from uniface import RetinaFace
from uniface.privacy import BlurFace

detector = RetinaFace()
blurrer = BlurFace(method='pixelate')

cap = cv2.VideoCapture(0)

while True:
    ret, frame = cap.read()
    if not ret:
        break

    faces = detector.detect(frame)
    frame = blurrer.anonymize(frame, faces, inplace=True)

    cv2.imshow('Anonymized', frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
```

### Video File

```python
import cv2
from uniface import RetinaFace
from uniface.privacy import BlurFace

detector = RetinaFace()
blurrer = BlurFace(method='gaussian')

cap = cv2.VideoCapture("input_video.mp4")
fps = cap.get(cv2.CAP_PROP_FPS)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter('output_video.mp4', fourcc, fps, (width, height))

while True:
    ret, frame = cap.read()
    if not ret:
        break

    faces = detector.detect(frame)
    frame = blurrer.anonymize(frame, faces, inplace=True)
    out.write(frame)

cap.release()
out.release()
```

---

## Selective Anonymization

### Exclude Specific Faces

```python
import numpy as np

def anonymize_except(image, all_faces, exclude_embeddings, recognizer, blurrer, threshold=0.6):
    """Anonymize all faces except those matching exclude_embeddings."""
    faces_to_blur = []

    for face in all_faces:
        # Get embedding
        embedding = recognizer.get_normalized_embedding(image, face.landmarks)

        # Check if this face should be excluded
        should_exclude = False
        for ref_emb in exclude_embeddings:
            similarity = np.dot(embedding, ref_emb.T)[0][0]
            if similarity > threshold:
                should_exclude = True
                break

        if not should_exclude:
            faces_to_blur.append(face)

    # Blur remaining faces
    return blurrer.anonymize(image, faces_to_blur)
```

### Confidence-Based

```python
def anonymize_low_confidence(image, faces, blurrer, confidence_threshold=0.8):
    """Anonymize faces below the confidence threshold."""
    faces_to_blur = [f for f in faces if f.confidence < confidence_threshold]
    return blurrer.anonymize(image, faces_to_blur)
```

---

## Comparison

```python
import cv2
from uniface import RetinaFace
from uniface.privacy import BlurFace

detector = RetinaFace()
image = cv2.imread("photo.jpg")
faces = detector.detect(image)

methods = ['pixelate', 'gaussian', 'blackout', 'elliptical', 'median']

for method in methods:
    blurrer = BlurFace(method=method)
    result = blurrer.anonymize(image.copy(), faces)
    cv2.imwrite(f"anonymized_{method}.jpg", result)
```

---

## Command-Line Tool

```bash
# Anonymize image with pixelation
python tools/face_anonymize.py --source photo.jpg

# Real-time webcam
python tools/face_anonymize.py --source 0 --method gaussian

# Custom blur strength
python tools/face_anonymize.py --source photo.jpg --method gaussian --blur-strength 5.0
```

---

## Next Steps

- [Anonymize Stream Recipe](../recipes/anonymize-stream.md) - Video pipeline
- [Detection](detection.md) - Face detection options
- [Batch Processing Recipe](../recipes/batch-processing.md) - Process multiple files
298
docs/modules/recognition.md
Normal file
@@ -0,0 +1,298 @@
# Recognition

Face recognition extracts embeddings for identity verification and face search.

---

## Available Models

| Model | Backbone | Size | Embedding Dim |
|-------|----------|------|---------------|
| **AdaFace** | IR-18/IR-101 | 92-249 MB | 512 |
| **ArcFace** | MobileNet/ResNet | 8-166 MB | 512 |
| **MobileFace** | MobileNet V2/V3 | 1-10 MB | 512 |
| **SphereFace** | Sphere20/36 | 50-92 MB | 512 |

---

## AdaFace

Face recognition using adaptive margin based on image quality.

### Basic Usage

```python
import cv2
from uniface import RetinaFace, AdaFace

detector = RetinaFace()
recognizer = AdaFace()

# Detect face
image = cv2.imread("photo.jpg")
faces = detector.detect(image)

# Extract embedding
if faces:
    embedding = recognizer.get_normalized_embedding(image, faces[0].landmarks)
    print(f"Embedding shape: {embedding.shape}")  # (1, 512)
```

### Model Variants

```python
from uniface import AdaFace
from uniface.constants import AdaFaceWeights

# Lightweight (default)
recognizer = AdaFace(model_name=AdaFaceWeights.IR_18)

# High accuracy
recognizer = AdaFace(model_name=AdaFaceWeights.IR_101)

# Force CPU execution
recognizer = AdaFace(providers=['CPUExecutionProvider'])
```

| Variant | Dataset | Size | IJB-B | IJB-C |
|---------|---------|------|-------|-------|
| **IR_18** :material-check-circle: | WebFace4M | 92 MB | 93.03% | 94.99% |
| IR_101 | WebFace12M | 249 MB | - | 97.66% |

!!! info "Benchmark Metrics"

    IJB-B and IJB-C accuracy reported as TAR@FAR=0.01%

---
|
||||
|
||||
## ArcFace
|
||||
|
||||
Face recognition using additive angular margin loss.
|
||||
|
||||
### Basic Usage
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace, ArcFace
|
||||
|
||||
detector = RetinaFace()
|
||||
recognizer = ArcFace()
|
||||
|
||||
# Detect face
|
||||
faces = detector.detect(image)
|
||||
|
||||
# Extract embedding
|
||||
if faces:
|
||||
embedding = recognizer.get_normalized_embedding(image, faces[0].landmarks)
|
||||
print(f"Embedding shape: {embedding.shape}") # (1, 512)
|
||||
```
|
||||
|
||||
### Model Variants
|
||||
|
||||
```python
|
||||
from uniface import ArcFace
|
||||
from uniface.constants import ArcFaceWeights
|
||||
|
||||
# Lightweight (default)
|
||||
recognizer = ArcFace(model_name=ArcFaceWeights.MNET)
|
||||
|
||||
# High accuracy
|
||||
recognizer = ArcFace(model_name=ArcFaceWeights.RESNET)
|
||||
|
||||
# Force CPU execution
|
||||
recognizer = ArcFace(providers=['CPUExecutionProvider'])
|
||||
```
|
||||
|
||||
| Variant | Backbone | Size | LFW | CFP-FP | AgeDB-30 | IJB-C |
|
||||
|---------|----------|------|-----|--------|----------|-------|
|
||||
| **MNET** :material-check-circle: | MobileNet | 8 MB | 99.70% | 98.00% | 96.58% | 95.02% |
|
||||
| RESNET | ResNet50 | 166 MB | 99.83% | 99.33% | 98.23% | 97.25% |
|
||||
|
||||
!!! info "Training Data & Metrics"
|
||||
**Dataset**: Trained on WebFace600K (600K images)
|
||||
|
||||
**Accuracy**: IJB-C reported as TAR@FAR=1e-4
|
||||
|
||||
---
|
||||
|
||||
## MobileFace
|
||||
|
||||
Lightweight face recognition models with MobileNet backbones.
|
||||
|
||||
### Basic Usage
|
||||
|
||||
```python
|
||||
from uniface import MobileFace
|
||||
|
||||
recognizer = MobileFace()
|
||||
embedding = recognizer.get_normalized_embedding(image, landmarks)
|
||||
```
|
||||
|
||||
### Model Variants
|
||||
|
||||
```python
|
||||
from uniface import MobileFace
|
||||
from uniface.constants import MobileFaceWeights
|
||||
|
||||
# Ultra-lightweight
|
||||
recognizer = MobileFace(model_name=MobileFaceWeights.MNET_025)
|
||||
|
||||
# Balanced (default)
|
||||
recognizer = MobileFace(model_name=MobileFaceWeights.MNET_V2)
|
||||
|
||||
# Higher accuracy
|
||||
recognizer = MobileFace(model_name=MobileFaceWeights.MNET_V3_LARGE)
|
||||
```
|
||||
|
||||
| Variant | Params | Size | LFW | CALFW | CPLFW | AgeDB-30 |
|
||||
|---------|--------|------|-----|-------|-------|----------|
|
||||
| MNET_025 | 0.36M | 1 MB | 98.76% | 92.02% | 82.37% | 90.02% |
|
||||
| **MNET_V2** :material-check-circle: | 2.29M | 4 MB | 99.55% | 94.87% | 86.89% | 95.16% |
|
||||
| MNET_V3_SMALL | 1.25M | 3 MB | 99.30% | 93.77% | 85.29% | 92.79% |
|
||||
| MNET_V3_LARGE | 3.52M | 10 MB | 99.53% | 94.56% | 86.79% | 95.13% |
|
||||
|
||||
---
|
||||
|
||||
## SphereFace
|
||||
|
||||
Face recognition using angular softmax loss (A-Softmax).
|
||||
|
||||
### Basic Usage
|
||||
|
||||
```python
|
||||
from uniface import SphereFace
|
||||
from uniface.constants import SphereFaceWeights
|
||||
|
||||
recognizer = SphereFace(model_name=SphereFaceWeights.SPHERE20)
|
||||
embedding = recognizer.get_normalized_embedding(image, landmarks)
|
||||
```
|
||||
|
||||
| Variant | Params | Size | LFW | CALFW | CPLFW | AgeDB-30 |
|
||||
|---------|--------|------|-----|-------|-------|----------|
|
||||
| SPHERE20 | 24.5M | 50 MB | 99.67% | 95.61% | 88.75% | 96.58% |
|
||||
| SPHERE36 | 34.6M | 92 MB | 99.72% | 95.64% | 89.92% | 96.83% |
|
||||
|
||||
---
|
||||
|
||||
## Face Comparison
|
||||
|
||||
### Compute Similarity
|
||||
|
||||
```python
|
||||
from uniface import compute_similarity
|
||||
import numpy as np
|
||||
|
||||
# Extract embeddings
|
||||
emb1 = recognizer.get_normalized_embedding(image1, landmarks1)
|
||||
emb2 = recognizer.get_normalized_embedding(image2, landmarks2)
|
||||
|
||||
# Method 1: Using utility function
|
||||
similarity = compute_similarity(emb1, emb2)
|
||||
|
||||
# Method 2: Direct computation
|
||||
similarity = np.dot(emb1, emb2.T)[0][0]
|
||||
|
||||
print(f"Similarity: {similarity:.4f}")
|
||||
```
|
||||
|
||||
### Threshold Guidelines
|
||||
|
||||
| Threshold | Decision | Use Case |
|
||||
|-----------|----------|----------|
|
||||
| > 0.7 | Very high confidence | Security-critical |
|
||||
| > 0.6 | Same person | General verification |
|
||||
| 0.4 - 0.6 | Uncertain | Manual review needed |
|
||||
| < 0.4 | Different people | Rejection |
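
These bands can be encoded directly in code. A minimal decision helper mirroring the guideline values above (the cutoffs are illustrative and should be calibrated on your own data):

```python
def interpret_similarity(similarity: float) -> str:
    """Map a cosine similarity score to the guideline decision bands."""
    if similarity > 0.7:
        return "same person (very high confidence)"
    if similarity > 0.6:
        return "same person"
    if similarity >= 0.4:
        return "uncertain - manual review"
    return "different people"

print(interpret_similarity(0.65))  # same person
```
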

---

## Face Alignment

Recognition models require aligned faces. UniFace handles this internally:

```python
# Alignment is done automatically
embedding = recognizer.get_normalized_embedding(image, landmarks)

# Or manually align
from uniface import face_alignment

aligned_face = face_alignment(image, landmarks)
# Returns: 112x112 aligned face image
```

---

## Building a Face Database

```python
import cv2
import numpy as np
from uniface import RetinaFace, ArcFace

detector = RetinaFace()
recognizer = ArcFace()

# Build database (assumes person_images maps person IDs to image paths)
database = {}
for person_id, image_path in person_images.items():
    image = cv2.imread(image_path)
    faces = detector.detect(image)

    if faces:
        embedding = recognizer.get_normalized_embedding(image, faces[0].landmarks)
        database[person_id] = embedding

# Save for later use
np.savez('face_database.npz', **database)

# Load database
data = np.load('face_database.npz')
database = {key: data[key] for key in data.files}
```

---

## Face Search

Find a person in a database:

```python
def search_face(query_embedding, database, threshold=0.6):
    """Find the best match in the database."""
    best_match = None
    best_similarity = -1

    for person_id, db_embedding in database.items():
        similarity = np.dot(query_embedding, db_embedding.T)[0][0]

        if similarity > best_similarity and similarity > threshold:
            best_similarity = similarity
            best_match = person_id

    return best_match, best_similarity

# Usage
query_embedding = recognizer.get_normalized_embedding(query_image, landmarks)
match, similarity = search_face(query_embedding, database)

if match:
    print(f"Found: {match} (similarity: {similarity:.4f})")
else:
    print("No match found")
```

---

## Factory Function

```python
from uniface import create_recognizer

# Available methods: 'arcface', 'adaface', 'mobileface', 'sphereface'
recognizer = create_recognizer('arcface')
recognizer = create_recognizer('adaface')
```

---

## See Also

- [Detection Module](detection.md) - Detect faces first
- [Face Search Recipe](../recipes/face-search.md) - Complete search system
- [Thresholds](../concepts/thresholds-calibration.md) - Calibration guide
266
docs/modules/spoofing.md
Normal file
@@ -0,0 +1,266 @@
# Anti-Spoofing

Face anti-spoofing detects whether a face is real (live) or fake (photo, video replay, mask).

---

## Available Models

| Model | Size |
|-------|------|
| MiniFASNet V1SE | 1.2 MB |
| **MiniFASNet V2** :material-check-circle: | 1.2 MB |

---

## Basic Usage

```python
import cv2
from uniface import RetinaFace
from uniface.spoofing import MiniFASNet

detector = RetinaFace()
spoofer = MiniFASNet()

image = cv2.imread("photo.jpg")
faces = detector.detect(image)

for face in faces:
    result = spoofer.predict(image, face.bbox)

    label = "Real" if result.is_real else "Fake"
    print(f"{label}: {result.confidence:.1%}")
```

---

## Output Format

```python
result = spoofer.predict(image, face.bbox)

# SpoofingResult dataclass
result.is_real     # True = real, False = fake
result.confidence  # 0.0 to 1.0
```

---

## Model Variants

```python
from uniface.spoofing import MiniFASNet
from uniface.constants import MiniFASNetWeights

# Default (V2, recommended)
spoofer = MiniFASNet()

# V1SE variant
spoofer = MiniFASNet(model_name=MiniFASNetWeights.V1SE)
```

| Variant | Size | Scale Factor |
|---------|------|--------------|
| V1SE | 1.2 MB | 4.0 |
| **V2** :material-check-circle: | 1.2 MB | 2.7 |

---

## Confidence Thresholds

The default threshold is 0.5. Adjust it for your use case:

```python
result = spoofer.predict(image, face.bbox)

# High security (fewer false accepts)
HIGH_THRESHOLD = 0.7
if result.confidence > HIGH_THRESHOLD:
    print("Real (high confidence)")
else:
    print("Suspicious")

# Balanced
if result.is_real:  # Uses default 0.5 threshold
    print("Real")
else:
    print("Fake")
```

---

## Visualization

```python
import cv2

def draw_spoofing_result(image, face, result):
    """Draw spoofing result on image."""
    x1, y1, x2, y2 = map(int, face.bbox)

    # Color based on result
    color = (0, 255, 0) if result.is_real else (0, 0, 255)
    label = "Real" if result.is_real else "Fake"

    # Draw bounding box
    cv2.rectangle(image, (x1, y1), (x2, y2), color, 2)

    # Draw label
    text = f"{label}: {result.confidence:.1%}"
    cv2.putText(image, text, (x1, y1 - 10),
                cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)

    return image

# Usage
for face in faces:
    result = spoofer.predict(image, face.bbox)
    image = draw_spoofing_result(image, face, result)

cv2.imwrite("spoofing_result.jpg", image)
```

---

## Real-Time Liveness Detection

```python
import cv2
from uniface import RetinaFace
from uniface.spoofing import MiniFASNet

detector = RetinaFace()
spoofer = MiniFASNet()

cap = cv2.VideoCapture(0)

while True:
    ret, frame = cap.read()
    if not ret:
        break

    faces = detector.detect(frame)

    for face in faces:
        result = spoofer.predict(frame, face.bbox)

        # Draw result
        x1, y1, x2, y2 = map(int, face.bbox)
        color = (0, 255, 0) if result.is_real else (0, 0, 255)
        label = f"{'Real' if result.is_real else 'Fake'}: {result.confidence:.0%}"

        cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
        cv2.putText(frame, label, (x1, y1 - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)

    cv2.imshow("Liveness Detection", frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
```

---

## Use Cases

### Access Control

```python
def verify_liveness(image, face, spoofer, threshold=0.6):
    """Verify face is real for access control."""
    result = spoofer.predict(image, face.bbox)

    if result.is_real and result.confidence > threshold:
        return True, result.confidence
    return False, result.confidence

# Usage
is_live, confidence = verify_liveness(image, face, spoofer)
if is_live:
    print(f"Access granted (confidence: {confidence:.1%})")
else:
    print("Access denied - possible spoof attempt")
```

### Multi-Frame Verification

For higher security, verify across multiple frames:

```python
def verify_liveness_multiframe(frames, detector, spoofer, min_real=3):
    """Verify liveness across multiple frames."""
    real_count = 0

    for frame in frames:
        faces = detector.detect(frame)
        if not faces:
            continue

        result = spoofer.predict(frame, faces[0].bbox)
        if result.is_real:
            real_count += 1

    return real_count >= min_real

# Collect frames and verify
frames = []
for _ in range(5):
    ret, frame = cap.read()
    if ret:
        frames.append(frame)

is_verified = verify_liveness_multiframe(frames, detector, spoofer)
```

---

## Attack Types Detected

MiniFASNet can detect various spoof attacks:

| Attack Type | Detection |
|-------------|-----------|
| Printed photos | ✅ |
| Screen replay | ✅ |
| Video replay | ✅ |
| Paper masks | ✅ |
| 3D masks | Limited |

!!! warning "Limitations"
    - High-quality 3D masks may not be detected
    - Performance varies with lighting and image quality
    - Always combine with other verification methods for high-security applications (see the sketch below)
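
For example, liveness can gate an identity check so that both must pass. A minimal sketch combining `MiniFASNet` with an `ArcFace`-style reference embedding (the 0.7 and 0.6 cutoffs are illustrative, not library defaults):

```python
import numpy as np

def verify_live_identity(image, face, spoofer, recognizer, ref_embedding,
                         liveness_threshold=0.7, similarity_threshold=0.6):
    """Accept only a face that passes liveness AND matches the reference embedding."""
    result = spoofer.predict(image, face.bbox)
    if not (result.is_real and result.confidence > liveness_threshold):
        return False  # Reject suspected spoofs before running recognition

    embedding = recognizer.get_normalized_embedding(image, face.landmarks)
    similarity = np.dot(embedding, ref_embedding.T)[0][0]
    return similarity > similarity_threshold
```
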
---

## Command-Line Tool

```bash
# Image
python tools/spoofing.py --source photo.jpg

# Webcam
python tools/spoofing.py --source 0
```

---

## Factory Function

```python
from uniface import create_spoofer

spoofer = create_spoofer()  # Returns MiniFASNet
```

---

## Next Steps

- [Privacy](privacy.md) - Face anonymization
- [Detection](detection.md) - Face detection
- [Recognition](recognition.md) - Face recognition
57
docs/notebooks.md
Normal file
@@ -0,0 +1,57 @@
# Interactive Notebooks

Run UniFace examples directly in your browser with Google Colab, or download and run them locally with Jupyter.

---

## Available Notebooks

| Notebook | Colab | Description |
|----------|:-----:|-------------|
| [Face Detection](https://github.com/yakhyo/uniface/blob/main/examples/01_face_detection.ipynb) | [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/01_face_detection.ipynb) | Detect faces and 5-point landmarks |
| [Face Alignment](https://github.com/yakhyo/uniface/blob/main/examples/02_face_alignment.ipynb) | [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/02_face_alignment.ipynb) | Align faces for recognition |
| [Face Verification](https://github.com/yakhyo/uniface/blob/main/examples/03_face_verification.ipynb) | [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/03_face_verification.ipynb) | Compare faces for identity |
| [Face Search](https://github.com/yakhyo/uniface/blob/main/examples/04_face_search.ipynb) | [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/04_face_search.ipynb) | Find a person in group photos |
| [Face Analyzer](https://github.com/yakhyo/uniface/blob/main/examples/05_face_analyzer.ipynb) | [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/05_face_analyzer.ipynb) | All-in-one face analysis |
| [Face Parsing](https://github.com/yakhyo/uniface/blob/main/examples/06_face_parsing.ipynb) | [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/06_face_parsing.ipynb) | Semantic face segmentation |
| [Face Anonymization](https://github.com/yakhyo/uniface/blob/main/examples/07_face_anonymization.ipynb) | [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/07_face_anonymization.ipynb) | Privacy-preserving blur |
| [Gaze Estimation](https://github.com/yakhyo/uniface/blob/main/examples/08_gaze_estimation.ipynb) | [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/08_gaze_estimation.ipynb) | Gaze direction estimation |

---

## Running Locally

Download and run notebooks on your machine:

```bash
# Clone the repository
git clone https://github.com/yakhyo/uniface.git
cd uniface

# Install dependencies
pip install uniface jupyter

# Launch Jupyter
jupyter notebook examples/
```

---

## Running on Google Colab

Click any **"Open in Colab"** badge above. The notebooks automatically:

1. Install UniFace via pip
2. Clone the repository to access test images
3. Set up the correct working directory

!!! tip "GPU Acceleration"
    In Colab, go to **Runtime → Change runtime type → GPU** for faster inference.

---

## Next Steps

- [Quickstart](quickstart.md) - Code snippets for common use cases
- [Tutorials](recipes/image-pipeline.md) - Step-by-step workflow guides
- [API Reference](modules/detection.md) - Detailed module documentation
5
docs/overrides/home.html
Normal file
@@ -0,0 +1,5 @@
{% extends "main.html" %}

{% block source %}
<!-- Hide edit/view source on home page -->
{% endblock %}
426
docs/quickstart.md
Normal file
@@ -0,0 +1,426 @@
# Quickstart

Get up and running with UniFace in 5 minutes. This guide covers the most common use cases.

---

## Face Detection

Detect faces in an image:

```python
import cv2
from uniface import RetinaFace

# Load image
image = cv2.imread("photo.jpg")

# Initialize detector (models auto-download on first use)
detector = RetinaFace()

# Detect faces
faces = detector.detect(image)

# Print results
for i, face in enumerate(faces):
    print(f"Face {i+1}:")
    print(f"  Confidence: {face.confidence:.2f}")
    print(f"  BBox: {face.bbox}")
    print(f"  Landmarks: {len(face.landmarks)} points")
```

**Output:**

```
Face 1:
  Confidence: 0.99
  BBox: [120.5, 85.3, 245.8, 210.6]
  Landmarks: 5 points
```

---

## Visualize Detections

Draw bounding boxes and landmarks:

```python
import cv2
from uniface import RetinaFace
from uniface.visualization import draw_detections

# Detect faces
detector = RetinaFace()
image = cv2.imread("photo.jpg")
faces = detector.detect(image)

# Extract visualization data
bboxes = [f.bbox for f in faces]
scores = [f.confidence for f in faces]
landmarks = [f.landmarks for f in faces]

# Draw on image
draw_detections(
    image=image,
    bboxes=bboxes,
    scores=scores,
    landmarks=landmarks,
    vis_threshold=0.6,
)

# Save result
cv2.imwrite("output.jpg", image)
```

---

## Face Recognition

Compare two faces:

```python
import cv2
import numpy as np
from uniface import RetinaFace, ArcFace

# Initialize models
detector = RetinaFace()
recognizer = ArcFace()

# Load two images
image1 = cv2.imread("person1.jpg")
image2 = cv2.imread("person2.jpg")

# Detect faces
faces1 = detector.detect(image1)
faces2 = detector.detect(image2)

if faces1 and faces2:
    # Extract embeddings
    emb1 = recognizer.get_normalized_embedding(image1, faces1[0].landmarks)
    emb2 = recognizer.get_normalized_embedding(image2, faces2[0].landmarks)

    # Compute similarity (cosine similarity)
    similarity = np.dot(emb1, emb2.T)[0][0]

    # Interpret result
    if similarity > 0.6:
        print(f"Same person (similarity: {similarity:.3f})")
    else:
        print(f"Different people (similarity: {similarity:.3f})")
```

!!! tip "Similarity Thresholds"
    - `> 0.6`: Same person (high confidence)
    - `0.4 - 0.6`: Uncertain (manual review)
    - `< 0.4`: Different people

---

## Age & Gender Detection

```python
import cv2
from uniface import RetinaFace, AgeGender

# Initialize models
detector = RetinaFace()
age_gender = AgeGender()

# Load image
image = cv2.imread("photo.jpg")
faces = detector.detect(image)

# Predict attributes
for i, face in enumerate(faces):
    result = age_gender.predict(image, face.bbox)
    print(f"Face {i+1}: {result.sex}, {result.age} years old")
```

**Output:**

```
Face 1: Male, 32 years old
Face 2: Female, 28 years old
```

---

## FairFace Attributes

Detect race, gender, and age group:

```python
import cv2
from uniface import RetinaFace, FairFace

detector = RetinaFace()
fairface = FairFace()

image = cv2.imread("photo.jpg")
faces = detector.detect(image)

for i, face in enumerate(faces):
    result = fairface.predict(image, face.bbox)
    print(f"Face {i+1}: {result.sex}, {result.age_group}, {result.race}")
```

**Output:**

```
Face 1: Male, 30-39, East Asian
Face 2: Female, 20-29, White
```

---

## Facial Landmarks (106 Points)

```python
import cv2
from uniface import RetinaFace, Landmark106

detector = RetinaFace()
landmarker = Landmark106()

image = cv2.imread("photo.jpg")
faces = detector.detect(image)

if faces:
    landmarks = landmarker.get_landmarks(image, faces[0].bbox)
    print(f"Detected {len(landmarks)} landmarks")

    # Draw landmarks
    for x, y in landmarks.astype(int):
        cv2.circle(image, (x, y), 2, (0, 255, 0), -1)

    cv2.imwrite("landmarks.jpg", image)
```

---

## Gaze Estimation

```python
import cv2
import numpy as np
from uniface import RetinaFace, MobileGaze
from uniface.visualization import draw_gaze

detector = RetinaFace()
gaze_estimator = MobileGaze()

image = cv2.imread("photo.jpg")
faces = detector.detect(image)

for i, face in enumerate(faces):
    x1, y1, x2, y2 = map(int, face.bbox[:4])
    face_crop = image[y1:y2, x1:x2]

    if face_crop.size > 0:
        result = gaze_estimator.estimate(face_crop)
        print(f"Face {i+1}: pitch={np.degrees(result.pitch):.1f}°, yaw={np.degrees(result.yaw):.1f}°")

        # Draw gaze direction
        draw_gaze(image, face.bbox, result.pitch, result.yaw)

cv2.imwrite("gaze_output.jpg", image)
```

---

## Face Parsing

Segment face into semantic components:

```python
import cv2
import numpy as np
from uniface.parsing import BiSeNet
from uniface.visualization import vis_parsing_maps

parser = BiSeNet()

# Load face image (already cropped)
face_image = cv2.imread("face.jpg")

# Parse face into 19 components
mask = parser.parse(face_image)

# Visualize with overlay
face_rgb = cv2.cvtColor(face_image, cv2.COLOR_BGR2RGB)
vis_result = vis_parsing_maps(face_rgb, mask, save_image=False)

print(f"Detected {len(np.unique(mask))} facial components")
```

---

## Face Anonymization

Blur faces for privacy protection:

```python
from uniface.privacy import anonymize_faces
import cv2

# One-liner: automatic detection and blurring
image = cv2.imread("group_photo.jpg")
anonymized = anonymize_faces(image, method='pixelate')
cv2.imwrite("anonymized.jpg", anonymized)
```

**Manual control:**

```python
from uniface import RetinaFace
from uniface.privacy import BlurFace

detector = RetinaFace()
blurrer = BlurFace(method='gaussian', blur_strength=5.0)

faces = detector.detect(image)
anonymized = blurrer.anonymize(image, faces)
```

**Available methods:**

| Method | Description |
|--------|-------------|
| `pixelate` | Blocky effect (news media standard) |
| `gaussian` | Smooth, natural blur |
| `blackout` | Solid color boxes (maximum privacy) |
| `elliptical` | Soft oval blur (natural face shape) |
| `median` | Edge-preserving blur |

---

## Face Anti-Spoofing

Detect real vs. fake faces:

```python
import cv2
from uniface import RetinaFace
from uniface.spoofing import MiniFASNet

detector = RetinaFace()
spoofer = MiniFASNet()

image = cv2.imread("photo.jpg")
faces = detector.detect(image)

for i, face in enumerate(faces):
    result = spoofer.predict(image, face.bbox)
    label = 'Real' if result.is_real else 'Fake'
    print(f"Face {i+1}: {label} ({result.confidence:.1%})")
```

---

## Webcam Demo

Real-time face detection:

```python
import cv2
from uniface import RetinaFace
from uniface.visualization import draw_detections

detector = RetinaFace()
cap = cv2.VideoCapture(0)

print("Press 'q' to quit")

while True:
    ret, frame = cap.read()
    if not ret:
        break

    faces = detector.detect(frame)

    bboxes = [f.bbox for f in faces]
    scores = [f.confidence for f in faces]
    landmarks = [f.landmarks for f in faces]
    draw_detections(image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks)

    cv2.imshow("UniFace - Press 'q' to quit", frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
```

---

## Model Selection

For detailed model comparisons and benchmarks, see the [Model Zoo](models.md).

**Available models by task:**

| Task | Available Models |
|------|------------------|
| Detection | `RetinaFace`, `SCRFD`, `YOLOv5Face`, `YOLOv8Face` |
| Recognition | `ArcFace`, `AdaFace`, `MobileFace`, `SphereFace` |
| Gaze | `MobileGaze` (ResNet18/34/50, MobileNetV2, MobileOneS0) |
| Parsing | `BiSeNet` (ResNet18/34) |
| Attributes | `AgeGender`, `FairFace`, `Emotion` |
| Anti-Spoofing | `MiniFASNet` (V1SE, V2) |
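
Because detectors share a common `detect()` interface, swapping models is a one-line change. A minimal sketch, assuming the alternative detectors listed above are importable from the package top level and usable with default weights:

```python
import cv2
from uniface import RetinaFace, SCRFD

image = cv2.imread("photo.jpg")

# Both detectors are drop-in replacements for each other
for detector in (RetinaFace(), SCRFD()):
    faces = detector.detect(image)
    print(f"{type(detector).__name__}: {len(faces)} face(s)")
```
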
---

## Common Issues

### Models Not Downloading

```python
from uniface.model_store import verify_model_weights
from uniface.constants import RetinaFaceWeights

# Manually download a model
model_path = verify_model_weights(RetinaFaceWeights.MNET_V2)
print(f"Model downloaded to: {model_path}")
```

### Check Hardware Acceleration

```python
import onnxruntime as ort
print("Available providers:", ort.get_available_providers())

# macOS M-series should show: ['CoreMLExecutionProvider', ...]
# NVIDIA GPU should show: ['CUDAExecutionProvider', ...]
```

### Slow Performance on Mac

Verify you're using the ARM64 build of Python:

```bash
python -c "import platform; print(platform.machine())"
# Should show: arm64 (not x86_64)
```

### Import Errors

```python
# Correct imports
from uniface.detection import RetinaFace
from uniface.recognition import ArcFace
from uniface.landmark import Landmark106

# Also works (re-exported at package level)
from uniface import RetinaFace, ArcFace, Landmark106
```

---

## Next Steps

- [Model Zoo](models.md) - All models, benchmarks, and selection guide
- [API Reference](modules/detection.md) - Explore individual modules and their APIs
- [Tutorials](recipes/image-pipeline.md) - Step-by-step examples for common workflows
- [Guides](concepts/overview.md) - Learn about the architecture and design principles
99
docs/recipes/anonymize-stream.md
Normal file
@@ -0,0 +1,99 @@
# Anonymize Stream

Blur faces in real-time video streams for privacy protection.

!!! note "Work in Progress"
    This page contains example code patterns. Test thoroughly before using in production.

---

## Webcam Anonymization

```python
import cv2
from uniface import RetinaFace
from uniface.privacy import BlurFace

detector = RetinaFace()
blurrer = BlurFace(method='pixelate')
cap = cv2.VideoCapture(0)

while True:
    ret, frame = cap.read()
    if not ret:
        break

    faces = detector.detect(frame)
    frame = blurrer.anonymize(frame, faces, inplace=True)

    cv2.imshow('Anonymized', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
```

---

## Video File Anonymization

```python
import cv2
from uniface import RetinaFace
from uniface.privacy import BlurFace

detector = RetinaFace()
blurrer = BlurFace(method='gaussian')

cap = cv2.VideoCapture("input.mp4")
fps = cap.get(cv2.CAP_PROP_FPS)
w, h = int(cap.get(3)), int(cap.get(4))

out = cv2.VideoWriter('output.mp4', cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))

while True:
    ret, frame = cap.read()
    if not ret:
        break

    faces = detector.detect(frame)
    blurrer.anonymize(frame, faces, inplace=True)
    out.write(frame)

cap.release()
out.release()
```

---

## One-Liner for Images

```python
from uniface.privacy import anonymize_faces
import cv2

image = cv2.imread("photo.jpg")
result = anonymize_faces(image, method='pixelate')
cv2.imwrite("anonymized.jpg", result)
```

---

## Available Blur Methods

| Method | Usage |
|--------|-------|
| Pixelate | `BlurFace(method='pixelate', pixel_blocks=10)` |
| Gaussian | `BlurFace(method='gaussian', blur_strength=3.0)` |
| Blackout | `BlurFace(method='blackout', color=(0,0,0))` |
| Elliptical | `BlurFace(method='elliptical', margin=20)` |
| Median | `BlurFace(method='median', blur_strength=3.0)` |

---

## See Also

- [Privacy Module](../modules/privacy.md) - Privacy protection details
- [Video & Webcam](video-webcam.md) - Real-time processing
- [Detection Module](../modules/detection.md) - Face detection
83
docs/recipes/batch-processing.md
Normal file
@@ -0,0 +1,83 @@
# Batch Processing

Process multiple images efficiently.

!!! note "Work in Progress"
    This page contains example code patterns. Test thoroughly before using in production.

---

## Basic Batch Processing

```python
import cv2
from pathlib import Path
from uniface import RetinaFace

detector = RetinaFace()

def process_directory(input_dir, output_dir):
    """Process all images in a directory."""
    input_path = Path(input_dir)
    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)

    for image_path in input_path.glob("*.jpg"):
        print(f"Processing {image_path.name}...")

        image = cv2.imread(str(image_path))
        faces = detector.detect(image)

        print(f"  Found {len(faces)} face(s)")

        # Process and save results
        # ... your code here ...

# Usage
process_directory("input_images/", "output_images/")
```

---

## With Progress Bar

```python
from tqdm import tqdm

# image_files: a list of image paths to process
for image_path in tqdm(image_files, desc="Processing"):
    # ... process image ...
    pass
```

---

## Extract Embeddings

```python
import cv2
import numpy as np
from pathlib import Path
from uniface import RetinaFace, ArcFace

detector = RetinaFace()
recognizer = ArcFace()

embeddings = {}
for image_path in Path("faces/").glob("*.jpg"):
    image = cv2.imread(str(image_path))
    faces = detector.detect(image)

    if faces:
        embedding = recognizer.get_normalized_embedding(image, faces[0].landmarks)
        embeddings[image_path.stem] = embedding

# Save embeddings
np.savez("embeddings.npz", **embeddings)
```

---

## See Also

- [Video & Webcam](video-webcam.md) - Real-time processing
- [Face Search](face-search.md) - Search through embeddings
- [Image Pipeline](image-pipeline.md) - Full analysis pipeline
- [Detection Module](../modules/detection.md) - Detection options
114
docs/recipes/custom-models.md
Normal file
@@ -0,0 +1,114 @@
# Custom Models

Add your own ONNX models to UniFace.

!!! note "Work in Progress"
    This page contains example code patterns for advanced users. Test thoroughly before using in production.

---

## Overview

UniFace is designed to be extensible. You can add custom ONNX models by:

1. Creating a class that inherits from the appropriate base class
2. Implementing required methods
3. Using the ONNX Runtime utilities provided by UniFace

---

## Add Custom Detection Model

```python
from uniface.detection.base import BaseDetector
from uniface.onnx_utils import create_onnx_session
from uniface.types import Face
import numpy as np

class MyDetector(BaseDetector):
    def __init__(self, model_path: str, confidence_threshold: float = 0.5):
        self.session = create_onnx_session(model_path)
        self.threshold = confidence_threshold

    def detect(self, image: np.ndarray) -> list[Face]:
        # 1. Preprocess image
        input_tensor = self._preprocess(image)

        # 2. Run inference
        outputs = self.session.run(None, {'input': input_tensor})

        # 3. Postprocess outputs to Face objects
        faces = self._postprocess(outputs, image.shape)
        return faces

    def _preprocess(self, image):
        # Your preprocessing logic
        # e.g., resize, normalize, transpose
        pass

    def _postprocess(self, outputs, shape):
        # Your postprocessing logic
        # e.g., decode boxes, apply NMS, create Face objects
        pass
```

---

## Add Custom Recognition Model

```python
from uniface.recognition.base import BaseRecognizer
from uniface.onnx_utils import create_onnx_session
from uniface import face_alignment
import numpy as np

class MyRecognizer(BaseRecognizer):
    def __init__(self, model_path: str):
        self.session = create_onnx_session(model_path)

    def get_normalized_embedding(
        self,
        image: np.ndarray,
        landmarks: np.ndarray
    ) -> np.ndarray:
        # 1. Align face
        aligned = face_alignment(image, landmarks)

        # 2. Preprocess
        input_tensor = self._preprocess(aligned)

        # 3. Run inference
        embedding = self.session.run(None, {'input': input_tensor})[0]

        # 4. Normalize
        embedding = embedding / np.linalg.norm(embedding)
        return embedding

    def _preprocess(self, image):
        # Your preprocessing logic
        pass
```

---

## Usage

```python
from my_module import MyDetector, MyRecognizer

# Use custom models
detector = MyDetector("path/to/detection_model.onnx")
recognizer = MyRecognizer("path/to/recognition_model.onnx")

# Use like built-in models
faces = detector.detect(image)
embedding = recognizer.get_normalized_embedding(image, faces[0].landmarks)
```

---

## See Also

- [Detection Module](../modules/detection.md) - Built-in detection models
- [Recognition Module](../modules/recognition.md) - Built-in recognition models
- [Concepts: Overview](../concepts/overview.md) - Architecture overview
178
docs/recipes/face-search.md
Normal file
@@ -0,0 +1,178 @@
# Face Search

Build a face search system for finding people in images.

!!! note "Work in Progress"
    This page contains example code patterns. Test thoroughly before using in production.

---

## Basic Face Database

```python
import numpy as np
import cv2
from pathlib import Path
from uniface import RetinaFace, ArcFace

class FaceDatabase:
    def __init__(self):
        self.detector = RetinaFace()
        self.recognizer = ArcFace()
        self.embeddings = {}

    def add_face(self, person_id, image):
        """Add a face to the database."""
        faces = self.detector.detect(image)
        if not faces:
            raise ValueError(f"No face found for {person_id}")

        face = max(faces, key=lambda f: f.confidence)
        embedding = self.recognizer.get_normalized_embedding(image, face.landmarks)
        self.embeddings[person_id] = embedding
        return True

    def search(self, image, threshold=0.6):
        """Search for faces in an image."""
        faces = self.detector.detect(image)
        results = []

        for face in faces:
            embedding = self.recognizer.get_normalized_embedding(image, face.landmarks)

            best_match = None
            best_similarity = -1

            for person_id, db_embedding in self.embeddings.items():
                similarity = np.dot(embedding, db_embedding.T)[0][0]
                if similarity > best_similarity:
                    best_similarity = similarity
                    best_match = person_id

            results.append({
                'bbox': face.bbox,
                'match': best_match if best_similarity >= threshold else None,
                'similarity': best_similarity
            })

        return results

    def save(self, path):
        """Save database to file."""
        np.savez(path, embeddings=dict(self.embeddings))

    def load(self, path):
        """Load database from file."""
        data = np.load(path, allow_pickle=True)
        self.embeddings = data['embeddings'].item()

# Usage
db = FaceDatabase()

# Add faces
for image_path in Path("known_faces/").glob("*.jpg"):
    person_id = image_path.stem
    image = cv2.imread(str(image_path))
    try:
        db.add_face(person_id, image)
        print(f"Added: {person_id}")
    except ValueError as e:
        print(f"Skipped: {e}")

# Save database
db.save("face_database.npz")

# Search
query_image = cv2.imread("group_photo.jpg")
results = db.search(query_image)

for r in results:
    if r['match']:
        print(f"Found: {r['match']} (similarity: {r['similarity']:.3f})")
```

---

## Visualization

```python
import cv2

def visualize_search_results(image, results):
    """Draw search results on image."""
    for r in results:
        x1, y1, x2, y2 = map(int, r['bbox'])

        if r['match']:
            color = (0, 255, 0)  # Green for match
            label = f"{r['match']} ({r['similarity']:.2f})"
        else:
            color = (0, 0, 255)  # Red for unknown
            label = f"Unknown ({r['similarity']:.2f})"

        cv2.rectangle(image, (x1, y1), (x2, y2), color, 2)
        cv2.putText(image, label, (x1, y1 - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)

    return image

# Usage
results = db.search(image)
annotated = visualize_search_results(image.copy(), results)
cv2.imwrite("search_result.jpg", annotated)
```

---

## Real-Time Search

```python
import cv2

def realtime_search(db):
    """Real-time face search from webcam."""
    cap = cv2.VideoCapture(0)

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        results = db.search(frame, threshold=0.5)

        for r in results:
            x1, y1, x2, y2 = map(int, r['bbox'])

            if r['match']:
                color = (0, 255, 0)
                label = r['match']
            else:
                color = (0, 0, 255)
                label = "Unknown"

            cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
            cv2.putText(frame, label, (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)

        cv2.imshow("Face Search", frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()

# Usage
db = FaceDatabase()
db.load("face_database.npz")
realtime_search(db)
```

---

## See Also

- [Recognition Module](../modules/recognition.md) - Face recognition details
- [Batch Processing](batch-processing.md) - Process multiple files
- [Video & Webcam](video-webcam.md) - Real-time processing
- [Concepts: Thresholds](../concepts/thresholds-calibration.md) - Tuning similarity thresholds
281
docs/recipes/image-pipeline.md
Normal file
@@ -0,0 +1,281 @@
# Image Pipeline

A complete pipeline for processing images with detection, recognition, and attribute analysis.

---

## Basic Pipeline

```python
import cv2
from uniface import RetinaFace, ArcFace, AgeGender
from uniface.visualization import draw_detections

# Initialize models
detector = RetinaFace()
recognizer = ArcFace()
age_gender = AgeGender()

def process_image(image_path):
    """Process a single image through the full pipeline."""
    # Load image
    image = cv2.imread(image_path)

    # Step 1: Detect faces
    faces = detector.detect(image)
    print(f"Found {len(faces)} face(s)")

    results = []

    for i, face in enumerate(faces):
        # Step 2: Extract embedding
        embedding = recognizer.get_normalized_embedding(image, face.landmarks)

        # Step 3: Predict attributes
        attrs = age_gender.predict(image, face.bbox)

        results.append({
            'face_id': i,
            'bbox': face.bbox,
            'confidence': face.confidence,
            'embedding': embedding,
            'gender': attrs.sex,
            'age': attrs.age
        })

        print(f"  Face {i+1}: {attrs.sex}, {attrs.age} years old")

    # Visualize
    draw_detections(
        image=image,
        bboxes=[f.bbox for f in faces],
        scores=[f.confidence for f in faces],
        landmarks=[f.landmarks for f in faces]
    )

    return image, results

# Usage
result_image, results = process_image("photo.jpg")
cv2.imwrite("result.jpg", result_image)
```

---

## Using FaceAnalyzer

For convenience, use the built-in `FaceAnalyzer`:

```python
from uniface import FaceAnalyzer
import cv2

# Initialize with desired modules
analyzer = FaceAnalyzer(
    detect=True,
    recognize=True,
    attributes=True
)

# Process image
image = cv2.imread("photo.jpg")
faces = analyzer.analyze(image)

# Access enriched Face objects
for face in faces:
    print(f"Confidence: {face.confidence:.2f}")
    print(f"Embedding: {face.embedding.shape}")
    print(f"Age: {face.age}, Gender: {face.sex}")
```

---

## Full Analysis Pipeline

Complete pipeline with all modules:

```python
import cv2
import numpy as np
from uniface import (
    RetinaFace, ArcFace, AgeGender, FairFace,
    Landmark106, MobileGaze
)
from uniface.parsing import BiSeNet
from uniface.spoofing import MiniFASNet
from uniface.visualization import draw_detections, draw_gaze

class FaceAnalysisPipeline:
    def __init__(self):
        # Initialize all models
        self.detector = RetinaFace()
        self.recognizer = ArcFace()
        self.age_gender = AgeGender()
        self.fairface = FairFace()
        self.landmarker = Landmark106()
        self.gaze = MobileGaze()
        self.parser = BiSeNet()
        self.spoofer = MiniFASNet()

    def analyze(self, image):
        """Run full analysis pipeline."""
        faces = self.detector.detect(image)
        results = []

        for face in faces:
            result = {
                'bbox': face.bbox,
                'confidence': face.confidence,
                'landmarks_5': face.landmarks
            }

            # Recognition embedding
            result['embedding'] = self.recognizer.get_normalized_embedding(
                image, face.landmarks
            )

            # Attributes
            ag_result = self.age_gender.predict(image, face.bbox)
            result['age'] = ag_result.age
            result['gender'] = ag_result.sex

            # FairFace attributes
            ff_result = self.fairface.predict(image, face.bbox)
            result['age_group'] = ff_result.age_group
            result['race'] = ff_result.race

            # 106-point landmarks
            result['landmarks_106'] = self.landmarker.get_landmarks(
                image, face.bbox
            )

            # Gaze estimation
            x1, y1, x2, y2 = map(int, face.bbox)
            face_crop = image[y1:y2, x1:x2]
            if face_crop.size > 0:
                gaze_result = self.gaze.estimate(face_crop)
                result['gaze_pitch'] = gaze_result.pitch
                result['gaze_yaw'] = gaze_result.yaw

            # Face parsing
            if face_crop.size > 0:
                result['parsing_mask'] = self.parser.parse(face_crop)

            # Anti-spoofing
            spoof_result = self.spoofer.predict(image, face.bbox)
            result['is_real'] = spoof_result.is_real
            result['spoof_confidence'] = spoof_result.confidence

            results.append(result)

        return results

# Usage
pipeline = FaceAnalysisPipeline()
results = pipeline.analyze(cv2.imread("photo.jpg"))

for i, r in enumerate(results):
    print(f"\nFace {i+1}:")
    print(f"  Gender: {r['gender']}, Age: {r['age']}")
    print(f"  Race: {r['race']}, Age Group: {r['age_group']}")
    if 'gaze_pitch' in r:  # Gaze keys are set only when the face crop is valid
        print(f"  Gaze: pitch={np.degrees(r['gaze_pitch']):.1f}°")
    print(f"  Real: {r['is_real']} ({r['spoof_confidence']:.1%})")
```

---

## Visualization Pipeline

```python
import cv2
import numpy as np
from uniface import RetinaFace, AgeGender, MobileGaze
from uniface.visualization import draw_detections, draw_gaze

def visualize_analysis(image_path, output_path):
    """Create annotated visualization of face analysis."""
    detector = RetinaFace()
    age_gender = AgeGender()
    gaze = MobileGaze()

    image = cv2.imread(image_path)
    faces = detector.detect(image)

    for face in faces:
        x1, y1, x2, y2 = map(int, face.bbox)

        # Draw bounding box
        cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)

        # Age and gender
        attrs = age_gender.predict(image, face.bbox)
        label = f"{attrs.sex}, {attrs.age}y"
        cv2.putText(image, label, (x1, y1 - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

        # Gaze
        face_crop = image[y1:y2, x1:x2]
        if face_crop.size > 0:
            gaze_result = gaze.estimate(face_crop)
            draw_gaze(image, face.bbox, gaze_result.pitch, gaze_result.yaw)

        # Confidence
        conf_label = f"{face.confidence:.0%}"
        cv2.putText(image, conf_label, (x1, y2 + 20),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)

    cv2.imwrite(output_path, image)
    print(f"Saved to {output_path}")

# Usage
visualize_analysis("input.jpg", "output.jpg")
```

---

## JSON Output

Export results to JSON:

```python
import json
import numpy as np

def results_to_json(results):
    """Convert analysis results to JSON-serializable format."""
    output = []

    for r in results:
        item = {
            'bbox': r['bbox'].tolist(),
            'confidence': float(r['confidence']),
            'age': int(r['age']) if r.get('age') else None,
            'gender': r.get('gender'),
            'race': r.get('race'),
            'is_real': r.get('is_real'),
            'gaze': {
                'pitch_deg': float(np.degrees(r['gaze_pitch'])) if 'gaze_pitch' in r else None,
                'yaw_deg': float(np.degrees(r['gaze_yaw'])) if 'gaze_yaw' in r else None
            }
        }
        output.append(item)

    return output

# Usage
results = pipeline.analyze(image)
json_data = results_to_json(results)

with open('results.json', 'w') as f:
    json.dump(json_data, f, indent=2)
```

---

## Next Steps

- [Batch Processing](batch-processing.md) - Process multiple images
- [Video & Webcam](video-webcam.md) - Real-time processing
- [Face Search](face-search.md) - Build a search system
- [Detection Module](../modules/detection.md) - Detection options
- [Recognition Module](../modules/recognition.md) - Recognition details
125
docs/recipes/video-webcam.md
Normal file
@@ -0,0 +1,125 @@
# Video & Webcam

Real-time face analysis for video streams.

!!! note "Work in Progress"
    This page contains example code patterns. Test thoroughly before using in production.

---

## Webcam Detection

```python
import cv2
from uniface import RetinaFace
from uniface.visualization import draw_detections

detector = RetinaFace()
cap = cv2.VideoCapture(0)

print("Press 'q' to quit")

while True:
    ret, frame = cap.read()
    if not ret:
        break

    faces = detector.detect(frame)

    draw_detections(
        image=frame,
        bboxes=[f.bbox for f in faces],
        scores=[f.confidence for f in faces],
        landmarks=[f.landmarks for f in faces]
    )

    cv2.imshow("Face Detection", frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
```

---

## Video File Processing

```python
import cv2
from uniface import RetinaFace

def process_video(input_path, output_path):
    """Process a video file."""
    detector = RetinaFace()
    cap = cv2.VideoCapture(input_path)

    # Get video properties
    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # Setup output
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        faces = detector.detect(frame)
        # ... process and draw ...

        out.write(frame)

    cap.release()
    out.release()

# Usage
process_video("input.mp4", "output.mp4")
```

---

## Performance Tips

### Skip Frames

```python
PROCESS_EVERY_N = 3  # Process every 3rd frame
frame_count = 0
last_faces = []

while True:
    ret, frame = cap.read()
    if not ret:
        break
    if frame_count % PROCESS_EVERY_N == 0:
        last_faces = detector.detect(frame)
    frame_count += 1
    # Draw last_faces...
```

### FPS Counter

```python
import time

prev_time = time.time()
while True:
    curr_time = time.time()
    fps = 1 / (curr_time - prev_time)
    prev_time = curr_time

    cv2.putText(frame, f"FPS: {fps:.1f}", (10, 30),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
```

---

## See Also

- [Anonymize Stream](anonymize-stream.md) - Privacy protection in video
- [Batch Processing](batch-processing.md) - Process multiple files
- [Detection Module](../modules/detection.md) - Detection options
- [Gaze Module](../modules/gaze.md) - Gaze tracking
225
docs/stylesheets/extra.css
Normal file
@@ -0,0 +1,225 @@
/* UniFace Documentation - Custom Styles */

/* ===== Hero Section ===== */

.md-content .hero {
  text-align: center;
  padding: 3rem 1rem 2rem;
  margin: 0 auto;
  max-width: 900px;
}

.hero-title {
  font-size: 3.5rem !important;
  font-weight: 800 !important;
  margin-bottom: 0.5rem !important;
  background: linear-gradient(135deg, var(--md-primary-fg-color) 0%, #7c4dff 100%);
  -webkit-background-clip: text;
  -webkit-text-fill-color: transparent;
  background-clip: text;
}

.hero-tagline {
  font-size: 1.5rem;
  color: var(--md-default-fg-color);
  margin-bottom: 0.5rem !important;
  font-weight: 500;
}

.hero-subtitle {
  font-size: 1rem;
  color: var(--md-default-fg-color--light);
  margin-bottom: 1.5rem !important;
  font-weight: 400;
  letter-spacing: 0.5px;
}

.hero .md-button {
  margin: 0.5rem 0.25rem;
  padding: 0.7rem 1.5rem;
  font-weight: 600;
  border-radius: 8px;
  transition: all 0.2s ease;
}

.hero .md-button--primary {
  background: linear-gradient(135deg, var(--md-primary-fg-color) 0%, #5c6bc0 100%);
  border: none;
  box-shadow: 0 4px 14px rgba(63, 81, 181, 0.4);
}

.hero .md-button--primary:hover {
  transform: translateY(-2px);
  box-shadow: 0 6px 20px rgba(63, 81, 181, 0.5);
}

.hero .md-button:not(.md-button--primary) {
  border: 2px solid var(--md-primary-fg-color);
  background: transparent;
  color: var(--md-primary-fg-color);
}

.hero .md-button:not(.md-button--primary):hover {
  background: var(--md-primary-fg-color);
  border-color: var(--md-primary-fg-color);
  color: white;
  transform: translateY(-2px);
}

/* Badge styling in hero */
.hero p a img {
  margin: 0 3px;
  height: 24px !important;
}

/* ===== Feature Grid ===== */
.feature-grid {
  display: grid;
  grid-template-columns: repeat(4, 1fr);
  gap: 1.25rem;
  margin: 2rem 0;
}

.feature-card {
  padding: 1.5rem;
  border-radius: 12px;
  background: var(--md-code-bg-color);
  border: 1px solid var(--md-default-fg-color--lightest);
  transition: all 0.3s ease;
  position: relative;
  overflow: hidden;
}

.feature-card::before {
  content: '';
  position: absolute;
  top: 0;
  left: 0;
  right: 0;
  height: 3px;
  background: linear-gradient(90deg, var(--md-primary-fg-color), #7c4dff);
  opacity: 0;
  transition: opacity 0.3s ease;
}

.feature-card:hover {
  transform: translateY(-4px);
  box-shadow: 0 12px 24px rgba(0, 0, 0, 0.1);
  border-color: var(--md-primary-fg-color--light);
}

.feature-card:hover::before {
  opacity: 1;
}

.feature-card h3 {
  margin-top: 0 !important;
  margin-bottom: 0.75rem !important;
  font-size: 1rem !important;
  font-weight: 600;
  display: flex;
  align-items: center;
  gap: 0.5rem;
}

.feature-card p {
  margin: 0;
  font-size: 0.875rem;
  color: var(--md-default-fg-color--light);
  line-height: 1.5;
}

.feature-card a {
  display: inline-block;
  margin-top: 0.75rem;
  font-weight: 500;
  font-size: 0.875rem;
}

/* ===== Next Steps Grid (2 columns) ===== */
.next-steps-grid {
  display: grid;
  grid-template-columns: repeat(2, 1fr);
  gap: 1.25rem;
  margin: 2rem 0;
}

.next-steps-grid .feature-card {
  padding: 2rem;
}

.next-steps-grid .feature-card h3 {
  font-size: 1.1rem !important;
}

/* ===== Dark Mode Adjustments ===== */
[data-md-color-scheme="slate"] .hero-title {
  background: linear-gradient(135deg, #7c4dff 0%, #b388ff 100%);
  -webkit-background-clip: text;
  -webkit-text-fill-color: transparent;
  background-clip: text;
}

[data-md-color-scheme="slate"] .feature-card:hover {
  box-shadow: 0 12px 24px rgba(0, 0, 0, 0.3);
}

[data-md-color-scheme="slate"] .hero .md-button--primary {
  background: linear-gradient(135deg, #7c4dff 0%, #b388ff 100%);
  box-shadow: 0 4px 14px rgba(124, 77, 255, 0.4);
}

[data-md-color-scheme="slate"] .hero .md-button--primary:hover {
  box-shadow: 0 6px 20px rgba(124, 77, 255, 0.5);
}

[data-md-color-scheme="slate"] .hero .md-button:not(.md-button--primary) {
  border: 2px solid rgba(255, 255, 255, 0.3);
  background: rgba(255, 255, 255, 0.05);
  color: rgba(255, 255, 255, 0.9);
}

[data-md-color-scheme="slate"] .hero .md-button:not(.md-button--primary):hover {
  background: rgba(255, 255, 255, 0.1);
  border-color: rgba(255, 255, 255, 0.5);
  color: white;
  transform: translateY(-2px);
}

/* ===== Responsive Design ===== */
@media (max-width: 1200px) {
  .feature-grid {
    grid-template-columns: repeat(2, 1fr);
  }
}

@media (max-width: 768px) {
  .hero-title {
    font-size: 2.5rem !important;
  }

  .hero-subtitle {
    font-size: 1.1rem;
  }

  .feature-grid,
  .next-steps-grid {
    grid-template-columns: 1fr;
  }

  .hero .md-button {
    display: block;
    margin: 0.5rem auto;
    max-width: 200px;
  }
}

@media (max-width: 480px) {
  .hero-title {
    font-size: 2rem !important;
  }

  .feature-card {
    padding: 1.25rem;
  }
}
@@ -25,7 +25,14 @@
}
],
"source": [
"%pip install -q uniface"
"%pip install -q uniface\n",
"\n",
"# Clone repo for assets (Colab only)\n",
"import os\n",
"if 'COLAB_GPU' in os.environ or 'COLAB_RELEASE_TAG' in os.environ:\n",
"    if not os.path.exists('uniface'):\n",
"        !git clone --depth 1 https://github.com/yakhyo/uniface.git\n",
"    os.chdir('uniface/examples')"
]
},
{
@@ -44,7 +51,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"1.3.1\n"
"2.0.0\n"
]
}
],
@@ -71,19 +78,11 @@
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"✓ Model loaded (CoreML (Apple Silicon))\n"
]
}
],
"outputs": [],
"source": [
"detector = RetinaFace(\n",
"    conf_thresh=0.5,\n",
"    nms_thresh=0.4,\n",
"    confidence_threshold=0.5,\n",
"    nms_threshold=0.4,\n",
")"
]
},
@@ -153,14 +152,14 @@
"# Load image\n",
"image = cv2.imread(image_path)\n",
"\n",
"# Detect faces - returns list of face dictionaries\n",
"# Detect faces - returns list of Face objects\n",
"faces = detector.detect(image)\n",
"print(f'Detected {len(faces)} face(s)')\n",
"\n",
"# Unpack face data for visualization\n",
"bboxes = [f['bbox'] for f in faces]\n",
"scores = [f['confidence'] for f in faces]\n",
"landmarks = [f['landmarks'] for f in faces]\n",
"bboxes = [f.bbox for f in faces]\n",
"scores = [f.confidence for f in faces]\n",
"landmarks = [f.landmarks for f in faces]\n",
"\n",
"# Draw detections\n",
"draw_detections(image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=0.6, fancy_bbox=True)\n",
@@ -211,9 +210,9 @@
"faces = detector.detect(image, max_num=2)\n",
"print(f'Detected {len(faces)} face(s)')\n",
"\n",
"bboxes = [f['bbox'] for f in faces]\n",
"scores = [f['confidence'] for f in faces]\n",
"landmarks = [f['landmarks'] for f in faces]\n",
"bboxes = [f.bbox for f in faces]\n",
"scores = [f.confidence for f in faces]\n",
"landmarks = [f.landmarks for f in faces]\n",
"\n",
"draw_detections(image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=0.6, fancy_bbox=True)\n",
"\n",
@@ -258,9 +257,9 @@
"faces = detector.detect(image, max_num=5)\n",
"print(f'Detected {len(faces)} face(s)')\n",
"\n",
"bboxes = [f['bbox'] for f in faces]\n",
"scores = [f['confidence'] for f in faces]\n",
"landmarks = [f['landmarks'] for f in faces]\n",
"bboxes = [f.bbox for f in faces]\n",
"scores = [f.confidence for f in faces]\n",
"landmarks = [f.landmarks for f in faces]\n",
"\n",
"draw_detections(image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=0.6, fancy_bbox=True)\n",
"\n",
@@ -274,7 +273,8 @@
"source": [
"## Notes\n",
"\n",
"- `detect()` returns a list of dictionaries with keys: `bbox`, `confidence`, `landmarks`\n",
"- `detect()` returns a list of `Face` objects with attributes: `bbox`, `confidence`, `landmarks`\n",
"- Access attributes using dot notation: `face.bbox`, `face.confidence`, `face.landmarks`\n",
"- Adjust `conf_thresh` and `nms_thresh` for your use case\n",
"- Use `max_num` to limit detected faces"
]
@@ -29,7 +29,14 @@
}
],
"source": [
"%pip install -q uniface"
"%pip install -q uniface\n",
"\n",
"# Clone repo for assets (Colab only)\n",
"import os\n",
"if 'COLAB_GPU' in os.environ or 'COLAB_RELEASE_TAG' in os.environ:\n",
"    if not os.path.exists('uniface'):\n",
"        !git clone --depth 1 https://github.com/yakhyo/uniface.git\n",
"    os.chdir('uniface/examples')"
]
},
{
@@ -48,7 +55,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"1.3.1\n"
"2.0.0\n"
]
}
],
@@ -76,19 +83,11 @@
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"✓ Model loaded (CoreML (Apple Silicon))\n"
]
}
],
"outputs": [],
"source": [
"detector = RetinaFace(\n",
"    conf_thresh=0.5,\n",
"    nms_thresh=0.4,\n",
"    confidence_threshold=0.5,\n",
"    nms_threshold=0.4,\n",
")"
]
},
@@ -140,13 +139,13 @@
"\n",
"    # Draw detections\n",
"    bbox_image = image.copy()\n",
"    bboxes = [f['bbox'] for f in faces]\n",
"    scores = [f['confidence'] for f in faces]\n",
"    landmarks = [f['landmarks'] for f in faces]\n",
"    bboxes = [f.bbox for f in faces]\n",
"    scores = [f.confidence for f in faces]\n",
"    landmarks = [f.landmarks for f in faces]\n",
"    draw_detections(image=bbox_image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=0.6, fancy_bbox=True)\n",
"\n",
"    # Align first detected face (returns aligned image and inverse transform matrix)\n",
"    first_landmarks = faces[0]['landmarks']\n",
"    first_landmarks = faces[0].landmarks\n",
"    aligned_image, _ = face_alignment(image, first_landmarks, image_size=112)\n",
"\n",
"    # Convert BGR to RGB for visualization\n",
@@ -202,7 +201,8 @@
"source": [
"## Notes\n",
"\n",
"- `detect()` returns a list of face dictionaries with `bbox`, `confidence`, `landmarks`\n",
"- `detect()` returns a list of `Face` objects with `bbox`, `confidence`, `landmarks` attributes\n",
"- Access attributes using dot notation: `face.bbox`, `face.landmarks`\n",
"- `face_alignment()` uses 5-point landmarks to align and crop the face\n",
"- Default output size is 112x112 (standard for face recognition models)\n"
]
@@ -25,7 +25,14 @@
}
],
"source": [
"%pip install -q uniface"
"%pip install -q uniface\n",
"\n",
"# Clone repo for assets (Colab only)\n",
"import os\n",
"if 'COLAB_GPU' in os.environ or 'COLAB_RELEASE_TAG' in os.environ:\n",
"    if not os.path.exists('uniface'):\n",
"        !git clone --depth 1 https://github.com/yakhyo/uniface.git\n",
"    os.chdir('uniface/examples')"
]
},
{
@@ -37,7 +44,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"1.3.1\n"
"2.0.0\n"
]
}
],
@@ -66,19 +73,10 @@
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"✓ Model loaded (CoreML (Apple Silicon))\n",
"✓ Model loaded (CoreML (Apple Silicon))\n"
]
}
],
"outputs": [],
"source": [
"analyzer = FaceAnalyzer(\n",
"    detector=RetinaFace(conf_thresh=0.5),\n",
"    detector=RetinaFace(confidence_threshold=0.5),\n",
"    recognizer=ArcFace()\n",
")"
]
@@ -11,7 +11,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"metadata": {},
"outputs": [
{
@@ -23,7 +23,14 @@
}
],
"source": [
"%pip install -q uniface"
"%pip install -q uniface\n",
"\n",
"# Clone repo for assets (Colab only)\n",
"import os\n",
"if 'COLAB_GPU' in os.environ or 'COLAB_RELEASE_TAG' in os.environ:\n",
"    if not os.path.exists('uniface'):\n",
"        !git clone --depth 1 https://github.com/yakhyo/uniface.git\n",
"    os.chdir('uniface/examples')"
]
},
{
@@ -42,7 +49,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"1.3.1\n"
"2.0.0\n"
]
}
],
@@ -74,7 +81,7 @@
],
"source": [
"analyzer = FaceAnalyzer(\n",
"    detector=RetinaFace(conf_thresh=0.5),\n",
"    detector=RetinaFace(confidence_threshold=0.5),\n",
"    recognizer=ArcFace()\n",
")"
]
@@ -25,7 +25,14 @@
}
],
"source": [
"%pip install -q uniface"
"%pip install -q uniface\n",
"\n",
"# Clone repo for assets (Colab only)\n",
"import os\n",
"if 'COLAB_GPU' in os.environ or 'COLAB_RELEASE_TAG' in os.environ:\n",
"    if not os.path.exists('uniface'):\n",
"        !git clone --depth 1 https://github.com/yakhyo/uniface.git\n",
"    os.chdir('uniface/examples')"
]
},
{
@@ -44,7 +51,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"1.3.1\n"
"2.0.0\n"
]
}
],
@@ -75,20 +82,10 @@
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"✓ Model loaded (CoreML (Apple Silicon))\n",
"✓ Model loaded (CoreML (Apple Silicon))\n",
"✓ Model loaded (CoreML (Apple Silicon))\n"
]
}
],
"outputs": [],
"source": [
"analyzer = FaceAnalyzer(\n",
"    detector=RetinaFace(conf_thresh=0.5),\n",
"    detector=RetinaFace(confidence_threshold=0.5),\n",
"    recognizer=ArcFace(),\n",
"    age_gender=AgeGender()\n",
")"
394 examples/06_face_parsing.ipynb Normal file
File diff suppressed because one or more lines are too long

324 examples/07_face_anonymization.ipynb Normal file
File diff suppressed because one or more lines are too long

268 examples/08_gaze_estimation.ipynb Normal file
File diff suppressed because one or more lines are too long

164 mkdocs.yml Normal file
@@ -0,0 +1,164 @@
site_name: UniFace
site_description: All-in-One Face Analysis Library with ONNX Runtime
site_author: Yakhyokhuja Valikhujaev
site_url: https://yakhyo.github.io/uniface

repo_name: yakhyo/uniface
repo_url: https://github.com/yakhyo/uniface
edit_uri: edit/main/docs/

copyright: Copyright © 2025 Yakhyokhuja Valikhujaev

theme:
  name: material
  custom_dir: docs/overrides
  palette:
    - media: "(prefers-color-scheme)"
      toggle:
        icon: material/link
        name: Switch to light mode
    - media: "(prefers-color-scheme: light)"
      scheme: default
      primary: indigo
      accent: indigo
      toggle:
        icon: material/toggle-switch
        name: Switch to dark mode
    - media: "(prefers-color-scheme: dark)"
      scheme: slate
      primary: black
      accent: indigo
      toggle:
        icon: material/toggle-switch-off-outline
        name: Switch to system preference
  font:
    text: Roboto
    code: Roboto Mono
  features:
    - navigation.tabs
    - navigation.top
    - navigation.footer
    - navigation.indexes
    - navigation.instant
    - navigation.tracking
    - search.suggest
    - search.highlight
    - content.code.copy
    - content.code.annotate
    - content.action.edit
    - content.action.view
    - content.tabs.link
    - toc.follow

  icon:
    logo: material/book-open-page-variant
    repo: fontawesome/brands/git-alt
    admonition:
      note: octicons/tag-16
      abstract: octicons/checklist-16
      info: octicons/info-16
      tip: octicons/squirrel-16
      success: octicons/check-16
      question: octicons/question-16
      warning: octicons/alert-16
      failure: octicons/x-circle-16
      danger: octicons/zap-16
      bug: octicons/bug-16
      example: octicons/beaker-16
      quote: octicons/quote-16

extra:
  social:
    - icon: fontawesome/brands/github
      link: https://github.com/yakhyo
    - icon: fontawesome/brands/python
      link: https://pypi.org/project/uniface/
    - icon: fontawesome/brands/x-twitter
      link: https://x.com/y_valikhujaev
  analytics:
    provider: google
    property: G-FGEHR2K5ZE

extra_css:
  - stylesheets/extra.css

markdown_extensions:
  - admonition
  - footnotes
  - attr_list
  - md_in_html
  - def_list
  - tables
  - toc:
      permalink: false
      toc_depth: 3
  - pymdownx.superfences:
      custom_fences:
        - name: mermaid
          class: mermaid
          format: !!python/name:pymdownx.superfences.fence_code_format
  - pymdownx.details
  - pymdownx.highlight:
      anchor_linenums: true
      line_spans: __span
      pygments_lang_class: true
  - pymdownx.inlinehilite
  - pymdownx.snippets
  - pymdownx.tabbed:
      alternate_style: true
  - pymdownx.emoji:
      emoji_index: !!python/name:material.extensions.emoji.twemoji
      emoji_generator: !!python/name:material.extensions.emoji.to_svg
  - pymdownx.tasklist:
      custom_checkbox: true
  - pymdownx.keys
  - pymdownx.mark
  - pymdownx.critic
  - pymdownx.caret
  - pymdownx.tilde

plugins:
  - search
  - git-committers:
      repository: yakhyo/uniface
      branch: main
      token: !ENV MKDOCS_GIT_COMMITTERS_APIKEY
  - git-revision-date-localized:
      enable_creation_date: true
      type: timeago

nav:
  - Home: index.md
  - Getting Started:
      - Installation: installation.md
      - Quickstart: quickstart.md
      - Notebooks: notebooks.md
  - Model Zoo: models.md
  - Tutorials:
      - Image Pipeline: recipes/image-pipeline.md
      - Video & Webcam: recipes/video-webcam.md
      - Face Search: recipes/face-search.md
      - Batch Processing: recipes/batch-processing.md
      - Anonymize Stream: recipes/anonymize-stream.md
      - Custom Models: recipes/custom-models.md
  - API Reference:
      - Detection: modules/detection.md
      - Recognition: modules/recognition.md
      - Landmarks: modules/landmarks.md
      - Attributes: modules/attributes.md
      - Parsing: modules/parsing.md
      - Gaze: modules/gaze.md
      - Anti-Spoofing: modules/spoofing.md
      - Privacy: modules/privacy.md
  - Guides:
      - Overview: concepts/overview.md
      - Inputs & Outputs: concepts/inputs-outputs.md
      - Coordinate Systems: concepts/coordinate-systems.md
      - Execution Providers: concepts/execution-providers.md
      - Model Cache: concepts/model-cache-offline.md
      - Thresholds: concepts/thresholds-calibration.md
  - Resources:
      - Contributing: contributing.md
      - License: license-attribution.md
      - Releases: https://github.com/yakhyo/uniface/releases
      - Discussions: https://github.com/yakhyo/uniface/discussions
@@ -1,9 +1,9 @@
[project]
name = "uniface"
version = "1.3.2"
description = "UniFace: A Comprehensive Library for Face Detection, Recognition, Landmark Analysis, Age, and Gender Detection"
version = "2.2.1"
description = "UniFace: A Comprehensive Library for Face Detection, Recognition, Landmark Analysis, Face Parsing, Gaze Estimation, Age, and Gender Detection"
readme = "README.md"
license = { text = "MIT" }
license = "MIT"
authors = [{ name = "Yakhyokhuja Valikhujaev", email = "yakhyo9696@gmail.com" }]
maintainers = [
    { name = "Yakhyokhuja Valikhujaev", email = "yakhyo9696@gmail.com" },
@@ -14,6 +14,9 @@ keywords = [
    "face-detection",
    "face-recognition",
    "facial-landmarks",
    "face-parsing",
    "face-segmentation",
    "gaze-estimation",
    "age-detection",
    "gender-detection",
    "computer-vision",
@@ -21,13 +24,13 @@ keywords = [
    "onnx",
    "onnxruntime",
    "face-analysis",
    "bisenet",
]

classifiers = [
    "Development Status :: 4 - Beta",
    "Intended Audience :: Developers",
    "Intended Audience :: Science/Research",
    "License :: OSI Approved :: MIT License",
    "Operating System :: OS Independent",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.10",
@@ -86,13 +89,51 @@ exclude = [

[tool.ruff.format]
quote-style = "single"

docstring-code-format = true

[tool.ruff.lint]
select = ["E", "F", "I", "W"]
select = [
    "E",    # pycodestyle errors
    "F",    # pyflakes
    "I",    # isort
    "W",    # pycodestyle warnings
    "UP",   # pyupgrade (modern Python syntax)
    "B",    # flake8-bugbear
    "C4",   # flake8-comprehensions
    "SIM",  # flake8-simplify
    "RUF",  # Ruff-specific rules
]
ignore = [
    "E501",    # Line too long (handled by formatter)
    "B008",    # Function call in default argument (common in FastAPI/Click)
    "SIM108",  # Use ternary operator (can reduce readability)
    "RUF022",  # Allow logical grouping in __all__ instead of alphabetical sorting
]

[tool.ruff.lint.flake8-quotes]
docstring-quotes = "double"

[tool.ruff.lint.isort]
force-single-line = false
force-sort-within-sections = true
known-first-party = ["uniface"]
section-order = [
    "future",
    "standard-library",
    "third-party",
    "first-party",
    "local-folder",
]

[tool.ruff.lint.pydocstyle]
convention = "google"

[tool.bandit]
exclude_dirs = ["tests", "scripts", "examples"]
skips = ["B101", "B614"]  # B101: assert, B614: torch.jit.load (models are SHA256 verified)

[tool.pytest.ini_options]
testpaths = ["tests"]
python_files = ["test_*.py"]
python_functions = ["test_*"]
addopts = "-v --tb=short"
@@ -1,74 +0,0 @@
# Scripts

Scripts for testing UniFace features.

## Available Scripts

| Script | Description |
|--------|-------------|
| `run_detection.py` | Face detection on image or webcam |
| `run_age_gender.py` | Age and gender prediction |
| `run_emotion.py` | Emotion detection (7 or 8 emotions) |
| `run_landmarks.py` | 106-point facial landmark detection |
| `run_recognition.py` | Face embedding extraction and comparison |
| `run_face_analyzer.py` | Complete face analysis (detection + recognition + attributes) |
| `run_face_search.py` | Real-time face matching against reference |
| `run_video_detection.py` | Face detection on video files |
| `batch_process.py` | Batch process folder of images |
| `download_model.py` | Download model weights |
| `sha256_generate.py` | Generate SHA256 hash for model files |

## Usage Examples

```bash
# Face detection
python scripts/run_detection.py --image assets/test.jpg
python scripts/run_detection.py --webcam

# Age and gender
python scripts/run_age_gender.py --image assets/test.jpg
python scripts/run_age_gender.py --webcam

# Emotion detection
python scripts/run_emotion.py --image assets/test.jpg
python scripts/run_emotion.py --webcam

# Landmarks
python scripts/run_landmarks.py --image assets/test.jpg
python scripts/run_landmarks.py --webcam

# Face recognition (extract embedding)
python scripts/run_recognition.py --image assets/test.jpg

# Face comparison
python scripts/run_recognition.py --image1 face1.jpg --image2 face2.jpg

# Face search (match webcam against reference)
python scripts/run_face_search.py --image reference.jpg

# Video processing
python scripts/run_video_detection.py --input video.mp4 --output output.mp4

# Batch processing
python scripts/batch_process.py --input images/ --output results/

# Download models
python scripts/download_model.py --model-type retinaface
python scripts/download_model.py  # downloads all
```

## Common Options

| Option | Description |
|--------|-------------|
| `--image` | Path to input image |
| `--webcam` | Use webcam instead of image |
| `--method` | Choose detector: `retinaface`, `scrfd`, `yolov5face` |
| `--threshold` | Visualization confidence threshold (default: 0.25) |
| `--save_dir` | Output directory (default: `outputs`) |

## Quick Test

```bash
python scripts/run_detection.py --image assets/test.jpg
```
@@ -1,128 +0,0 @@
# Age and gender prediction on detected faces
# Usage: python run_age_gender.py --image path/to/image.jpg
#        python run_age_gender.py --webcam

import argparse
import os
from pathlib import Path

import cv2

from uniface import SCRFD, AgeGender, RetinaFace
from uniface.visualization import draw_detections


def draw_age_gender_label(image, bbox, gender_id: int, age: int):
    """Draw age/gender label above the bounding box."""
    x1, y1 = int(bbox[0]), int(bbox[1])
    gender_str = 'Female' if gender_id == 0 else 'Male'
    text = f'{gender_str}, {age}y'
    (tw, th), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
    cv2.rectangle(image, (x1, y1 - th - 10), (x1 + tw + 10, y1), (0, 255, 0), -1)
    cv2.putText(image, text, (x1 + 5, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 2)


def process_image(
    detector,
    age_gender,
    image_path: str,
    save_dir: str = 'outputs',
    threshold: float = 0.6,
):
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Failed to load image from '{image_path}'")
        return

    faces = detector.detect(image)
    print(f'Detected {len(faces)} face(s)')

    if not faces:
        return

    bboxes = [f['bbox'] for f in faces]
    scores = [f['confidence'] for f in faces]
    landmarks = [f['landmarks'] for f in faces]
    draw_detections(
        image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
    )

    for i, face in enumerate(faces):
        gender_id, age = age_gender.predict(image, face['bbox'])
        gender_str = 'Female' if gender_id == 0 else 'Male'
        print(f' Face {i + 1}: {gender_str}, {age} years old')
        draw_age_gender_label(image, face['bbox'], gender_id, age)

    os.makedirs(save_dir, exist_ok=True)
    output_path = os.path.join(save_dir, f'{Path(image_path).stem}_age_gender.jpg')
    cv2.imwrite(output_path, image)
    print(f'Output saved: {output_path}')


def run_webcam(detector, age_gender, threshold: float = 0.6):
    cap = cv2.VideoCapture(0)  # 0 = default webcam
    if not cap.isOpened():
        print('Cannot open webcam')
        return

    print("Press 'q' to quit")

    while True:
        ret, frame = cap.read()
        frame = cv2.flip(frame, 1)  # mirror for natural interaction
        if not ret:
            break

        faces = detector.detect(frame)

        # unpack face data for visualization
        bboxes = [f['bbox'] for f in faces]
        scores = [f['confidence'] for f in faces]
        landmarks = [f['landmarks'] for f in faces]
        draw_detections(frame, bboxes, scores, landmarks, vis_threshold=threshold)

        for face in faces:
            gender_id, age = age_gender.predict(frame, face['bbox'])  # predict per face
            draw_age_gender_label(frame, face['bbox'], gender_id, age)

        cv2.putText(
            frame,
            f'Faces: {len(faces)}',
            (10, 30),
            cv2.FONT_HERSHEY_SIMPLEX,
            1,
            (0, 255, 0),
            2,
        )
        cv2.imshow('Age & Gender Detection', frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()


def main():
    parser = argparse.ArgumentParser(description='Run age and gender detection')
    parser.add_argument('--image', type=str, help='Path to input image')
    parser.add_argument('--webcam', action='store_true', help='Use webcam')
    parser.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
    parser.add_argument('--threshold', type=float, default=0.6, help='Visualization threshold')
    parser.add_argument('--save_dir', type=str, default='outputs')
    args = parser.parse_args()

    if not args.image and not args.webcam:
        parser.error('Either --image or --webcam must be specified')

    detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
    age_gender = AgeGender()

    if args.webcam:
        run_webcam(detector, age_gender, args.threshold)
    else:
        process_image(detector, age_gender, args.image, args.save_dir, args.threshold)


if __name__ == '__main__':
    main()
@@ -1,110 +0,0 @@
# Face detection on image or webcam
# Usage: python run_detection.py --image path/to/image.jpg
#        python run_detection.py --webcam

import argparse
import os

import cv2

from uniface.detection import SCRFD, RetinaFace, YOLOv5Face
from uniface.visualization import draw_detections


def process_image(detector, image_path: str, threshold: float = 0.6, save_dir: str = 'outputs'):
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Failed to load image from '{image_path}'")
        return

    faces = detector.detect(image)

    if faces:
        bboxes = [face['bbox'] for face in faces]
        scores = [face['confidence'] for face in faces]
        landmarks = [face['landmarks'] for face in faces]
        draw_detections(image, bboxes, scores, landmarks, vis_threshold=threshold)

    os.makedirs(save_dir, exist_ok=True)
    output_path = os.path.join(save_dir, f'{os.path.splitext(os.path.basename(image_path))[0]}_out.jpg')
    cv2.imwrite(output_path, image)
    print(f'Output saved: {output_path}')


def run_webcam(detector, threshold: float = 0.6):
    cap = cv2.VideoCapture(0)  # 0 = default webcam
    if not cap.isOpened():
        print('Cannot open webcam')
        return

    print("Press 'q' to quit")

    while True:
        ret, frame = cap.read()
        frame = cv2.flip(frame, 1)  # mirror for natural interaction
        if not ret:
            break

        faces = detector.detect(frame)

        # unpack face data for visualization
        bboxes = [f['bbox'] for f in faces]
        scores = [f['confidence'] for f in faces]
        landmarks = [f['landmarks'] for f in faces]
        draw_detections(
            image=frame,
            bboxes=bboxes,
            scores=scores,
            landmarks=landmarks,
            vis_threshold=threshold,
            draw_score=True,
            fancy_bbox=True,
        )

        cv2.putText(
            frame,
            f'Faces: {len(faces)}',
            (10, 30),
            cv2.FONT_HERSHEY_SIMPLEX,
            1,
            (0, 255, 0),
            2,
        )
        cv2.imshow('Face Detection', frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()


def main():
    parser = argparse.ArgumentParser(description='Run face detection')
    parser.add_argument('--image', type=str, help='Path to input image')
    parser.add_argument('--webcam', action='store_true', help='Use webcam')
    parser.add_argument('--method', type=str, default='retinaface', choices=['retinaface', 'scrfd', 'yolov5face'])
    parser.add_argument('--threshold', type=float, default=0.25, help='Visualization threshold')
    parser.add_argument('--save_dir', type=str, default='outputs')
    args = parser.parse_args()

    if not args.image and not args.webcam:
        parser.error('Either --image or --webcam must be specified')

    if args.method == 'retinaface':
        detector = RetinaFace()
    elif args.method == 'scrfd':
        detector = SCRFD()
    else:
        from uniface.constants import YOLOv5FaceWeights

        detector = YOLOv5Face(model_name=YOLOv5FaceWeights.YOLOV5N)

    if args.webcam:
        run_webcam(detector, args.threshold)
    else:
        process_image(detector, args.image, args.threshold, args.save_dir)


if __name__ == '__main__':
    main()
@@ -1,126 +0,0 @@
# Emotion detection on detected faces
# Usage: python run_emotion.py --image path/to/image.jpg
#        python run_emotion.py --webcam

import argparse
import os
from pathlib import Path

import cv2

from uniface import SCRFD, Emotion, RetinaFace
from uniface.visualization import draw_detections


def draw_emotion_label(image, bbox, emotion: str, confidence: float):
    """Draw emotion label above the bounding box."""
    x1, y1 = int(bbox[0]), int(bbox[1])
    text = f'{emotion} ({confidence:.2f})'
    (tw, th), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
    cv2.rectangle(image, (x1, y1 - th - 10), (x1 + tw + 10, y1), (255, 0, 0), -1)
    cv2.putText(image, text, (x1 + 5, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)


def process_image(
    detector,
    emotion_predictor,
    image_path: str,
    save_dir: str = 'outputs',
    threshold: float = 0.6,
):
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Failed to load image from '{image_path}'")
        return

    faces = detector.detect(image)
    print(f'Detected {len(faces)} face(s)')

    if not faces:
        return

    bboxes = [f['bbox'] for f in faces]
    scores = [f['confidence'] for f in faces]
    landmarks = [f['landmarks'] for f in faces]
    draw_detections(
        image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
    )

    for i, face in enumerate(faces):
        emotion, confidence = emotion_predictor.predict(image, face['landmarks'])
        print(f' Face {i + 1}: {emotion} (confidence: {confidence:.3f})')
        draw_emotion_label(image, face['bbox'], emotion, confidence)

    os.makedirs(save_dir, exist_ok=True)
    output_path = os.path.join(save_dir, f'{Path(image_path).stem}_emotion.jpg')
    cv2.imwrite(output_path, image)
    print(f'Output saved: {output_path}')


def run_webcam(detector, emotion_predictor, threshold: float = 0.6):
    cap = cv2.VideoCapture(0)  # 0 = default webcam
    if not cap.isOpened():
        print('Cannot open webcam')
        return

    print("Press 'q' to quit")

    while True:
        ret, frame = cap.read()
        frame = cv2.flip(frame, 1)  # mirror for natural interaction
        if not ret:
            break

        faces = detector.detect(frame)

        # unpack face data for visualization
        bboxes = [f['bbox'] for f in faces]
        scores = [f['confidence'] for f in faces]
        landmarks = [f['landmarks'] for f in faces]
        draw_detections(frame, bboxes, scores, landmarks, vis_threshold=threshold)

        for face in faces:
            emotion, confidence = emotion_predictor.predict(frame, face['landmarks'])
            draw_emotion_label(frame, face['bbox'], emotion, confidence)

        cv2.putText(
            frame,
            f'Faces: {len(faces)}',
            (10, 30),
            cv2.FONT_HERSHEY_SIMPLEX,
            1,
            (0, 255, 0),
            2,
        )
        cv2.imshow('Emotion Detection', frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()


def main():
    parser = argparse.ArgumentParser(description='Run emotion detection')
    parser.add_argument('--image', type=str, help='Path to input image')
    parser.add_argument('--webcam', action='store_true', help='Use webcam')
    parser.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
    parser.add_argument('--threshold', type=float, default=0.6, help='Visualization threshold')
    parser.add_argument('--save_dir', type=str, default='outputs')
    args = parser.parse_args()

    if not args.image and not args.webcam:
        parser.error('Either --image or --webcam must be specified')

    detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
    emotion_predictor = Emotion()

    if args.webcam:
        run_webcam(detector, emotion_predictor, args.threshold)
    else:
        process_image(detector, emotion_predictor, args.image, args.save_dir, args.threshold)


if __name__ == '__main__':
    main()
@@ -1,116 +0,0 @@
# Face analysis using FaceAnalyzer
# Usage: python run_face_analyzer.py --image path/to/image.jpg

import argparse
import os
from pathlib import Path

import cv2
import numpy as np

from uniface import AgeGender, ArcFace, FaceAnalyzer, RetinaFace
from uniface.visualization import draw_detections


def draw_face_info(image, face, face_id):
    """Draw face ID and attributes above bounding box."""
    x1, y1, x2, y2 = map(int, face.bbox)
    lines = [f'ID: {face_id}', f'Conf: {face.confidence:.2f}']
    if face.age and face.sex:
        lines.append(f'{face.sex}, {face.age}y')

    for i, line in enumerate(lines):
        y_pos = y1 - 10 - (len(lines) - 1 - i) * 25
        if y_pos < 20:
            y_pos = y2 + 20 + i * 25
        (tw, th), _ = cv2.getTextSize(line, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
        cv2.rectangle(image, (x1, y_pos - th - 5), (x1 + tw + 10, y_pos + 5), (0, 255, 0), -1)
        cv2.putText(image, line, (x1 + 5, y_pos), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 2)


def process_image(analyzer, image_path: str, save_dir: str = 'outputs', show_similarity: bool = True):
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Failed to load image from '{image_path}'")
        return

    faces = analyzer.analyze(image)
    print(f'Detected {len(faces)} face(s)')

    if not faces:
        return

    for i, face in enumerate(faces, 1):
        info = f' Face {i}: {face.sex}, {face.age}y' if face.age and face.sex else f' Face {i}'
        if face.embedding is not None:
            info += f' (embedding: {face.embedding.shape})'
        print(info)

    if show_similarity and len(faces) >= 2:
        print('\nSimilarity Matrix:')
        n = len(faces)
        sim_matrix = np.zeros((n, n))

        for i in range(n):
            for j in range(i, n):
                if i == j:
                    sim_matrix[i][j] = 1.0
                else:
                    sim = faces[i].compute_similarity(faces[j])
                    sim_matrix[i][j] = sim
                    sim_matrix[j][i] = sim

        print(' ', end='')
        for i in range(n):
            print(f' F{i + 1:2d} ', end='')
        print('\n ' + '-' * (7 * n))

        for i in range(n):
            print(f'F{i + 1:2d} | ', end='')
            for j in range(n):
                print(f'{sim_matrix[i][j]:6.3f} ', end='')
            print()

        pairs = [(i, j, sim_matrix[i][j]) for i in range(n) for j in range(i + 1, n)]
        pairs.sort(key=lambda x: x[2], reverse=True)

        print('\nTop matches (>0.4 = same person):')
        for i, j, sim in pairs[:3]:
            status = 'Same' if sim > 0.4 else 'Different'
            print(f' Face {i + 1} ↔ Face {j + 1}: {sim:.3f} ({status})')

    bboxes = [f.bbox for f in faces]
    scores = [f.confidence for f in faces]
    landmarks = [f.landmarks for f in faces]
    draw_detections(image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, fancy_bbox=True)

    for i, face in enumerate(faces, 1):
        draw_face_info(image, face, i)

    os.makedirs(save_dir, exist_ok=True)
    output_path = os.path.join(save_dir, f'{Path(image_path).stem}_analysis.jpg')
    cv2.imwrite(output_path, image)
    print(f'Output saved: {output_path}')


def main():
    parser = argparse.ArgumentParser(description='Face analysis with detection, recognition, and attributes')
    parser.add_argument('--image', type=str, required=True, help='Path to input image')
    parser.add_argument('--save_dir', type=str, default='outputs', help='Output directory')
    parser.add_argument('--no-similarity', action='store_true', help='Skip similarity matrix computation')
    args = parser.parse_args()

    if not os.path.exists(args.image):
        print(f'Error: Image not found: {args.image}')
        return

    detector = RetinaFace()
    recognizer = ArcFace()
    age_gender = AgeGender()
    analyzer = FaceAnalyzer(detector, recognizer, age_gender)

    process_image(analyzer, args.image, args.save_dir, show_similarity=not args.no_similarity)


if __name__ == '__main__':
    main()
@@ -1,97 +0,0 @@
# Real-time face search: match webcam faces against a reference image
# Usage: python run_face_search.py --image reference.jpg

import argparse

import cv2
import numpy as np

from uniface.detection import SCRFD, RetinaFace
from uniface.face_utils import compute_similarity
from uniface.recognition import ArcFace, MobileFace, SphereFace


def get_recognizer(name: str):
    if name == 'arcface':
        return ArcFace()
    elif name == 'mobileface':
        return MobileFace()
    else:
        return SphereFace()


def extract_reference_embedding(detector, recognizer, image_path: str) -> np.ndarray:
    image = cv2.imread(image_path)
    if image is None:
        raise RuntimeError(f'Failed to load image: {image_path}')

    faces = detector.detect(image)
    if not faces:
        raise RuntimeError('No faces found in reference image.')

    landmarks = faces[0]['landmarks']
    return recognizer.get_normalized_embedding(image, landmarks)


def run_webcam(detector, recognizer, ref_embedding: np.ndarray, threshold: float = 0.4):
    cap = cv2.VideoCapture(0)  # 0 = default webcam
    if not cap.isOpened():
        raise RuntimeError('Webcam could not be opened.')

    print("Press 'q' to quit")

    while True:
        ret, frame = cap.read()
        frame = cv2.flip(frame, 1)  # mirror for natural interaction
        if not ret:
            break

        faces = detector.detect(frame)

        for face in faces:
            bbox = face['bbox']
            landmarks = face['landmarks']
            x1, y1, x2, y2 = map(int, bbox)

            embedding = recognizer.get_normalized_embedding(frame, landmarks)
            sim = compute_similarity(ref_embedding, embedding)  # compare with reference

            # green = match, red = unknown
            label = f'Match ({sim:.2f})' if sim > threshold else f'Unknown ({sim:.2f})'
            color = (0, 255, 0) if sim > threshold else (0, 0, 255)

            cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
            cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)

        cv2.imshow('Face Recognition', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()


def main():
    parser = argparse.ArgumentParser(description='Face search using a reference image')
    parser.add_argument('--image', type=str, required=True, help='Reference face image')
    parser.add_argument('--threshold', type=float, default=0.4, help='Match threshold')
    parser.add_argument('--detector', type=str, default='scrfd', choices=['retinaface', 'scrfd'])
    parser.add_argument(
        '--recognizer',
        type=str,
        default='arcface',
        choices=['arcface', 'mobileface', 'sphereface'],
    )
    args = parser.parse_args()

    detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
    recognizer = get_recognizer(args.recognizer)

    print(f'Loading reference: {args.image}')
    ref_embedding = extract_reference_embedding(detector, recognizer, args.image)

    run_webcam(detector, recognizer, ref_embedding, args.threshold)


if __name__ == '__main__':
    main()
@@ -1,117 +0,0 @@
# 106-point facial landmark detection
# Usage: python run_landmarks.py --image path/to/image.jpg
#        python run_landmarks.py --webcam

import argparse
import os
from pathlib import Path

import cv2

from uniface import SCRFD, Landmark106, RetinaFace


def process_image(detector, landmarker, image_path: str, save_dir: str = 'outputs'):
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Failed to load image from '{image_path}'")
        return

    faces = detector.detect(image)
    print(f'Detected {len(faces)} face(s)')

    if not faces:
        return

    for i, face in enumerate(faces):
        bbox = face['bbox']
        x1, y1, x2, y2 = map(int, bbox)
        cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)

        landmarks = landmarker.get_landmarks(image, bbox)
        print(f' Face {i + 1}: {len(landmarks)} landmarks')

        for x, y in landmarks.astype(int):
            cv2.circle(image, (x, y), 1, (0, 255, 0), -1)

        cv2.putText(
            image,
            f'Face {i + 1}',
            (x1, y1 - 10),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.5,
            (0, 255, 0),
            2,
        )

    os.makedirs(save_dir, exist_ok=True)
    output_path = os.path.join(save_dir, f'{Path(image_path).stem}_landmarks.jpg')
    cv2.imwrite(output_path, image)
    print(f'Output saved: {output_path}')


def run_webcam(detector, landmarker):
    cap = cv2.VideoCapture(0)  # 0 = default webcam
    if not cap.isOpened():
        print('Cannot open webcam')
        return

    print("Press 'q' to quit")

    while True:
        ret, frame = cap.read()
        frame = cv2.flip(frame, 1)  # mirror for natural interaction
        if not ret:
            break

        faces = detector.detect(frame)

        for face in faces:
            bbox = face['bbox']
            x1, y1, x2, y2 = map(int, bbox)
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

            landmarks = landmarker.get_landmarks(frame, bbox)  # 106 points
            for x, y in landmarks.astype(int):
                cv2.circle(frame, (x, y), 1, (0, 255, 0), -1)

        cv2.putText(
            frame,
            f'Faces: {len(faces)}',
            (10, 30),
            cv2.FONT_HERSHEY_SIMPLEX,
            1,
            (0, 255, 0),
            2,
        )
        cv2.imshow('106-Point Landmarks', frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()


def main():
    parser = argparse.ArgumentParser(description='Run facial landmark detection')
    parser.add_argument('--image', type=str, help='Path to input image')
    parser.add_argument('--webcam', action='store_true', help='Use webcam')
    parser.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
    parser.add_argument('--save_dir', type=str, default='outputs')
    args = parser.parse_args()

    if not args.image and not args.webcam:
        parser.error('Either --image or --webcam must be specified')

    detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
    landmarker = Landmark106()

    if args.webcam:
        run_webcam(detector, landmarker)
    else:
        process_image(detector, landmarker, args.image, args.save_dir)


if __name__ == '__main__':
    main()
@@ -1,109 +0,0 @@
# Face detection on video files
# Usage: python run_video_detection.py --input video.mp4 --output output.mp4

import argparse
from pathlib import Path

import cv2
from tqdm import tqdm

from uniface import SCRFD, RetinaFace
from uniface.visualization import draw_detections


def process_video(
    detector,
    input_path: str,
    output_path: str,
    threshold: float = 0.6,
    show_preview: bool = False,
):
    cap = cv2.VideoCapture(input_path)
    if not cap.isOpened():
        print(f"Error: Cannot open video file '{input_path}'")
        return

    # get video properties
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    print(f'Input: {input_path} ({width}x{height}, {fps:.1f} fps, {total_frames} frames)')
    print(f'Output: {output_path}')

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # codec for .mp4
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    if not out.isOpened():
        print(f"Error: Cannot create output video '{output_path}'")
        cap.release()
        return

    frame_count = 0
    total_faces = 0

    for _ in tqdm(range(total_frames), desc='Processing', unit='frames'):
        ret, frame = cap.read()
        if not ret:
            break

        frame_count += 1
        faces = detector.detect(frame)
        total_faces += len(faces)

        bboxes = [f['bbox'] for f in faces]
        scores = [f['confidence'] for f in faces]
        landmarks = [f['landmarks'] for f in faces]
        draw_detections(
            image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
        )

        cv2.putText(
            frame,
            f'Faces: {len(faces)}',
            (10, 30),
            cv2.FONT_HERSHEY_SIMPLEX,
            1,
            (0, 255, 0),
            2,
        )
        out.write(frame)

        if show_preview:
            cv2.imshow("Processing - Press 'q' to cancel", frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                print('\nCancelled by user')
                break

    cap.release()
    out.release()
    if show_preview:
        cv2.destroyAllWindows()

    avg_faces = total_faces / frame_count if frame_count > 0 else 0
    print(f'\nDone! {frame_count} frames, {total_faces} faces ({avg_faces:.1f} avg/frame)')
    print(f'Saved: {output_path}')


def main():
    parser = argparse.ArgumentParser(description='Process video with face detection')
    parser.add_argument('--input', type=str, required=True, help='Input video path')
    parser.add_argument('--output', type=str, required=True, help='Output video path')
    parser.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
    parser.add_argument('--threshold', type=float, default=0.6, help='Visualization threshold')
    parser.add_argument('--preview', action='store_true', help='Show live preview')
    args = parser.parse_args()

    if not Path(args.input).exists():
        print(f"Error: Input file '{args.input}' does not exist")
        return

    Path(args.output).parent.mkdir(parents=True, exist_ok=True)

    detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
    process_video(detector, args.input, args.output, args.threshold, args.preview)


if __name__ == '__main__':
    main()
@@ -1,7 +1,15 @@
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

"""Tests for AgeGender attribute predictor."""

from __future__ import annotations

import numpy as np
import pytest

from uniface.attribute import AgeGender
from uniface.attribute import AgeGender, AttributeResult


@pytest.fixture
@@ -24,19 +32,22 @@ def test_model_initialization(age_gender_model):


def test_prediction_output_format(age_gender_model, mock_image, mock_bbox):
    gender_id, age = age_gender_model.predict(mock_image, mock_bbox)
    assert isinstance(gender_id, int), f'Gender ID should be int, got {type(gender_id)}'
    assert isinstance(age, int), f'Age should be int, got {type(age)}'
    result = age_gender_model.predict(mock_image, mock_bbox)
    assert isinstance(result, AttributeResult), f'Result should be AttributeResult, got {type(result)}'
    assert isinstance(result.gender, int), f'Gender should be int, got {type(result.gender)}'
    assert isinstance(result.age, int), f'Age should be int, got {type(result.age)}'
    assert isinstance(result.sex, str), f'Sex should be str, got {type(result.sex)}'


def test_gender_values(age_gender_model, mock_image, mock_bbox):
    gender_id, age = age_gender_model.predict(mock_image, mock_bbox)
    assert gender_id in [0, 1], f'Gender ID should be 0 (Female) or 1 (Male), got {gender_id}'
    result = age_gender_model.predict(mock_image, mock_bbox)
    assert result.gender in [0, 1], f'Gender should be 0 (Female) or 1 (Male), got {result.gender}'
    assert result.sex in ['Female', 'Male'], f'Sex should be Female or Male, got {result.sex}'


def test_age_range(age_gender_model, mock_image, mock_bbox):
    gender_id, age = age_gender_model.predict(mock_image, mock_bbox)
    assert 0 <= age <= 120, f'Age should be between 0 and 120, got {age}'
    result = age_gender_model.predict(mock_image, mock_bbox)
    assert 0 <= result.age <= 120, f'Age should be between 0 and 120, got {result.age}'


def test_different_bbox_sizes(age_gender_model, mock_image):
@@ -47,9 +58,9 @@ def test_different_bbox_sizes(age_gender_model, mock_image):
    ]

    for bbox in test_bboxes:
        gender_id, age = age_gender_model.predict(mock_image, bbox)
        assert gender_id in [0, 1], f'Failed for bbox {bbox}'
        assert 0 <= age <= 120, f'Age out of range for bbox {bbox}'
        result = age_gender_model.predict(mock_image, bbox)
        assert result.gender in [0, 1], f'Failed for bbox {bbox}'
        assert 0 <= result.age <= 120, f'Age out of range for bbox {bbox}'


def test_different_image_sizes(age_gender_model, mock_bbox):
@@ -57,31 +68,31 @@ def test_different_image_sizes(age_gender_model, mock_bbox):

    for size in test_sizes:
        mock_image = np.random.randint(0, 255, size, dtype=np.uint8)
        gender_id, age = age_gender_model.predict(mock_image, mock_bbox)
        assert gender_id in [0, 1], f'Failed for image size {size}'
        assert 0 <= age <= 120, f'Age out of range for image size {size}'
        result = age_gender_model.predict(mock_image, mock_bbox)
        assert result.gender in [0, 1], f'Failed for image size {size}'
        assert 0 <= result.age <= 120, f'Age out of range for image size {size}'


def test_consistency(age_gender_model, mock_image, mock_bbox):
    gender_id1, age1 = age_gender_model.predict(mock_image, mock_bbox)
    gender_id2, age2 = age_gender_model.predict(mock_image, mock_bbox)
    result1 = age_gender_model.predict(mock_image, mock_bbox)
    result2 = age_gender_model.predict(mock_image, mock_bbox)

    assert gender_id1 == gender_id2, 'Same input should produce same gender prediction'
    assert age1 == age2, 'Same input should produce same age prediction'
    assert result1.gender == result2.gender, 'Same input should produce same gender prediction'
    assert result1.age == result2.age, 'Same input should produce same age prediction'


def test_bbox_list_format(age_gender_model, mock_image):
    bbox_list = [100, 100, 300, 300]
    gender_id, age = age_gender_model.predict(mock_image, bbox_list)
    assert gender_id in [0, 1], 'Should work with bbox as list'
    assert 0 <= age <= 120, 'Age should be in valid range'
    result = age_gender_model.predict(mock_image, bbox_list)
    assert result.gender in [0, 1], 'Should work with bbox as list'
    assert 0 <= result.age <= 120, 'Age should be in valid range'


def test_bbox_array_format(age_gender_model, mock_image):
    bbox_array = np.array([100, 100, 300, 300])
    gender_id, age = age_gender_model.predict(mock_image, bbox_array)
    assert gender_id in [0, 1], 'Should work with bbox as numpy array'
    assert 0 <= age <= 120, 'Age should be in valid range'
    result = age_gender_model.predict(mock_image, bbox_array)
    assert result.gender in [0, 1], 'Should work with bbox as numpy array'
    assert 0 <= result.age <= 120, 'Age should be in valid range'


def test_multiple_predictions(age_gender_model, mock_image):
@@ -93,25 +104,37 @@ def test_multiple_predictions(age_gender_model, mock_image):

    results = []
    for bbox in bboxes:
        gender_id, age = age_gender_model.predict(mock_image, bbox)
        results.append((gender_id, age))
        result = age_gender_model.predict(mock_image, bbox)
        results.append(result)

    assert len(results) == 3, 'Should have 3 predictions'
    for gender_id, age in results:
        assert gender_id in [0, 1]
        assert 0 <= age <= 120
    for result in results:
        assert result.gender in [0, 1]
        assert 0 <= result.age <= 120


def test_age_is_positive(age_gender_model, mock_image, mock_bbox):
    for _ in range(5):
        gender_id, age = age_gender_model.predict(mock_image, mock_bbox)
        assert age >= 0, f'Age should be non-negative, got {age}'
        result = age_gender_model.predict(mock_image, mock_bbox)
        assert result.age >= 0, f'Age should be non-negative, got {result.age}'


def test_output_format_for_visualization(age_gender_model, mock_image, mock_bbox):
    gender_id, age = age_gender_model.predict(mock_image, mock_bbox)
    gender_str = 'Female' if gender_id == 0 else 'Male'
    text = f'{gender_str}, {age}y'
    result = age_gender_model.predict(mock_image, mock_bbox)
    text = f'{result.sex}, {result.age}y'
    assert isinstance(text, str), 'Should be able to format as string'
    assert 'Male' in text or 'Female' in text, 'Text should contain gender'
    assert 'y' in text, "Text should contain 'y' for years"


def test_attribute_result_fields(age_gender_model, mock_image, mock_bbox):
    """Test that AttributeResult has correct fields for AgeGender model."""
    result = age_gender_model.predict(mock_image, mock_bbox)

    # AgeGender should set gender and age
    assert result.gender is not None
    assert result.age is not None

    # AgeGender should NOT set race and age_group (FairFace only)
    assert result.race is None
    assert result.age_group is None
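The migration these tests encode — a structured `AttributeResult` instead of a `(gender_id, age)` tuple — looks roughly like this in application code (a sketch; the fields follow the assertions above, and the random image stands in for a real frame):

```python
import numpy as np
from uniface import AgeGender, RetinaFace

detector = RetinaFace()
age_gender = AgeGender()

image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)  # stand-in BGR frame
for face in detector.detect(image):
    result = age_gender.predict(image, face.bbox)
    # result.gender is 0/1; result.sex is the readable 'Female'/'Male' label
    print(f'{result.sex}, {result.age}y')
```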
@@ -1,3 +1,11 @@
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

"""Tests for factory functions (create_detector, create_recognizer, etc.)."""

from __future__ import annotations

import numpy as np
import pytest

@@ -35,8 +43,8 @@ def test_create_detector_with_config():
    detector = create_detector(
        'retinaface',
        model_name=RetinaFaceWeights.MNET_V2,
        conf_thresh=0.8,
        nms_thresh=0.3,
        confidence_threshold=0.8,
        nms_threshold=0.3,
    )
    assert detector is not None, 'Failed to create detector with custom config'

@@ -53,7 +61,7 @@ def test_create_detector_scrfd_with_model():
    """
    Test creating SCRFD detector with specific model.
    """
    detector = create_detector('scrfd', model_name=SCRFDWeights.SCRFD_10G_KPS, conf_thresh=0.5)
    detector = create_detector('scrfd', model_name=SCRFDWeights.SCRFD_10G_KPS, confidence_threshold=0.5)
    assert detector is not None, 'Failed to create SCRFD with specific model'


@@ -141,13 +149,13 @@ def test_detect_faces_with_threshold():
    Test detect_faces with custom confidence threshold.
    """
    mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
    faces = detect_faces(mock_image, method='retinaface', conf_thresh=0.8)
    faces = detect_faces(mock_image, method='retinaface', confidence_threshold=0.8)

    assert isinstance(faces, list), 'detect_faces should return a list'

    # All detections should respect threshold
    for face in faces:
        assert face['confidence'] >= 0.8, 'All detections should meet confidence threshold'
        assert face.confidence >= 0.8, 'All detections should meet confidence threshold'


def test_detect_faces_default_method():
@@ -246,8 +254,8 @@ def test_detector_with_different_configs():
    """
    Test creating multiple detectors with different configurations.
    """
    detector_high_thresh = create_detector('retinaface', conf_thresh=0.9)
    detector_low_thresh = create_detector('retinaface', conf_thresh=0.3)
    detector_high_thresh = create_detector('retinaface', confidence_threshold=0.9)
    detector_low_thresh = create_detector('retinaface', confidence_threshold=0.3)

    mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
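The only API change these factory tests encode is the keyword rename (`conf_thresh`/`nms_thresh` to `confidence_threshold`/`nms_threshold`); a minimal before/after sketch (the import path is assumed, since the tests' own import lies outside this hunk):

```python
from uniface import create_detector  # assumed export location

# old (pre-rename):
# detector = create_detector('retinaface', conf_thresh=0.8, nms_thresh=0.3)

# new:
detector = create_detector(
    'retinaface',
    confidence_threshold=0.8,  # was: conf_thresh
    nms_threshold=0.3,         # was: nms_thresh
)
```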
@@ -1,3 +1,11 @@
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

"""Tests for 106-point facial landmark detector."""

from __future__ import annotations

import numpy as np
import pytest
122 tests/test_parsing.py Normal file
@@ -0,0 +1,122 @@
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

"""Tests for BiSeNet face parsing model."""

from __future__ import annotations

import numpy as np
import pytest

from uniface.constants import ParsingWeights
from uniface.parsing import BiSeNet, create_face_parser


def test_bisenet_initialization():
    """Test BiSeNet initialization."""
    parser = BiSeNet()
    assert parser is not None
    assert parser.input_size == (512, 512)


def test_bisenet_with_different_models():
    """Test BiSeNet with different model weights."""
    parser_resnet18 = BiSeNet(model_name=ParsingWeights.RESNET18)
    parser_resnet34 = BiSeNet(model_name=ParsingWeights.RESNET34)

    assert parser_resnet18 is not None
    assert parser_resnet34 is not None


def test_bisenet_preprocess():
    """Test preprocessing."""
    parser = BiSeNet()

    # Create a dummy face image
    face_image = np.random.randint(0, 255, (256, 256, 3), dtype=np.uint8)

    # Preprocess
    preprocessed = parser.preprocess(face_image)

    assert preprocessed.shape == (1, 3, 512, 512)
    assert preprocessed.dtype == np.float32


def test_bisenet_postprocess():
    """Test postprocessing."""
    parser = BiSeNet()

    # Create dummy model output (batch_size=1, num_classes=19, H=512, W=512)
    dummy_output = np.random.randn(1, 19, 512, 512).astype(np.float32)

    # Postprocess
    mask = parser.postprocess(dummy_output, original_size=(256, 256))

    assert mask.shape == (256, 256)
    assert mask.dtype == np.uint8
    assert mask.min() >= 0
    assert mask.max() < 19  # 19 classes (0-18)


def test_bisenet_parse():
    """Test end-to-end parsing."""
    parser = BiSeNet()

    # Create a dummy face image
    face_image = np.random.randint(0, 255, (256, 256, 3), dtype=np.uint8)

    # Parse
    mask = parser.parse(face_image)

    assert mask.shape == (256, 256)
    assert mask.dtype == np.uint8
    assert mask.min() >= 0
    assert mask.max() < 19


def test_bisenet_callable():
    """Test that BiSeNet is callable."""
    parser = BiSeNet()
    face_image = np.random.randint(0, 255, (256, 256, 3), dtype=np.uint8)

    # Should work as callable
    mask = parser(face_image)

    assert mask.shape == (256, 256)
    assert mask.dtype == np.uint8


def test_create_face_parser_with_enum():
    """Test factory function with enum."""
    parser = create_face_parser(ParsingWeights.RESNET18)
    assert parser is not None
    assert isinstance(parser, BiSeNet)


def test_create_face_parser_with_string():
    """Test factory function with string."""
    parser = create_face_parser('parsing_resnet18')
    assert parser is not None
    assert isinstance(parser, BiSeNet)


def test_create_face_parser_invalid_model():
    """Test factory function with invalid model name."""
    with pytest.raises(ValueError, match='Unknown face parsing model'):
        create_face_parser('invalid_model')


def test_bisenet_different_input_sizes():
    """Test parsing with different input image sizes."""
    parser = BiSeNet()

    # Test with different sizes
    sizes = [(128, 128), (256, 256), (512, 512), (640, 480)]

    for h, w in sizes:
        face_image = np.random.randint(0, 255, (h, w, 3), dtype=np.uint8)
        mask = parser.parse(face_image)

        assert mask.shape == (h, w), f'Failed for size {h}x{w}'
        assert mask.dtype == np.uint8
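Taken together, these tests pin down BiSeNet's contract: any-size BGR input in, a uint8 label mask of the same height and width out, with class IDs 0-18. A quick usage sketch on that basis (the random array stands in for a real face crop):

```python
import numpy as np
from uniface.parsing import create_face_parser

parser = create_face_parser('parsing_resnet18')
face_crop = np.random.randint(0, 255, (256, 256, 3), dtype=np.uint8)  # stand-in crop
mask = parser.parse(face_crop)  # uint8 label mask, same HxW, class IDs 0-18
print(mask.shape, mask.dtype)
```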
@@ -1,3 +1,11 @@
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

"""Tests for face recognition models (ArcFace, MobileFace, SphereFace)."""

from __future__ import annotations

import numpy as np
import pytest
@@ -1,3 +1,11 @@
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

"""Tests for RetinaFace detector."""

from __future__ import annotations

import numpy as np
import pytest

@@ -9,9 +17,9 @@ from uniface.detection import RetinaFace
def retinaface_model():
    return RetinaFace(
        model_name=RetinaFaceWeights.MNET_V2,
        conf_thresh=0.5,
        confidence_threshold=0.5,
        pre_nms_topk=5000,
        nms_thresh=0.4,
        nms_threshold=0.4,
        post_nms_topk=750,
    )

@@ -27,15 +35,15 @@ def test_inference_on_640x640_image(retinaface_model):
    assert isinstance(faces, list), 'Detections should be a list.'

    for face in faces:
        assert isinstance(face, dict), 'Each detection should be a dictionary.'
        assert 'bbox' in face, "Each detection should have a 'bbox' key."
        assert 'confidence' in face, "Each detection should have a 'confidence' key."
        assert 'landmarks' in face, "Each detection should have a 'landmarks' key."
        # Face is a dataclass, check attributes exist
        assert hasattr(face, 'bbox'), "Each detection should have a 'bbox' attribute."
        assert hasattr(face, 'confidence'), "Each detection should have a 'confidence' attribute."
        assert hasattr(face, 'landmarks'), "Each detection should have a 'landmarks' attribute."

        bbox = face['bbox']
        bbox = face.bbox
        assert len(bbox) == 4, 'BBox should have 4 values (x1, y1, x2, y2).'

        landmarks = face['landmarks']
        landmarks = face.landmarks
        assert len(landmarks) == 5, 'Should have 5 landmark points.'
        assert all(len(pt) == 2 for pt in landmarks), 'Each landmark should be (x, y).'

@@ -45,7 +53,7 @@ def test_confidence_threshold(retinaface_model):
    faces = retinaface_model.detect(mock_image)

    for face in faces:
        confidence = face['confidence']
        confidence = face.confidence
        assert confidence >= 0.5, f'Detection has confidence {confidence} below threshold 0.5'
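The detector tests above capture the dict-to-dataclass migration for detection results; downstream code swaps subscript access for attribute access. A sketch of the change (the random image stands in for a real frame, and landmarks is a (5, 2) array per the tests above):

```python
import numpy as np
from uniface.detection import RetinaFace

detector = RetinaFace()
image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)

for face in detector.detect(image):
    # before: face['bbox'], face['confidence'], face['landmarks']
    x1, y1, x2, y2 = face.bbox
    if face.confidence >= 0.5:
        print((x1, y1, x2, y2), len(face.landmarks))
```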
@@ -1,3 +1,11 @@
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

"""Tests for SCRFD detector."""

from __future__ import annotations

import numpy as np
import pytest

@@ -9,8 +17,8 @@ from uniface.detection import SCRFD
def scrfd_model():
    return SCRFD(
        model_name=SCRFDWeights.SCRFD_500M_KPS,
        conf_thresh=0.5,
        nms_thresh=0.4,
        confidence_threshold=0.5,
        nms_threshold=0.4,
    )


@@ -25,15 +33,15 @@ def test_inference_on_640x640_image(scrfd_model):
    assert isinstance(faces, list), 'Detections should be a list.'

    for face in faces:
        assert isinstance(face, dict), 'Each detection should be a dictionary.'
        assert 'bbox' in face, "Each detection should have a 'bbox' key."
        assert 'confidence' in face, "Each detection should have a 'confidence' key."
        assert 'landmarks' in face, "Each detection should have a 'landmarks' key."
        # Face is a dataclass, check attributes exist
        assert hasattr(face, 'bbox'), "Each detection should have a 'bbox' attribute."
        assert hasattr(face, 'confidence'), "Each detection should have a 'confidence' attribute."
        assert hasattr(face, 'landmarks'), "Each detection should have a 'landmarks' attribute."

        bbox = face['bbox']
        bbox = face.bbox
        assert len(bbox) == 4, 'BBox should have 4 values (x1, y1, x2, y2).'

        landmarks = face['landmarks']
        landmarks = face.landmarks
        assert len(landmarks) == 5, 'Should have 5 landmark points.'
        assert all(len(pt) == 2 for pt in landmarks), 'Each landmark should be (x, y).'

@@ -43,7 +51,7 @@ def test_confidence_threshold(scrfd_model):
    faces = scrfd_model.detect(mock_image)

    for face in faces:
        confidence = face['confidence']
        confidence = face.confidence
        assert confidence >= 0.5, f'Detection has confidence {confidence} below threshold 0.5'


@@ -63,7 +71,7 @@ def test_different_input_sizes(scrfd_model):


def test_scrfd_10g_model():
    model = SCRFD(model_name=SCRFDWeights.SCRFD_10G_KPS, conf_thresh=0.5)
    model = SCRFD(model_name=SCRFDWeights.SCRFD_10G_KPS, confidence_threshold=0.5)
    assert model is not None, 'SCRFD 10G model initialization failed.'

    mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
282 tests/test_types.py Normal file
@@ -0,0 +1,282 @@
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

"""Tests for UniFace type definitions (dataclasses)."""

from __future__ import annotations

import numpy as np
import pytest

from uniface.types import AttributeResult, EmotionResult, Face, GazeResult, SpoofingResult


class TestGazeResult:
    """Tests for GazeResult dataclass."""

    def test_creation(self):
        result = GazeResult(pitch=0.1, yaw=-0.2)
        assert result.pitch == 0.1
        assert result.yaw == -0.2

    def test_immutability(self):
        result = GazeResult(pitch=0.1, yaw=-0.2)
        with pytest.raises(AttributeError):
            result.pitch = 0.5  # type: ignore

    def test_repr(self):
        result = GazeResult(pitch=0.1234, yaw=-0.5678)
        repr_str = repr(result)
        assert 'GazeResult' in repr_str
        assert '0.1234' in repr_str
        assert '-0.5678' in repr_str

    def test_equality(self):
        result1 = GazeResult(pitch=0.1, yaw=-0.2)
        result2 = GazeResult(pitch=0.1, yaw=-0.2)
        assert result1 == result2

    def test_hashable(self):
        """Frozen dataclasses should be hashable."""
        result = GazeResult(pitch=0.1, yaw=-0.2)
        # Should not raise
        hash(result)
        # Can be used in sets/dicts
        result_set = {result}
        assert result in result_set


class TestSpoofingResult:
    """Tests for SpoofingResult dataclass."""

    def test_creation_real(self):
        result = SpoofingResult(is_real=True, confidence=0.95)
        assert result.is_real is True
        assert result.confidence == 0.95

    def test_creation_fake(self):
        result = SpoofingResult(is_real=False, confidence=0.87)
        assert result.is_real is False
        assert result.confidence == 0.87

    def test_immutability(self):
        result = SpoofingResult(is_real=True, confidence=0.95)
        with pytest.raises(AttributeError):
            result.is_real = False  # type: ignore

    def test_repr_real(self):
        result = SpoofingResult(is_real=True, confidence=0.9512)
        repr_str = repr(result)
        assert 'SpoofingResult' in repr_str
        assert 'Real' in repr_str
        assert '0.9512' in repr_str

    def test_repr_fake(self):
        result = SpoofingResult(is_real=False, confidence=0.8765)
        repr_str = repr(result)
        assert 'Fake' in repr_str

    def test_hashable(self):
        result = SpoofingResult(is_real=True, confidence=0.95)
        hash(result)


class TestEmotionResult:
    """Tests for EmotionResult dataclass."""

    def test_creation(self):
        result = EmotionResult(emotion='Happy', confidence=0.92)
        assert result.emotion == 'Happy'
        assert result.confidence == 0.92

    def test_immutability(self):
        result = EmotionResult(emotion='Sad', confidence=0.75)
        with pytest.raises(AttributeError):
            result.emotion = 'Happy'  # type: ignore

    def test_repr(self):
        result = EmotionResult(emotion='Angry', confidence=0.8123)
        repr_str = repr(result)
        assert 'EmotionResult' in repr_str
        assert 'Angry' in repr_str
        assert '0.8123' in repr_str

    def test_various_emotions(self):
        emotions = ['Neutral', 'Happy', 'Sad', 'Surprise', 'Fear', 'Disgust', 'Angry']
        for emotion in emotions:
            result = EmotionResult(emotion=emotion, confidence=0.5)
            assert result.emotion == emotion

    def test_hashable(self):
        result = EmotionResult(emotion='Happy', confidence=0.92)
        hash(result)


class TestAttributeResult:
    """Tests for AttributeResult dataclass."""

    def test_age_gender_result(self):
        result = AttributeResult(gender=1, age=25)
        assert result.gender == 1
        assert result.age == 25
        assert result.age_group is None
        assert result.race is None
        assert result.sex == 'Male'

    def test_fairface_result(self):
        result = AttributeResult(gender=0, age_group='20-29', race='East Asian')
        assert result.gender == 0
        assert result.age is None
        assert result.age_group == '20-29'
        assert result.race == 'East Asian'
        assert result.sex == 'Female'

    def test_sex_property_female(self):
        result = AttributeResult(gender=0)
        assert result.sex == 'Female'

    def test_sex_property_male(self):
        result = AttributeResult(gender=1)
        assert result.sex == 'Male'

    def test_immutability(self):
        result = AttributeResult(gender=1, age=30)
        with pytest.raises(AttributeError):
            result.age = 31  # type: ignore

    def test_repr_age_gender(self):
        result = AttributeResult(gender=1, age=25)
        repr_str = repr(result)
        assert 'AttributeResult' in repr_str
        assert 'Male' in repr_str
        assert 'age=25' in repr_str

    def test_repr_fairface(self):
        result = AttributeResult(gender=0, age_group='30-39', race='White')
        repr_str = repr(result)
        assert 'Female' in repr_str
        assert 'age_group=30-39' in repr_str
        assert 'race=White' in repr_str

    def test_hashable(self):
        result = AttributeResult(gender=1, age=25)
        hash(result)


class TestFace:
    """Tests for Face dataclass."""

    @pytest.fixture
    def sample_face(self):
        return Face(
            bbox=np.array([100, 100, 200, 200]),
            confidence=0.95,
            landmarks=np.array([[120, 130], [180, 130], [150, 160], [130, 180], [170, 180]]),
        )

    def test_creation(self, sample_face):
        assert sample_face.confidence == 0.95
        assert sample_face.bbox.shape == (4,)
        assert sample_face.landmarks.shape == (5, 2)

    def test_optional_attributes_default_none(self, sample_face):
        assert sample_face.embedding is None
        assert sample_face.gender is None
        assert sample_face.age is None
        assert sample_face.age_group is None
        assert sample_face.race is None
        assert sample_face.emotion is None
        assert sample_face.emotion_confidence is None

    def test_mutability(self, sample_face):
        """Face should be mutable for FaceAnalyzer enrichment."""
        sample_face.gender = 1
        sample_face.age = 25
        sample_face.embedding = np.random.randn(512)

        assert sample_face.gender == 1
        assert sample_face.age == 25
        assert sample_face.embedding.shape == (512,)

    def test_sex_property_none(self, sample_face):
        assert sample_face.sex is None

    def test_sex_property_female(self, sample_face):
        sample_face.gender = 0
        assert sample_face.sex == 'Female'

    def test_sex_property_male(self, sample_face):
        sample_face.gender = 1
        assert sample_face.sex == 'Male'

    def test_bbox_xyxy(self, sample_face):
        bbox_xyxy = sample_face.bbox_xyxy
        np.testing.assert_array_equal(bbox_xyxy, [100, 100, 200, 200])

    def test_bbox_xywh(self, sample_face):
        bbox_xywh = sample_face.bbox_xywh
        np.testing.assert_array_equal(bbox_xywh, [100, 100, 100, 100])

    def test_to_dict(self, sample_face):
        result = sample_face.to_dict()
        assert isinstance(result, dict)
        assert 'bbox' in result
        assert 'confidence' in result
        assert 'landmarks' in result

    def test_repr_minimal(self, sample_face):
        repr_str = repr(sample_face)
        assert 'Face' in repr_str
        assert 'confidence=0.950' in repr_str

    def test_repr_with_attributes(self, sample_face):
        sample_face.gender = 1
        sample_face.age = 30
        sample_face.emotion = 'Happy'

        repr_str = repr(sample_face)
        assert 'age=30' in repr_str
        assert 'sex=Male' in repr_str
        assert 'emotion=Happy' in repr_str

    def test_compute_similarity_no_embeddings(self, sample_face):
        other_face = Face(
            bbox=np.array([50, 50, 150, 150]),
            confidence=0.90,
            landmarks=np.random.randn(5, 2),
        )
        with pytest.raises(ValueError, match='Both faces must have embeddings'):
            sample_face.compute_similarity(other_face)

    def test_compute_similarity_with_embeddings(self, sample_face):
        # Create normalized embeddings
        sample_face.embedding = np.random.randn(512)
        sample_face.embedding /= np.linalg.norm(sample_face.embedding)

        other_face = Face(
            bbox=np.array([50, 50, 150, 150]),
            confidence=0.90,
            landmarks=np.random.randn(5, 2),
        )
        other_face.embedding = np.random.randn(512)
        other_face.embedding /= np.linalg.norm(other_face.embedding)

        similarity = sample_face.compute_similarity(other_face)
        assert isinstance(similarity, float)
        assert -1 <= similarity <= 1

    def test_compute_similarity_same_embedding(self, sample_face):
        embedding = np.random.randn(512)
        embedding /= np.linalg.norm(embedding)
        sample_face.embedding = embedding.copy()

        other_face = Face(
            bbox=np.array([50, 50, 150, 150]),
            confidence=0.90,
            landmarks=np.random.randn(5, 2),
            embedding=embedding.copy(),
        )

        similarity = sample_face.compute_similarity(other_face)
        assert similarity == pytest.approx(1.0, abs=1e-5)
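These `compute_similarity` tests (identical embeddings give ≈ 1.0, and every score lies in [-1, 1]) match the behavior of cosine similarity on the embedding vectors. A self-contained sketch of the equivalent math (not the library's actual implementation, just the formula the tests imply):

```python
import numpy as np

def cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
    """dot(a, b) / (|a| * |b|); 1.0 for identical directions, range [-1, 1]."""
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))

e = np.random.randn(512)
assert abs(cosine_similarity(e, e) - 1.0) < 1e-6
```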
@@ -1,3 +1,11 @@
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

"""Tests for utility functions (compute_similarity, face_alignment, etc.)."""

from __future__ import annotations

import numpy as np
import pytest

@@ -116,7 +124,7 @@ def test_compute_similarity_dtype():
    emb2 = emb2 / np.linalg.norm(emb2)

    similarity = compute_similarity(emb1, emb2)
    assert isinstance(similarity, (float, np.floating)), f'Similarity should be float, got {type(similarity)}'
    assert isinstance(similarity, float | np.floating), f'Similarity should be float, got {type(similarity)}'


# face_alignment tests
@@ -259,4 +267,4 @@ def test_compute_similarity_with_recognition_embeddings():

    # Should be a valid similarity score
    assert -1.0 <= similarity <= 1.0
    assert isinstance(similarity, (float, np.floating))
    assert isinstance(similarity, float | np.floating)
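The `isinstance(x, (float, np.floating))` to `isinstance(x, float | np.floating)` change swaps the tuple form for a PEP 604 union, which `isinstance` accepts on Python 3.10+ (the project's minimum per the CI matrix); the two spellings are equivalent:

```python
import numpy as np

x = np.float32(0.5)
assert isinstance(x, (float, np.floating))  # tuple form, all Python versions
assert isinstance(x, float | np.floating)   # union form, Python >= 3.10
```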
121 tools/README.md Normal file
@@ -0,0 +1,121 @@
# Tools

CLI utilities for testing and running UniFace features.

## Available Tools

| Tool | Description |
|------|-------------|
| `detection.py` | Face detection on image, video, or webcam |
| `face_anonymize.py` | Face anonymization/blurring for privacy |
| `age_gender.py` | Age and gender prediction |
| `face_emotion.py` | Emotion detection (7 or 8 emotions) |
| `gaze_estimation.py` | Gaze direction estimation |
| `landmarks.py` | 106-point facial landmark detection |
| `recognition.py` | Face embedding extraction and comparison |
| `face_analyzer.py` | Complete face analysis (detection + recognition + attributes) |
| `face_search.py` | Real-time face matching against a reference |
| `fairface.py` | FairFace attribute prediction (race, gender, age) |
| `spoofing.py` | Face anti-spoofing detection |
| `face_parsing.py` | Face semantic segmentation |
| `video_detection.py` | Face detection on video files with progress bar |
| `batch_process.py` | Batch process a folder of images |
| `download_model.py` | Download model weights |
| `sha256_generate.py` | Generate SHA256 hashes for model files |

## Unified `--source` Pattern

All tools use a unified `--source` argument that accepts:

- **Image path**: `--source photo.jpg`
- **Video path**: `--source video.mp4`
- **Camera ID**: `--source 0` (default webcam), `--source 1` (external camera)

## Usage Examples

```bash
# Face detection
python tools/detection.py --source assets/test.jpg  # image
python tools/detection.py --source video.mp4        # video
python tools/detection.py --source 0                # webcam

# Face anonymization
python tools/face_anonymize.py --source assets/test.jpg --method pixelate
python tools/face_anonymize.py --source video.mp4 --method gaussian
python tools/face_anonymize.py --source 0 --method pixelate

# Age and gender
python tools/age_gender.py --source assets/test.jpg
python tools/age_gender.py --source 0

# Emotion detection
python tools/face_emotion.py --source assets/test.jpg
python tools/face_emotion.py --source 0

# Gaze estimation
python tools/gaze_estimation.py --source assets/test.jpg
python tools/gaze_estimation.py --source 0

# Landmarks
python tools/landmarks.py --source assets/test.jpg
python tools/landmarks.py --source 0

# FairFace attributes
python tools/fairface.py --source assets/test.jpg
python tools/fairface.py --source 0

# Face parsing
python tools/face_parsing.py --source assets/test.jpg
python tools/face_parsing.py --source 0

# Face anti-spoofing
python tools/spoofing.py --source assets/test.jpg
python tools/spoofing.py --source 0

# Face analyzer
python tools/face_analyzer.py --source assets/test.jpg
python tools/face_analyzer.py --source 0

# Face recognition (extract embedding)
python tools/recognition.py --image assets/test.jpg

# Face comparison
python tools/recognition.py --image1 face1.jpg --image2 face2.jpg

# Face search (match against reference)
python tools/face_search.py --reference person.jpg --source 0
python tools/face_search.py --reference person.jpg --source video.mp4

# Video processing with progress bar
python tools/video_detection.py --source video.mp4
python tools/video_detection.py --source video.mp4 --output output.mp4

# Batch processing
python tools/batch_process.py --input images/ --output results/

# Download models
python tools/download_model.py --model-type retinaface
python tools/download_model.py  # downloads all
```

## Common Options

| Option | Description |
|--------|-------------|
| `--source` | Input source: image/video path or camera ID (0, 1, ...) |
| `--detector` | Choose detector: `retinaface`, `scrfd`, `yolov5face` |
| `--threshold` | Visualization confidence threshold (default varies by tool) |
| `--save-dir` | Output directory (default: `outputs`) |

## Supported Formats

**Images:** `.jpg`, `.jpeg`, `.png`, `.bmp`, `.webp`, `.tiff`

**Videos:** `.mp4`, `.avi`, `.mov`, `.mkv`, `.webm`, `.flv`

**Camera:** Use integer IDs (`0`, `1`, `2`, ...)

## Quick Test

```bash
python tools/detection.py --source assets/test.jpg
```
213 tools/age_gender.py Normal file
@@ -0,0 +1,213 @@
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

"""Age and gender prediction on detected faces.

Usage:
    python tools/age_gender.py --source path/to/image.jpg
    python tools/age_gender.py --source path/to/video.mp4
    python tools/age_gender.py --source 0  # webcam
"""

from __future__ import annotations

import argparse
import os
from pathlib import Path

import cv2

from uniface import SCRFD, AgeGender, RetinaFace
from uniface.visualization import draw_detections

IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}


def get_source_type(source: str) -> str:
    """Determine if source is image, video, or camera."""
    if source.isdigit():
        return 'camera'
    path = Path(source)
    suffix = path.suffix.lower()
    if suffix in IMAGE_EXTENSIONS:
        return 'image'
    elif suffix in VIDEO_EXTENSIONS:
        return 'video'
    else:
        return 'unknown'


def draw_age_gender_label(image, bbox, sex: str, age: int):
    """Draw age/gender label above the bounding box."""
    x1, y1 = int(bbox[0]), int(bbox[1])
    text = f'{sex}, {age}y'
    (tw, th), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
    cv2.rectangle(image, (x1, y1 - th - 10), (x1 + tw + 10, y1), (0, 255, 0), -1)
    cv2.putText(image, text, (x1 + 5, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 2)


def process_image(
    detector,
    age_gender,
    image_path: str,
    save_dir: str = 'outputs',
    threshold: float = 0.6,
):
    """Process a single image."""
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Failed to load image from '{image_path}'")
        return

    faces = detector.detect(image)
    print(f'Detected {len(faces)} face(s)')

    if not faces:
        return

    bboxes = [f.bbox for f in faces]
    scores = [f.confidence for f in faces]
    landmarks = [f.landmarks for f in faces]
    draw_detections(
        image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
    )

    for i, face in enumerate(faces):
        result = age_gender.predict(image, face.bbox)
        print(f' Face {i + 1}: {result.sex}, {result.age} years old')
        draw_age_gender_label(image, face.bbox, result.sex, result.age)

    os.makedirs(save_dir, exist_ok=True)
    output_path = os.path.join(save_dir, f'{Path(image_path).stem}_age_gender.jpg')
    cv2.imwrite(output_path, image)
    print(f'Output saved: {output_path}')


def process_video(
    detector,
    age_gender,
    video_path: str,
    save_dir: str = 'outputs',
    threshold: float = 0.6,
):
    """Process a video file."""
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Error: Cannot open video file '{video_path}'")
        return

    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    os.makedirs(save_dir, exist_ok=True)
    output_path = os.path.join(save_dir, f'{Path(video_path).stem}_age_gender.mp4')
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    print(f'Processing video: {video_path} ({total_frames} frames)')
    frame_count = 0

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        frame_count += 1
        faces = detector.detect(frame)

        bboxes = [f.bbox for f in faces]
        scores = [f.confidence for f in faces]
        landmarks = [f.landmarks for f in faces]
        draw_detections(
            image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
        )

        for face in faces:
            result = age_gender.predict(frame, face.bbox)
            draw_age_gender_label(frame, face.bbox, result.sex, result.age)

        cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        out.write(frame)

        if frame_count % 100 == 0:
            print(f' Processed {frame_count}/{total_frames} frames...')

    cap.release()
    out.release()
    print(f'Done! Output saved: {output_path}')


def run_camera(detector, age_gender, camera_id: int = 0, threshold: float = 0.6):
    """Run real-time detection on webcam."""
    cap = cv2.VideoCapture(camera_id)
    if not cap.isOpened():
        print(f'Cannot open camera {camera_id}')
        return

    print("Press 'q' to quit")

    while True:
        ret, frame = cap.read()
        if not ret:  # check before flipping: frame is None when the stream ends
            break
        frame = cv2.flip(frame, 1)

        faces = detector.detect(frame)

        bboxes = [f.bbox for f in faces]
        scores = [f.confidence for f in faces]
        landmarks = [f.landmarks for f in faces]
        draw_detections(
            image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
        )

        for face in faces:
            result = age_gender.predict(frame, face.bbox)
            draw_age_gender_label(frame, face.bbox, result.sex, result.age)

        cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        cv2.imshow('Age & Gender Detection', frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()


def main():
    parser = argparse.ArgumentParser(description='Run age and gender detection')
    parser.add_argument('--source', type=str, required=True, help='Image/video path or camera ID (0, 1, ...)')
    parser.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
    parser.add_argument('--threshold', type=float, default=0.6, help='Visualization threshold')
    parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
    args = parser.parse_args()

    detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
    age_gender = AgeGender()

    source_type = get_source_type(args.source)

    if source_type == 'camera':
        run_camera(detector, age_gender, int(args.source), args.threshold)
    elif source_type == 'image':
        if not os.path.exists(args.source):
            print(f'Error: Image not found: {args.source}')
            return
        process_image(detector, age_gender, args.source, args.save_dir, args.threshold)
    elif source_type == 'video':
        if not os.path.exists(args.source):
            print(f'Error: Video not found: {args.source}')
            return
        process_video(detector, age_gender, args.source, args.save_dir, args.threshold)
    else:
        print(f"Error: Unknown source type for '{args.source}'")
        print('Supported formats: images (.jpg, .png, ...), videos (.mp4, .avi, ...), or camera ID (0, 1, ...)')


if __name__ == '__main__':
    main()
@@ -1,5 +1,12 @@
# Batch face detection on a folder of images
# Usage: python batch_process.py --input images/ --output results/
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

"""Batch face detection on a folder of images.

Usage:
    python tools/batch_process.py --input images/ --output results/
"""

import argparse
from pathlib import Path
@@ -28,9 +35,9 @@ def process_image(detector, image_path: Path, output_path: Path, threshold: floa
    faces = detector.detect(image)

    # unpack face data for visualization
    bboxes = [f['bbox'] for f in faces]
    scores = [f['confidence'] for f in faces]
    landmarks = [f['landmarks'] for f in faces]
    bboxes = [f.bbox for f in faces]
    scores = [f.confidence for f in faces]
    landmarks = [f.landmarks for f in faces]
    draw_detections(
        image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
    )
202 tools/detection.py Normal file
@@ -0,0 +1,202 @@
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

"""Face detection on image, video, or webcam.

Usage:
    python tools/detection.py --source path/to/image.jpg
    python tools/detection.py --source path/to/video.mp4
    python tools/detection.py --source 0  # webcam
"""

from __future__ import annotations

import argparse
import os
from pathlib import Path

import cv2

from uniface.detection import SCRFD, RetinaFace, YOLOv5Face, YOLOv8Face
from uniface.visualization import draw_detections

IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}


def get_source_type(source: str) -> str:
    """Determine if source is image, video, or camera."""
    if source.isdigit():
        return 'camera'
    path = Path(source)
    suffix = path.suffix.lower()
    if suffix in IMAGE_EXTENSIONS:
        return 'image'
    elif suffix in VIDEO_EXTENSIONS:
        return 'video'
    else:
        return 'unknown'


def process_image(detector, image_path: str, threshold: float = 0.6, save_dir: str = 'outputs'):
    """Process a single image."""
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Failed to load image from '{image_path}'")
        return

    faces = detector.detect(image)

    if faces:
        bboxes = [face.bbox for face in faces]
        scores = [face.confidence for face in faces]
        landmarks = [face.landmarks for face in faces]
        draw_detections(image, bboxes, scores, landmarks, vis_threshold=threshold)

    os.makedirs(save_dir, exist_ok=True)
    output_path = os.path.join(save_dir, f'{os.path.splitext(os.path.basename(image_path))[0]}_out.jpg')
    cv2.imwrite(output_path, image)
    print(f'Detected {len(faces)} face(s). Output saved: {output_path}')


def process_video(detector, video_path: str, threshold: float = 0.6, save_dir: str = 'outputs'):
    """Process a video file."""
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Error: Cannot open video file '{video_path}'")
        return

    # Get video properties
    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    os.makedirs(save_dir, exist_ok=True)
    output_path = os.path.join(save_dir, f'{Path(video_path).stem}_out.mp4')
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    print(f'Processing video: {video_path} ({total_frames} frames)')
    frame_count = 0

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        frame_count += 1
        faces = detector.detect(frame)

        bboxes = [f.bbox for f in faces]
        scores = [f.confidence for f in faces]
        landmarks = [f.landmarks for f in faces]
        draw_detections(
            image=frame,
            bboxes=bboxes,
            scores=scores,
            landmarks=landmarks,
            vis_threshold=threshold,
            draw_score=True,
            fancy_bbox=True,
        )

        cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        out.write(frame)

        # Show progress
        if frame_count % 100 == 0:
            print(f' Processed {frame_count}/{total_frames} frames...')

    cap.release()
    out.release()
    print(f'Done! Output saved: {output_path}')


def run_camera(detector, camera_id: int = 0, threshold: float = 0.6):
    """Run real-time detection on webcam."""
    cap = cv2.VideoCapture(camera_id)
    if not cap.isOpened():
        print(f'Cannot open camera {camera_id}')
        return

    print("Press 'q' to quit")

    while True:
        ret, frame = cap.read()
        if not ret:  # check before flipping: frame is None when the stream ends
            break
        frame = cv2.flip(frame, 1)  # mirror for natural interaction

        faces = detector.detect(frame)

        bboxes = [f.bbox for f in faces]
        scores = [f.confidence for f in faces]
        landmarks = [f.landmarks for f in faces]
        draw_detections(
            image=frame,
            bboxes=bboxes,
            scores=scores,
            landmarks=landmarks,
            vis_threshold=threshold,
            draw_score=True,
            fancy_bbox=True,
        )

        cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        cv2.imshow('Face Detection', frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()


def main():
    parser = argparse.ArgumentParser(description='Run face detection')
    parser.add_argument('--source', type=str, required=True, help='Image/video path or camera ID (0, 1, ...)')
    parser.add_argument(
        '--method', type=str, default='retinaface', choices=['retinaface', 'scrfd', 'yolov5face', 'yolov8face']
    )
    parser.add_argument('--threshold', type=float, default=0.25, help='Visualization threshold')
    parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
    args = parser.parse_args()

    # Initialize detector
    if args.method == 'retinaface':
        detector = RetinaFace()
    elif args.method == 'scrfd':
        detector = SCRFD()
    elif args.method == 'yolov5face':
        from uniface.constants import YOLOv5FaceWeights

        detector = YOLOv5Face(model_name=YOLOv5FaceWeights.YOLOV5M)
    else:  # yolov8face
        from uniface.constants import YOLOv8FaceWeights

        detector = YOLOv8Face(model_name=YOLOv8FaceWeights.YOLOV8N)

    # Determine source type and process
    source_type = get_source_type(args.source)

    if source_type == 'camera':
        run_camera(detector, int(args.source), args.threshold)
    elif source_type == 'image':
        if not os.path.exists(args.source):
            print(f'Error: Image not found: {args.source}')
            return
        process_image(detector, args.source, args.threshold, args.save_dir)
    elif source_type == 'video':
        if not os.path.exists(args.source):
            print(f'Error: Video not found: {args.source}')
            return
        process_video(detector, args.source, args.threshold, args.save_dir)
    else:
        print(f"Error: Unknown source type for '{args.source}'")
        print('Supported formats: images (.jpg, .png, ...), videos (.mp4, .avi, ...), or camera ID (0, 1, ...)')


if __name__ == '__main__':
    main()
239
tools/face_analyzer.py
Normal file
239
tools/face_analyzer.py
Normal file
@@ -0,0 +1,239 @@
|
||||
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Face analysis using FaceAnalyzer.
|
||||
|
||||
Usage:
|
||||
python tools/face_analyzer.py --source path/to/image.jpg
|
||||
python tools/face_analyzer.py --source path/to/video.mp4
|
||||
python tools/face_analyzer.py --source 0 # webcam
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from uniface import AgeGender, ArcFace, FaceAnalyzer, RetinaFace
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
|
||||
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}
|
||||
|
||||
|
||||
def get_source_type(source: str) -> str:
|
||||
"""Determine if source is image, video, or camera."""
|
||||
if source.isdigit():
|
||||
return 'camera'
|
||||
path = Path(source)
|
||||
suffix = path.suffix.lower()
|
||||
if suffix in IMAGE_EXTENSIONS:
|
||||
return 'image'
|
||||
elif suffix in VIDEO_EXTENSIONS:
|
||||
return 'video'
|
||||
else:
|
||||
return 'unknown'
|
||||
|
||||
|
||||
def draw_face_info(image, face, face_id):
|
||||
"""Draw face ID and attributes above bounding box."""
|
||||
x1, y1, _x2, y2 = map(int, face.bbox)
|
||||
lines = [f'ID: {face_id}', f'Conf: {face.confidence:.2f}']
|
||||
if face.age and face.sex:
|
||||
lines.append(f'{face.sex}, {face.age}y')
|
||||
|
||||
for i, line in enumerate(lines):
|
||||
y_pos = y1 - 10 - (len(lines) - 1 - i) * 25
|
||||
if y_pos < 20:
|
||||
y_pos = y2 + 20 + i * 25
|
||||
(tw, th), _ = cv2.getTextSize(line, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
|
||||
cv2.rectangle(image, (x1, y_pos - th - 5), (x1 + tw + 10, y_pos + 5), (0, 255, 0), -1)
|
||||
cv2.putText(image, line, (x1 + 5, y_pos), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 2)
|
||||
|
||||
|
||||
def process_image(analyzer, image_path: str, save_dir: str = 'outputs', show_similarity: bool = True):
|
||||
"""Process a single image."""
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
print(f"Error: Failed to load image from '{image_path}'")
|
||||
return
|
||||
|
||||
faces = analyzer.analyze(image)
|
||||
print(f'Detected {len(faces)} face(s)')
|
||||
|
||||
if not faces:
|
||||
return
|
||||
|
||||
for i, face in enumerate(faces, 1):
|
||||
info = f' Face {i}: {face.sex}, {face.age}y' if face.age and face.sex else f' Face {i}'
|
||||
if face.embedding is not None:
|
||||
info += f' (embedding: {face.embedding.shape})'
|
||||
print(info)
|
||||
|
||||
if show_similarity and len(faces) >= 2:
|
||||
print('\nSimilarity Matrix:')
|
||||
n = len(faces)
|
||||
sim_matrix = np.zeros((n, n))
|
||||
|
||||
for i in range(n):
|
||||
for j in range(i, n):
|
||||
if i == j:
|
||||
sim_matrix[i][j] = 1.0
|
||||
else:
|
||||
sim = faces[i].compute_similarity(faces[j])
|
||||
sim_matrix[i][j] = sim
|
||||
sim_matrix[j][i] = sim
|
||||
|
||||
print(' ', end='')
|
||||
for i in range(n):
|
||||
print(f' F{i + 1:2d} ', end='')
|
||||
print('\n ' + '-' * (7 * n))
|
||||
|
||||
for i in range(n):
|
||||
print(f'F{i + 1:2d} | ', end='')
|
||||
for j in range(n):
|
||||
print(f'{sim_matrix[i][j]:6.3f} ', end='')
|
||||
print()
|
||||
|
||||
pairs = [(i, j, sim_matrix[i][j]) for i in range(n) for j in range(i + 1, n)]
|
||||
pairs.sort(key=lambda x: x[2], reverse=True)
|
||||
|
||||
print('\nTop matches (>0.4 = same person):')
|
||||
for i, j, sim in pairs[:3]:
|
||||
status = 'Same' if sim > 0.4 else 'Different'
|
||||
print(f' Face {i + 1} ↔ Face {j + 1}: {sim:.3f} ({status})')
|
||||
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
    draw_detections(image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, fancy_bbox=True)

    for i, face in enumerate(faces, 1):
        draw_face_info(image, face, i)

    os.makedirs(save_dir, exist_ok=True)
    output_path = os.path.join(save_dir, f'{Path(image_path).stem}_analysis.jpg')
    cv2.imwrite(output_path, image)
    print(f'Output saved: {output_path}')


def process_video(analyzer, video_path: str, save_dir: str = 'outputs'):
    """Process a video file."""
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Error: Cannot open video file '{video_path}'")
        return

    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    os.makedirs(save_dir, exist_ok=True)
    output_path = os.path.join(save_dir, f'{Path(video_path).stem}_analysis.mp4')
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    print(f'Processing video: {video_path} ({total_frames} frames)')
    frame_count = 0

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        frame_count += 1
        faces = analyzer.analyze(frame)

        bboxes = [f.bbox for f in faces]
        scores = [f.confidence for f in faces]
        landmarks = [f.landmarks for f in faces]
        draw_detections(image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, fancy_bbox=True)

        for i, face in enumerate(faces, 1):
            draw_face_info(frame, face, i)

        cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        out.write(frame)

        if frame_count % 100 == 0:
            print(f'  Processed {frame_count}/{total_frames} frames...')

    cap.release()
    out.release()
    print(f'Done! Output saved: {output_path}')


def run_camera(analyzer, camera_id: int = 0):
    """Run real-time analysis on webcam."""
    cap = cv2.VideoCapture(camera_id)
    if not cap.isOpened():
        print(f'Cannot open camera {camera_id}')
        return

    print("Press 'q' to quit")

    while True:
        ret, frame = cap.read()
        if not ret:
            break
        frame = cv2.flip(frame, 1)

        faces = analyzer.analyze(frame)

        bboxes = [f.bbox for f in faces]
        scores = [f.confidence for f in faces]
        landmarks = [f.landmarks for f in faces]
        draw_detections(image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, fancy_bbox=True)

        for i, face in enumerate(faces, 1):
            draw_face_info(frame, face, i)

        cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        cv2.imshow('Face Analyzer', frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()


def main():
    parser = argparse.ArgumentParser(description='Face analysis with detection, recognition, and attributes')
    parser.add_argument('--source', type=str, required=True, help='Image/video path or camera ID (0, 1, ...)')
    parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
    parser.add_argument('--no-similarity', action='store_true', help='Skip similarity matrix computation')
    args = parser.parse_args()

    detector = RetinaFace()
    recognizer = ArcFace()
    age_gender = AgeGender()
    analyzer = FaceAnalyzer(detector, recognizer, age_gender)

    source_type = get_source_type(args.source)

    if source_type == 'camera':
        run_camera(analyzer, int(args.source))
    elif source_type == 'image':
        if not os.path.exists(args.source):
            print(f'Error: Image not found: {args.source}')
            return
        process_image(analyzer, args.source, args.save_dir, show_similarity=not args.no_similarity)
    elif source_type == 'video':
        if not os.path.exists(args.source):
            print(f'Error: Video not found: {args.source}')
            return
        process_video(analyzer, args.source, args.save_dir)
    else:
        print(f"Error: Unknown source type for '{args.source}'")
        print('Supported formats: images (.jpg, .png, ...), videos (.mp4, .avi, ...), or camera ID (0, 1, ...)')


if __name__ == '__main__':
    main()
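
# Illustrative sketch of the similarity matrix that --no-similarity toggles,
# assuming each analyzed face exposes an L2-normalized `embedding` attribute:
#
#   import numpy as np
#   embeddings = np.stack([face.embedding for face in faces])  # (N, D)
#   similarity = embeddings @ embeddings.T                     # (N, N) cosine matrix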
281
tools/face_anonymize.py
Normal file
@@ -0,0 +1,281 @@
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

"""Face anonymization/blurring for privacy.

Usage:
    python tools/face_anonymize.py --source path/to/image.jpg --method pixelate
    python tools/face_anonymize.py --source path/to/video.mp4 --method gaussian
    python tools/face_anonymize.py --source 0 --method pixelate  # webcam
"""

from __future__ import annotations

import argparse
import os
from pathlib import Path

import cv2

from uniface import RetinaFace
from uniface.privacy import BlurFace

IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}


def get_source_type(source: str) -> str:
    """Determine if source is image, video, or camera."""
    if source.isdigit():
        return 'camera'
    path = Path(source)
    suffix = path.suffix.lower()
    if suffix in IMAGE_EXTENSIONS:
        return 'image'
    elif suffix in VIDEO_EXTENSIONS:
        return 'video'
    else:
        return 'unknown'
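
# Example behavior (illustrative):
#   get_source_type('0')         -> 'camera'
#   get_source_type('photo.jpg') -> 'image'
#   get_source_type('clip.mp4')  -> 'video'
#   get_source_type('notes.txt') -> 'unknown'
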
def process_image(
    detector,
    blurrer: BlurFace,
    image_path: str,
    save_dir: str = 'outputs',
    show_detections: bool = False,
):
    """Process a single image."""
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Failed to load image from '{image_path}'")
        return

    faces = detector.detect(image)
    print(f'Detected {len(faces)} face(s)')

    if show_detections and faces:
        from uniface.visualization import draw_detections

        preview = image.copy()
        bboxes = [face.bbox for face in faces]
        scores = [face.confidence for face in faces]
        landmarks = [face.landmarks for face in faces]
        draw_detections(preview, bboxes, scores, landmarks)

        cv2.imshow('Detections (Press any key to continue)', preview)
        cv2.waitKey(0)
        cv2.destroyAllWindows()

    if faces:
        anonymized = blurrer.anonymize(image, faces)
    else:
        anonymized = image

    os.makedirs(save_dir, exist_ok=True)
    basename = os.path.splitext(os.path.basename(image_path))[0]
    output_path = os.path.join(save_dir, f'{basename}_anonymized.jpg')
    cv2.imwrite(output_path, anonymized)
    print(f'Output saved: {output_path}')


def process_video(
    detector,
    blurrer: BlurFace,
    video_path: str,
    save_dir: str = 'outputs',
):
    """Process a video file."""
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Error: Cannot open video file '{video_path}'")
        return

    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    os.makedirs(save_dir, exist_ok=True)
    output_path = os.path.join(save_dir, f'{Path(video_path).stem}_anonymized.mp4')
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    print(f'Processing video: {video_path} ({total_frames} frames)')
    frame_count = 0

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        frame_count += 1
        faces = detector.detect(frame)

        if faces:
            frame = blurrer.anonymize(frame, faces, inplace=True)

        out.write(frame)

        if frame_count % 100 == 0:
            print(f'  Processed {frame_count}/{total_frames} frames...')

    cap.release()
    out.release()
    print(f'Done! Output saved: {output_path}')


def run_camera(detector, blurrer: BlurFace, camera_id: int = 0):
    """Run real-time anonymization on webcam."""
    cap = cv2.VideoCapture(camera_id)
    if not cap.isOpened():
        print(f'Cannot open camera {camera_id}')
        return

    print("Press 'q' to quit")

    while True:
        ret, frame = cap.read()
        if not ret:
            break
        frame = cv2.flip(frame, 1)

        faces = detector.detect(frame)
        if faces:
            frame = blurrer.anonymize(frame, faces, inplace=True)

        cv2.putText(
            frame,
            f'Faces blurred: {len(faces)} | Method: {blurrer.method}',
            (10, 30),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.7,
            (0, 255, 0),
            2,
        )

        cv2.imshow('Face Anonymization (Press q to quit)', frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()


def main():
    parser = argparse.ArgumentParser(
        description='Face anonymization using various blur methods',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Anonymize image with pixelation (default)
  python tools/face_anonymize.py --source photo.jpg

  # Use Gaussian blur with custom strength
  python tools/face_anonymize.py --source photo.jpg --method gaussian --blur-strength 5.0

  # Real-time webcam anonymization
  python tools/face_anonymize.py --source 0 --method pixelate

  # Black boxes for maximum privacy
  python tools/face_anonymize.py --source photo.jpg --method blackout

  # Custom pixelation intensity
  python tools/face_anonymize.py --source photo.jpg --method pixelate --pixel-blocks 5
""",
    )

    # Input/output
    parser.add_argument('--source', type=str, required=True, help='Image/video path or camera ID (0, 1, ...)')
    parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory')

    # Blur method
    parser.add_argument(
        '--method',
        type=str,
        default='pixelate',
        choices=['gaussian', 'pixelate', 'blackout', 'elliptical', 'median'],
        help='Blur method (default: pixelate)',
    )

    # Method-specific parameters
    parser.add_argument(
        '--blur-strength',
        type=float,
        default=3.0,
        help='Blur strength for gaussian/elliptical/median (default: 3.0)',
    )
    parser.add_argument(
        '--pixel-blocks',
        type=int,
        default=20,
        help='Number of pixel blocks for pixelate (default: 20, lower=more pixelated)',
    )
    parser.add_argument(
        '--color',
        type=str,
        default='0,0,0',
        help='Fill color for blackout as R,G,B (default: 0,0,0 for black)',
    )
    parser.add_argument('--margin', type=int, default=20, help='Margin for elliptical blur (default: 20)')

    # Detection
    parser.add_argument(
        '--confidence-threshold',
        type=float,
        default=0.5,
        help='Detection confidence threshold (default: 0.5)',
    )

    # Visualization
    parser.add_argument(
        '--show-detections',
        action='store_true',
        help='Show detection boxes before blurring (image mode only)',
    )

    args = parser.parse_args()

    # Parse color
    color_values = [int(x) for x in args.color.split(',')]
    if len(color_values) != 3:
        parser.error('--color must be in format R,G,B (e.g., 0,0,0)')
    color = tuple(color_values)

    # Initialize detector
    print(f'Initializing face detector (confidence_threshold={args.confidence_threshold})...')
    detector = RetinaFace(confidence_threshold=args.confidence_threshold)

    # Initialize blurrer
    print(f'Initializing blur method: {args.method}')
    blurrer = BlurFace(
        method=args.method,
        blur_strength=args.blur_strength,
        pixel_blocks=args.pixel_blocks,
        color=color,
        margin=args.margin,
    )
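
    # Illustrative sketch of the default 'pixelate' method (an assumption about
    # BlurFace internals, not the library's exact code): downscale the face ROI
    # to roughly `pixel_blocks` blocks, then resize back with nearest-neighbor:
    #   small = cv2.resize(roi, (args.pixel_blocks, args.pixel_blocks))
    #   roi[:] = cv2.resize(small, (roi.shape[1], roi.shape[0]), interpolation=cv2.INTER_NEAREST)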
    source_type = get_source_type(args.source)

    if source_type == 'camera':
        run_camera(detector, blurrer, int(args.source))
    elif source_type == 'image':
        if not os.path.exists(args.source):
            print(f'Error: Image not found: {args.source}')
            return
        process_image(detector, blurrer, args.source, args.save_dir, args.show_detections)
    elif source_type == 'video':
        if not os.path.exists(args.source):
            print(f'Error: Video not found: {args.source}')
            return
        process_video(detector, blurrer, args.source, args.save_dir)
    else:
        print(f"Error: Unknown source type for '{args.source}'")
        print('Supported formats: images (.jpg, .png, ...), videos (.mp4, .avi, ...), or camera ID (0, 1, ...)')


if __name__ == '__main__':
    main()
213
tools/face_emotion.py
Normal file
@@ -0,0 +1,213 @@
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

"""Emotion detection on detected faces.

Usage:
    python tools/face_emotion.py --source path/to/image.jpg
    python tools/face_emotion.py --source path/to/video.mp4
    python tools/face_emotion.py --source 0  # webcam
"""

from __future__ import annotations

import argparse
import os
from pathlib import Path

import cv2

from uniface import SCRFD, Emotion, RetinaFace
from uniface.visualization import draw_detections

IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}


def get_source_type(source: str) -> str:
    """Determine if source is image, video, or camera."""
    if source.isdigit():
        return 'camera'
    path = Path(source)
    suffix = path.suffix.lower()
    if suffix in IMAGE_EXTENSIONS:
        return 'image'
    elif suffix in VIDEO_EXTENSIONS:
        return 'video'
    else:
        return 'unknown'


def draw_emotion_label(image, bbox, emotion: str, confidence: float):
    """Draw emotion label above the bounding box."""
    x1, y1 = int(bbox[0]), int(bbox[1])
    text = f'{emotion} ({confidence:.2f})'
    (tw, th), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
    cv2.rectangle(image, (x1, y1 - th - 10), (x1 + tw + 10, y1), (255, 0, 0), -1)
    cv2.putText(image, text, (x1 + 5, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)


def process_image(
    detector,
    emotion_predictor,
    image_path: str,
    save_dir: str = 'outputs',
    threshold: float = 0.6,
):
    """Process a single image."""
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Failed to load image from '{image_path}'")
        return

    faces = detector.detect(image)
    print(f'Detected {len(faces)} face(s)')

    if not faces:
        return

    bboxes = [f.bbox for f in faces]
    scores = [f.confidence for f in faces]
    landmarks = [f.landmarks for f in faces]
    draw_detections(
        image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
    )

    for i, face in enumerate(faces):
        result = emotion_predictor.predict(image, face.landmarks)
        print(f'  Face {i + 1}: {result.emotion} (confidence: {result.confidence:.3f})')
        draw_emotion_label(image, face.bbox, result.emotion, result.confidence)

    os.makedirs(save_dir, exist_ok=True)
    output_path = os.path.join(save_dir, f'{Path(image_path).stem}_emotion.jpg')
    cv2.imwrite(output_path, image)
    print(f'Output saved: {output_path}')


def process_video(
    detector,
    emotion_predictor,
    video_path: str,
    save_dir: str = 'outputs',
    threshold: float = 0.6,
):
    """Process a video file."""
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Error: Cannot open video file '{video_path}'")
        return

    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    os.makedirs(save_dir, exist_ok=True)
    output_path = os.path.join(save_dir, f'{Path(video_path).stem}_emotion.mp4')
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    print(f'Processing video: {video_path} ({total_frames} frames)')
    frame_count = 0

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        frame_count += 1
        faces = detector.detect(frame)

        bboxes = [f.bbox for f in faces]
        scores = [f.confidence for f in faces]
        landmarks = [f.landmarks for f in faces]
        draw_detections(
            image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
        )

        for face in faces:
            result = emotion_predictor.predict(frame, face.landmarks)
            draw_emotion_label(frame, face.bbox, result.emotion, result.confidence)

        cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        out.write(frame)

        if frame_count % 100 == 0:
            print(f'  Processed {frame_count}/{total_frames} frames...')

    cap.release()
    out.release()
    print(f'Done! Output saved: {output_path}')


def run_camera(detector, emotion_predictor, camera_id: int = 0, threshold: float = 0.6):
    """Run real-time detection on webcam."""
    cap = cv2.VideoCapture(camera_id)
    if not cap.isOpened():
        print(f'Cannot open camera {camera_id}')
        return

    print("Press 'q' to quit")

    while True:
        ret, frame = cap.read()
        if not ret:
            break
        frame = cv2.flip(frame, 1)

        faces = detector.detect(frame)

        bboxes = [f.bbox for f in faces]
        scores = [f.confidence for f in faces]
        landmarks = [f.landmarks for f in faces]
        draw_detections(
            image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
        )

        for face in faces:
            result = emotion_predictor.predict(frame, face.landmarks)
            draw_emotion_label(frame, face.bbox, result.emotion, result.confidence)

        cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        cv2.imshow('Emotion Detection', frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()


def main():
    parser = argparse.ArgumentParser(description='Run emotion detection')
    parser.add_argument('--source', type=str, required=True, help='Image/video path or camera ID (0, 1, ...)')
    parser.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
    parser.add_argument('--threshold', type=float, default=0.6, help='Visualization threshold')
    parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
    args = parser.parse_args()

    detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
    emotion_predictor = Emotion()

    source_type = get_source_type(args.source)

    if source_type == 'camera':
        run_camera(detector, emotion_predictor, int(args.source), args.threshold)
    elif source_type == 'image':
        if not os.path.exists(args.source):
            print(f'Error: Image not found: {args.source}')
            return
        process_image(detector, emotion_predictor, args.source, args.save_dir, args.threshold)
    elif source_type == 'video':
        if not os.path.exists(args.source):
            print(f'Error: Video not found: {args.source}')
            return
        process_video(detector, emotion_predictor, args.source, args.save_dir, args.threshold)
    else:
        print(f"Error: Unknown source type for '{args.source}'")
        print('Supported formats: images (.jpg, .png, ...), videos (.mp4, .avi, ...), or camera ID (0, 1, ...)')


if __name__ == '__main__':
    main()
250
tools/face_parsing.py
Normal file
@@ -0,0 +1,250 @@
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

"""Face parsing on detected faces.

Usage:
    python tools/face_parsing.py --source path/to/image.jpg
    python tools/face_parsing.py --source path/to/video.mp4
    python tools/face_parsing.py --source 0  # webcam
"""

from __future__ import annotations

import argparse
import os
from pathlib import Path

import cv2
import numpy as np

from uniface import RetinaFace
from uniface.constants import ParsingWeights
from uniface.parsing import BiSeNet
from uniface.visualization import vis_parsing_maps

IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}


def get_source_type(source: str) -> str:
    """Determine if source is image, video, or camera."""
    if source.isdigit():
        return 'camera'
    path = Path(source)
    suffix = path.suffix.lower()
    if suffix in IMAGE_EXTENSIONS:
        return 'image'
    elif suffix in VIDEO_EXTENSIONS:
        return 'video'
    else:
        return 'unknown'


def expand_bbox(
    bbox: np.ndarray,
    image_shape: tuple[int, int],
    expand_ratio: float = 0.2,
    expand_top_ratio: float = 0.4,
) -> tuple[int, int, int, int]:
    """
    Expand bounding box to include full head region for face parsing.

    Face detection typically returns tight face boxes, but face parsing
    requires the full head including hair, ears, and neck.

    Args:
        bbox: Original bounding box [x1, y1, x2, y2].
        image_shape: Image dimensions as (height, width).
        expand_ratio: Expansion ratio for left, right, and bottom (default: 0.2 = 20%).
        expand_top_ratio: Expansion ratio for top to capture hair/forehead (default: 0.4 = 40%).

    Returns:
        Tuple[int, int, int, int]: Expanded bbox (x1, y1, x2, y2) clamped to image bounds.
    """
    x1, y1, x2, y2 = map(int, bbox[:4])
    height, width = image_shape[:2]

    face_width = x2 - x1
    face_height = y2 - y1

    expand_x = int(face_width * expand_ratio)
    expand_y_bottom = int(face_height * expand_ratio)
    expand_y_top = int(face_height * expand_top_ratio)

    new_x1 = max(0, x1 - expand_x)
    new_y1 = max(0, y1 - expand_y_top)
    new_x2 = min(width, x2 + expand_x)
    new_y2 = min(height, y2 + expand_y_bottom)

    return new_x1, new_y1, new_x2, new_y2
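
# Worked example (illustrative): for bbox = [100, 100, 200, 220] in a 640x480
# image with the defaults, face_width = 100 and face_height = 120, so
# expand_x = 20, expand_y_top = 48, and expand_y_bottom = 24, giving an
# expanded box of (80, 52, 220, 244) after clamping to the image bounds.
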
def process_image(detector, parser, image_path: str, save_dir: str = 'outputs', expand_ratio: float = 0.2):
    """Process a single image."""
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Failed to load image from '{image_path}'")
        return

    faces = detector.detect(image)
    print(f'Detected {len(faces)} face(s)')

    result_image = image.copy()

    for i, face in enumerate(faces):
        x1, y1, x2, y2 = expand_bbox(face.bbox, image.shape, expand_ratio=expand_ratio)
        face_crop = image[y1:y2, x1:x2]

        if face_crop.size == 0:
            continue

        mask = parser.parse(face_crop)
        print(f'  Face {i + 1}: parsed with {len(set(mask.flatten()))} unique classes')

        face_crop_rgb = cv2.cvtColor(face_crop, cv2.COLOR_BGR2RGB)
        vis_result = vis_parsing_maps(face_crop_rgb, mask, save_image=False)

        result_image[y1:y2, x1:x2] = vis_result
        cv2.rectangle(result_image, (x1, y1), (x2, y2), (0, 255, 0), 2)

    os.makedirs(save_dir, exist_ok=True)
    output_path = os.path.join(save_dir, f'{Path(image_path).stem}_parsing.jpg')
    cv2.imwrite(output_path, result_image)
    print(f'Output saved: {output_path}')


def process_video(detector, parser, video_path: str, save_dir: str = 'outputs', expand_ratio: float = 0.2):
    """Process a video file."""
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Error: Cannot open video file '{video_path}'")
        return

    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    os.makedirs(save_dir, exist_ok=True)
    output_path = os.path.join(save_dir, f'{Path(video_path).stem}_parsing.mp4')
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    print(f'Processing video: {video_path} ({total_frames} frames)')
    frame_count = 0

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        frame_count += 1
        faces = detector.detect(frame)

        for face in faces:
            x1, y1, x2, y2 = expand_bbox(face.bbox, frame.shape, expand_ratio=expand_ratio)
            face_crop = frame[y1:y2, x1:x2]

            if face_crop.size == 0:
                continue

            mask = parser.parse(face_crop)
            face_crop_rgb = cv2.cvtColor(face_crop, cv2.COLOR_BGR2RGB)
            vis_result = vis_parsing_maps(face_crop_rgb, mask, save_image=False)

            frame[y1:y2, x1:x2] = vis_result
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

        cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        out.write(frame)

        if frame_count % 100 == 0:
            print(f'  Processed {frame_count}/{total_frames} frames...')

    cap.release()
    out.release()
    print(f'Done! Output saved: {output_path}')


def run_camera(detector, parser, camera_id: int = 0, expand_ratio: float = 0.2):
    """Run real-time detection on webcam."""
    cap = cv2.VideoCapture(camera_id)
    if not cap.isOpened():
        print(f'Cannot open camera {camera_id}')
        return

    print("Press 'q' to quit")

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        frame = cv2.flip(frame, 1)
        faces = detector.detect(frame)

        for face in faces:
            x1, y1, x2, y2 = expand_bbox(face.bbox, frame.shape, expand_ratio=expand_ratio)
            face_crop = frame[y1:y2, x1:x2]

            if face_crop.size == 0:
                continue

            mask = parser.parse(face_crop)
            face_crop_rgb = cv2.cvtColor(face_crop, cv2.COLOR_BGR2RGB)
            vis_result = vis_parsing_maps(face_crop_rgb, mask, save_image=False)

            frame[y1:y2, x1:x2] = vis_result
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

        cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        cv2.imshow('Face Parsing', frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()


def main():
    parser_arg = argparse.ArgumentParser(description='Run face parsing')
    parser_arg.add_argument('--source', type=str, required=True, help='Image/video path or camera ID (0, 1, ...)')
    parser_arg.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
    parser_arg.add_argument(
        '--model', type=str, default=ParsingWeights.RESNET18, choices=[ParsingWeights.RESNET18, ParsingWeights.RESNET34]
    )
    parser_arg.add_argument(
        '--expand-ratio',
        type=float,
        default=0.2,
        help='Bbox expansion ratio for full head coverage (default: 0.2 = 20%%)',
    )
    args = parser_arg.parse_args()

    detector = RetinaFace()
    parser = BiSeNet(model_name=args.model)

    source_type = get_source_type(args.source)

    if source_type == 'camera':
        run_camera(detector, parser, int(args.source), expand_ratio=args.expand_ratio)
    elif source_type == 'image':
        if not os.path.exists(args.source):
            print(f'Error: Image not found: {args.source}')
            return
        process_image(detector, parser, args.source, args.save_dir, expand_ratio=args.expand_ratio)
    elif source_type == 'video':
        if not os.path.exists(args.source):
            print(f'Error: Video not found: {args.source}')
            return
        process_video(detector, parser, args.source, args.save_dir, expand_ratio=args.expand_ratio)
    else:
        print(f"Error: Unknown source type for '{args.source}'")
        print('Supported formats: images (.jpg, .png, ...), videos (.mp4, .avi, ...), or camera ID (0, 1, ...)')


if __name__ == '__main__':
    main()
190
tools/face_search.py
Normal file
@@ -0,0 +1,190 @@
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

"""Real-time face search: match faces against a reference image.

Usage:
    python tools/face_search.py --reference person.jpg --source 0  # webcam
    python tools/face_search.py --reference person.jpg --source video.mp4
"""

from __future__ import annotations

import argparse
import os
from pathlib import Path

import cv2
import numpy as np

from uniface.detection import SCRFD, RetinaFace
from uniface.face_utils import compute_similarity
from uniface.recognition import ArcFace, MobileFace, SphereFace

IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}


def get_source_type(source: str) -> str:
    """Determine if source is image, video, or camera."""
    if source.isdigit():
        return 'camera'
    path = Path(source)
    suffix = path.suffix.lower()
    if suffix in IMAGE_EXTENSIONS:
        return 'image'
    elif suffix in VIDEO_EXTENSIONS:
        return 'video'
    else:
        return 'unknown'


def get_recognizer(name: str):
    """Get recognizer by name."""
    if name == 'arcface':
        return ArcFace()
    elif name == 'mobileface':
        return MobileFace()
    else:
        return SphereFace()


def extract_reference_embedding(detector, recognizer, image_path: str) -> np.ndarray:
    """Extract embedding from reference image."""
    image = cv2.imread(image_path)
    if image is None:
        raise RuntimeError(f'Failed to load image: {image_path}')

    faces = detector.detect(image)
    if not faces:
        raise RuntimeError('No faces found in reference image.')

    landmarks = faces[0].landmarks
    return recognizer.get_normalized_embedding(image, landmarks)
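
# Note: faces[0] is simply the first detection. If the reference photo may
# contain several faces, one reasonable variant (illustrative, not in the
# original) is to use the largest box instead:
#   largest = max(faces, key=lambda f: (f.bbox[2] - f.bbox[0]) * (f.bbox[3] - f.bbox[1]))
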
def process_frame(frame, detector, recognizer, ref_embedding: np.ndarray, threshold: float = 0.4):
    """Process a single frame and return annotated frame."""
    faces = detector.detect(frame)

    for face in faces:
        bbox = face.bbox
        landmarks = face.landmarks
        x1, y1, x2, y2 = map(int, bbox)

        embedding = recognizer.get_normalized_embedding(frame, landmarks)
        sim = compute_similarity(ref_embedding, embedding)

        label = f'Match ({sim:.2f})' if sim > threshold else f'Unknown ({sim:.2f})'
        color = (0, 255, 0) if sim > threshold else (0, 0, 255)

        cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
        cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)

    return frame
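
# Note (illustrative): with L2-normalized embeddings, cosine similarity reduces
# to a dot product, e.g. sim = float(np.dot(ref_embedding, embedding)), which is
# presumably what compute_similarity evaluates; that is why a fixed threshold
# such as 0.4 transfers across frames and sources.
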
def process_video(detector, recognizer, ref_embedding: np.ndarray, video_path: str, save_dir: str, threshold: float):
    """Process a video file."""
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Error: Cannot open video file '{video_path}'")
        return

    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    os.makedirs(save_dir, exist_ok=True)
    output_path = os.path.join(save_dir, f'{Path(video_path).stem}_search.mp4')
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    print(f'Processing video: {video_path} ({total_frames} frames)')
    frame_count = 0

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        frame_count += 1
        frame = process_frame(frame, detector, recognizer, ref_embedding, threshold)
        out.write(frame)

        if frame_count % 100 == 0:
            print(f'  Processed {frame_count}/{total_frames} frames...')

    cap.release()
    out.release()
    print(f'Done! Output saved: {output_path}')


def run_camera(detector, recognizer, ref_embedding: np.ndarray, camera_id: int = 0, threshold: float = 0.4):
    """Run real-time face search on webcam."""
    cap = cv2.VideoCapture(camera_id)
    if not cap.isOpened():
        print(f'Cannot open camera {camera_id}')
        return

    print("Press 'q' to quit")

    while True:
        ret, frame = cap.read()
        if not ret:
            break
        frame = cv2.flip(frame, 1)

        frame = process_frame(frame, detector, recognizer, ref_embedding, threshold)

        cv2.imshow('Face Recognition', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()


def main():
    parser = argparse.ArgumentParser(description='Face search using a reference image')
    parser.add_argument('--reference', type=str, required=True, help='Reference face image')
    parser.add_argument('--source', type=str, required=True, help='Video path or camera ID (0, 1, ...)')
    parser.add_argument('--threshold', type=float, default=0.4, help='Match threshold')
    parser.add_argument('--detector', type=str, default='scrfd', choices=['retinaface', 'scrfd'])
    parser.add_argument(
        '--recognizer',
        type=str,
        default='arcface',
        choices=['arcface', 'mobileface', 'sphereface'],
    )
    parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
    args = parser.parse_args()

    if not os.path.exists(args.reference):
        print(f'Error: Reference image not found: {args.reference}')
        return

    detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
    recognizer = get_recognizer(args.recognizer)

    print(f'Loading reference: {args.reference}')
    ref_embedding = extract_reference_embedding(detector, recognizer, args.reference)

    source_type = get_source_type(args.source)

    if source_type == 'camera':
        run_camera(detector, recognizer, ref_embedding, int(args.source), args.threshold)
    elif source_type == 'video':
        if not os.path.exists(args.source):
            print(f'Error: Video not found: {args.source}')
            return
        process_video(detector, recognizer, ref_embedding, args.source, args.save_dir, args.threshold)
    else:
        print(f"Error: Source must be a video file or camera ID, not '{args.source}'")
        print('Supported formats: videos (.mp4, .avi, ...) or camera ID (0, 1, ...)')


if __name__ == '__main__':
    main()
214
tools/fairface.py
Normal file
@@ -0,0 +1,214 @@
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

"""FairFace attribute prediction (race, gender, age) on detected faces.

Usage:
    python tools/fairface.py --source path/to/image.jpg
    python tools/fairface.py --source path/to/video.mp4
    python tools/fairface.py --source 0  # webcam
"""

from __future__ import annotations

import argparse
import os
from pathlib import Path

import cv2

from uniface import SCRFD, RetinaFace
from uniface.attribute import FairFace
from uniface.visualization import draw_detections

IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}


def get_source_type(source: str) -> str:
    """Determine if source is image, video, or camera."""
    if source.isdigit():
        return 'camera'
    path = Path(source)
    suffix = path.suffix.lower()
    if suffix in IMAGE_EXTENSIONS:
        return 'image'
    elif suffix in VIDEO_EXTENSIONS:
        return 'video'
    else:
        return 'unknown'


def draw_fairface_label(image, bbox, sex: str, age_group: str, race: str):
    """Draw FairFace attributes above the bounding box."""
    x1, y1 = int(bbox[0]), int(bbox[1])
    text = f'{sex}, {age_group}, {race}'
    (tw, th), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)
    cv2.rectangle(image, (x1, y1 - th - 10), (x1 + tw + 10, y1), (0, 255, 0), -1)
    cv2.putText(image, text, (x1 + 5, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 2)


def process_image(
    detector,
    fairface,
    image_path: str,
    save_dir: str = 'outputs',
    threshold: float = 0.6,
):
    """Process a single image."""
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Failed to load image from '{image_path}'")
        return

    faces = detector.detect(image)
    print(f'Detected {len(faces)} face(s)')

    if not faces:
        return

    bboxes = [f.bbox for f in faces]
    scores = [f.confidence for f in faces]
    landmarks = [f.landmarks for f in faces]
    draw_detections(
        image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
    )

    for i, face in enumerate(faces):
        result = fairface.predict(image, face.bbox)
        print(f'  Face {i + 1}: {result.sex}, {result.age_group}, {result.race}')
        draw_fairface_label(image, face.bbox, result.sex, result.age_group, result.race)

    os.makedirs(save_dir, exist_ok=True)
    output_path = os.path.join(save_dir, f'{Path(image_path).stem}_fairface.jpg')
    cv2.imwrite(output_path, image)
    print(f'Output saved: {output_path}')


def process_video(
    detector,
    fairface,
    video_path: str,
    save_dir: str = 'outputs',
    threshold: float = 0.6,
):
    """Process a video file."""
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Error: Cannot open video file '{video_path}'")
        return

    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    os.makedirs(save_dir, exist_ok=True)
    output_path = os.path.join(save_dir, f'{Path(video_path).stem}_fairface.mp4')
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    print(f'Processing video: {video_path} ({total_frames} frames)')
    frame_count = 0

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        frame_count += 1
        faces = detector.detect(frame)

        bboxes = [f.bbox for f in faces]
        scores = [f.confidence for f in faces]
        landmarks = [f.landmarks for f in faces]
        draw_detections(
            image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
        )

        for face in faces:
            result = fairface.predict(frame, face.bbox)
            draw_fairface_label(frame, face.bbox, result.sex, result.age_group, result.race)

        cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        out.write(frame)

        if frame_count % 100 == 0:
            print(f'  Processed {frame_count}/{total_frames} frames...')

    cap.release()
    out.release()
    print(f'Done! Output saved: {output_path}')


def run_camera(detector, fairface, camera_id: int = 0, threshold: float = 0.6):
    """Run real-time detection on webcam."""
    cap = cv2.VideoCapture(camera_id)
    if not cap.isOpened():
        print(f'Cannot open camera {camera_id}')
        return

    print("Press 'q' to quit")

    while True:
        ret, frame = cap.read()
        if not ret:
            break
        frame = cv2.flip(frame, 1)

        faces = detector.detect(frame)

        bboxes = [f.bbox for f in faces]
        scores = [f.confidence for f in faces]
        landmarks = [f.landmarks for f in faces]
        draw_detections(
            image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
        )

        for face in faces:
            result = fairface.predict(frame, face.bbox)
            draw_fairface_label(frame, face.bbox, result.sex, result.age_group, result.race)

        cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        cv2.imshow('FairFace Detection', frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()


def main():
    parser = argparse.ArgumentParser(description='Run FairFace attribute prediction (race, gender, age)')
    parser.add_argument('--source', type=str, required=True, help='Image/video path or camera ID (0, 1, ...)')
    parser.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
    parser.add_argument('--threshold', type=float, default=0.6, help='Visualization threshold')
    parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
    args = parser.parse_args()

    detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
    fairface = FairFace()

    source_type = get_source_type(args.source)

    if source_type == 'camera':
        run_camera(detector, fairface, int(args.source), args.threshold)
    elif source_type == 'image':
        if not os.path.exists(args.source):
            print(f'Error: Image not found: {args.source}')
            return
        process_image(detector, fairface, args.source, args.save_dir, args.threshold)
    elif source_type == 'video':
        if not os.path.exists(args.source):
            print(f'Error: Video not found: {args.source}')
            return
        process_video(detector, fairface, args.source, args.save_dir, args.threshold)
    else:
        print(f"Error: Unknown source type for '{args.source}'")
        print('Supported formats: images (.jpg, .png, ...), videos (.mp4, .avi, ...), or camera ID (0, 1, ...)')


if __name__ == '__main__':
    main()
190
tools/gaze_estimation.py
Normal file
@@ -0,0 +1,190 @@
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

"""Gaze estimation on detected faces.

Usage:
    python tools/gaze_estimation.py --source path/to/image.jpg
    python tools/gaze_estimation.py --source path/to/video.mp4
    python tools/gaze_estimation.py --source 0  # webcam
"""

from __future__ import annotations

import argparse
import os
from pathlib import Path

import cv2
import numpy as np

from uniface import RetinaFace
from uniface.gaze import MobileGaze
from uniface.visualization import draw_gaze

IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}


def get_source_type(source: str) -> str:
    """Determine if source is image, video, or camera."""
    if source.isdigit():
        return 'camera'
    path = Path(source)
    suffix = path.suffix.lower()
    if suffix in IMAGE_EXTENSIONS:
        return 'image'
    elif suffix in VIDEO_EXTENSIONS:
        return 'video'
    else:
        return 'unknown'


def process_image(detector, gaze_estimator, image_path: str, save_dir: str = 'outputs'):
    """Process a single image."""
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Failed to load image from '{image_path}'")
        return

    faces = detector.detect(image)
    print(f'Detected {len(faces)} face(s)')

    for i, face in enumerate(faces):
        bbox = face.bbox
        x1, y1, x2, y2 = map(int, bbox[:4])
        face_crop = image[y1:y2, x1:x2]

        if face_crop.size == 0:
            continue

        result = gaze_estimator.estimate(face_crop)
        print(f'  Face {i + 1}: pitch={np.degrees(result.pitch):.1f}°, yaw={np.degrees(result.yaw):.1f}°')

        draw_gaze(image, bbox, result.pitch, result.yaw, draw_angles=True)

    os.makedirs(save_dir, exist_ok=True)
    output_path = os.path.join(save_dir, f'{Path(image_path).stem}_gaze.jpg')
    cv2.imwrite(output_path, image)
    print(f'Output saved: {output_path}')
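
# Illustrative sketch (an assumption about draw_gaze internals, not its exact
# code): a 2D gaze arrow can be derived from pitch/yaw in radians as
#   dx = -length * np.cos(pitch) * np.sin(yaw)
#   dy = -length * np.sin(pitch)
# and drawn from the face-box center with cv2.arrowedLine.
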
def process_video(detector, gaze_estimator, video_path: str, save_dir: str = 'outputs'):
    """Process a video file."""
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Error: Cannot open video file '{video_path}'")
        return

    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    os.makedirs(save_dir, exist_ok=True)
    output_path = os.path.join(save_dir, f'{Path(video_path).stem}_gaze.mp4')
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    print(f'Processing video: {video_path} ({total_frames} frames)')
    frame_count = 0

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        frame_count += 1
        faces = detector.detect(frame)

        for face in faces:
            bbox = face.bbox
            x1, y1, x2, y2 = map(int, bbox[:4])
            face_crop = frame[y1:y2, x1:x2]

            if face_crop.size == 0:
                continue

            result = gaze_estimator.estimate(face_crop)
            draw_gaze(frame, bbox, result.pitch, result.yaw)

        cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        out.write(frame)

        if frame_count % 100 == 0:
            print(f'  Processed {frame_count}/{total_frames} frames...')

    cap.release()
    out.release()
    print(f'Done! Output saved: {output_path}')


def run_camera(detector, gaze_estimator, camera_id: int = 0):
    """Run real-time detection on webcam."""
    cap = cv2.VideoCapture(camera_id)
    if not cap.isOpened():
        print(f'Cannot open camera {camera_id}')
        return

    print("Press 'q' to quit")

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        frame = cv2.flip(frame, 1)
        faces = detector.detect(frame)

        for face in faces:
            bbox = face.bbox
            x1, y1, x2, y2 = map(int, bbox[:4])
            face_crop = frame[y1:y2, x1:x2]

            if face_crop.size == 0:
                continue

            result = gaze_estimator.estimate(face_crop)
            draw_gaze(frame, bbox, result.pitch, result.yaw)

        cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        cv2.imshow('Gaze Estimation', frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()


def main():
    parser = argparse.ArgumentParser(description='Run gaze estimation')
    parser.add_argument('--source', type=str, required=True, help='Image/video path or camera ID (0, 1, ...)')
    parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
    args = parser.parse_args()

    detector = RetinaFace()
    gaze_estimator = MobileGaze()

    source_type = get_source_type(args.source)

    if source_type == 'camera':
        run_camera(detector, gaze_estimator, int(args.source))
    elif source_type == 'image':
        if not os.path.exists(args.source):
            print(f'Error: Image not found: {args.source}')
            return
        process_image(detector, gaze_estimator, args.source, args.save_dir)
    elif source_type == 'video':
        if not os.path.exists(args.source):
            print(f'Error: Video not found: {args.source}')
            return
        process_video(detector, gaze_estimator, args.source, args.save_dir)
    else:
        print(f"Error: Unknown source type for '{args.source}'")
        print('Supported formats: images (.jpg, .png, ...), videos (.mp4, .avi, ...), or camera ID (0, 1, ...)')


if __name__ == '__main__':
    main()
187
tools/landmarks.py
Normal file
@@ -0,0 +1,187 @@
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

"""106-point facial landmark detection.

Usage:
    python tools/landmarks.py --source path/to/image.jpg
    python tools/landmarks.py --source path/to/video.mp4
    python tools/landmarks.py --source 0  # webcam
"""

from __future__ import annotations

import argparse
import os
from pathlib import Path

import cv2

from uniface import SCRFD, Landmark106, RetinaFace

IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}


def get_source_type(source: str) -> str:
    """Determine if source is image, video, or camera."""
    if source.isdigit():
        return 'camera'
    path = Path(source)
    suffix = path.suffix.lower()
    if suffix in IMAGE_EXTENSIONS:
        return 'image'
    elif suffix in VIDEO_EXTENSIONS:
        return 'video'
    else:
        return 'unknown'


def process_image(detector, landmarker, image_path: str, save_dir: str = 'outputs'):
    """Process a single image."""
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Failed to load image from '{image_path}'")
        return

    faces = detector.detect(image)
    print(f'Detected {len(faces)} face(s)')

    if not faces:
        return

    for i, face in enumerate(faces):
        bbox = face.bbox
        x1, y1, x2, y2 = map(int, bbox)
        cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)

        landmarks = landmarker.get_landmarks(image, bbox)
        print(f'  Face {i + 1}: {len(landmarks)} landmarks')

        for x, y in landmarks.astype(int):
            cv2.circle(image, (x, y), 1, (0, 255, 0), -1)

        cv2.putText(image, f'Face {i + 1}', (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    os.makedirs(save_dir, exist_ok=True)
    output_path = os.path.join(save_dir, f'{Path(image_path).stem}_landmarks.jpg')
    cv2.imwrite(output_path, image)
    print(f'Output saved: {output_path}')


def process_video(detector, landmarker, video_path: str, save_dir: str = 'outputs'):
    """Process a video file."""
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Error: Cannot open video file '{video_path}'")
        return

    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    os.makedirs(save_dir, exist_ok=True)
    output_path = os.path.join(save_dir, f'{Path(video_path).stem}_landmarks.mp4')
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    print(f'Processing video: {video_path} ({total_frames} frames)')
    frame_count = 0

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        frame_count += 1
        faces = detector.detect(frame)

        for face in faces:
            bbox = face.bbox
            x1, y1, x2, y2 = map(int, bbox)
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

            landmarks = landmarker.get_landmarks(frame, bbox)
            for x, y in landmarks.astype(int):
                cv2.circle(frame, (x, y), 1, (0, 255, 0), -1)

        cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        out.write(frame)

        if frame_count % 100 == 0:
            print(f'  Processed {frame_count}/{total_frames} frames...')

    cap.release()
    out.release()
    print(f'Done! Output saved: {output_path}')


def run_camera(detector, landmarker, camera_id: int = 0):
    """Run real-time detection on webcam."""
    cap = cv2.VideoCapture(camera_id)
    if not cap.isOpened():
        print(f'Cannot open camera {camera_id}')
        return

    print("Press 'q' to quit")

    while True:
        ret, frame = cap.read()
        if not ret:
            break
        frame = cv2.flip(frame, 1)

        faces = detector.detect(frame)

        for face in faces:
            bbox = face.bbox
            x1, y1, x2, y2 = map(int, bbox)
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

            landmarks = landmarker.get_landmarks(frame, bbox)
            for x, y in landmarks.astype(int):
                cv2.circle(frame, (x, y), 1, (0, 255, 0), -1)

        cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        cv2.imshow('106-Point Landmarks', frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()


def main():
    parser = argparse.ArgumentParser(description='Run facial landmark detection')
    parser.add_argument('--source', type=str, required=True, help='Image/video path or camera ID (0, 1, ...)')
    parser.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
    parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
    args = parser.parse_args()

    detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
    landmarker = Landmark106()

    source_type = get_source_type(args.source)

    if source_type == 'camera':
        run_camera(detector, landmarker, int(args.source))
    elif source_type == 'image':
        if not os.path.exists(args.source):
            print(f'Error: Image not found: {args.source}')
            return
        process_image(detector, landmarker, args.source, args.save_dir)
    elif source_type == 'video':
        if not os.path.exists(args.source):
            print(f'Error: Video not found: {args.source}')
            return
        process_video(detector, landmarker, args.source, args.save_dir)
    else:
        print(f"Error: Unknown source type for '{args.source}'")
        print('Supported formats: images (.jpg, .png, ...), videos (.mp4, .avi, ...), or camera ID (0, 1, ...)')


if __name__ == '__main__':
    main()
@@ -1,6 +1,13 @@
# Face recognition: extract embeddings or compare two faces
# Usage: python run_recognition.py --image path/to/image.jpg
#        python run_recognition.py --image1 face1.jpg --image2 face2.jpg
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

"""Face recognition: extract embeddings or compare two faces.

Usage:
    python tools/recognition.py --image path/to/image.jpg
    python tools/recognition.py --image1 face1.jpg --image2 face2.jpg
"""

import argparse
214
tools/spoofing.py
Normal file
@@ -0,0 +1,214 @@
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

"""Face Anti-Spoofing Detection.

Usage:
    python tools/spoofing.py --source path/to/image.jpg
    python tools/spoofing.py --source path/to/video.mp4
    python tools/spoofing.py --source 0  # webcam
"""

from __future__ import annotations

import argparse
import os
from pathlib import Path

import cv2
import numpy as np

from uniface import RetinaFace
from uniface.constants import MiniFASNetWeights
from uniface.spoofing import create_spoofer

IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}


def get_source_type(source: str) -> str:
    """Determine if source is image, video, or camera."""
    if source.isdigit():
        return 'camera'
    path = Path(source)
    suffix = path.suffix.lower()
    if suffix in IMAGE_EXTENSIONS:
        return 'image'
    elif suffix in VIDEO_EXTENSIONS:
        return 'video'
    else:
        return 'unknown'


def draw_spoofing_result(
    image: np.ndarray,
    bbox: list,
    is_real: bool,
    confidence: float,
    thickness: int = 2,
) -> None:
    """Draw bounding box with anti-spoofing result.

    Args:
        image: Input image to draw on.
        bbox: Bounding box in [x1, y1, x2, y2] format.
        is_real: True if real face, False if fake.
        confidence: Confidence score (0.0 to 1.0).
        thickness: Line thickness for bounding box.
    """
    x1, y1, x2, y2 = map(int, bbox[:4])

    color = (0, 255, 0) if is_real else (0, 0, 255)

    cv2.rectangle(image, (x1, y1), (x2, y2), color, thickness)

    label = 'Real' if is_real else 'Fake'
    text = f'{label}: {confidence:.1%}'

    (tw, th), _baseline = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2)
    cv2.rectangle(image, (x1, y1 - th - 10), (x1 + tw + 10, y1), color, -1)
    cv2.putText(image, text, (x1 + 5, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)


def process_image(detector, spoofer, image_path: str, save_dir: str = 'outputs') -> None:
    """Process a single image for face anti-spoofing detection."""
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Failed to load image from '{image_path}'")
        return

    faces = detector.detect(image)
    print(f'Detected {len(faces)} face(s)')

    if not faces:
        print('No faces detected in the image.')
        return

    for i, face in enumerate(faces, 1):
        result = spoofer.predict(image, face.bbox)
        label = 'Real' if result.is_real else 'Fake'
        print(f'  Face {i}: {label} ({result.confidence:.1%})')

        draw_spoofing_result(image, face.bbox, result.is_real, result.confidence)

    os.makedirs(save_dir, exist_ok=True)
    output_path = os.path.join(save_dir, f'{Path(image_path).stem}_spoofing.jpg')
    cv2.imwrite(output_path, image)
    print(f'Output saved: {output_path}')


def process_video(detector, spoofer, video_path: str, save_dir: str = 'outputs') -> None:
    """Process a video file for face anti-spoofing detection."""
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Error: Cannot open video file '{video_path}'")
        return

    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    os.makedirs(save_dir, exist_ok=True)
    output_path = os.path.join(save_dir, f'{Path(video_path).stem}_spoofing.mp4')
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    print(f'Processing video: {video_path} ({total_frames} frames)')
    frame_count = 0

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        frame_count += 1
        faces = detector.detect(frame)

        for face in faces:
            result = spoofer.predict(frame, face.bbox)
            draw_spoofing_result(frame, face.bbox, result.is_real, result.confidence)

        out.write(frame)

        if frame_count % 100 == 0:
            print(f'  Processed {frame_count}/{total_frames} frames...')

    cap.release()
    out.release()
    print(f'Done! Output saved: {output_path}')


def run_camera(detector, spoofer, camera_id: int = 0) -> None:
    """Run real-time anti-spoofing detection on webcam."""
    cap = cv2.VideoCapture(camera_id)
    if not cap.isOpened():
        print(f'Cannot open camera {camera_id}')
        return

    print("Press 'q' to quit")

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        frame = cv2.flip(frame, 1)
        faces = detector.detect(frame)

        for face in faces:
            result = spoofer.predict(frame, face.bbox)
            draw_spoofing_result(frame, face.bbox, result.is_real, result.confidence)

        cv2.imshow('Face Anti-Spoofing', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()


def main():
    parser = argparse.ArgumentParser(description='Face Anti-Spoofing Detection')
    parser.add_argument('--source', type=str, required=True, help='Image/video path or camera ID (0, 1, ...)')
    parser.add_argument(
        '--model',
        type=str,
        default='v2',
        choices=['v1se', 'v2'],
        help='Model variant: v1se or v2 (default: v2)',
    )
    parser.add_argument('--scale', type=float, default=None, help='Custom crop scale (default: auto)')
    parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
    args = parser.parse_args()

    # Select model variant
    model_name = MiniFASNetWeights.V1SE if args.model == 'v1se' else MiniFASNetWeights.V2

    # Initialize models
    print(f'Initializing models (MiniFASNet {args.model.upper()})...')
    detector = RetinaFace()
    spoofer = create_spoofer(model_name=model_name, scale=args.scale)

    source_type = get_source_type(args.source)

    if source_type == 'camera':
        run_camera(detector, spoofer, int(args.source))
    elif source_type == 'image':
        if not os.path.exists(args.source):
            print(f'Error: Image not found: {args.source}')
            return
        process_image(detector, spoofer, args.source, args.save_dir)
    elif source_type == 'video':
        if not os.path.exists(args.source):
            print(f'Error: Video not found: {args.source}')
            return
        process_video(detector, spoofer, args.source, args.save_dir)
    else:
        print(f"Error: Unknown source type for '{args.source}'")
        print('Supported formats: images (.jpg, .png, ...), videos (.mp4, .avi, ...), or camera ID (0, 1, ...)')


if __name__ == '__main__':
    main()
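Used as a library rather than a CLI, the pipeline above reduces to two calls. A minimal sketch under the APIs introduced in this file, assuming the default weights resolve; 'face.jpg' is an illustrative path, not part of the repo:

import cv2

from uniface import RetinaFace
from uniface.constants import MiniFASNetWeights
from uniface.spoofing import create_spoofer

# Build the same components tools/spoofing.py builds in main().
detector = RetinaFace()
spoofer = create_spoofer(model_name=MiniFASNetWeights.V2)

image = cv2.imread('face.jpg')  # hypothetical input image
for face in detector.detect(image):
    result = spoofer.predict(image, face.bbox)
    print('Real' if result.is_real else 'Fake', f'{result.confidence:.1%}')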
180
tools/video_detection.py
Normal file
@@ -0,0 +1,180 @@
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

"""Face detection on video files with progress tracking.

Usage:
    python tools/video_detection.py --source video.mp4
    python tools/video_detection.py --source video.mp4 --output output.mp4
    python tools/video_detection.py --source 0  # webcam
"""

from __future__ import annotations

import argparse
import os
from pathlib import Path

import cv2
from tqdm import tqdm

from uniface import SCRFD, RetinaFace
from uniface.visualization import draw_detections

IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}


def get_source_type(source: str) -> str:
    """Determine if source is image, video, or camera."""
    if source.isdigit():
        return 'camera'
    path = Path(source)
    suffix = path.suffix.lower()
    if suffix in IMAGE_EXTENSIONS:
        return 'image'
    elif suffix in VIDEO_EXTENSIONS:
        return 'video'
    else:
        return 'unknown'


def process_video(
    detector,
    input_path: str,
    output_path: str,
    threshold: float = 0.6,
    show_preview: bool = False,
):
    """Process a video file with progress bar."""
    cap = cv2.VideoCapture(input_path)
    if not cap.isOpened():
        print(f"Error: Cannot open video file '{input_path}'")
        return

    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    print(f'Input: {input_path} ({width}x{height}, {fps:.1f} fps, {total_frames} frames)')
    print(f'Output: {output_path}')

    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    if not out.isOpened():
        print(f"Error: Cannot create output video '{output_path}'")
        cap.release()
        return

    frame_count = 0
    total_faces = 0

    for _ in tqdm(range(total_frames), desc='Processing', unit='frames'):
        ret, frame = cap.read()
        if not ret:
            break

        frame_count += 1
        faces = detector.detect(frame)
        total_faces += len(faces)

        bboxes = [f.bbox for f in faces]
        scores = [f.confidence for f in faces]
        landmarks = [f.landmarks for f in faces]
        draw_detections(
            image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
        )

        cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        out.write(frame)

        if show_preview:
            cv2.imshow("Processing - Press 'q' to cancel", frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                print('\nCancelled by user')
                break

    cap.release()
    out.release()
    if show_preview:
        cv2.destroyAllWindows()

    avg_faces = total_faces / frame_count if frame_count > 0 else 0
    print(f'\nDone! {frame_count} frames, {total_faces} faces ({avg_faces:.1f} avg/frame)')
    print(f'Saved: {output_path}')
def run_camera(detector, camera_id: int = 0, threshold: float = 0.6):
    """Run real-time detection on webcam."""
    cap = cv2.VideoCapture(camera_id)
    if not cap.isOpened():
        print(f'Cannot open camera {camera_id}')
        return

    print("Press 'q' to quit")

    while True:
        ret, frame = cap.read()
        if not ret:
            break
        # Check ret before flipping: on a failed read the frame is None,
        # and cv2.flip(None, 1) would raise.
        frame = cv2.flip(frame, 1)

        faces = detector.detect(frame)

        bboxes = [f.bbox for f in faces]
        scores = [f.confidence for f in faces]
        landmarks = [f.landmarks for f in faces]
        draw_detections(
            image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
        )

        cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        cv2.imshow('Face Detection', frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()


def main():
    parser = argparse.ArgumentParser(description='Process video with face detection')
    parser.add_argument('--source', type=str, required=True, help='Video path or camera ID (0, 1, ...)')
    parser.add_argument('--output', type=str, default=None, help='Output video path (auto-generated if not specified)')
    parser.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
    parser.add_argument('--threshold', type=float, default=0.6, help='Visualization threshold')
    parser.add_argument('--preview', action='store_true', help='Show live preview')
    parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory (if --output not specified)')
    args = parser.parse_args()

    detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()

    source_type = get_source_type(args.source)

    if source_type == 'camera':
        run_camera(detector, int(args.source), args.threshold)
    elif source_type == 'video':
        if not os.path.exists(args.source):
            print(f'Error: Video not found: {args.source}')
            return

        # Determine output path
        if args.output:
            output_path = args.output
        else:
            os.makedirs(args.save_dir, exist_ok=True)
            output_path = os.path.join(args.save_dir, f'{Path(args.source).stem}_detected.mp4')

        process_video(detector, args.source, output_path, args.threshold, args.preview)
    else:
        print(f"Error: Unknown source type for '{args.source}'")
        print('Supported formats: videos (.mp4, .avi, ...) or camera ID (0, 1, ...)')


if __name__ == '__main__':
    main()
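The same processing is callable without argparse. A minimal sketch, assuming the repo root is on sys.path so tools/video_detection.py is importable, and that 'input.mp4' (illustrative) exists:

from tools.video_detection import process_video  # assumes repo root on sys.path

from uniface import RetinaFace

# Writes 'outputs/input_detected.mp4' with boxes and a per-frame face counter.
process_video(RetinaFace(), 'input.mp4', 'outputs/input_detected.mp4', threshold=0.6, show_preview=False)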
@@ -1,4 +1,4 @@
# Copyright 2025 Yakhyokhuja Valikhujaev
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
#
# Licensed under the MIT License.
# You may obtain a copy of the License at
@@ -11,36 +11,58 @@
# See the License for the specific language governing permissions and
# limitations under the License.

"""UniFace: A comprehensive library for face analysis.

This library provides unified APIs for:
- Face detection (RetinaFace, SCRFD, YOLOv5Face, YOLOv8Face)
- Face recognition (AdaFace, ArcFace, MobileFace, SphereFace)
- Facial landmarks (106-point detection)
- Face parsing (semantic segmentation)
- Gaze estimation
- Age, gender, and emotion prediction
- Face anti-spoofing
- Privacy/anonymization
"""

from __future__ import annotations

__license__ = 'MIT'
__author__ = 'Yakhyokhuja Valikhujaev'
__version__ = '1.3.2'
__version__ = '2.2.1'

from uniface.face_utils import compute_similarity, face_alignment
from uniface.log import Logger, enable_logging
from uniface.model_store import verify_model_weights
from uniface.visualization import draw_detections
from uniface.visualization import draw_detections, vis_parsing_maps

from .analyzer import FaceAnalyzer
from .attribute import AgeGender
from .face import Face

try:
    from .attribute import Emotion
except ImportError:
    Emotion = None  # PyTorch not installed
from .attribute import AgeGender, FairFace
from .detection import (
    SCRFD,
    RetinaFace,
    YOLOv5Face,
    YOLOv8Face,
    create_detector,
    detect_faces,
    list_available_detectors,
)
from .gaze import MobileGaze, create_gaze_estimator
from .landmark import Landmark106, create_landmarker
from .recognition import ArcFace, MobileFace, SphereFace, create_recognizer
from .parsing import BiSeNet, create_face_parser
from .privacy import BlurFace, anonymize_faces
from .recognition import AdaFace, ArcFace, MobileFace, SphereFace, create_recognizer
from .spoofing import MiniFASNet, create_spoofer
from .types import AttributeResult, EmotionResult, Face, GazeResult, SpoofingResult

# Optional: Emotion requires PyTorch
Emotion: type | None
try:
    from .attribute import Emotion
except ImportError:
    Emotion = None

__all__ = [
    # Metadata
    '__author__',
    '__license__',
    '__version__',
@@ -49,28 +71,48 @@ __all__ = [
    'FaceAnalyzer',
    # Factory functions
    'create_detector',
    'create_face_parser',
    'create_gaze_estimator',
    'create_landmarker',
    'create_recognizer',
    'create_spoofer',
    'detect_faces',
    'list_available_detectors',
    # Detection models
    'RetinaFace',
    'SCRFD',
    'YOLOv5Face',
    'YOLOv8Face',
    # Recognition models
    'AdaFace',
    'ArcFace',
    'MobileFace',
    'SphereFace',
    # Landmark models
    'Landmark106',
    # Gaze models
    'GazeResult',
    'MobileGaze',
    # Parsing models
    'BiSeNet',
    # Attribute models
    'AgeGender',
    'AttributeResult',
    'Emotion',
    'EmotionResult',
    'FairFace',
    # Spoofing models
    'MiniFASNet',
    'SpoofingResult',
    # Privacy
    'BlurFace',
    'anonymize_faces',
    # Utilities
    'Logger',
    'compute_similarity',
    'draw_detections',
    'enable_logging',
    'face_alignment',
    'verify_model_weights',
    'Logger',
    'enable_logging',
    'vis_parsing_maps',
]
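Since Emotion is exported as None when PyTorch is absent, callers can guard on the symbol instead of catching ImportError themselves. A minimal sketch:

import uniface

print(uniface.__version__)  # '2.2.1' after this change

if uniface.Emotion is not None:
    emotion = uniface.Emotion()  # PyTorch-backed predictor
else:
    print('Emotion predictor disabled: PyTorch is not installed')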
@@ -1,75 +1,103 @@
# Copyright 2025 Yakhyokhuja Valikhujaev
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

from typing import List, Optional
from __future__ import annotations

import numpy as np

from uniface.attribute.age_gender import AgeGender
from uniface.attribute.fairface import FairFace
from uniface.detection.base import BaseDetector
from uniface.face import Face
from uniface.log import Logger
from uniface.recognition.base import BaseRecognizer
from uniface.types import Face

__all__ = ['FaceAnalyzer']


class FaceAnalyzer:
    """Unified face analyzer combining detection, recognition, and attributes."""
    """Unified face analyzer combining detection, recognition, and attributes.

    This class provides a high-level interface for face analysis by combining
    multiple components: face detection, recognition (embedding extraction),
    and attribute prediction (age, gender, race).

    Args:
        detector: Face detector instance for detecting faces in images.
        recognizer: Optional face recognizer for extracting embeddings.
        age_gender: Optional age/gender predictor.
        fairface: Optional FairFace predictor for demographics.

    Example:
        >>> from uniface import RetinaFace, ArcFace, FaceAnalyzer
        >>> detector = RetinaFace()
        >>> recognizer = ArcFace()
        >>> analyzer = FaceAnalyzer(detector, recognizer=recognizer)
        >>> faces = analyzer.analyze(image)
    """

    def __init__(
        self,
        detector: BaseDetector,
        recognizer: Optional[BaseRecognizer] = None,
        age_gender: Optional[AgeGender] = None,
        recognizer: BaseRecognizer | None = None,
        age_gender: AgeGender | None = None,
        fairface: FairFace | None = None,
    ) -> None:
        self.detector = detector
        self.recognizer = recognizer
        self.age_gender = age_gender
        self.fairface = fairface

        Logger.info(f'Initialized FaceAnalyzer with detector={detector.__class__.__name__}')
        if recognizer:
            Logger.info(f'  - Recognition enabled: {recognizer.__class__.__name__}')
        if age_gender:
            Logger.info(f'  - Age/Gender enabled: {age_gender.__class__.__name__}')
        if fairface:
            Logger.info(f'  - FairFace enabled: {fairface.__class__.__name__}')

    def analyze(self, image: np.ndarray) -> List[Face]:
        """Analyze faces in an image."""
        detections = self.detector.detect(image)
        Logger.debug(f'Detected {len(detections)} face(s)')
    def analyze(self, image: np.ndarray) -> list[Face]:
        """Analyze faces in an image.

        faces = []
        for idx, detection in enumerate(detections):
            bbox = detection['bbox']
            confidence = detection['confidence']
            landmarks = detection['landmarks']
        Performs face detection and optionally extracts embeddings and
        predicts attributes for each detected face.

            embedding = None
        Args:
            image: Input image as numpy array with shape (H, W, C) in BGR format.

        Returns:
            List of Face objects with detection results and any predicted attributes.
        """
        faces = self.detector.detect(image)
        Logger.debug(f'Detected {len(faces)} face(s)')

        for idx, face in enumerate(faces):
            if self.recognizer is not None:
                try:
                    embedding = self.recognizer.get_normalized_embedding(image, landmarks)
                    Logger.debug(f'  Face {idx + 1}: Extracted embedding with shape {embedding.shape}')
                    face.embedding = self.recognizer.get_normalized_embedding(image, face.landmarks)
                    Logger.debug(f'  Face {idx + 1}: Extracted embedding with shape {face.embedding.shape}')
                except Exception as e:
                    Logger.warning(f'  Face {idx + 1}: Failed to extract embedding: {e}')

            age, gender = None, None
            if self.age_gender is not None:
                try:
                    gender, age = self.age_gender.predict(image, bbox)
                    Logger.debug(f'  Face {idx + 1}: Age={age}, Gender={gender}')
                    result = self.age_gender.predict(image, face.bbox)
                    face.gender = result.gender
                    face.age = result.age
                    Logger.debug(f'  Face {idx + 1}: Age={face.age}, Gender={face.sex}')
                except Exception as e:
                    Logger.warning(f'  Face {idx + 1}: Failed to predict age/gender: {e}')

            face = Face(
                bbox=bbox,
                confidence=confidence,
                landmarks=landmarks,
                embedding=embedding,
                age=age,
                gender=gender,
            )
            faces.append(face)
            if self.fairface is not None:
                try:
                    result = self.fairface.predict(image, face.bbox)
                    face.gender = result.gender
                    face.age_group = result.age_group
                    face.race = result.race
                    Logger.debug(f'  Face {idx + 1}: AgeGroup={face.age_group}, Gender={face.sex}, Race={face.race}')
                except Exception as e:
                    Logger.warning(f'  Face {idx + 1}: Failed to predict FairFace attributes: {e}')

        Logger.info(f'Analysis complete: {len(faces)} face(s) processed')
        return faces
@@ -80,4 +108,6 @@ class FaceAnalyzer:
            parts.append(f'recognizer={self.recognizer.__class__.__name__}')
        if self.age_gender:
            parts.append(f'age_gender={self.age_gender.__class__.__name__}')
        if self.fairface:
            parts.append(f'fairface={self.fairface.__class__.__name__}')
        return ', '.join(parts) + ')'
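analyze now enriches the detector's own Face objects in place, so embeddings and attributes hang off one object. A minimal verification sketch, assuming compute_similarity takes two embeddings and each illustrative image contains at least one face:

import cv2

from uniface import ArcFace, FaceAnalyzer, RetinaFace, compute_similarity

analyzer = FaceAnalyzer(RetinaFace(), recognizer=ArcFace())

# 'a.jpg' and 'b.jpg' are illustrative paths.
face_a = analyzer.analyze(cv2.imread('a.jpg'))[0]
face_b = analyzer.analyze(cv2.imread('b.jpg'))[0]

print(compute_similarity(face_a.embedding, face_b.embedding))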
@@ -1,14 +1,18 @@
# Copyright 2025 Yakhyokhuja Valikhujaev
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

from typing import Any, Dict, List, Union
from __future__ import annotations

from typing import Any

import numpy as np

from uniface.attribute.age_gender import AgeGender
from uniface.attribute.base import Attribute
from uniface.constants import AgeGenderWeights, DDAMFNWeights
from uniface.attribute.fairface import FairFace
from uniface.constants import AgeGenderWeights, DDAMFNWeights, FairFaceWeights
from uniface.types import AttributeResult, EmotionResult, Face

# Emotion requires PyTorch - make it optional
try:
@@ -20,19 +24,30 @@ except ImportError:
    _EMOTION_AVAILABLE = False

# Public API for the attribute module
__all__ = ['AgeGender', 'Emotion', 'create_attribute_predictor', 'predict_attributes']
__all__ = [
    'AgeGender',
    'AttributeResult',
    'Emotion',
    'EmotionResult',
    'FairFace',
    'create_attribute_predictor',
    'predict_attributes',
]

# A mapping from model enums to their corresponding attribute classes
_ATTRIBUTE_MODELS = {
    **{model: AgeGender for model in AgeGenderWeights},
    **dict.fromkeys(AgeGenderWeights, AgeGender),
    **dict.fromkeys(FairFaceWeights, FairFace),
}

# Add Emotion models only if PyTorch is available
if _EMOTION_AVAILABLE:
    _ATTRIBUTE_MODELS.update({model: Emotion for model in DDAMFNWeights})
    _ATTRIBUTE_MODELS.update(dict.fromkeys(DDAMFNWeights, Emotion))


def create_attribute_predictor(model_name: Union[AgeGenderWeights, DDAMFNWeights], **kwargs: Any) -> Attribute:
def create_attribute_predictor(
    model_name: AgeGenderWeights | DDAMFNWeights | FairFaceWeights, **kwargs: Any
) -> Attribute:
    """
    Factory function to create an attribute predictor instance.

@@ -41,11 +56,13 @@ def create_attribute_predictor(model_name: Union[AgeGenderWeights, DDAMFNWeights
    Args:
        model_name: The enum corresponding to the desired attribute model
            (e.g., AgeGenderWeights.DEFAULT or DDAMFNWeights.AFFECNET7).
            (e.g., AgeGenderWeights.DEFAULT, DDAMFNWeights.AFFECNET7,
            or FairFaceWeights.DEFAULT).
        **kwargs: Additional keyword arguments to pass to the model's constructor.

    Returns:
        An initialized instance of an Attribute predictor class (e.g., AgeGender).
        An initialized instance of an Attribute predictor class
        (e.g., AgeGender, FairFace, or Emotion).

    Raises:
        ValueError: If the provided model_name is not a supported enum.
@@ -54,46 +71,44 @@ def create_attribute_predictor(model_name: Union[AgeGenderWeights, DDAMFNWeights

    if model_class is None:
        raise ValueError(
            f'Unsupported attribute model: {model_name}. Please choose from AgeGenderWeights or DDAMFNWeights.'
            f'Unsupported attribute model: {model_name}. '
            f'Please choose from AgeGenderWeights, FairFaceWeights, or DDAMFNWeights.'
        )

    # Pass model_name to the constructor, as some classes might need it
    return model_class(model_name=model_name, **kwargs)


def predict_attributes(
    image: np.ndarray, detections: List[Dict[str, np.ndarray]], predictor: Attribute
) -> List[Dict[str, Any]]:
def predict_attributes(image: np.ndarray, faces: list[Face], predictor: Attribute) -> list[Face]:
    """
    High-level API to predict attributes for multiple detected faces.

    This function iterates through a list of face detections, runs the
    specified attribute predictor on each one, and appends the results back
    into the detection dictionary.
    This function iterates through a list of Face objects, runs the
    specified attribute predictor on each one, and updates the Face
    objects with the predicted attributes.

    Args:
        image (np.ndarray): The full input image in BGR format.
        detections (List[Dict]): A list of detection results, where each dict
            must contain a 'bbox' and optionally 'landmark'.
        faces (List[Face]): A list of Face objects from face detection.
        predictor (Attribute): An initialized attribute predictor instance,
            created by `create_attribute_predictor`.

    Returns:
        The list of detections, where each dictionary is updated with a new
        'attributes' key containing the prediction result.
        List[Face]: The list of Face objects with updated attribute fields.
    """
    for face in detections:
        # Initialize attributes dict if it doesn't exist
        if 'attributes' not in face:
            face['attributes'] = {}

    for face in faces:
        if isinstance(predictor, AgeGender):
            gender_id, age = predictor(image, face['bbox'])
            face['attributes']['gender_id'] = gender_id
            face['attributes']['age'] = age
            result = predictor(image, face.bbox)
            face.gender = result.gender
            face.age = result.age
        elif isinstance(predictor, FairFace):
            result = predictor(image, face.bbox)
            face.gender = result.gender
            face.age_group = result.age_group
            face.race = result.race
        elif isinstance(predictor, Emotion):
            emotion, confidence = predictor(image, face['landmark'])
            face['attributes']['emotion'] = emotion
            face['attributes']['confidence'] = confidence
            result = predictor(image, face.landmarks)
            face.emotion = result.emotion
            face.emotion_confidence = result.confidence

    return detections
    return faces
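The factory plus predict_attributes gives the same Face-mutating behavior without touching predictor classes directly. A minimal sketch; 'group.jpg' is an illustrative path:

import cv2

from uniface import RetinaFace
from uniface.attribute import create_attribute_predictor, predict_attributes
from uniface.constants import FairFaceWeights

detector = RetinaFace()
predictor = create_attribute_predictor(FairFaceWeights.DEFAULT)

image = cv2.imread('group.jpg')
faces = predict_attributes(image, detector.detect(image), predictor)
for face in faces:
    print(face.gender, face.age_group, face.race)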
@@ -1,8 +1,7 @@
# Copyright 2025 Yakhyokhuja Valikhujaev
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

from typing import List, Optional, Tuple, Union

import cv2
import numpy as np
@@ -13,6 +12,7 @@ from uniface.face_utils import bbox_center_alignment
from uniface.log import Logger
from uniface.model_store import verify_model_weights
from uniface.onnx_utils import create_onnx_session
from uniface.types import AttributeResult

__all__ = ['AgeGender']

@@ -30,12 +30,15 @@ class AgeGender(Attribute):
            Defaults to `AgeGenderWeights.DEFAULT`.
        input_size (Optional[Tuple[int, int]]): Input size (height, width).
            If None, automatically detected from model metadata. Defaults to None.
        providers (list[str] | None): ONNX Runtime execution providers. If None, auto-detects
            the best available provider. Example: ['CPUExecutionProvider'] to force CPU.
    """

    def __init__(
        self,
        model_name: AgeGenderWeights = AgeGenderWeights.DEFAULT,
        input_size: Optional[Tuple[int, int]] = None,
        input_size: tuple[int, int] | None = None,
        providers: list[str] | None = None,
    ) -> None:
        """
        Initializes the AgeGender prediction model.
@@ -44,10 +47,13 @@ class AgeGender(Attribute):
            model_name (AgeGenderWeights): The enum specifying the model weights to load.
            input_size (Optional[Tuple[int, int]]): Input size (height, width).
                If None, automatically detected from model metadata. Defaults to None.
            providers (list[str] | None): ONNX Runtime execution providers. If None, auto-detects
                the best available provider. Example: ['CPUExecutionProvider'] to force CPU.
        """
        Logger.info(f'Initializing AgeGender with model={model_name.name}')
        self.model_path = verify_model_weights(model_name)
        self._user_input_size = input_size  # Store user preference
        self.providers = providers
        self._initialize_model()

    def _initialize_model(self) -> None:
@@ -55,7 +61,7 @@ class AgeGender(Attribute):
        Initializes the ONNX model and creates an inference session.
        """
        try:
            self.session = create_onnx_session(self.model_path)
            self.session = create_onnx_session(self.model_path, providers=self.providers)
            # Get model input details from the loaded model
            input_meta = self.session.get_inputs()[0]
            self.input_name = input_meta.name
@@ -81,7 +87,7 @@ class AgeGender(Attribute):
            )
            raise RuntimeError(f'Failed to initialize AgeGender model: {e}') from e

    def preprocess(self, image: np.ndarray, bbox: Union[List, np.ndarray]) -> np.ndarray:
    def preprocess(self, image: np.ndarray, bbox: list | np.ndarray) -> np.ndarray:
        """
        Aligns the face based on the bounding box and preprocesses it for inference.

@@ -111,7 +117,7 @@ class AgeGender(Attribute):
        )
        return blob

    def postprocess(self, prediction: np.ndarray) -> Tuple[int, int]:
    def postprocess(self, prediction: np.ndarray) -> AttributeResult:
        """
        Processes the raw model output to extract gender and age.

@@ -119,16 +125,15 @@ class AgeGender(Attribute):
            prediction (np.ndarray): The raw output from the model inference.

        Returns:
            Tuple[int, int]: A tuple containing the predicted gender ID (0 for Female, 1 for Male)
                and age (in years).
            AttributeResult: Result containing gender (0=Female, 1=Male) and age (in years).
        """
        # First two values are gender logits
        gender_id = int(np.argmax(prediction[:2]))
        gender = int(np.argmax(prediction[:2]))
        # Third value is normalized age, scaled by 100
        age = int(np.round(prediction[2] * 100))
        return gender_id, age
        return AttributeResult(gender=gender, age=age)

    def predict(self, image: np.ndarray, bbox: Union[List, np.ndarray]) -> Tuple[int, int]:
    def predict(self, image: np.ndarray, bbox: list | np.ndarray) -> AttributeResult:
        """
        Predicts age and gender for a single face specified by a bounding box.

@@ -137,75 +142,8 @@ class AgeGender(Attribute):
            bbox (Union[List, np.ndarray]): The face bounding box coordinates [x1, y1, x2, y2].

        Returns:
            Tuple[int, int]: A tuple containing the predicted gender ID (0 for Female, 1 for Male) and age.
            AttributeResult: Result containing gender (0=Female, 1=Male) and age (in years).
        """
        face_blob = self.preprocess(image, bbox)
        prediction = self.session.run(self.output_names, {self.input_name: face_blob})[0][0]
        gender_id, age = self.postprocess(prediction)
        return gender_id, age


# TODO: below is only for testing, remove it later
if __name__ == '__main__':
    # To run this script, you need to have uniface.detection installed
    # or available in your path.
    from uniface.constants import RetinaFaceWeights
    from uniface.detection import create_detector

    print('Initializing models for live inference...')
    # 1. Initialize the face detector
    # Using a smaller model for faster real-time performance
    detector = create_detector(model_name=RetinaFaceWeights.MNET_V2)

    # 2. Initialize the attribute predictor
    age_gender_predictor = AgeGender()

    # 3. Start webcam capture
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        print('Error: Could not open webcam.')
        exit()

    print("Starting webcam feed. Press 'q' to quit.")
    while True:
        ret, frame = cap.read()
        if not ret:
            print('Error: Failed to capture frame.')
            break

        # Detect faces in the current frame
        detections = detector.detect(frame)

        # For each detected face, predict age and gender
        for detection in detections:
            box = detection['bbox']
            x1, y1, x2, y2 = map(int, box)

            # Predict attributes
            gender_id, age = age_gender_predictor.predict(frame, box)
            gender_str = 'Female' if gender_id == 0 else 'Male'

            # Prepare text and draw on the frame
            label = f'{gender_str}, {age}'
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(
                frame,
                label,
                (x1, y1 - 10),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.8,
                (0, 255, 0),
                2,
            )

        # Display the resulting frame
        cv2.imshow("Age and Gender Inference (Press 'q' to quit)", frame)

        # Break the loop if 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    # Release resources
    cap.release()
    cv2.destroyAllWindows()
    print('Inference stopped.')
        return self.postprocess(prediction)
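For callers, the visible change is the return type: tuple unpacking gives way to attribute access on AttributeResult. A minimal migration sketch; 'face.jpg' is an illustrative path:

import cv2

from uniface import AgeGender, RetinaFace

image = cv2.imread('face.jpg')
face = RetinaFace().detect(image)[0]

# Old API: gender_id, age = predictor.predict(image, face.bbox)
result = AgeGender().predict(image, face.bbox)
print('Female' if result.gender == 0 else 'Male', result.age)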
@@ -1,4 +1,4 @@
# Copyright 2025 Yakhyokhuja Valikhujaev
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

@@ -7,6 +7,10 @@ from typing import Any

import numpy as np

from uniface.types import AttributeResult, EmotionResult

__all__ = ['Attribute', 'AttributeResult', 'EmotionResult']


class Attribute(ABC):
    """
@@ -1,8 +1,7 @@
# Copyright 2025 Yakhyokhuja Valikhujaev
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

from typing import List, Tuple, Union

import cv2
import numpy as np
@@ -13,6 +12,7 @@ from uniface.constants import DDAMFNWeights
from uniface.face_utils import face_alignment
from uniface.log import Logger
from uniface.model_store import verify_model_weights
from uniface.types import EmotionResult

__all__ = ['Emotion']

@@ -29,7 +29,7 @@ class Emotion(Attribute):
    def __init__(
        self,
        model_weights: DDAMFNWeights = DDAMFNWeights.AFFECNET7,
        input_size: Tuple[int, int] = (112, 112),
        input_size: tuple[int, int] = (112, 112),
    ) -> None:
        """
        Initializes the emotion recognition model.
@@ -81,7 +81,7 @@ class Emotion(Attribute):
            Logger.error(f"Failed to load Emotion model from '{self.model_path}'", exc_info=True)
            raise RuntimeError(f'Failed to initialize Emotion model: {e}') from e

    def preprocess(self, image: np.ndarray, landmark: Union[List, np.ndarray]) -> torch.Tensor:
    def preprocess(self, image: np.ndarray, landmark: list | np.ndarray) -> torch.Tensor:
        """
        Aligns the face using landmarks and preprocesses it into a tensor.

@@ -106,7 +106,7 @@ class Emotion(Attribute):

        return torch.from_numpy(transposed_image).unsqueeze(0).to(self.device)

    def postprocess(self, prediction: torch.Tensor) -> Tuple[str, float]:
    def postprocess(self, prediction: torch.Tensor) -> EmotionResult:
        """
        Processes the raw model output to get the emotion label and confidence score.
        """
@@ -114,9 +114,9 @@ class Emotion(Attribute):
        pred_index = np.argmax(probabilities)
        emotion_label = self.emotion_labels[pred_index]
        confidence = float(probabilities[pred_index])
        return emotion_label, confidence
        return EmotionResult(emotion=emotion_label, confidence=confidence)

    def predict(self, image: np.ndarray, landmark: Union[List, np.ndarray]) -> Tuple[str, float]:
    def predict(self, image: np.ndarray, landmark: list | np.ndarray) -> EmotionResult:
        """
        Predicts the emotion from a single face specified by its landmarks.
        """
@@ -127,68 +127,3 @@ class Emotion(Attribute):
        output = output[0]

        return self.postprocess(output)


# TODO: below is only for testing, remove it later
if __name__ == '__main__':
    from uniface.constants import RetinaFaceWeights
    from uniface.detection import create_detector

    print('Initializing models for live inference...')
    # 1. Initialize the face detector
    # Using a smaller model for faster real-time performance
    detector = create_detector(model_name=RetinaFaceWeights.MNET_V2)

    # 2. Initialize the attribute predictor
    emotion_predictor = Emotion()

    # 3. Start webcam capture
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        print('Error: Could not open webcam.')
        exit()

    print("Starting webcam feed. Press 'q' to quit.")
    while True:
        ret, frame = cap.read()
        if not ret:
            print('Error: Failed to capture frame.')
            break

        # Detect faces in the current frame.
        # This method returns a list of dictionaries for each detected face.
        detections = detector.detect(frame)

        # For each detected face, predict the emotion
        for detection in detections:
            box = detection['bbox']
            landmark = detection['landmarks']
            x1, y1, x2, y2 = map(int, box)

            # Predict attributes using the landmark
            emotion, confidence = emotion_predictor.predict(frame, landmark)

            # Prepare text and draw on the frame
            label = f'{emotion} ({confidence:.2f})'
            cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)
            cv2.putText(
                frame,
                label,
                (x1, y1 - 10),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.8,
                (255, 0, 0),
                2,
            )

        # Display the resulting frame
        cv2.imshow("Emotion Inference (Press 'q' to quit)", frame)

        # Break the loop if 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    # Release resources
    cap.release()
    cv2.destroyAllWindows()
    print('Inference stopped.')
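Emotion.predict now returns an EmotionResult as well; a guarded call mirrors the package-level PyTorch fallback. A minimal sketch; 'face.jpg' is an illustrative path:

import cv2

from uniface import Emotion, RetinaFace

if Emotion is None:
    raise SystemExit('Install PyTorch to enable the Emotion predictor')

image = cv2.imread('face.jpg')
face = RetinaFace().detect(image)[0]

result = Emotion().predict(image, face.landmarks)
print(f'{result.emotion} ({result.confidence:.2f})')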
199
uniface/attribute/fairface.py
Normal file
@@ -0,0 +1,199 @@
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo


import cv2
import numpy as np

from uniface.attribute.base import Attribute
from uniface.constants import FairFaceWeights
from uniface.log import Logger
from uniface.model_store import verify_model_weights
from uniface.onnx_utils import create_onnx_session
from uniface.types import AttributeResult

__all__ = ['AGE_LABELS', 'RACE_LABELS', 'FairFace']

# Label definitions
RACE_LABELS = [
    'White',
    'Black',
    'Latino Hispanic',
    'East Asian',
    'Southeast Asian',
    'Indian',
    'Middle Eastern',
]
AGE_LABELS = ['0-2', '3-9', '10-19', '20-29', '30-39', '40-49', '50-59', '60-69', '70+']


class FairFace(Attribute):
    """
    FairFace attribute prediction model using ONNX Runtime.

    This class inherits from the base `Attribute` class and implements the
    functionality for predicting race (7 categories), gender (2 categories),
    and age (9 groups) from a face image. It requires a bounding box to locate the face.

    The model is trained on the FairFace dataset which provides balanced demographics
    for more equitable predictions across different racial and gender groups.

    Args:
        model_name (FairFaceWeights): The enum specifying the model weights to load.
            Defaults to `FairFaceWeights.DEFAULT`.
        input_size (Optional[Tuple[int, int]]): Input size (height, width).
            If None, defaults to (224, 224). Defaults to None.
        providers (list[str] | None): ONNX Runtime execution providers. If None, auto-detects
            the best available provider. Example: ['CPUExecutionProvider'] to force CPU.
    """

    def __init__(
        self,
        model_name: FairFaceWeights = FairFaceWeights.DEFAULT,
        input_size: tuple[int, int] | None = None,
        providers: list[str] | None = None,
    ) -> None:
        """
        Initializes the FairFace prediction model.

        Args:
            model_name (FairFaceWeights): The enum specifying the model weights to load.
            input_size (Optional[Tuple[int, int]]): Input size (height, width).
                If None, defaults to (224, 224).
            providers (list[str] | None): ONNX Runtime execution providers. If None, auto-detects
                the best available provider. Example: ['CPUExecutionProvider'] to force CPU.
        """
        Logger.info(f'Initializing FairFace with model={model_name.name}')
        self.model_path = verify_model_weights(model_name)
        self.input_size = input_size if input_size is not None else (224, 224)
        self.providers = providers
        self._initialize_model()

    def _initialize_model(self) -> None:
        """
        Initializes the ONNX model and creates an inference session.
        """
        try:
            self.session = create_onnx_session(self.model_path, providers=self.providers)
            # Get model input details from the loaded model
            input_meta = self.session.get_inputs()[0]
            self.input_name = input_meta.name
            self.output_names = [output.name for output in self.session.get_outputs()]
            Logger.info(f'Successfully initialized FairFace model with input size {self.input_size}')
        except Exception as e:
            Logger.error(
                f"Failed to load FairFace model from '{self.model_path}'",
                exc_info=True,
            )
            raise RuntimeError(f'Failed to initialize FairFace model: {e}') from e

    def preprocess(self, image: np.ndarray, bbox: list | np.ndarray | None = None) -> np.ndarray:
        """
        Preprocesses the face image for inference.

        Args:
            image (np.ndarray): The input image in BGR format.
            bbox (Optional[Union[List, np.ndarray]]): Face bounding box [x1, y1, x2, y2].
                If None, uses the entire image.

        Returns:
            np.ndarray: The preprocessed image blob ready for inference.
        """
        # Crop face if bbox provided
        if bbox is not None:
            bbox = np.asarray(bbox, dtype=int)
            x1, y1, x2, y2 = bbox[:4]

            # Add padding (25% of face size)
            w, h = x2 - x1, y2 - y1
            padding = 0.25
            x_pad = int(w * padding)
            y_pad = int(h * padding)

            x1 = max(0, x1 - x_pad)
            y1 = max(0, y1 - y_pad)
            x2 = min(image.shape[1], x2 + x_pad)
            y2 = min(image.shape[0], y2 + y_pad)

            image = image[y1:y2, x1:x2]

        # Resize to input size (width, height for cv2.resize)
        image = cv2.resize(image, self.input_size[::-1])

        # Convert BGR to RGB
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Normalize with ImageNet mean and std
        image = image.astype(np.float32) / 255.0
        mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
        std = np.array([0.229, 0.224, 0.225], dtype=np.float32)
        image = (image - mean) / std

        # Transpose to CHW format and add batch dimension
        image = np.transpose(image, (2, 0, 1))
        image = np.expand_dims(image, axis=0)

        return image

    def postprocess(self, prediction: tuple[np.ndarray, np.ndarray, np.ndarray]) -> AttributeResult:
        """
        Processes the raw model output to extract race, gender, and age.

        Args:
            prediction (Tuple[np.ndarray, np.ndarray, np.ndarray]): Raw outputs from model
                (race_logits, gender_logits, age_logits).

        Returns:
            AttributeResult: Result containing gender (0=Female, 1=Male), age_group, and race.
        """
        race_logits, gender_logits, age_logits = prediction

        # Apply softmax
        race_probs = self._softmax(race_logits[0])
        gender_probs = self._softmax(gender_logits[0])
        age_probs = self._softmax(age_logits[0])

        # Get predictions
        race_idx = int(np.argmax(race_probs))
        raw_gender_idx = int(np.argmax(gender_probs))
        age_idx = int(np.argmax(age_probs))

        # Normalize gender: model outputs 0=Male, 1=Female → standard 0=Female, 1=Male
        gender = 1 - raw_gender_idx

        return AttributeResult(
            gender=gender,
            age_group=AGE_LABELS[age_idx],
            race=RACE_LABELS[race_idx],
        )

    def predict(self, image: np.ndarray, bbox: list | np.ndarray | None = None) -> AttributeResult:
        """
        Predicts race, gender, and age for a face.

        Args:
            image (np.ndarray): The input image in BGR format.
            bbox (Optional[Union[List, np.ndarray]]): Face bounding box [x1, y1, x2, y2].
                If None, uses the entire image.

        Returns:
            AttributeResult: Result containing:
                - gender: 0=Female, 1=Male
                - age_group: Age range string like "20-29"
                - race: Race/ethnicity label
        """
        # Preprocess
        input_blob = self.preprocess(image, bbox)

        # Inference
        outputs = self.session.run(self.output_names, {self.input_name: input_blob})

        # Postprocess
        return self.postprocess(outputs)

    @staticmethod
    def _softmax(x: np.ndarray) -> np.ndarray:
        """Compute softmax values for numerical stability."""
        exp_x = np.exp(x - np.max(x))
        return exp_x / np.sum(exp_x)
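Note the remap in postprocess: the raw FairFace gender head is ordered 0=Male, 1=Female, so `1 - raw_gender_idx` aligns it with the library-wide 0=Female, 1=Male convention. A minimal usage sketch; 'face.jpg' is an illustrative path:

import cv2

from uniface import FairFace, RetinaFace

image = cv2.imread('face.jpg')
face = RetinaFace().detect(image)[0]

result = FairFace().predict(image, face.bbox)
print('Female' if result.gender == 0 else 'Male', result.age_group, result.race)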
@@ -1,35 +1,43 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import itertools
|
||||
import math
|
||||
from typing import List, Optional, Tuple
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
__all__ = [
|
||||
'resize_image',
|
||||
'generate_anchors',
|
||||
'non_max_suppression',
|
||||
'decode_boxes',
|
||||
'decode_landmarks',
|
||||
'distance2bbox',
|
||||
'distance2kps',
|
||||
'generate_anchors',
|
||||
'non_max_suppression',
|
||||
'resize_image',
|
||||
]
|
||||
|
||||
|
||||
def resize_image(frame, target_shape: Tuple[int, int] = (640, 640)) -> Tuple[np.ndarray, float]:
|
||||
"""
|
||||
Resize an image to fit within a target shape while keeping its aspect ratio.
|
||||
def resize_image(
|
||||
frame: np.ndarray,
|
||||
target_shape: tuple[int, int] = (640, 640),
|
||||
) -> tuple[np.ndarray, float]:
|
||||
"""Resize an image to fit within a target shape while keeping its aspect ratio.
|
||||
|
||||
The image is resized to fit within the target dimensions and placed on a
|
||||
blank canvas (zero-padded to target size).
|
||||
|
||||
Args:
|
||||
frame (np.ndarray): Input image.
|
||||
target_shape (Tuple[int, int]): Target size (width, height). Defaults to (640, 640).
|
||||
frame: Input image with shape (H, W, C).
|
||||
target_shape: Target size as (width, height). Defaults to (640, 640).
|
||||
|
||||
Returns:
|
||||
Tuple[np.ndarray, float]: Resized image on a blank canvas and the resize factor.
|
||||
A tuple containing:
|
||||
- Resized image on a blank canvas with shape (height, width, 3).
|
||||
- The resize factor as a float.
|
||||
"""
|
||||
width, height = target_shape
|
||||
|
||||
@@ -53,16 +61,16 @@ def resize_image(frame, target_shape: Tuple[int, int] = (640, 640)) -> Tuple[np.
|
||||
return image, resize_factor
|
||||
|
||||
|
||||
def generate_anchors(image_size: Tuple[int, int] = (640, 640)) -> np.ndarray:
|
||||
"""
|
||||
Generate anchor boxes for a given image size (RetinaFace specific).
|
||||
def generate_anchors(image_size: tuple[int, int] = (640, 640)) -> np.ndarray:
|
||||
"""Generate anchor boxes for a given image size (RetinaFace specific).
|
||||
|
||||
Args:
|
||||
image_size (Tuple[int, int]): Input image size (width, height). Defaults to (640, 640).
|
||||
image_size: Input image size as (width, height). Defaults to (640, 640).
|
||||
|
||||
Returns:
|
||||
np.ndarray: Anchor box coordinates as a NumPy array with shape (num_anchors, 4).
|
||||
Anchor box coordinates as a numpy array with shape (num_anchors, 4).
|
||||
"""
|
||||
# RetinaFace FPN strides and corresponding anchor sizes per level
|
||||
steps = [8, 16, 32]
|
||||
min_sizes = [[16, 32], [64, 128], [256, 512]]
|
||||
|
||||
@@ -85,16 +93,15 @@ def generate_anchors(image_size: Tuple[int, int] = (640, 640)) -> np.ndarray:
|
||||
return output
|
||||
|
||||
|
||||
def non_max_suppression(dets: np.ndarray, threshold: float) -> List[int]:
|
||||
"""
|
||||
Apply Non-Maximum Suppression (NMS) to reduce overlapping bounding boxes based on a threshold.
|
||||
def non_max_suppression(dets: np.ndarray, threshold: float) -> list[int]:
|
||||
"""Apply Non-Maximum Suppression (NMS) to reduce overlapping bounding boxes.
|
||||
|
||||
Args:
|
||||
dets (np.ndarray): Array of detections with each row as [x1, y1, x2, y2, score].
|
||||
threshold (float): IoU threshold for suppression.
|
||||
dets: Array of detections with each row as [x1, y1, x2, y2, score].
|
||||
threshold: IoU threshold for suppression.
|
||||
|
||||
Returns:
|
||||
List[int]: Indices of bounding boxes retained after suppression.
|
||||
Indices of bounding boxes retained after suppression.
|
||||
"""
|
||||
x1 = dets[:, 0]
|
||||
y1 = dets[:, 1]
|
||||
@@ -125,18 +132,22 @@ def non_max_suppression(dets: np.ndarray, threshold: float) -> List[int]:
|
||||
return keep
|
||||
|
||||
|
||||
def decode_boxes(loc: np.ndarray, priors: np.ndarray, variances: Optional[List[float]] = None) -> np.ndarray:
|
||||
"""
|
||||
Decode locations from predictions using priors to undo
|
||||
the encoding done for offset regression at train time (RetinaFace specific).
|
||||
def decode_boxes(
|
||||
loc: np.ndarray,
|
||||
priors: np.ndarray,
|
||||
variances: list[float] | None = None,
|
||||
) -> np.ndarray:
|
||||
"""Decode locations from predictions using priors (RetinaFace specific).
|
||||
|
||||
Undoes the encoding done for offset regression at train time.
|
||||
|
||||
Args:
|
||||
loc (np.ndarray): Location predictions for loc layers, shape: [num_priors, 4]
|
||||
priors (np.ndarray): Prior boxes in center-offset form, shape: [num_priors, 4]
|
||||
variances (Optional[List[float]]): Variances of prior boxes. Defaults to [0.1, 0.2].
|
||||
loc: Location predictions for loc layers, shape: [num_priors, 4].
|
||||
priors: Prior boxes in center-offset form, shape: [num_priors, 4].
|
||||
variances: Variances of prior boxes. Defaults to [0.1, 0.2].
|
||||
|
||||
Returns:
np.ndarray: Decoded bounding box predictions with shape [num_priors, 4]
Decoded bounding box predictions with shape [num_priors, 4].
"""
if variances is None:
variances = [0.1, 0.2]
@@ -155,18 +166,19 @@ def decode_boxes(loc: np.ndarray, priors: np.ndarray, variances: Optional[List[f


def decode_landmarks(
predictions: np.ndarray, priors: np.ndarray, variances: Optional[List[float]] = None
predictions: np.ndarray,
priors: np.ndarray,
variances: list[float] | None = None,
) -> np.ndarray:
"""
Decode landmark predictions using prior boxes (RetinaFace specific).
"""Decode landmark predictions using prior boxes (RetinaFace specific).

Args:
predictions (np.ndarray): Landmark predictions, shape: [num_priors, 10]
priors (np.ndarray): Prior boxes, shape: [num_priors, 4]
variances (Optional[List[float]]): Scaling factors for landmark offsets. Defaults to [0.1, 0.2].
predictions: Landmark predictions, shape: [num_priors, 10].
priors: Prior boxes, shape: [num_priors, 4].
variances: Scaling factors for landmark offsets. Defaults to [0.1, 0.2].

Returns:
np.ndarray: Decoded landmarks, shape: [num_priors, 10]
Decoded landmarks, shape: [num_priors, 10].
"""
if variances is None:
variances = [0.1, 0.2]
@@ -187,18 +199,21 @@ def decode_landmarks(
return landmarks


def distance2bbox(points: np.ndarray, distance: np.ndarray, max_shape: Optional[Tuple[int, int]] = None) -> np.ndarray:
"""
Decode distance prediction to bounding box (SCRFD specific).
def distance2bbox(
points: np.ndarray,
distance: np.ndarray,
max_shape: tuple[int, int] | None = None,
) -> np.ndarray:
"""Decode distance prediction to bounding box (SCRFD specific).

Args:
points (np.ndarray): Anchor points with shape (n, 2), [x, y].
distance (np.ndarray): Distance from the given point to 4
boundaries (left, top, right, bottom) with shape (n, 4).
max_shape (Optional[Tuple[int, int]]): Shape of the image (height, width) for clipping.
points: Anchor points with shape (n, 2), [x, y].
distance: Distance from the given point to 4 boundaries
(left, top, right, bottom) with shape (n, 4).
max_shape: Shape of the image (height, width) for clipping.

Returns:
np.ndarray: Decoded bounding boxes with shape (n, 4) as [x1, y1, x2, y2].
Decoded bounding boxes with shape (n, 4) as [x1, y1, x2, y2].
"""
x1 = points[:, 0] - distance[:, 0]
y1 = points[:, 1] - distance[:, 1]
@@ -219,17 +234,20 @@ def distance2bbox(points: np.ndarray, distance: np.ndarray, max_shape: Optional[
return np.stack([x1, y1, x2, y2], axis=-1)


def distance2kps(points: np.ndarray, distance: np.ndarray, max_shape: Optional[Tuple[int, int]] = None) -> np.ndarray:
"""
Decode distance prediction to keypoints (SCRFD specific).
def distance2kps(
points: np.ndarray,
distance: np.ndarray,
max_shape: tuple[int, int] | None = None,
) -> np.ndarray:
"""Decode distance prediction to keypoints (SCRFD specific).

Args:
points (np.ndarray): Anchor points with shape (n, 2), [x, y].
distance (np.ndarray): Distance from the given point to keypoints with shape (n, 2k).
max_shape (Optional[Tuple[int, int]]): Shape of the image (height, width) for clipping.
points: Anchor points with shape (n, 2), [x, y].
distance: Distance from the given point to keypoints with shape (n, 2k).
max_shape: Shape of the image (height, width) for clipping.

Returns:
np.ndarray: Decoded keypoints with shape (n, 2k).
Decoded keypoints with shape (n, 2k).
"""
preds = []
for i in range(0, distance.shape[1], 2):
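Note: a quick sanity check of distance2bbox on toy values; the arrays below are invented for illustration, and the import path matches the `from uniface.common import ...` line used by the SCRFD detector later in this diff.

import numpy as np
from uniface.common import distance2bbox

# Two anchor points and their predicted distances to the
# (left, top, right, bottom) box edges.
points = np.array([[10.0, 10.0], [50.0, 50.0]])
distance = np.array([[4.0, 4.0, 6.0, 6.0], [10.0, 8.0, 10.0, 12.0]])

boxes = distance2bbox(points, distance, max_shape=(480, 640))
# x1 = x - left, y1 = y - top, x2 = x + right, y2 = y + bottom:
# [[ 6.  6. 16. 16.]
#  [40. 42. 60. 62.]]  as [x1, y1, x2, y2] rows
print(boxes)
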
@@ -1,9 +1,8 @@
# Copyright 2025 Yakhyokhuja Valikhujaev
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

from enum import Enum
from typing import Dict


# fmt: off
@@ -33,6 +32,15 @@ class ArcFaceWeights(str, Enum):
MNET = "arcface_mnet"
RESNET = "arcface_resnet"


class AdaFaceWeights(str, Enum):
"""
AdaFace model weights trained on WebFace datasets.
https://github.com/yakhyo/adaface-onnx
"""
IR_18 = "adaface_ir_18"
IR_101 = "adaface_ir_101"

class RetinaFaceWeights(str, Enum):
"""
Trained on WIDER FACE dataset.
@@ -71,6 +79,20 @@ class YOLOv5FaceWeights(str, Enum):
YOLOV5M = "yolov5m"


class YOLOv8FaceWeights(str, Enum):
"""
YOLOv8-Face models trained on WIDER FACE dataset.
Uses anchor-free design with DFL (Distribution Focal Loss) for bbox regression.
Exported to ONNX from: https://github.com/yakhyo/yolov8-face-onnx-inference

Model Performance (WIDER FACE):
- YOLOV8_LITE_S: 7.4MB, 93.4% Easy / 91.2% Medium / 78.6% Hard (lightweight)
- YOLOV8N: 12MB, 94.6% Easy / 92.3% Medium / 79.6% Hard (recommended)
"""
YOLOV8_LITE_S = "yolov8_lite_s"
YOLOV8N = "yolov8n_face"


class DDAMFNWeights(str, Enum):
"""
Trained on AffectNet dataset.
@@ -88,6 +110,15 @@ class AgeGenderWeights(str, Enum):
DEFAULT = "age_gender"


class FairFaceWeights(str, Enum):
"""
FairFace attribute prediction (race, gender, age).
Trained on FairFace dataset with balanced demographics.
https://github.com/yakhyo/fairface-onnx
"""
DEFAULT = "fairface"


class LandmarkWeights(str, Enum):
"""
MobileNet 0.5 from Insightface
@@ -96,7 +127,44 @@ class LandmarkWeights(str, Enum):
DEFAULT = "2d_106"


MODEL_URLS: Dict[Enum, str] = {
class GazeWeights(str, Enum):
"""
MobileGaze: Real-Time Gaze Estimation models.
Trained on Gaze360 dataset.
https://github.com/yakhyo/gaze-estimation
"""
RESNET18 = "gaze_resnet18"
RESNET34 = "gaze_resnet34"
RESNET50 = "gaze_resnet50"
MOBILENET_V2 = "gaze_mobilenetv2"
MOBILEONE_S0 = "gaze_mobileone_s0"


class ParsingWeights(str, Enum):
"""
Face Parsing: Semantic Segmentation of Facial Components.
Trained on CelebAMask-HQ dataset.
https://github.com/yakhyo/face-parsing
"""
RESNET18 = "parsing_resnet18"
RESNET34 = "parsing_resnet34"


class MiniFASNetWeights(str, Enum):
"""
MiniFASNet: Lightweight Face Anti-Spoofing models.
Trained on face anti-spoofing datasets.
https://github.com/yakhyo/face-anti-spoofing

Model Variants:
- V1SE: Uses scale=4.0 for face crop (squeeze-and-excitation version)
- V2: Uses scale=2.7 for face crop (improved version)
"""
V1SE = "minifasnet_v1se"
V2 = "minifasnet_v2"


MODEL_URLS: dict[Enum, str] = {
# RetinaFace
RetinaFaceWeights.MNET_025: 'https://github.com/yakhyo/uniface/releases/download/weights/retinaface_mv1_0.25.onnx',
RetinaFaceWeights.MNET_050: 'https://github.com/yakhyo/uniface/releases/download/weights/retinaface_mv1_0.50.onnx',
@@ -115,6 +183,9 @@ MODEL_URLS: Dict[Enum, str] = {
# ArcFace
ArcFaceWeights.MNET: 'https://github.com/yakhyo/uniface/releases/download/weights/w600k_mbf.onnx',
ArcFaceWeights.RESNET: 'https://github.com/yakhyo/uniface/releases/download/weights/w600k_r50.onnx',
# AdaFace
AdaFaceWeights.IR_18: 'https://github.com/yakhyo/adaface-onnx/releases/download/weights/adaface_ir_18.onnx',
AdaFaceWeights.IR_101: 'https://github.com/yakhyo/adaface-onnx/releases/download/weights/adaface_ir_101.onnx',
# SCRFD
SCRFDWeights.SCRFD_10G_KPS: 'https://github.com/yakhyo/uniface/releases/download/weights/scrfd_10g_kps.onnx',
SCRFDWeights.SCRFD_500M_KPS: 'https://github.com/yakhyo/uniface/releases/download/weights/scrfd_500m_kps.onnx',
@@ -122,16 +193,33 @@ MODEL_URLS: Dict[Enum, str] = {
YOLOv5FaceWeights.YOLOV5N: 'https://github.com/yakhyo/yolov5-face-onnx-inference/releases/download/weights/yolov5n_face.onnx',
YOLOv5FaceWeights.YOLOV5S: 'https://github.com/yakhyo/yolov5-face-onnx-inference/releases/download/weights/yolov5s_face.onnx',
YOLOv5FaceWeights.YOLOV5M: 'https://github.com/yakhyo/yolov5-face-onnx-inference/releases/download/weights/yolov5m_face.onnx',
# YOLOv8-Face
YOLOv8FaceWeights.YOLOV8_LITE_S: 'https://github.com/yakhyo/yolov8-face-onnx-inference/releases/download/weights/yolov8-lite-s.onnx',
YOLOv8FaceWeights.YOLOV8N: 'https://github.com/yakhyo/yolov8-face-onnx-inference/releases/download/weights/yolov8n-face.onnx',
# DDAMFN
DDAMFNWeights.AFFECNET7: 'https://github.com/yakhyo/uniface/releases/download/weights/affecnet7.script',
DDAMFNWeights.AFFECNET8: 'https://github.com/yakhyo/uniface/releases/download/weights/affecnet8.script',
# AgeGender
AgeGenderWeights.DEFAULT: 'https://github.com/yakhyo/uniface/releases/download/weights/genderage.onnx',
# FairFace
FairFaceWeights.DEFAULT: 'https://github.com/yakhyo/fairface-onnx/releases/download/weights/fairface.onnx',
# Landmarks
LandmarkWeights.DEFAULT: 'https://github.com/yakhyo/uniface/releases/download/weights/2d106det.onnx',
# Gaze (MobileGaze)
GazeWeights.RESNET18: 'https://github.com/yakhyo/gaze-estimation/releases/download/weights/resnet18_gaze.onnx',
GazeWeights.RESNET34: 'https://github.com/yakhyo/gaze-estimation/releases/download/weights/resnet34_gaze.onnx',
GazeWeights.RESNET50: 'https://github.com/yakhyo/gaze-estimation/releases/download/weights/resnet50_gaze.onnx',
GazeWeights.MOBILENET_V2: 'https://github.com/yakhyo/gaze-estimation/releases/download/weights/mobilenetv2_gaze.onnx',
GazeWeights.MOBILEONE_S0: 'https://github.com/yakhyo/gaze-estimation/releases/download/weights/mobileone_s0_gaze.onnx',
# Parsing
ParsingWeights.RESNET18: 'https://github.com/yakhyo/face-parsing/releases/download/weights/resnet18.onnx',
ParsingWeights.RESNET34: 'https://github.com/yakhyo/face-parsing/releases/download/weights/resnet34.onnx',
# Anti-Spoofing (MiniFASNet)
MiniFASNetWeights.V1SE: 'https://github.com/yakhyo/face-anti-spoofing/releases/download/weights/MiniFASNetV1SE.onnx',
MiniFASNetWeights.V2: 'https://github.com/yakhyo/face-anti-spoofing/releases/download/weights/MiniFASNetV2.onnx',
}

MODEL_SHA256: Dict[Enum, str] = {
MODEL_SHA256: dict[Enum, str] = {
# RetinaFace
RetinaFaceWeights.MNET_025: 'b7a7acab55e104dce6f32cdfff929bd83946da5cd869b9e2e9bdffafd1b7e4a5',
RetinaFaceWeights.MNET_050: 'd8977186f6037999af5b4113d42ba77a84a6ab0c996b17c713cc3d53b88bfc37',
@@ -150,6 +238,9 @@ MODEL_SHA256: Dict[Enum, str] = {
# ArcFace
ArcFaceWeights.MNET: '9cc6e4a75f0e2bf0b1aed94578f144d15175f357bdc05e815e5c4a02b319eb4f',
ArcFaceWeights.RESNET: '4c06341c33c2ca1f86781dab0e829f88ad5b64be9fba56e56bc9ebdefc619e43',
# AdaFace
AdaFaceWeights.IR_18: '6b6a35772fb636cdd4fa86520c1a259d0c41472a76f70f802b351837a00d9870',
AdaFaceWeights.IR_101: 'f2eb07d03de0af560a82e1214df799fec5e09375d43521e2868f9dc387e5a43e',
# SCRFD
SCRFDWeights.SCRFD_10G_KPS: '5838f7fe053675b1c7a08b633df49e7af5495cee0493c7dcf6697200b85b5b91',
SCRFDWeights.SCRFD_500M_KPS: '5e4447f50245bbd7966bd6c0fa52938c61474a04ec7def48753668a9d8b4ea3a',
@@ -157,13 +248,30 @@ MODEL_SHA256: Dict[Enum, str] = {
YOLOv5FaceWeights.YOLOV5N: 'eb244a06e36999db732b317c2b30fa113cd6cfc1a397eaf738f2d6f33c01f640',
YOLOv5FaceWeights.YOLOV5S: 'fc682801cd5880e1e296184a14aea0035486b5146ec1a1389d2e7149cb134bb2',
YOLOv5FaceWeights.YOLOV5M: '04302ce27a15bde3e20945691b688e2dd018a10e92dd8932146bede6a49207b2',
# YOLOv8-Face
YOLOv8FaceWeights.YOLOV8_LITE_S: '11bc496be01356d2d960085bfd8abb8f103199900a034f239a8a1705a1b31dba',
YOLOv8FaceWeights.YOLOV8N: '33f3951af7fc0c4d9b321b29cdcd8c9a59d0a29a8d4bdc01fcb5507d5c714809',
# DDAMFN
DDAMFNWeights.AFFECNET7: '10535bf8b6afe8e9d6ae26cea6c3add9a93036e9addb6adebfd4a972171d015d',
DDAMFNWeights.AFFECNET8: '8c66963bc71db42796a14dfcbfcd181b268b65a3fc16e87147d6a3a3d7e0f487',
# AgeGender
AgeGenderWeights.DEFAULT: '4fde69b1c810857b88c64a335084f1c3fe8f01246c9a191b48c7bb756d6652fb',
# FairFace
FairFaceWeights.DEFAULT: '9c8c47d437cd310538d233f2465f9ed0524cb7fb51882a37f74e8bc22437fdbf',
# Landmark
LandmarkWeights.DEFAULT: 'f001b856447c413801ef5c42091ed0cd516fcd21f2d6b79635b1e733a7109dbf',
# MobileGaze (trained on Gaze360)
GazeWeights.RESNET18: '23d5d7e4f6f40dce8c35274ce9d08b45b9e22cbaaf5af73182f473229d713d31',
GazeWeights.RESNET34: '4457ee5f7acd1a5ab02da4b61f02fc3a0b17adbf3844dd0ba3cd4288f2b5e1de',
GazeWeights.RESNET50: 'e1eaf98f5ec7c89c6abe7cfe39f7be83e747163f98d1ff945c0603b3c521be22',
GazeWeights.MOBILENET_V2: 'fdcdb84e3e6421b5a79e8f95139f249fc258d7f387eed5ddac2b80a9a15ce076',
GazeWeights.MOBILEONE_S0: 'c0b5a4f4a0ffd24f76ab3c1452354bb2f60110899fd9a88b464c75bafec0fde8',
# Face Parsing
ParsingWeights.RESNET18: '0d9bd318e46987c3bdbfacae9e2c0f461cae1c6ac6ea6d43bbe541a91727e33f',
ParsingWeights.RESNET34: '5b805bba7b5660ab7070b5a381dcf75e5b3e04199f1e9387232a77a00095102e',
# Anti-Spoofing (MiniFASNet)
MiniFASNetWeights.V1SE: 'ebab7f90c7833fbccd46d3a555410e78d969db5438e169b6524be444862b3676',
MiniFASNetWeights.V2: 'b32929adc2d9c34b9486f8c4c7bc97c1b69bc0ea9befefc380e4faae4e463907',
}

CHUNK_SIZE = 8192
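Note: the SHA-256 table above pairs with CHUNK_SIZE for streamed checksum verification. A minimal sketch of the idea; the sha256_of helper is hypothetical (in uniface this is handled by uniface.model_store.verify_model_weights):

import hashlib
from pathlib import Path

CHUNK_SIZE = 8192  # mirrors the constant above

def sha256_of(path: Path) -> str:
    """Hash the file in CHUNK_SIZE pieces to avoid loading it whole."""
    digest = hashlib.sha256()
    with path.open('rb') as f:
        while chunk := f.read(CHUNK_SIZE):
            digest.update(chunk)
    return digest.hexdigest()

# expected = MODEL_SHA256[RetinaFaceWeights.MNET_025]
# assert sha256_of(Path('retinaface_mv1_0.25.onnx')) == expected
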
@@ -1,48 +1,55 @@
# Copyright 2025 Yakhyokhuja Valikhujaev
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

from __future__ import annotations

from typing import Any, Dict, List
from typing import Any

import numpy as np

from uniface.types import Face

from .base import BaseDetector
from .retinaface import RetinaFace
from .scrfd import SCRFD
from .yolov5 import YOLOv5Face
from .yolov8 import YOLOv8Face

# Global cache for detector instances
_detector_cache: Dict[str, BaseDetector] = {}
# Global cache for detector instances (keyed by method name + config hash)
_detector_cache: dict[str, BaseDetector] = {}


def detect_faces(image: np.ndarray, method: str = 'retinaface', **kwargs) -> List[Dict[str, Any]]:
"""
High-level face detection function.
def detect_faces(image: np.ndarray, method: str = 'retinaface', **kwargs: Any) -> list[Face]:
"""High-level face detection function.

Detects faces in an image using the specified detection method.
Results are cached for repeated calls with the same configuration.

Args:
image (np.ndarray): Input image as numpy array.
method (str): Detection method to use. Options: 'retinaface', 'scrfd', 'yolov5face'.
image: Input image as numpy array with shape (H, W, C) in BGR format.
method: Detection method to use. Options: 'retinaface', 'scrfd', 'yolov5face', 'yolov8face'.
**kwargs: Additional arguments passed to the detector.

Returns:
List[Dict[str, Any]]: A list of dictionaries, where each dictionary represents a detected face and contains:
- 'bbox' (List[float]): [x1, y1, x2, y2] bounding box coordinates.
- 'confidence' (float): The confidence score of the detection.
- 'landmarks' (List[List[float]]): 5-point facial landmarks.
A list of Face objects, each containing:
- bbox: [x1, y1, x2, y2] bounding box coordinates.
- confidence: The confidence score of the detection.
- landmarks: 5-point facial landmarks with shape (5, 2).

Example:
>>> from uniface import detect_faces
>>> image = cv2.imread("your_image.jpg")
>>> faces = detect_faces(image, method='retinaface', conf_thresh=0.8)
>>> import cv2
>>> image = cv2.imread('your_image.jpg')
>>> faces = detect_faces(image, method='retinaface', confidence_threshold=0.8)
>>> for face in faces:
... print(f"Found face with confidence: {face['confidence']}")
... print(f"BBox: {face['bbox']}")
... print(f'Found face with confidence: {face.confidence}')
... print(f'BBox: {face.bbox}')
"""
method_name = method.lower()

sorted_kwargs = sorted(kwargs.items())
cache_key = f'{method_name}_{str(sorted_kwargs)}'
cache_key = f'{method_name}_{sorted_kwargs!s}'

if cache_key not in _detector_cache:
# Pass kwargs to create the correctly configured detector
@@ -52,50 +59,36 @@ def detect_faces(image: np.ndarray, method: str = 'retinaface', **kwargs) -> Lis
return detector.detect(image)


def create_detector(method: str = 'retinaface', **kwargs) -> BaseDetector:
"""
Factory function to create face detectors.
def create_detector(method: str = 'retinaface', **kwargs: Any) -> BaseDetector:
"""Factory function to create face detectors.

Args:
method (str): Detection method. Options:
method: Detection method. Options:
- 'retinaface': RetinaFace detector (default)
- 'scrfd': SCRFD detector (fast and accurate)
- 'yolov5face': YOLOv5-Face detector (accurate with landmarks)
**kwargs: Detector-specific parameters
- 'yolov8face': YOLOv8-Face detector (anchor-free, accurate)
**kwargs: Detector-specific parameters.

Returns:
BaseDetector: Initialized detector instance
Initialized detector instance.

Raises:
ValueError: If method is not supported
ValueError: If method is not supported.

Examples:
Example:
>>> # Basic usage
>>> detector = create_detector('retinaface')

>>> # SCRFD detector with custom parameters
>>> from uniface.constants import SCRFDWeights
>>> detector = create_detector(
... 'scrfd',
... model_name=SCRFDWeights.SCRFD_10G_KPS,
... conf_thresh=0.8,
... input_size=(640, 640)
... 'scrfd', model_name=SCRFDWeights.SCRFD_10G_KPS, confidence_threshold=0.8, input_size=(640, 640)
... )

>>> # RetinaFace detector
>>> detector = create_detector(
... 'retinaface',
... model_name=RetinaFaceWeights.MNET_V2,
... conf_thresh=0.8,
... nms_thresh=0.4
... )

>>> # YOLOv5-Face detector
>>> detector = create_detector(
... 'yolov5face',
... model_name=YOLOv5FaceWeights.YOLOV5S,
... conf_thresh=0.25,
... nms_thresh=0.45
... )
>>> # YOLOv8-Face detector
>>> from uniface.constants import YOLOv8FaceWeights
>>> detector = create_detector('yolov8face', model_name=YOLOv8FaceWeights.YOLOV8N, confidence_threshold=0.5)
"""
method = method.lower()

@@ -108,17 +101,20 @@ def create_detector(method: str = 'retinaface', **kwargs) -> BaseDetector:
elif method == 'yolov5face':
return YOLOv5Face(**kwargs)

elif method == 'yolov8face':
return YOLOv8Face(**kwargs)

else:
available_methods = ['retinaface', 'scrfd', 'yolov5face']
available_methods = ['retinaface', 'scrfd', 'yolov5face', 'yolov8face']
raise ValueError(f"Unsupported detection method: '{method}'. Available methods: {available_methods}")


def list_available_detectors() -> Dict[str, Dict[str, Any]]:
"""
List all available detection methods with their descriptions and parameters.
def list_available_detectors() -> dict[str, dict[str, Any]]:
"""List all available detection methods with their descriptions and parameters.

Returns:
Dict[str, Dict[str, Any]]: Dictionary of detector information
Dictionary mapping detector names to their information including
description, landmark support, paper reference, and default parameters.
"""
return {
'retinaface': {
@@ -127,8 +123,8 @@ def list_available_detectors() -> Dict[str, Dict[str, Any]]:
'paper': 'https://arxiv.org/abs/1905.00641',
'default_params': {
'model_name': 'mnet_v2',
'conf_thresh': 0.5,
'nms_thresh': 0.4,
'confidence_threshold': 0.5,
'nms_threshold': 0.4,
'input_size': (640, 640),
},
},
@@ -138,8 +134,8 @@ def list_available_detectors() -> Dict[str, Dict[str, Any]]:
'paper': 'https://arxiv.org/abs/2105.04714',
'default_params': {
'model_name': 'scrfd_10g_kps',
'conf_thresh': 0.5,
'nms_thresh': 0.4,
'confidence_threshold': 0.5,
'nms_threshold': 0.4,
'input_size': (640, 640),
},
},
@@ -149,8 +145,19 @@ def list_available_detectors() -> Dict[str, Dict[str, Any]]:
'paper': 'https://arxiv.org/abs/2105.12931',
'default_params': {
'model_name': 'yolov5s_face',
'conf_thresh': 0.25,
'nms_thresh': 0.45,
'confidence_threshold': 0.25,
'nms_threshold': 0.45,
'input_size': 640,
},
},
'yolov8face': {
'description': 'YOLOv8-Face detector - anchor-free design with DFL for accurate detection',
'supports_landmarks': True,
'paper': 'https://github.com/derronqi/yolov8-face',
'default_params': {
'model_name': 'yolov8n_face',
'confidence_threshold': 0.5,
'nms_threshold': 0.45,
'input_size': 640,
},
},
@@ -158,11 +165,12 @@ def list_available_detectors() -> Dict[str, Dict[str, Any]]:


__all__ = [
'detect_faces',
'create_detector',
'list_available_detectors',
'SCRFD',
'BaseDetector',
'RetinaFace',
'YOLOv5Face',
'BaseDetector',
'YOLOv8Face',
'create_detector',
'detect_faces',
'list_available_detectors',
]
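Note: a short usage sketch of the refactored API, assembled only from the docstrings above (import path and keyword names are exactly those documented):

import cv2
from uniface import detect_faces

image = cv2.imread('your_image.jpg')

# The configured detector is created once and cached under a key built from
# the method name plus the sorted kwargs, so repeated calls reuse it.
faces = detect_faces(image, method='scrfd', confidence_threshold=0.8)
for face in faces:
    print(f'{face.confidence:.2f} -> {face.bbox}')
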
@@ -1,76 +1,83 @@
# Copyright 2025 Yakhyokhuja Valikhujaev
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

from __future__ import annotations

from abc import ABC, abstractmethod
from typing import Any, Dict, List
from typing import Any

import numpy as np

from uniface.types import Face

__all__ = ['BaseDetector']


class BaseDetector(ABC):
"""
Abstract base class for all face detectors.
"""Abstract base class for all face detectors.

This class defines the interface that all face detectors must implement,
ensuring consistency across different detection methods.

Attributes:
config: Dictionary containing detector configuration parameters.
_supports_landmarks: Flag indicating if detector supports landmark detection.
"""

def __init__(self, **kwargs):
"""Initialize the detector with configuration parameters."""
self.config = kwargs

@abstractmethod
def detect(self, image: np.ndarray, **kwargs) -> List[Dict[str, Any]]:
"""
Detect faces in an image.
def __init__(self, **kwargs: Any) -> None:
"""Initialize the detector with configuration parameters.

Args:
image (np.ndarray): Input image as numpy array with shape (H, W, C)
**kwargs: Additional detection parameters
**kwargs: Detector-specific configuration parameters.
"""
self.config: dict[str, Any] = kwargs
self._supports_landmarks: bool = False

@abstractmethod
def detect(self, image: np.ndarray, **kwargs: Any) -> list[Face]:
"""Detect faces in an image.

Args:
image: Input image as numpy array with shape (H, W, C) in BGR format.
**kwargs: Additional detection parameters.

Returns:
List[Dict[str, Any]]: List of detected faces, where each dictionary contains:
- 'bbox' (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
- 'confidence' (float): Detection confidence score (0.0 to 1.0)
- 'landmarks' (np.ndarray): Facial landmarks with shape (5, 2) for 5-point landmarks
or (68, 2) for 68-point landmarks. Empty array if not supported.
List of detected Face objects, each containing:
- bbox: Bounding box coordinates with shape (4,) as [x1, y1, x2, y2].
- confidence: Detection confidence score (0.0 to 1.0).
- landmarks: Facial landmarks with shape (5, 2) for 5-point landmarks.

Example:
>>> faces = detector.detect(image)
>>> for face in faces:
... bbox = face['bbox'] # np.ndarray with shape (4,)
... confidence = face['confidence'] # float
... landmarks = face['landmarks'] # np.ndarray with shape (5, 2)
... bbox = face.bbox # np.ndarray with shape (4,)
... confidence = face.confidence # float
... landmarks = face.landmarks # np.ndarray with shape (5, 2)
"""
pass

@abstractmethod
def preprocess(self, image: np.ndarray) -> np.ndarray:
"""
Preprocess input image for detection.
"""Preprocess input image for detection.

Args:
image (np.ndarray): Input image
image: Input image with shape (H, W, C).

Returns:
np.ndarray: Preprocessed image tensor
Preprocessed image tensor ready for inference.
"""
pass

@abstractmethod
def postprocess(self, outputs, **kwargs) -> Any:
"""
Postprocess model outputs to get final detections.
def postprocess(self, outputs: Any, **kwargs: Any) -> Any:
"""Postprocess model outputs to get final detections.

Args:
outputs: Raw model outputs
**kwargs: Additional postprocessing parameters
outputs: Raw model outputs.
**kwargs: Additional postprocessing parameters.

Returns:
Any: Processed outputs (implementation-specific format, typically tuple of arrays)
Processed outputs (implementation-specific format, typically tuple of arrays).
"""
pass

def __str__(self) -> str:
"""String representation of the detector."""
@@ -82,23 +89,33 @@ class BaseDetector(ABC):

@property
def supports_landmarks(self) -> bool:
"""
Whether this detector supports landmark detection.
"""Whether this detector supports landmark detection.

Returns:
bool: True if landmarks are supported, False otherwise
True if landmarks are supported, False otherwise.
"""
return hasattr(self, '_supports_landmarks') and self._supports_landmarks

def get_info(self) -> Dict[str, Any]:
"""
Get detector information and configuration.
def get_info(self) -> dict[str, Any]:
"""Get detector information and configuration.

Returns:
Dict[str, Any]: Detector information
Dictionary containing detector name, landmark support, and config.
"""
return {
'name': self.__class__.__name__,
'supports_landmarks': self._supports_landmarks,
'config': self.config,
}

def __call__(self, image: np.ndarray, **kwargs: Any) -> list[Face]:
"""Callable shortcut for the `detect` method.

Args:
image: Input image as numpy array with shape (H, W, C) in BGR format.
**kwargs: Additional detection parameters.

Returns:
List of detected Face objects.
"""
return self.detect(image, **kwargs)
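Note: with this interface, a new backend only has to implement detect, preprocess, and postprocess. A toy sketch under stated assumptions — the import path and the CenterBoxDetector name are hypothetical (inside the package the import is `from .base import BaseDetector`); Face is constructed exactly as in the detectors in this diff:

import numpy as np
from uniface.detection.base import BaseDetector  # path assumed for illustration
from uniface.types import Face

class CenterBoxDetector(BaseDetector):
    """Illustrative detector that always returns one box around the image center."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self._supports_landmarks = False

    def preprocess(self, image: np.ndarray) -> np.ndarray:
        return image.astype(np.float32)[None]  # add a batch dimension

    def postprocess(self, outputs, **kwargs):
        return outputs  # nothing to do for this toy model

    def detect(self, image: np.ndarray, **kwargs) -> list[Face]:
        h, w = image.shape[:2]
        bbox = np.array([w * 0.25, h * 0.25, w * 0.75, h * 0.75], dtype=np.float32)
        return [Face(bbox=bbox, confidence=1.0, landmarks=np.empty((0, 2), dtype=np.float32))]
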
@@ -1,8 +1,10 @@
# Copyright 2025 Yakhyokhuja Valikhujaev
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

from typing import Any, Dict, List, Literal, Tuple
from __future__ import annotations

from typing import Any, Literal

import numpy as np

@@ -17,6 +19,7 @@ from uniface.constants import RetinaFaceWeights
from uniface.log import Logger
from uniface.model_store import verify_model_weights
from uniface.onnx_utils import create_onnx_session
from uniface.types import Face

from .base import BaseDetector

@@ -31,11 +34,13 @@ class RetinaFace(BaseDetector):

Args:
model_name (RetinaFaceWeights): Model weights to use. Defaults to `RetinaFaceWeights.MNET_V2`.
conf_thresh (float): Confidence threshold for filtering detections. Defaults to 0.5.
nms_thresh (float): Non-maximum suppression (NMS) IoU threshold. Defaults to 0.4.
confidence_threshold (float): Confidence threshold for filtering detections. Defaults to 0.5.
nms_threshold (float): Non-maximum suppression (NMS) IoU threshold. Defaults to 0.4.
input_size (Tuple[int, int]): Fixed input size (width, height) if `dynamic_size=False`.
Defaults to (640, 640).
Note: Non-default sizes may cause slower inference and CoreML compatibility issues.
providers (list[str] | None): ONNX Runtime execution providers. If None, auto-detects
the best available provider. Example: ['CPUExecutionProvider'] to force CPU.
**kwargs: Advanced options:
pre_nms_topk (int): Number of top-scoring boxes considered before NMS. Defaults to 5000.
post_nms_topk (int): Max number of detections kept after NMS. Defaults to 750.
@@ -43,8 +48,8 @@ class RetinaFace(BaseDetector):

Attributes:
model_name (RetinaFaceWeights): Selected model variant.
conf_thresh (float): Threshold for confidence-based filtering.
nms_thresh (float): IoU threshold used for NMS.
confidence_threshold (float): Threshold for confidence-based filtering.
nms_threshold (float): IoU threshold used for NMS.
pre_nms_topk (int): Limit on proposals before applying NMS.
post_nms_topk (int): Limit on retained detections after NMS.
dynamic_size (bool): Flag indicating dynamic or static input sizing.
@@ -62,24 +67,27 @@ class RetinaFace(BaseDetector):
self,
*,
model_name: RetinaFaceWeights = RetinaFaceWeights.MNET_V2,
conf_thresh: float = 0.5,
nms_thresh: float = 0.4,
input_size: Tuple[int, int] = (640, 640),
confidence_threshold: float = 0.5,
nms_threshold: float = 0.4,
input_size: tuple[int, int] = (640, 640),
providers: list[str] | None = None,
**kwargs: Any,
) -> None:
super().__init__(
model_name=model_name,
conf_thresh=conf_thresh,
nms_thresh=nms_thresh,
confidence_threshold=confidence_threshold,
nms_threshold=nms_threshold,
input_size=input_size,
providers=providers,
**kwargs,
)
self._supports_landmarks = True  # RetinaFace supports landmarks

self.model_name = model_name
self.conf_thresh = conf_thresh
self.nms_thresh = nms_thresh
self.confidence_threshold = confidence_threshold
self.nms_threshold = nms_threshold
self.input_size = input_size
self.providers = providers

# Advanced options from kwargs
self.pre_nms_topk = kwargs.get('pre_nms_topk', 5000)
@@ -87,8 +95,8 @@ class RetinaFace(BaseDetector):
self.dynamic_size = kwargs.get('dynamic_size', False)

Logger.info(
f'Initializing RetinaFace with model={self.model_name}, conf_thresh={self.conf_thresh}, '
f'nms_thresh={self.nms_thresh}, input_size={self.input_size}'
f'Initializing RetinaFace with model={self.model_name}, confidence_threshold={self.confidence_threshold}, '
f'nms_threshold={self.nms_threshold}, input_size={self.input_size}'
)

# Get path to model weights
@@ -104,17 +112,16 @@ class RetinaFace(BaseDetector):
self._initialize_model(self._model_path)

def _initialize_model(self, model_path: str) -> None:
"""
Initializes an ONNX model session from the given path.
"""Initialize an ONNX model session from the given path.

Args:
model_path (str): The file path to the ONNX model.
model_path: The file path to the ONNX model.

Raises:
RuntimeError: If the model fails to load, logs an error and raises an exception.
RuntimeError: If the model fails to load.
"""
try:
self.session = create_onnx_session(model_path)
self.session = create_onnx_session(model_path, providers=self.providers)
self.input_names = self.session.get_inputs()[0].name
self.output_names = [x.name for x in self.session.get_outputs()]
Logger.info(f'Successfully initialized the model from {model_path}')
@@ -136,14 +143,14 @@ class RetinaFace(BaseDetector):
image = np.expand_dims(image, axis=0)  # Add batch dimension (1, C, H, W)
return image

def inference(self, input_tensor: np.ndarray) -> List[np.ndarray]:
def inference(self, input_tensor: np.ndarray) -> list[np.ndarray]:
"""Perform model inference on the preprocessed image tensor.

Args:
input_tensor (np.ndarray): Preprocessed input tensor.
input_tensor: Preprocessed input tensor with shape (1, C, H, W).

Returns:
Tuple[np.ndarray, np.ndarray]: Raw model outputs.
List of raw model outputs (location, confidence, landmarks).
"""
return self.session.run(self.output_names, {self.input_names: input_tensor})

@@ -154,7 +161,7 @@ class RetinaFace(BaseDetector):
max_num: int = 0,
metric: Literal['default', 'max'] = 'max',
center_weight: float = 2.0,
) -> List[Dict[str, Any]]:
) -> list[Face]:
"""
Perform face detection on an input image and return bounding boxes and facial landmarks.

@@ -168,19 +175,19 @@ class RetinaFace(BaseDetector):
when using the "default" metric. Defaults to 2.0.

Returns:
List[Dict[str, Any]]: List of face detection dictionaries, each containing:
- 'bbox' (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
- 'confidence' (float): Detection confidence score (0.0 to 1.0)
- 'landmarks' (np.ndarray): 5-point facial landmarks with shape (5, 2)
List[Face]: List of Face objects, each containing:
- bbox (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
- confidence (float): Detection confidence score (0.0 to 1.0)
- landmarks (np.ndarray): 5-point facial landmarks with shape (5, 2)

Example:
>>> faces = detector.detect(image)
>>> for face in faces:
... bbox = face['bbox'] # np.ndarray with shape (4,)
... confidence = face['confidence'] # float
... landmarks = face['landmarks'] # np.ndarray with shape (5, 2)
... bbox = face.bbox # np.ndarray with shape (4,)
... confidence = face.confidence # float
... landmarks = face.landmarks # np.ndarray with shape (5, 2)
... # Can pass landmarks directly to recognition
... embedding = recognizer.get_normalized_embedding(image, landmarks)
... embedding = recognizer.get_normalized_embedding(image, face.landmarks)
"""

original_height, original_width = image.shape[:2]
@@ -229,51 +236,53 @@ class RetinaFace(BaseDetector):

faces = []
for i in range(detections.shape[0]):
face_dict = {
'bbox': detections[i, :4],
'confidence': float(detections[i, 4]),
'landmarks': landmarks[i],
}
faces.append(face_dict)
face = Face(
bbox=detections[i, :4],
confidence=float(detections[i, 4]),
landmarks=landmarks[i],
)
faces.append(face)

return faces

def postprocess(
self, outputs: List[np.ndarray], resize_factor: float, shape: Tuple[int, int]
) -> Tuple[np.ndarray, np.ndarray]:
"""
Process the model outputs into final detection results.
self,
outputs: list[np.ndarray],
resize_factor: float,
shape: tuple[int, int],
) -> tuple[np.ndarray, np.ndarray]:
"""Process the model outputs into final detection results.

Args:
outputs (List[np.ndarray]): Raw outputs from the detection model.
outputs: Raw outputs from the detection model containing:
- outputs[0]: Location predictions (bounding box coordinates).
- outputs[1]: Class confidence scores.
- outputs[2]: Landmark predictions.
resize_factor (float): Factor used to resize the input image during preprocessing.
shape (Tuple[int, int]): Original shape of the image as (height, width).
resize_factor: Factor used to resize the input image during preprocessing.
shape: Original shape of the image as (width, height).

Returns:
Tuple[np.ndarray, np.ndarray]: Processed results containing:
- detections (np.ndarray): Array of detected bounding boxes with confidence scores.
Shape: (num_detections, 5), where each row is [x_min, y_min, x_max, y_max, score].
- landmarks (np.ndarray): Array of detected facial landmarks.
Shape: (num_detections, 5, 2), where each row contains 5 landmark points (x, y).
A tuple containing:
- detections: Array of detected bounding boxes with confidence scores,
shape (num_detections, 5), each row is [x1, y1, x2, y2, score].
- landmarks: Array of detected facial landmarks,
shape (num_detections, 5, 2), each row contains 5 landmark points (x, y).
"""
loc, conf, landmarks = (
location_predictions, confidence_scores, landmark_predictions = (
outputs[0].squeeze(0),
outputs[1].squeeze(0),
outputs[2].squeeze(0),
)

# Decode boxes and landmarks
boxes = decode_boxes(loc, self._priors)
landmarks = decode_landmarks(landmarks, self._priors)
boxes = decode_boxes(location_predictions, self._priors)
landmarks = decode_landmarks(landmark_predictions, self._priors)

boxes, landmarks = self._scale_detections(boxes, landmarks, resize_factor, shape=(shape[0], shape[1]))

# Extract confidence scores for the face class
scores = conf[:, 1]
mask = scores > self.conf_thresh
scores = confidence_scores[:, 1]
mask = scores > self.confidence_threshold

# Filter by confidence threshold
boxes, landmarks, scores = boxes[mask], landmarks[mask], scores[mask]
@@ -284,7 +293,7 @@ class RetinaFace(BaseDetector):

# Apply NMS
detections = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
keep = non_max_suppression(detections, self.nms_thresh)
keep = non_max_suppression(detections, self.nms_threshold)
detections, landmarks = detections[keep], landmarks[keep]

# Keep top-k detections
@@ -302,9 +311,9 @@ class RetinaFace(BaseDetector):
boxes: np.ndarray,
landmarks: np.ndarray,
resize_factor: float,
shape: Tuple[int, int],
) -> Tuple[np.ndarray, np.ndarray]:
# Scale bounding boxes and landmarks to the original image size.
shape: tuple[int, int],
) -> tuple[np.ndarray, np.ndarray]:
"""Scale bounding boxes and landmarks to the original image size."""
bbox_scale = np.array([shape[0], shape[1]] * 2)
boxes = boxes * bbox_scale / resize_factor

@@ -312,72 +321,3 @@ class RetinaFace(BaseDetector):
landmarks = landmarks * landmark_scale / resize_factor

return boxes, landmarks


# TODO: below is only for testing, remove it later
def draw_bbox(frame, bbox, score, color=(0, 255, 0), thickness=2):
x1, y1, x2, y2 = map(int, bbox)  # Unpack 4 bbox values
cv2.rectangle(frame, (x1, y1), (x2, y2), color, thickness)
cv2.putText(frame, f'{score:.2f}', (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)


def draw_keypoints(frame, points, color=(0, 0, 255), radius=2):
for x, y in points.astype(np.int32):
cv2.circle(frame, (int(x), int(y)), radius, color, -1)


if __name__ == '__main__':
import cv2

detector = RetinaFace(model_name=RetinaFaceWeights.MNET_050)
print(detector.get_info())
cap = cv2.VideoCapture(0)

if not cap.isOpened():
print('Failed to open webcam.')
exit()

print("Webcam started. Press 'q' to exit.")

while True:
ret, frame = cap.read()
if not ret:
print('Failed to read frame.')
break

# Get face detections as list of dictionaries
faces = detector.detect(frame)

# Process each detected face
for face in faces:
# Extract bbox and landmarks from dictionary
bbox = face['bbox']  # [x1, y1, x2, y2]
landmarks = face['landmarks']  # [[x1, y1], [x2, y2], ...]
confidence = face['confidence']

# Pass bbox and confidence separately
draw_bbox(frame, bbox, confidence)

# Convert landmarks to numpy array format if needed
if landmarks is not None and len(landmarks) > 0:
# Convert list of [x, y] pairs to numpy array
points = np.array(landmarks, dtype=np.float32)  # Shape: (5, 2)
draw_keypoints(frame, points)

# Display face count
cv2.putText(
frame,
f'Faces: {len(faces)}',
(10, 30),
cv2.FONT_HERSHEY_SIMPLEX,
0.7,
(255, 255, 255),
2,
)

cv2.imshow('FaceDetection', frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break

cap.release()
cv2.destroyAllWindows()
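Note: the new `providers` argument threads straight through to create_onnx_session. A short sketch using only parameters documented in this diff (the image path is a placeholder):

import cv2
from uniface import RetinaFace
from uniface.constants import RetinaFaceWeights

detector = RetinaFace(
    model_name=RetinaFaceWeights.MNET_V2,
    confidence_threshold=0.5,
    nms_threshold=0.4,
    providers=['CPUExecutionProvider'],  # force CPU; leave as None to auto-detect
)
image = cv2.imread('your_image.jpg')
faces = detector(image)  # BaseDetector.__call__ forwards to detect()
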
@@ -1,10 +1,11 @@
# Copyright 2025 Yakhyokhuja Valikhujaev
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

from typing import Any, Dict, List, Literal, Tuple
from __future__ import annotations

from typing import Any, Literal

import cv2
import numpy as np

from uniface.common import distance2bbox, distance2kps, non_max_suppression, resize_image
@@ -12,6 +13,7 @@ from uniface.constants import SCRFDWeights
from uniface.log import Logger
from uniface.model_store import verify_model_weights
from uniface.onnx_utils import create_onnx_session
from uniface.types import Face

from .base import BaseDetector

@@ -29,19 +31,21 @@ class SCRFD(BaseDetector):
Args:
model_name (SCRFDWeights): Predefined model enum (e.g., `SCRFD_10G_KPS`).
Specifies the SCRFD variant to load. Defaults to SCRFD_10G_KPS.
conf_thresh (float): Confidence threshold for filtering detections. Defaults to 0.5.
nms_thresh (float): Non-Maximum Suppression threshold. Defaults to 0.4.
confidence_threshold (float): Confidence threshold for filtering detections. Defaults to 0.5.
nms_threshold (float): Non-Maximum Suppression threshold. Defaults to 0.4.
input_size (Tuple[int, int]): Input image size (width, height).
Defaults to (640, 640).
Note: Non-default sizes may cause slower inference and CoreML compatibility issues.
providers (list[str] | None): ONNX Runtime execution providers. If None, auto-detects
the best available provider. Example: ['CPUExecutionProvider'] to force CPU.
**kwargs: Reserved for future advanced options.

Attributes:
model_name (SCRFDWeights): Selected model variant.
conf_thresh (float): Threshold used to filter low-confidence detections.
nms_thresh (float): Threshold used during NMS to suppress overlapping boxes.
confidence_threshold (float): Threshold used to filter low-confidence detections.
nms_threshold (float): Threshold used during NMS to suppress overlapping boxes.
input_size (Tuple[int, int]): Image size to which inputs are resized before inference.
_fmc (int): Number of feature map levels used in the model.
_num_feature_maps (int): Number of feature map levels used in the model.
_feat_stride_fpn (List[int]): Feature map strides corresponding to each detection level.
_num_anchors (int): Number of anchors per feature location.
_center_cache (Dict): Cached anchor centers for efficient forward passes.
@@ -56,35 +60,38 @@ class SCRFD(BaseDetector):
self,
*,
model_name: SCRFDWeights = SCRFDWeights.SCRFD_10G_KPS,
conf_thresh: float = 0.5,
nms_thresh: float = 0.4,
input_size: Tuple[int, int] = (640, 640),
confidence_threshold: float = 0.5,
nms_threshold: float = 0.4,
input_size: tuple[int, int] = (640, 640),
providers: list[str] | None = None,
**kwargs: Any,
) -> None:
super().__init__(
model_name=model_name,
conf_thresh=conf_thresh,
nms_thresh=nms_thresh,
confidence_threshold=confidence_threshold,
nms_threshold=nms_threshold,
input_size=input_size,
providers=providers,
**kwargs,
)
self._supports_landmarks = True  # SCRFD supports landmarks

self.model_name = model_name
self.conf_thresh = conf_thresh
self.nms_thresh = nms_thresh
self.confidence_threshold = confidence_threshold
self.nms_threshold = nms_threshold
self.input_size = input_size
self.providers = providers

# ------- SCRFD model params ------
self._fmc = 3
self._num_feature_maps = 3
self._feat_stride_fpn = [8, 16, 32]
self._num_anchors = 2
self._center_cache = {}
# ---------------------------------

Logger.info(
f'Initializing SCRFD with model={self.model_name}, conf_thresh={self.conf_thresh}, '
f'nms_thresh={self.nms_thresh}, input_size={self.input_size}'
f'Initializing SCRFD with model={self.model_name}, confidence_threshold={self.confidence_threshold}, '
f'nms_threshold={self.nms_threshold}, input_size={self.input_size}'
)

# Get path to model weights
@@ -95,17 +102,16 @@ class SCRFD(BaseDetector):
self._initialize_model(self._model_path)

def _initialize_model(self, model_path: str) -> None:
"""
Initializes an ONNX model session from the given path.
"""Initialize an ONNX model session from the given path.

Args:
model_path (str): The file path to the ONNX model.
model_path: The file path to the ONNX model.

Raises:
RuntimeError: If the model fails to load, logs an error and raises an exception.
RuntimeError: If the model fails to load.
"""
try:
self.session = create_onnx_session(model_path)
self.session = create_onnx_session(model_path, providers=self.providers)
self.input_names = self.session.get_inputs()[0].name
self.output_names = [x.name for x in self.session.get_outputs()]
Logger.info(f'Successfully initialized the model from {model_path}')
@@ -113,14 +119,14 @@ class SCRFD(BaseDetector):
Logger.error(f"Failed to load model from '{model_path}': {e}", exc_info=True)
raise RuntimeError(f"Failed to initialize model session for '{model_path}'") from e

def preprocess(self, image: np.ndarray) -> Tuple[np.ndarray, Tuple[int, int]]:
def preprocess(self, image: np.ndarray) -> np.ndarray:
"""Preprocess image for inference.

Args:
image (np.ndarray): Input image
image: Input image with shape (H, W, C).

Returns:
Tuple[np.ndarray, Tuple[int, int]]: Preprocessed blob and input size
Preprocessed image tensor with shape (1, C, H, W).
"""
image = image.astype(np.float32)
image = (image - 127.5) / 127.5
@@ -129,29 +135,42 @@ class SCRFD(BaseDetector):

return image

def inference(self, input_tensor: np.ndarray) -> List[np.ndarray]:
def inference(self, input_tensor: np.ndarray) -> list[np.ndarray]:
"""Perform model inference on the preprocessed image tensor.

Args:
input_tensor (np.ndarray): Preprocessed input tensor.
input_tensor: Preprocessed input tensor with shape (1, C, H, W).

Returns:
Tuple[np.ndarray, np.ndarray]: Raw model outputs.
List of raw model outputs.
"""
return self.session.run(self.output_names, {self.input_names: input_tensor})

def postprocess(self, outputs: List[np.ndarray], image_size: Tuple[int, int]):
scores_list = []
def postprocess(
self,
outputs: list[np.ndarray],
image_size: tuple[int, int],
) -> tuple[list[np.ndarray], list[np.ndarray], list[np.ndarray]]:
"""Process model outputs into detection results.

Args:
outputs: Raw outputs from the detection model.
image_size: Size of the input image as (height, width).

Returns:
Tuple of (scores_list, bboxes_list, landmarks_list).
"""
scores_list: list[np.ndarray] = []
bboxes_list = []
kpss_list = []

image_size = image_size

fmc = self._fmc
num_feature_maps = self._num_feature_maps
for idx, stride in enumerate(self._feat_stride_fpn):
scores = outputs[idx]
bbox_preds = outputs[fmc + idx] * stride
kps_preds = outputs[2 * fmc + idx] * stride
bbox_preds = outputs[num_feature_maps + idx] * stride
kps_preds = outputs[2 * num_feature_maps + idx] * stride

# Generate anchors
fm_height = image_size[0] // stride
@@ -171,7 +190,7 @@ class SCRFD(BaseDetector):
if len(self._center_cache) < 100:
self._center_cache[cache_key] = anchor_centers

pos_indices = np.where(scores >= self.conf_thresh)[0]
pos_indices = np.where(scores >= self.confidence_threshold)[0]
if len(pos_indices) == 0:
continue

@@ -193,7 +212,7 @@ class SCRFD(BaseDetector):
max_num: int = 0,
metric: Literal['default', 'max'] = 'max',
center_weight: float = 2.0,
) -> List[Dict[str, Any]]:
) -> list[Face]:
"""
Perform face detection on an input image and return bounding boxes and facial landmarks.

@@ -207,19 +226,19 @@ class SCRFD(BaseDetector):
when using the "default" metric. Defaults to 2.0.

Returns:
List[Dict[str, Any]]: List of face detection dictionaries, each containing:
- 'bbox' (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
- 'confidence' (float): Detection confidence score (0.0 to 1.0)
- 'landmarks' (np.ndarray): 5-point facial landmarks with shape (5, 2)
List[Face]: List of Face objects, each containing:
- bbox (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
- confidence (float): Detection confidence score (0.0 to 1.0)
- landmarks (np.ndarray): 5-point facial landmarks with shape (5, 2)

Example:
>>> faces = detector.detect(image)
>>> for face in faces:
... bbox = face['bbox'] # np.ndarray with shape (4,)
... confidence = face['confidence'] # float
... landmarks = face['landmarks'] # np.ndarray with shape (5, 2)
... bbox = face.bbox # np.ndarray with shape (4,)
... confidence = face.confidence # float
... landmarks = face.landmarks # np.ndarray with shape (5, 2)
... # Can pass landmarks directly to recognition
... embedding = recognizer.get_normalized_embedding(image, landmarks)
... embedding = recognizer.get_normalized_embedding(image, face.landmarks)
"""

original_height, original_width = image.shape[:2]
@@ -247,7 +266,7 @@ class SCRFD(BaseDetector):
pre_det = np.hstack((bboxes, scores)).astype(np.float32, copy=False)
pre_det = pre_det[order, :]

keep = non_max_suppression(pre_det, threshold=self.nms_thresh)
keep = non_max_suppression(pre_det, threshold=self.nms_threshold)

detections = pre_det[keep, :]
landmarks = landmarks[order, :, :]
@@ -280,78 +299,11 @@ class SCRFD(BaseDetector):

faces = []
for i in range(detections.shape[0]):
face_dict = {
'bbox': detections[i, :4],
'confidence': float(detections[i, 4]),
'landmarks': landmarks[i],
}
faces.append(face_dict)
face = Face(
bbox=detections[i, :4],
confidence=float(detections[i, 4]),
landmarks=landmarks[i],
)
faces.append(face)

return faces


# TODO: below is only for testing, remove it later
def draw_bbox(frame, bbox, score, color=(0, 255, 0), thickness=2):
x1, y1, x2, y2 = map(int, bbox)  # Unpack 4 bbox values
cv2.rectangle(frame, (x1, y1), (x2, y2), color, thickness)
cv2.putText(frame, f'{score:.2f}', (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)


def draw_keypoints(frame, points, color=(0, 0, 255), radius=2):
for x, y in points.astype(np.int32):
cv2.circle(frame, (int(x), int(y)), radius, color, -1)


if __name__ == '__main__':
detector = SCRFD(model_name=SCRFDWeights.SCRFD_500M_KPS)
print(detector.get_info())
cap = cv2.VideoCapture(0)

if not cap.isOpened():
print('Failed to open webcam.')
exit()

print("Webcam started. Press 'q' to exit.")

while True:
ret, frame = cap.read()
if not ret:
print('Failed to read frame.')
break

# Get face detections as list of dictionaries
faces = detector.detect(frame)

# Process each detected face
for face in faces:
# Extract bbox and landmarks from dictionary
bbox = face['bbox']  # [x1, y1, x2, y2]
landmarks = face['landmarks']  # [[x1, y1], [x2, y2], ...]
confidence = face['confidence']

# Pass bbox and confidence separately
draw_bbox(frame, bbox, confidence)

# Convert landmarks to numpy array format if needed
if landmarks is not None and len(landmarks) > 0:
# Convert list of [x, y] pairs to numpy array
points = np.array(landmarks, dtype=np.float32)  # Shape: (5, 2)
draw_keypoints(frame, points)

# Display face count
cv2.putText(
frame,
f'Faces: {len(faces)}',
(10, 30),
cv2.FONT_HERSHEY_SIMPLEX,
0.7,
(255, 255, 255),
2,
)

cv2.imshow('FaceDetection', frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break

cap.release()
cv2.destroyAllWindows()
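Note: SCRFD's postprocess walks the three FPN strides above and needs one grid of anchor centers per stride; the cached-center logic amounts to the following sketch (illustrative, not the exact library code):

import numpy as np

def anchor_centers_for_stride(height: int, width: int, stride: int, num_anchors: int = 2) -> np.ndarray:
    """Grid of (x, y) anchor centers in input-image pixels, repeated per anchor."""
    fm_h, fm_w = height // stride, width // stride
    xs, ys = np.meshgrid(np.arange(fm_w), np.arange(fm_h))
    centers = np.stack([xs, ys], axis=-1).reshape(-1, 2).astype(np.float32) * stride
    # SCRFD places num_anchors anchors at each feature-map location.
    return np.repeat(centers, num_anchors, axis=0)

# For a 640x640 input and the strides used above:
for stride in (8, 16, 32):
    print(stride, anchor_centers_for_stride(640, 640, stride).shape)
# -> 8 (12800, 2), 16 (3200, 2), 32 (800, 2)
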
@@ -1,8 +1,8 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from typing import Any, Dict, List, Literal, Tuple
|
||||
from typing import Any, Literal
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
@@ -12,9 +12,19 @@ from uniface.constants import YOLOv5FaceWeights
|
||||
from uniface.log import Logger
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.onnx_utils import create_onnx_session
|
||||
from uniface.types import Face
|
||||
|
||||
from .base import BaseDetector
|
||||
|
||||
# Optional torchvision import for faster NMS
|
||||
try:
|
||||
import torch
|
||||
import torchvision
|
||||
|
||||
TORCHVISION_AVAILABLE = True
|
||||
except ImportError:
|
||||
TORCHVISION_AVAILABLE = False
|
||||
|
||||
__all__ = ['YOLOv5Face']
|
||||
|
||||
|
||||
@@ -29,18 +39,23 @@ class YOLOv5Face(BaseDetector):
|
||||
Args:
|
||||
model_name (YOLOv5FaceWeights): Predefined model enum (e.g., `YOLOV5S`).
|
||||
Specifies the YOLOv5-Face variant to load. Defaults to YOLOV5S.
|
||||
conf_thresh (float): Confidence threshold for filtering detections. Defaults to 0.6.
|
||||
nms_thresh (float): Non-Maximum Suppression threshold. Defaults to 0.5.
|
||||
confidence_threshold (float): Confidence threshold for filtering detections. Defaults to 0.6.
|
||||
nms_threshold (float): Non-Maximum Suppression threshold. Defaults to 0.5.
|
||||
input_size (int): Input image size. Defaults to 640.
|
||||
Note: ONNX model is fixed at 640. Changing this will cause inference errors.
|
||||
nms_mode (str): NMS calculation method. Options: 'torchvision' (faster, requires torch)
|
||||
or 'numpy' (no dependencies). Defaults to 'numpy'.
|
||||
providers (list[str] | None): ONNX Runtime execution providers. If None, auto-detects
|
||||
the best available provider. Example: ['CPUExecutionProvider'] to force CPU.
|
||||
**kwargs: Advanced options:
|
||||
max_det (int): Maximum number of detections to return. Defaults to 750.
|
||||
|
||||
Attributes:
|
||||
model_name (YOLOv5FaceWeights): Selected model variant.
|
||||
conf_thresh (float): Threshold used to filter low-confidence detections.
|
||||
nms_thresh (float): Threshold used during NMS to suppress overlapping boxes.
|
||||
confidence_threshold (float): Threshold used to filter low-confidence detections.
|
||||
nms_threshold (float): Threshold used during NMS to suppress overlapping boxes.
|
||||
input_size (int): Image size to which inputs are resized before inference.
|
||||
nms_mode (str): NMS calculation method being used.
|
||||
max_det (int): Maximum number of detections to return.
|
||||
_model_path (str): Absolute path to the downloaded/verified model weights.
|
||||
|
||||
@@ -53,16 +68,20 @@ class YOLOv5Face(BaseDetector):
|
||||
self,
|
||||
*,
|
||||
model_name: YOLOv5FaceWeights = YOLOv5FaceWeights.YOLOV5S,
|
||||
conf_thresh: float = 0.6,
|
||||
nms_thresh: float = 0.5,
|
||||
confidence_threshold: float = 0.6,
|
||||
nms_threshold: float = 0.5,
|
||||
input_size: int = 640,
|
||||
nms_mode: Literal['torchvision', 'numpy'] = 'numpy',
|
||||
providers: list[str] | None = None,
|
||||
**kwargs: Any,
|
||||
) -> None:
|
||||
super().__init__(
|
||||
model_name=model_name,
|
||||
conf_thresh=conf_thresh,
|
||||
nms_thresh=nms_thresh,
|
||||
confidence_threshold=confidence_threshold,
|
||||
nms_threshold=nms_threshold,
|
||||
input_size=input_size,
|
||||
nms_mode=nms_mode,
|
||||
providers=providers,
|
||||
**kwargs,
|
||||
)
|
||||
self._supports_landmarks = True # YOLOv5-Face supports landmarks
|
||||
@@ -74,16 +93,24 @@ class YOLOv5Face(BaseDetector):
        )

        self.model_name = model_name
        self.conf_thresh = conf_thresh
        self.nms_thresh = nms_thresh
        self.confidence_threshold = confidence_threshold
        self.nms_threshold = nms_threshold
        self.input_size = input_size
        self.providers = providers

        # Set NMS mode with automatic fallback
        if nms_mode == 'torchvision' and not TORCHVISION_AVAILABLE:
            Logger.warning('torchvision not available, falling back to numpy NMS')
            self.nms_mode = 'numpy'
        else:
            self.nms_mode = nms_mode

        # Advanced options from kwargs
        self.max_det = kwargs.get('max_det', 750)

        Logger.info(
            f'Initializing YOLOv5Face with model={self.model_name}, conf_thresh={self.conf_thresh}, '
            f'nms_thresh={self.nms_thresh}, input_size={self.input_size}'
            f'Initializing YOLOv5Face with model={self.model_name}, confidence_threshold={self.confidence_threshold}, '
            f'nms_threshold={self.nms_threshold}, input_size={self.input_size}, nms_mode={self.nms_mode}'
        )

        # Get path to model weights
@@ -104,7 +131,7 @@ class YOLOv5Face(BaseDetector):
            RuntimeError: If the model fails to load, logs an error and raises an exception.
        """
        try:
            self.session = create_onnx_session(model_path)
            self.session = create_onnx_session(model_path, providers=self.providers)
            self.input_names = self.session.get_inputs()[0].name
            self.output_names = [x.name for x in self.session.get_outputs()]
            Logger.info(f'Successfully initialized the model from {model_path}')
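Because `providers` now flows through to `create_onnx_session`, callers can discover and pin an execution provider explicitly. A short sketch using only the standard `onnxruntime` API:

    import onnxruntime as ort

    # See what this environment offers, e.g. ['CUDAExecutionProvider', 'CPUExecutionProvider']
    print(ort.get_available_providers())

    # Force CPU inference regardless of what else is installed
    detector = YOLOv5Face(providers=['CPUExecutionProvider'])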
@@ -112,7 +139,7 @@ class YOLOv5Face(BaseDetector):
            Logger.error(f"Failed to load model from '{model_path}': {e}", exc_info=True)
            raise RuntimeError(f"Failed to initialize model session for '{model_path}'") from e

    def preprocess(self, image: np.ndarray) -> Tuple[np.ndarray, float, Tuple[int, int]]:
    def preprocess(self, image: np.ndarray) -> tuple[np.ndarray, float, tuple[int, int]]:
        """
        Preprocess image for inference.

@@ -153,7 +180,7 @@ class YOLOv5Face(BaseDetector):

        return img_batch, scale, (pad_w, pad_h)

    def inference(self, input_tensor: np.ndarray) -> List[np.ndarray]:
    def inference(self, input_tensor: np.ndarray) -> list[np.ndarray]:
        """Perform model inference on the preprocessed image tensor.

        Args:
@@ -168,8 +195,8 @@ class YOLOv5Face(BaseDetector):
        self,
        predictions: np.ndarray,
        scale: float,
        padding: Tuple[int, int],
    ) -> Tuple[np.ndarray, np.ndarray]:
        padding: tuple[int, int],
    ) -> tuple[np.ndarray, np.ndarray]:
        """
        Postprocess model predictions.

@@ -189,7 +216,7 @@ class YOLOv5Face(BaseDetector):
        predictions = predictions[0]  # Remove batch dimension

        # Filter by confidence
        mask = predictions[:, 4] >= self.conf_thresh
        mask = predictions[:, 4] >= self.confidence_threshold
        predictions = predictions[mask]

        if len(predictions) == 0:
@@ -204,9 +231,16 @@ class YOLOv5Face(BaseDetector):
        # Get landmarks (5 points, 10 coordinates)
        landmarks = predictions[:, 5:15].copy()

        # Apply NMS
        detections_for_nms = np.hstack((boxes, scores[:, None])).astype(np.float32, copy=False)
        keep = non_max_suppression(detections_for_nms, self.nms_thresh)
        # Apply NMS based on selected mode
        if self.nms_mode == 'torchvision':
            keep = torchvision.ops.nms(
                torch.tensor(boxes, dtype=torch.float32),
                torch.tensor(scores, dtype=torch.float32),
                self.nms_threshold,
            ).numpy()
        else:
            detections = np.hstack((boxes, scores[:, None])).astype(np.float32, copy=False)
            keep = non_max_suppression(detections, self.nms_threshold)

        if len(keep) == 0:
            return np.array([]), np.array([])
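The numpy branch delegates to the package's internal `non_max_suppression`; for readers unfamiliar with the technique, here is a self-contained greedy IoU-based NMS in NumPy (an illustration of the algorithm, not the library's exact implementation):

    import numpy as np

    def nms_sketch(dets: np.ndarray, iou_thresh: float) -> list[int]:
        """Greedy NMS over dets of shape (N, 5): [x1, y1, x2, y2, score]."""
        x1, y1, x2, y2, scores = dets.T
        areas = (x2 - x1) * (y2 - y1)
        order = scores.argsort()[::-1]  # highest score first
        keep = []
        while order.size > 0:
            i = order[0]
            keep.append(int(i))
            # Intersection of the top-scoring box with all remaining boxes
            xx1 = np.maximum(x1[i], x1[order[1:]])
            yy1 = np.maximum(y1[i], y1[order[1:]])
            xx2 = np.minimum(x2[i], x2[order[1:]])
            yy2 = np.minimum(y2[i], y2[order[1:]])
            inter = np.maximum(0.0, xx2 - xx1) * np.maximum(0.0, yy2 - yy1)
            iou = inter / (areas[i] + areas[order[1:]] - inter)
            # Keep only boxes that overlap the winner by at most iou_thresh
            order = order[1:][iou <= iou_thresh]
        return keep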
@@ -259,7 +293,7 @@ class YOLOv5Face(BaseDetector):
        max_num: int = 0,
        metric: Literal['default', 'max'] = 'max',
        center_weight: float = 2.0,
    ) -> List[Dict[str, Any]]:
    ) -> list[Face]:
        """
        Perform face detection on an input image and return bounding boxes and facial landmarks.

@@ -273,19 +307,19 @@ class YOLOv5Face(BaseDetector):
                when using the "default" metric. Defaults to 2.0.

        Returns:
            List[Dict[str, Any]]: List of face detection dictionaries, each containing:
                - 'bbox' (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
                - 'confidence' (float): Detection confidence score (0.0 to 1.0)
                - 'landmarks' (np.ndarray): 5-point facial landmarks with shape (5, 2)
            List[Face]: List of Face objects, each containing:
                - bbox (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
                - confidence (float): Detection confidence score (0.0 to 1.0)
                - landmarks (np.ndarray): 5-point facial landmarks with shape (5, 2)

        Example:
            >>> faces = detector.detect(image)
            >>> for face in faces:
            ...     bbox = face['bbox']  # np.ndarray with shape (4,)
            ...     confidence = face['confidence']  # float
            ...     landmarks = face['landmarks']  # np.ndarray with shape (5, 2)
            ...     bbox = face.bbox  # np.ndarray with shape (4,)
            ...     confidence = face.confidence  # float
            ...     landmarks = face.landmarks  # np.ndarray with shape (5, 2)
            ...     # Can pass landmarks directly to recognition
            ...     embedding = recognizer.get_normalized_embedding(image, landmarks)
            ...     embedding = recognizer.get_normalized_embedding(image, face.landmarks)
        """

        original_height, original_width = image.shape[:2]
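Tying the new `Face` return type into a full pipeline; a hedged end-to-end sketch (the image path is a placeholder, and the top-level imports and default `ArcFace` construction are assumptions, not confirmed by this diff):

    import cv2
    from uniface import YOLOv5Face, ArcFace  # top-level exports assumed

    image = cv2.imread('photo.jpg')  # placeholder path
    detector = YOLOv5Face()
    recognizer = ArcFace()  # default weights assumed

    for face in detector.detect(image):
        print(f'score={face.confidence:.2f}, box={face.bbox}')
        embedding = recognizer.get_normalized_embedding(image, face.landmarks)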
@@ -330,11 +364,11 @@ class YOLOv5Face(BaseDetector):

        faces = []
        for i in range(detections.shape[0]):
            face_dict = {
                'bbox': detections[i, :4],
                'confidence': float(detections[i, 4]),
                'landmarks': landmarks[i],
            }
            faces.append(face_dict)
            face = Face(
                bbox=detections[i, :4],
                confidence=float(detections[i, 4]),
                landmarks=landmarks[i],
            )
            faces.append(face)

        return faces
Some files were not shown because too many files have changed in this diff.