Compare commits: `v2.1.0...feat/unifa` (23 commits)
| Author | SHA1 | Date |
| --- | --- | --- |
|  | cb81d2fcf8 |  |
|  | f0bae6dd80 |  |
|  | eec8f99850 |  |
|  | 3682a2124f |  |
|  | 2ef6a1ebe8 |  |
|  | 78a2dba7c7 |  |
|  | 87e496d1f5 |  |
|  | 5604ebf4f1 |  |
|  | 971775b2e8 |  |
|  | c520ea2df2 |  |
|  | 2a8cb54d31 |  |
|  | 331f46be7c |  |
|  | 9991fae62a |  |
|  | b74ab95d39 |  |
|  | d2b0303bfe |  |
|  | 5f74487eb3 |  |
|  | f897482d26 |  |
|  | f3d81eb201 |  |
|  | ea0b56f7e0 |  |
|  | edbab5f7bf |  |
|  | cd8077e460 |  |
|  | 452b3381a2 |  |
|  | 07c8bd7b24 |  |
| Binary file | Change | Size |
| --- | --- | --- |
| `.github/logos/gaze_crop.png` (vendored) | removed | 716 KiB |
| `.github/logos/gaze_org.png` (vendored) | removed | 673 KiB |
| `.github/logos/logo_preview.jpg` (vendored) | removed | 826 KiB |
| `.github/logos/logo_readme.png` (vendored) | removed | 563 KiB |
| `.github/logos/logo_web.webp` (vendored) | removed | 33 KiB |
| `.github/logos/uniface_enhanced.webp` (vendored, new file) | added | 427 KiB |
| `.github/logos/uniface_high_res_original.png` (vendored, new file) | added | 1.7 MiB |
| `.github/logos/uniface_rounded.png` (vendored, new file) | added | 1.8 MiB |
| `.github/logos/uniface_rounded_150px.png` (vendored, new file) | added | 1.9 MiB |
| `.github/logos/uniface_rounded_q80.png` (vendored, new file) | added | 872 KiB |
| `.github/logos/uniface_rounded_q80.webp` (vendored, new file) | added | 62 KiB |
**`.github/workflows/ci.yml`** (vendored, 18 changed lines)

```diff
@@ -1,14 +1,12 @@
-name: CI
+name: Build

 on:
   push:
     branches:
       - main
-      - develop
   pull_request:
     branches:
       - main
-      - develop

 concurrency:
   group: ${{ github.workflow }}-${{ github.ref }}
@@ -22,7 +20,7 @@ jobs:
       - uses: actions/checkout@v4
       - uses: actions/setup-python@v5
         with:
-          python-version: '3.11'
+          python-version: "3.10"
       - uses: pre-commit/action@v3.0.1

   test:
@@ -33,8 +31,16 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
         os: [ubuntu-latest, macos-latest, windows-latest]
         python-version: ["3.11", "3.13"]
+        include:
+          # Full Python range on Linux (fastest runner)
+          - os: ubuntu-latest
+            python-version: "3.10"
+          - os: ubuntu-latest
+            python-version: "3.13"
+          - os: macos-latest
+            python-version: "3.13"
+          - os: windows-latest
+            python-version: "3.13"

     steps:
       - name: Checkout code
```
**`.github/workflows/publish.yml`** (vendored, 4 changed lines)

```diff
@@ -24,7 +24,7 @@ jobs:
       - name: Set up Python
         uses: actions/setup-python@v5
         with:
-          python-version: "3.11"
+          python-version: "3.11" # Needs 3.11+ for tomllib

       - name: Get version from tag and pyproject.toml
         id: get_version
@@ -54,7 +54,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.11", "3.13"]
+        python-version: ["3.10", "3.13"]

     steps:
       - name: Checkout code
```
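The `# Needs 3.11+ for tomllib` comment refers to the standard-library TOML parser added in Python 3.11. The workflow's version script is not shown in this diff; a minimal sketch of reading the package version the way such a step might (`project.version` is the standard PEP 621 `pyproject.toml` layout):

```python
import tomllib  # standard library since Python 3.11

# tomllib requires the file opened in binary mode.
with open("pyproject.toml", "rb") as f:
    pyproject = tomllib.load(f)

# Compare against the pushed tag in the workflow step.
version = pyproject["project"]["version"]
print(version)
```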
**`.gitignore`** (vendored, 2 changed lines)

```diff
@@ -1,4 +1,6 @@
+tmp_*
+.vscode/
 *.onnx

 # Byte-compiled / optimized / DLL files
 __pycache__/
```
````
@@ -59,12 +59,12 @@ This project uses [Ruff](https://docs.astral.sh/ruff/) for linting and formattin

#### General Rules

- **Line length:** 120 characters maximum
- **Python version:** 3.11+ (use modern syntax)
- **Python version:** 3.10+ (use modern syntax)
- **Quote style:** Single quotes for strings, double quotes for docstrings

#### Type Hints

Use modern Python 3.11+ type hints (PEP 585 and PEP 604):
Use modern Python 3.10+ type hints (PEP 585 and PEP 604):

```python
# Preferred (modern)
@@ -82,23 +82,23 @@ def process(items: List[str], config: Optional[Dict[str, int]] = None) -> Tuple[

Use [Google-style docstrings](https://google.github.io/styleguide/pyguide.html#38-comments-and-docstrings) for all public APIs:

```python
def detect_faces(image: np.ndarray, threshold: float = 0.5) -> list[Face]:
    """Detect faces in an image.
def create_detector(method: str = 'retinaface', **kwargs: Any) -> BaseDetector:
    """Factory function to create face detectors.

    Args:
        image: Input image as a numpy array with shape (H, W, C) in BGR format.
        threshold: Confidence threshold for filtering detections. Defaults to 0.5.
        method: Detection method. Options: 'retinaface', 'scrfd', 'yolov5face', 'yolov8face'.
        **kwargs: Detector-specific parameters.

    Returns:
        List of Face objects containing bounding boxes, confidence scores,
        and facial landmarks.
        Initialized detector instance.

    Raises:
        ValueError: If the input image has invalid dimensions.
        ValueError: If method is not supported.

    Example:
        >>> from uniface import detect_faces
        >>> faces = detect_faces(image, threshold=0.8)
        >>> from uniface import create_detector
        >>> detector = create_detector('retinaface', confidence_threshold=0.8)
        >>> faces = detector.detect(image)
        >>> print(f"Found {len(faces)} faces")
    """
```
@@ -174,16 +174,16 @@ When adding a new model or feature:

Example notebooks demonstrating library usage:

| Example | Notebook |
|---------|----------|
| Face Detection | [01_face_detection.ipynb](examples/01_face_detection.ipynb) |
| Face Alignment | [02_face_alignment.ipynb](examples/02_face_alignment.ipynb) |
| Face Verification | [03_face_verification.ipynb](examples/03_face_verification.ipynb) |
| Face Search | [04_face_search.ipynb](examples/04_face_search.ipynb) |
| Face Analyzer | [05_face_analyzer.ipynb](examples/05_face_analyzer.ipynb) |
| Face Parsing | [06_face_parsing.ipynb](examples/06_face_parsing.ipynb) |
| Example | Notebook |
| ------------------ | ------------------------------------------------------------------- |
| Face Detection | [01_face_detection.ipynb](examples/01_face_detection.ipynb) |
| Face Alignment | [02_face_alignment.ipynb](examples/02_face_alignment.ipynb) |
| Face Verification | [03_face_verification.ipynb](examples/03_face_verification.ipynb) |
| Face Search | [04_face_search.ipynb](examples/04_face_search.ipynb) |
| Face Analyzer | [05_face_analyzer.ipynb](examples/05_face_analyzer.ipynb) |
| Face Parsing | [06_face_parsing.ipynb](examples/06_face_parsing.ipynb) |
| Face Anonymization | [07_face_anonymization.ipynb](examples/07_face_anonymization.ipynb) |
| Gaze Estimation | [08_gaze_estimation.ipynb](examples/08_gaze_estimation.ipynb) |

## Questions?
````
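The Type Hints example above is cut off after `# Preferred (modern)`. A sketch of the PEP 585 / PEP 604 style it refers to, next to the legacy `typing` style visible in the hunk header:

```python
# Preferred (modern, PEP 585 + PEP 604): builtin generics and `X | None` unions.
def process(items: list[str], config: dict[str, int] | None = None) -> tuple[int, str]:
    return len(items), items[0] if items else ''

# Legacy equivalent (the style shown in the hunk header above):
# from typing import Dict, List, Optional, Tuple
# def process(items: List[str], config: Optional[Dict[str, int]] = None) -> Tuple[int, str]: ...
```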
**`README.md`** (196 changed lines)

````
@@ -1,34 +1,38 @@
# UniFace: All-in-One Face Analysis Library
<h1 align="center">UniFace: All-in-One Face Analysis Library</h1>

<div align="center">

[![PyPI](https://img.shields.io/pypi/v/uniface)](https://pypi.org/project/uniface/)
[![Python](https://img.shields.io/badge/Python-3.10%2B-blue)](https://www.python.org/)
[![PyPI version](https://img.shields.io/pypi/v/uniface?color=blue)](https://pypi.org/project/uniface/)
[![Python](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/)
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
[![Build](https://github.com/yakhyo/uniface/actions/workflows/ci.yml/badge.svg)](https://github.com/yakhyo/uniface/actions)
[![Downloads](https://static.pepy.tech/badge/uniface)](https://pepy.tech/project/uniface)
[![Docs](https://img.shields.io/badge/docs-mkdocs-blue)](https://yakhyo.github.io/uniface/)
[![Build Status](https://github.com/yakhyo/uniface/actions/workflows/ci.yml/badge.svg)](https://github.com/yakhyo/uniface/actions)
[![Downloads](https://static.pepy.tech/badge/uniface)](https://pepy.tech/projects/uniface)
[![Documentation](https://img.shields.io/badge/docs-latest-blue)](https://yakhyo.github.io/uniface/)
[![Kaggle](https://img.shields.io/badge/Kaggle-Profile-blue?logo=kaggle)](https://www.kaggle.com/yakhyokhuja/code)
[![Discord](https://img.shields.io/badge/Discord-Join-5865F2?logo=discord&logoColor=white)](https://discord.gg/wdzrjr7R5j)

</div>

<div align="center">
  <img src=".github/logos/logo_web.webp" width=80%>
  <img src="https://raw.githubusercontent.com/yakhyo/uniface/main/.github/logos/uniface_rounded_q80.webp" width="90%" alt="UniFace - All-in-One Open-Source Face Analysis Library">
</div>

---

**UniFace** is a lightweight, production-ready face analysis library built on ONNX Runtime. It provides high-performance face detection, recognition, landmark detection, face parsing, gaze estimation, and attribute analysis with hardware acceleration support across platforms.

> 💬 **Have questions?** [Chat with this codebase on DeepWiki](https://deepwiki.com/yakhyo/uniface) - AI-powered docs that let you ask anything about UniFace.

---

## Features

- **Face Detection** — RetinaFace, SCRFD, and YOLOv5-Face with 5-point landmarks
- **Face Detection** — RetinaFace, SCRFD, YOLOv5-Face, and YOLOv8-Face with 5-point landmarks
- **Face Recognition** — ArcFace, MobileFace, and SphereFace embeddings
- **Facial Landmarks** — 106-point landmark localization
- **Face Parsing** — BiSeNet semantic segmentation (19 classes)
- **Face Tracking** — Multi-object tracking with [BYTETracker](https://github.com/yakhyo/bytetrack-tracker) for persistent IDs across video frames
- **Facial Landmarks** — 106-point landmark localization module (separate from 5-point detector landmarks)
- **Face Parsing** — BiSeNet semantic segmentation (19 classes), XSeg face masking
- **Gaze Estimation** — Real-time gaze direction with MobileGaze
- **Attribute Analysis** — Age, gender, race (FairFace), and emotion
- **Vector Indexing** — FAISS-backed embedding store for fast multi-identity search
- **Anti-Spoofing** — Face liveness detection with MiniFASNet
- **Face Anonymization** — 5 blur methods for privacy protection
- **Hardware Acceleration** — ARM64 (Apple Silicon), CUDA (NVIDIA), CPU
@@ -37,31 +41,72 @@

## Installation

**Standard installation**

```bash
# Standard installation
pip install uniface
```

# GPU support (CUDA)
**GPU support (CUDA)**

```bash
pip install uniface[gpu]
```

# From source
**From source (latest version)**

```bash
git clone https://github.com/yakhyo/uniface.git
cd uniface && pip install -e .
```

**FAISS vector indexing**

```bash
pip install faiss-cpu  # or faiss-gpu for CUDA
```

**Optional dependencies**
- Emotion model uses TorchScript and requires `torch`:
  `pip install torch` (choose the correct build for your OS/CUDA)
- YOLOv5-Face and YOLOv8-Face support faster NMS with `torchvision`:
  `pip install torch torchvision` then use `nms_mode='torchvision'`
````
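The note above says the YOLO detectors accept `nms_mode='torchvision'` once `torch` and `torchvision` are installed, but the diff does not show where the flag is passed. A minimal sketch, assuming it is a constructor parameter (the class name `YOLOv5Face` matches the detection classes listed in the execution-provider docs below):

```python
import cv2
from uniface.detection import YOLOv5Face

# Assumption: nms_mode is accepted at construction; 'torchvision' would
# select torchvision's NMS instead of the default implementation.
detector = YOLOv5Face(nms_mode='torchvision')

image = cv2.imread('photo.jpg')
faces = detector.detect(image)
```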
````
---

## Quick Example
## Model Downloads and Cache

Models are downloaded automatically on first use and verified via SHA-256.

Default cache location: `~/.uniface/models`

Override with the programmatic API or environment variable:

```python
from uniface.model_store import get_cache_dir, set_cache_dir

set_cache_dir('/data/models')
print(get_cache_dir())  # /data/models
```

```bash
export UNIFACE_CACHE_DIR=/data/models
```
````
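For intuition, the SHA-256 check mentioned above amounts to hashing the downloaded file and comparing digests. A generic illustration with the standard library (not UniFace's internal code; `EXPECTED` is a placeholder for a published digest):

```python
import hashlib
from pathlib import Path

def sha256_of(path: Path) -> str:
    """Hash a file in chunks so large models are not loaded into memory."""
    h = hashlib.sha256()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(1 << 20), b''):
            h.update(chunk)
    return h.hexdigest()

EXPECTED = '...'  # placeholder: known digest for the model file
model = Path('~/.uniface/models/retinaface_mnet_v2.onnx').expanduser()
print(sha256_of(model) == EXPECTED)
```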
````
---

## Quick Example (Detection)

```python
import cv2
from uniface import RetinaFace
from uniface.detection import RetinaFace

# Initialize detector (models auto-download on first use)
detector = RetinaFace()

# Detect faces
image = cv2.imread("photo.jpg")
if image is None:
    raise ValueError("Failed to load image. Check the path to 'photo.jpg'.")

faces = detector.detect(image)

for face in faces:
@@ -71,14 +116,54 @@ for face in faces:
```

<div align="center">
  <img src="assets/test_result.png">
  <img src="https://raw.githubusercontent.com/yakhyo/uniface/main/assets/test_result.png" width="90%">
  <p>Face Detection Model Output</p>
</div>

---

## Example (Face Analyzer)

```python
import cv2
from uniface.analyzer import FaceAnalyzer
from uniface.detection import RetinaFace
from uniface.recognition import ArcFace

detector = RetinaFace()
recognizer = ArcFace()

analyzer = FaceAnalyzer(detector, recognizer=recognizer)

image = cv2.imread("photo.jpg")
if image is None:
    raise ValueError("Failed to load image. Check the path to 'photo.jpg'.")

faces = analyzer.analyze(image)

for face in faces:
    print(face.bbox, face.embedding.shape if face.embedding is not None else None)
```

---

## Execution Providers (ONNX Runtime)

```python
from uniface.detection import RetinaFace

# Force CPU-only inference
detector = RetinaFace(providers=["CPUExecutionProvider"])
```

See more in the docs:
https://yakhyo.github.io/uniface/concepts/execution-providers/

---

## Documentation

📚 **Full documentation**: [yakhyo.github.io/uniface](https://yakhyo.github.io/uniface/)
Full documentation: https://yakhyo.github.io/uniface/

| Resource | Description |
|----------|-------------|
@@ -87,8 +172,27 @@ for face in faces:
| [API Reference](https://yakhyo.github.io/uniface/modules/detection/) | Detailed module documentation |
| [Tutorials](https://yakhyo.github.io/uniface/recipes/image-pipeline/) | Step-by-step workflow examples |
| [Guides](https://yakhyo.github.io/uniface/concepts/overview/) | Architecture and design principles |
| [Datasets](https://yakhyo.github.io/uniface/datasets/) | Training data and evaluation benchmarks |

### Jupyter Notebooks
---

## Datasets

| Task | Training Dataset | Models |
|------|-----------------|--------|
| Detection | WIDER FACE | RetinaFace, SCRFD, YOLOv5-Face, YOLOv8-Face |
| Recognition | MS1MV2 | MobileFace, SphereFace |
| Recognition | WebFace600K | ArcFace |
| Recognition | WebFace4M / 12M | AdaFace |
| Gaze | Gaze360 | MobileGaze |
| Parsing | CelebAMask-HQ | BiSeNet |
| Attributes | CelebA, FairFace, AffectNet | AgeGender, FairFace, Emotion |

> See [Datasets documentation](https://yakhyo.github.io/uniface/datasets/) for download links, benchmarks, and details.

---

## Jupyter Notebooks

| Example | Colab | Description |
|---------|:-----:|-------------|
@@ -100,25 +204,55 @@ for face in faces:
| [06_face_parsing.ipynb](examples/06_face_parsing.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/06_face_parsing.ipynb) | Semantic face segmentation |
| [07_face_anonymization.ipynb](examples/07_face_anonymization.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/07_face_anonymization.ipynb) | Privacy-preserving blur |
| [08_gaze_estimation.ipynb](examples/08_gaze_estimation.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/08_gaze_estimation.ipynb) | Gaze direction estimation |
| [09_face_segmentation.ipynb](examples/09_face_segmentation.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/09_face_segmentation.ipynb) | Face segmentation with XSeg |
| [10_face_vector_store.ipynb](examples/10_face_vector_store.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/10_face_vector_store.ipynb) | FAISS-backed face database |

---

## Licensing and Model Usage

UniFace is MIT-licensed, but several pretrained models carry their own licenses.
Review: https://yakhyo.github.io/uniface/license-attribution/

Notable examples:
- YOLOv5-Face and YOLOv8-Face weights are GPL-3.0
- FairFace weights are CC BY 4.0

If you plan commercial use, verify model license compatibility.

---

## References

- [yakhyo/retinaface-pytorch](https://github.com/yakhyo/retinaface-pytorch) — RetinaFace training
- [yakhyo/yolov5-face-onnx-inference](https://github.com/yakhyo/yolov5-face-onnx-inference) — YOLOv5-Face ONNX
- [yakhyo/face-recognition](https://github.com/yakhyo/face-recognition) — ArcFace, MobileFace, SphereFace
- [yakhyo/face-parsing](https://github.com/yakhyo/face-parsing) — BiSeNet face parsing
- [yakhyo/gaze-estimation](https://github.com/yakhyo/gaze-estimation) — MobileGaze training
- [yakhyo/face-anti-spoofing](https://github.com/yakhyo/face-anti-spoofing) — MiniFASNet inference
- [yakhyo/fairface-onnx](https://github.com/yakhyo/fairface-onnx) — FairFace attributes
- [deepinsight/insightface](https://github.com/deepinsight/insightface) — Model architectures
| Feature | Repository | Training | Description |
|---------|------------|:--------:|-------------|
| Detection | [retinaface-pytorch](https://github.com/yakhyo/retinaface-pytorch) | ✓ | RetinaFace PyTorch Training & Export |
| Detection | [yolov5-face-onnx-inference](https://github.com/yakhyo/yolov5-face-onnx-inference) | - | YOLOv5-Face ONNX Inference |
| Detection | [yolov8-face-onnx-inference](https://github.com/yakhyo/yolov8-face-onnx-inference) | - | YOLOv8-Face ONNX Inference |
| Tracking | [bytetrack-tracker](https://github.com/yakhyo/bytetrack-tracker) | - | BYTETracker Multi-Object Tracking |
| Recognition | [face-recognition](https://github.com/yakhyo/face-recognition) | ✓ | MobileFace, SphereFace Training |
| Parsing | [face-parsing](https://github.com/yakhyo/face-parsing) | ✓ | BiSeNet Face Parsing |
| Parsing | [face-segmentation](https://github.com/yakhyo/face-segmentation) | - | XSeg Face Segmentation |
| Gaze | [gaze-estimation](https://github.com/yakhyo/gaze-estimation) | ✓ | MobileGaze Training |
| Anti-Spoofing | [face-anti-spoofing](https://github.com/yakhyo/face-anti-spoofing) | - | MiniFASNet Inference |
| Attributes | [fairface-onnx](https://github.com/yakhyo/fairface-onnx) | - | FairFace ONNX Inference |

*SCRFD and ArcFace models are from [InsightFace](https://github.com/deepinsight/insightface).

---

## Contributing

Contributions are welcome! Please see [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines.
Contributions are welcome. Please see [CONTRIBUTING.md](CONTRIBUTING.md).

## Support

If you find this project useful, consider giving it a ⭐ on GitHub — it helps others discover it!

Questions or feedback:
- Discord: https://discord.gg/wdzrjr7R5j
- GitHub Issues: https://github.com/yakhyo/uniface/issues
- DeepWiki Q&A: https://deepwiki.com/yakhyo/uniface

## License
````
| Binary file | Change | Size |
| --- | --- | --- |
| `assets/einstein/img_0.png` (new file) | added | 99 KiB |
````
@@ -93,7 +93,7 @@ landmarks = face.landmarks # Shape: (5, 2)
Returned by `Landmark106`:

```python
from uniface import Landmark106
from uniface.landmark import Landmark106

landmarker = Landmark106()
landmarks = landmarker.get_landmarks(image, face.bbox)
@@ -174,7 +174,7 @@ yaw = -90° ────┼──── yaw = +90°
Face alignment uses 5-point landmarks to normalize face orientation:

```python
from uniface import face_alignment
from uniface.face_utils import face_alignment

# Align face to standard template
aligned_face = face_alignment(image, face.landmarks)

@@ -9,7 +9,7 @@ UniFace uses ONNX Runtime for model inference, which supports multiple hardware
UniFace automatically selects the optimal execution provider based on available hardware:

```python
from uniface import RetinaFace
from uniface.detection import RetinaFace

# Automatically uses best available provider
detector = RetinaFace()
@@ -17,12 +17,40 @@ detector = RetinaFace()

**Priority order:**

1. **CUDAExecutionProvider** - NVIDIA GPU
2. **CoreMLExecutionProvider** - Apple Silicon
1. **CoreMLExecutionProvider** - Apple Silicon
2. **CUDAExecutionProvider** - NVIDIA GPU
3. **CPUExecutionProvider** - Fallback

---

## Explicit Provider Selection

You can specify which execution provider to use by passing the `providers` parameter:

```python
from uniface.detection import RetinaFace
from uniface.recognition import ArcFace

# Force CPU execution (even if GPU is available)
detector = RetinaFace(providers=['CPUExecutionProvider'])
recognizer = ArcFace(providers=['CPUExecutionProvider'])

# Use CUDA with CPU fallback
detector = RetinaFace(providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
```

All model classes accept the `providers` parameter:

- Detection: `RetinaFace`, `SCRFD`, `YOLOv5Face`, `YOLOv8Face`
- Recognition: `ArcFace`, `AdaFace`, `MobileFace`, `SphereFace`
- Landmarks: `Landmark106`
- Gaze: `MobileGaze`
- Parsing: `BiSeNet`
- Attributes: `AgeGender`, `FairFace`
- Anti-Spoofing: `MiniFASNet`

---

## Check Available Providers

```python
````
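The code block under "Check Available Providers" is cut off at the hunk boundary. A minimal sketch of such a check, using the same `onnxruntime` call that the installation page later in this diff uses (what the truncated block actually contains is an assumption):

```python
import onnxruntime as ort

# Lists providers compiled into the installed onnxruntime build,
# e.g. ['CUDAExecutionProvider', 'CPUExecutionProvider'].
print(ort.get_available_providers())
```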
````
@@ -147,7 +175,7 @@ pip install uniface[gpu]
Smaller input sizes are faster but may reduce accuracy:

```python
from uniface import RetinaFace
from uniface.detection import RetinaFace

# Faster, lower accuracy
detector = RetinaFace(input_size=(320, 320))

@@ -53,6 +53,7 @@ class Face:
    race: str | None = None  # "East Asian", etc.
    emotion: str | None = None  # "Happy", etc.
    emotion_confidence: float | None = None
    track_id: int | None = None  # Persistent ID from tracker
```
````
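The new `track_id` field gives a detection a persistent identity across video frames once a tracker has assigned one. A sketch of consuming it in a video loop; how `track_id` gets populated (the BYTETracker entry point) is not shown in this diff, so plain `detect()` would leave it `None`:

```python
import cv2
from uniface.detection import RetinaFace

detector = RetinaFace()
cap = cv2.VideoCapture('video.mp4')

while True:
    ok, frame = cap.read()
    if not ok:
        break
    for face in detector.detect(frame):
        # track_id stays None unless a tracker assigned a persistent ID.
        label = f'id={face.track_id}' if face.track_id is not None else 'untracked'
        print(label, face.bbox)
```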
````
### Properties

@@ -177,7 +178,7 @@ print(f"Norm: {np.linalg.norm(embedding):.4f}") # ~1.0
### Similarity Computation

```python
from uniface import compute_similarity
from uniface.face_utils import compute_similarity

similarity = compute_similarity(embedding1, embedding2)
# Returns: float between -1 and 1 (cosine similarity)
````
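Cosine similarity is the dot product of the two embeddings divided by the product of their norms; since the embeddings are L2-normalized (norm ~1.0, as the hunk header above prints), it reduces to a plain dot product. An equivalent NumPy computation, for illustration only (not the library's source):

```python
import numpy as np

def cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
    # General form; for unit-norm embeddings this equals float(a @ b).
    return float(a @ b / (np.linalg.norm(a) * np.linalg.norm(b)))
```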
````
@@ -199,16 +200,16 @@ print(f"Classes: {np.unique(mask)}") # [0, 1, 2, ...]

| ID | Class | ID | Class |
|----|-------|----|-------|
| 0 | Background | 10 | Ear Ring |
| 1 | Skin | 11 | Nose |
| 2 | Left Eyebrow | 12 | Mouth |
| 3 | Right Eyebrow | 13 | Upper Lip |
| 4 | Left Eye | 14 | Lower Lip |
| 5 | Right Eye | 15 | Neck |
| 6 | Eye Glasses | 16 | Neck Lace |
| 7 | Left Ear | 17 | Cloth |
| 8 | Right Ear | 18 | Hair |
| 9 | Hat | | |
| 0 | Background | 10 | Nose |
| 1 | Skin | 11 | Mouth |
| 2 | Left Eyebrow | 12 | Upper Lip |
| 3 | Right Eyebrow | 13 | Lower Lip |
| 4 | Left Eye | 14 | Neck |
| 5 | Right Eye | 15 | Necklace |
| 6 | Eyeglasses | 16 | Cloth |
| 7 | Left Ear | 17 | Hair |
| 8 | Right Ear | 18 | Hat |
| 9 | Earring | | |

---
````
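The hunk above remaps the parsing class IDs (e.g., Hat moves from 9 to 18, Earring from 10 to 9). Given the integer mask that `np.unique(mask)` is printed from, extracting one region is a boolean comparison; a sketch using the updated IDs (the dummy `mask` stands in for the parser output shown above):

```python
import numpy as np

SKIN, HAIR = 1, 17  # class IDs from the updated table

mask = np.zeros((512, 512), dtype=np.int64)  # stand-in for the HxW parser output

skin_region = (mask == SKIN)                 # boolean HxW mask for the skin class
hair_pixels = int(np.sum(mask == HAIR))      # pixel count for the hair class
```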
````
@@ -9,7 +9,7 @@ UniFace automatically downloads and caches models. This page explains how model
Models are downloaded on first use:

```python
from uniface import RetinaFace
from uniface.detection import RetinaFace

# First run: downloads model to cache
detector = RetinaFace()  # ~3.5 MB download
@@ -32,9 +32,9 @@ Default cache directory:

```
~/.uniface/models/
├── retinaface_mv2.onnx
├── w600k_mbf.onnx
├── 2d106det.onnx
├── retinaface_mnet_v2.onnx
├── arcface_mnet.onnx
├── 2d_106.onnx
├── gaze_resnet34.onnx
├── parsing_resnet18.onnx
└── ...
@@ -44,44 +44,57 @@ Default cache directory:

## Custom Cache Directory

Specify a custom cache location:
Use the programmatic API to change the cache location at runtime:

```python
from uniface.model_store import verify_model_weights
from uniface.constants import RetinaFaceWeights
from uniface.model_store import get_cache_dir, set_cache_dir

# Download to custom directory
model_path = verify_model_weights(
    RetinaFaceWeights.MNET_V2,
    root='./my_models'
)
print(f"Model at: {model_path}")
# Set a custom cache directory
set_cache_dir('/data/models')

# Verify the current path
print(get_cache_dir())  # /data/models

# All subsequent model loads use the new directory
from uniface.detection import RetinaFace
detector = RetinaFace()  # Downloads to /data/models/
```

Or set the `UNIFACE_CACHE_DIR` environment variable (see [Environment Variables](#environment-variables) below).

---

## Pre-Download Models

Download models before deployment:
Download models before deployment using the concurrent downloader:

```python
from uniface.model_store import verify_model_weights
from uniface.model_store import download_models
from uniface.constants import (
    RetinaFaceWeights,
    ArcFaceWeights,
    AgeGenderWeights,
)

# Download all needed models
models = [
# Download multiple models concurrently (up to 4 threads by default)
paths = download_models([
    RetinaFaceWeights.MNET_V2,
    ArcFaceWeights.MNET,
    AgeGenderWeights.DEFAULT,
]
])

for model in models:
    path = verify_model_weights(model)
    print(f"Downloaded: {path}")
for model, path in paths.items():
    print(f"{model.value} -> {path}")
```

Or download one at a time:

```python
from uniface.model_store import verify_model_weights
from uniface.constants import RetinaFaceWeights

path = verify_model_weights(RetinaFaceWeights.MNET_V2)
print(f"Downloaded: {path}")
```

Or use the CLI tool:
@@ -115,11 +128,20 @@ print(f"Copy from: {path}")
scp -r ~/.uniface/models/ user@offline-machine:~/.uniface/models/
```

### 3. Use normally
### 3. Point to the cache (if non-default location)

```python
from uniface.model_store import set_cache_dir

# Only needed if the models are not at ~/.uniface/models/
set_cache_dir('/path/to/copied/models')
```

### 4. Use normally

```python
# Models load from local cache
from uniface import RetinaFace
from uniface.detection import RetinaFace
detector = RetinaFace()  # No network required
```

@@ -155,6 +177,8 @@ If a model fails verification, it's re-downloaded automatically.
| YOLOv5n-Face | 11 MB | ✅ |
| YOLOv5s-Face | 28 MB | ✅ |
| YOLOv5m-Face | 82 MB | ✅ |
| YOLOv8-Lite-S | 7.4 MB | ✅ |
| YOLOv8n-Face | 12 MB | ✅ |

### Recognition Models

@@ -180,7 +204,12 @@ If a model fails verification, it's re-downloaded automatically.

## Clear Cache

Remove cached models:
Find and remove cached models:

```python
from uniface.model_store import get_cache_dir
print(get_cache_dir())  # shows the active cache path
```

```bash
# Remove all cached models
````
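The shell command is cut off at the hunk boundary; against the default location it would presumably be `rm -rf ~/.uniface/models/`. A path-agnostic alternative built on the `get_cache_dir()` call shown above (illustrative, not from the docs):

```python
import shutil
from uniface.model_store import get_cache_dir

# Deletes whichever directory is currently active, including custom ones.
shutil.rmtree(get_cache_dir(), ignore_errors=True)
```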
````
@@ -196,20 +225,35 @@ Models will be re-downloaded on next use.

## Environment Variables

Set custom cache location via environment variable:
There are three equivalent ways to configure the cache directory:

```bash
export UNIFACE_CACHE_DIR=/path/to/custom/cache
**1. Programmatic API (recommended)**

```python
from uniface.model_store import get_cache_dir, set_cache_dir

set_cache_dir('/path/to/custom/cache')
print(get_cache_dir())  # /path/to/custom/cache
```

**2. Direct environment variable (Python)**

```python
import os
os.environ['UNIFACE_CACHE_DIR'] = '/path/to/custom/cache'

from uniface import RetinaFace
from uniface.detection import RetinaFace
detector = RetinaFace()  # Uses custom cache
```

**3. Shell environment variable**

```bash
export UNIFACE_CACHE_DIR=/path/to/custom/cache
```

All three methods set the same `UNIFACE_CACHE_DIR` environment variable under the hood. `get_cache_dir()` always returns the resolved path.

---

## Next Steps

@@ -15,7 +15,7 @@ graph TB
    end

    subgraph Detection
        DET[RetinaFace / SCRFD / YOLOv5Face]
        DET[RetinaFace / SCRFD / YOLOv5Face / YOLOv8Face]
    end

    subgraph Analysis
@@ -28,6 +28,14 @@ graph TB
        PRIV[Privacy]
    end

    subgraph Tracking
        TRK[BYTETracker]
    end

    subgraph Indexing
        IDX[FAISS Vector Store]
    end

    subgraph Output
        FACE[Face Objects]
    end
@@ -40,9 +48,12 @@ graph TB
    DET --> PARSE
    DET --> SPOOF
    DET --> PRIV
    DET --> TRK
    REC --> IDX
    REC --> FACE
    LMK --> FACE
    ATTR --> FACE
    TRK --> FACE
```

---
@@ -51,12 +62,14 @@ graph TB

### 1. ONNX-First

All models use ONNX Runtime for inference:
UniFace runs inference primarily via ONNX Runtime for core components:

- **Cross-platform**: Same models work on macOS, Linux, Windows
- **Hardware acceleration**: Automatic selection of optimal provider
- **Production-ready**: No Python-only dependencies for inference

Some optional components (e.g., emotion TorchScript, torchvision NMS) require PyTorch.

### 2. Minimal Dependencies

Core dependencies are kept minimal:
@@ -74,12 +87,14 @@ tqdm # Progress bars
Factory functions and direct instantiation:

```python
# Factory function
detector = create_detector('retinaface')
from uniface.detection import RetinaFace

# Direct instantiation (recommended)
from uniface import RetinaFace
detector = RetinaFace()

# Or via factory function
from uniface.detection import create_detector

detector = create_detector('retinaface')
```

### 4. Type Safety
@@ -97,19 +112,21 @@ def detect(self, image: np.ndarray) -> list[Face]:

```
uniface/
├── detection/      # Face detection (RetinaFace, SCRFD, YOLOv5Face)
├── detection/      # Face detection (RetinaFace, SCRFD, YOLOv5Face, YOLOv8Face)
├── recognition/    # Face recognition (AdaFace, ArcFace, MobileFace, SphereFace)
├── tracking/       # Multi-object tracking (BYTETracker)
├── landmark/       # 106-point landmarks
├── attribute/      # Age, gender, emotion, race
├── parsing/        # Face semantic segmentation
├── gaze/           # Gaze estimation
├── spoofing/       # Anti-spoofing
├── privacy/        # Face anonymization
├── indexing/       # Vector indexing (FAISS)
├── types.py        # Dataclasses (Face, GazeResult, etc.)
├── constants.py    # Model weights and URLs
├── model_store.py  # Model download and caching
├── onnx_utils.py   # ONNX Runtime utilities
└── visualization.py  # Drawing utilities
└── draw.py         # Drawing utilities
```

---
@@ -120,7 +137,9 @@ A typical face analysis workflow:

```python
import cv2
from uniface import RetinaFace, ArcFace, AgeGender
from uniface.attribute import AgeGender
from uniface.detection import RetinaFace
from uniface.recognition import ArcFace

# 1. Initialize models
detector = RetinaFace()
@@ -151,12 +170,21 @@ for face in faces:
For convenience, `FaceAnalyzer` combines multiple modules:

```python
from uniface import FaceAnalyzer
from uniface.analyzer import FaceAnalyzer
from uniface.attribute import AgeGender, FairFace
from uniface.detection import RetinaFace
from uniface.recognition import ArcFace

detector = RetinaFace()
recognizer = ArcFace()
age_gender = AgeGender()
fairface = FairFace()

analyzer = FaceAnalyzer(
    detect=True,
    recognize=True,
    attributes=True
    detector,
    recognizer=recognizer,
    age_gender=age_gender,
    fairface=fairface,
)

faces = analyzer.analyze(image)
@@ -170,7 +198,7 @@ for face in faces:
## Model Lifecycle

1. **First use**: Model is downloaded from GitHub releases
2. **Cached**: Stored in `~/.uniface/models/`
2. **Cached**: Stored in `~/.uniface/models/` (configurable via `set_cache_dir()` or `UNIFACE_CACHE_DIR`)
3. **Verified**: SHA-256 checksum validation
4. **Loaded**: ONNX Runtime session created
5. **Inference**: Hardware-accelerated execution
@@ -179,6 +207,11 @@ for face in faces:
# Models auto-download on first use
detector = RetinaFace()  # Downloads if not cached

# Optionally configure cache location
from uniface.model_store import get_cache_dir, set_cache_dir
set_cache_dir('/data/models')
print(get_cache_dir())  # /data/models

# Or manually pre-download
from uniface.model_store import verify_model_weights
from uniface.constants import RetinaFaceWeights

@@ -11,7 +11,7 @@ This page explains how to tune detection and recognition thresholds for your use
Controls minimum confidence for face detection:

```python
from uniface import RetinaFace
from uniface.detection import RetinaFace

# Default (balanced)
detector = RetinaFace(confidence_threshold=0.5)
@@ -81,7 +81,7 @@ For identity verification (same person check):

```python
import numpy as np
from uniface import compute_similarity
from uniface.face_utils import compute_similarity

similarity = compute_similarity(embedding1, embedding2)

@@ -199,7 +199,7 @@ else:
For drawing detections, filter by confidence:

```python
from uniface.visualization import draw_detections
from uniface.draw import draw_detections

# Only draw high-confidence detections
bboxes = [f.bbox for f in faces if f.confidence > 0.7]

@@ -32,7 +32,7 @@ ruff check . --fix
**Guidelines:**

- Line length: 120
- Python 3.11+ type hints
- Python 3.10+ type hints
- Google-style docstrings

---
````
**`docs/datasets.md`** (new file, 324 lines)

````
@@ -0,0 +1,324 @@
# Datasets

Overview of all training datasets and evaluation benchmarks used by UniFace models.

---

## Quick Reference

| Task | Dataset | Scale | Models |
| ----------- | ------------------------------------------------ | ---------------------- | ------------------------------------------- |
| Detection | [WIDER FACE](#wider-face) | 32K images | RetinaFace, SCRFD, YOLOv5-Face, YOLOv8-Face |
| Recognition | [MS1MV2](#ms1mv2) | 5.8M images, 85.7K IDs | MobileFace, SphereFace |
| Recognition | [WebFace600K](#webface600k) | 600K images | ArcFace |
| Recognition | [WebFace4M / WebFace12M](#webface4m--webface12m) | 4M / 12M images | AdaFace |
| Gaze | [Gaze360](#gaze360) | 238 subjects | MobileGaze |
| Parsing | [CelebAMask-HQ](#celebamask-hq) | 30K images | BiSeNet |
| Attributes | [CelebA](#celeba) | 200K images | AgeGender |
| Attributes | [FairFace](#fairface) | Balanced demographics | FairFace |
| Attributes | [AffectNet](#affectnet) | Emotion labels | Emotion |

---

## Training Datasets

### Face Detection

#### WIDER FACE

Large-scale face detection benchmark with images across 61 event categories. Contains faces with a high degree of variability in scale, pose, occlusion, expression, and illumination.

| Property | Value |
| -------- | ------------------------------------------- |
| Images | ~32,000 (train/val/test split) |
| Faces | ~394,000 annotated |
| Subsets | Easy, Medium, Hard |
| Used by | RetinaFace, SCRFD, YOLOv5-Face, YOLOv8-Face |

!!! info "Download & References"
    **Paper**: [WIDER FACE: A Face Detection Benchmark](https://arxiv.org/abs/1511.06523)

    **Download**: [http://shuoyang1213.me/WIDERFACE/](http://shuoyang1213.me/WIDERFACE/)

---

### Face Recognition

#### MS1MV2

Refined version of the MS-Celeb-1M dataset, cleaned by InsightFace. Widely used for training face recognition models.

| Property | Value |
| ---------- | ------------------------------ |
| Identities | 85.7K |
| Images | 5.8M |
| Format | Aligned and cropped to 112x112 |
| Used by | MobileFace, SphereFace |

!!! info "Download"
    **Kaggle (aligned 112x112)**: [ms1m-arcface-dataset](https://www.kaggle.com/datasets/yakhyokhuja/ms1m-arcface-dataset) (from InsightFace)

    **Training code**: [yakhyo/face-recognition](https://github.com/yakhyo/face-recognition)

---

#### WebFace600K

Medium-scale face recognition dataset from the WebFace series.

| Property | Value |
| -------- | ------- |
| Images | ~600K |
| Used by | ArcFace |

!!! info "Source"
    **Origin**: [InsightFace](https://github.com/deepinsight/insightface)

    **Paper**: [ArcFace: Additive Angular Margin Loss for Deep Face Recognition](https://arxiv.org/abs/1801.07698)

---

#### WebFace4M / WebFace12M

Large-scale face recognition datasets from the WebFace260M collection. Used for training AdaFace models with adaptive quality-aware margin.

| Property | WebFace4M | WebFace12M |
| -------- | ------------- | -------------- |
| Images | ~4M | ~12M |
| Used by | AdaFace IR_18 | AdaFace IR_101 |

!!! info "Source"
    **Paper**: [AdaFace: Quality Adaptive Margin for Face Recognition](https://arxiv.org/abs/2204.00964)

    **Original code**: [mk-minchul/AdaFace](https://github.com/mk-minchul/AdaFace)

---

#### CASIA-WebFace

Smaller-scale face recognition dataset suitable for academic research and lighter training runs.

| Property | Value |
| ---------- | ------------------------------ |
| Identities | 10.6K |
| Images | 491K |
| Format | Aligned and cropped to 112x112 |
| Used by | Alternative training set |

!!! info "Download"
    **Kaggle (aligned 112x112)**: [webface-112x112](https://www.kaggle.com/datasets/yakhyokhuja/webface-112x112) (from OpenSphere)

---

#### VGGFace2

Large-scale dataset with wide variations in pose, age, illumination, ethnicity, and profession.

| Property | Value |
| ---------- | ------------------------------ |
| Identities | 8.6K |
| Images | 3.1M |
| Format | Aligned and cropped to 112x112 |
| Used by | Alternative training set |

!!! info "Download"
    **Kaggle (aligned 112x112)**: [vggface2-112x112](https://www.kaggle.com/datasets/yakhyokhuja/vggface2-112x112) (from OpenSphere)

---

### Gaze Estimation

#### Gaze360

Large-scale gaze estimation dataset collected in indoor and outdoor environments with diverse head poses and wide gaze ranges (up to 360 degrees).

| Property | Value |
| ----------- | --------------------- |
| Subjects | 238 |
| Environment | Indoor and outdoor |
| Used by | All MobileGaze models |

!!! info "Download & Preprocessing"
    **Download**: [gaze360.csail.mit.edu/download.php](https://gaze360.csail.mit.edu/download.php)

    **Preprocessing**: [GazeHub - Gaze360](https://phi-ai.buaa.edu.cn/Gazehub/3D-dataset/#gaze360)

!!! note "UniFace Models"
    All MobileGaze models shipped with UniFace are trained exclusively on Gaze360 for 200 epochs.

**Dataset structure:**

```
data/
└── Gaze360/
    ├── Image/
    └── Label/
```

---

#### MPIIFaceGaze

Dataset for appearance-based gaze estimation from laptop webcam images of participants during everyday laptop usage. Supported by the gaze estimation training code but not used for the UniFace pretrained weights.

| Property | Value |
| ----------- | ---------------------------------------- |
| Subjects | 15 |
| Environment | Everyday laptop usage |
| Used by | Supported (not used for UniFace weights) |

!!! info "Download & Preprocessing"
    **Download**: [MPIIFaceGaze download page](https://www.mpi-inf.mpg.de/departments/computer-vision-and-machine-learning/research/gaze-based-human-computer-interaction/its-written-all-over-your-face-full-face-appearance-based-gaze-estimation)

    **Preprocessing**: [GazeHub - MPIIFaceGaze](https://phi-ai.buaa.edu.cn/Gazehub/3D-dataset/#mpiifacegaze)

**Dataset structure:**

```
data/
└── MPIIFaceGaze/
    ├── Image/
    └── Label/
```

---

### Face Parsing

#### CelebAMask-HQ

High-quality face parsing dataset with pixel-level annotations for 19 facial component classes.

| Property | Value |
| ---------- | ---------------------------- |
| Images | 30,000 |
| Classes | 19 facial components |
| Resolution | High quality |
| Used by | BiSeNet (ResNet18, ResNet34) |

!!! info "Source"
    **GitHub**: [switchablenorms/CelebAMask-HQ](https://github.com/switchablenorms/CelebAMask-HQ)

    **Training code**: [yakhyo/face-parsing](https://github.com/yakhyo/face-parsing)

**Dataset structure:**

```
dataset/
├── images/    # Input face images
│   ├── image1.jpg
│   └── ...
└── labels/    # Segmentation masks
    ├── image1.png
    └── ...
```

---

### Attribute Analysis

#### CelebA

Large-scale face attributes dataset widely used for training age and gender prediction models.

| Property | Value |
| ---------- | -------------------- |
| Images | ~200K |
| Attributes | 40 binary attributes |
| Used by | AgeGender |

!!! info "Reference"
    **Paper**: [Deep Learning Face Attributes in the Wild](https://arxiv.org/abs/1411.7766)

---

#### FairFace

Face attribute dataset designed for balanced representation across race, gender, and age groups. Provides more equitable predictions compared to imbalanced datasets.

| Property | Value |
| ---------- | ----------------------------------- |
| Attributes | Race (7), Gender (2), Age Group (9) |
| Used by | FairFace |
| License | CC BY 4.0 |

!!! info "Reference"
    **Paper**: [FairFace: Face Attribute Dataset for Balanced Race, Gender, and Age](https://arxiv.org/abs/1908.04913)

    **ONNX inference**: [yakhyo/fairface-onnx](https://github.com/yakhyo/fairface-onnx)

---

#### AffectNet

Large-scale facial expression dataset for emotion recognition training.

| Property | Value |
| -------- | ----------------------------------------------------------------------- |
| Classes | 7 or 8 (Neutral, Happy, Sad, Surprise, Fear, Disgust, Angry + Contempt) |
| Used by | Emotion (AFFECNET7, AFFECNET8) |

!!! info "Reference"
    **Paper**: [AffectNet: A Database for Facial Expression, Valence, and Arousal Computing in the Wild](https://ieeexplore.ieee.org/document/8013713)

---

## Evaluation Benchmarks

### Face Detection

#### WIDER FACE Validation Set

The standard benchmark for face detection models. Results are reported across three difficulty subsets.

| Subset | Criteria |
| ------ | --------------------------------------------- |
| Easy | Large, clear, unoccluded faces |
| Medium | Moderate scale and occlusion |
| Hard | Small, heavily occluded, or challenging faces |

See [Model Zoo - Detection](models.md#face-detection-models) for per-model accuracy on each subset.

---

### Face Recognition

Recognition models are evaluated across multiple benchmarks. Aligned 112x112 validation datasets are available as a single download.

!!! info "Download"
    **Kaggle**: [agedb-30-calfw-cplfw-lfw-aligned-112x112](https://www.kaggle.com/datasets/yakhyokhuja/agedb-30-calfw-cplfw-lfw-aligned-112x112)

| Benchmark | Description | Used by |
| ------------ | ----------------------------------------------------------------- | ------------------------------- |
| **LFW** | Labeled Faces in the Wild - standard face verification benchmark | ArcFace, MobileFace, SphereFace |
| **CALFW** | Cross-Age LFW - face verification across age gaps | MobileFace, SphereFace |
| **CPLFW** | Cross-Pose LFW - face verification across pose variations | MobileFace, SphereFace |
| **AgeDB-30** | Age database with 30-year age gaps | ArcFace, MobileFace, SphereFace |
| **CFP-FP** | Celebrities in Frontal-Profile - frontal vs. profile verification | ArcFace |
| **IJB-B** | IARPA Janus Benchmark B - TAR@FAR=0.01% | AdaFace |
| **IJB-C** | IARPA Janus Benchmark C - TAR@FAR=1e-4 | AdaFace, ArcFace |

See [Model Zoo - Recognition](models.md#face-recognition-models) for per-model accuracy on each benchmark.
````
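A note on the IJB metrics in the table above: TAR@FAR fixes the decision threshold so that the false accept rate on impostor pairs hits the stated level, then reports the true accept rate on genuine pairs at that threshold. In symbols, with similarity score $s$ and threshold $t$:

```latex
\mathrm{FAR}(t) = \frac{\#\{\text{impostor pairs with } s \ge t\}}{\#\{\text{impostor pairs}\}},
\qquad
\mathrm{TAR}(t) = \frac{\#\{\text{genuine pairs with } s \ge t\}}{\#\{\text{genuine pairs}\}}
```

TAR@FAR=1e-4 is then $\mathrm{TAR}(t^\*)$ at the $t^\*$ solving $\mathrm{FAR}(t^\*) = 10^{-4}$.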
````
---

### Gaze Estimation

| Benchmark | Metric | Description |
| -------------------- | ------------- | -------------------------------------------- |
| **Gaze360 test set** | MAE (degrees) | Mean Absolute Error in gaze angle prediction |

See [Model Zoo - Gaze](models.md#gaze-estimation-models) for per-model MAE scores.

---

## Training Repositories

For training your own models or reproducing results, see the following repositories:

| Task | Repository | Datasets Supported |
| ----------- | ------------------------------------------------------------------------- | ------------------------------- |
| Detection | [yakhyo/retinaface-pytorch](https://github.com/yakhyo/retinaface-pytorch) | WIDER FACE |
| Recognition | [yakhyo/face-recognition](https://github.com/yakhyo/face-recognition) | MS1MV2, CASIA-WebFace, VGGFace2 |
| Gaze | [yakhyo/gaze-estimation](https://github.com/yakhyo/gaze-estimation) | Gaze360, MPIIFaceGaze |
| Parsing | [yakhyo/face-parsing](https://github.com/yakhyo/face-parsing) | CelebAMask-HQ |
````
````
@@ -10,12 +10,17 @@ template: home.html

# UniFace { .hero-title }

<p class="hero-subtitle">A lightweight, production-ready face analysis library built on ONNX Runtime</p>
<p class="hero-subtitle">All-in-One Open-Source Face Analysis Library</p>

[![PyPI](https://img.shields.io/pypi/v/uniface)](https://pypi.org/project/uniface/)
[![Python](https://img.shields.io/badge/Python-3.10%2B-blue)](https://www.python.org/)
[![PyPI version](https://img.shields.io/pypi/v/uniface?color=blue)](https://pypi.org/project/uniface/)
[![Python](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/)
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
[![Downloads](https://static.pepy.tech/badge/uniface)](https://pepy.tech/project/uniface)
[![Build Status](https://github.com/yakhyo/uniface/actions/workflows/ci.yml/badge.svg)](https://github.com/yakhyo/uniface/actions)
[![Downloads](https://static.pepy.tech/badge/uniface)](https://pepy.tech/projects/uniface)
[![Kaggle](https://img.shields.io/badge/Kaggle-Profile-blue?logo=kaggle)](https://www.kaggle.com/yakhyokhuja/code)
[![Discord](https://img.shields.io/badge/Discord-Join-5865F2?logo=discord&logoColor=white)](https://discord.gg/wdzrjr7R5j)

<!-- <img src="https://raw.githubusercontent.com/yakhyo/uniface/main/.github/logos/uniface_rounded_q80.webp" alt="UniFace - All-in-One Open-Source Face Analysis Library" style="max-width: 70%; margin: 1rem 0;"> -->

[Get Started](quickstart.md){ .md-button .md-button--primary }
[View on GitHub](https://github.com/yakhyo/uniface){ .md-button }
@@ -26,7 +31,7 @@ template: home.html

<div class="feature-card" markdown>
### :material-face-recognition: Face Detection
ONNX-optimized RetinaFace, SCRFD, and YOLOv5-Face models with 5-point landmarks.
ONNX-optimized detectors (RetinaFace, SCRFD, YOLO) with 5-point landmarks.
</div>

<div class="feature-card" markdown>
@@ -54,6 +59,11 @@ BiSeNet semantic segmentation with 19 facial component classes.
Real-time gaze direction prediction with MobileGaze models.
</div>

<div class="feature-card" markdown>
### :material-motion-play: Tracking
Multi-object tracking with BYTETracker for persistent face IDs across video frames.
</div>

<div class="feature-card" markdown>
### :material-shield-check: Anti-Spoofing
Face liveness detection with MiniFASNet to prevent fraud.
@@ -64,31 +74,35 @@ Face liveness detection with MiniFASNet to prevent fraud.
Face anonymization with 5 blur methods for privacy protection.
</div>

<div class="feature-card" markdown>
### :material-database-search: Vector Indexing
FAISS-backed embedding store for fast multi-identity face search.
</div>

</div>

---

## Installation

=== "Standard"
UniFace runs inference primarily via **ONNX Runtime**; some optional components (e.g., emotion TorchScript, torchvision NMS) require **PyTorch**.

    ```bash
    pip install uniface
    ```
**Standard**
```bash
pip install uniface
```

=== "GPU (CUDA)"
**GPU (CUDA)**
```bash
pip install uniface[gpu]
```

    ```bash
    pip install uniface[gpu]
    ```

=== "From Source"

    ```bash
    git clone https://github.com/yakhyo/uniface.git
    cd uniface
    pip install -e .
    ```
**From Source**
```bash
git clone https://github.com/yakhyo/uniface.git
cd uniface
pip install -e .
```

---
````
````
@@ -6,7 +6,7 @@ This guide covers all installation options for UniFace.

## Requirements

- **Python**: 3.11 or higher
- **Python**: 3.10 or higher
- **Operating Systems**: macOS, Linux, Windows

---
@@ -55,11 +55,10 @@ pip install uniface[gpu]

**Requirements:**

- CUDA 11.x or 12.x
- cuDNN 8.x
- `uniface[gpu]` automatically installs `onnxruntime-gpu`. Requirements depend on the ORT version and execution provider.

!!! info "CUDA Compatibility"
    See [ONNX Runtime GPU requirements](https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html) for detailed compatibility matrix.
    See the [ONNX Runtime GPU compatibility matrix](https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html) for matching CUDA and cuDNN versions.

Verify GPU installation:

@@ -71,6 +70,19 @@ print("Available providers:", ort.get_available_providers())

---

### FAISS Vector Indexing

For fast multi-identity face search using a FAISS index:

```bash
pip install faiss-cpu  # CPU
pip install faiss-gpu  # NVIDIA GPU (CUDA)
```

See the [Indexing module](modules/indexing.md) for usage.
````
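To make the indexing idea concrete, a generic FAISS sketch using the plain `faiss` API (not UniFace's indexing module; 512-dim embeddings are an assumption). With L2-normalized vectors, inner product equals cosine similarity:

```python
import numpy as np
import faiss

dim = 512                        # embedding size (assumption)
index = faiss.IndexFlatIP(dim)   # inner product == cosine on unit vectors

embeddings = np.random.rand(10, dim).astype('float32')
faiss.normalize_L2(embeddings)   # L2-normalize in place
index.add(embeddings)

query = embeddings[:1]
scores, ids = index.search(query, k=3)   # top-3 nearest identities
print(ids[0], scores[0])
```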
---
|
||||
|
||||
### CPU-Only (All Platforms)
|
||||
|
||||
```bash
|
||||
@@ -108,9 +120,19 @@ UniFace has minimal dependencies:
|
||||
| `numpy` | Array operations |
|
||||
| `opencv-python` | Image processing |
|
||||
| `onnxruntime` | Model inference |
|
||||
| `scikit-image` | Geometric transforms |
|
||||
| `requests` | Model download |
|
||||
| `tqdm` | Progress bars |
|
||||
|
||||
**Optional:**
|
||||
|
||||
| Package | Install extra | Purpose |
|
||||
|---------|---------------|---------|
|
||||
| `faiss-cpu` / `faiss-gpu` | `pip install faiss-cpu` | FAISS vector indexing |
|
||||
| `onnxruntime-gpu` | `uniface[gpu]` | CUDA acceleration |
|
||||
| `torch` | `pip install torch` | Emotion model uses TorchScript |
|
||||
| `torchvision` | `pip install torchvision` | Faster NMS for YOLO detectors |
|
||||
|
||||
---
|
||||
|
||||
## Verify Installation
|
||||
@@ -126,7 +148,7 @@ import onnxruntime as ort
|
||||
print(f"Available providers: {ort.get_available_providers()}")
|
||||
|
||||
# Quick test
|
||||
from uniface import RetinaFace
|
||||
from uniface.detection import RetinaFace
|
||||
detector = RetinaFace()
|
||||
print("Installation successful!")
|
||||
```
|
||||
@@ -137,11 +159,11 @@ print("Installation successful!")
|
||||
|
||||
### Import Errors
|
||||
|
||||
If you encounter import errors, ensure you're using Python 3.11+:
|
||||
If you encounter import errors, ensure you're using Python 3.10+:
|
||||
|
||||
```bash
|
||||
python --version
|
||||
# Should show: Python 3.11.x or higher
|
||||
# Should show: Python 3.10.x or higher
|
||||
```
|
||||
|
||||
### Model Download Issues
|
||||
|
||||
@@ -13,6 +13,8 @@ UniFace is released under the [MIT License](https://opensource.org/licenses/MIT)
|
||||
| RetinaFace | [yakhyo/retinaface-pytorch](https://github.com/yakhyo/retinaface-pytorch) | MIT |
|
||||
| SCRFD | [InsightFace](https://github.com/deepinsight/insightface) | MIT |
|
||||
| YOLOv5-Face | [yakhyo/yolov5-face-onnx-inference](https://github.com/yakhyo/yolov5-face-onnx-inference) | GPL-3.0 |
|
||||
| YOLOv8-Face | [yakhyo/yolov8-face-onnx-inference](https://github.com/yakhyo/yolov8-face-onnx-inference) | GPL-3.0 |
|
||||
| AdaFace | [yakhyo/adaface-onnx](https://github.com/yakhyo/adaface-onnx) | MIT |
|
||||
| ArcFace | [InsightFace](https://github.com/deepinsight/insightface) | MIT |
|
||||
| MobileFace | [yakhyo/face-recognition](https://github.com/yakhyo/face-recognition) | MIT |
|
||||
| SphereFace | [yakhyo/face-recognition](https://github.com/yakhyo/face-recognition) | MIT |
|
||||
|
||||
250
docs/models.md
@@ -1,6 +1,6 @@
# Model Zoo

Complete guide to all available models, their performance characteristics, and selection criteria.
Complete guide to all available models and their performance characteristics.

---

@@ -8,57 +8,76 @@ Complete guide to all available models, their performance characteristics, and s

### RetinaFace Family

RetinaFace models are trained on the WIDER FACE dataset and provide excellent accuracy-speed tradeoffs.
RetinaFace models are trained on the [WIDER FACE](datasets.md#wider-face) dataset.

| Model Name | Params | Size | Easy | Medium | Hard | Use Case |
| -------------- | ------ | ----- | ------ | ------ | ------ | ----------------------------- |
| `MNET_025` | 0.4M | 1.7MB | 88.48% | 87.02% | 80.61% | Mobile/Edge devices |
| `MNET_050` | 1.0M | 2.6MB | 89.42% | 87.97% | 82.40% | Mobile/Edge devices |
| `MNET_V1` | 3.5M | 3.8MB | 90.59% | 89.14% | 84.13% | Balanced mobile |
| `MNET_V2` :material-check-circle: | 3.2M | 3.5MB | 91.70% | 91.03% | 86.60% | **Default** |
| `RESNET18` | 11.7M | 27MB | 92.50% | 91.02% | 86.63% | Server/High accuracy |
| `RESNET34` | 24.8M | 56MB | 94.16% | 93.12% | 88.90% | Maximum accuracy |
| Model Name | Params | Size | Easy | Medium | Hard |
| -------------- | ------ | ----- | ------ | ------ | ------ |
| `MNET_025` | 0.4M | 1.7MB | 88.48% | 87.02% | 80.61% |
| `MNET_050` | 1.0M | 2.6MB | 89.42% | 87.97% | 82.40% |
| `MNET_V1` | 3.5M | 3.8MB | 90.59% | 89.14% | 84.13% |
| `MNET_V2` :material-check-circle: | 3.2M | 3.5MB | 91.70% | 91.03% | 86.60% |
| `RESNET18` | 11.7M | 27MB | 92.50% | 91.02% | 86.63% |
| `RESNET34` | 24.8M | 56MB | 94.16% | 93.12% | 88.90% |

!!! info "Accuracy & Benchmarks"
    **Accuracy**: WIDER FACE validation set (Easy/Medium/Hard subsets) - from [RetinaFace paper](https://arxiv.org/abs/1905.00641)

    **Speed**: Benchmark on your own hardware using `python tools/detection.py --source <image> --iterations 100`
    **Speed**: Benchmark on your own hardware using `python tools/detect.py --source <image>`

---

### SCRFD Family

SCRFD (Sample and Computation Redistribution for Efficient Face Detection) models offer state-of-the-art speed-accuracy tradeoffs.
SCRFD (Sample and Computation Redistribution for Efficient Face Detection) models are trained on the [WIDER FACE](datasets.md#wider-face) dataset.

| Model Name | Params | Size | Easy | Medium | Hard | Use Case |
| ---------------- | ------ | ----- | ------ | ------ | ------ | ------------------------------- |
| `SCRFD_500M` | 0.6M | 2.5MB | 90.57% | 88.12% | 68.51% | Real-time applications |
| `SCRFD_10G` :material-check-circle: | 4.2M | 17MB | 95.16% | 93.87% | 83.05% | **High accuracy + speed** |
| Model Name | Params | Size | Easy | Medium | Hard |
| ---------------- | ------ | ----- | ------ | ------ | ------ |
| `SCRFD_500M_KPS` | 0.6M | 2.5MB | 90.57% | 88.12% | 68.51% |
| `SCRFD_10G_KPS` :material-check-circle: | 4.2M | 17MB | 95.16% | 93.87% | 83.05% |

!!! info "Accuracy & Benchmarks"
    **Accuracy**: WIDER FACE validation set - from [SCRFD paper](https://arxiv.org/abs/2105.04714)

    **Speed**: Benchmark on your own hardware using `python tools/detection.py --source <image> --iterations 100`
    **Speed**: Benchmark on your own hardware using `python tools/detect.py --source <image>`

---

### YOLOv5-Face Family

YOLOv5-Face models provide excellent detection accuracy with 5-point facial landmarks, optimized for real-time applications.
YOLOv5-Face models provide detection with 5-point facial landmarks, trained on the [WIDER FACE](datasets.md#wider-face) dataset.

| Model Name | Size | Easy | Medium | Hard | Use Case |
| -------------- | ---- | ------ | ------ | ------ | ------------------------------ |
| `YOLOV5N` | 11MB | 93.61% | 91.52% | 80.53% | Lightweight/Mobile |
| `YOLOV5S` :material-check-circle: | 28MB | 94.33% | 92.61% | 83.15% | **Real-time + accuracy** |
| `YOLOV5M` | 82MB | 95.30% | 93.76% | 85.28% | High accuracy |
| Model Name | Size | Easy | Medium | Hard |
| -------------- | ---- | ------ | ------ | ------ |
| `YOLOV5N` | 11MB | 93.61% | 91.52% | 80.53% |
| `YOLOV5S` :material-check-circle: | 28MB | 94.33% | 92.61% | 83.15% |
| `YOLOV5M` | 82MB | 95.30% | 93.76% | 85.28% |

!!! info "Accuracy & Benchmarks"
    **Accuracy**: WIDER FACE validation set - from [YOLOv5-Face paper](https://arxiv.org/abs/2105.12931)

    **Speed**: Benchmark on your own hardware using `python tools/detection.py --source <image> --iterations 100`
    **Speed**: Benchmark on your own hardware using `python tools/detect.py --source <image>`

!!! note "Fixed Input Size"
    All YOLOv5-Face models use a fixed input size of 640×640. Models exported to ONNX from [deepcam-cn/yolov5-face](https://github.com/deepcam-cn/yolov5-face).
    All YOLOv5-Face models use a fixed input size of 640×640.

---

### YOLOv8-Face Family

YOLOv8-Face models use an anchor-free design with DFL (Distribution Focal Loss) for bbox regression and provide detection with 5-point facial landmarks.

| Model Name | Size | Easy | Medium | Hard |
| ---------------- | ------ | ------ | ------ | ------ |
| `YOLOV8_LITE_S` | 7.4MB | 93.4% | 91.2% | 78.6% |
| `YOLOV8N` :material-check-circle: | 12MB | 94.6% | 92.3% | 79.6% |

!!! info "Accuracy & Benchmarks"
    **Accuracy**: WIDER FACE validation set (Easy/Medium/Hard subsets)

    **Speed**: Benchmark on your own hardware using `python tools/detect.py --source <image> --method yolov8face`

!!! note "Fixed Input Size"
    All YOLOv8-Face models use a fixed input size of 640×640.

---

@@ -66,73 +85,69 @@ YOLOv5-Face models provide excellent detection accuracy with 5-point facial land

### AdaFace

High-quality face recognition using adaptive margin based on image quality. Achieves state-of-the-art results on challenging benchmarks.
Face recognition using adaptive margin based on image quality.

| Model Name | Backbone | Dataset | Size | IJB-B TAR | IJB-C TAR | Use Case |
| ----------- | -------- | ----------- | ------ | --------- | --------- | --------------------- |
| `IR_18` :material-check-circle: | IR-18 | WebFace4M | 92 MB | 93.03% | 94.99% | **Balanced (default)** |
| `IR_101` | IR-101 | WebFace12M | 249 MB | - | 97.66% | Maximum accuracy |
| Model Name | Backbone | Dataset | Size | IJB-B TAR | IJB-C TAR |
| ----------- | -------- | ----------- | ------ | --------- | --------- |
| `IR_18` :material-check-circle: | IR-18 | WebFace4M | 92 MB | 93.03% | 94.99% |
| `IR_101` | IR-101 | WebFace12M | 249 MB | - | 97.66% |

!!! info "Training Data & Accuracy"
    **Dataset**: WebFace4M (4M images) / WebFace12M (12M images)
    **Dataset**: [WebFace4M / WebFace12M](datasets.md#webface4m--webface12m) (4M / 12M images)

    **Accuracy**: IJB-B and IJB-C benchmarks, TAR@FAR=0.01%

!!! tip "Key Innovation"
    AdaFace introduces an adaptive margin that adjusts based on image quality, providing better performance on low-quality images compared to fixed-margin approaches.

**Reference**: [AdaFace: Quality Adaptive Margin for Face Recognition](https://github.com/mk-minchul/AdaFace) | [ONNX Export](https://github.com/yakhyo/adaface-onnx)

---

### ArcFace

State-of-the-art face recognition using additive angular margin loss.
Face recognition using additive angular margin loss.

| Model Name | Backbone | Params | Size | Use Case |
| ----------- | --------- | ------ | ----- | -------------------------------- |
| `MNET` :material-check-circle: | MobileNet | 2.0M | 8MB | **Balanced (recommended)** |
| `RESNET` | ResNet50 | 43.6M | 166MB | Maximum accuracy |
| Model Name | Backbone | Params | Size | LFW | CFP-FP | AgeDB-30 | IJB-C |
| ----------- | --------- | ------ | ----- | ------ | ------ | -------- | ----- |
| `MNET` :material-check-circle: | MobileNet | 2.0M | 8MB | 99.70% | 98.00% | 96.58% | 95.02% |
| `RESNET` | ResNet50 | 43.6M | 166MB | 99.83% | 99.33% | 98.23% | 97.25% |

!!! info "Training Data"
    **Dataset**: Trained on MS1M-V2 (5.8M images, 85K identities)
    **Dataset**: Trained on [WebFace600K](datasets.md#webface600k) (600K images)

    **Accuracy**: Benchmark on your own dataset or use standard face verification benchmarks
    **Accuracy**: IJB-C accuracy reported as TAR@FAR=1e-4

---

### MobileFace

Lightweight face recognition optimized for mobile devices.
Lightweight face recognition models with MobileNet backbones.

| Model Name | Backbone | Params | Size | LFW | CALFW | CPLFW | AgeDB-30 | Use Case |
| ----------------- | ---------------- | ------ | ---- | ------ | ------ | ------ | -------- | --------------------- |
| `MNET_025` | MobileNetV1 0.25 | 0.36M | 1MB | 98.76% | 92.02% | 82.37% | 90.02% | Ultra-lightweight |
| `MNET_V2` :material-check-circle: | MobileNetV2 | 2.29M | 4MB | 99.55% | 94.87% | 86.89% | 95.16% | **Mobile/Edge** |
| `MNET_V3_SMALL` | MobileNetV3-S | 1.25M | 3MB | 99.30% | 93.77% | 85.29% | 92.79% | Mobile optimized |
| `MNET_V3_LARGE` | MobileNetV3-L | 3.52M | 10MB | 99.53% | 94.56% | 86.79% | 95.13% | Balanced mobile |
| Model Name | Backbone | Params | Size | LFW | CALFW | CPLFW | AgeDB-30 |
| ----------------- | ---------------- | ------ | ---- | ------ | ------ | ------ | -------- |
| `MNET_025` | MobileNetV1 0.25 | 0.36M | 1MB | 98.76% | 92.02% | 82.37% | 90.02% |
| `MNET_V2` :material-check-circle: | MobileNetV2 | 2.29M | 4MB | 99.55% | 94.87% | 86.89% | 95.16% |
| `MNET_V3_SMALL` | MobileNetV3-S | 1.25M | 3MB | 99.30% | 93.77% | 85.29% | 92.79% |
| `MNET_V3_LARGE` | MobileNetV3-L | 3.52M | 10MB | 99.53% | 94.56% | 86.79% | 95.13% |

!!! info "Training Data"
    **Dataset**: Trained on MS1M-V2 (5.8M images, 85K identities)
    **Dataset**: Trained on [MS1MV2](datasets.md#ms1mv2) (5.8M images, 85K identities)

    **Accuracy**: Evaluated on LFW, CALFW, CPLFW, and AgeDB-30 benchmarks

!!! tip "Use Case"
    These models are lightweight alternatives to ArcFace for resource-constrained environments.

---

### SphereFace

Face recognition using angular softmax loss.

| Model Name | Backbone | Params | Size | LFW | CALFW | CPLFW | AgeDB-30 | Use Case |
| ------------ | -------- | ------ | ---- | ------ | ------ | ------ | -------- | ------------------- |
| `SPHERE20` | Sphere20 | 24.5M | 50MB | 99.67% | 95.61% | 88.75% | 96.58% | Research/Comparison |
| `SPHERE36` | Sphere36 | 34.6M | 92MB | 99.72% | 95.64% | 89.92% | 96.83% | Research/Comparison |
| Model Name | Backbone | Params | Size | LFW | CALFW | CPLFW | AgeDB-30 |
| ------------ | -------- | ------ | ---- | ------ | ------ | ------ | -------- |
| `SPHERE20` | Sphere20 | 24.5M | 50MB | 99.67% | 95.61% | 88.75% | 96.58% |
| `SPHERE36` | Sphere36 | 34.6M | 92MB | 99.72% | 95.64% | 89.92% | 96.83% |

!!! info "Training Data"
    **Dataset**: Trained on MS1M-V2 (5.8M images, 85K identities)
    **Dataset**: Trained on [MS1MV2](datasets.md#ms1mv2) (5.8M images, 85K identities)

    **Accuracy**: Evaluated on LFW, CALFW, CPLFW, and AgeDB-30 benchmarks

@@ -145,11 +160,11 @@ Face recognition using angular softmax loss.

### 106-Point Landmark Detection

High-precision facial landmark localization.
Facial landmark localization model.

| Model Name | Points | Params | Size | Use Case |
| ---------- | ------ | ------ | ---- | ------------------------ |
| `2D106` | 106 | 3.7M | 14MB | Face alignment, analysis |
| Model Name | Points | Params | Size |
| ---------- | ------ | ------ | ---- |
| `2D106` | 106 | 3.7M | 14MB |

**Landmark Groups:**

@@ -167,12 +182,12 @@ High-precision facial landmark localization.

### Age & Gender Detection

| Model Name | Attributes | Params | Size | Use Case |
| ----------- | ----------- | ------ | ---- | --------------- |
| `AgeGender` | Age, Gender | 2.1M | 8MB | General purpose |
| Model Name | Attributes | Params | Size |
| ----------- | ----------- | ------ | ---- |
| `AgeGender` | Age, Gender | 2.1M | 8MB |

!!! info "Training Data"
    **Dataset**: Trained on CelebA
    **Dataset**: Trained on [CelebA](datasets.md#celeba)

!!! warning "Accuracy Note"
    Accuracy varies by demographic and image quality. Test on your specific use case.
@@ -181,12 +196,12 @@ High-precision facial landmark localization.

### FairFace Attributes

| Model Name | Attributes | Params | Size | Use Case |
| ----------- | --------------------- | ------ | ----- | --------------------------- |
| `FairFace` | Race, Gender, Age Group | - | 44MB | Balanced demographic prediction |
| Model Name | Attributes | Params | Size |
| ----------- | --------------------- | ------ | ----- |
| `FairFace` | Race, Gender, Age Group | - | 44MB |

!!! info "Training Data"
    **Dataset**: Trained on FairFace dataset with balanced demographics
    **Dataset**: Trained on the [FairFace](datasets.md#fairface) dataset with balanced demographics

!!! tip "Equitable Predictions"
    FairFace provides more equitable predictions across different racial and gender groups.
@@ -199,17 +214,17 @@ High-precision facial landmark localization.

### Emotion Detection

| Model Name | Classes | Params | Size | Use Case |
| ------------- | ------- | ------ | ---- | --------------- |
| `AFFECNET7` | 7 | 0.5M | 2MB | 7-class emotion |
| `AFFECNET8` | 8 | 0.5M | 2MB | 8-class emotion |
| Model Name | Classes | Params | Size |
| ------------- | ------- | ------ | ---- |
| `AFFECNET7` | 7 | 0.5M | 2MB |
| `AFFECNET8` | 8 | 0.5M | 2MB |

**Classes (7)**: Neutral, Happy, Sad, Surprise, Fear, Disgust, Anger
**Classes (7)**: Neutral, Happy, Sad, Surprise, Fear, Disgust, Angry

**Classes (8)**: Above + Contempt

!!! info "Training Data"
    **Dataset**: Trained on AffectNet
    **Dataset**: Trained on [AffectNet](datasets.md#affectnet)

!!! note "Accuracy Note"
    Emotion detection accuracy depends heavily on facial expression clarity and cultural context.
@@ -220,20 +235,20 @@ High-precision facial landmark localization.

### MobileGaze Family

Real-time gaze direction prediction models trained on Gaze360 dataset. Returns pitch (vertical) and yaw (horizontal) angles in radians.
Gaze direction prediction models trained on the [Gaze360](datasets.md#gaze360) dataset. Returns pitch (vertical) and yaw (horizontal) angles in radians.

| Model Name | Params | Size | MAE* | Use Case |
| -------------- | ------ | ------- | ----- | ----------------------------- |
| `RESNET18` | 11.7M | 43 MB | 12.84 | Balanced accuracy/speed |
| `RESNET34` :material-check-circle: | 24.8M | 81.6 MB | 11.33 | **Default** |
| `RESNET50` | 25.6M | 91.3 MB | 11.34 | High accuracy |
| `MOBILENET_V2` | 3.5M | 9.59 MB | 13.07 | Mobile/Edge devices |
| `MOBILEONE_S0` | 2.1M | 4.8 MB | 12.58 | Lightweight/Real-time |
| Model Name | Params | Size | MAE* |
| -------------- | ------ | ------- | ----- |
| `RESNET18` | 11.7M | 43 MB | 12.84 |
| `RESNET34` :material-check-circle: | 24.8M | 81.6 MB | 11.33 |
| `RESNET50` | 25.6M | 91.3 MB | 11.34 |
| `MOBILENET_V2` | 3.5M | 9.59 MB | 13.07 |
| `MOBILEONE_S0` | 2.1M | 4.8 MB | 12.58 |

*MAE (Mean Absolute Error) in degrees on the Gaze360 test set - lower is better
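
Since MobileGaze returns angles in radians, converting to degrees is a common first step (a small sketch; the angle values here are made up for illustration):

```python
import numpy as np

pitch, yaw = 0.12, -0.35  # radians, as returned by MobileGaze
print(f"pitch: {np.degrees(pitch):+.1f}°, yaw: {np.degrees(yaw):+.1f}°")
```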

!!! info "Training Data"
    **Dataset**: Trained on Gaze360 (indoor/outdoor scenes with diverse head poses)
    **Dataset**: Trained on [Gaze360](datasets.md#gaze360) (indoor/outdoor scenes with diverse head poses)

    **Training**: 200 epochs with classification-based approach (binned angles)

@@ -248,13 +263,13 @@ Real-time gaze direction prediction models trained on Gaze360 dataset. Returns p

BiSeNet (Bilateral Segmentation Network) models for semantic face parsing. Segments face images into 19 facial component classes.

| Model Name | Params | Size | Classes | Use Case |
| -------------- | ------ | ------- | ------- | ----------------------------- |
| `RESNET18` :material-check-circle: | 13.3M | 50.7 MB | 19 | **Default** |
| `RESNET34` | 24.1M | 89.2 MB | 19 | Higher accuracy |
| Model Name | Params | Size | Classes |
| -------------- | ------ | ------- | ------- |
| `RESNET18` :material-check-circle: | 13.3M | 50.7 MB | 19 |
| `RESNET34` | 24.1M | 89.2 MB | 19 |

!!! info "Training Data"
    **Dataset**: Trained on CelebAMask-HQ
    **Dataset**: Trained on [CelebAMask-HQ](datasets.md#celebamask-hq)

    **Architecture**: BiSeNet with ResNet backbone

@@ -264,13 +279,13 @@ BiSeNet (Bilateral Segmentation Network) models for semantic face parsing. Segme

| # | Class | # | Class | # | Class |
|---|-------|---|-------|---|-------|
| 1 | Background | 8 | Left Ear | 15 | Neck |
| 2 | Skin | 9 | Right Ear | 16 | Neck Lace |
| 3 | Left Eyebrow | 10 | Ear Ring | 17 | Cloth |
| 4 | Right Eyebrow | 11 | Nose | 18 | Hair |
| 5 | Left Eye | 12 | Mouth | 19 | Hat |
| 6 | Right Eye | 13 | Upper Lip | | |
| 7 | Eye Glasses | 14 | Lower Lip | | |
| 0 | Background | 7 | Left Ear | 14 | Neck |
| 1 | Skin | 8 | Right Ear | 15 | Neck Lace |
| 2 | Left Eyebrow | 9 | Ear Ring | 16 | Cloth |
| 3 | Right Eyebrow | 10 | Nose | 17 | Hair |
| 4 | Left Eye | 11 | Mouth | 18 | Hat |
| 5 | Right Eye | 12 | Upper Lip | | |
| 6 | Eye Glasses | 13 | Lower Lip | | |

**Applications:**

@@ -285,22 +300,48 @@ BiSeNet (Bilateral Segmentation Network) models for semantic face parsing. Segme

---

### XSeg

XSeg from DeepFaceLab outputs masks for face regions. Requires 5-point landmarks for face alignment.

| Model Name | Size | Output |
|------------|--------|--------|
| `DEFAULT` | 67 MB | Mask [0, 1] |

!!! info "Model Details"
    **Origin**: DeepFaceLab

    **Input**: NHWC format, normalized to [0, 1]

    **Alignment**: Requires 5-point landmarks (not bbox crops)

**Applications:**

- Face region extraction
- Face swapping pipelines
- Occlusion handling

!!! note "Input Requirements"
    Requires 5-point facial landmarks. Use a face detector like RetinaFace to obtain landmarks first.

---

## Anti-Spoofing Models

### MiniFASNet Family

Lightweight face anti-spoofing models for liveness detection. Detect if a face is real (live) or fake (photo, video replay, mask).
Face anti-spoofing models for liveness detection. Detect if a face is real (live) or fake (photo, video replay, mask).

| Model Name | Size | Scale | Use Case |
| ---------- | ------ | ----- | ----------------------------- |
| `V1SE` | 1.2 MB | 4.0 | Squeeze-and-excitation variant |
| `V2` :material-check-circle: | 1.2 MB | 2.7 | **Default** |
| Model Name | Size | Scale |
| ---------- | ------ | ----- |
| `V1SE` | 1.2 MB | 4.0 |
| `V2` :material-check-circle: | 1.2 MB | 2.7 |

!!! info "Output Format"
    **Output**: Returns `SpoofingResult(is_real, confidence)` where `is_real` is `True` for real and `False` for fake

!!! note "Input Requirements"
    Requires face bounding box from a detector. Use with RetinaFace, SCRFD, or YOLOv5Face.
    Requires a face bounding box from a detector.
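
A minimal liveness-check sketch: the class name `MiniFASNet`, the import path `uniface.spoofing`, and the `predict(image, bbox)` method are assumptions for illustration — check the anti-spoofing module docs for the exact API. Only the `SpoofingResult(is_real, confidence)` output format above is documented.

```python
import cv2
from uniface.detection import RetinaFace
from uniface.spoofing import MiniFASNet  # assumed import path

detector = RetinaFace()
spoofer = MiniFASNet()  # assumed class name

image = cv2.imread("photo.jpg")
for face in detector.detect(image):
    result = spoofer.predict(image, face.bbox)  # assumed method name
    print("Real" if result.is_real else "Fake", f"(confidence: {result.confidence:.2f})")
```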

---

@@ -308,10 +349,14 @@ Lightweight face anti-spoofing models for liveness detection. Detect if a face i

Models are automatically downloaded and cached on first use.

- **Cache location**: `~/.uniface/models/`
- **Cache location**: `~/.uniface/models/` (configurable via `set_cache_dir()` or `UNIFACE_CACHE_DIR` env var)
- **Inspect cache path**: `get_cache_dir()` returns the resolved active path
- **Verification**: Models are verified with SHA-256 checksums
- **Concurrent download**: `download_models([...])` fetches multiple models in parallel
- **Manual download**: Use `python tools/download_model.py` to pre-download models

See [Model Cache & Offline Use](concepts/model-cache-offline.md) for full details.
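
A short sketch of those cache helpers in use (assuming they are importable from the top-level `uniface` package; the model names passed to `download_models` are illustrative):

```python
from uniface import download_models, get_cache_dir, set_cache_dir

set_cache_dir("/data/uniface-models")  # or: export UNIFACE_CACHE_DIR=/data/uniface-models
print(get_cache_dir())                 # resolved active cache path

# Pre-fetch several models in parallel, e.g. before going offline.
download_models(["retinaface_mnet_v2", "arcface_mnet"])  # illustrative names
```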

---

## References

@@ -321,11 +366,14 @@ Models are automatically downloaded and cached on first use.
- **RetinaFace Training**: [yakhyo/retinaface-pytorch](https://github.com/yakhyo/retinaface-pytorch) - PyTorch implementation and training code
- **YOLOv5-Face Original**: [deepcam-cn/yolov5-face](https://github.com/deepcam-cn/yolov5-face) - Original PyTorch implementation
- **YOLOv5-Face ONNX**: [yakhyo/yolov5-face-onnx-inference](https://github.com/yakhyo/yolov5-face-onnx-inference) - ONNX inference implementation
- **YOLOv8-Face Original**: [derronqi/yolov8-face](https://github.com/derronqi/yolov8-face) - Original PyTorch implementation
- **YOLOv8-Face ONNX**: [yakhyo/yolov8-face-onnx-inference](https://github.com/yakhyo/yolov8-face-onnx-inference) - ONNX inference implementation
- **AdaFace Original**: [mk-minchul/AdaFace](https://github.com/mk-minchul/AdaFace) - Original PyTorch implementation
- **AdaFace ONNX**: [yakhyo/adaface-onnx](https://github.com/yakhyo/adaface-onnx) - ONNX export and inference
- **Face Recognition Training**: [yakhyo/face-recognition](https://github.com/yakhyo/face-recognition) - ArcFace, MobileFace, SphereFace training code
- **Gaze Estimation Training**: [yakhyo/gaze-estimation](https://github.com/yakhyo/gaze-estimation) - MobileGaze training code and pretrained weights
- **Face Parsing Training**: [yakhyo/face-parsing](https://github.com/yakhyo/face-parsing) - BiSeNet training code and pretrained weights
- **Face Segmentation**: [yakhyo/face-segmentation](https://github.com/yakhyo/face-segmentation) - XSeg ONNX inference
- **Face Anti-Spoofing**: [yakhyo/face-anti-spoofing](https://github.com/yakhyo/face-anti-spoofing) - MiniFASNet ONNX inference (weights from [minivision-ai/Silent-Face-Anti-Spoofing](https://github.com/minivision-ai/Silent-Face-Anti-Spoofing))
- **FairFace**: [yakhyo/fairface-onnx](https://github.com/yakhyo/fairface-onnx) - FairFace ONNX inference for race, gender, age prediction
- **InsightFace**: [deepinsight/insightface](https://github.com/deepinsight/insightface) - Model architectures and pretrained weights

@@ -21,7 +21,8 @@ Predicts exact age and binary gender.
### Basic Usage

```python
from uniface import RetinaFace, AgeGender
from uniface.attribute import AgeGender
from uniface.detection import RetinaFace

detector = RetinaFace()
age_gender = AgeGender()
@@ -54,7 +55,8 @@ Predicts gender, age group, and race with balanced demographics.
### Basic Usage

```python
from uniface import RetinaFace, FairFace
from uniface.attribute import FairFace
from uniface.detection import RetinaFace

detector = RetinaFace()
fairface = FairFace()
@@ -120,7 +122,7 @@ Predicts facial emotions. Requires PyTorch.
### Basic Usage

```python
from uniface import RetinaFace
from uniface.detection import RetinaFace
from uniface.attribute import Emotion
from uniface.constants import DDAMFNWeights

@@ -147,7 +149,7 @@ for face in faces:
    | Surprise |
    | Fear |
    | Disgust |
    | Anger |
    | Angry |

=== "8-Class (AFFECNET8)"

@@ -159,7 +161,7 @@ for face in faces:
    | Surprise |
    | Fear |
    | Disgust |
    | Anger |
    | Angry |
    | Contempt |

### Model Variants
@@ -182,7 +184,8 @@ emotion = Emotion(model_name=DDAMFNWeights.AFFECNET8)
### Full Attribute Analysis

```python
from uniface import RetinaFace, AgeGender, FairFace
from uniface.attribute import AgeGender, FairFace
from uniface.detection import RetinaFace

detector = RetinaFace()
age_gender = AgeGender()
@@ -206,12 +209,13 @@ for face in faces:
### Using FaceAnalyzer

```python
from uniface import FaceAnalyzer
from uniface.analyzer import FaceAnalyzer
from uniface.attribute import AgeGender
from uniface.detection import RetinaFace

analyzer = FaceAnalyzer(
    detect=True,
    recognize=False,
    attributes=True  # Uses AgeGender
    RetinaFace(),
    age_gender=AgeGender(),
)

faces = analyzer.analyze(image)

@@ -1,27 +1,30 @@
# Detection

Face detection is the first step in any face analysis pipeline. UniFace provides three detection models.
Face detection is the first step in any face analysis pipeline. UniFace provides four detection models.

---

## Available Models

| Model | Backbone | Size | WIDER FACE (Easy/Medium/Hard) | Best For |
|-------|----------|------|-------------------------------|----------|
| **RetinaFace** | MobileNet V2 | 3.5 MB | 91.7% / 91.0% / 86.6% | Balanced (recommended) |
| **SCRFD** | SCRFD-10G | 17 MB | 95.2% / 93.9% / 83.1% | High accuracy |
| **YOLOv5-Face** | YOLOv5s | 28 MB | 94.3% / 92.6% / 83.2% | Real-time |
| Model | Backbone | Size | Easy | Medium | Hard | Landmarks |
|-------|----------|------|------|--------|------|:---------:|
| **RetinaFace** | MobileNet V2 | 3.5 MB | 91.7% | 91.0% | 86.6% | :material-check: |
| **SCRFD** | SCRFD-10G | 17 MB | 95.2% | 93.9% | 83.1% | :material-check: |
| **YOLOv5-Face** | YOLOv5s | 28 MB | 94.3% | 92.6% | 83.2% | :material-check: |
| **YOLOv8-Face** | YOLOv8n | 12 MB | 94.6% | 92.3% | 79.6% | :material-check: |

!!! note "Dataset"
    All models are trained on the WIDER FACE dataset.
---

## RetinaFace

The recommended detector for most use cases.
Single-shot face detector with multi-scale feature pyramid.

### Basic Usage

```python
from uniface import RetinaFace
from uniface.detection import RetinaFace

detector = RetinaFace()
faces = detector.detect(image)
@@ -35,7 +38,7 @@ for face in faces:
### Model Variants

```python
from uniface import RetinaFace
from uniface.detection import RetinaFace
from uniface.constants import RetinaFaceWeights

# Lightweight (mobile/edge)
@@ -65,7 +68,8 @@ detector = RetinaFace(
    confidence_threshold=0.5,  # Min confidence
    nms_threshold=0.4,         # NMS IoU threshold
    input_size=(640, 640),     # Input resolution
    dynamic_size=False         # Enable dynamic input size
    dynamic_size=False,        # Enable dynamic input size
    providers=None,            # Auto-detect, or ['CPUExecutionProvider']
)
```

@@ -78,7 +82,7 @@ State-of-the-art detection with excellent accuracy-speed tradeoff.
### Basic Usage

```python
from uniface import SCRFD
from uniface.detection import SCRFD

detector = SCRFD()
faces = detector.detect(image)
@@ -87,7 +91,7 @@ faces = detector.detect(image)
### Model Variants

```python
from uniface import SCRFD
from uniface.detection import SCRFD
from uniface.constants import SCRFDWeights

# Real-time (lightweight)
@@ -109,7 +113,8 @@ detector = SCRFD(
    model_name=SCRFDWeights.SCRFD_10G_KPS,
    confidence_threshold=0.5,
    nms_threshold=0.4,
    input_size=(640, 640)
    input_size=(640, 640),
    providers=None,  # Auto-detect, or ['CPUExecutionProvider']
)
```

@@ -122,7 +127,7 @@ YOLO-based detection optimized for faces.
### Basic Usage

```python
from uniface import YOLOv5Face
from uniface.detection import YOLOv5Face

detector = YOLOv5Face()
faces = detector.detect(image)
@@ -131,7 +136,7 @@ faces = detector.detect(image)
### Model Variants

```python
from uniface import YOLOv5Face
from uniface.detection import YOLOv5Face
from uniface.constants import YOLOv5FaceWeights

# Lightweight
@@ -159,7 +164,57 @@ detector = YOLOv5Face(model_name=YOLOv5FaceWeights.YOLOV5M)
detector = YOLOv5Face(
    model_name=YOLOv5FaceWeights.YOLOV5S,
    confidence_threshold=0.6,
    nms_threshold=0.5
    nms_threshold=0.5,
    nms_mode='numpy',  # or 'torchvision' for faster NMS
    providers=None,    # Auto-detect, or ['CPUExecutionProvider']
)
```

---

## YOLOv8-Face

Anchor-free detection with DFL (Distribution Focal Loss) for accurate bbox regression.

### Basic Usage

```python
from uniface.detection import YOLOv8Face

detector = YOLOv8Face()
faces = detector.detect(image)
```

### Model Variants

```python
from uniface.detection import YOLOv8Face
from uniface.constants import YOLOv8FaceWeights

# Lightweight
detector = YOLOv8Face(model_name=YOLOv8FaceWeights.YOLOV8_LITE_S)

# Recommended (default)
detector = YOLOv8Face(model_name=YOLOv8FaceWeights.YOLOV8N)
```

| Variant | Size | Easy | Medium | Hard |
|---------|------|------|--------|------|
| YOLOV8_LITE_S | 7.4 MB | 93.4% | 91.2% | 78.6% |
| **YOLOV8N** :material-check-circle: | 12 MB | 94.6% | 92.3% | 79.6% |

!!! note "Fixed Input Size"
    YOLOv8-Face uses a fixed input size of 640×640.

### Configuration

```python
detector = YOLOv8Face(
    model_name=YOLOv8FaceWeights.YOLOV8N,
    confidence_threshold=0.5,
    nms_threshold=0.45,
    nms_mode='numpy',  # or 'torchvision' for faster NMS
    providers=None,    # Auto-detect, or ['CPUExecutionProvider']
)
```

@@ -170,29 +225,15 @@ detector = YOLOv5Face(
Create detectors dynamically:

```python
from uniface import create_detector
from uniface.detection import create_detector

detector = create_detector('retinaface')
# or
detector = create_detector('scrfd')
# or
detector = create_detector('yolov5face')
```

---

## High-Level API

One-line detection:

```python
from uniface import detect_faces

faces = detect_faces(
    image,
    method='retinaface',
    confidence_threshold=0.5
)
# or
detector = create_detector('yolov8face')
```

---
@@ -219,7 +260,7 @@ for face in faces:
## Visualization

```python
from uniface.visualization import draw_detections
from uniface.draw import draw_detections

draw_detections(
    image=image,
@@ -239,7 +280,7 @@ cv2.imwrite("result.jpg", image)
Benchmark on your hardware:

```bash
python tools/detection.py --source image.jpg --iterations 100
python tools/detect.py --source image.jpg
```

---

@@ -6,13 +6,13 @@ Gaze estimation predicts where a person is looking (pitch and yaw angles).

## Available Models

| Model | Backbone | Size | MAE* | Best For |
|-------|----------|------|------|----------|
| ResNet18 | ResNet18 | 43 MB | 12.84° | Balanced |
| **ResNet34** :material-check-circle: | ResNet34 | 82 MB | 11.33° | Recommended |
| ResNet50 | ResNet50 | 91 MB | 11.34° | High accuracy |
| MobileNetV2 | MobileNetV2 | 9.6 MB | 13.07° | Mobile |
| MobileOne-S0 | MobileOne | 4.8 MB | 12.58° | Lightweight |
| Model | Backbone | Size | MAE* |
|-------|----------|------|------|
| ResNet18 | ResNet18 | 43 MB | 12.84° |
| **ResNet34** :material-check-circle: | ResNet34 | 82 MB | 11.33° |
| ResNet50 | ResNet50 | 91 MB | 11.34° |
| MobileNetV2 | MobileNetV2 | 9.6 MB | 13.07° |
| MobileOne-S0 | MobileOne | 4.8 MB | 12.58° |

*MAE = Mean Absolute Error on the Gaze360 test set (lower is better)

@@ -23,7 +23,8 @@ Gaze estimation predicts where a person is looking (pitch and yaw angles).
```python
import cv2
import numpy as np
from uniface import RetinaFace, MobileGaze
from uniface.detection import RetinaFace
from uniface.gaze import MobileGaze

detector = RetinaFace()
gaze_estimator = MobileGaze()
@@ -52,7 +53,7 @@ for face in faces:
## Model Variants

```python
from uniface import MobileGaze
from uniface.gaze import MobileGaze
from uniface.constants import GazeWeights

# Default (ResNet34, recommended)
@@ -102,7 +103,7 @@ yaw = -90° ────┼──── yaw = +90°
## Visualization

```python
from uniface.visualization import draw_gaze
from uniface.draw import draw_gaze

# Detect faces
faces = detector.detect(image)
@@ -154,8 +155,9 @@ def draw_gaze_custom(image, bbox, pitch, yaw, length=100, color=(0, 255, 0)):
```python
import cv2
import numpy as np
from uniface import RetinaFace, MobileGaze
from uniface.visualization import draw_gaze
from uniface.detection import RetinaFace
from uniface.gaze import MobileGaze
from uniface.draw import draw_gaze

detector = RetinaFace()
gaze_estimator = MobileGaze()
@@ -256,7 +258,7 @@ print(f"Looking: {direction}")
## Factory Function

```python
from uniface import create_gaze_estimator
from uniface.gaze import create_gaze_estimator

gaze = create_gaze_estimator()  # Returns MobileGaze
```

172
docs/modules/indexing.md
Normal file
@@ -0,0 +1,172 @@
# Indexing

FAISS-backed vector store for fast similarity search over embeddings.

!!! info "Optional dependency"
    ```bash
    pip install faiss-cpu
    ```

---

## FAISS

```python
from uniface.indexing import FAISS
```

A thin wrapper around a FAISS `IndexFlatIP` (inner-product) index. Vectors
**must** be L2-normalised before adding so that inner product equals cosine
similarity. The store does not normalise internally.

Each vector is paired with a metadata `dict` that can carry any
JSON-serialisable payload (person ID, name, source path, etc.).
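
Since the store does not normalise internally, raw vectors need an explicit L2 step before `add()`. A minimal sketch (embeddings from `get_normalized_embedding()` should already be unit-length, as the name suggests):

```python
import numpy as np

def l2_normalize(v: np.ndarray) -> np.ndarray:
    """Scale a vector to unit length so inner product equals cosine similarity."""
    return v / np.linalg.norm(v)

raw = np.random.rand(512).astype(np.float32)  # stand-in for a raw embedding
ready = l2_normalize(raw)
print(np.linalg.norm(ready))  # ~1.0
```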

### Constructor

```python
store = FAISS(embedding_size=512, db_path="./vector_index")
```

| Parameter | Type | Default | Description |
|-----------|------|---------|-------------|
| `embedding_size` | `int` | `512` | Dimension of embedding vectors |
| `db_path` | `str` | `"./vector_index"` | Directory for persisting index and metadata |

---

### Methods

#### `add(embedding, metadata)`

Add a single embedding with associated metadata.

```python
store.add(embedding, {"person_id": "alice", "source": "photo.jpg"})
```

| Parameter | Type | Description |
|-----------|------|-------------|
| `embedding` | `np.ndarray` | L2-normalised embedding vector |
| `metadata` | `dict[str, Any]` | Arbitrary JSON-serialisable key-value pairs |

---

#### `search(embedding, threshold=0.4)`

Find the closest match for a query embedding.

```python
result, similarity = store.search(query_embedding, threshold=0.4)
if result:
    print(result["person_id"], similarity)
```

| Parameter | Type | Default | Description |
|-----------|------|---------|-------------|
| `embedding` | `np.ndarray` | — | L2-normalised query vector |
| `threshold` | `float` | `0.4` | Minimum cosine similarity to accept a match |

**Returns:** `(metadata, similarity)` if a match is found, or `(None, similarity)` when below threshold or the index is empty.

---

#### `remove(key, value)`

Remove all entries where `metadata[key] == value` and rebuild the index.

```python
removed = store.remove("person_id", "bob")
print(f"Removed {removed} entries")
```

| Parameter | Type | Description |
|-----------|------|-------------|
| `key` | `str` | Metadata key to match |
| `value` | `Any` | Value to match |

**Returns:** Number of entries removed.

---

#### `save()`

Persist the FAISS index and metadata to disk.

```python
store.save()
```

Writes two files to `db_path`:

- `faiss_index.bin` — binary FAISS index
- `metadata.json` — JSON array of metadata dicts

---

#### `load()`

Load a previously saved index and metadata.

```python
store = FAISS(db_path="./vector_index")
loaded = store.load()  # True if files exist
```

**Returns:** `True` if loaded successfully, `False` if files are missing.

**Raises:** `RuntimeError` if files exist but cannot be read.

---

### Properties

| Property | Type | Description |
|----------|------|-------------|
| `size` | `int` | Number of vectors in the index |
| `len(store)` | `int` | Same as `size` |

---

## Example: End-to-End

```python
import cv2
from uniface.detection import RetinaFace
from uniface.recognition import ArcFace
from uniface.indexing import FAISS

detector = RetinaFace()
recognizer = ArcFace()

# Build
store = FAISS(db_path="./my_index")

image = cv2.imread("alice.jpg")
faces = detector.detect(image)
embedding = recognizer.get_normalized_embedding(image, faces[0].landmarks)
store.add(embedding, {"person_id": "alice"})
store.save()

# Search
store2 = FAISS(db_path="./my_index")
store2.load()

query = cv2.imread("unknown.jpg")
faces = detector.detect(query)
emb = recognizer.get_normalized_embedding(query, faces[0].landmarks)

result, sim = store2.search(emb)
if result:
    print(f"Matched: {result['person_id']} (similarity: {sim:.3f})")
else:
    print(f"No match (similarity: {sim:.3f})")
```

---

## See Also

- [Face Search Recipe](../recipes/face-search.md) - Building and querying indexes
- [Recognition Module](recognition.md) - Embedding extraction
- [Thresholds Guide](../concepts/thresholds-calibration.md) - Tuning similarity thresholds
@@ -6,12 +6,12 @@ Facial landmark detection provides precise localization of facial features.

## Available Models

| Model | Points | Size | Use Case |
|-------|--------|------|----------|
| **Landmark106** | 106 | 14 MB | Detailed face analysis |
| Model | Points | Size |
|-------|--------|------|
| **Landmark106** | 106 | 14 MB |

!!! info "5-Point Landmarks"
    Basic 5-point landmarks are included with all detection models (RetinaFace, SCRFD, YOLOv5-Face).
    Basic 5-point landmarks are included with all detection models (RetinaFace, SCRFD, YOLOv5-Face, YOLOv8-Face).

---

@@ -20,7 +20,8 @@ Facial landmark detection provides precise localization of facial features.
### Basic Usage

```python
from uniface import RetinaFace, Landmark106
from uniface.detection import RetinaFace
from uniface.landmark import Landmark106

detector = RetinaFace()
landmarker = Landmark106()
@@ -78,7 +79,7 @@ mouth = landmarks[87:106]
All detection models provide 5-point landmarks:

```python
from uniface import RetinaFace
from uniface.detection import RetinaFace

detector = RetinaFace()
faces = detector.detect(image)
@@ -152,7 +153,7 @@ def draw_landmarks_with_connections(image, landmarks):
### Face Alignment

```python
from uniface import face_alignment
from uniface.face_utils import face_alignment

# Align face using 5-point landmarks
aligned = face_alignment(image, faces[0].landmarks)
@@ -236,7 +237,7 @@ def estimate_head_pose(landmarks, image_shape):
## Factory Function

```python
from uniface import create_landmarker
from uniface.landmark import create_landmarker

landmarker = create_landmarker()  # Returns Landmark106
```

@@ -1,15 +1,16 @@
# Parsing

Face parsing segments faces into semantic components (skin, eyes, nose, mouth, hair, etc.).
Face parsing segments faces into semantic components or face regions.

---

## Available Models

| Model | Backbone | Size | Classes | Best For |
|-------|----------|------|---------|----------|
| **BiSeNet ResNet18** :material-check-circle: | ResNet18 | 51 MB | 19 | Balanced (recommended) |
| **BiSeNet ResNet34** | ResNet34 | 89 MB | 19 | Higher accuracy |
| Model | Backbone | Size | Output |
|-------|----------|------|--------|
| **BiSeNet ResNet18** :material-check-circle: | ResNet18 | 51 MB | 19 classes |
| BiSeNet ResNet34 | ResNet34 | 89 MB | 19 classes |
| XSeg | - | 67 MB | Mask |

---

@@ -18,7 +19,7 @@ Face parsing segments faces into semantic components (skin, eyes, nose, mouth, h
```python
import cv2
from uniface.parsing import BiSeNet
from uniface.visualization import vis_parsing_maps
from uniface.draw import vis_parsing_maps

# Initialize parser
parser = BiSeNet()
@@ -45,16 +46,16 @@ cv2.imwrite("parsed.jpg", vis_bgr)

| ID | Class | ID | Class |
|----|-------|----|-------|
| 0 | Background | 10 | Ear Ring |
| 1 | Skin | 11 | Nose |
| 2 | Left Eyebrow | 12 | Mouth |
| 3 | Right Eyebrow | 13 | Upper Lip |
| 4 | Left Eye | 14 | Lower Lip |
| 5 | Right Eye | 15 | Neck |
| 6 | Eye Glasses | 16 | Neck Lace |
| 7 | Left Ear | 17 | Cloth |
| 8 | Right Ear | 18 | Hair |
| 9 | Hat | | |
| 0 | Background | 10 | Nose |
| 1 | Skin | 11 | Mouth |
| 2 | Left Eyebrow | 12 | Upper Lip |
| 3 | Right Eyebrow | 13 | Lower Lip |
| 4 | Left Eye | 14 | Neck |
| 5 | Right Eye | 15 | Necklace |
| 6 | Eyeglasses | 16 | Cloth |
| 7 | Left Ear | 17 | Hair |
| 8 | Right Ear | 18 | Hat |
| 9 | Earring | | |

---

@@ -71,10 +72,10 @@ parser = BiSeNet()
parser = BiSeNet(model_name=ParsingWeights.RESNET34)
```

| Variant | Params | Size | Notes |
|---------|--------|------|-------|
| **RESNET18** :material-check-circle: | 13.3M | 51 MB | Recommended |
| RESNET34 | 24.1M | 89 MB | Higher accuracy |
| Variant | Params | Size |
|---------|--------|------|
| **RESNET18** :material-check-circle: | 13.3M | 51 MB |
| RESNET34 | 24.1M | 89 MB |

---

@@ -84,9 +85,9 @@ parser = BiSeNet(model_name=ParsingWeights.RESNET34)

```python
import cv2
from uniface import RetinaFace
from uniface.detection import RetinaFace
from uniface.parsing import BiSeNet
from uniface.visualization import vis_parsing_maps
from uniface.draw import vis_parsing_maps

detector = RetinaFace()
parser = BiSeNet()
@@ -125,7 +126,7 @@ mask = parser.parse(face_image)

# Extract specific component
SKIN = 1
HAIR = 18
HAIR = 17
LEFT_EYE = 4
RIGHT_EYE = 5

@@ -148,10 +149,10 @@ mask = parser.parse(face_image)

component_names = {
    0: 'Background', 1: 'Skin', 2: 'L-Eyebrow', 3: 'R-Eyebrow',
    4: 'L-Eye', 5: 'R-Eye', 6: 'Glasses', 7: 'L-Ear', 8: 'R-Ear',
    9: 'Hat', 10: 'Earring', 11: 'Nose', 12: 'Mouth',
    13: 'U-Lip', 14: 'L-Lip', 15: 'Neck', 16: 'Necklace',
    17: 'Cloth', 18: 'Hair'
    4: 'L-Eye', 5: 'R-Eye', 6: 'Eyeglasses', 7: 'L-Ear', 8: 'R-Ear',
    9: 'Earring', 10: 'Nose', 11: 'Mouth',
    12: 'U-Lip', 13: 'L-Lip', 14: 'Neck', 15: 'Necklace',
    16: 'Cloth', 17: 'Hair', 18: 'Hat'
}

for class_id in np.unique(mask):
@@ -176,23 +177,19 @@ def apply_lip_color(image, mask, color=(180, 50, 50)):
    """Apply lip color using parsing mask."""
    result = image.copy()

    # Get lip mask (upper + lower lip)
    lip_mask = ((mask == 13) | (mask == 14)).astype(np.uint8)
    # Get lip mask (upper lip=12, lower lip=13)
    lip_mask = ((mask == 12) | (mask == 13)).astype(np.uint8)

    # Create color overlay
    overlay = np.zeros_like(image)
    overlay[:] = color

    # Blend with original
    lip_region = cv2.bitwise_and(overlay, overlay, mask=lip_mask)
    non_lip = cv2.bitwise_and(result, result, mask=1 - lip_mask)

    # Combine with alpha blending
    # Alpha blend lip region
    alpha = 0.4
    result = cv2.addWeighted(result, 1 - alpha * lip_mask[:,:,np.newaxis] / 255,
                             lip_region, alpha, 0)
    mask_3ch = lip_mask[:, :, np.newaxis]
    result = np.where(mask_3ch, (image * (1 - alpha) + overlay * alpha).astype(np.uint8), result)

    return result.astype(np.uint8)
    return result
```

### Background Replacement
@@ -218,7 +215,7 @@ def replace_background(image, mask, background):
```python
def get_hair_mask(mask):
    """Extract clean hair mask."""
    hair_mask = (mask == 18).astype(np.uint8) * 255
    hair_mask = (mask == 17).astype(np.uint8) * 255

    # Clean up with morphological operations
    kernel = np.ones((5, 5), np.uint8)
@@ -233,7 +230,7 @@ def get_hair_mask(mask):
## Visualization Options

```python
from uniface.visualization import vis_parsing_maps
from uniface.draw import vis_parsing_maps

# Default visualization
vis_result = vis_parsing_maps(face_rgb, mask)
@@ -248,12 +245,83 @@ vis_result = vis_parsing_maps(
---

## XSeg

XSeg outputs a mask for face regions. Unlike BiSeNet which works on bbox crops, XSeg requires 5-point landmarks for face alignment.

### Basic Usage

```python
import cv2
from uniface.detection import RetinaFace
from uniface.parsing import XSeg

detector = RetinaFace()
parser = XSeg()

image = cv2.imread("photo.jpg")
faces = detector.detect(image)

for face in faces:
    if face.landmarks is not None:
        mask = parser.parse(image, landmarks=face.landmarks)
        print(f"Mask shape: {mask.shape}")  # (H, W), values in [0, 1]
```

### Parameters

```python
from uniface.parsing import XSeg

# Default settings
parser = XSeg()

# Custom settings
parser = XSeg(
    align_size=256,  # Face alignment size
    blur_sigma=5,    # Gaussian blur for smoothing (0 = raw)
)
```

| Parameter | Default | Description |
|-----------|---------|-------------|
| `align_size` | 256 | Face alignment output size |
| `blur_sigma` | 0 | Mask smoothing (0 = no blur) |

### Methods

```python
# Full pipeline: align -> segment -> warp back to original space
mask = parser.parse(image, landmarks=landmarks)

# For pre-aligned face crops
mask = parser.parse_aligned(face_crop)

# Get mask + crop + inverse matrix for custom warping
mask, face_crop, inverse_matrix = parser.parse_with_inverse(image, landmarks)
```

### BiSeNet vs XSeg

| Feature | BiSeNet | XSeg |
|---------|---------|------|
| Output | 19 class labels | Mask [0, 1] |
| Input | Bbox crop | Requires landmarks |
| Use case | Facial components | Face region extraction |
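
To use the soft mask for extraction, multiply it into the image (a sketch; `image` and `mask` are as in the XSeg basic-usage example above):

```python
import numpy as np

# Soft cutout: scales each pixel by its mask value in [0, 1].
face_soft = (image.astype(np.float32) * mask[:, :, np.newaxis]).astype(np.uint8)

# Hard cutout: threshold the mask into a binary 0/1 map first.
face_hard = image * (mask > 0.5).astype(np.uint8)[:, :, np.newaxis]
```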

---

## Factory Function

```python
from uniface import create_face_parser
from uniface.parsing import create_face_parser
from uniface.constants import ParsingWeights, XSegWeights

parser = create_face_parser()  # Returns BiSeNet
# BiSeNet (default)
parser = create_face_parser()

# XSeg
parser = create_face_parser(XSegWeights.DEFAULT)
```

---

@@ -6,37 +6,20 @@ Face anonymization protects privacy by blurring or obscuring faces in images and

## Available Methods

| Method | Description | Use Case |
|--------|-------------|----------|
| **pixelate** | Blocky pixelation | News media standard |
| **gaussian** | Smooth blur | Natural appearance |
| **blackout** | Solid color fill | Maximum privacy |
| **elliptical** | Oval-shaped blur | Natural face shape |
| **median** | Edge-preserving blur | Artistic effect |
| Method | Description |
|--------|-------------|
| **pixelate** | Blocky pixelation |
| **gaussian** | Smooth blur |
| **blackout** | Solid color fill |
| **elliptical** | Oval-shaped blur |
| **median** | Edge-preserving blur |

---

## Quick Start

### One-Line Anonymization

```python
from uniface.privacy import anonymize_faces
import cv2

image = cv2.imread("group_photo.jpg")
anonymized = anonymize_faces(image, method='pixelate')
cv2.imwrite("anonymized.jpg", anonymized)
```

---

## BlurFace Class

For more control, use the `BlurFace` class:

```python
from uniface import RetinaFace
from uniface.detection import RetinaFace
from uniface.privacy import BlurFace
import cv2

@@ -59,12 +42,12 @@ cv2.imwrite("anonymized.jpg", anonymized)
Blocky pixelation effect (common in news media):

```python
blurrer = BlurFace(method='pixelate', pixel_blocks=10)
blurrer = BlurFace(method='pixelate', pixel_blocks=15)
```

| Parameter | Default | Description |
|-----------|---------|-------------|
| `pixel_blocks` | 10 | Number of blocks (lower = more pixelated) |
| `pixel_blocks` | 15 | Number of blocks (lower = more pixelated) |

### Gaussian

@@ -137,7 +120,7 @@ result = blurrer.anonymize(image, faces, inplace=True)

```python
import cv2
from uniface import RetinaFace
from uniface.detection import RetinaFace
from uniface.privacy import BlurFace

detector = RetinaFace()
@@ -166,7 +149,7 @@ cv2.destroyAllWindows()

```python
import cv2
from uniface import RetinaFace
from uniface.detection import RetinaFace
from uniface.privacy import BlurFace

detector = RetinaFace()
@@ -238,7 +221,7 @@ def anonymize_low_confidence(image, faces, blurrer, confidence_threshold=0.8):

```python
import cv2
from uniface import RetinaFace
from uniface.detection import RetinaFace
from uniface.privacy import BlurFace

detector = RetinaFace()
@@ -259,13 +242,13 @@ for method in methods:

```bash
# Anonymize image with pixelation
python tools/face_anonymize.py --source photo.jpg
python tools/anonymize.py --source photo.jpg

# Real-time webcam
python tools/face_anonymize.py --source 0 --method gaussian
python tools/anonymize.py --source 0 --method gaussian

# Custom blur strength
python tools/face_anonymize.py --source photo.jpg --method gaussian --blur-strength 5.0
python tools/anonymize.py --source photo.jpg --method gaussian --blur-strength 5.0
```

---

@@ -6,23 +6,24 @@ Face recognition extracts embeddings for identity verification and face search.

## Available Models

| Model | Backbone | Size | Embedding Dim | Best For |
|-------|----------|------|---------------|----------|
| **AdaFace** | IR-18/IR-101 | 92-249 MB | 512 | High-quality recognition |
| **ArcFace** | MobileNet/ResNet | 8-166 MB | 512 | General use (recommended) |
| **MobileFace** | MobileNet V2/V3 | 1-10 MB | 512 | Mobile/Edge |
| **SphereFace** | Sphere20/36 | 50-92 MB | 512 | Research |
| Model | Backbone | Size | Embedding Dim |
|-------|----------|------|---------------|
| **AdaFace** | IR-18/IR-101 | 92-249 MB | 512 |
| **ArcFace** | MobileNet/ResNet | 8-166 MB | 512 |
| **MobileFace** | MobileNet V2/V3 | 1-10 MB | 512 |
| **SphereFace** | Sphere20/36 | 50-92 MB | 512 |

---

## AdaFace

High-quality face recognition using adaptive margin based on image quality. AdaFace achieves state-of-the-art results on challenging benchmarks like IJB-B and IJB-C.
Face recognition using adaptive margin based on image quality.

### Basic Usage

```python
from uniface import RetinaFace, AdaFace
from uniface.detection import RetinaFace
from uniface.recognition import AdaFace

detector = RetinaFace()
recognizer = AdaFace()
@@ -39,7 +40,7 @@ if faces:
### Model Variants

```python
from uniface import AdaFace
from uniface.recognition import AdaFace
from uniface.constants import AdaFaceWeights

# Lightweight (default)
@@ -47,12 +48,15 @@ recognizer = AdaFace(model_name=AdaFaceWeights.IR_18)

# High accuracy
recognizer = AdaFace(model_name=AdaFaceWeights.IR_101)

# Force CPU execution
recognizer = AdaFace(providers=['CPUExecutionProvider'])
```

| Variant | Dataset | Size | IJB-B | IJB-C | Use Case |
|---------|---------|------|-------|-------|----------|
| **IR_18** :material-check-circle: | WebFace4M | 92 MB | 93.03% | 94.99% | Balanced (default) |
| IR_101 | WebFace12M | 249 MB | - | 97.66% | Maximum accuracy |
| Variant | Dataset | Size | IJB-B | IJB-C |
|---------|---------|------|-------|-------|
| **IR_18** :material-check-circle: | WebFace4M | 92 MB | 93.03% | 94.99% |
| IR_101 | WebFace12M | 249 MB | - | 97.66% |

!!! info "Benchmark Metrics"
    IJB-B and IJB-C accuracy reported as TAR@FAR=0.01%
|
||||
@@ -61,12 +65,13 @@ recognizer = AdaFace(model_name=AdaFaceWeights.IR_101)
|
||||
|
||||
## ArcFace
|
||||
|
||||
State-of-the-art recognition using additive angular margin loss.
|
||||
Face recognition using additive angular margin loss.
|
||||
|
||||
### Basic Usage
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace, ArcFace
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.recognition import ArcFace
|
||||
|
||||
detector = RetinaFace()
|
||||
recognizer = ArcFace()
|
||||
@@ -83,7 +88,7 @@ if faces:
|
||||
### Model Variants
|
||||
|
||||
```python
|
||||
from uniface import ArcFace
|
||||
from uniface.recognition import ArcFace
|
||||
from uniface.constants import ArcFaceWeights
|
||||
|
||||
# Lightweight (default)
|
||||
@@ -91,23 +96,31 @@ recognizer = ArcFace(model_name=ArcFaceWeights.MNET)
|
||||
|
||||
# High accuracy
|
||||
recognizer = ArcFace(model_name=ArcFaceWeights.RESNET)
|
||||
|
||||
# Force CPU execution
|
||||
recognizer = ArcFace(providers=['CPUExecutionProvider'])
|
||||
```
|
||||
|
||||
| Variant | Backbone | Size | Use Case |
|
||||
|---------|----------|------|----------|
|
||||
| **MNET** :material-check-circle: | MobileNet | 8 MB | Balanced (recommended) |
|
||||
| RESNET | ResNet50 | 166 MB | Maximum accuracy |
|
||||
| Variant | Backbone | Size | LFW | CFP-FP | AgeDB-30 | IJB-C |
|
||||
|---------|----------|------|-----|--------|----------|-------|
|
||||
| **MNET** :material-check-circle: | MobileNet | 8 MB | 99.70% | 98.00% | 96.58% | 95.02% |
|
||||
| RESNET | ResNet50 | 166 MB | 99.83% | 99.33% | 98.23% | 97.25% |
|
||||
|
||||
!!! info "Training Data & Metrics"
|
||||
**Dataset**: Trained on WebFace600K (600K images)
|
||||
|
||||
**Accuracy**: IJB-C reported as TAR@FAR=1e-4
|
||||
|
||||
---
|
||||
|
||||
## MobileFace
|
||||
|
||||
Lightweight recognition for resource-constrained environments.
|
||||
Lightweight face recognition models with MobileNet backbones.
|
||||
|
||||
### Basic Usage
|
||||
|
||||
```python
|
||||
from uniface import MobileFace
|
||||
from uniface.recognition import MobileFace
|
||||
|
||||
recognizer = MobileFace()
|
||||
embedding = recognizer.get_normalized_embedding(image, landmarks)
|
||||
@@ -116,7 +129,7 @@ embedding = recognizer.get_normalized_embedding(image, landmarks)
|
||||
### Model Variants
|
||||
|
||||
```python
|
||||
from uniface import MobileFace
|
||||
from uniface.recognition import MobileFace
|
||||
from uniface.constants import MobileFaceWeights
|
||||
|
||||
# Ultra-lightweight
|
||||
@@ -129,33 +142,33 @@ recognizer = MobileFace(model_name=MobileFaceWeights.MNET_V2)
|
||||
recognizer = MobileFace(model_name=MobileFaceWeights.MNET_V3_LARGE)
|
||||
```
|
||||
|
||||
| Variant | Params | Size | LFW | Use Case |
|
||||
|---------|--------|------|-----|----------|
|
||||
| MNET_025 | 0.36M | 1 MB | 98.8% | Ultra-lightweight |
|
||||
| **MNET_V2** :material-check-circle: | 2.29M | 4 MB | 99.6% | Mobile/Edge |
|
||||
| MNET_V3_SMALL | 1.25M | 3 MB | 99.3% | Mobile optimized |
|
||||
| MNET_V3_LARGE | 3.52M | 10 MB | 99.5% | Balanced mobile |
|
||||
| Variant | Params | Size | LFW | CALFW | CPLFW | AgeDB-30 |
|
||||
|---------|--------|------|-----|-------|-------|----------|
|
||||
| MNET_025 | 0.36M | 1 MB | 98.76% | 92.02% | 82.37% | 90.02% |
|
||||
| **MNET_V2** :material-check-circle: | 2.29M | 4 MB | 99.55% | 94.87% | 86.89% | 95.16% |
|
||||
| MNET_V3_SMALL | 1.25M | 3 MB | 99.30% | 93.77% | 85.29% | 92.79% |
|
||||
| MNET_V3_LARGE | 3.52M | 10 MB | 99.53% | 94.56% | 86.79% | 95.13% |
|
||||
|
||||
---
|
||||
|
||||
## SphereFace
|
||||
|
||||
Recognition using angular softmax loss (A-Softmax).
|
||||
Face recognition using angular softmax loss (A-Softmax).
|
||||
|
||||
### Basic Usage
|
||||
|
||||
```python
|
||||
from uniface import SphereFace
|
||||
from uniface.recognition import SphereFace
|
||||
from uniface.constants import SphereFaceWeights
|
||||
|
||||
recognizer = SphereFace(model_name=SphereFaceWeights.SPHERE20)
|
||||
embedding = recognizer.get_normalized_embedding(image, landmarks)
|
||||
```
|
||||
|
||||
| Variant | Params | Size | LFW | Use Case |
|
||||
|---------|--------|------|-----|----------|
|
||||
| SPHERE20 | 24.5M | 50 MB | 99.7% | Research |
|
||||
| SPHERE36 | 34.6M | 92 MB | 99.7% | Research |
|
||||
| Variant | Params | Size | LFW | CALFW | CPLFW | AgeDB-30 |
|
||||
|---------|--------|------|-----|-------|-------|----------|
|
||||
| SPHERE20 | 24.5M | 50 MB | 99.67% | 95.61% | 88.75% | 96.58% |
|
||||
| SPHERE36 | 34.6M | 92 MB | 99.72% | 95.64% | 89.92% | 96.83% |
|
||||
|
||||
---
|
||||
|
||||
@@ -164,7 +177,7 @@ embedding = recognizer.get_normalized_embedding(image, landmarks)
|
||||
### Compute Similarity
|
||||
|
||||
```python
|
||||
from uniface import compute_similarity
|
||||
from uniface.face_utils import compute_similarity
|
||||
import numpy as np
|
||||
|
||||
# Extract embeddings
|
||||
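
For orientation between the hunks above: assuming `compute_similarity` implements cosine similarity (the usual choice for L2-normalized embeddings), it reduces to a plain dot product. A minimal illustrative sketch, not the library's implementation:

```python
import numpy as np

# Cosine similarity of two embedding vectors. For already-normalized
# embeddings the norms are 1, so this is effectively just np.dot.
def cosine_similarity(emb1: np.ndarray, emb2: np.ndarray) -> float:
    a = emb1.ravel() / np.linalg.norm(emb1)
    b = emb2.ravel() / np.linalg.norm(emb2)
    return float(np.dot(a, b))
```
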
@@ -200,7 +213,7 @@ Recognition models require aligned faces. UniFace handles this internally:
embedding = recognizer.get_normalized_embedding(image, landmarks)

# Or manually align
from uniface import face_alignment
from uniface.face_utils import face_alignment

aligned_face = face_alignment(image, landmarks)
# Returns: 112x112 aligned face image
@@ -212,7 +225,8 @@ aligned_face = face_alignment(image, landmarks)

```python
import numpy as np
from uniface import RetinaFace, ArcFace
from uniface.detection import RetinaFace
from uniface.recognition import ArcFace

detector = RetinaFace()
recognizer = ArcFace()
@@ -271,7 +285,7 @@ else:
## Factory Function

```python
from uniface import create_recognizer
from uniface.recognition import create_recognizer

# Available methods: 'arcface', 'adaface', 'mobileface', 'sphereface'
recognizer = create_recognizer('arcface')

@@ -6,10 +6,10 @@ Face anti-spoofing detects whether a face is real (live) or fake (photo, video r

## Available Models

| Model | Size | Notes |
|-------|------|-------|
| MiniFASNet V1SE | 1.2 MB | Squeeze-and-Excitation variant |
| **MiniFASNet V2** :material-check-circle: | 1.2 MB | Improved version (recommended) |
| Model | Size |
|-------|------|
| MiniFASNet V1SE | 1.2 MB |
| **MiniFASNet V2** :material-check-circle: | 1.2 MB |

---

@@ -17,7 +17,7 @@ Face anti-spoofing detects whether a face is real (live) or fake (photo, video r

```python
import cv2
from uniface import RetinaFace
from uniface.detection import RetinaFace
from uniface.spoofing import MiniFASNet

detector = RetinaFace()
@@ -69,20 +69,21 @@ spoofer = MiniFASNet(model_name=MiniFASNetWeights.V1SE)

## Confidence Thresholds

The default threshold is 0.5. Adjust for your use case:
`result.is_real` is based on the model's top predicted class (argmax). If you want stricter behavior,
apply your own confidence threshold:

```python
result = spoofer.predict(image, face.bbox)

# High security (fewer false accepts)
HIGH_THRESHOLD = 0.7
if result.confidence > HIGH_THRESHOLD:
if result.is_real and result.confidence > HIGH_THRESHOLD:
    print("Real (high confidence)")
else:
    print("Suspicious")

# Balanced
if result.is_real:  # Uses default 0.5 threshold
# Balanced (argmax decision)
if result.is_real:
    print("Real")
else:
    print("Fake")
@@ -127,7 +128,7 @@ cv2.imwrite("spoofing_result.jpg", image)

```python
import cv2
from uniface import RetinaFace
from uniface.detection import RetinaFace
from uniface.spoofing import MiniFASNet

detector = RetinaFace()
@@ -252,7 +253,7 @@ python tools/spoofing.py --source 0
## Factory Function

```python
from uniface import create_spoofer
from uniface.spoofing import create_spoofer

spoofer = create_spoofer()  # Returns MiniFASNet
```

263
docs/modules/tracking.md
Normal file
@@ -0,0 +1,263 @@
# Tracking

Multi-object tracking using [BYTETracker](https://github.com/yakhyo/bytetrack-tracker) with Kalman filtering and IoU-based association. The tracker assigns persistent IDs to detected objects across video frames using a two-stage association strategy — first matching high-confidence detections, then low-confidence ones.

---

## How It Works

BYTETracker takes detection bounding boxes as input and returns tracked bounding boxes with persistent IDs. It does not depend on any specific detector — any source of `[x1, y1, x2, y2, score]` arrays will work.

Each frame, the tracker:

1. Splits detections into high-confidence and low-confidence groups
2. Matches high-confidence detections to existing tracks using IoU
3. Matches remaining tracks to low-confidence detections (second chance)
4. Starts new tracks for unmatched high-confidence detections
5. Removes tracks that have been lost for too long

The Kalman filter predicts where each track will be in the next frame, which helps maintain associations even when detections are noisy.

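To make the two-stage idea concrete, here is a small illustrative sketch of steps 1-2 in plain NumPy. The helper names and structure are ours, not the actual BYTETracker internals (those live in the linked repository):

```python
import numpy as np

def iou(a: np.ndarray, b: np.ndarray) -> float:
    """IoU of two [x1, y1, x2, y2] boxes."""
    x1, y1 = max(a[0], b[0]), max(a[1], b[1])
    x2, y2 = min(a[2], b[2]), min(a[3], b[3])
    inter = max(0.0, x2 - x1) * max(0.0, y2 - y1)
    area_a = (a[2] - a[0]) * (a[3] - a[1])
    area_b = (b[2] - b[0]) * (b[3] - b[1])
    return inter / (area_a + area_b - inter + 1e-9)

def split_detections(dets: np.ndarray, track_thresh: float = 0.5, low_thresh: float = 0.1):
    """Step 1: partition [x1, y1, x2, y2, score] rows by confidence."""
    scores = dets[:, 4]
    high = dets[scores >= track_thresh]
    low = dets[(scores >= low_thresh) & (scores < track_thresh)]
    return high, low
```

High-confidence detections are matched to existing tracks first (step 2); only tracks left unmatched after that pass are compared, again by IoU, against the low-confidence group (step 3).
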
---

## Basic Usage

```python
import cv2
import numpy as np
from uniface.common import xyxy_to_cxcywh
from uniface.detection import SCRFD
from uniface.tracking import BYTETracker
from uniface.draw import draw_tracks

detector = SCRFD()
tracker = BYTETracker(track_thresh=0.5, track_buffer=30)

cap = cv2.VideoCapture("video.mp4")

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # 1. Detect faces
    faces = detector.detect(frame)

    # 2. Build detections array: [x1, y1, x2, y2, score]
    dets = np.array([[*f.bbox, f.confidence] for f in faces])
    dets = dets if len(dets) > 0 else np.empty((0, 5))

    # 3. Update tracker
    tracks = tracker.update(dets)

    # 4. Map track IDs back to face objects
    if len(tracks) > 0 and len(faces) > 0:
        face_bboxes = np.array([f.bbox for f in faces], dtype=np.float32)
        track_ids = tracks[:, 4].astype(int)

        face_centers = xyxy_to_cxcywh(face_bboxes)[:, :2]
        track_centers = xyxy_to_cxcywh(tracks[:, :4])[:, :2]

        for ti in range(len(tracks)):
            dists = (track_centers[ti, 0] - face_centers[:, 0]) ** 2 + (track_centers[ti, 1] - face_centers[:, 1]) ** 2
            faces[int(np.argmin(dists))].track_id = track_ids[ti]

    # 5. Draw
    tracked_faces = [f for f in faces if f.track_id is not None]
    draw_tracks(image=frame, faces=tracked_faces)
    cv2.imshow("Tracking", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
```

Each track ID gets a deterministic color via golden-ratio hue stepping, so the same person keeps the same color across the entire video.

---

## Webcam Tracking

```python
import cv2
import numpy as np
from uniface.common import xyxy_to_cxcywh
from uniface.detection import SCRFD
from uniface.tracking import BYTETracker
from uniface.draw import draw_tracks

detector = SCRFD()
tracker = BYTETracker(track_thresh=0.5, track_buffer=30)
cap = cv2.VideoCapture(0)

while True:
    ret, frame = cap.read()
    if not ret:
        break

    faces = detector.detect(frame)
    dets = np.array([[*f.bbox, f.confidence] for f in faces])
    dets = dets if len(dets) > 0 else np.empty((0, 5))

    tracks = tracker.update(dets)

    if len(tracks) > 0 and len(faces) > 0:
        face_bboxes = np.array([f.bbox for f in faces], dtype=np.float32)
        track_ids = tracks[:, 4].astype(int)

        face_centers = xyxy_to_cxcywh(face_bboxes)[:, :2]
        track_centers = xyxy_to_cxcywh(tracks[:, :4])[:, :2]

        for ti in range(len(tracks)):
            dists = (track_centers[ti, 0] - face_centers[:, 0]) ** 2 + (track_centers[ti, 1] - face_centers[:, 1]) ** 2
            faces[int(np.argmin(dists))].track_id = track_ids[ti]

    draw_tracks(image=frame, faces=[f for f in faces if f.track_id is not None])
    cv2.imshow("Face Tracking - Press 'q' to quit", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
```

---

## Parameters

```python
from uniface.tracking import BYTETracker

tracker = BYTETracker(
    track_thresh=0.5,
    track_buffer=30,
    match_thresh=0.8,
    low_thresh=0.1,
)
```

| Parameter | Default | Description |
|-----------|---------|-------------|
| `track_thresh` | 0.5 | Detections above this score go through first-pass association |
| `track_buffer` | 30 | How many frames to keep a lost track before removing it |
| `match_thresh` | 0.8 | IoU threshold for matching tracks to detections |
| `low_thresh` | 0.1 | Detections below this score are discarded entirely |

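Putting the defaults together: the two score thresholds partition each frame's detections before any IoU matching happens. A quick sketch with made-up scores:

```python
import numpy as np
from uniface.tracking import BYTETracker

tracker = BYTETracker(track_thresh=0.5, track_buffer=30, match_thresh=0.8, low_thresh=0.1)

dets = np.array([
    [100, 50, 200, 160, 0.95],  # >= track_thresh: first-pass association
    [300, 80, 380, 200, 0.30],  # between low_thresh and track_thresh: second pass
    [10, 10, 20, 20, 0.05],     # < low_thresh: discarded
])
tracks = tracker.update(dets)
```
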
---

## Input / Output

**Input** — `(N, 5)` numpy array with `[x1, y1, x2, y2, confidence]` per detection:

```python
detections = np.array([
    [100, 50, 200, 160, 0.95],
    [300, 80, 380, 200, 0.87],
])
```

**Output** — `(M, 5)` numpy array with `[x1, y1, x2, y2, track_id]` per active track:

```python
tracks = tracker.update(detections)
# array([[101.2,  51.3, 199.8, 159.8,   1.],
#        [300.5,  80.2, 379.7, 200.1,   2.]])
```

The output bounding boxes come from the Kalman filter prediction, so they may differ slightly from the input. Track IDs are integers that persist across frames for the same object.

---

## Resetting the Tracker

When switching to a different video or scene, reset the tracker to clear all internal state:

```python
tracker.reset()
```

This clears all active, lost, and removed tracks, resets the frame counter, and resets the ID counter back to zero.

---

## Visualization

`draw_tracks` draws bounding boxes color-coded by track ID:

```python
from uniface.draw import draw_tracks

draw_tracks(
    image=frame,
    faces=tracked_faces,
    draw_landmarks=True,
    draw_id=True,
    corner_bbox=True,
)
```

---

## Small Face Performance

!!! warning "Tracking performance with small faces"
    The tracker relies on IoU (Intersection over Union) to match detections across
    frames. When faces occupy a small portion of the image — for example in
    surveillance footage or wide-angle cameras — even slight movement between frames
    can cause a large drop in IoU. This makes it harder for the tracker to maintain
    consistent IDs, and you may see IDs switching or resetting more often than expected.

    This is not specific to BYTETracker; it applies to any IoU-based tracker. A few
    things that can help:

    - **Lower `match_thresh`** (e.g. `0.5` or `0.6`) so the tracker accepts lower
      overlap as a valid match.
    - **Increase `track_buffer`** (e.g. `60` or higher) to hold onto lost tracks
      longer before discarding them.
    - **Use a higher-resolution input** if possible, so face bounding boxes are
      larger in pixel terms.

    ```python
    tracker = BYTETracker(
        track_thresh=0.4,
        track_buffer=60,
        match_thresh=0.6,
    )
    ```

---

## CLI Tool

```bash
# Track faces in a video
python tools/track.py --source video.mp4

# Webcam
python tools/track.py --source 0

# Save output
python tools/track.py --source video.mp4 --output tracked.mp4

# Use RetinaFace instead of SCRFD
python tools/track.py --source video.mp4 --detector retinaface

# Keep lost tracks longer
python tools/track.py --source video.mp4 --track-buffer 60
```

---

## References

- [yakhyo/bytetrack-tracker](https://github.com/yakhyo/bytetrack-tracker) — standalone BYTETracker implementation used in UniFace
- [ByteTrack paper](https://arxiv.org/abs/2110.06864) — Zhang et al., "ByteTrack: Multi-Object Tracking by Associating Every Detection Box"

---

## See Also

- [Detection](detection.md) — face detection models
- [Video & Webcam](../recipes/video-webcam.md) — video processing patterns
- [Inputs & Outputs](../concepts/inputs-outputs.md) — data types and formats
@@ -16,6 +16,8 @@ Run UniFace examples directly in your browser with Google Colab, or download and
| [Face Parsing](https://github.com/yakhyo/uniface/blob/main/examples/06_face_parsing.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/06_face_parsing.ipynb) | Semantic face segmentation |
| [Face Anonymization](https://github.com/yakhyo/uniface/blob/main/examples/07_face_anonymization.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/07_face_anonymization.ipynb) | Privacy-preserving blur |
| [Gaze Estimation](https://github.com/yakhyo/uniface/blob/main/examples/08_gaze_estimation.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/08_gaze_estimation.ipynb) | Gaze direction estimation |
| [Face Segmentation](https://github.com/yakhyo/uniface/blob/main/examples/09_face_segmentation.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/09_face_segmentation.ipynb) | Face segmentation with XSeg |
| [Face Vector Store](https://github.com/yakhyo/uniface/blob/main/examples/10_face_vector_store.ipynb) | [](https://colab.research.google.com/github/yakhyo/uniface/blob/main/examples/10_face_vector_store.ipynb) | FAISS-backed face database |

---

7
docs/overrides/main.html
Normal file
@@ -0,0 +1,7 @@
{% extends "base.html" %}

{% block announce %}
<a href="https://github.com/yakhyo/uniface" target="_blank" rel="noopener">
  Support our work — give UniFace a <span class="twemoji">{% include ".icons/octicons/star-fill-16.svg" %}</span> on <strong>GitHub</strong> and help us reach more developers!
</a>
{% endblock %}
@@ -10,7 +10,7 @@ Detect faces in an image:

```python
import cv2
from uniface import RetinaFace
from uniface.detection import RetinaFace

# Load image
image = cv2.imread("photo.jpg")
@@ -46,8 +46,8 @@ Draw bounding boxes and landmarks:

```python
import cv2
from uniface import RetinaFace
from uniface.visualization import draw_detections
from uniface.detection import RetinaFace
from uniface.draw import draw_detections

# Detect faces
detector = RetinaFace()
@@ -81,7 +81,8 @@ Compare two faces:
```python
import cv2
import numpy as np
from uniface import RetinaFace, ArcFace
from uniface.detection import RetinaFace
from uniface.recognition import ArcFace

# Initialize models
detector = RetinaFace()
@@ -121,7 +122,8 @@ if faces1 and faces2:

```python
import cv2
from uniface import RetinaFace, AgeGender
from uniface.attribute import AgeGender
from uniface.detection import RetinaFace

# Initialize models
detector = RetinaFace()
@@ -152,7 +154,8 @@ Detect race, gender, and age group:

```python
import cv2
from uniface import RetinaFace, FairFace
from uniface.attribute import FairFace
from uniface.detection import RetinaFace

detector = RetinaFace()
fairface = FairFace()
@@ -178,7 +181,8 @@ Face 2: Female, 20-29, White

```python
import cv2
from uniface import RetinaFace, Landmark106
from uniface.detection import RetinaFace
from uniface.landmark import Landmark106

detector = RetinaFace()
landmarker = Landmark106()
@@ -204,8 +208,9 @@ if faces:
```python
import cv2
import numpy as np
from uniface import RetinaFace, MobileGaze
from uniface.visualization import draw_gaze
from uniface.detection import RetinaFace
from uniface.gaze import MobileGaze
from uniface.draw import draw_gaze

detector = RetinaFace()
gaze_estimator = MobileGaze()
@@ -237,7 +242,7 @@ Segment face into semantic components:
import cv2
import numpy as np
from uniface.parsing import BiSeNet
from uniface.visualization import vis_parsing_maps
from uniface.draw import vis_parsing_maps

parser = BiSeNet()

@@ -261,26 +266,24 @@ print(f"Detected {len(np.unique(mask))} facial components")
Blur faces for privacy protection:

```python
from uniface.privacy import anonymize_faces
import cv2

# One-liner: automatic detection and blurring
image = cv2.imread("group_photo.jpg")
anonymized = anonymize_faces(image, method='pixelate')
cv2.imwrite("anonymized.jpg", anonymized)
```

**Manual control:**

```python
from uniface import RetinaFace
from uniface.detection import RetinaFace
from uniface.privacy import BlurFace

detector = RetinaFace()
blurrer = BlurFace(method='gaussian', blur_strength=5.0)
blurrer = BlurFace(method='pixelate')

image = cv2.imread("group_photo.jpg")
faces = detector.detect(image)
anonymized = blurrer.anonymize(image, faces)
cv2.imwrite("anonymized.jpg", anonymized)
```

**Custom blur settings:**

```python
blurrer = BlurFace(method='gaussian', blur_strength=5.0)
anonymized = blurrer.anonymize(image, faces)
```

**Available methods:**
@@ -301,7 +304,7 @@ Detect real vs. fake faces:

```python
import cv2
from uniface import RetinaFace
from uniface.detection import RetinaFace
from uniface.spoofing import MiniFASNet

detector = RetinaFace()
@@ -324,8 +327,8 @@ Real-time face detection:

```python
import cv2
from uniface import RetinaFace
from uniface.visualization import draw_detections
from uniface.detection import RetinaFace
from uniface.draw import draw_detections

detector = RetinaFace()
cap = cv2.VideoCapture(0)
@@ -355,20 +358,75 @@ cv2.destroyAllWindows()

---

## Face Tracking

Track faces across video frames with persistent IDs:

```python
import cv2
import numpy as np
from uniface.common import xyxy_to_cxcywh
from uniface.detection import SCRFD
from uniface.tracking import BYTETracker
from uniface.draw import draw_tracks

detector = SCRFD()
tracker = BYTETracker(track_thresh=0.5, track_buffer=30)

cap = cv2.VideoCapture("video.mp4")

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    faces = detector.detect(frame)
    dets = np.array([[*f.bbox, f.confidence] for f in faces])
    dets = dets if len(dets) > 0 else np.empty((0, 5))

    tracks = tracker.update(dets)

    # Assign track IDs to faces
    if len(tracks) > 0 and len(faces) > 0:
        face_bboxes = np.array([f.bbox for f in faces], dtype=np.float32)
        track_ids = tracks[:, 4].astype(int)

        face_centers = xyxy_to_cxcywh(face_bboxes)[:, :2]
        track_centers = xyxy_to_cxcywh(tracks[:, :4])[:, :2]

        for ti in range(len(tracks)):
            dists = (track_centers[ti, 0] - face_centers[:, 0]) ** 2 + (track_centers[ti, 1] - face_centers[:, 1]) ** 2
            faces[int(np.argmin(dists))].track_id = track_ids[ti]

    tracked_faces = [f for f in faces if f.track_id is not None]
    draw_tracks(image=frame, faces=tracked_faces)
    cv2.imshow("Tracking", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
```

For more details, see the [Tracking module](modules/tracking.md).

---

## Model Selection

For detailed model comparisons, benchmarks, and selection guidance, see the [Model Zoo](models.md).
For detailed model comparisons and benchmarks, see the [Model Zoo](models.md).

**Quick recommendations:**
**Available models by task:**

| Task | Recommended Model | Alternative |
|------|-------------------|-------------|
| Detection (balanced) | `RetinaFace` (MNET_V2) | `YOLOv5Face` (YOLOV5S) |
| Detection (speed) | `RetinaFace` (MNET_025) | `SCRFD` (SCRFD_500M) |
| Detection (accuracy) | `SCRFD` (SCRFD_10G) | `RetinaFace` (RESNET34) |
| Recognition | `ArcFace` (MNET) | `AdaFace` (IR_18) |
| Gaze | `MobileGaze` (RESNET34) | `MobileGaze` (MOBILEONE_S0) |
| Parsing | `BiSeNet` (RESNET18) | `BiSeNet` (RESNET34) |
| Task | Available Models |
|------|------------------|
| Detection | `RetinaFace`, `SCRFD`, `YOLOv5Face`, `YOLOv8Face` |
| Recognition | `ArcFace`, `AdaFace`, `MobileFace`, `SphereFace` |
| Tracking | `BYTETracker` |
| Gaze | `MobileGaze` (ResNet18/34/50, MobileNetV2, MobileOneS0) |
| Parsing | `BiSeNet` (ResNet18/34) |
| Attributes | `AgeGender`, `FairFace`, `Emotion` |
| Anti-Spoofing | `MiniFASNet` (V1SE, V2) |

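As a quick sketch, one model per task with default weights (constructors as used elsewhere in this guide; defaults assumed where no arguments are shown):

```python
from uniface.attribute import AgeGender
from uniface.detection import RetinaFace
from uniface.gaze import MobileGaze
from uniface.parsing import BiSeNet
from uniface.recognition import ArcFace
from uniface.spoofing import MiniFASNet
from uniface.tracking import BYTETracker

detector = RetinaFace()                                    # detection
recognizer = ArcFace()                                     # recognition
tracker = BYTETracker(track_thresh=0.5, track_buffer=30)   # tracking
gaze_estimator = MobileGaze()                              # gaze
parser = BiSeNet()                                         # parsing
age_gender = AgeGender()                                   # attributes
spoofer = MiniFASNet()                                     # anti-spoofing
```
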
---

@@ -407,13 +465,18 @@ python -c "import platform; print(platform.machine())"
### Import Errors

```python
# Correct imports
from uniface.detection import RetinaFace
from uniface.recognition import ArcFace
from uniface.detection import RetinaFace, SCRFD
from uniface.recognition import ArcFace, AdaFace
from uniface.attribute import AgeGender, FairFace
from uniface.landmark import Landmark106

# Also works (re-exported at package level)
from uniface import RetinaFace, ArcFace, Landmark106
from uniface.gaze import MobileGaze
from uniface.parsing import BiSeNet, XSeg
from uniface.privacy import BlurFace
from uniface.spoofing import MiniFASNet
from uniface.tracking import BYTETracker
from uniface.analyzer import FaceAnalyzer
from uniface.indexing import FAISS  # pip install faiss-cpu
from uniface.draw import draw_detections, draw_tracks
```

---

@@ -11,7 +11,7 @@ Blur faces in real-time video streams for privacy protection.

```python
import cv2
from uniface import RetinaFace
from uniface.detection import RetinaFace
from uniface.privacy import BlurFace

detector = RetinaFace()
@@ -40,7 +40,7 @@ cv2.destroyAllWindows()

```python
import cv2
from uniface import RetinaFace
from uniface.detection import RetinaFace
from uniface.privacy import BlurFace

detector = RetinaFace()
@@ -67,14 +67,19 @@ out.release()

---

## One-Liner for Images
## Single Image

```python
from uniface.privacy import anonymize_faces
import cv2
from uniface.detection import RetinaFace
from uniface.privacy import BlurFace

detector = RetinaFace()
blurrer = BlurFace(method='pixelate')

image = cv2.imread("photo.jpg")
result = anonymize_faces(image, method='pixelate')
faces = detector.detect(image)
result = blurrer.anonymize(image, faces)
cv2.imwrite("anonymized.jpg", result)
```

@@ -84,7 +89,7 @@ cv2.imwrite("anonymized.jpg", result)

| Method | Usage |
|--------|-------|
| Pixelate | `BlurFace(method='pixelate', pixel_blocks=10)` |
| Pixelate | `BlurFace(method='pixelate', pixel_blocks=15)` |
| Gaussian | `BlurFace(method='gaussian', blur_strength=3.0)` |
| Blackout | `BlurFace(method='blackout', color=(0,0,0))` |
| Elliptical | `BlurFace(method='elliptical', margin=20)` |

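To compare the methods on the same input, a short illustrative loop (file names are placeholders; per-method parameters fall back to their defaults):

```python
import cv2
from uniface.detection import RetinaFace
from uniface.privacy import BlurFace

detector = RetinaFace()
image = cv2.imread("photo.jpg")
faces = detector.detect(image)

# One output image per anonymization method from the table above
for method in ('pixelate', 'gaussian', 'blackout', 'elliptical'):
    blurrer = BlurFace(method=method)
    cv2.imwrite(f"anonymized_{method}.jpg", blurrer.anonymize(image, faces))
```
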
@@ -12,7 +12,7 @@ Process multiple images efficiently.
```python
import cv2
from pathlib import Path
from uniface import RetinaFace
from uniface.detection import RetinaFace

detector = RetinaFace()

@@ -54,7 +54,8 @@ for image_path in tqdm(image_files, desc="Processing"):
## Extract Embeddings

```python
from uniface import RetinaFace, ArcFace
from uniface.detection import RetinaFace
from uniface.recognition import ArcFace
import numpy as np

detector = RetinaFace()

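The hunk above cuts off after the detector is created. A typical continuation of the embedding-extraction loop, sketched with placeholder paths, looks like this:

```python
import cv2
import numpy as np
from pathlib import Path
from uniface.detection import RetinaFace
from uniface.recognition import ArcFace

detector = RetinaFace()
recognizer = ArcFace()

embeddings = []
for image_path in Path("images/").glob("*.jpg"):  # placeholder folder
    image = cv2.imread(str(image_path))
    faces = detector.detect(image)
    if faces:
        # Normalized embedding for the first detected face
        embeddings.append(recognizer.get_normalized_embedding(image, faces[0].landmarks))

embeddings = np.vstack(embeddings) if embeddings else np.empty((0, 512))
```
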
@@ -27,29 +27,29 @@ import numpy as np

class MyDetector(BaseDetector):
    def __init__(self, model_path: str, confidence_threshold: float = 0.5):
        super().__init__(confidence_threshold=confidence_threshold)
        self.session = create_onnx_session(model_path)
        self.threshold = confidence_threshold

    def preprocess(self, image: np.ndarray) -> np.ndarray:
        # Your preprocessing logic
        # e.g., resize, normalize, transpose
        raise NotImplementedError

    def postprocess(self, outputs, shape) -> list[Face]:
        # Your postprocessing logic
        # e.g., decode boxes, apply NMS, create Face objects
        raise NotImplementedError

    def detect(self, image: np.ndarray) -> list[Face]:
        # 1. Preprocess image
        input_tensor = self._preprocess(image)
        input_tensor = self.preprocess(image)

        # 2. Run inference
        outputs = self.session.run(None, {'input': input_tensor})

        # 3. Postprocess outputs to Face objects
        faces = self._postprocess(outputs, image.shape)
        return faces

    def _preprocess(self, image):
        # Your preprocessing logic
        # e.g., resize, normalize, transpose
        pass

    def _postprocess(self, outputs, shape):
        # Your postprocessing logic
        # e.g., decode boxes, apply NMS, create Face objects
        pass
        return self.postprocess(outputs, image.shape)
```

---
@@ -57,36 +57,14 @@ class MyDetector(BaseDetector):
## Add Custom Recognition Model

```python
from uniface.recognition.base import BaseRecognizer
from uniface.onnx_utils import create_onnx_session
from uniface import face_alignment
import numpy as np
from uniface.recognition.base import BaseRecognizer, PreprocessConfig

class MyRecognizer(BaseRecognizer):
    def __init__(self, model_path: str):
        self.session = create_onnx_session(model_path)
    def __init__(self, model_path: str, providers=None):
        preprocessing = PreprocessConfig(input_mean=127.5, input_std=127.5, input_size=(112, 112))
        super().__init__(model_path, preprocessing, providers=providers)

    def get_normalized_embedding(
        self,
        image: np.ndarray,
        landmarks: np.ndarray
    ) -> np.ndarray:
        # 1. Align face
        aligned = face_alignment(image, landmarks)

        # 2. Preprocess
        input_tensor = self._preprocess(aligned)

        # 3. Run inference
        embedding = self.session.run(None, {'input': input_tensor})[0]

        # 4. Normalize
        embedding = embedding / np.linalg.norm(embedding)
        return embedding

    def _preprocess(self, image):
        # Your preprocessing logic
        pass
    # Optional: override preprocess() if your model expects custom normalization.
```

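The base class's exact `preprocess()` signature is not shown in this diff, so treat the following override as a hypothetical sketch (assumed contract: aligned BGR face crop in, NCHW float tensor out):

```python
import numpy as np

class MyRecognizerCustomNorm(MyRecognizer):
    def preprocess(self, face: np.ndarray) -> np.ndarray:
        # Hypothetical override: scale pixels to [0, 1] instead of the
        # (x - 127.5) / 127.5 normalization from PreprocessConfig above.
        blob = face.astype(np.float32) / 255.0
        return blob.transpose(2, 0, 1)[np.newaxis]  # HWC -> NCHW
```
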
---

@@ -1,178 +1,166 @@
# Face Search

Build a face search system for finding people in images.
Find and identify people in images and video streams.

!!! note "Work in Progress"
    This page contains example code patterns. Test thoroughly before using in production.
UniFace supports two search approaches:

| Approach | Use case | Tool |
| -------------------- | ------------------------------------------------ | ----------------------- |
| **Reference search** | "Is this specific person in the video?" | `tools/search.py` |
| **Vector search** | "Who is this?" against a database of known faces | `tools/faiss_search.py` |

---

## Basic Face Database
## Reference Search (single image)

Compare every detected face against a single reference photo:

```python
import cv2
import numpy as np
from uniface.detection import RetinaFace
from uniface.recognition import ArcFace
from uniface.face_utils import compute_similarity

detector = RetinaFace()
recognizer = ArcFace()

ref_image = cv2.imread("reference.jpg")
ref_faces = detector.detect(ref_image)
ref_embedding = recognizer.get_normalized_embedding(ref_image, ref_faces[0].landmarks)

query_image = cv2.imread("group_photo.jpg")
faces = detector.detect(query_image)

for face in faces:
    embedding = recognizer.get_normalized_embedding(query_image, face.landmarks)
    sim = compute_similarity(ref_embedding, embedding)

    label = f"Match ({sim:.2f})" if sim > 0.4 else f"Unknown ({sim:.2f})"
    print(label)
```

**CLI tool:**

```bash
python tools/search.py --reference ref.jpg --source video.mp4
python tools/search.py --reference ref.jpg --source 0  # webcam
```

---

## Vector Search (FAISS index)

For identifying faces against a database of many known people, use the
[`FAISS`](../modules/indexing.md) vector store.

!!! info "Install extra"
    ```bash
    pip install faiss-cpu
    ```

### Build an index

Organise face images in person sub-folders:

```
dataset/
├── alice/
│   ├── 001.jpg
│   └── 002.jpg
├── bob/
│   └── 001.jpg
└── charlie/
    ├── 001.jpg
    └── 002.jpg
```

```python
import cv2
from pathlib import Path
from uniface import RetinaFace, ArcFace
from uniface.detection import RetinaFace
from uniface.recognition import ArcFace
from uniface.indexing import FAISS

class FaceDatabase:
    def __init__(self):
        self.detector = RetinaFace()
        self.recognizer = ArcFace()
        self.embeddings = {}
detector = RetinaFace()
recognizer = ArcFace()
store = FAISS(db_path="./my_index")

    def add_face(self, person_id, image):
        """Add a face to the database."""
        faces = self.detector.detect(image)
        if not faces:
            raise ValueError(f"No face found for {person_id}")
for person_dir in sorted(Path("dataset").iterdir()):
    if not person_dir.is_dir():
        continue
    for img_path in person_dir.glob("*.jpg"):
        image = cv2.imread(str(img_path))
        faces = detector.detect(image)
        if faces:
            emb = recognizer.get_normalized_embedding(image, faces[0].landmarks)
            store.add(emb, {"person_id": person_dir.name, "source": str(img_path)})

        face = max(faces, key=lambda f: f.confidence)
        embedding = self.recognizer.get_normalized_embedding(image, face.landmarks)
        self.embeddings[person_id] = embedding
        return True

    def search(self, image, threshold=0.6):
        """Search for faces in an image."""
        faces = self.detector.detect(image)
        results = []

        for face in faces:
            embedding = self.recognizer.get_normalized_embedding(image, face.landmarks)

            best_match = None
            best_similarity = -1

            for person_id, db_embedding in self.embeddings.items():
                similarity = np.dot(embedding, db_embedding.T)[0][0]
                if similarity > best_similarity:
                    best_similarity = similarity
                    best_match = person_id

            results.append({
                'bbox': face.bbox,
                'match': best_match if best_similarity >= threshold else None,
                'similarity': best_similarity
            })

        return results

    def save(self, path):
        """Save database to file."""
        np.savez(path, embeddings=dict(self.embeddings))

    def load(self, path):
        """Load database from file."""
        data = np.load(path, allow_pickle=True)
        self.embeddings = data['embeddings'].item()

# Usage
db = FaceDatabase()

# Add faces
for image_path in Path("known_faces/").glob("*.jpg"):
    person_id = image_path.stem
    image = cv2.imread(str(image_path))
    try:
        db.add_face(person_id, image)
        print(f"Added: {person_id}")
    except ValueError as e:
        print(f"Skipped: {e}")

# Save database
db.save("face_database.npz")

# Search
query_image = cv2.imread("group_photo.jpg")
results = db.search(query_image)

for r in results:
    if r['match']:
        print(f"Found: {r['match']} (similarity: {r['similarity']:.3f})")
store.save()
print(f"Index saved: {store}")
```

---
**CLI tool:**

## Visualization
```bash
python tools/faiss_search.py build --faces-dir dataset/ --db-path ./my_index
```

### Search against the index

```python
import cv2
from uniface.detection import RetinaFace
from uniface.recognition import ArcFace
from uniface.indexing import FAISS

def visualize_search_results(image, results):
    """Draw search results on image."""
    for r in results:
        x1, y1, x2, y2 = map(int, r['bbox'])
detector = RetinaFace()
recognizer = ArcFace()

        if r['match']:
            color = (0, 255, 0)  # Green for match
            label = f"{r['match']} ({r['similarity']:.2f})"
        else:
            color = (0, 0, 255)  # Red for unknown
            label = f"Unknown ({r['similarity']:.2f})"
store = FAISS(db_path="./my_index")
store.load()

        cv2.rectangle(image, (x1, y1), (x2, y2), color, 2)
        cv2.putText(image, label, (x1, y1 - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)
image = cv2.imread("query.jpg")
faces = detector.detect(image)

    return image
for face in faces:
    embedding = recognizer.get_normalized_embedding(image, face.landmarks)
    result, similarity = store.search(embedding, threshold=0.4)

# Usage
results = db.search(image)
annotated = visualize_search_results(image.copy(), results)
cv2.imwrite("search_result.jpg", annotated)
    if result:
        print(f"Matched: {result['person_id']} ({similarity:.2f})")
    else:
        print(f"Unknown ({similarity:.2f})")
```

---
**CLI tool:**

## Real-Time Search
```bash
python tools/faiss_search.py run --db-path ./my_index --source video.mp4
python tools/faiss_search.py run --db-path ./my_index --source 0  # webcam
```

### Manage the index

```python
import cv2
from uniface.indexing import FAISS

def realtime_search(db):
    """Real-time face search from webcam."""
    cap = cv2.VideoCapture(0)
store = FAISS(db_path="./my_index")
store.load()

    while True:
        ret, frame = cap.read()
        if not ret:
            break
print(f"Total vectors: {len(store)}")

        results = db.search(frame, threshold=0.5)
removed = store.remove("person_id", "bob")
print(f"Removed {removed} entries")

        for r in results:
            x1, y1, x2, y2 = map(int, r['bbox'])

            if r['match']:
                color = (0, 255, 0)
                label = r['match']
            else:
                color = (0, 0, 255)
                label = "Unknown"

            cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
            cv2.putText(frame, label, (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)

        cv2.imshow("Face Search", frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()

# Usage
db = FaceDatabase()
db.load("face_database.npz")
realtime_search(db)
store.save()
```

---

## See Also

- [Indexing Module](../modules/indexing.md) - Full `FAISS` API reference
- [Recognition Module](../modules/recognition.md) - Face recognition details
- [Batch Processing](batch-processing.md) - Process multiple files
- [Video & Webcam](video-webcam.md) - Real-time processing
- [Concepts: Thresholds](../concepts/thresholds-calibration.md) - Tuning similarity thresholds

@@ -8,8 +8,10 @@ A complete pipeline for processing images with detection, recognition, and attri

```python
import cv2
from uniface import RetinaFace, ArcFace, AgeGender
from uniface.visualization import draw_detections
from uniface.attribute import AgeGender
from uniface.detection import RetinaFace
from uniface.recognition import ArcFace
from uniface.draw import draw_detections

# Initialize models
detector = RetinaFace()
@@ -67,14 +69,21 @@ cv2.imwrite("result.jpg", result_image)
For convenience, use the built-in `FaceAnalyzer`:

```python
from uniface import FaceAnalyzer
from uniface.analyzer import FaceAnalyzer
from uniface.attribute import AgeGender
from uniface.detection import RetinaFace
from uniface.recognition import ArcFace
import cv2

# Initialize with desired modules
detector = RetinaFace()
recognizer = ArcFace()
age_gender = AgeGender()

analyzer = FaceAnalyzer(
    detect=True,
    recognize=True,
    attributes=True
    detector,
    recognizer=recognizer,
    age_gender=age_gender,
)

# Process image
@@ -97,13 +106,14 @@ Complete pipeline with all modules:
```python
import cv2
import numpy as np
from uniface import (
    RetinaFace, ArcFace, AgeGender, FairFace,
    Landmark106, MobileGaze
)
from uniface.attribute import AgeGender, FairFace
from uniface.detection import RetinaFace
from uniface.gaze import MobileGaze
from uniface.landmark import Landmark106
from uniface.recognition import ArcFace
from uniface.parsing import BiSeNet
from uniface.spoofing import MiniFASNet
from uniface.visualization import draw_detections, draw_gaze
from uniface.draw import draw_detections, draw_gaze

class FaceAnalysisPipeline:
    def __init__(self):
@@ -189,8 +199,10 @@ for i, r in enumerate(results):
```python
import cv2
import numpy as np
from uniface import RetinaFace, AgeGender, MobileGaze
from uniface.visualization import draw_detections, draw_gaze
from uniface.attribute import AgeGender
from uniface.detection import RetinaFace
from uniface.gaze import MobileGaze
from uniface.draw import draw_detections, draw_gaze

def visualize_analysis(image_path, output_path):
    """Create annotated visualization of face analysis."""

@@ -11,8 +11,8 @@ Real-time face analysis for video streams.

```python
import cv2
from uniface import RetinaFace
from uniface.visualization import draw_detections
from uniface.detection import RetinaFace
from uniface.draw import draw_detections

detector = RetinaFace()
cap = cv2.VideoCapture(0)
@@ -48,7 +48,7 @@ cv2.destroyAllWindows()

```python
import cv2
from uniface import RetinaFace
from uniface.detection import RetinaFace

def process_video(input_path, output_path):
    """Process a video file."""
@@ -83,6 +83,57 @@ process_video("input.mp4", "output.mp4")

---

## Webcam Tracking

To track faces across frames with persistent IDs, pair a detector with `BYTETracker`:

```python
import cv2
import numpy as np
from uniface.common import xyxy_to_cxcywh
from uniface.detection import SCRFD
from uniface.tracking import BYTETracker
from uniface.draw import draw_tracks

detector = SCRFD()
tracker = BYTETracker(track_thresh=0.5, track_buffer=30)
cap = cv2.VideoCapture(0)

while True:
    ret, frame = cap.read()
    if not ret:
        break

    faces = detector.detect(frame)
    dets = np.array([[*f.bbox, f.confidence] for f in faces])
    dets = dets if len(dets) > 0 else np.empty((0, 5))

    tracks = tracker.update(dets)

    if len(tracks) > 0 and len(faces) > 0:
        face_bboxes = np.array([f.bbox for f in faces], dtype=np.float32)
        track_ids = tracks[:, 4].astype(int)

        face_centers = xyxy_to_cxcywh(face_bboxes)[:, :2]
        track_centers = xyxy_to_cxcywh(tracks[:, :4])[:, :2]

        for ti in range(len(tracks)):
            dists = (track_centers[ti, 0] - face_centers[:, 0]) ** 2 + (track_centers[ti, 1] - face_centers[:, 1]) ** 2
            faces[int(np.argmin(dists))].track_id = track_ids[ti]

    draw_tracks(image=frame, faces=[f for f in faces if f.track_id is not None])
    cv2.imshow("Face Tracking", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
```

For more details on tracker parameters and tuning, see [Tracking](../modules/tracking.md).

---

## Performance Tips

### Skip Frames
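
The diff truncates this tip, but the usual pattern is to run detection only every Nth frame and reuse the previous results in between. A sketch:

```python
import cv2
from uniface.detection import RetinaFace

detector = RetinaFace()
cap = cv2.VideoCapture(0)

DETECT_EVERY = 3  # run the detector on every 3rd frame
frame_idx = 0
faces = []

while True:
    ret, frame = cap.read()
    if not ret:
        break
    if frame_idx % DETECT_EVERY == 0:
        faces = detector.detect(frame)  # refresh detections
    frame_idx += 1
    # Reuse `faces` here; results may lag by up to DETECT_EVERY - 1 frames

cap.release()
```
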
@@ -119,7 +170,8 @@ while True:

## See Also

- [Tracking Module](../modules/tracking.md) - Face tracking with BYTETracker
- [Anonymize Stream](anonymize-stream.md) - Privacy protection in video
- [Batch Processing](batch-processing.md) - Process multiple files
- [Detection Module](../modules/detection.md) - Detection options
- [Gaze Module](../modules/gaze.md) - Gaze tracking
- [Gaze Module](../modules/gaze.md) - Gaze estimation

@@ -51,7 +51,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"2.0.0\n"
"3.0.0\n"
]
}
],
@@ -62,7 +62,7 @@
"\n",
"import uniface\n",
"from uniface.detection import RetinaFace\n",
"from uniface.visualization import draw_detections\n",
"from uniface.draw import draw_detections\n",
"\n",
"print(uniface.__version__)"
]
@@ -162,7 +162,7 @@
"landmarks = [f.landmarks for f in faces]\n",
"\n",
"# Draw detections\n",
"draw_detections(image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=0.6, fancy_bbox=True)\n",
"draw_detections(image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=0.6, corner_bbox=True)\n",
"\n",
"# Display result\n",
"output_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)\n",
@@ -214,7 +214,7 @@
"scores = [f.confidence for f in faces]\n",
"landmarks = [f.landmarks for f in faces]\n",
"\n",
"draw_detections(image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=0.6, fancy_bbox=True)\n",
"draw_detections(image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=0.6, corner_bbox=True)\n",
"\n",
"output_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)\n",
"display.display(Image.fromarray(output_image))"
@@ -261,7 +261,7 @@
"scores = [f.confidence for f in faces]\n",
"landmarks = [f.landmarks for f in faces]\n",
"\n",
"draw_detections(image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=0.6, fancy_bbox=True)\n",
"draw_detections(image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=0.6, corner_bbox=True)\n",
"\n",
"output_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)\n",
"display.display(Image.fromarray(output_image))"

@@ -55,7 +55,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"2.0.0\n"
"3.0.0\n"
]
}
],
@@ -67,7 +67,7 @@
"import uniface\n",
"from uniface.detection import RetinaFace\n",
"from uniface.face_utils import face_alignment\n",
"from uniface.visualization import draw_detections\n",
"from uniface.draw import draw_detections\n",
"\n",
"print(uniface.__version__)"
]
@@ -142,7 +142,7 @@
" bboxes = [f.bbox for f in faces]\n",
" scores = [f.confidence for f in faces]\n",
" landmarks = [f.landmarks for f in faces]\n",
" draw_detections(image=bbox_image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=0.6, fancy_bbox=True)\n",
" draw_detections(image=bbox_image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=0.6, corner_bbox=True)\n",
"\n",
" # Align first detected face (returns aligned image and inverse transform matrix)\n",
" first_landmarks = faces[0].landmarks\n",

@@ -44,7 +44,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"2.0.0\n"
"3.0.0\n"
]
}
],
@@ -53,7 +53,7 @@
"import matplotlib.pyplot as plt\n",
"\n",
"import uniface\n",
"from uniface import FaceAnalyzer\n",
"from uniface.analyzer import FaceAnalyzer\n",
"from uniface.detection import RetinaFace\n",
"from uniface.recognition import ArcFace\n",
"\n",

@@ -11,7 +11,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -49,7 +49,7 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"2.0.0\n"
|
||||
"3.0.0\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -58,7 +58,7 @@
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"\n",
|
||||
"import uniface\n",
|
||||
"from uniface import FaceAnalyzer\n",
|
||||
"from uniface.analyzer import FaceAnalyzer\n",
|
||||
"from uniface.detection import RetinaFace\n",
|
||||
"from uniface.recognition import ArcFace\n",
|
||||
"\n",
|
||||
@@ -69,16 +69,7 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"✓ Model loaded (CoreML (Apple Silicon))\n",
|
||||
"✓ Model loaded (CoreML (Apple Silicon))\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"analyzer = FaceAnalyzer(\n",
|
||||
" detector=RetinaFace(confidence_threshold=0.5),\n",
|
||||
|
||||
@@ -51,7 +51,7 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"2.0.0\n"
|
||||
"3.0.0\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -60,11 +60,11 @@
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"\n",
|
||||
"import uniface\n",
|
||||
"from uniface import FaceAnalyzer\n",
|
||||
"from uniface.analyzer import FaceAnalyzer\n",
|
||||
"from uniface.detection import RetinaFace\n",
|
||||
"from uniface.recognition import ArcFace\n",
|
||||
"from uniface.attribute import AgeGender\n",
|
||||
"from uniface.visualization import draw_detections\n",
|
||||
"from uniface.draw import draw_detections\n",
|
||||
"\n",
|
||||
"print(uniface.__version__)"
|
||||
]
|
||||
@@ -148,7 +148,7 @@
|
||||
" bboxes = [f.bbox for f in faces]\n",
|
||||
" scores = [f.confidence for f in faces]\n",
|
||||
" landmarks = [f.landmarks for f in faces]\n",
|
||||
" draw_detections(image=vis_image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=0.5, fancy_bbox=True)\n",
|
||||
" draw_detections(image=vis_image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=0.5, corner_bbox=True)\n",
|
||||
"\n",
|
||||
" results.append((image_path, cv2.cvtColor(vis_image, cv2.COLOR_BGR2RGB), faces))"
|
||||
]
|
||||
@@ -15,7 +15,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"metadata": {},
"outputs": [
{
@@ -53,7 +53,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"UniFace version: 2.0.0\n"
"UniFace version: 3.0.0\n"
]
}
],
@@ -66,7 +66,7 @@
"import uniface\n",
"from uniface.parsing import BiSeNet\n",
"from uniface.constants import ParsingWeights\n",
"from uniface.visualization import vis_parsing_maps\n",
"from uniface.draw import vis_parsing_maps\n",
"\n",
"print(f\"UniFace version: {uniface.__version__}\")"
]
@@ -82,15 +82,7 @@
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"✓ Model loaded (CoreML (Apple Silicon))\n"
]
}
],
"outputs": [],
"source": [
"# Initialize face parser (uses ResNet18 by default)\n",
"parser = BiSeNet(model_name=ParsingWeights.RESNET34)  # use resnet34 for better accuracy"

@@ -51,7 +51,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"UniFace version: 2.0.0\n"
"UniFace version: 3.0.0\n"
]
}
],
@@ -65,7 +65,7 @@
"import uniface\n",
"from uniface.detection import RetinaFace\n",
"from uniface.gaze import MobileGaze\n",
"from uniface.visualization import draw_gaze\n",
"from uniface.draw import draw_gaze\n",
"\n",
"print(f\"UniFace version: {uniface.__version__}\")"
]
@@ -110,19 +110,19 @@
"text": [
"Processing: image0.jpg\n",
"  Detected 1 face(s)\n",
"  Face 1: pitch=-0.0°, yaw=7.1°\n",
"  Face 1: pitch=7.1°, yaw=-0.0°\n",
"Processing: image1.jpg\n",
"  Detected 1 face(s)\n",
"  Face 1: pitch=-3.3°, yaw=-5.6°\n",
"  Face 1: pitch=-5.6°, yaw=-3.3°\n",
"Processing: image2.jpg\n",
"  Detected 1 face(s)\n",
"  Face 1: pitch=-3.9°, yaw=-0.3°\n",
"  Face 1: pitch=-0.3°, yaw=-3.9°\n",
"Processing: image3.jpg\n",
"  Detected 1 face(s)\n",
"  Face 1: pitch=-22.1°, yaw=1.0°\n",
"  Face 1: pitch=1.0°, yaw=-22.1°\n",
"Processing: image4.jpg\n",
"  Detected 1 face(s)\n",
"  Face 1: pitch=2.1°, yaw=5.0°\n",
"  Face 1: pitch=5.0°, yaw=2.1°\n",
"\n",
"Processed 5 images\n"
]
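
The corrected notebook outputs above swap the reported pitch/yaw pair. A sketch of drawing a gaze vector with the relocated helper; the `(pitch, yaw)` argument order is taken from `tests/test_draw.py` below, and the radian units are an inference from the sin/cos math there (the frame and box here are stand-ins):

```python
import numpy as np

from uniface.draw import draw_gaze

frame = np.zeros((480, 640, 3), dtype=np.uint8)          # stand-in frame
bbox = np.array([200, 150, 400, 350], dtype=np.float32)  # stand-in face box

pitch, yaw = np.deg2rad(7.1), np.deg2rad(-0.0)           # degrees -> radians
draw_gaze(frame, bbox, pitch, yaw, draw_bbox=True, draw_angles=True)
```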
503
examples/09_face_segmentation.ipynb
Normal file
366
examples/10_face_vector_store.ipynb
Normal file
@@ -48,6 +48,7 @@ theme:
- content.action.edit
- content.action.view
- content.tabs.link
- announce.dismiss
- toc.follow

icon:
@@ -134,6 +135,7 @@ nav:
- Quickstart: quickstart.md
- Notebooks: notebooks.md
- Model Zoo: models.md
- Datasets: datasets.md
- Tutorials:
  - Image Pipeline: recipes/image-pipeline.md
  - Video & Webcam: recipes/video-webcam.md
@@ -144,12 +146,14 @@ nav:
- API Reference:
  - Detection: modules/detection.md
  - Recognition: modules/recognition.md
  - Tracking: modules/tracking.md
  - Landmarks: modules/landmarks.md
  - Attributes: modules/attributes.md
  - Parsing: modules/parsing.md
  - Gaze: modules/gaze.md
  - Anti-Spoofing: modules/spoofing.md
  - Privacy: modules/privacy.md
  - Indexing: modules/indexing.md
- Guides:
  - Overview: concepts/overview.md
  - Inputs & Outputs: concepts/inputs-outputs.md
@@ -1,7 +1,7 @@
[project]
name = "uniface"
version = "2.1.0"
description = "UniFace: A Comprehensive Library for Face Detection, Recognition, Landmark Analysis, Face Parsing, Gaze Estimation, Age, and Gender Detection"
version = "3.1.0"
description = "UniFace: A Comprehensive Library for Face Detection, Recognition, Tracking, Landmark Analysis, Face Parsing, Gaze Estimation, Age, and Gender Detection"
readme = "README.md"
license = "MIT"
authors = [{ name = "Yakhyokhuja Valikhujaev", email = "yakhyo9696@gmail.com" }]
@@ -9,10 +9,11 @@ maintainers = [
{ name = "Yakhyokhuja Valikhujaev", email = "yakhyo9696@gmail.com" },
]

requires-python = ">=3.11,<3.14"
requires-python = ">=3.10,<3.14"
keywords = [
"face-detection",
"face-recognition",
"face-tracking",
"facial-landmarks",
"face-parsing",
"face-segmentation",
@@ -28,11 +29,12 @@ keywords = [
]

classifiers = [
"Development Status :: 4 - Beta",
"Development Status :: 5 - Production/Stable",
"Intended Audience :: Developers",
"Intended Audience :: Science/Research",
"Operating System :: OS Independent",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
@@ -41,9 +43,9 @@ classifiers = [
dependencies = [
"numpy>=1.21.0",
"opencv-python>=4.5.0",
"onnx>=1.12.0",
"onnxruntime>=1.16.0",
"scikit-image>=0.19.0",
"scipy>=1.7.0",
"requests>=2.28.0",
"tqdm>=4.64.0",
]
@@ -55,9 +57,9 @@ gpu = ["onnxruntime-gpu>=1.16.0"]
[project.urls]
Homepage = "https://github.com/yakhyo/uniface"
Repository = "https://github.com/yakhyo/uniface"
Documentation = "https://github.com/yakhyo/uniface/blob/main/README.md"
"Quick Start" = "https://github.com/yakhyo/uniface/blob/main/QUICKSTART.md"
"Model Zoo" = "https://github.com/yakhyo/uniface/blob/main/MODELS.md"
Documentation = "https://yakhyo.github.io/uniface"
"Quick Start" = "https://yakhyo.github.io/uniface/quickstart/"
"Model Zoo" = "https://yakhyo.github.io/uniface/models/"

[build-system]
requires = ["setuptools>=64", "wheel"]
@@ -71,7 +73,7 @@ uniface = ["py.typed"]

[tool.ruff]
line-length = 120
target-version = "py311"
target-version = "py310"
exclude = [
".git",
".ruff_cache",
@@ -127,15 +129,6 @@ section-order = [
[tool.ruff.lint.pydocstyle]
convention = "google"

[tool.mypy]
python_version = "3.11"
warn_return_any = false
warn_unused_ignores = true
ignore_missing_imports = true
exclude = ["tests/", "scripts/", "examples/"]
# Disable strict return type checking for numpy operations
disable_error_code = ["no-any-return"]

[tool.bandit]
exclude_dirs = ["tests", "scripts", "examples"]
skips = ["B101", "B614"]  # B101: assert, B614: torch.jit.load (models are SHA256 verified)
@@ -1,8 +1,7 @@
numpy>=1.21.0
opencv-python>=4.5.0
onnx>=1.12.0
onnxruntime>=1.16.0
scikit-image>=0.19.0
scipy>=1.7.0
requests>=2.28.0
pytest>=7.0.0
tqdm>=4.64.0

@@ -2,7 +2,6 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

"""Tests for AgeGender attribute predictor."""

from __future__ import annotations
61
tests/test_draw.py
Normal file
@@ -0,0 +1,61 @@
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

from __future__ import annotations

import numpy as np

from uniface.draw import draw_gaze


def _compute_gaze_delta(bbox: np.ndarray, pitch: float, yaw: float) -> tuple[int, int]:
    """Replicate draw_gaze dx/dy math for verification."""
    x_min, _, x_max, _ = map(int, bbox[:4])
    length = x_max - x_min
    dx = int(-length * np.sin(yaw) * np.cos(pitch))
    dy = int(-length * np.sin(pitch))
    return dx, dy


def test_draw_gaze_yaw_only_moves_horizontally():
    """Yaw-only input (pitch=0) should produce horizontal displacement only."""
    image = np.zeros((200, 200, 3), dtype=np.uint8)
    bbox = np.array([50, 50, 150, 150], dtype=np.float32)

    yaw = 0.5
    pitch = 0.0
    dx, dy = _compute_gaze_delta(bbox, pitch, yaw)

    assert dx != 0, 'Yaw-only should produce horizontal displacement'
    assert dy == 0, 'Yaw-only should produce zero vertical displacement'

    # Should not raise
    draw_gaze(image, bbox, pitch, yaw, draw_bbox=False, draw_angles=False)


def test_draw_gaze_pitch_only_moves_vertically():
    """Pitch-only input (yaw=0) should produce vertical displacement only."""
    image = np.zeros((200, 200, 3), dtype=np.uint8)
    bbox = np.array([50, 50, 150, 150], dtype=np.float32)

    yaw = 0.0
    pitch = 0.5
    dx, dy = _compute_gaze_delta(bbox, pitch, yaw)

    assert dx == 0, 'Pitch-only should produce zero horizontal displacement'
    assert dy != 0, 'Pitch-only should produce vertical displacement'

    # Should not raise
    draw_gaze(image, bbox, pitch, yaw, draw_bbox=False, draw_angles=False)


def test_draw_gaze_modifies_image():
    """draw_gaze should modify the image in place."""
    image = np.zeros((200, 200, 3), dtype=np.uint8)
    bbox = np.array([50, 50, 150, 150], dtype=np.float32)

    original = image.copy()
    draw_gaze(image, bbox, 0.3, 0.3)

    assert not np.array_equal(image, original), 'draw_gaze should modify the image'
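
For reference, the helper above replicates the displacement that `draw_gaze` applies to the gaze arrow: the gaze angles are projected onto the image plane and scaled by the face-box width. Reading the formulas straight off the code, with $L = x_{\max} - x_{\min}$:

$$
\Delta x = -L \sin(\mathrm{yaw})\cos(\mathrm{pitch}), \qquad \Delta y = -L \sin(\mathrm{pitch})
$$

so pure yaw moves the arrow horizontally and pure pitch moves it vertically, which is exactly what the two tests assert.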
@@ -2,7 +2,6 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

"""Tests for factory functions (create_detector, create_recognizer, etc.)."""

from __future__ import annotations

@@ -13,10 +12,10 @@ from uniface import (
    create_detector,
    create_landmarker,
    create_recognizer,
    detect_faces,
    list_available_detectors,
)
from uniface.constants import RetinaFaceWeights, SCRFDWeights
from uniface.spoofing import MiniFASNet, create_spoofer


# create_detector tests
@@ -123,62 +122,6 @@ def test_create_landmarker_invalid_method():
        create_landmarker('invalid_method')


# detect_faces tests
def test_detect_faces_retinaface():
    """
    Test high-level detect_faces function with RetinaFace.
    """
    mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
    faces = detect_faces(mock_image, method='retinaface')

    assert isinstance(faces, list), 'detect_faces should return a list'


def test_detect_faces_scrfd():
    """
    Test high-level detect_faces function with SCRFD.
    """
    mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
    faces = detect_faces(mock_image, method='scrfd')

    assert isinstance(faces, list), 'detect_faces should return a list'


def test_detect_faces_with_threshold():
    """
    Test detect_faces with custom confidence threshold.
    """
    mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
    faces = detect_faces(mock_image, method='retinaface', confidence_threshold=0.8)

    assert isinstance(faces, list), 'detect_faces should return a list'

    # All detections should respect threshold
    for face in faces:
        assert face.confidence >= 0.8, 'All detections should meet confidence threshold'


def test_detect_faces_default_method():
    """
    Test detect_faces with default method (should use retinaface).
    """
    mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
    faces = detect_faces(mock_image)  # No method specified

    assert isinstance(faces, list), 'detect_faces should return a list with default method'


def test_detect_faces_empty_image():
    """
    Test detect_faces on a blank image.
    """
    empty_image = np.zeros((640, 640, 3), dtype=np.uint8)
    faces = detect_faces(empty_image, method='retinaface')

    assert isinstance(faces, list), 'Should return a list even for empty image'
    assert len(faces) == 0, 'Should detect no faces in blank image'


# list_available_detectors tests
def test_list_available_detectors():
    """
@@ -280,3 +223,16 @@ def test_factory_returns_correct_types():
    assert isinstance(detector, RetinaFace), 'Should return RetinaFace instance'
    assert isinstance(recognizer, ArcFace), 'Should return ArcFace instance'
    assert isinstance(landmarker, Landmark106), 'Should return Landmark106 instance'


# create_spoofer tests
def test_create_spoofer_default():
    """Test creating a spoofer with default parameters."""
    spoofer = create_spoofer()
    assert isinstance(spoofer, MiniFASNet), 'Should return MiniFASNet instance'


def test_create_spoofer_with_providers():
    """Test that create_spoofer forwards providers kwarg without TypeError."""
    spoofer = create_spoofer(providers=['CPUExecutionProvider'])
    assert isinstance(spoofer, MiniFASNet), 'Should return MiniFASNet instance'
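
A compact sketch of the factory surface these tests exercise, using only calls that appear above (the input is a dummy array, so the detector may simply return an empty list):

```python
import numpy as np

from uniface import create_detector, detect_faces
from uniface.spoofing import create_spoofer

image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)

# One-shot helper with an explicit confidence floor
faces = detect_faces(image, method='retinaface', confidence_threshold=0.8)

# Or construct the pieces yourself; providers is forwarded to ONNX Runtime
detector = create_detector()
spoofer = create_spoofer(providers=['CPUExecutionProvider'])
print(len(faces), len(detector.detect(image)))
```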
@@ -2,7 +2,6 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

"""Tests for 106-point facial landmark detector."""

from __future__ import annotations


@@ -2,15 +2,14 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

"""Tests for BiSeNet face parsing model."""

from __future__ import annotations

import numpy as np
import pytest

from uniface.constants import ParsingWeights
from uniface.parsing import BiSeNet, create_face_parser
from uniface.constants import ParsingWeights, XSegWeights
from uniface.parsing import BiSeNet, XSeg, create_face_parser


def test_bisenet_initialization():
@@ -120,3 +119,151 @@ def test_bisenet_different_input_sizes():

        assert mask.shape == (h, w), f'Failed for size {h}x{w}'
        assert mask.dtype == np.uint8


# XSeg Tests


def test_xseg_initialization():
    """Test XSeg initialization."""
    parser = XSeg()
    assert parser is not None
    assert parser.input_size == (256, 256)
    assert parser.align_size == 256
    assert parser.blur_sigma == 0


def test_xseg_with_custom_params():
    """Test XSeg with custom parameters."""
    parser = XSeg(align_size=512, blur_sigma=5)
    assert parser.align_size == 512
    assert parser.blur_sigma == 5


def test_xseg_preprocess():
    """Test XSeg preprocessing."""
    parser = XSeg()

    # Create a dummy aligned face crop
    face_crop = np.random.randint(0, 255, (256, 256, 3), dtype=np.uint8)

    # Preprocess
    preprocessed = parser.preprocess(face_crop)

    assert preprocessed.shape == (1, 256, 256, 3)  # NHWC format
    assert preprocessed.dtype == np.float32
    assert preprocessed.min() >= 0
    assert preprocessed.max() <= 1


def test_xseg_postprocess():
    """Test XSeg postprocessing."""
    parser = XSeg()

    # Create dummy model output (NHWC format)
    dummy_output = np.random.rand(1, 256, 256, 1).astype(np.float32)

    # Postprocess
    mask = parser.postprocess(dummy_output, crop_size=(256, 256))

    assert mask.shape == (256, 256)
    assert mask.dtype == np.float32
    assert mask.min() >= 0
    assert mask.max() <= 1


def test_xseg_parse_aligned():
    """Test XSeg parse_aligned method."""
    parser = XSeg()

    # Create a dummy aligned face crop
    face_crop = np.random.randint(0, 255, (256, 256, 3), dtype=np.uint8)

    # Parse
    mask = parser.parse_aligned(face_crop)

    assert mask.shape == (256, 256)
    assert mask.dtype == np.float32
    assert mask.min() >= 0
    assert mask.max() <= 1


def test_xseg_parse_with_landmarks():
    """Test XSeg parse method with landmarks."""
    parser = XSeg()

    # Create a dummy image
    image = np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8)

    # Create dummy 5-point landmarks
    landmarks = np.array(
        [
            [250, 200],  # left eye
            [390, 200],  # right eye
            [320, 280],  # nose
            [260, 350],  # left mouth
            [380, 350],  # right mouth
        ],
        dtype=np.float32,
    )

    # Parse
    mask = parser.parse(image, landmarks=landmarks)

    assert mask.shape == (480, 640)
    assert mask.dtype == np.float32
    assert mask.min() >= 0
    assert mask.max() <= 1


def test_xseg_parse_invalid_landmarks():
    """Test XSeg parse with invalid landmarks shape."""
    parser = XSeg()
    image = np.random.randint(0, 255, (256, 256, 3), dtype=np.uint8)

    # Wrong shape
    invalid_landmarks = np.array([[0, 0], [1, 1], [2, 2]])

    with pytest.raises(ValueError, match='Landmarks must have shape'):
        parser.parse(image, landmarks=invalid_landmarks)


def test_xseg_parse_with_inverse():
    """Test XSeg parse_with_inverse method."""
    parser = XSeg()

    # Create a dummy image
    image = np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8)

    # Create dummy 5-point landmarks
    landmarks = np.array(
        [
            [250, 200],
            [390, 200],
            [320, 280],
            [260, 350],
            [380, 350],
        ],
        dtype=np.float32,
    )

    # Parse with inverse
    mask, face_crop, inverse_matrix = parser.parse_with_inverse(image, landmarks)

    assert mask.shape == (256, 256)
    assert face_crop.shape == (256, 256, 3)
    assert inverse_matrix.shape == (2, 3)


def test_create_face_parser_xseg_enum():
    """Test factory function with XSeg enum."""
    parser = create_face_parser(XSegWeights.DEFAULT)
    assert parser is not None
    assert isinstance(parser, XSeg)


def test_create_face_parser_xseg_string():
    """Test factory function with XSeg string."""
    parser = create_face_parser('xseg')
    assert parser is not None
    assert isinstance(parser, XSeg)
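
Taken together, these tests pin down the XSeg workflow; a sketch of the same calls outside pytest (the image and landmarks are dummies, exactly as in the tests):

```python
import numpy as np

from uniface.parsing import XSeg

parser = XSeg()  # 256x256 alignment, no mask blur by default

image = np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8)  # stand-in photo
landmarks = np.array(
    [[250, 200], [390, 200], [320, 280], [260, 350], [380, 350]],  # 5-point layout
    dtype=np.float32,
)

mask = parser.parse(image, landmarks=landmarks)  # float32 in [0, 1], same HxW as image

# Crop-space variant: mask and crop at align_size, plus the 2x3 inverse warp matrix
mask_c, face_crop, inverse_matrix = parser.parse_with_inverse(image, landmarks)
```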
@@ -2,7 +2,6 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

"""Tests for face recognition models (ArcFace, MobileFace, SphereFace)."""

from __future__ import annotations


@@ -2,7 +2,6 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

"""Tests for RetinaFace detector."""

from __future__ import annotations


@@ -2,7 +2,6 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

"""Tests for SCRFD detector."""

from __future__ import annotations


@@ -2,7 +2,6 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

"""Tests for UniFace type definitions (dataclasses)."""

from __future__ import annotations


@@ -2,7 +2,6 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

"""Tests for utility functions (compute_similarity, face_alignment, etc.)."""

from __future__ import annotations
@@ -6,26 +6,27 @@ CLI utilities for testing and running UniFace features.

| Tool | Description |
|------|-------------|
| `detection.py` | Face detection on image, video, or webcam |
| `face_anonymize.py` | Face anonymization/blurring for privacy |
| `age_gender.py` | Age and gender prediction |
| `face_emotion.py` | Emotion detection (7 or 8 emotions) |
| `gaze_estimation.py` | Gaze direction estimation |
| `detect.py` | Face detection on image, video, or webcam |
| `track.py` | Face tracking on video with ByteTrack |
| `analyze.py` | Complete face analysis (detection + recognition + attributes) |
| `anonymize.py` | Face anonymization/blurring for privacy |
| `emotion.py` | Emotion detection (7 or 8 emotions) |
| `gaze.py` | Gaze direction estimation |
| `landmarks.py` | 106-point facial landmark detection |
| `recognition.py` | Face embedding extraction and comparison |
| `face_analyzer.py` | Complete face analysis (detection + recognition + attributes) |
| `face_search.py` | Real-time face matching against reference |
| `recognize.py` | Face embedding extraction and comparison |
| `search.py` | Real-time face matching against reference |
| `fairface.py` | FairFace attribute prediction (race, gender, age) |
| `attribute.py` | Age and gender prediction |
| `spoofing.py` | Face anti-spoofing detection |
| `face_parsing.py` | Face semantic segmentation |
| `video_detection.py` | Face detection on video files with progress bar |
| `parse.py` | Face semantic segmentation (BiSeNet) |
| `xseg.py` | Face segmentation (XSeg) |
| `batch_process.py` | Batch process folder of images |
| `download_model.py` | Download model weights |
| `sha256_generate.py` | Generate SHA256 hash for model files |

## Unified `--source` Pattern

All tools use a unified `--source` argument that accepts:
Most tools use a unified `--source` argument that accepts:
- **Image path**: `--source photo.jpg`
- **Video path**: `--source video.mp4`
- **Camera ID**: `--source 0` (default webcam), `--source 1` (external camera)
@@ -34,26 +35,31 @@ All tools use a unified `--source` argument that accepts:

```bash
# Face detection
python tools/detection.py --source assets/test.jpg  # image
python tools/detection.py --source video.mp4        # video
python tools/detection.py --source 0                # webcam
python tools/detect.py --source assets/test.jpg  # image
python tools/detect.py --source video.mp4        # video
python tools/detect.py --source 0                # webcam

# Face tracking
python tools/track.py --source video.mp4
python tools/track.py --source video.mp4 --output tracked.mp4
python tools/track.py --source 0  # webcam

# Face anonymization
python tools/face_anonymize.py --source assets/test.jpg --method pixelate
python tools/face_anonymize.py --source video.mp4 --method gaussian
python tools/face_anonymize.py --source 0 --method pixelate
python tools/anonymize.py --source assets/test.jpg --method pixelate
python tools/anonymize.py --source video.mp4 --method gaussian
python tools/anonymize.py --source 0 --method pixelate

# Age and gender
python tools/age_gender.py --source assets/test.jpg
python tools/age_gender.py --source 0
python tools/attribute.py --source assets/test.jpg
python tools/attribute.py --source 0

# Emotion detection
python tools/face_emotion.py --source assets/test.jpg
python tools/face_emotion.py --source 0
python tools/emotion.py --source assets/test.jpg
python tools/emotion.py --source 0

# Gaze estimation
python tools/gaze_estimation.py --source assets/test.jpg
python tools/gaze_estimation.py --source 0
python tools/gaze.py --source assets/test.jpg
python tools/gaze.py --source 0

# Landmarks
python tools/landmarks.py --source assets/test.jpg
@@ -63,31 +69,31 @@ python tools/landmarks.py --source 0
python tools/fairface.py --source assets/test.jpg
python tools/fairface.py --source 0

# Face parsing
python tools/face_parsing.py --source assets/test.jpg
python tools/face_parsing.py --source 0
# Face parsing (BiSeNet)
python tools/parse.py --source assets/test.jpg
python tools/parse.py --source 0

# Face segmentation (XSeg)
python tools/xseg.py --source assets/test.jpg
python tools/xseg.py --source 0

# Face anti-spoofing
python tools/spoofing.py --source assets/test.jpg
python tools/spoofing.py --source 0

# Face analyzer
python tools/face_analyzer.py --source assets/test.jpg
python tools/face_analyzer.py --source 0
python tools/analyze.py --source assets/test.jpg
python tools/analyze.py --source 0

# Face recognition (extract embedding)
python tools/recognition.py --image assets/test.jpg
python tools/recognize.py --image assets/test.jpg

# Face comparison
python tools/recognition.py --image1 face1.jpg --image2 face2.jpg
python tools/recognize.py --image1 face1.jpg --image2 face2.jpg

# Face search (match against reference)
python tools/face_search.py --reference person.jpg --source 0
python tools/face_search.py --reference person.jpg --source video.mp4

# Video processing with progress bar
python tools/video_detection.py --source video.mp4
python tools/video_detection.py --source video.mp4 --output output.mp4
python tools/search.py --reference person.jpg --source 0
python tools/search.py --reference person.jpg --source video.mp4

# Batch processing
python tools/batch_process.py --input images/ --output results/
@@ -117,5 +123,5 @@ python tools/download_model.py  # downloads all
## Quick Test

```bash
python tools/detection.py --source assets/test.jpg
python tools/detect.py --source assets/test.jpg
```
29
tools/_common.py
Normal file
@@ -0,0 +1,29 @@
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

from __future__ import annotations

from pathlib import Path

IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}


def get_source_type(source: str) -> str:
    """Determine if source is image, video, or camera.

    Args:
        source: File path or camera ID string (e.g. ``"0"``).

    Returns:
        One of ``"image"``, ``"video"``, ``"camera"``, or ``"unknown"``.
    """
    if source.isdigit():
        return 'camera'
    suffix = Path(source).suffix.lower()
    if suffix in IMAGE_EXTENSIONS:
        return 'image'
    if suffix in VIDEO_EXTENSIONS:
        return 'video'
    return 'unknown'
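
A quick sanity check of the shared helper (run from inside `tools/` so the `_common` import resolves; the file names are arbitrary):

```python
from _common import get_source_type

# Dispatch on the unified --source value, mirroring how the tools use it
for source in ('photo.jpg', 'clip.mp4', '0', 'notes.txt'):
    print(source, '->', get_source_type(source))
# photo.jpg -> image, clip.mp4 -> video, 0 -> camera, notes.txt -> unknown
```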
@@ -5,9 +5,9 @@
"""Face analysis using FaceAnalyzer.

Usage:
    python tools/face_analyzer.py --source path/to/image.jpg
    python tools/face_analyzer.py --source path/to/video.mp4
    python tools/face_analyzer.py --source 0  # webcam
    python tools/analyze.py --source path/to/image.jpg
    python tools/analyze.py --source path/to/video.mp4
    python tools/analyze.py --source 0  # webcam
"""

from __future__ import annotations
@@ -16,28 +16,15 @@ import argparse
import os
from pathlib import Path

from _common import get_source_type
import cv2
import numpy as np

from uniface import AgeGender, ArcFace, FaceAnalyzer, RetinaFace
from uniface.visualization import draw_detections

IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}


def get_source_type(source: str) -> str:
    """Determine if source is image, video, or camera."""
    if source.isdigit():
        return 'camera'
    path = Path(source)
    suffix = path.suffix.lower()
    if suffix in IMAGE_EXTENSIONS:
        return 'image'
    elif suffix in VIDEO_EXTENSIONS:
        return 'video'
    else:
        return 'unknown'
from uniface.analyzer import FaceAnalyzer
from uniface.attribute import AgeGender
from uniface.detection import RetinaFace
from uniface.draw import draw_detections
from uniface.recognition import ArcFace


def draw_face_info(image, face, face_id):
@@ -111,7 +98,7 @@ def process_image(analyzer, image_path: str, save_dir: str = 'outputs', show_sim
    bboxes = [f.bbox for f in faces]
    scores = [f.confidence for f in faces]
    landmarks = [f.landmarks for f in faces]
    draw_detections(image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, fancy_bbox=True)
    draw_detections(image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, corner_bbox=True)

    for i, face in enumerate(faces, 1):
        draw_face_info(image, face, i)
@@ -153,7 +140,7 @@ def process_video(analyzer, video_path: str, save_dir: str = 'outputs'):
        bboxes = [f.bbox for f in faces]
        scores = [f.confidence for f in faces]
        landmarks = [f.landmarks for f in faces]
        draw_detections(image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, fancy_bbox=True)
        draw_detections(image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, corner_bbox=True)

        for i, face in enumerate(faces, 1):
            draw_face_info(frame, face, i)
@@ -189,7 +176,7 @@ def run_camera(analyzer, camera_id: int = 0):
        bboxes = [f.bbox for f in faces]
        scores = [f.confidence for f in faces]
        landmarks = [f.landmarks for f in faces]
        draw_detections(image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, fancy_bbox=True)
        draw_detections(image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, corner_bbox=True)

        for i, face in enumerate(faces, 1):
            draw_face_info(frame, face, i)
@@ -5,9 +5,9 @@
"""Face anonymization/blurring for privacy.

Usage:
    python tools/face_anonymize.py --source path/to/image.jpg --method pixelate
    python tools/face_anonymize.py --source path/to/video.mp4 --method gaussian
    python tools/face_anonymize.py --source 0 --method pixelate  # webcam
    python tools/anonymize.py --source path/to/image.jpg --method pixelate
    python tools/anonymize.py --source path/to/video.mp4 --method gaussian
    python tools/anonymize.py --source 0 --method pixelate  # webcam
"""

from __future__ import annotations
@@ -16,28 +16,12 @@ import argparse
import os
from pathlib import Path

from _common import get_source_type
import cv2

from uniface import RetinaFace
from uniface.detection import RetinaFace
from uniface.privacy import BlurFace

IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}


def get_source_type(source: str) -> str:
    """Determine if source is image, video, or camera."""
    if source.isdigit():
        return 'camera'
    path = Path(source)
    suffix = path.suffix.lower()
    if suffix in IMAGE_EXTENSIONS:
        return 'image'
    elif suffix in VIDEO_EXTENSIONS:
        return 'video'
    else:
        return 'unknown'


def process_image(
    detector,
@@ -56,7 +40,7 @@ def process_image(
    print(f'Detected {len(faces)} face(s)')

    if show_detections and faces:
        from uniface.visualization import draw_detections
        from uniface.draw import draw_detections

        preview = image.copy()
        bboxes = [face.bbox for face in faces]
@@ -171,19 +155,19 @@ def main():
        epilog="""
Examples:
  # Anonymize image with pixelation (default)
  python run_anonymization.py --source photo.jpg
  python tools/anonymize.py --source photo.jpg

  # Use Gaussian blur with custom strength
  python run_anonymization.py --source photo.jpg --method gaussian --blur-strength 5.0
  python tools/anonymize.py --source photo.jpg --method gaussian --blur-strength 5.0

  # Real-time webcam anonymization
  python run_anonymization.py --source 0 --method pixelate
  python tools/anonymize.py --source 0 --method pixelate

  # Black boxes for maximum privacy
  python run_anonymization.py --source photo.jpg --method blackout
  python tools/anonymize.py --source photo.jpg --method blackout

  # Custom pixelation intensity
  python run_anonymization.py --source photo.jpg --method pixelate --pixel-blocks 5
  python tools/anonymize.py --source photo.jpg --method pixelate --pixel-blocks 5
""",
)
@@ -5,9 +5,9 @@
"""Age and gender prediction on detected faces.

Usage:
    python tools/age_gender.py --source path/to/image.jpg
    python tools/age_gender.py --source path/to/video.mp4
    python tools/age_gender.py --source 0  # webcam
    python tools/attribute.py --source path/to/image.jpg
    python tools/attribute.py --source path/to/video.mp4
    python tools/attribute.py --source 0  # webcam
"""

from __future__ import annotations
@@ -16,27 +16,12 @@ import argparse
import os
from pathlib import Path

from _common import get_source_type
import cv2

from uniface import SCRFD, AgeGender, RetinaFace
from uniface.visualization import draw_detections

IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}


def get_source_type(source: str) -> str:
    """Determine if source is image, video, or camera."""
    if source.isdigit():
        return 'camera'
    path = Path(source)
    suffix = path.suffix.lower()
    if suffix in IMAGE_EXTENSIONS:
        return 'image'
    elif suffix in VIDEO_EXTENSIONS:
        return 'video'
    else:
        return 'unknown'
from uniface.attribute import AgeGender
from uniface.detection import SCRFD, RetinaFace
from uniface.draw import draw_detections


def draw_age_gender_label(image, bbox, sex: str, age: int):
@@ -71,7 +56,7 @@ def process_image(
    scores = [f.confidence for f in faces]
    landmarks = [f.landmarks for f in faces]
    draw_detections(
        image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
        image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, corner_bbox=True
    )

    for i, face in enumerate(faces):
@@ -123,7 +108,7 @@ def process_video(
        scores = [f.confidence for f in faces]
        landmarks = [f.landmarks for f in faces]
        draw_detections(
            image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
            image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, corner_bbox=True
        )

        for face in faces:
@@ -162,7 +147,7 @@ def run_camera(detector, age_gender, camera_id: int = 0, threshold: float = 0.6)
        scores = [f.confidence for f in faces]
        landmarks = [f.landmarks for f in faces]
        draw_detections(
            image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
            image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, corner_bbox=True
        )

        for face in faces:
@@ -14,8 +14,8 @@ from pathlib import Path
import cv2
from tqdm import tqdm

from uniface import SCRFD, RetinaFace
from uniface.visualization import draw_detections
from uniface.detection import SCRFD, RetinaFace
from uniface.draw import draw_detections


def get_image_files(input_dir: Path, extensions: tuple) -> list:
@@ -39,7 +39,7 @@ def process_image(detector, image_path: Path, output_path: Path, threshold: floa
    scores = [f.confidence for f in faces]
    landmarks = [f.landmarks for f in faces]
    draw_detections(
        image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
        image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, corner_bbox=True
    )

    cv2.putText(
@@ -5,9 +5,9 @@
"""Face detection on image, video, or webcam.

Usage:
    python tools/detection.py --source path/to/image.jpg
    python tools/detection.py --source path/to/video.mp4
    python tools/detection.py --source 0  # webcam
    python tools/detect.py --source path/to/image.jpg
    python tools/detect.py --source path/to/video.mp4
    python tools/detect.py --source 0  # webcam
"""

from __future__ import annotations
@@ -15,28 +15,14 @@ from __future__ import annotations
import argparse
import os
from pathlib import Path
import time

from _common import get_source_type
import cv2
from tqdm import tqdm

from uniface.detection import SCRFD, RetinaFace, YOLOv5Face
from uniface.visualization import draw_detections

IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}


def get_source_type(source: str) -> str:
    """Determine if source is image, video, or camera."""
    if source.isdigit():
        return 'camera'
    path = Path(source)
    suffix = path.suffix.lower()
    if suffix in IMAGE_EXTENSIONS:
        return 'image'
    elif suffix in VIDEO_EXTENSIONS:
        return 'video'
    else:
        return 'unknown'
from uniface.detection import SCRFD, RetinaFace, YOLOv5Face, YOLOv8Face
from uniface.draw import draw_detections


def process_image(detector, image_path: str, threshold: float = 0.6, save_dir: str = 'outputs'):
@@ -52,7 +38,7 @@ def process_image(detector, image_path: str, threshold: float = 0.6, save_dir: s
    bboxes = [face.bbox for face in faces]
    scores = [face.confidence for face in faces]
    landmarks = [face.landmarks for face in faces]
    draw_detections(image, bboxes, scores, landmarks, vis_threshold=threshold)
    draw_detections(image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold)

    os.makedirs(save_dir, exist_ok=True)
    output_path = os.path.join(save_dir, f'{os.path.splitext(os.path.basename(image_path))[0]}_out.jpg')
@@ -60,34 +46,48 @@ def process_image(detector, image_path: str, threshold: float = 0.6, save_dir: s
    print(f'Detected {len(faces)} face(s). Output saved: {output_path}')


def process_video(detector, video_path: str, threshold: float = 0.6, save_dir: str = 'outputs'):
    """Process a video file."""
    cap = cv2.VideoCapture(video_path)
def process_video(
    detector,
    input_path: str,
    output_path: str,
    threshold: float = 0.6,
    show_preview: bool = False,
):
    """Process a video file with progress bar."""
    cap = cv2.VideoCapture(input_path)
    if not cap.isOpened():
        print(f"Error: Cannot open video file '{video_path}'")
        print(f"Error: Cannot open video file '{input_path}'")
        return

    # Get video properties
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    os.makedirs(save_dir, exist_ok=True)
    output_path = os.path.join(save_dir, f'{Path(video_path).stem}_out.mp4')
    print(f'Input: {input_path} ({width}x{height}, {fps:.1f} fps, {total_frames} frames)')
    print(f'Output: {output_path}')

    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    print(f'Processing video: {video_path} ({total_frames} frames)')
    frame_count = 0
    if not out.isOpened():
        print(f"Error: Cannot create output video '{output_path}'")
        cap.release()
        return

    while True:
    frame_count = 0
    total_faces = 0

    for _ in tqdm(range(total_frames), desc='Processing', unit='frames'):
        ret, frame = cap.read()
        if not ret:
            break

        t0 = time.perf_counter()
        frame_count += 1
        faces = detector.detect(frame)
        total_faces += len(faces)

        bboxes = [f.bbox for f in faces]
        scores = [f.confidence for f in faces]
@@ -99,19 +99,28 @@ def process_video(detector, video_path: str, threshold: float = 0.6, save_dir: s
            landmarks=landmarks,
            vis_threshold=threshold,
            draw_score=True,
            fancy_bbox=True,
            corner_bbox=True,
        )

        cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        inference_fps = 1.0 / max(time.perf_counter() - t0, 1e-9)
        cv2.putText(frame, f'FPS: {inference_fps:.1f}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        cv2.putText(frame, f'Faces: {len(faces)}', (10, 65), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        out.write(frame)

        # Show progress
        if frame_count % 100 == 0:
            print(f'  Processed {frame_count}/{total_frames} frames...')
        if show_preview:
            cv2.imshow("Processing - Press 'q' to cancel", frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                print('\nCancelled by user')
                break

    cap.release()
    out.release()
    print(f'Done! Output saved: {output_path}')
    if show_preview:
        cv2.destroyAllWindows()

    avg_faces = total_faces / frame_count if frame_count > 0 else 0
    print(f'\nDone! {frame_count} frames, {total_faces} faces ({avg_faces:.1f} avg/frame)')
    print(f'Saved: {output_path}')


def run_camera(detector, camera_id: int = 0, threshold: float = 0.6):
@@ -123,9 +132,10 @@ def run_camera(detector, camera_id: int = 0, threshold: float = 0.6):

    print("Press 'q' to quit")

    prev_time = time.perf_counter()
    while True:
        ret, frame = cap.read()
        frame = cv2.flip(frame, 1)  # mirror for natural interaction
        frame = cv2.flip(frame, 1)
        if not ret:
            break

@@ -141,10 +151,14 @@ def run_camera(detector, camera_id: int = 0, threshold: float = 0.6):
            landmarks=landmarks,
            vis_threshold=threshold,
            draw_score=True,
            fancy_bbox=True,
            corner_bbox=True,
        )

        cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        curr_time = time.perf_counter()
        fps = 1.0 / max(curr_time - prev_time, 1e-9)
        prev_time = curr_time
        cv2.putText(frame, f'FPS: {fps:.1f}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        cv2.putText(frame, f'Faces: {len(faces)}', (10, 65), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        cv2.imshow('Face Detection', frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
@@ -157,22 +171,33 @@ def run_camera(detector, camera_id: int = 0, threshold: float = 0.6):
def main():
    parser = argparse.ArgumentParser(description='Run face detection')
    parser.add_argument('--source', type=str, required=True, help='Image/video path or camera ID (0, 1, ...)')
    parser.add_argument('--method', type=str, default='retinaface', choices=['retinaface', 'scrfd', 'yolov5face'])
    parser.add_argument(
        '--detector',
        '--method',
        type=str,
        default='retinaface',
        choices=['retinaface', 'scrfd', 'yolov5face', 'yolov8face'],
    )
    parser.add_argument('--threshold', type=float, default=0.25, help='Visualization threshold')
    parser.add_argument('--preview', action='store_true', help='Show live preview during video processing')
    parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
    parser.add_argument('--output', type=str, default=None, help='Output video path (auto-generated if not specified)')
    args = parser.parse_args()

    # Initialize detector
    if args.method == 'retinaface':
    if args.detector == 'retinaface':
        detector = RetinaFace()
    elif args.method == 'scrfd':
    elif args.detector == 'scrfd':
        detector = SCRFD()
    else:
    elif args.detector == 'yolov5face':
        from uniface.constants import YOLOv5FaceWeights

        detector = YOLOv5Face(model_name=YOLOv5FaceWeights.YOLOV5M)
    else:  # yolov8face
        from uniface.constants import YOLOv8FaceWeights

        detector = YOLOv8Face(model_name=YOLOv8FaceWeights.YOLOV8N)

    # Determine source type and process
    source_type = get_source_type(args.source)

    if source_type == 'camera':
@@ -186,7 +211,12 @@ def main():
        if not os.path.exists(args.source):
            print(f'Error: Video not found: {args.source}')
            return
        process_video(detector, args.source, args.threshold, args.save_dir)
        if args.output:
            output_path = args.output
        else:
            os.makedirs(args.save_dir, exist_ok=True)
            output_path = os.path.join(args.save_dir, f'{Path(args.source).stem}_detected.mp4')
        process_video(detector, args.source, output_path, args.threshold, args.preview)
    else:
        print(f"Error: Unknown source type for '{args.source}'")
        print('Supported formats: images (.jpg, .png, ...), videos (.mp4, .avi, ...), or camera ID (0, 1, ...)')
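
The rewritten `main()` adds a `yolov8face` branch; a standalone sketch of that path, using only the names imported above (the frame is any BGR array):

```python
import numpy as np

from uniface.constants import YOLOv8FaceWeights
from uniface.detection import YOLOv8Face

detector = YOLOv8Face(model_name=YOLOv8FaceWeights.YOLOV8N)

frame = np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8)  # stand-in frame
faces = detector.detect(frame)
print(f'Detected {len(faces)} face(s)')
```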
@@ -5,9 +5,9 @@
"""Emotion detection on detected faces.

Usage:
    python tools/face_emotion.py --source path/to/image.jpg
    python tools/face_emotion.py --source path/to/video.mp4
    python tools/face_emotion.py --source 0  # webcam
    python tools/emotion.py --source path/to/image.jpg
    python tools/emotion.py --source path/to/video.mp4
    python tools/emotion.py --source 0  # webcam
"""

from __future__ import annotations
@@ -16,27 +16,12 @@ import argparse
import os
from pathlib import Path

from _common import get_source_type
import cv2

from uniface import SCRFD, Emotion, RetinaFace
from uniface.visualization import draw_detections

IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}


def get_source_type(source: str) -> str:
    """Determine if source is image, video, or camera."""
    if source.isdigit():
        return 'camera'
    path = Path(source)
    suffix = path.suffix.lower()
    if suffix in IMAGE_EXTENSIONS:
        return 'image'
    elif suffix in VIDEO_EXTENSIONS:
        return 'video'
    else:
        return 'unknown'
from uniface.attribute import Emotion
from uniface.detection import SCRFD, RetinaFace
from uniface.draw import draw_detections


def draw_emotion_label(image, bbox, emotion: str, confidence: float):
@@ -71,7 +56,7 @@ def process_image(
    scores = [f.confidence for f in faces]
    landmarks = [f.landmarks for f in faces]
    draw_detections(
        image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
        image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, corner_bbox=True
    )

    for i, face in enumerate(faces):
@@ -123,7 +108,7 @@ def process_video(
        scores = [f.confidence for f in faces]
        landmarks = [f.landmarks for f in faces]
        draw_detections(
            image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
            image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, corner_bbox=True
        )

        for face in faces:
@@ -162,7 +147,7 @@ def run_camera(detector, emotion_predictor, camera_id: int = 0, threshold: float
        scores = [f.confidence for f in faces]
        landmarks = [f.landmarks for f in faces]
        draw_detections(
            image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
            image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, corner_bbox=True
        )

        for face in faces:
@@ -16,28 +16,12 @@ import argparse
import os
from pathlib import Path

from _common import get_source_type
import cv2

from uniface import SCRFD, RetinaFace
from uniface.attribute import FairFace
from uniface.visualization import draw_detections

IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}


def get_source_type(source: str) -> str:
    """Determine if source is image, video, or camera."""
    if source.isdigit():
        return 'camera'
    path = Path(source)
    suffix = path.suffix.lower()
    if suffix in IMAGE_EXTENSIONS:
        return 'image'
    elif suffix in VIDEO_EXTENSIONS:
        return 'video'
    else:
        return 'unknown'
from uniface.detection import SCRFD, RetinaFace
from uniface.draw import draw_detections


def draw_fairface_label(image, bbox, sex: str, age_group: str, race: str):
@@ -72,7 +56,7 @@ def process_image(
    scores = [f.confidence for f in faces]
    landmarks = [f.landmarks for f in faces]
    draw_detections(
        image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
        image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, corner_bbox=True
    )

    for i, face in enumerate(faces):
@@ -124,7 +108,7 @@ def process_video(
        scores = [f.confidence for f in faces]
        landmarks = [f.landmarks for f in faces]
        draw_detections(
            image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
            image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, corner_bbox=True
        )

        for face in faces:
@@ -163,7 +147,7 @@ def run_camera(detector, fairface, camera_id: int = 0, threshold: float = 0.6):
        scores = [f.confidence for f in faces]
        landmarks = [f.landmarks for f in faces]
        draw_detections(
            image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
            image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, corner_bbox=True
        )

        for face in faces:
208
tools/faiss_search.py
Normal file
@@ -0,0 +1,208 @@
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

"""FAISS index build and multi-identity face search.

Build a vector index from a directory of person sub-folders, then search
against it in a video or webcam stream.

Usage:
    python tools/faiss_search.py build --faces-dir dataset/ --db-path ./vector_index
    python tools/faiss_search.py run --db-path ./vector_index --source video.mp4
    python tools/faiss_search.py run --db-path ./vector_index --source 0  # webcam
"""

from __future__ import annotations

import argparse
import os
from pathlib import Path

from _common import IMAGE_EXTENSIONS, get_source_type
import cv2

from uniface import create_detector, create_recognizer
from uniface.draw import draw_corner_bbox, draw_text_label
from uniface.indexing import FAISS


def _draw_face(image, bbox, text: str, color: tuple[int, int, int]) -> None:
    x1, y1, x2, y2 = map(int, bbox[:4])
    thickness = max(round(sum(image.shape[:2]) / 2 * 0.003), 2)
    font_scale = max(0.4, min(0.7, (y2 - y1) / 200))
    draw_corner_bbox(image, (x1, y1, x2, y2), color=color, thickness=thickness)
    draw_text_label(image, text, x1, y1, bg_color=color, font_scale=font_scale)


def process_frame(frame, detector, recognizer, store: FAISS, threshold: float = 0.4):
    faces = detector.detect(frame)
    if not faces:
        return frame

    for face in faces:
        embedding = recognizer.get_normalized_embedding(frame, face.landmarks)
        result, sim = store.search(embedding, threshold=threshold)

        text = f'{result["person_id"]} ({sim:.2f})' if result else f'Unknown ({sim:.2f})'
        color = (0, 255, 0) if result else (0, 0, 255)
        _draw_face(frame, face.bbox, text, color)

    return frame


def process_video(detector, recognizer, store: FAISS, video_path: str, save_dir: str, threshold: float = 0.4):
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Error: Cannot open video file '{video_path}'")
        return

    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    os.makedirs(save_dir, exist_ok=True)
    output_path = os.path.join(save_dir, f'{Path(video_path).stem}_faiss_search.mp4')
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    print(f'Processing video: {video_path} ({total_frames} frames)')
    frame_count = 0

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        frame_count += 1
        frame = process_frame(frame, detector, recognizer, store, threshold)
        out.write(frame)

        if frame_count % 100 == 0:
            print(f'  Processed {frame_count}/{total_frames} frames...')

    cap.release()
    out.release()
    print(f'Done! Output saved: {output_path}')


def run_camera(detector, recognizer, store: FAISS, camera_id: int = 0, threshold: float = 0.4):
    cap = cv2.VideoCapture(camera_id)
    if not cap.isOpened():
        print(f'Cannot open camera {camera_id}')
        return

    print("Press 'q' to quit")

    while True:
        ret, frame = cap.read()
        frame = cv2.flip(frame, 1)
        if not ret:
            break

        frame = process_frame(frame, detector, recognizer, store, threshold)

        cv2.imshow('Vector Search', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()


def build(args: argparse.Namespace) -> None:
    faces_dir = Path(args.faces_dir)
    if not faces_dir.is_dir():
        print(f"Error: '{faces_dir}' is not a directory")
        return

    detector = create_detector()
    recognizer = create_recognizer()
    store = FAISS(db_path=args.db_path)

    persons = sorted(p.name for p in faces_dir.iterdir() if p.is_dir())
    if not persons:
        print(f"Error: No sub-folders found in '{faces_dir}'")
        return

    print(f'Found {len(persons)} persons: {", ".join(persons)}')

    total_added = 0
    for person_id in persons:
        person_dir = faces_dir / person_id
        images = [f for f in person_dir.iterdir() if f.suffix.lower() in IMAGE_EXTENSIONS]

        added = 0
        for img_path in images:
            image = cv2.imread(str(img_path))
            if image is None:
                print(f'  Warning: Failed to read {img_path}, skipping')
                continue

            faces = detector.detect(image)
            if not faces:
                print(f'  Warning: No face detected in {img_path}, skipping')
                continue

            embedding = recognizer.get_normalized_embedding(image, faces[0].landmarks)
            store.add(embedding, {'person_id': person_id, 'source': str(img_path)})
            added += 1

        total_added += added
        if added:
            print(f'  {person_id}: {added} embeddings added')
        else:
            print(f'  {person_id}: no valid faces found')

    store.save()
    print(f'\nIndex saved to {args.db_path} ({total_added} vectors, {len(persons)} persons)')


def run(args: argparse.Namespace) -> None:
    detector = create_detector()
    recognizer = create_recognizer()

    store = FAISS(db_path=args.db_path)
    if not store.load():
        print(f"Error: No index found at '{args.db_path}'")
        return
    print(f'Loaded FAISS index: {store}')

    source_type = get_source_type(args.source)

    if source_type == 'camera':
        run_camera(detector, recognizer, store, int(args.source), args.threshold)
    elif source_type == 'video':
        if not os.path.exists(args.source):
            print(f'Error: Video not found: {args.source}')
            return
        process_video(detector, recognizer, store, args.source, args.save_dir, args.threshold)
    else:
        print(f"Error: Source must be a video file or camera ID, not '{args.source}'")


def main():
    parser = argparse.ArgumentParser(description='FAISS vector search')
    sub = parser.add_subparsers(dest='command', required=True)

    build_p = sub.add_parser('build', help='Build a FAISS index from person sub-folders')
    build_p.add_argument('--faces-dir', type=str, required=True, help='Directory with person sub-folders')
    build_p.add_argument('--db-path', type=str, default='./vector_index', help='Where to save the index')

    run_p = sub.add_parser('run', help='Search faces against a FAISS index')
    run_p.add_argument('--db-path', type=str, required=True, help='Path to saved FAISS index')
    run_p.add_argument('--source', type=str, required=True, help='Video path or camera ID')
    run_p.add_argument('--threshold', type=float, default=0.4, help='Similarity threshold')
    run_p.add_argument('--save-dir', type=str, default='outputs', help='Output directory')

    args = parser.parse_args()

    if args.command == 'build':
        build(args)
    elif args.command == 'run':
        run(args)


if __name__ == '__main__':
    main()
@@ -5,9 +5,9 @@
"""Gaze estimation on detected faces.

Usage:
    python tools/gaze_estimation.py --source path/to/image.jpg
    python tools/gaze_estimation.py --source path/to/video.mp4
    python tools/gaze_estimation.py --source 0 # webcam
    python tools/gaze.py --source path/to/image.jpg
    python tools/gaze.py --source path/to/video.mp4
    python tools/gaze.py --source 0 # webcam
"""

from __future__ import annotations
@@ -16,29 +16,13 @@ import argparse
import os
from pathlib import Path

from _common import get_source_type
import cv2
import numpy as np

from uniface import RetinaFace
from uniface.detection import RetinaFace
from uniface.draw import draw_gaze
from uniface.gaze import MobileGaze
from uniface.visualization import draw_gaze

IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}


def get_source_type(source: str) -> str:
    """Determine if source is image, video, or camera."""
    if source.isdigit():
        return 'camera'
    path = Path(source)
    suffix = path.suffix.lower()
    if suffix in IMAGE_EXTENSIONS:
        return 'image'
    elif suffix in VIDEO_EXTENSIONS:
        return 'video'
    else:
        return 'unknown'


def process_image(detector, gaze_estimator, image_path: str, save_dir: str = 'outputs'):
@@ -16,26 +16,11 @@ import argparse
import os
from pathlib import Path

from _common import get_source_type
import cv2

from uniface import SCRFD, Landmark106, RetinaFace

IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}


def get_source_type(source: str) -> str:
    """Determine if source is image, video, or camera."""
    if source.isdigit():
        return 'camera'
    path = Path(source)
    suffix = path.suffix.lower()
    if suffix in IMAGE_EXTENSIONS:
        return 'image'
    elif suffix in VIDEO_EXTENSIONS:
        return 'video'
    else:
        return 'unknown'
from uniface.detection import SCRFD, RetinaFace
from uniface.landmark import Landmark106


def process_image(detector, landmarker, image_path: str, save_dir: str = 'outputs'):
@@ -5,9 +5,9 @@
"""Face parsing on detected faces.

Usage:
    python tools/face_parsing.py --source path/to/image.jpg
    python tools/face_parsing.py --source path/to/video.mp4
    python tools/face_parsing.py --source 0 # webcam
    python tools/parse.py --source path/to/image.jpg
    python tools/parse.py --source path/to/video.mp4
    python tools/parse.py --source 0 # webcam
"""

from __future__ import annotations
@@ -16,30 +16,14 @@ import argparse
import os
from pathlib import Path

from _common import get_source_type
import cv2
import numpy as np

from uniface import RetinaFace
from uniface.constants import ParsingWeights
from uniface.detection import RetinaFace
from uniface.draw import vis_parsing_maps
from uniface.parsing import BiSeNet
from uniface.visualization import vis_parsing_maps

IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}


def get_source_type(source: str) -> str:
    """Determine if source is image, video, or camera."""
    if source.isdigit():
        return 'camera'
    path = Path(source)
    suffix = path.suffix.lower()
    if suffix in IMAGE_EXTENSIONS:
        return 'image'
    elif suffix in VIDEO_EXTENSIONS:
        return 'video'
    else:
        return 'unknown'


def expand_bbox(
@@ -225,7 +209,7 @@ def main():
    args = parser_arg.parse_args()

    detector = RetinaFace()
    parser = BiSeNet(model_name=ParsingWeights.RESNET34)
    parser = BiSeNet(model_name=args.model)

    source_type = get_source_type(args.source)
@@ -5,8 +5,8 @@
"""Face recognition: extract embeddings or compare two faces.

Usage:
    python tools/recognition.py --image path/to/image.jpg
    python tools/recognition.py --image1 face1.jpg --image2 face2.jpg
    python tools/recognize.py --image path/to/image.jpg
    python tools/recognize.py --image1 face1.jpg --image2 face2.jpg
"""

import argparse
@@ -41,7 +41,7 @@ def run_inference(detector, recognizer, image_path: str):

    print(f'Detected {len(faces)} face(s). Extracting embedding for the first face...')

    landmarks = faces[0]['landmarks']  # 5-point landmarks for alignment (already np.ndarray)
    landmarks = faces[0].landmarks  # 5-point landmarks for alignment (already np.ndarray)
    embedding = recognizer.get_embedding(image, landmarks)
    norm_embedding = recognizer.get_normalized_embedding(image, landmarks)  # L2 normalized

@@ -65,8 +65,8 @@ def compare_faces(detector, recognizer, image1_path: str, image2_path: str, thre
        print('Error: No faces detected in one or both images')
        return

    landmarks1 = faces1[0]['landmarks']
    landmarks2 = faces2[0]['landmarks']
    landmarks1 = faces1[0].landmarks
    landmarks2 = faces2[0].landmarks

    embedding1 = recognizer.get_normalized_embedding(img1, landmarks1)
    embedding2 = recognizer.get_normalized_embedding(img2, landmarks2)
@@ -2,11 +2,14 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

"""Real-time face search: match faces against a reference image.
"""Single-reference face search on video or webcam.

Given a reference face image, detects faces in the source and shows
whether each face matches the reference.

Usage:
    python tools/face_search.py --reference person.jpg --source 0 # webcam
    python tools/face_search.py --reference person.jpg --source video.mp4
    python tools/search.py --reference ref.jpg --source video.mp4
    python tools/search.py --reference ref.jpg --source 0 # webcam
"""

from __future__ import annotations
@@ -15,43 +18,16 @@ import argparse
import os
from pathlib import Path

from _common import get_source_type
import cv2
import numpy as np

from uniface.detection import SCRFD, RetinaFace
from uniface import create_detector, create_recognizer
from uniface.draw import draw_corner_bbox, draw_text_label
from uniface.face_utils import compute_similarity
from uniface.recognition import ArcFace, MobileFace, SphereFace

IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}


def get_source_type(source: str) -> str:
    """Determine if source is image, video, or camera."""
    if source.isdigit():
        return 'camera'
    path = Path(source)
    suffix = path.suffix.lower()
    if suffix in IMAGE_EXTENSIONS:
        return 'image'
    elif suffix in VIDEO_EXTENSIONS:
        return 'video'
    else:
        return 'unknown'


def get_recognizer(name: str):
    """Get recognizer by name."""
    if name == 'arcface':
        return ArcFace()
    elif name == 'mobileface':
        return MobileFace()
    else:
        return SphereFace()


def extract_reference_embedding(detector, recognizer, image_path: str) -> np.ndarray:
    """Extract embedding from reference image."""
    image = cv2.imread(image_path)
    if image is None:
        raise RuntimeError(f'Failed to load image: {image_path}')
@@ -60,33 +36,34 @@ def extract_reference_embedding(detector, recognizer, image_path: str) -> np.nda
    if not faces:
        raise RuntimeError('No faces found in reference image.')

    landmarks = faces[0].landmarks
    return recognizer.get_normalized_embedding(image, landmarks)
    return recognizer.get_normalized_embedding(image, faces[0].landmarks)


def _draw_face(image, bbox, text: str, color: tuple[int, int, int]) -> None:
    x1, y1, x2, y2 = map(int, bbox[:4])
    thickness = max(round(sum(image.shape[:2]) / 2 * 0.003), 2)
    font_scale = max(0.4, min(0.7, (y2 - y1) / 200))
    draw_corner_bbox(image, (x1, y1, x2, y2), color=color, thickness=thickness)
    draw_text_label(image, text, x1, y1, bg_color=color, font_scale=font_scale)


def process_frame(frame, detector, recognizer, ref_embedding: np.ndarray, threshold: float = 0.4):
    """Process a single frame and return annotated frame."""
    faces = detector.detect(frame)

    for face in faces:
        bbox = face.bbox
        landmarks = face.landmarks
        x1, y1, x2, y2 = map(int, bbox)

        embedding = recognizer.get_normalized_embedding(frame, landmarks)
        embedding = recognizer.get_normalized_embedding(frame, face.landmarks)
        sim = compute_similarity(ref_embedding, embedding)

        label = f'Match ({sim:.2f})' if sim > threshold else f'Unknown ({sim:.2f})'
        text = f'Match ({sim:.2f})' if sim > threshold else f'Unknown ({sim:.2f})'
        color = (0, 255, 0) if sim > threshold else (0, 0, 255)

        cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
        cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)
        _draw_face(frame, face.bbox, text, color)

    return frame


def process_video(detector, recognizer, ref_embedding: np.ndarray, video_path: str, save_dir: str, threshold: float):
    """Process a video file."""
def process_video(
    detector, recognizer, video_path: str, save_dir: str, ref_embedding: np.ndarray, threshold: float = 0.4
):
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Error: Cannot open video file '{video_path}'")
@@ -123,7 +100,6 @@ def process_video(detector, recognizer, ref_embedding: np.ndarray, video_path: s


def run_camera(detector, recognizer, ref_embedding: np.ndarray, camera_id: int = 0, threshold: float = 0.4):
    """Run real-time face search on webcam."""
    cap = cv2.VideoCapture(camera_id)
    if not cap.isOpened():
        print(f'Cannot open camera {camera_id}')
@@ -139,7 +115,7 @@ def run_camera(detector, recognizer, ref_embedding: np.ndarray, camera_id: int =

        frame = process_frame(frame, detector, recognizer, ref_embedding, threshold)

        cv2.imshow('Face Recognition', frame)
        cv2.imshow('Face Search', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

@@ -148,17 +124,10 @@ def run_camera(detector, recognizer, ref_embedding: np.ndarray, camera_id: int =


def main():
    parser = argparse.ArgumentParser(description='Face search using a reference image')
    parser = argparse.ArgumentParser(description='Single-reference face search')
    parser.add_argument('--reference', type=str, required=True, help='Reference face image')
    parser.add_argument('--source', type=str, required=True, help='Video path or camera ID (0, 1, ...)')
    parser.add_argument('--threshold', type=float, default=0.4, help='Match threshold')
    parser.add_argument('--detector', type=str, default='scrfd', choices=['retinaface', 'scrfd'])
    parser.add_argument(
        '--recognizer',
        type=str,
        default='arcface',
        choices=['arcface', 'mobileface', 'sphereface'],
    )
    parser.add_argument('--source', type=str, required=True, help='Video path or camera ID')
    parser.add_argument('--threshold', type=float, default=0.4, help='Similarity threshold')
    parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
    args = parser.parse_args()

@@ -166,8 +135,8 @@ def main():
        print(f'Error: Reference image not found: {args.reference}')
        return

    detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
    recognizer = get_recognizer(args.recognizer)
    detector = create_detector()
    recognizer = create_recognizer()

    print(f'Loading reference: {args.reference}')
    ref_embedding = extract_reference_embedding(detector, recognizer, args.reference)
@@ -180,10 +149,9 @@ def main():
        if not os.path.exists(args.source):
            print(f'Error: Video not found: {args.source}')
            return
        process_video(detector, recognizer, ref_embedding, args.source, args.save_dir, args.threshold)
        process_video(detector, recognizer, args.source, args.save_dir, ref_embedding, args.threshold)
    else:
        print(f"Error: Source must be a video file or camera ID, not '{args.source}'")
        print('Supported formats: videos (.mp4, .avi, ...) or camera ID (0, 1, ...)')


if __name__ == '__main__':
@@ -16,30 +16,14 @@ import argparse
import os
from pathlib import Path

from _common import get_source_type
import cv2
import numpy as np

from uniface import RetinaFace
from uniface.constants import MiniFASNetWeights
from uniface.detection import RetinaFace
from uniface.spoofing import create_spoofer

IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}


def get_source_type(source: str) -> str:
    """Determine if source is image, video, or camera."""
    if source.isdigit():
        return 'camera'
    path = Path(source)
    suffix = path.suffix.lower()
    if suffix in IMAGE_EXTENSIONS:
        return 'image'
    elif suffix in VIDEO_EXTENSIONS:
        return 'video'
    else:
        return 'unknown'


def draw_spoofing_result(
    image: np.ndarray,
199
tools/track.py
Normal file
@@ -0,0 +1,199 @@
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

"""Face tracking on video files using ByteTrack.

Usage:
    python tools/track.py --source video.mp4
    python tools/track.py --source video.mp4 --output outputs/tracked.mp4
    python tools/track.py --source 0 # webcam
"""

from __future__ import annotations

import argparse
import os
from pathlib import Path

from _common import VIDEO_EXTENSIONS
import cv2
import numpy as np
from tqdm import tqdm

from uniface.common import xyxy_to_cxcywh
from uniface.detection import SCRFD, RetinaFace
from uniface.draw import draw_tracks
from uniface.tracking import BYTETracker


def _assign_track_ids(faces, tracks) -> list:
    """Match tracker outputs back to Face objects by center distance."""
    if len(tracks) == 0 or len(faces) == 0:
        return []

    face_bboxes = np.array([f.bbox for f in faces], dtype=np.float32)
    track_ids = tracks[:, 4].astype(int)

    face_centers = xyxy_to_cxcywh(face_bboxes)[:, :2]  # (N, 2) -> [cx, cy]
    track_centers = xyxy_to_cxcywh(tracks[:, :4])[:, :2]  # (M, 2) -> [cx, cy]

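    # Greedy nearest-center assignment: each track claims the face whose center
    # is closest, so with heavily overlapping faces a later track can overwrite
    # an earlier one's ID. An IoU-based or Hungarian matching would be more
    # robust, at extra cost.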
    for ti in range(len(tracks)):
        dists = (track_centers[ti, 0] - face_centers[:, 0]) ** 2 + (track_centers[ti, 1] - face_centers[:, 1]) ** 2
        faces[int(np.argmin(dists))].track_id = track_ids[ti]

    return [f for f in faces if f.track_id is not None]


def process_video(
    detector,
    tracker: BYTETracker,
    input_path: str,
    output_path: str,
    threshold: float = 0.5,
    show_preview: bool = False,
):
    """Process a video file with face tracking."""
    cap = cv2.VideoCapture(input_path)
    if not cap.isOpened():
        print(f"Error: Cannot open video file '{input_path}'")
        return

    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    print(f'Input: {input_path} ({width}x{height}, {fps:.1f} fps, {total_frames} frames)')
    print(f'Output: {output_path}')

    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    if not out.isOpened():
        print(f"Error: Cannot create output video '{output_path}'")
        cap.release()
        return

    frame_count = 0
    total_tracks = 0

    for _ in tqdm(range(total_frames), desc='Tracking', unit='frames'):
        ret, frame = cap.read()
        if not ret:
            break

        frame_count += 1

        # Detect faces
        faces = detector.detect(frame)
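        # BYTETracker.update() receives an (N, 5) array of [x1, y1, x2, y2, score]
        # rows; the empty (0, 5) array keeps the call valid when no detection
        # passes the threshold.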
        dets = np.array([[*f.bbox, f.confidence] for f in faces if f.confidence >= threshold])
        dets = dets if len(dets) > 0 else np.empty((0, 5))

        # Update tracker
        tracks = tracker.update(dets)
        tracked_faces = _assign_track_ids(faces, tracks)
        total_tracks += len(tracked_faces)

        # Draw tracked faces
        draw_tracks(image=frame, faces=tracked_faces)

        cv2.putText(frame, f'Tracks: {len(tracked_faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        out.write(frame)

        if show_preview:
            cv2.imshow("Tracking - Press 'q' to cancel", frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                print('\nCancelled by user')
                break

    cap.release()
    out.release()
    if show_preview:
        cv2.destroyAllWindows()

    avg_tracks = total_tracks / frame_count if frame_count > 0 else 0
    print(f'\nDone! {frame_count} frames, {total_tracks} tracks ({avg_tracks:.1f} avg/frame)')
    print(f'Saved: {output_path}')


def run_camera(
    detector,
    tracker: BYTETracker,
    camera_id: int = 0,
    threshold: float = 0.5,
):
    """Run real-time face tracking on webcam."""
    cap = cv2.VideoCapture(camera_id)
    if not cap.isOpened():
        print(f'Cannot open camera {camera_id}')
        return

    print("Press 'q' to quit")

    while True:
        ret, frame = cap.read()
        if not ret:
            break
        frame = cv2.flip(frame, 1)

        # Detect faces
        faces = detector.detect(frame)
        dets = np.array([[*f.bbox, f.confidence] for f in faces if f.confidence >= threshold])
        dets = dets if len(dets) > 0 else np.empty((0, 5))

        # Update tracker
        tracks = tracker.update(dets)
        tracked_faces = _assign_track_ids(faces, tracks)

        # Draw tracked faces
        draw_tracks(image=frame, faces=tracked_faces)

        cv2.putText(frame, f'Tracks: {len(tracked_faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        cv2.imshow('Face Tracking', frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()


def main():
    parser = argparse.ArgumentParser(description='Face tracking on video using ByteTrack')
    parser.add_argument('--source', type=str, required=True, help='Video path or camera ID (0, 1, ...)')
    parser.add_argument('--output', type=str, default=None, help='Output video path')
    parser.add_argument('--detector', type=str, default='scrfd', choices=['retinaface', 'scrfd'])
    parser.add_argument('--threshold', type=float, default=0.5, help='Detection confidence threshold')
    parser.add_argument('--track-buffer', type=int, default=30, help='Max frames to keep lost tracks')
    parser.add_argument('--preview', action='store_true', help='Show live preview')
    parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
    args = parser.parse_args()

    detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
    tracker = BYTETracker(track_thresh=args.threshold, track_buffer=args.track_buffer)

    if args.source.isdigit():
        run_camera(detector, tracker, int(args.source), args.threshold)
    else:
        if not os.path.exists(args.source):
            print(f'Error: Video not found: {args.source}')
            return

        ext = Path(args.source).suffix.lower()
        if ext not in VIDEO_EXTENSIONS:
            print(f"Error: Unsupported format '{ext}'. Supported: {VIDEO_EXTENSIONS}")
            return

        if args.output:
            output_path = args.output
        else:
            os.makedirs(args.save_dir, exist_ok=True)
            output_path = os.path.join(args.save_dir, f'{Path(args.source).stem}_tracked.mp4')

        process_video(detector, tracker, args.source, output_path, args.threshold, args.preview)


if __name__ == '__main__':
    main()
@@ -1,180 +0,0 @@
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

"""Face detection on video files with progress tracking.

Usage:
    python tools/video_detection.py --source video.mp4
    python tools/video_detection.py --source video.mp4 --output output.mp4
    python tools/video_detection.py --source 0 # webcam
"""

from __future__ import annotations

import argparse
import os
from pathlib import Path

import cv2
from tqdm import tqdm

from uniface import SCRFD, RetinaFace
from uniface.visualization import draw_detections

IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}


def get_source_type(source: str) -> str:
    """Determine if source is image, video, or camera."""
    if source.isdigit():
        return 'camera'
    path = Path(source)
    suffix = path.suffix.lower()
    if suffix in IMAGE_EXTENSIONS:
        return 'image'
    elif suffix in VIDEO_EXTENSIONS:
        return 'video'
    else:
        return 'unknown'


def process_video(
    detector,
    input_path: str,
    output_path: str,
    threshold: float = 0.6,
    show_preview: bool = False,
):
    """Process a video file with progress bar."""
    cap = cv2.VideoCapture(input_path)
    if not cap.isOpened():
        print(f"Error: Cannot open video file '{input_path}'")
        return

    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    print(f'Input: {input_path} ({width}x{height}, {fps:.1f} fps, {total_frames} frames)')
    print(f'Output: {output_path}')

    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    if not out.isOpened():
        print(f"Error: Cannot create output video '{output_path}'")
        cap.release()
        return

    frame_count = 0
    total_faces = 0

    for _ in tqdm(range(total_frames), desc='Processing', unit='frames'):
        ret, frame = cap.read()
        if not ret:
            break

        frame_count += 1
        faces = detector.detect(frame)
        total_faces += len(faces)

        bboxes = [f.bbox for f in faces]
        scores = [f.confidence for f in faces]
        landmarks = [f.landmarks for f in faces]
        draw_detections(
            image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
        )

        cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        out.write(frame)

        if show_preview:
            cv2.imshow("Processing - Press 'q' to cancel", frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                print('\nCancelled by user')
                break

    cap.release()
    out.release()
    if show_preview:
        cv2.destroyAllWindows()

    avg_faces = total_faces / frame_count if frame_count > 0 else 0
    print(f'\nDone! {frame_count} frames, {total_faces} faces ({avg_faces:.1f} avg/frame)')
    print(f'Saved: {output_path}')


def run_camera(detector, camera_id: int = 0, threshold: float = 0.6):
    """Run real-time detection on webcam."""
    cap = cv2.VideoCapture(camera_id)
    if not cap.isOpened():
        print(f'Cannot open camera {camera_id}')
        return

    print("Press 'q' to quit")

    while True:
        ret, frame = cap.read()
        frame = cv2.flip(frame, 1)
        if not ret:
            break

        faces = detector.detect(frame)

        bboxes = [f.bbox for f in faces]
        scores = [f.confidence for f in faces]
        landmarks = [f.landmarks for f in faces]
        draw_detections(
            image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
        )

        cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        cv2.imshow('Face Detection', frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()


def main():
    parser = argparse.ArgumentParser(description='Process video with face detection')
    parser.add_argument('--source', type=str, required=True, help='Video path or camera ID (0, 1, ...)')
    parser.add_argument('--output', type=str, default=None, help='Output video path (auto-generated if not specified)')
    parser.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
    parser.add_argument('--threshold', type=float, default=0.6, help='Visualization threshold')
    parser.add_argument('--preview', action='store_true', help='Show live preview')
    parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory (if --output not specified)')
    args = parser.parse_args()

    detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()

    source_type = get_source_type(args.source)

    if source_type == 'camera':
        run_camera(detector, int(args.source), args.threshold)
    elif source_type == 'video':
        if not os.path.exists(args.source):
            print(f'Error: Video not found: {args.source}')
            return

        # Determine output path
        if args.output:
            output_path = args.output
        else:
            os.makedirs(args.save_dir, exist_ok=True)
            output_path = os.path.join(args.save_dir, f'{Path(args.source).stem}_detected.mp4')

        process_video(detector, args.source, output_path, args.threshold, args.preview)
    else:
        print(f"Error: Unknown source type for '{args.source}'")
        print('Supported formats: videos (.mp4, .avi, ...) or camera ID (0, 1, ...)')


if __name__ == '__main__':
    main()
235
tools/xseg.py
Normal file
@@ -0,0 +1,235 @@
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

"""XSeg face segmentation on detected faces.

Usage:
    python tools/xseg.py --source path/to/image.jpg
    python tools/xseg.py --source path/to/video.mp4
    python tools/xseg.py --source 0 # webcam
"""

from __future__ import annotations

import argparse
import os
from pathlib import Path

from _common import get_source_type
import cv2
import numpy as np

from uniface.detection import RetinaFace
from uniface.parsing import XSeg


def apply_mask_visualization(image: np.ndarray, mask: np.ndarray, alpha: float = 0.5) -> np.ndarray:
    """Apply colored mask overlay for visualization."""
    overlay = image.copy().astype(np.float32)
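    # Tint the mask green by weighting the B/G/R channels, then alpha-blend it
    # over the original pixels wherever the mask is non-zero.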
    mask_3ch = np.stack([mask * 0.3, mask * 0.7, mask * 0.3], axis=-1)
    overlay = overlay * (1 - mask[..., None] * alpha) + mask_3ch * 255 * alpha

    return overlay.clip(0, 255).astype(np.uint8)


def process_image(
    detector: RetinaFace,
    parser: XSeg,
    image_path: str,
    save_dir: str = 'outputs',
) -> None:
    """Process a single image."""
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Failed to load image from '{image_path}'")
        return

    faces = detector.detect(image)
    print(f'Detected {len(faces)} face(s)')

    if len(faces) == 0:
        print('No faces detected.')
        return

    # Accumulate masks from all faces
    full_mask = np.zeros((image.shape[0], image.shape[1]), dtype=np.float32)
    for i, face in enumerate(faces):
        if face.landmarks is None:
            print(f' Face {i + 1}: skipped (no landmarks)')
            continue

        mask = parser.parse(image, landmarks=face.landmarks)
        full_mask = np.maximum(full_mask, mask)
        print(f' Face {i + 1}: done')

    # Apply visualization
    result_image = apply_mask_visualization(image, full_mask)

    # Draw bounding boxes
    for face in faces:
        x1, y1, x2, y2 = map(int, face.bbox[:4])
        cv2.rectangle(result_image, (x1, y1), (x2, y2), (0, 255, 0), 2)

    # Save results
    os.makedirs(save_dir, exist_ok=True)
    output_path = os.path.join(save_dir, f'{Path(image_path).stem}_xseg.jpg')
    cv2.imwrite(output_path, result_image)
    print(f'Output saved: {output_path}')

    mask_path = os.path.join(save_dir, f'{Path(image_path).stem}_xseg_mask.png')
    mask_uint8 = (full_mask * 255).astype(np.uint8)
    cv2.imwrite(mask_path, mask_uint8)
    print(f'Mask saved: {mask_path}')


def process_video(
    detector: RetinaFace,
    parser: XSeg,
    video_path: str,
    save_dir: str = 'outputs',
) -> None:
    """Process a video file."""
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Error: Cannot open video file '{video_path}'")
        return

    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    os.makedirs(save_dir, exist_ok=True)
    output_path = os.path.join(save_dir, f'{Path(video_path).stem}_xseg.mp4')
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    print(f'Processing video: {video_path} ({total_frames} frames)')
    frame_count = 0

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        frame_count += 1
        faces = detector.detect(frame)

        # Accumulate masks from all faces
        full_mask = np.zeros((frame.shape[0], frame.shape[1]), dtype=np.float32)
        for face in faces:
            if face.landmarks is None:
                continue
            mask = parser.parse(frame, landmarks=face.landmarks)
            full_mask = np.maximum(full_mask, mask)

        # Apply visualization
        result_frame = apply_mask_visualization(frame, full_mask)

        # Draw bounding boxes
        for face in faces:
            x1, y1, x2, y2 = map(int, face.bbox[:4])
            cv2.rectangle(result_frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

        cv2.putText(result_frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        out.write(result_frame)

        if frame_count % 100 == 0:
            print(f' Processed {frame_count}/{total_frames} frames...')

    cap.release()
    out.release()
    print(f'Done! Output saved: {output_path}')


def run_camera(
    detector: RetinaFace,
    parser: XSeg,
    camera_id: int = 0,
) -> None:
    """Run real-time detection on webcam."""
    cap = cv2.VideoCapture(camera_id)
    if not cap.isOpened():
        print(f'Cannot open camera {camera_id}')
        return

    print("Press 'q' to quit")

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        frame = cv2.flip(frame, 1)
        faces = detector.detect(frame)

        # Accumulate masks from all faces
        full_mask = np.zeros((frame.shape[0], frame.shape[1]), dtype=np.float32)
        for face in faces:
            if face.landmarks is None:
                continue
            mask = parser.parse(frame, landmarks=face.landmarks)
            full_mask = np.maximum(full_mask, mask)

        # Apply visualization
        result_frame = apply_mask_visualization(frame, full_mask)

        # Draw bounding boxes
        for face in faces:
            x1, y1, x2, y2 = map(int, face.bbox[:4])
            cv2.rectangle(result_frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

        cv2.putText(result_frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        cv2.imshow('XSeg Face Segmentation', result_frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()


def main() -> None:
    arg_parser = argparse.ArgumentParser(description='XSeg face segmentation')
    arg_parser.add_argument('--source', type=str, required=True, help='Image/video path or camera ID (0, 1, ...)')
    arg_parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
    arg_parser.add_argument(
        '--blur',
        type=float,
        default=0,
        help='Gaussian blur sigma for mask smoothing (default: 0 = raw)',
    )
    arg_parser.add_argument(
        '--align-size',
        type=int,
        default=256,
        help='Face alignment size (default: 256)',
    )
    args = arg_parser.parse_args()

    # Initialize models
    detector = RetinaFace()
    parser = XSeg(blur_sigma=args.blur, align_size=args.align_size)

    source_type = get_source_type(args.source)

    if source_type == 'camera':
        run_camera(detector, parser, int(args.source))
    elif source_type == 'image':
        if not os.path.exists(args.source):
            print(f'Error: Image not found: {args.source}')
            return
        process_image(detector, parser, args.source, args.save_dir)
    elif source_type == 'video':
        if not os.path.exists(args.source):
            print(f'Error: Video not found: {args.source}')
            return
        process_video(detector, parser, args.source, args.save_dir)
    else:
        print(f"Error: Unknown source type for '{args.source}'")
        print('Supported formats: images (.jpg, .png, ...), videos (.mp4, .avi, ...), or camera ID (0, 1, ...)')


if __name__ == '__main__':
    main()
180
uniface-cpp/.clang-format
Normal file
@@ -0,0 +1,180 @@
---
# Modern C++ style based on Google with enhancements
Language: Cpp
Standard: c++17

BasedOnStyle: Google
ColumnLimit: 100
IndentWidth: 4
TabWidth: 4
UseTab: Never

# Access modifiers
AccessModifierOffset: -4
IndentAccessModifiers: false

# Alignment
AlignAfterOpenBracket: BlockIndent
AlignArrayOfStructures: Right
AlignConsecutiveAssignments:
  Enabled: false
AlignConsecutiveBitFields:
  Enabled: true
AlignConsecutiveDeclarations:
  Enabled: false
AlignConsecutiveMacros:
  Enabled: true
AlignEscapedNewlines: Left
AlignOperands: AlignAfterOperator
AlignTrailingComments:
  Kind: Always
  OverEmptyLines: 1

# Arguments and parameters
AllowAllArgumentsOnNextLine: true
AllowAllParametersOfDeclarationOnNextLine: true
BinPackArguments: false
BinPackParameters: false

# Short forms
AllowShortBlocksOnASingleLine: Empty
AllowShortCaseLabelsOnASingleLine: false
AllowShortEnumsOnASingleLine: false
AllowShortFunctionsOnASingleLine: Inline
AllowShortIfStatementsOnASingleLine: Never
AllowShortLambdasOnASingleLine: Inline
AllowShortLoopsOnASingleLine: false

# Break behavior
AlwaysBreakAfterReturnType: None
AlwaysBreakBeforeMultilineStrings: true
AlwaysBreakTemplateDeclarations: Yes
BreakAfterAttributes: Leave
BreakBeforeBinaryOperators: None
BreakBeforeBraces: Attach
BreakBeforeConceptDeclarations: Always
BreakBeforeTernaryOperators: true
BreakConstructorInitializers: BeforeComma
BreakInheritanceList: BeforeComma
BreakStringLiterals: true

# Braces
InsertBraces: false
RemoveBracesLLVM: false

# Constructors
PackConstructorInitializers: CurrentLine
ConstructorInitializerIndentWidth: 4

# Empty lines
EmptyLineAfterAccessModifier: Never
EmptyLineBeforeAccessModifier: LogicalBlock
KeepEmptyLinesAtTheStartOfBlocks: false
MaxEmptyLinesToKeep: 1
SeparateDefinitionBlocks: Always

# Includes
IncludeBlocks: Regroup
IncludeCategories:
  # Main header (same name as source file)
  - Regex: '^"([a-zA-Z0-9_]+)\.(h|hpp)"$'
    Priority: 1
    SortPriority: 1
    CaseSensitive: true
  # Project headers
  - Regex: '^".*"$'
    Priority: 2
    SortPriority: 2
  # C system headers
  - Regex: '^<(assert|complex|ctype|errno|fenv|float|inttypes|iso646|limits|locale|math|setjmp|signal|stdalign|stdarg|stdatomic|stdbool|stddef|stdint|stdio|stdlib|stdnoreturn|string|tgmath|threads|time|uchar|wchar|wctype)\.h>$'
    Priority: 3
    SortPriority: 3
  # C++ standard library
  - Regex: '^<[a-z_]+>$'
    Priority: 4
    SortPriority: 4
  # External libraries
  - Regex: '^<.*>$'
    Priority: 5
    SortPriority: 5
SortIncludes: CaseSensitive

# Indentation
IndentCaseBlocks: false
IndentCaseLabels: true
IndentExternBlock: NoIndent
IndentGotoLabels: false
IndentPPDirectives: AfterHash
IndentRequiresClause: true
IndentWrappedFunctionNames: false

# Lambdas
LambdaBodyIndentation: Signature

# Namespaces
CompactNamespaces: false
FixNamespaceComments: true
NamespaceIndentation: None
ShortNamespaceLines: 0

# Penalties (guide formatting decisions)
PenaltyBreakAssignment: 25
PenaltyBreakBeforeFirstCallParameter: 19
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
PenaltyBreakOpenParenthesis: 0
PenaltyBreakString: 1000
PenaltyBreakTemplateDeclaration: 10
PenaltyExcessCharacter: 1000000
PenaltyIndentedWhitespace: 0
PenaltyReturnTypeOnItsOwnLine: 200

# Pointers and references
DerivePointerAlignment: false
PointerAlignment: Left
ReferenceAlignment: Pointer
QualifierAlignment: Leave

# Requires clause (C++20 concepts)
RequiresClausePosition: OwnLine
RequiresExpressionIndentation: OuterScope

# Spacing
BitFieldColonSpacing: Both
SpaceAfterCStyleCast: false
SpaceAfterLogicalNot: false
SpaceAfterTemplateKeyword: true
SpaceAroundPointerQualifiers: Default
SpaceBeforeAssignmentOperators: true
SpaceBeforeCaseColon: false
SpaceBeforeCpp11BracedList: false
SpaceBeforeCtorInitializerColon: true
SpaceBeforeInheritanceColon: true
SpaceBeforeParens: ControlStatements
SpaceBeforeRangeBasedForLoopColon: true
SpaceBeforeSquareBrackets: false
SpaceInEmptyBlock: false
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 2
SpacesInAngles: Never
SpacesInCStyleCastParentheses: false
SpacesInConditionalStatement: false
SpacesInContainerLiterals: false
SpacesInLineCommentPrefix:
  Minimum: 1
  Maximum: -1
SpacesInParentheses: false
SpacesInSquareBrackets: false

# Other
Cpp11BracedListStyle: true
InsertNewlineAtEOF: true
InsertTrailingCommas: None
IntegerLiteralSeparator:
  Binary: 4
  Decimal: 3
  Hex: 4
ReflowComments: true
RemoveSemicolon: false
SortUsingDeclarations: LexicographicNumeric
...
51
uniface-cpp/CMakeLists.txt
Normal file
@@ -0,0 +1,51 @@
cmake_minimum_required(VERSION 3.14)

project(uniface
    VERSION 1.0.0
    DESCRIPTION "Uniface C++ face analysis library"
    LANGUAGES CXX
)

# Options
option(UNIFACE_BUILD_EXAMPLES "Build example programs" ON)

# C++ standard
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

# Compiler warnings
if(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
    add_compile_options(-Wall -Wextra -Wpedantic)
elseif(MSVC)
    add_compile_options(/W4)
endif()

# Find dependencies
find_package(OpenCV REQUIRED COMPONENTS core imgproc dnn calib3d)

# Library
add_library(uniface
    src/utils.cpp
    src/detector.cpp
    src/recognizer.cpp
    src/landmarker.cpp
    src/analyzer.cpp
)

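# The BUILD_INTERFACE/INSTALL_INTERFACE generator expressions keep the public
# include path valid both for in-tree builds and for installed consumers.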
target_include_directories(uniface
    PUBLIC
        $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
        $<INSTALL_INTERFACE:include>
)

target_link_libraries(uniface
    PUBLIC
        ${OpenCV_LIBS}
)

# Examples
if(UNIFACE_BUILD_EXAMPLES)
    add_subdirectory(examples)
endif()
69
uniface-cpp/README.md
Normal file
@@ -0,0 +1,69 @@
# Uniface C++

C++ implementation of the Uniface face analysis library.

## Features

- **Face Detection** - RetinaFace detector with 5-point landmarks

## Requirements

- C++17 compiler
- CMake 3.14+
- OpenCV 4.x

## Build

```bash
mkdir build && cd build
cmake ..
make -j$(nproc)
```

## Usage

### Image Detection

```bash
./examples/detect <model_path> <image_path>
```

### Webcam Demo

```bash
./examples/webcam <model_path> [camera_id]
```

### Code Example

```cpp
#include <uniface/uniface.hpp>
#include <opencv2/highgui.hpp>

int main() {
    uniface::RetinaFace detector("retinaface.onnx");

    cv::Mat image = cv::imread("photo.jpg");
    auto faces = detector.detect(image);

    for (const auto& face : faces) {
        cv::rectangle(image, face.bbox, cv::Scalar(0, 255, 0), 2);
    }

    cv::imwrite("result.jpg", image);
    return 0;
}
```

## Models

Download models from the main uniface repository or use:

```bash
# RetinaFace MobileNet V2
wget https://github.com/your-repo/uniface/releases/download/v1.0/retinaface_mv2.onnx -P models/
```

## License

Same license as the main uniface project.
23
uniface-cpp/examples/CMakeLists.txt
Normal file
@@ -0,0 +1,23 @@
# Examples

find_package(OpenCV REQUIRED COMPONENTS highgui imgcodecs videoio)

# Image detection example
add_executable(detect detect.cpp)
target_link_libraries(detect PRIVATE uniface ${OpenCV_LIBS})

# Face recognition example
add_executable(recognize recognize.cpp)
target_link_libraries(recognize PRIVATE uniface ${OpenCV_LIBS})

# Facial landmarks example
add_executable(landmarks landmarks.cpp)
target_link_libraries(landmarks PRIVATE uniface ${OpenCV_LIBS})

# Face analyzer example
add_executable(analyzer analyzer.cpp)
target_link_libraries(analyzer PRIVATE uniface ${OpenCV_LIBS})

# Webcam example
add_executable(webcam webcam.cpp)
target_link_libraries(webcam PRIVATE uniface ${OpenCV_LIBS})
113
uniface-cpp/examples/analyzer.cpp
Normal file
@@ -0,0 +1,113 @@
#include <iomanip>
#include <iostream>

#include <opencv2/highgui.hpp>
#include <opencv2/imgproc.hpp>
#include <uniface/uniface.hpp>

int main(int argc, char** argv) {
    if (argc < 3) {
        std::cout << "Usage: " << argv[0]
                  << " <detector_model> <image_path> [recognizer_model] [landmark_model]"
                  << std::endl;
        std::cout << "\nAnalyzes faces in an image using available models." << std::endl;
        std::cout << " - detector_model: Required. Path to face detector ONNX model." << std::endl;
        std::cout << " - recognizer_model: Optional. Path to face recognizer ONNX model."
                  << std::endl;
        std::cout << " - landmark_model: Optional. Path to 106-point landmark ONNX model."
                  << std::endl;
        return 1;
    }

    const std::string detector_path = argv[1];
    const std::string image_path = argv[2];
    const std::string recognizer_path = (argc > 3) ? argv[3] : "";
    const std::string landmark_path = (argc > 4) ? argv[4] : "";

    try {
        // Create analyzer and load components
        uniface::FaceAnalyzer analyzer;

        std::cout << "Loading detector: " << detector_path << std::endl;
        analyzer.loadDetector(detector_path);

        if (!recognizer_path.empty()) {
            std::cout << "Loading recognizer: " << recognizer_path << std::endl;
            analyzer.loadRecognizer(recognizer_path);
        }

        if (!landmark_path.empty()) {
            std::cout << "Loading landmarker: " << landmark_path << std::endl;
            analyzer.loadLandmarker(landmark_path);
        }

        // Load image
        cv::Mat image = cv::imread(image_path);
        if (image.empty()) {
            std::cerr << "Failed to load image: " << image_path << std::endl;
            return 1;
        }

        std::cout << "\nAnalyzing image..." << std::endl;

        // Analyze faces
        auto results = analyzer.analyze(image);

        std::cout << "Found " << results.size() << " face(s)\n" << std::endl;

        // Process each face
        for (size_t i = 0; i < results.size(); ++i) {
            const auto& result = results[i];

            std::cout << "Face " << (i + 1) << ":" << std::endl;
            std::cout << " BBox: [" << result.face.bbox.x << ", " << result.face.bbox.y << ", "
                      << result.face.bbox.width << ", " << result.face.bbox.height << "]"
                      << std::endl;
            std::cout << std::fixed << std::setprecision(3);
            std::cout << " Confidence: " << result.face.confidence << std::endl;

            // Draw bounding box
            cv::rectangle(image, result.face.bbox, cv::Scalar(0, 255, 0), 2);

            // Draw 5-point landmarks from detector
            for (const auto& pt : result.face.landmarks) {
                cv::circle(image, pt, 3, cv::Scalar(0, 0, 255), -1);
            }

            // If 106-point landmarks available
            if (result.landmarks) {
                std::cout << " Landmarks: 106 points detected" << std::endl;
                for (const auto& pt : result.landmarks->points) {
                    cv::circle(image, pt, 1, cv::Scalar(0, 255, 255), -1);
                }
            }

            // If embedding available
            if (result.embedding) {
                // Show first few values of embedding
                std::cout << " Embedding: [";
                for (size_t j = 0; j < 5; ++j) {
                    std::cout << (*result.embedding)[j];
                    if (j < 4)
                        std::cout << ", ";
                }
                std::cout << ", ... ] (512-dim)" << std::endl;
            }

            std::cout << std::endl;
        }

        // Save result
        cv::imwrite("analyzer_result.jpg", image);
        std::cout << "Saved result to analyzer_result.jpg" << std::endl;

    } catch (const cv::Exception& e) {
        std::cerr << "OpenCV Error: " << e.what() << std::endl;
        return 1;
    } catch (const std::exception& e) {
        std::cerr << "Error: " << e.what() << std::endl;
        return 1;
    }

    return 0;
}
48
uniface-cpp/examples/detect.cpp
Normal file
@@ -0,0 +1,48 @@
#include <iostream>

#include <opencv2/highgui.hpp>
#include <opencv2/imgproc.hpp>
#include <uniface/uniface.hpp>

int main(int argc, char** argv) {
    if (argc < 3) {
        std::cout << "Usage: " << argv[0] << " <model_path> <image_path>" << std::endl;
        return 1;
    }

    const std::string model_path = argv[1];
    const std::string image_path = argv[2];

    try {
        uniface::RetinaFace detector(model_path);

        cv::Mat image = cv::imread(image_path);
        if (image.empty()) {
            std::cerr << "Failed to load image: " << image_path << std::endl;
            return 1;
        }

        const auto faces = detector.detect(image);
        std::cout << "Detected " << faces.size() << " faces." << std::endl;

        // Draw results
        for (const auto& face : faces) {
            cv::rectangle(image, face.bbox, cv::Scalar(0, 255, 0), 2);
            for (const auto& pt : face.landmarks) {
                cv::circle(image, pt, 2, cv::Scalar(0, 0, 255), -1);
            }
        }

        cv::imwrite("result.jpg", image);
        std::cout << "Saved result to result.jpg" << std::endl;

    } catch (const cv::Exception& e) {
        std::cerr << "OpenCV Error: " << e.what() << std::endl;
        return 1;
    } catch (const std::exception& e) {
        std::cerr << "Error: " << e.what() << std::endl;
        return 1;
    }

    return 0;
}
66
uniface-cpp/examples/landmarks.cpp
Normal file
@@ -0,0 +1,66 @@
#include <iostream>

#include <opencv2/highgui.hpp>
#include <opencv2/imgproc.hpp>
#include <uniface/uniface.hpp>

int main(int argc, char** argv) {
    if (argc < 4) {
        std::cout << "Usage: " << argv[0] << " <detector_model> <landmark_model> <image_path>"
                  << std::endl;
        std::cout << "\nDetects 106-point facial landmarks and saves visualization." << std::endl;
        return 1;
    }

    const std::string detector_path = argv[1];
    const std::string landmark_path = argv[2];
    const std::string image_path = argv[3];

    try {
        // Load models
        uniface::RetinaFace detector(detector_path);
        uniface::Landmark106 landmarker(landmark_path);

        // Load image
        cv::Mat image = cv::imread(image_path);
        if (image.empty()) {
            std::cerr << "Failed to load image: " << image_path << std::endl;
            return 1;
        }

        // Detect faces
        auto faces = detector.detect(image);
        std::cout << "Detected " << faces.size() << " face(s)" << std::endl;

        // Process each face
        for (size_t i = 0; i < faces.size(); ++i) {
            const auto& face = faces[i];

            // Draw bounding box
            cv::rectangle(image, face.bbox, cv::Scalar(0, 255, 0), 2);

            // Get 106-point landmarks
            auto landmarks = landmarker.getLandmarks(image, face.bbox);

            // Draw all 106 points
            for (const auto& pt : landmarks.points) {
                cv::circle(image, pt, 1, cv::Scalar(0, 255, 255), -1);
            }

            std::cout << "Face " << (i + 1) << ": 106 landmarks detected" << std::endl;
        }

        // Save result
        cv::imwrite("landmarks_result.jpg", image);
        std::cout << "Saved result to landmarks_result.jpg" << std::endl;

    } catch (const cv::Exception& e) {
        std::cerr << "OpenCV Error: " << e.what() << std::endl;
        return 1;
    } catch (const std::exception& e) {
        std::cerr << "Error: " << e.what() << std::endl;
        return 1;
    }

    return 0;
}
81
uniface-cpp/examples/recognize.cpp
Normal file
@@ -0,0 +1,81 @@
#include <iomanip>
#include <iostream>

#include <opencv2/highgui.hpp>
#include <opencv2/imgproc.hpp>
#include <uniface/uniface.hpp>

int main(int argc, char** argv) {
    if (argc < 5) {
        std::cout << "Usage: " << argv[0]
                  << " <detector_model> <recognizer_model> <image1> <image2>" << std::endl;
        std::cout << "\nCompares faces from two images and outputs similarity score." << std::endl;
        return 1;
    }

    const std::string detector_path = argv[1];
    const std::string recognizer_path = argv[2];
    const std::string image1_path = argv[3];
    const std::string image2_path = argv[4];

    try {
        // Load models
        uniface::RetinaFace detector(detector_path);
        uniface::ArcFace recognizer(recognizer_path);

        // Load images
        cv::Mat image1 = cv::imread(image1_path);
        cv::Mat image2 = cv::imread(image2_path);

        if (image1.empty()) {
            std::cerr << "Failed to load image: " << image1_path << std::endl;
            return 1;
        }
        if (image2.empty()) {
            std::cerr << "Failed to load image: " << image2_path << std::endl;
            return 1;
        }

        // Detect faces
        auto faces1 = detector.detect(image1);
        auto faces2 = detector.detect(image2);

        if (faces1.empty()) {
            std::cerr << "No face detected in image1" << std::endl;
            return 1;
        }
        if (faces2.empty()) {
            std::cerr << "No face detected in image2" << std::endl;
            return 1;
        }

        std::cout << "Detected " << faces1.size() << " face(s) in image1" << std::endl;
        std::cout << "Detected " << faces2.size() << " face(s) in image2" << std::endl;

        // Get embeddings for first face in each image
        auto embedding1 = recognizer.getNormalizedEmbedding(image1, faces1[0].landmarks);
        auto embedding2 = recognizer.getNormalizedEmbedding(image2, faces2[0].landmarks);

        // Compute similarity
float similarity = uniface::cosineSimilarity(embedding1, embedding2);
|
||||
|
||||
std::cout << std::fixed << std::setprecision(4);
|
||||
std::cout << "\nCosine Similarity: " << similarity << std::endl;
|
||||
|
||||
// Interpretation
|
||||
if (similarity > 0.4f) {
|
||||
std::cout << "Result: Same person (similarity > 0.4)" << std::endl;
|
||||
} else {
|
||||
std::cout << "Result: Different persons (similarity <= 0.4)" << std::endl;
|
||||
}
|
||||
|
||||
} catch (const cv::Exception& e) {
|
||||
std::cerr << "OpenCV Error: " << e.what() << std::endl;
|
||||
return 1;
|
||||
} catch (const std::exception& e) {
|
||||
std::cerr << "Error: " << e.what() << std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
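Since getNormalizedEmbedding, as the name suggests, appears to return unit-length vectors, the cosine similarity above reduces to a plain dot product. A minimal sketch of the equivalent computation, assuming uniface::Embedding supports size() and operator[] like a std::vector<float> (the library's own cosineSimilarity should be preferred):

    // Illustrative only - mirrors what cosineSimilarity computes for
    // normalized vectors; assumes Embedding indexes like std::vector<float>.
    float dot = 0.0f;
    for (size_t k = 0; k < embedding1.size(); ++k) {
        dot += embedding1[k] * embedding2[k];
    }
    // For unit-length embeddings, dot is the cosine similarity in [-1, 1];
    // values near 1.0 suggest the same identity.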
209
uniface-cpp/examples/webcam.cpp
Normal file
@@ -0,0 +1,209 @@
#include <cctype>
#include <chrono>
#include <cstring>
#include <iostream>
#include <memory>

#include <opencv2/highgui.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/videoio.hpp>
#include <uniface/uniface.hpp>

int main(int argc, char** argv) {
    if (argc < 2) {
        std::cout << "Usage: " << argv[0] << " <detector_model> [landmark_model] [camera_id]"
                  << std::endl;
        std::cout << "\nArguments:" << std::endl;
        std::cout << "  detector_model : Path to face detector ONNX model (required)" << std::endl;
        std::cout << "  landmark_model : Path to 106-point landmark ONNX model (optional)"
                  << std::endl;
        std::cout << "  camera_id      : Camera device ID, default 0 (optional)" << std::endl;
        std::cout << "\nExamples:" << std::endl;
        std::cout << "  " << argv[0] << " detector.onnx" << std::endl;
        std::cout << "  " << argv[0] << " detector.onnx landmark.onnx" << std::endl;
        std::cout << "  " << argv[0] << " detector.onnx landmark.onnx 1" << std::endl;
        return 1;
    }

    const std::string detector_path = argv[1];
    std::string landmark_path;
    int camera_id = 0;

    // Parse arguments - landmark_model is optional
    if (argc >= 3) {
        // Check if argv[2] is a number (camera_id) or a path (landmark_model)
        if (std::isdigit(static_cast<unsigned char>(argv[2][0])) && std::strlen(argv[2]) <= 2) {
            camera_id = std::atoi(argv[2]);
        } else {
            landmark_path = argv[2];
            if (argc >= 4) {
                camera_id = std::atoi(argv[3]);
            }
        }
    }

    try {
        // Load detector
        std::cout << "Loading detector: " << detector_path << std::endl;
        uniface::RetinaFace detector(detector_path);
        std::cout << "Detector loaded!" << std::endl;

        // Load landmark model if provided
        std::unique_ptr<uniface::Landmark106> landmarker;
        if (!landmark_path.empty()) {
            std::cout << "Loading landmarker: " << landmark_path << std::endl;
            landmarker = std::make_unique<uniface::Landmark106>(landmark_path);
            std::cout << "Landmarker loaded!" << std::endl;
        }

        // Open camera
        cv::VideoCapture cap(camera_id);
        if (!cap.isOpened()) {
            std::cerr << "Error: Cannot open camera " << camera_id << std::endl;
            return 1;
        }

        const int frame_width = static_cast<int>(cap.get(cv::CAP_PROP_FRAME_WIDTH));
        const int frame_height = static_cast<int>(cap.get(cv::CAP_PROP_FRAME_HEIGHT));
        std::cout << "\nCamera opened: " << frame_width << "x" << frame_height << std::endl;
        std::cout << "Press 'q' to quit, 's' to save screenshot, 'l' to toggle landmarks"
                  << std::endl;

        cv::Mat frame;
        int frame_count = 0;
        double total_time = 0.0;
        bool show_landmarks = true;  // Toggle for 106-point landmarks

        while (true) {
            cap >> frame;
            if (frame.empty()) {
                std::cerr << "Error: Empty frame captured" << std::endl;
                break;
            }

            const auto start = std::chrono::high_resolution_clock::now();

            // Detect faces
            const auto faces = detector.detect(frame);

            // Get 106-point landmarks if available
            std::vector<uniface::Landmarks> all_landmarks;
            if (landmarker && show_landmarks) {
                all_landmarks.reserve(faces.size());
                for (const auto& face : faces) {
                    all_landmarks.push_back(landmarker->getLandmarks(frame, face.bbox));
                }
            }

            const auto end = std::chrono::high_resolution_clock::now();
            const std::chrono::duration<double, std::milli> elapsed = end - start;
            const double inference_time = elapsed.count();

            ++frame_count;
            total_time += inference_time;
            const double avg_time = total_time / static_cast<double>(frame_count);
            const double fps = 1000.0 / avg_time;

            // Draw results
            for (size_t i = 0; i < faces.size(); ++i) {
                const auto& face = faces[i];

                // Draw bounding box
                cv::rectangle(frame, face.bbox, cv::Scalar(0, 255, 0), 2);

                // Draw 5-point landmarks from detector
                for (size_t j = 0; j < face.landmarks.size(); ++j) {
                    cv::Scalar color;
                    if (j < 2) {
                        color = cv::Scalar(255, 0, 0);  // Eyes - Blue
                    } else if (j == 2) {
                        color = cv::Scalar(0, 255, 0);  // Nose - Green
                    } else {
                        color = cv::Scalar(0, 0, 255);  // Mouth - Red
                    }
                    cv::circle(frame, face.landmarks[j], 3, color, -1);
                }

                // Draw 106-point landmarks if available
                if (i < all_landmarks.size()) {
                    const auto& lm = all_landmarks[i];

                    // Draw all 106 points
                    for (const auto& pt : lm.points) {
                        cv::circle(frame, pt, 1, cv::Scalar(0, 255, 255), -1);
                    }
                }

                // Draw confidence
                const std::string conf_text = cv::format("%.2f", face.confidence);
                const cv::Point text_org(
                    static_cast<int>(face.bbox.x), static_cast<int>(face.bbox.y) - 5
                );
                cv::putText(
                    frame,
                    conf_text,
                    text_org,
                    cv::FONT_HERSHEY_SIMPLEX,
                    0.5,
                    cv::Scalar(0, 255, 0),
                    1
                );
            }

            // Draw info overlay
            const std::string mode = landmarker
                ? (show_landmarks ? "Detection + 106 Landmarks" : "Detection Only")
                : "Detection Only";
            const std::string info_text = cv::format(
                "FPS: %.1f | Faces: %zu | Time: %.1fms", fps, faces.size(), inference_time
            );
            cv::putText(
                frame,
                info_text,
                cv::Point(10, 30),
                cv::FONT_HERSHEY_SIMPLEX,
                0.7,
                cv::Scalar(0, 255, 0),
                2
            );
            cv::putText(
                frame,
                mode,
                cv::Point(10, 60),
                cv::FONT_HERSHEY_SIMPLEX,
                0.6,
                cv::Scalar(255, 255, 0),
                2
            );

            cv::imshow("Uniface - Face Detection & Landmarks", frame);

            const char key = static_cast<char>(cv::waitKey(1));
            if (key == 'q' || key == 27) {
                break;
            } else if (key == 's') {
                const std::string filename = cv::format("screenshot_%d.jpg", frame_count);
                cv::imwrite(filename, frame);
                std::cout << "Screenshot saved: " << filename << std::endl;
            } else if (key == 'l' && landmarker) {
                show_landmarks = !show_landmarks;
                std::cout << "106-point landmarks: " << (show_landmarks ? "ON" : "OFF")
                          << std::endl;
            }
        }

        cap.release();
        cv::destroyAllWindows();

        std::cout << "\n=== Statistics ===" << std::endl;
        std::cout << "Total frames: " << frame_count << std::endl;
        std::cout << "Average inference time: " << (total_time / frame_count) << " ms" << std::endl;

    } catch (const cv::Exception& e) {
        std::cerr << "OpenCV Error: " << e.what() << std::endl;
        return 1;
    } catch (const std::exception& e) {
        std::cerr << "Error: " << e.what() << std::endl;
        return 1;
    }

    return 0;
}
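One note on the timing overlay above: the displayed FPS is derived from the lifetime average of per-frame inference time, so it converges slowly and hides recent slowdowns. A hedged alternative (not part of the library) is an exponential moving average; a self-contained sketch, where the function name and the alpha value are illustrative choices:

    // Sketch only (not in uniface): exponentially weighted moving average of a
    // per-frame time in milliseconds. Call once per frame; returns smoothed ms.
    double updateEmaMs(double& ema_ms, double frame_ms, double alpha = 0.1) {
        // First sample initializes the average; afterwards blend new into old.
        ema_ms = (ema_ms == 0.0) ? frame_ms : alpha * frame_ms + (1.0 - alpha) * ema_ms;
        return ema_ms;
    }

Inside the capture loop this would replace the avg_time computation, e.g. fps = 1000.0 / updateEmaMs(ema_ms, inference_time), trading long-run stability for responsiveness to recent frames.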
63
uniface-cpp/include/uniface/analyzer.hpp
Normal file
@@ -0,0 +1,63 @@
#ifndef UNIFACE_ANALYZER_HPP_
#define UNIFACE_ANALYZER_HPP_

#include "uniface/detector.hpp"
#include "uniface/landmarker.hpp"
#include "uniface/recognizer.hpp"
#include "uniface/types.hpp"

#include <memory>
#include <optional>
#include <string>
#include <vector>

namespace uniface {

// Result of face analysis
struct AnalyzedFace {
    Face face;                           // detection result (bbox, confidence, 5-point landmarks)
    std::optional<Landmarks> landmarks;  // 106-point landmarks (if landmarker loaded)
    std::optional<Embedding> embedding;  // face embedding (if recognizer loaded)
};

// Unified face analysis combining detection, recognition, and landmarks
class FaceAnalyzer {
public:
    FaceAnalyzer() = default;
    ~FaceAnalyzer() = default;

    FaceAnalyzer(const FaceAnalyzer&) = delete;
    FaceAnalyzer& operator=(const FaceAnalyzer&) = delete;
    FaceAnalyzer(FaceAnalyzer&&) = default;
    FaceAnalyzer& operator=(FaceAnalyzer&&) = default;

    // Load components (returns *this for chaining)
    FaceAnalyzer& loadDetector(const std::string& path, const DetectorConfig& config = DetectorConfig{});
    FaceAnalyzer& loadRecognizer(const std::string& path, const RecognizerConfig& config = RecognizerConfig{});
    FaceAnalyzer& loadLandmarker(const std::string& path, const LandmarkerConfig& config = LandmarkerConfig{});

    // Analyze faces in BGR image (throws if detector not loaded)
    [[nodiscard]] std::vector<AnalyzedFace> analyze(const cv::Mat& image);

    // Component checks
    [[nodiscard]] bool hasDetector() const noexcept { return detector_ != nullptr; }
    [[nodiscard]] bool hasRecognizer() const noexcept { return recognizer_ != nullptr; }
    [[nodiscard]] bool hasLandmarker() const noexcept { return landmarker_ != nullptr; }

    // Direct component access
    [[nodiscard]] RetinaFace* detector() noexcept { return detector_.get(); }
    [[nodiscard]] ArcFace* recognizer() noexcept { return recognizer_.get(); }
    [[nodiscard]] Landmark106* landmarker() noexcept { return landmarker_.get(); }
    [[nodiscard]] const RetinaFace* detector() const noexcept { return detector_.get(); }
    [[nodiscard]] const ArcFace* recognizer() const noexcept { return recognizer_.get(); }
    [[nodiscard]] const Landmark106* landmarker() const noexcept { return landmarker_.get(); }

private:
    std::unique_ptr<RetinaFace> detector_;
    std::unique_ptr<ArcFace> recognizer_;
    std::unique_ptr<Landmark106> landmarker_;
};

}  // namespace uniface

#endif  // UNIFACE_ANALYZER_HPP_
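For orientation, a minimal usage sketch of the chaining API this header declares. The model and image file names are placeholders, and the include path <uniface/analyzer.hpp> is assumed by analogy with <uniface/uniface.hpp> used in the examples:

    #include <iostream>

    #include <opencv2/imgcodecs.hpp>
    #include <uniface/analyzer.hpp>  // assumed install path, mirroring <uniface/uniface.hpp>

    int main() {
        // Chain the loaders; analyze() throws if no detector is loaded.
        uniface::FaceAnalyzer analyzer;
        analyzer.loadDetector("detector.onnx")  // placeholder model paths
                .loadRecognizer("recognizer.onnx")
                .loadLandmarker("landmark.onnx");

        const cv::Mat image = cv::imread("photo.jpg");  // BGR, as analyze() expects
        for (const auto& result : analyzer.analyze(image)) {
            std::cout << "Face confidence: " << result.face.confidence << std::endl;
            // Optional fields are populated only for loaded components.
            if (result.landmarks) {
                std::cout << "  106-point landmarks available" << std::endl;
            }
            if (result.embedding) {
                std::cout << "  Embedding available for recognition" << std::endl;
            }
        }
        return 0;
    }

Making the loaders optional and exposing std::optional fields in AnalyzedFace lets one pipeline serve detection-only, landmark, and recognition use cases without separate code paths.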