Mirror of https://github.com/yakhyo/uniface.git (synced 2026-05-16 13:47:53 +00:00)

# Compare commits

3 commits

| Author | SHA1 | Date |
|--------|------|------|
|        | 971775b2e8 |      |
|        | c520ea2df2 |      |
|        | 2a8cb54d31 |      |
@@ -82,23 +82,23 @@ def process(items: List[str], config: Optional[Dict[str, int]] = None) -> Tuple[

Use [Google-style docstrings](https://google.github.io/styleguide/pyguide.html#38-comments-and-docstrings) for all public APIs:

```python
-def detect_faces(image: np.ndarray, threshold: float = 0.5) -> list[Face]:
-    """Detect faces in an image.
+def create_detector(method: str = 'retinaface', **kwargs: Any) -> BaseDetector:
+    """Factory function to create face detectors.

    Args:
-        image: Input image as a numpy array with shape (H, W, C) in BGR format.
-        threshold: Confidence threshold for filtering detections. Defaults to 0.5.
+        method: Detection method. Options: 'retinaface', 'scrfd', 'yolov5face', 'yolov8face'.
+        **kwargs: Detector-specific parameters.

    Returns:
-        List of Face objects containing bounding boxes, confidence scores,
-        and facial landmarks.
+        Initialized detector instance.

    Raises:
-        ValueError: If the input image has invalid dimensions.
+        ValueError: If method is not supported.

    Example:
-        >>> from uniface import detect_faces
-        >>> faces = detect_faces(image, threshold=0.8)
+        >>> from uniface import create_detector
+        >>> detector = create_detector('retinaface', confidence_threshold=0.8)
+        >>> faces = detector.detect(image)
        >>> print(f"Found {len(faces)} faces")
    """
```
@@ -174,16 +174,16 @@ When adding a new model or feature:

Example notebooks demonstrating library usage:

-| Example | Notebook |
-|---------|----------|
-| Face Detection | [01_face_detection.ipynb](examples/01_face_detection.ipynb) |
-| Face Alignment | [02_face_alignment.ipynb](examples/02_face_alignment.ipynb) |
-| Face Verification | [03_face_verification.ipynb](examples/03_face_verification.ipynb) |
-| Face Search | [04_face_search.ipynb](examples/04_face_search.ipynb) |
-| Face Analyzer | [05_face_analyzer.ipynb](examples/05_face_analyzer.ipynb) |
-| Face Parsing | [06_face_parsing.ipynb](examples/06_face_parsing.ipynb) |
-| Gaze Estimation | [08_gaze_estimation.ipynb](examples/08_gaze_estimation.ipynb) |
+| Example | Notebook |
+| ------------------ | ------------------------------------------------------------------- |
+| Face Detection | [01_face_detection.ipynb](examples/01_face_detection.ipynb) |
+| Face Alignment | [02_face_alignment.ipynb](examples/02_face_alignment.ipynb) |
+| Face Verification | [03_face_verification.ipynb](examples/03_face_verification.ipynb) |
+| Face Search | [04_face_search.ipynb](examples/04_face_search.ipynb) |
+| Face Analyzer | [05_face_analyzer.ipynb](examples/05_face_analyzer.ipynb) |
+| Face Parsing | [06_face_parsing.ipynb](examples/06_face_parsing.ipynb) |
+| Face Anonymization | [07_face_anonymization.ipynb](examples/07_face_anonymization.ipynb) |
+| Gaze Estimation | [08_gaze_estimation.ipynb](examples/08_gaze_estimation.ipynb) |

## Questions?

README.md (25 lines changed)
@@ -9,6 +9,7 @@

[](https://pepy.tech/projects/uniface)
[](https://yakhyo.github.io/uniface/)
[](https://www.kaggle.com/yakhyokhuja/code)
+[](https://discord.gg/wdzrjr7R5j)

</div>

@@ -26,6 +27,7 @@

- **Face Detection** — RetinaFace, SCRFD, YOLOv5-Face, and YOLOv8-Face with 5-point landmarks
- **Face Recognition** — ArcFace, MobileFace, and SphereFace embeddings
+- **Face Tracking** — Multi-object tracking with [BYTETracker](https://github.com/yakhyo/bytetrack-tracker) for persistent IDs across video frames
- **Facial Landmarks** — 106-point landmark localization module (separate from 5-point detector landmarks)
- **Face Parsing** — BiSeNet semantic segmentation (19 classes), XSeg face masking
- **Gaze Estimation** — Real-time gaze direction with MobileGaze

@@ -71,7 +73,18 @@ Models are downloaded automatically on first use and verified via SHA-256.

Default cache location: `~/.uniface/models`

-You can override it with `UNIFACE_CACHE_DIR=/your/cache/path`
+Override with the programmatic API or environment variable:
+
+```python
+from uniface.model_store import get_cache_dir, set_cache_dir
+
+set_cache_dir('/data/models')
+print(get_cache_dir())  # /data/models
+```
+
+```bash
+export UNIFACE_CACHE_DIR=/data/models
+```

---

@@ -79,7 +92,7 @@ You can override it with `UNIFACE_CACHE_DIR=/your/cache/path`

```python
import cv2
-from uniface import RetinaFace
+from uniface.detection import RetinaFace

detector = RetinaFace()

@@ -106,7 +119,9 @@ for face in faces:

```python
import cv2
-from uniface import RetinaFace, ArcFace, FaceAnalyzer
+from uniface.analyzer import FaceAnalyzer
+from uniface.detection import RetinaFace
+from uniface.recognition import ArcFace

detector = RetinaFace()
recognizer = ArcFace()

@@ -128,7 +143,7 @@ for face in faces:

## Execution Providers (ONNX Runtime)

```python
-from uniface import RetinaFace
+from uniface.detection import RetinaFace

# Force CPU-only inference
detector = RetinaFace(providers=["CPUExecutionProvider"])

@@ -189,6 +204,7 @@ If you plan commercial use, verify model license compatibility.

| Detection | [retinaface-pytorch](https://github.com/yakhyo/retinaface-pytorch) | ✓ | RetinaFace PyTorch Training & Export |
| Detection | [yolov5-face-onnx-inference](https://github.com/yakhyo/yolov5-face-onnx-inference) | - | YOLOv5-Face ONNX Inference |
| Detection | [yolov8-face-onnx-inference](https://github.com/yakhyo/yolov8-face-onnx-inference) | - | YOLOv8-Face ONNX Inference |
+| Tracking | [bytetrack-tracker](https://github.com/yakhyo/bytetrack-tracker) | - | BYTETracker Multi-Object Tracking |
| Recognition | [face-recognition](https://github.com/yakhyo/face-recognition) | ✓ | MobileFace, SphereFace Training |
| Parsing | [face-parsing](https://github.com/yakhyo/face-parsing) | ✓ | BiSeNet Face Parsing |
| Parsing | [face-segmentation](https://github.com/yakhyo/face-segmentation) | - | XSeg Face Segmentation |

@@ -209,6 +225,7 @@ Contributions are welcome. Please see [CONTRIBUTING.md](CONTRIBUTING.md).

If you find this project useful, consider giving it a ⭐ on GitHub — it helps others discover it!

Questions or feedback:
+- Discord: https://discord.gg/wdzrjr7R5j
- GitHub Issues: https://github.com/yakhyo/uniface/issues
- DeepWiki Q&A: https://deepwiki.com/yakhyo/uniface
@@ -93,7 +93,7 @@ landmarks = face.landmarks  # Shape: (5, 2)

Returned by `Landmark106`:

```python
-from uniface import Landmark106
+from uniface.landmark import Landmark106

landmarker = Landmark106()
landmarks = landmarker.get_landmarks(image, face.bbox)

@@ -174,7 +174,7 @@ yaw = -90° ────┼──── yaw = +90°

Face alignment uses 5-point landmarks to normalize face orientation:

```python
-from uniface import face_alignment
+from uniface.face_utils import face_alignment

# Align face to standard template
aligned_face = face_alignment(image, face.landmarks)

@@ -9,7 +9,7 @@ UniFace uses ONNX Runtime for model inference, which supports multiple hardware

UniFace automatically selects the optimal execution provider based on available hardware:

```python
-from uniface import RetinaFace
+from uniface.detection import RetinaFace

# Automatically uses best available provider
detector = RetinaFace()

@@ -28,7 +28,8 @@ detector = RetinaFace()

You can specify which execution provider to use by passing the `providers` parameter:

```python
-from uniface import RetinaFace, ArcFace
+from uniface.detection import RetinaFace
+from uniface.recognition import ArcFace

# Force CPU execution (even if GPU is available)
detector = RetinaFace(providers=['CPUExecutionProvider'])

@@ -174,7 +175,7 @@ pip install uniface[gpu]

Smaller input sizes are faster but may reduce accuracy:

```python
-from uniface import RetinaFace
+from uniface.detection import RetinaFace

# Faster, lower accuracy
detector = RetinaFace(input_size=(320, 320))

@@ -53,6 +53,7 @@ class Face:

    race: str | None = None                  # "East Asian", etc.
    emotion: str | None = None               # "Happy", etc.
    emotion_confidence: float | None = None
+    track_id: int | None = None              # Persistent ID from tracker
```

### Properties
@@ -177,7 +178,7 @@ print(f"Norm: {np.linalg.norm(embedding):.4f}")  # ~1.0

### Similarity Computation

```python
-from uniface import compute_similarity
+from uniface.face_utils import compute_similarity

similarity = compute_similarity(embedding1, embedding2)
# Returns: float between -1 and 1 (cosine similarity)
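# For reference, a minimal equivalent sketch (assumption, not the uniface
# source): cosine similarity of L2-normalized embeddings reduces to a dot
# product, e.g. similarity = float(np.dot(embedding1, embedding2))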
@@ -9,7 +9,7 @@ UniFace automatically downloads and caches models. This page explains how model

Models are downloaded on first use:

```python
-from uniface import RetinaFace
+from uniface.detection import RetinaFace

# First run: downloads model to cache
detector = RetinaFace()  # ~3.5 MB download

@@ -44,44 +44,57 @@ Default cache directory:

## Custom Cache Directory

-Specify a custom cache location:
+Use the programmatic API to change the cache location at runtime:

```python
-from uniface.model_store import verify_model_weights
-from uniface.constants import RetinaFaceWeights
+from uniface.model_store import get_cache_dir, set_cache_dir

-# Download to custom directory
-model_path = verify_model_weights(
-    RetinaFaceWeights.MNET_V2,
-    root='./my_models'
-)
-print(f"Model at: {model_path}")
+# Set a custom cache directory
+set_cache_dir('/data/models')
+
+# Verify the current path
+print(get_cache_dir())  # /data/models
+
+# All subsequent model loads use the new directory
+from uniface.detection import RetinaFace
+detector = RetinaFace()  # Downloads to /data/models/
```

+Or set the `UNIFACE_CACHE_DIR` environment variable (see [Environment Variables](#environment-variables) below).
+
---

## Pre-Download Models

-Download models before deployment:
+Download models before deployment using the concurrent downloader:

```python
-from uniface.model_store import verify_model_weights
+from uniface.model_store import download_models
from uniface.constants import (
    RetinaFaceWeights,
    ArcFaceWeights,
    AgeGenderWeights,
)

-# Download all needed models
-models = [
+# Download multiple models concurrently (up to 4 threads by default)
+paths = download_models([
    RetinaFaceWeights.MNET_V2,
    ArcFaceWeights.MNET,
    AgeGenderWeights.DEFAULT,
-]
+])

-for model in models:
-    path = verify_model_weights(model)
-    print(f"Downloaded: {path}")
+for model, path in paths.items():
+    print(f"{model.value} -> {path}")
```

+Or download one at a time:
+
+```python
+from uniface.model_store import verify_model_weights
+from uniface.constants import RetinaFaceWeights
+
+path = verify_model_weights(RetinaFaceWeights.MNET_V2)
+print(f"Downloaded: {path}")
+```
+
Or use the CLI tool:

@@ -115,11 +128,20 @@ print(f"Copy from: {path}")

scp -r ~/.uniface/models/ user@offline-machine:~/.uniface/models/
```

-### 3. Use normally
+### 3. Point to the cache (if non-default location)
+
+```python
+from uniface.model_store import set_cache_dir
+
+# Only needed if the models are not at ~/.uniface/models/
+set_cache_dir('/path/to/copied/models')
+```
+
+### 4. Use normally

```python
# Models load from local cache
-from uniface import RetinaFace
+from uniface.detection import RetinaFace
detector = RetinaFace()  # No network required
```

@@ -182,7 +204,12 @@ If a model fails verification, it's re-downloaded automatically.

## Clear Cache

-Remove cached models:
+Find and remove cached models:
+
+```python
+from uniface.model_store import get_cache_dir
+print(get_cache_dir())  # shows the active cache path
+```

```bash
# Remove all cached models

@@ -198,20 +225,35 @@ Models will be re-downloaded on next use.

## Environment Variables

-Set custom cache location via environment variable:
+There are three equivalent ways to configure the cache directory:

-```bash
-export UNIFACE_CACHE_DIR=/path/to/custom/cache
-```
+**1. Programmatic API (recommended)**

+```python
+from uniface.model_store import get_cache_dir, set_cache_dir
+
+set_cache_dir('/path/to/custom/cache')
+print(get_cache_dir())  # /path/to/custom/cache
+```
+
+**2. Direct environment variable (Python)**
+
+```python
+import os
+os.environ['UNIFACE_CACHE_DIR'] = '/path/to/custom/cache'
+
-from uniface import RetinaFace
+from uniface.detection import RetinaFace
+detector = RetinaFace()  # Uses custom cache
+```
+
+**3. Shell environment variable**
+
+```bash
+export UNIFACE_CACHE_DIR=/path/to/custom/cache
+```
+
+All three methods set the same `UNIFACE_CACHE_DIR` environment variable under the hood. `get_cache_dir()` always returns the resolved path.
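
For illustration, the resolution order can be thought of as in the sketch below. This is an assumption for clarity; `resolve_cache_dir` is a hypothetical helper, not part of the uniface API:

```python
import os
from pathlib import Path

def resolve_cache_dir() -> Path:
    """Hypothetical resolution: the env var wins, else the default location."""
    root = os.environ.get('UNIFACE_CACHE_DIR', '~/.uniface/models')
    return Path(root).expanduser().resolve()
```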

---

## Next Steps
@@ -28,6 +28,10 @@ graph TB

        PRIV[Privacy]
    end

+    subgraph Tracking
+        TRK[BYTETracker]
+    end
+
    subgraph Output
        FACE[Face Objects]
    end

@@ -40,9 +44,11 @@ graph TB

    DET --> PARSE
    DET --> SPOOF
    DET --> PRIV
+    DET --> TRK
    REC --> FACE
    LMK --> FACE
    ATTR --> FACE
+    TRK --> FACE
```

---

@@ -74,12 +80,14 @@ tqdm  # Progress bars

Factory functions and direct instantiation:

```python
-# Factory function
-detector = create_detector('retinaface')
+from uniface.detection import RetinaFace

# Direct instantiation (recommended)
-from uniface import RetinaFace
detector = RetinaFace()
+
+# Or via factory function
+from uniface.detection import create_detector
+
+detector = create_detector('retinaface')
```

### 4. Type Safety

@@ -99,6 +107,7 @@ def detect(self, image: np.ndarray) -> list[Face]:

uniface/
├── detection/       # Face detection (RetinaFace, SCRFD, YOLOv5Face, YOLOv8Face)
├── recognition/     # Face recognition (AdaFace, ArcFace, MobileFace, SphereFace)
+├── tracking/        # Multi-object tracking (BYTETracker)
├── landmark/        # 106-point landmarks
├── attribute/       # Age, gender, emotion, race
├── parsing/         # Face semantic segmentation

@@ -109,7 +118,7 @@ uniface/

├── constants.py     # Model weights and URLs
├── model_store.py   # Model download and caching
├── onnx_utils.py    # ONNX Runtime utilities
-└── visualization.py # Drawing utilities
+└── draw.py          # Drawing utilities
```

---

@@ -120,7 +129,9 @@ A typical face analysis workflow:

```python
import cv2
-from uniface import RetinaFace, ArcFace, AgeGender
+from uniface.attribute import AgeGender
+from uniface.detection import RetinaFace
+from uniface.recognition import ArcFace

# 1. Initialize models
detector = RetinaFace()

@@ -151,7 +162,10 @@ for face in faces:

For convenience, `FaceAnalyzer` combines multiple modules:

```python
-from uniface import FaceAnalyzer, RetinaFace, ArcFace, AgeGender, FairFace
+from uniface.analyzer import FaceAnalyzer
+from uniface.attribute import AgeGender, FairFace
+from uniface.detection import RetinaFace
+from uniface.recognition import ArcFace

detector = RetinaFace()
recognizer = ArcFace()

@@ -176,7 +190,7 @@ for face in faces:

## Model Lifecycle

1. **First use**: Model is downloaded from GitHub releases
-2. **Cached**: Stored in `~/.uniface/models/`
+2. **Cached**: Stored in `~/.uniface/models/` (configurable via `set_cache_dir()` or `UNIFACE_CACHE_DIR`)
3. **Verified**: SHA-256 checksum validation
4. **Loaded**: ONNX Runtime session created
5. **Inference**: Hardware-accelerated execution

@@ -185,6 +199,11 @@ for face in faces:

# Models auto-download on first use
detector = RetinaFace()  # Downloads if not cached

+# Optionally configure cache location
+from uniface.model_store import get_cache_dir, set_cache_dir
+set_cache_dir('/data/models')
+print(get_cache_dir())  # /data/models
+
# Or manually pre-download
from uniface.model_store import verify_model_weights
from uniface.constants import RetinaFaceWeights
@@ -11,7 +11,7 @@ This page explains how to tune detection and recognition thresholds for your use

Controls minimum confidence for face detection:

```python
-from uniface import RetinaFace
+from uniface.detection import RetinaFace

# Default (balanced)
detector = RetinaFace(confidence_threshold=0.5)

@@ -81,7 +81,7 @@ For identity verification (same person check):

```python
import numpy as np
-from uniface import compute_similarity
+from uniface.face_utils import compute_similarity

similarity = compute_similarity(embedding1, embedding2)

@@ -199,7 +199,7 @@ else:

For drawing detections, filter by confidence:

```python
-from uniface.visualization import draw_detections
+from uniface.draw import draw_detections

# Only draw high-confidence detections
bboxes = [f.bbox for f in faces if f.confidence > 0.7]
@@ -18,8 +18,9 @@ template: home.html

[](https://github.com/yakhyo/uniface/actions)
[](https://pepy.tech/projects/uniface)
[](https://www.kaggle.com/yakhyokhuja/code)
+[](https://discord.gg/wdzrjr7R5j)

-<img src="https://raw.githubusercontent.com/yakhyo/uniface/main/.github/logos/new/uniface_rounded_q80.webp" alt="UniFace - All-in-One Open-Source Face Analysis Library" style="max-width: 90%; margin: 1rem 0;">
+<!-- <img src="https://raw.githubusercontent.com/yakhyo/uniface/main/.github/logos/new/uniface_rounded_q80.webp" alt="UniFace - All-in-One Open-Source Face Analysis Library" style="max-width: 70%; margin: 1rem 0;"> -->

[Get Started](quickstart.md){ .md-button .md-button--primary }
[View on GitHub](https://github.com/yakhyo/uniface){ .md-button }

@@ -58,6 +59,11 @@ BiSeNet semantic segmentation with 19 facial component classes.

Real-time gaze direction prediction with MobileGaze models.
</div>

+<div class="feature-card" markdown>
+### :material-motion-play: Tracking
+Multi-object tracking with BYTETracker for persistent face IDs across video frames.
+</div>
+
<div class="feature-card" markdown>
### :material-shield-check: Anti-Spoofing
Face liveness detection with MiniFASNet to prevent fraud.
@@ -128,7 +128,7 @@ import onnxruntime as ort

print(f"Available providers: {ort.get_available_providers()}")

# Quick test
-from uniface import RetinaFace
+from uniface.detection import RetinaFace
detector = RetinaFace()
print("Installation successful!")
```
@@ -22,7 +22,7 @@ RetinaFace models are trained on the WIDER FACE dataset.

!!! info "Accuracy & Benchmarks"
    **Accuracy**: WIDER FACE validation set (Easy/Medium/Hard subsets) - from [RetinaFace paper](https://arxiv.org/abs/1905.00641)

-    **Speed**: Benchmark on your own hardware using `python tools/detection.py --source <image>`
+    **Speed**: Benchmark on your own hardware using `python tools/detect.py --source <image>`

---

@@ -38,7 +38,7 @@ SCRFD (Sample and Computation Redistribution for Efficient Face Detection) model

!!! info "Accuracy & Benchmarks"
    **Accuracy**: WIDER FACE validation set - from [SCRFD paper](https://arxiv.org/abs/2105.04714)

-    **Speed**: Benchmark on your own hardware using `python tools/detection.py --source <image>`
+    **Speed**: Benchmark on your own hardware using `python tools/detect.py --source <image>`

---

@@ -55,7 +55,7 @@ YOLOv5-Face models provide detection with 5-point facial landmarks, trained on W

!!! info "Accuracy & Benchmarks"
    **Accuracy**: WIDER FACE validation set - from [YOLOv5-Face paper](https://arxiv.org/abs/2105.12931)

-    **Speed**: Benchmark on your own hardware using `python tools/detection.py --source <image>`
+    **Speed**: Benchmark on your own hardware using `python tools/detect.py --source <image>`

!!! note "Fixed Input Size"
    All YOLOv5-Face models use a fixed input size of 640×640.

@@ -74,7 +74,7 @@ YOLOv8-Face models use anchor-free design with DFL (Distribution Focal Loss) for

!!! info "Accuracy & Benchmarks"
    **Accuracy**: WIDER FACE validation set (Easy/Medium/Hard subsets)

-    **Speed**: Benchmark on your own hardware using `python tools/detection.py --source <image> --method yolov8face`
+    **Speed**: Benchmark on your own hardware using `python tools/detect.py --source <image> --method yolov8face`

!!! note "Fixed Input Size"
    All YOLOv8-Face models use a fixed input size of 640×640.

@@ -219,7 +219,7 @@ Facial landmark localization model.

| `AFFECNET7` | 7 | 0.5M | 2MB |
| `AFFECNET8` | 8 | 0.5M | 2MB |

-**Classes (7)**: Neutral, Happy, Sad, Surprise, Fear, Disgust, Anger
+**Classes (7)**: Neutral, Happy, Sad, Surprise, Fear, Disgust, Angry

**Classes (8)**: Above + Contempt

@@ -279,13 +279,13 @@ BiSeNet (Bilateral Segmentation Network) models for semantic face parsing. Segme

| # | Class | # | Class | # | Class |
|---|-------|---|-------|---|-------|
-| 1 | Background | 8 | Left Ear | 15 | Neck |
-| 2 | Skin | 9 | Right Ear | 16 | Neck Lace |
-| 3 | Left Eyebrow | 10 | Ear Ring | 17 | Cloth |
-| 4 | Right Eyebrow | 11 | Nose | 18 | Hair |
-| 5 | Left Eye | 12 | Mouth | 19 | Hat |
-| 6 | Right Eye | 13 | Upper Lip | | |
-| 7 | Eye Glasses | 14 | Lower Lip | | |
+| 0 | Background | 7 | Left Ear | 14 | Neck |
+| 1 | Skin | 8 | Right Ear | 15 | Neck Lace |
+| 2 | Left Eyebrow | 9 | Ear Ring | 16 | Cloth |
+| 3 | Right Eyebrow | 10 | Nose | 17 | Hair |
+| 4 | Left Eye | 11 | Mouth | 18 | Hat |
+| 5 | Right Eye | 12 | Upper Lip | | |
+| 6 | Eye Glasses | 13 | Lower Lip | | |

**Applications:**

@@ -349,10 +349,14 @@ Face anti-spoofing models for liveness detection. Detect if a face is real (live

Models are automatically downloaded and cached on first use.

-- **Cache location**: `~/.uniface/models/`
+- **Cache location**: `~/.uniface/models/` (configurable via `set_cache_dir()` or `UNIFACE_CACHE_DIR` env var)
+- **Inspect cache path**: `get_cache_dir()` returns the resolved active path
- **Verification**: Models are verified with SHA-256 checksums
+- **Concurrent download**: `download_models([...])` fetches multiple models in parallel
- **Manual download**: Use `python tools/download_model.py` to pre-download models

See [Model Cache & Offline Use](concepts/model-cache-offline.md) for full details.

---

## References
@@ -21,7 +21,8 @@ Predicts exact age and binary gender.

### Basic Usage

```python
-from uniface import RetinaFace, AgeGender
+from uniface.attribute import AgeGender
+from uniface.detection import RetinaFace

detector = RetinaFace()
age_gender = AgeGender()

@@ -54,7 +55,8 @@ Predicts gender, age group, and race with balanced demographics.

### Basic Usage

```python
-from uniface import RetinaFace, FairFace
+from uniface.attribute import FairFace
+from uniface.detection import RetinaFace

detector = RetinaFace()
fairface = FairFace()

@@ -120,12 +122,12 @@ Predicts facial emotions. Requires PyTorch.

### Basic Usage

```python
-from uniface import RetinaFace
+from uniface.detection import RetinaFace
from uniface.attribute import Emotion
from uniface.constants import DDAMFNWeights

detector = RetinaFace()
-emotion = Emotion(model_weights=DDAMFNWeights.AFFECNET7)
+emotion = Emotion(model_name=DDAMFNWeights.AFFECNET7)

faces = detector.detect(image)

@@ -147,7 +149,7 @@ for face in faces:

| Surprise |
| Fear |
| Disgust |
-| Anger |
+| Angry |

=== "8-Class (AFFECNET8)"

@@ -159,7 +161,7 @@ for face in faces:

| Surprise |
| Fear |
| Disgust |
-| Anger |
+| Angry |
| Contempt |

### Model Variants

@@ -169,10 +171,10 @@ from uniface.attribute import Emotion

from uniface.constants import DDAMFNWeights

# 7-class emotion
-emotion = Emotion(model_weights=DDAMFNWeights.AFFECNET7)
+emotion = Emotion(model_name=DDAMFNWeights.AFFECNET7)

# 8-class emotion
-emotion = Emotion(model_weights=DDAMFNWeights.AFFECNET8)
+emotion = Emotion(model_name=DDAMFNWeights.AFFECNET8)
```

---

@@ -182,7 +184,8 @@ emotion = Emotion(model_name=DDAMFNWeights.AFFECNET8)

### Full Attribute Analysis

```python
-from uniface import RetinaFace, AgeGender, FairFace
+from uniface.attribute import AgeGender, FairFace
+from uniface.detection import RetinaFace

detector = RetinaFace()
age_gender = AgeGender()

@@ -206,7 +209,9 @@ for face in faces:

### Using FaceAnalyzer

```python
-from uniface import FaceAnalyzer, RetinaFace, AgeGender
+from uniface.analyzer import FaceAnalyzer
+from uniface.attribute import AgeGender
+from uniface.detection import RetinaFace

analyzer = FaceAnalyzer(
    RetinaFace(),
@@ -24,7 +24,7 @@ Single-shot face detector with multi-scale feature pyramid.

### Basic Usage

```python
-from uniface import RetinaFace
+from uniface.detection import RetinaFace

detector = RetinaFace()
faces = detector.detect(image)

@@ -38,7 +38,7 @@ for face in faces:

### Model Variants

```python
-from uniface import RetinaFace
+from uniface.detection import RetinaFace
from uniface.constants import RetinaFaceWeights

# Lightweight (mobile/edge)

@@ -82,7 +82,7 @@ State-of-the-art detection with excellent accuracy-speed tradeoff.

### Basic Usage

```python
-from uniface import SCRFD
+from uniface.detection import SCRFD

detector = SCRFD()
faces = detector.detect(image)

@@ -91,7 +91,7 @@ faces = detector.detect(image)

### Model Variants

```python
-from uniface import SCRFD
+from uniface.detection import SCRFD
from uniface.constants import SCRFDWeights

# Real-time (lightweight)

@@ -127,7 +127,7 @@ YOLO-based detection optimized for faces.

### Basic Usage

```python
-from uniface import YOLOv5Face
+from uniface.detection import YOLOv5Face

detector = YOLOv5Face()
faces = detector.detect(image)

@@ -136,7 +136,7 @@ faces = detector.detect(image)

### Model Variants

```python
-from uniface import YOLOv5Face
+from uniface.detection import YOLOv5Face
from uniface.constants import YOLOv5FaceWeights

# Lightweight

@@ -179,7 +179,7 @@ Anchor-free detection with DFL (Distribution Focal Loss) for accurate bbox regre

### Basic Usage

```python
-from uniface import YOLOv8Face
+from uniface.detection import YOLOv8Face

detector = YOLOv8Face()
faces = detector.detect(image)

@@ -188,7 +188,7 @@ faces = detector.detect(image)

### Model Variants

```python
-from uniface import YOLOv8Face
+from uniface.detection import YOLOv8Face
from uniface.constants import YOLOv8FaceWeights

# Lightweight

@@ -225,7 +225,7 @@ detector = YOLOv8Face(

Create detectors dynamically:

```python
-from uniface import create_detector
+from uniface.detection import create_detector

detector = create_detector('retinaface')
# or

@@ -238,22 +238,6 @@ detector = create_detector('yolov8face')

---

-## High-Level API
-
-One-line detection:
-
-```python
-from uniface import detect_faces
-
-# Using RetinaFace (default)
-faces = detect_faces(image, method='retinaface', confidence_threshold=0.5)
-
-# Using YOLOv8-Face
-faces = detect_faces(image, method='yolov8face', confidence_threshold=0.5)
-```
-
----

## Output Format

All detectors return `list[Face]`:

@@ -276,7 +260,7 @@ for face in faces:

## Visualization

```python
-from uniface.visualization import draw_detections
+from uniface.draw import draw_detections

draw_detections(
    image=image,

@@ -296,7 +280,7 @@ cv2.imwrite("result.jpg", image)

Benchmark on your hardware:

```bash
-python tools/detection.py --source image.jpg
+python tools/detect.py --source image.jpg
```

---
@@ -23,7 +23,8 @@ Gaze estimation predicts where a person is looking (pitch and yaw angles).

```python
import cv2
import numpy as np
-from uniface import RetinaFace, MobileGaze
+from uniface.detection import RetinaFace
+from uniface.gaze import MobileGaze

detector = RetinaFace()
gaze_estimator = MobileGaze()

@@ -52,7 +53,7 @@ for face in faces:

## Model Variants

```python
-from uniface import MobileGaze
+from uniface.gaze import MobileGaze
from uniface.constants import GazeWeights

# Default (ResNet34, recommended)

@@ -102,7 +103,7 @@ yaw = -90° ────┼──── yaw = +90°

## Visualization

```python
-from uniface.visualization import draw_gaze
+from uniface.draw import draw_gaze

# Detect faces
faces = detector.detect(image)

@@ -154,8 +155,9 @@ def draw_gaze_custom(image, bbox, pitch, yaw, length=100, color=(0, 255, 0)):

```python
import cv2
import numpy as np
-from uniface import RetinaFace, MobileGaze
-from uniface.visualization import draw_gaze
+from uniface.detection import RetinaFace
+from uniface.gaze import MobileGaze
+from uniface.draw import draw_gaze

detector = RetinaFace()
gaze_estimator = MobileGaze()

@@ -256,7 +258,7 @@ print(f"Looking: {direction}")

## Factory Function

```python
-from uniface import create_gaze_estimator
+from uniface.gaze import create_gaze_estimator

gaze = create_gaze_estimator()  # Returns MobileGaze
```
@@ -20,7 +20,8 @@ Facial landmark detection provides precise localization of facial features.

### Basic Usage

```python
-from uniface import RetinaFace, Landmark106
+from uniface.detection import RetinaFace
+from uniface.landmark import Landmark106

detector = RetinaFace()
landmarker = Landmark106()

@@ -78,7 +79,7 @@ mouth = landmarks[87:106]

All detection models provide 5-point landmarks:

```python
-from uniface import RetinaFace
+from uniface.detection import RetinaFace

detector = RetinaFace()
faces = detector.detect(image)

@@ -152,7 +153,7 @@ def draw_landmarks_with_connections(image, landmarks):

### Face Alignment

```python
-from uniface import face_alignment
+from uniface.face_utils import face_alignment

# Align face using 5-point landmarks
aligned = face_alignment(image, faces[0].landmarks)

@@ -236,7 +237,7 @@ def estimate_head_pose(landmarks, image_shape):

## Factory Function

```python
-from uniface import create_landmarker
+from uniface.landmark import create_landmarker

landmarker = create_landmarker()  # Returns Landmark106
```
@@ -19,7 +19,7 @@ Face parsing segments faces into semantic components or face regions.

```python
import cv2
from uniface.parsing import BiSeNet
-from uniface.visualization import vis_parsing_maps
+from uniface.draw import vis_parsing_maps

# Initialize parser
parser = BiSeNet()

@@ -85,9 +85,9 @@ parser = BiSeNet(model_name=ParsingWeights.RESNET34)

```python
import cv2
-from uniface import RetinaFace
+from uniface.detection import RetinaFace
from uniface.parsing import BiSeNet
-from uniface.visualization import vis_parsing_maps
+from uniface.draw import vis_parsing_maps

detector = RetinaFace()
parser = BiSeNet()

@@ -177,23 +177,19 @@ def apply_lip_color(image, mask, color=(180, 50, 50)):

    """Apply lip color using parsing mask."""
    result = image.copy()

-    # Get lip mask (upper + lower lip)
-    lip_mask = ((mask == 13) | (mask == 14)).astype(np.uint8)
+    # Get lip mask (upper lip=12, lower lip=13)
+    lip_mask = ((mask == 12) | (mask == 13)).astype(np.uint8)

    # Create color overlay
    overlay = np.zeros_like(image)
    overlay[:] = color

-    # Blend with original
-    lip_region = cv2.bitwise_and(overlay, overlay, mask=lip_mask)
-    non_lip = cv2.bitwise_and(result, result, mask=1 - lip_mask)
-
-    # Combine with alpha blending
+    # Alpha blend lip region
    alpha = 0.4
-    result = cv2.addWeighted(result, 1 - alpha * lip_mask[:,:,np.newaxis] / 255,
-                             lip_region, alpha, 0)
+    mask_3ch = lip_mask[:, :, np.newaxis]
+    result = np.where(mask_3ch, (image * (1 - alpha) + overlay * alpha).astype(np.uint8), result)

-    return result.astype(np.uint8)
+    return result
```

### Background Replacement

@@ -234,7 +230,7 @@ def get_hair_mask(mask):

## Visualization Options

```python
-from uniface.visualization import vis_parsing_maps
+from uniface.draw import vis_parsing_maps

# Default visualization
vis_result = vis_parsing_maps(face_rgb, mask)

@@ -257,7 +253,7 @@ XSeg outputs a mask for face regions. Unlike BiSeNet which works on bbox crops,

```python
import cv2
-from uniface import RetinaFace
+from uniface.detection import RetinaFace
from uniface.parsing import XSeg

detector = RetinaFace()

@@ -268,7 +264,7 @@ faces = detector.detect(image)

for face in faces:
    if face.landmarks is not None:
-        mask = parser.parse(image, face.landmarks)
+        mask = parser.parse(image, landmarks=face.landmarks)
        print(f"Mask shape: {mask.shape}")  # (H, W), values in [0, 1]
```

@@ -296,7 +292,7 @@ parser = XSeg(

```python
# Full pipeline: align -> segment -> warp back to original space
-mask = parser.parse(image, landmarks)
+mask = parser.parse(image, landmarks=landmarks)

# For pre-aligned face crops
mask = parser.parse_aligned(face_crop)

@@ -318,7 +314,7 @@ mask, face_crop, inverse_matrix = parser.parse_with_inverse(image, landmarks)

## Factory Function

```python
-from uniface import create_face_parser
+from uniface.parsing import create_face_parser
from uniface.constants import ParsingWeights, XSegWeights

# BiSeNet (default)
@@ -18,25 +18,8 @@ Face anonymization protects privacy by blurring or obscuring faces in images and

## Quick Start

-### One-Line Anonymization
-
-```python
-from uniface.privacy import anonymize_faces
-import cv2
-
-image = cv2.imread("group_photo.jpg")
-anonymized = anonymize_faces(image, method='pixelate')
-cv2.imwrite("anonymized.jpg", anonymized)
-```
-
----
-
-## BlurFace Class
-
-For more control, use the `BlurFace` class:
-
```python
-from uniface import RetinaFace
+from uniface.detection import RetinaFace
from uniface.privacy import BlurFace
import cv2

@@ -59,7 +42,7 @@ cv2.imwrite("anonymized.jpg", anonymized)

Blocky pixelation effect (common in news media):

```python
-blurrer = BlurFace(method='pixelate', pixel_blocks=10)
+blurrer = BlurFace(method='pixelate', pixel_blocks=15)
```

| Parameter | Default | Description |

@@ -137,7 +120,7 @@ result = blurrer.anonymize(image, faces, inplace=True)

```python
import cv2
-from uniface import RetinaFace
+from uniface.detection import RetinaFace
from uniface.privacy import BlurFace

detector = RetinaFace()

@@ -166,7 +149,7 @@ cv2.destroyAllWindows()

```python
import cv2
-from uniface import RetinaFace
+from uniface.detection import RetinaFace
from uniface.privacy import BlurFace

detector = RetinaFace()

@@ -238,7 +221,7 @@ def anonymize_low_confidence(image, faces, blurrer, confidence_threshold=0.8):

```python
import cv2
-from uniface import RetinaFace
+from uniface.detection import RetinaFace
from uniface.privacy import BlurFace

detector = RetinaFace()

@@ -259,13 +242,13 @@ for method in methods:

```bash
# Anonymize image with pixelation
-python tools/face_anonymize.py --source photo.jpg
+python tools/anonymize.py --source photo.jpg

# Real-time webcam
-python tools/face_anonymize.py --source 0 --method gaussian
+python tools/anonymize.py --source 0 --method gaussian

# Custom blur strength
-python tools/face_anonymize.py --source photo.jpg --method gaussian --blur-strength 5.0
+python tools/anonymize.py --source photo.jpg --method gaussian --blur-strength 5.0
```

---
@@ -22,7 +22,8 @@ Face recognition using adaptive margin based on image quality.

### Basic Usage

```python
-from uniface import RetinaFace, AdaFace
+from uniface.detection import RetinaFace
+from uniface.recognition import AdaFace

detector = RetinaFace()
recognizer = AdaFace()

@@ -39,7 +40,7 @@ if faces:

### Model Variants

```python
-from uniface import AdaFace
+from uniface.recognition import AdaFace
from uniface.constants import AdaFaceWeights

# Lightweight (default)

@@ -69,7 +70,8 @@ Face recognition using additive angular margin loss.

### Basic Usage

```python
-from uniface import RetinaFace, ArcFace
+from uniface.detection import RetinaFace
+from uniface.recognition import ArcFace

detector = RetinaFace()
recognizer = ArcFace()

@@ -86,7 +88,7 @@ if faces:

### Model Variants

```python
-from uniface import ArcFace
+from uniface.recognition import ArcFace
from uniface.constants import ArcFaceWeights

# Lightweight (default)

@@ -118,7 +120,7 @@ Lightweight face recognition models with MobileNet backbones.

### Basic Usage

```python
-from uniface import MobileFace
+from uniface.recognition import MobileFace

recognizer = MobileFace()
embedding = recognizer.get_normalized_embedding(image, landmarks)

@@ -127,7 +129,7 @@ embedding = recognizer.get_normalized_embedding(image, landmarks)

### Model Variants

```python
-from uniface import MobileFace
+from uniface.recognition import MobileFace
from uniface.constants import MobileFaceWeights

# Ultra-lightweight

@@ -156,7 +158,7 @@ Face recognition using angular softmax loss (A-Softmax).

### Basic Usage

```python
-from uniface import SphereFace
+from uniface.recognition import SphereFace
from uniface.constants import SphereFaceWeights

recognizer = SphereFace(model_name=SphereFaceWeights.SPHERE20)

@@ -175,7 +177,7 @@ embedding = recognizer.get_normalized_embedding(image, landmarks)

### Compute Similarity

```python
-from uniface import compute_similarity
+from uniface.face_utils import compute_similarity
import numpy as np

# Extract embeddings

@@ -211,7 +213,7 @@ Recognition models require aligned faces. UniFace handles this internally:

embedding = recognizer.get_normalized_embedding(image, landmarks)

# Or manually align
-from uniface import face_alignment
+from uniface.face_utils import face_alignment

aligned_face = face_alignment(image, landmarks)
# Returns: 112x112 aligned face image

@@ -223,7 +225,8 @@ aligned_face = face_alignment(image, landmarks)

```python
import numpy as np
-from uniface import RetinaFace, ArcFace
+from uniface.detection import RetinaFace
+from uniface.recognition import ArcFace

detector = RetinaFace()
recognizer = ArcFace()

@@ -282,7 +285,7 @@ else:

## Factory Function

```python
-from uniface import create_recognizer
+from uniface.recognition import create_recognizer

# Available methods: 'arcface', 'adaface', 'mobileface', 'sphereface'
recognizer = create_recognizer('arcface')

@@ -17,7 +17,7 @@ Face anti-spoofing detects whether a face is real (live) or fake (photo, video r

```python
import cv2
-from uniface import RetinaFace
+from uniface.detection import RetinaFace
from uniface.spoofing import MiniFASNet

detector = RetinaFace()

@@ -128,7 +128,7 @@ cv2.imwrite("spoofing_result.jpg", image)

```python
import cv2
-from uniface import RetinaFace
+from uniface.detection import RetinaFace
from uniface.spoofing import MiniFASNet

detector = RetinaFace()

@@ -253,7 +253,7 @@ python tools/spoofing.py --source 0

## Factory Function

```python
-from uniface import create_spoofer
+from uniface.spoofing import create_spoofer

spoofer = create_spoofer()  # Returns MiniFASNet
```
docs/modules/tracking.md (new file, 263 lines)

@@ -0,0 +1,263 @@
# Tracking

Multi-object tracking using [BYTETracker](https://github.com/yakhyo/bytetrack-tracker) with Kalman filtering and IoU-based association. The tracker assigns persistent IDs to detected objects across video frames using a two-stage association strategy — first matching high-confidence detections, then low-confidence ones.

---

## How It Works

BYTETracker takes detection bounding boxes as input and returns tracked bounding boxes with persistent IDs. It does not depend on any specific detector — any source of `[x1, y1, x2, y2, score]` arrays will work.

Each frame, the tracker:

1. Splits detections into high-confidence and low-confidence groups
2. Matches high-confidence detections to existing tracks using IoU
3. Matches remaining tracks to low-confidence detections (second chance)
4. Starts new tracks for unmatched high-confidence detections
5. Removes tracks that have been lost for too long

The Kalman filter predicts where each track will be in the next frame, which helps maintain associations even when detections are noisy.
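
The sketch below illustrates steps 1 and 2 in plain NumPy: split detections by score, then greedily match the high-confidence group to the tracks' predicted boxes by IoU. It is a simplified illustration, not the BYTETracker source; the real implementation matches against Kalman-predicted boxes with an optimal assignment step, and the helper names (`iou_matrix`, `first_pass_match`, `min_iou`) are hypothetical.

```python
import numpy as np

def iou_matrix(a: np.ndarray, b: np.ndarray) -> np.ndarray:
    """Pairwise IoU between (N, 4) and (M, 4) xyxy boxes."""
    tl = np.maximum(a[:, None, :2], b[None, :, :2])  # top-left corners of intersections
    br = np.minimum(a[:, None, 2:], b[None, :, 2:])  # bottom-right corners of intersections
    inter = np.clip(br - tl, 0, None).prod(axis=2)
    area_a = (a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1])
    area_b = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1])
    return inter / (area_a[:, None] + area_b[None, :] - inter + 1e-9)

def first_pass_match(dets, track_boxes, track_thresh=0.5, low_thresh=0.1, min_iou=0.2):
    """Steps 1-2 of the loop above (sketch): split by score, then greedy IoU matching.

    min_iou is an illustrative stand-in for what match_thresh controls.
    """
    scores = dets[:, 4]
    high = dets[scores >= track_thresh]                            # first-pass candidates
    low = dets[(scores >= low_thresh) & (scores < track_thresh)]   # second-chance pool

    matches = []
    if len(high) and len(track_boxes):
        ious = iou_matrix(track_boxes, high[:, :4])
        for ti in ious.max(axis=1).argsort()[::-1]:  # strongest tracks claim detections first
            di = int(ious[ti].argmax())
            if ious[ti, di] >= min_iou:
                matches.append((int(ti), di))
                ious[:, di] = -1.0                   # detection consumed
    return high, low, matches
```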

---

## Basic Usage

```python
import cv2
import numpy as np
from uniface.common import xyxy_to_cxcywh
from uniface.detection import SCRFD
from uniface.tracking import BYTETracker
from uniface.draw import draw_tracks

detector = SCRFD()
tracker = BYTETracker(track_thresh=0.5, track_buffer=30)

cap = cv2.VideoCapture("video.mp4")

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # 1. Detect faces
    faces = detector.detect(frame)

    # 2. Build detections array: [x1, y1, x2, y2, score]
    dets = np.array([[*f.bbox, f.confidence] for f in faces])
    dets = dets if len(dets) > 0 else np.empty((0, 5))

    # 3. Update tracker
    tracks = tracker.update(dets)

    # 4. Map track IDs back to face objects
    if len(tracks) > 0 and len(faces) > 0:
        face_bboxes = np.array([f.bbox for f in faces], dtype=np.float32)
        track_ids = tracks[:, 4].astype(int)

        face_centers = xyxy_to_cxcywh(face_bboxes)[:, :2]
        track_centers = xyxy_to_cxcywh(tracks[:, :4])[:, :2]

        for ti in range(len(tracks)):
            dists = (track_centers[ti, 0] - face_centers[:, 0]) ** 2 + (track_centers[ti, 1] - face_centers[:, 1]) ** 2
            faces[int(np.argmin(dists))].track_id = track_ids[ti]

    # 5. Draw
    tracked_faces = [f for f in faces if f.track_id is not None]
    draw_tracks(image=frame, faces=tracked_faces)
    cv2.imshow("Tracking", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
```

Each track ID gets a deterministic color via golden-ratio hue stepping, so the same person keeps the same color across the entire video.
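
For illustration, a color like that can be derived from the ID in a few lines. This is an assumption about the general technique, not the actual `draw_tracks` internals:

```python
import colorsys

GOLDEN_RATIO_CONJUGATE = 0.618033988749895

def track_color(track_id: int) -> tuple[int, int, int]:
    """Deterministic BGR color for a track ID via golden-ratio hue stepping (sketch)."""
    hue = (track_id * GOLDEN_RATIO_CONJUGATE) % 1.0  # hues spread evenly around the wheel
    r, g, b = colorsys.hsv_to_rgb(hue, 0.85, 0.95)
    return int(b * 255), int(g * 255), int(r * 255)  # OpenCV expects BGR
```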

---

## Webcam Tracking

```python
import cv2
import numpy as np
from uniface.common import xyxy_to_cxcywh
from uniface.detection import SCRFD
from uniface.tracking import BYTETracker
from uniface.draw import draw_tracks

detector = SCRFD()
tracker = BYTETracker(track_thresh=0.5, track_buffer=30)
cap = cv2.VideoCapture(0)

while True:
    ret, frame = cap.read()
    if not ret:
        break

    faces = detector.detect(frame)
    dets = np.array([[*f.bbox, f.confidence] for f in faces])
    dets = dets if len(dets) > 0 else np.empty((0, 5))

    tracks = tracker.update(dets)

    if len(tracks) > 0 and len(faces) > 0:
        face_bboxes = np.array([f.bbox for f in faces], dtype=np.float32)
        track_ids = tracks[:, 4].astype(int)

        face_centers = xyxy_to_cxcywh(face_bboxes)[:, :2]
        track_centers = xyxy_to_cxcywh(tracks[:, :4])[:, :2]

        for ti in range(len(tracks)):
            dists = (track_centers[ti, 0] - face_centers[:, 0]) ** 2 + (track_centers[ti, 1] - face_centers[:, 1]) ** 2
            faces[int(np.argmin(dists))].track_id = track_ids[ti]

    draw_tracks(image=frame, faces=[f for f in faces if f.track_id is not None])
    cv2.imshow("Face Tracking - Press 'q' to quit", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
```

---

## Parameters

```python
from uniface.tracking import BYTETracker

tracker = BYTETracker(
    track_thresh=0.5,
    track_buffer=30,
    match_thresh=0.8,
    low_thresh=0.1,
)
```

| Parameter | Default | Description |
|-----------|---------|-------------|
| `track_thresh` | 0.5 | Detections above this score go through first-pass association |
| `track_buffer` | 30 | How many frames to keep a lost track before removing it |
| `match_thresh` | 0.8 | IoU threshold for matching tracks to detections |
| `low_thresh` | 0.1 | Detections below this score are discarded entirely |

---

## Input / Output

**Input** — `(N, 5)` numpy array with `[x1, y1, x2, y2, confidence]` per detection:

```python
detections = np.array([
    [100, 50, 200, 160, 0.95],
    [300, 80, 380, 200, 0.87],
])
```

**Output** — `(M, 5)` numpy array with `[x1, y1, x2, y2, track_id]` per active track:

```python
tracks = tracker.update(detections)
# array([[101.2, 51.3, 199.8, 159.8, 1.],
#        [300.5, 80.2, 379.7, 200.1, 2.]])
```

The output bounding boxes come from the Kalman filter prediction, so they may differ slightly from the input. Track IDs are integers that persist across frames for the same object.

---

## Resetting the Tracker

When switching to a different video or scene, reset the tracker to clear all internal state:

```python
tracker.reset()
```

This clears all active, lost, and removed tracks, resets the frame counter, and resets the ID counter back to zero.

---

## Visualization

`draw_tracks` draws bounding boxes color-coded by track ID:

```python
from uniface.draw import draw_tracks

draw_tracks(
    image=frame,
    faces=tracked_faces,
    draw_landmarks=True,
    draw_id=True,
    corner_bbox=True,
)
```

---

## Small Face Performance

!!! warning "Tracking performance with small faces"
    The tracker relies on IoU (Intersection over Union) to match detections across
    frames. When faces occupy a small portion of the image — for example in
    surveillance footage or wide-angle cameras — even slight movement between frames
    can cause a large drop in IoU. This makes it harder for the tracker to maintain
    consistent IDs, and you may see IDs switching or resetting more often than expected.

    This is not specific to BYTETracker; it applies to any IoU-based tracker. A few
    things that can help (a worked example follows the snippet below):

    - **Lower `match_thresh`** (e.g. `0.5` or `0.6`) so the tracker accepts lower
      overlap as a valid match.
    - **Increase `track_buffer`** (e.g. `60` or higher) to hold onto lost tracks
      longer before discarding them.
    - **Use a higher-resolution input** if possible, so face bounding boxes are
      larger in pixel terms.

```python
tracker = BYTETracker(
    track_thresh=0.4,
    track_buffer=60,
    match_thresh=0.6,
)
```
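
To quantify why small boxes are fragile, compare the IoU of a small and a large box after the same 5-pixel shift. A quick self-contained check:

```python
def iou(a, b):
    """IoU of two (x1, y1, x2, y2) boxes."""
    x1, y1 = max(a[0], b[0]), max(a[1], b[1])
    x2, y2 = min(a[2], b[2]), min(a[3], b[3])
    inter = max(0, x2 - x1) * max(0, y2 - y1)
    area = lambda r: (r[2] - r[0]) * (r[3] - r[1])
    return inter / (area(a) + area(b) - inter)

# The same 5 px diagonal shift:
print(iou((0, 0, 20, 20), (5, 5, 25, 25)))      # ~0.39 for a 20x20 face
print(iou((0, 0, 200, 200), (5, 5, 205, 205)))  # ~0.91 for a 200x200 face
```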

---

## CLI Tool

```bash
# Track faces in a video
python tools/track.py --source video.mp4

# Webcam
python tools/track.py --source 0

# Save output
python tools/track.py --source video.mp4 --output tracked.mp4

# Use RetinaFace instead of SCRFD
python tools/track.py --source video.mp4 --detector retinaface

# Keep lost tracks longer
python tools/track.py --source video.mp4 --track-buffer 60
```

---

## References

- [yakhyo/bytetrack-tracker](https://github.com/yakhyo/bytetrack-tracker) — standalone BYTETracker implementation used in UniFace
- [ByteTrack paper](https://arxiv.org/abs/2110.06864) — Zhang et al., "ByteTrack: Multi-Object Tracking by Associating Every Detection Box"

---

## See Also

- [Detection](detection.md) — face detection models
- [Video & Webcam](../recipes/video-webcam.md) — video processing patterns
- [Inputs & Outputs](../concepts/inputs-outputs.md) — data types and formats

docs/overrides/main.html (new file, 7 lines)

@@ -0,0 +1,7 @@
{% extends "base.html" %}

{% block announce %}
<a href="https://github.com/yakhyo/uniface" target="_blank" rel="noopener">
Support our work — give UniFace a <span class="twemoji">{% include ".icons/octicons/star-fill-16.svg" %}</span> on <strong>GitHub</strong> and help us reach more developers!
</a>
{% endblock %}
@@ -10,7 +10,7 @@ Detect faces in an image:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace
|
||||
from uniface.detection import RetinaFace
|
||||
|
||||
# Load image
|
||||
image = cv2.imread("photo.jpg")
|
||||
@@ -46,8 +46,8 @@ Draw bounding boxes and landmarks:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace
|
||||
from uniface.visualization import draw_detections
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.draw import draw_detections
|
||||
|
||||
# Detect faces
|
||||
detector = RetinaFace()
|
||||
@@ -81,7 +81,8 @@ Compare two faces:
|
||||
```python
|
||||
import cv2
|
||||
import numpy as np
|
||||
from uniface import RetinaFace, ArcFace
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.recognition import ArcFace
|
||||
|
||||
# Initialize models
|
||||
detector = RetinaFace()
|
||||
@@ -121,7 +122,8 @@ if faces1 and faces2:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace, AgeGender
|
||||
from uniface.attribute import AgeGender
|
||||
from uniface.detection import RetinaFace
|
||||
|
||||
# Initialize models
|
||||
detector = RetinaFace()
|
||||
@@ -152,7 +154,8 @@ Detect race, gender, and age group:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace, FairFace
|
||||
from uniface.attribute import FairFace
|
||||
from uniface.detection import RetinaFace
|
||||
|
||||
detector = RetinaFace()
|
||||
fairface = FairFace()
|
||||
@@ -178,7 +181,8 @@ Face 2: Female, 20-29, White

```python
import cv2
from uniface import RetinaFace, Landmark106
from uniface.detection import RetinaFace
from uniface.landmark import Landmark106

detector = RetinaFace()
landmarker = Landmark106()
@@ -204,8 +208,9 @@ if faces:
```python
import cv2
import numpy as np
from uniface import RetinaFace, MobileGaze
from uniface.visualization import draw_gaze
from uniface.detection import RetinaFace
from uniface.gaze import MobileGaze
from uniface.draw import draw_gaze

detector = RetinaFace()
gaze_estimator = MobileGaze()
@@ -237,7 +242,7 @@ Segment face into semantic components:
import cv2
import numpy as np
from uniface.parsing import BiSeNet
from uniface.visualization import vis_parsing_maps
from uniface.draw import vis_parsing_maps

parser = BiSeNet()

@@ -261,26 +266,24 @@ print(f"Detected {len(np.unique(mask))} facial components")
Blur faces for privacy protection:

```python
from uniface.privacy import anonymize_faces
import cv2

# One-liner: automatic detection and blurring
image = cv2.imread("group_photo.jpg")
anonymized = anonymize_faces(image, method='pixelate')
cv2.imwrite("anonymized.jpg", anonymized)
```

**Manual control:**

```python
from uniface import RetinaFace
from uniface.detection import RetinaFace
from uniface.privacy import BlurFace

detector = RetinaFace()
blurrer = BlurFace(method='gaussian', blur_strength=5.0)
blurrer = BlurFace(method='pixelate')

image = cv2.imread("group_photo.jpg")
faces = detector.detect(image)
anonymized = blurrer.anonymize(image, faces)
cv2.imwrite("anonymized.jpg", anonymized)
```

**Custom blur settings:**

```python
blurrer = BlurFace(method='gaussian', blur_strength=5.0)
anonymized = blurrer.anonymize(image, faces)
```

**Available methods:**
@@ -301,7 +304,7 @@ Detect real vs. fake faces:

```python
import cv2
from uniface import RetinaFace
from uniface.detection import RetinaFace
from uniface.spoofing import MiniFASNet

detector = RetinaFace()
@@ -324,8 +327,8 @@ Real-time face detection:

```python
import cv2
from uniface import RetinaFace
from uniface.visualization import draw_detections
from uniface.detection import RetinaFace
from uniface.draw import draw_detections

detector = RetinaFace()
cap = cv2.VideoCapture(0)
@@ -355,6 +358,60 @@ cv2.destroyAllWindows()

---

## Face Tracking

Track faces across video frames with persistent IDs:

```python
import cv2
import numpy as np
from uniface.common import xyxy_to_cxcywh
from uniface.detection import SCRFD
from uniface.tracking import BYTETracker
from uniface.draw import draw_tracks

detector = SCRFD()
tracker = BYTETracker(track_thresh=0.5, track_buffer=30)

cap = cv2.VideoCapture("video.mp4")

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    faces = detector.detect(frame)
    dets = np.array([[*f.bbox, f.confidence] for f in faces])
    dets = dets if len(dets) > 0 else np.empty((0, 5))

    tracks = tracker.update(dets)

    # Assign track IDs to faces
    if len(tracks) > 0 and len(faces) > 0:
        face_bboxes = np.array([f.bbox for f in faces], dtype=np.float32)
        track_ids = tracks[:, 4].astype(int)

        face_centers = xyxy_to_cxcywh(face_bboxes)[:, :2]
        track_centers = xyxy_to_cxcywh(tracks[:, :4])[:, :2]

        for ti in range(len(tracks)):
            dists = (track_centers[ti, 0] - face_centers[:, 0]) ** 2 + (track_centers[ti, 1] - face_centers[:, 1]) ** 2
            faces[int(np.argmin(dists))].track_id = track_ids[ti]

    tracked_faces = [f for f in faces if f.track_id is not None]
    draw_tracks(image=frame, faces=tracked_faces)
    cv2.imshow("Tracking", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
```

For more details, see the [Tracking module](modules/tracking.md).

---

## Model Selection

For detailed model comparisons and benchmarks, see the [Model Zoo](models.md).
@@ -365,6 +422,7 @@ For detailed model comparisons and benchmarks, see the [Model Zoo](models.md).
|------|------------------|
| Detection | `RetinaFace`, `SCRFD`, `YOLOv5Face`, `YOLOv8Face` |
| Recognition | `ArcFace`, `AdaFace`, `MobileFace`, `SphereFace` |
| Tracking | `BYTETracker` |
| Gaze | `MobileGaze` (ResNet18/34/50, MobileNetV2, MobileOneS0) |
| Parsing | `BiSeNet` (ResNet18/34) |
| Attributes | `AgeGender`, `FairFace`, `Emotion` |
@@ -407,13 +465,17 @@ python -c "import platform; print(platform.machine())"
### Import Errors

```python
# Correct imports
from uniface.detection import RetinaFace
from uniface.recognition import ArcFace
from uniface.detection import RetinaFace, SCRFD
from uniface.recognition import ArcFace, AdaFace
from uniface.attribute import AgeGender, FairFace
from uniface.landmark import Landmark106

# Also works (re-exported at package level)
from uniface import RetinaFace, ArcFace, Landmark106
from uniface.gaze import MobileGaze
from uniface.parsing import BiSeNet, XSeg
from uniface.privacy import BlurFace
from uniface.spoofing import MiniFASNet
from uniface.tracking import BYTETracker
from uniface.analyzer import FaceAnalyzer
from uniface.draw import draw_detections, draw_tracks
```

---

@@ -11,7 +11,7 @@ Blur faces in real-time video streams for privacy protection.

```python
import cv2
from uniface import RetinaFace
from uniface.detection import RetinaFace
from uniface.privacy import BlurFace

detector = RetinaFace()
@@ -40,7 +40,7 @@ cv2.destroyAllWindows()

```python
import cv2
from uniface import RetinaFace
from uniface.detection import RetinaFace
from uniface.privacy import BlurFace

detector = RetinaFace()
@@ -67,14 +67,19 @@ out.release()

---

## One-Liner for Images
## Single Image

```python
from uniface.privacy import anonymize_faces
import cv2
from uniface.detection import RetinaFace
from uniface.privacy import BlurFace

detector = RetinaFace()
blurrer = BlurFace(method='pixelate')

image = cv2.imread("photo.jpg")
result = anonymize_faces(image, method='pixelate')
faces = detector.detect(image)
result = blurrer.anonymize(image, faces)
cv2.imwrite("anonymized.jpg", result)
```

@@ -84,7 +89,7 @@ cv2.imwrite("anonymized.jpg", result)

| Method | Usage |
|--------|-------|
| Pixelate | `BlurFace(method='pixelate', pixel_blocks=10)` |
| Pixelate | `BlurFace(method='pixelate', pixel_blocks=15)` |
| Gaussian | `BlurFace(method='gaussian', blur_strength=3.0)` |
| Blackout | `BlurFace(method='blackout', color=(0,0,0))` |
| Elliptical | `BlurFace(method='elliptical', margin=20)` |
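
To pick a method visually, it can help to render all of them side by side. A minimal sketch using only the constructors from the table above (output filenames are illustrative):

```python
import cv2
from uniface.detection import RetinaFace
from uniface.privacy import BlurFace

detector = RetinaFace()
image = cv2.imread("photo.jpg")
faces = detector.detect(image)

# One output image per anonymization method, with default settings
for method in ('pixelate', 'gaussian', 'blackout', 'elliptical'):
    blurrer = BlurFace(method=method)
    cv2.imwrite(f"anonymized_{method}.jpg", blurrer.anonymize(image, faces))
```
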
@@ -12,7 +12,7 @@ Process multiple images efficiently.
```python
import cv2
from pathlib import Path
from uniface import RetinaFace
from uniface.detection import RetinaFace

detector = RetinaFace()

@@ -54,7 +54,8 @@ for image_path in tqdm(image_files, desc="Processing"):
## Extract Embeddings

```python
from uniface import RetinaFace, ArcFace
from uniface.detection import RetinaFace
from uniface.recognition import ArcFace
import numpy as np

detector = RetinaFace()

@@ -13,7 +13,8 @@ Build a face search system for finding people in images.
import numpy as np
import cv2
from pathlib import Path
from uniface import RetinaFace, ArcFace
from uniface.detection import RetinaFace
from uniface.recognition import ArcFace

class FaceDatabase:
    def __init__(self):

@@ -8,8 +8,10 @@ A complete pipeline for processing images with detection, recognition, and attri

```python
import cv2
from uniface import RetinaFace, ArcFace, AgeGender
from uniface.visualization import draw_detections
from uniface.attribute import AgeGender
from uniface.detection import RetinaFace
from uniface.recognition import ArcFace
from uniface.draw import draw_detections

# Initialize models
detector = RetinaFace()
@@ -67,7 +69,10 @@ cv2.imwrite("result.jpg", result_image)
For convenience, use the built-in `FaceAnalyzer`:

```python
from uniface import FaceAnalyzer, RetinaFace, ArcFace, AgeGender
from uniface.analyzer import FaceAnalyzer
from uniface.attribute import AgeGender
from uniface.detection import RetinaFace
from uniface.recognition import ArcFace
import cv2

# Initialize with desired modules
@@ -101,13 +106,14 @@ Complete pipeline with all modules:
```python
import cv2
import numpy as np
from uniface import (
    RetinaFace, ArcFace, AgeGender, FairFace,
    Landmark106, MobileGaze
)
from uniface.attribute import AgeGender, FairFace
from uniface.detection import RetinaFace
from uniface.gaze import MobileGaze
from uniface.landmark import Landmark106
from uniface.recognition import ArcFace
from uniface.parsing import BiSeNet
from uniface.spoofing import MiniFASNet
from uniface.visualization import draw_detections, draw_gaze
from uniface.draw import draw_detections, draw_gaze

class FaceAnalysisPipeline:
    def __init__(self):
@@ -193,8 +199,10 @@ for i, r in enumerate(results):
```python
import cv2
import numpy as np
from uniface import RetinaFace, AgeGender, MobileGaze
from uniface.visualization import draw_detections, draw_gaze
from uniface.attribute import AgeGender
from uniface.detection import RetinaFace
from uniface.gaze import MobileGaze
from uniface.draw import draw_detections, draw_gaze

def visualize_analysis(image_path, output_path):
    """Create annotated visualization of face analysis."""

@@ -11,8 +11,8 @@ Real-time face analysis for video streams.

```python
import cv2
from uniface import RetinaFace
from uniface.visualization import draw_detections
from uniface.detection import RetinaFace
from uniface.draw import draw_detections

detector = RetinaFace()
cap = cv2.VideoCapture(0)
@@ -48,7 +48,7 @@ cv2.destroyAllWindows()

```python
import cv2
from uniface import RetinaFace
from uniface.detection import RetinaFace

def process_video(input_path, output_path):
    """Process a video file."""
@@ -83,6 +83,57 @@ process_video("input.mp4", "output.mp4")

---

## Webcam Tracking

To track faces across frames with persistent IDs, pair a detector with `BYTETracker`:

```python
import cv2
import numpy as np
from uniface.common import xyxy_to_cxcywh
from uniface.detection import SCRFD
from uniface.tracking import BYTETracker
from uniface.draw import draw_tracks

detector = SCRFD()
tracker = BYTETracker(track_thresh=0.5, track_buffer=30)
cap = cv2.VideoCapture(0)

while True:
    ret, frame = cap.read()
    if not ret:
        break

    faces = detector.detect(frame)
    dets = np.array([[*f.bbox, f.confidence] for f in faces])
    dets = dets if len(dets) > 0 else np.empty((0, 5))

    tracks = tracker.update(dets)

    if len(tracks) > 0 and len(faces) > 0:
        face_bboxes = np.array([f.bbox for f in faces], dtype=np.float32)
        track_ids = tracks[:, 4].astype(int)

        face_centers = xyxy_to_cxcywh(face_bboxes)[:, :2]
        track_centers = xyxy_to_cxcywh(tracks[:, :4])[:, :2]

        for ti in range(len(tracks)):
            dists = (track_centers[ti, 0] - face_centers[:, 0]) ** 2 + (track_centers[ti, 1] - face_centers[:, 1]) ** 2
            faces[int(np.argmin(dists))].track_id = track_ids[ti]

    draw_tracks(image=frame, faces=[f for f in faces if f.track_id is not None])
    cv2.imshow("Face Tracking", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
```

For more details on tracker parameters and tuning, see [Tracking](../modules/tracking.md).

---

## Performance Tips

### Skip Frames
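
A common pattern is to run the detector only every Nth frame and reuse the last detections in between. A minimal sketch (`DETECT_EVERY` is an illustrative constant, not a library setting):

```python
import cv2
from uniface.detection import RetinaFace

detector = RetinaFace()
cap = cv2.VideoCapture(0)

DETECT_EVERY = 5  # re-detect every 5th frame
faces = []
frame_idx = 0

while True:
    ret, frame = cap.read()
    if not ret:
        break
    if frame_idx % DETECT_EVERY == 0:
        faces = detector.detect(frame)  # refresh detections
    frame_idx += 1
    # reuse `faces` for drawing/cropping; they may lag by a few frames

cap.release()
```
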
@@ -119,7 +170,8 @@ while True:

## See Also

- [Tracking Module](../modules/tracking.md) - Face tracking with BYTETracker
- [Anonymize Stream](anonymize-stream.md) - Privacy protection in video
- [Batch Processing](batch-processing.md) - Process multiple files
- [Detection Module](../modules/detection.md) - Detection options
- [Gaze Module](../modules/gaze.md) - Gaze tracking
- [Gaze Module](../modules/gaze.md) - Gaze estimation

@@ -53,7 +53,7 @@
"import matplotlib.pyplot as plt\n",
"\n",
"import uniface\n",
"from uniface import FaceAnalyzer\n",
"from uniface.analyzer import FaceAnalyzer\n",
"from uniface.detection import RetinaFace\n",
"from uniface.recognition import ArcFace\n",
"\n",

@@ -58,7 +58,7 @@
"import matplotlib.pyplot as plt\n",
"\n",
"import uniface\n",
"from uniface import FaceAnalyzer\n",
"from uniface.analyzer import FaceAnalyzer\n",
"from uniface.detection import RetinaFace\n",
"from uniface.recognition import ArcFace\n",
"\n",

@@ -60,7 +60,7 @@
"import matplotlib.pyplot as plt\n",
"\n",
"import uniface\n",
"from uniface import FaceAnalyzer\n",
"from uniface.analyzer import FaceAnalyzer\n",
"from uniface.detection import RetinaFace\n",
"from uniface.recognition import ArcFace\n",
"from uniface.attribute import AgeGender\n",

File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -48,6 +48,7 @@ theme:
    - content.action.edit
    - content.action.view
    - content.tabs.link
    - announce.dismiss
    - toc.follow

  icon:
@@ -144,6 +145,7 @@ nav:
  - API Reference:
      - Detection: modules/detection.md
      - Recognition: modules/recognition.md
      - Tracking: modules/tracking.md
      - Landmarks: modules/landmarks.md
      - Attributes: modules/attributes.md
      - Parsing: modules/parsing.md

@@ -1,7 +1,7 @@
[project]
name = "uniface"
version = "2.3.0"
description = "UniFace: A Comprehensive Library for Face Detection, Recognition, Landmark Analysis, Face Parsing, Gaze Estimation, Age, and Gender Detection"
version = "3.0.0"
description = "UniFace: A Comprehensive Library for Face Detection, Recognition, Tracking, Landmark Analysis, Face Parsing, Gaze Estimation, Age, and Gender Detection"
readme = "README.md"
license = "MIT"
authors = [{ name = "Yakhyokhuja Valikhujaev", email = "yakhyo9696@gmail.com" }]
@@ -13,6 +13,7 @@ requires-python = ">=3.10,<3.14"
keywords = [
    "face-detection",
    "face-recognition",
    "face-tracking",
    "facial-landmarks",
    "face-parsing",
    "face-segmentation",
@@ -42,9 +43,9 @@ classifiers = [
dependencies = [
    "numpy>=1.21.0",
    "opencv-python>=4.5.0",
    "onnx>=1.12.0",
    "onnxruntime>=1.16.0",
    "scikit-image>=0.19.0",
    "scipy>=1.7.0",
    "requests>=2.28.0",
    "tqdm>=4.64.0",
]
@@ -56,9 +57,9 @@ gpu = ["onnxruntime-gpu>=1.16.0"]
[project.urls]
Homepage = "https://github.com/yakhyo/uniface"
Repository = "https://github.com/yakhyo/uniface"
Documentation = "https://github.com/yakhyo/uniface/blob/main/README.md"
"Quick Start" = "https://github.com/yakhyo/uniface/blob/main/QUICKSTART.md"
"Model Zoo" = "https://github.com/yakhyo/uniface/blob/main/MODELS.md"
Documentation = "https://yakhyo.github.io/uniface"
"Quick Start" = "https://yakhyo.github.io/uniface/quickstart/"
"Model Zoo" = "https://yakhyo.github.io/uniface/models/"

[build-system]
requires = ["setuptools>=64", "wheel"]

@@ -1,8 +1,7 @@
numpy>=1.21.0
opencv-python>=4.5.0
onnx>=1.12.0
onnxruntime>=1.16.0
scikit-image>=0.19.0
scipy>=1.7.0
requests>=2.28.0
pytest>=7.0.0
tqdm>=4.64.0

@@ -2,7 +2,6 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

"""Tests for AgeGender attribute predictor."""

from __future__ import annotations

tests/test_draw.py (new file, 61 lines)
@@ -0,0 +1,61 @@
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

from __future__ import annotations

import numpy as np

from uniface.draw import draw_gaze


def _compute_gaze_delta(bbox: np.ndarray, pitch: float, yaw: float) -> tuple[int, int]:
    """Replicate draw_gaze dx/dy math for verification."""
    x_min, _, x_max, _ = map(int, bbox[:4])
    length = x_max - x_min
    dx = int(-length * np.sin(yaw) * np.cos(pitch))
    dy = int(-length * np.sin(pitch))
    return dx, dy


def test_draw_gaze_yaw_only_moves_horizontally():
    """Yaw-only input (pitch=0) should produce horizontal displacement only."""
    image = np.zeros((200, 200, 3), dtype=np.uint8)
    bbox = np.array([50, 50, 150, 150], dtype=np.float32)

    yaw = 0.5
    pitch = 0.0
    dx, dy = _compute_gaze_delta(bbox, pitch, yaw)

    assert dx != 0, 'Yaw-only should produce horizontal displacement'
    assert dy == 0, 'Yaw-only should produce zero vertical displacement'

    # Should not raise
    draw_gaze(image, bbox, pitch, yaw, draw_bbox=False, draw_angles=False)


def test_draw_gaze_pitch_only_moves_vertically():
    """Pitch-only input (yaw=0) should produce vertical displacement only."""
    image = np.zeros((200, 200, 3), dtype=np.uint8)
    bbox = np.array([50, 50, 150, 150], dtype=np.float32)

    yaw = 0.0
    pitch = 0.5
    dx, dy = _compute_gaze_delta(bbox, pitch, yaw)

    assert dx == 0, 'Pitch-only should produce zero horizontal displacement'
    assert dy != 0, 'Pitch-only should produce vertical displacement'

    # Should not raise
    draw_gaze(image, bbox, pitch, yaw, draw_bbox=False, draw_angles=False)


def test_draw_gaze_modifies_image():
    """draw_gaze should modify the image in place."""
    image = np.zeros((200, 200, 3), dtype=np.uint8)
    bbox = np.array([50, 50, 150, 150], dtype=np.float32)

    original = image.copy()
    draw_gaze(image, bbox, 0.3, 0.3)

    assert not np.array_equal(image, original), 'draw_gaze should modify the image'
@@ -2,7 +2,6 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

"""Tests for factory functions (create_detector, create_recognizer, etc.)."""

from __future__ import annotations

@@ -13,10 +12,10 @@ from uniface import (
    create_detector,
    create_landmarker,
    create_recognizer,
    detect_faces,
    list_available_detectors,
)
from uniface.constants import RetinaFaceWeights, SCRFDWeights
from uniface.spoofing import MiniFASNet, create_spoofer


# create_detector tests
@@ -123,62 +122,6 @@ def test_create_landmarker_invalid_method():
    create_landmarker('invalid_method')


# detect_faces tests
def test_detect_faces_retinaface():
    """
    Test high-level detect_faces function with RetinaFace.
    """
    mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
    faces = detect_faces(mock_image, method='retinaface')

    assert isinstance(faces, list), 'detect_faces should return a list'


def test_detect_faces_scrfd():
    """
    Test high-level detect_faces function with SCRFD.
    """
    mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
    faces = detect_faces(mock_image, method='scrfd')

    assert isinstance(faces, list), 'detect_faces should return a list'


def test_detect_faces_with_threshold():
    """
    Test detect_faces with custom confidence threshold.
    """
    mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
    faces = detect_faces(mock_image, method='retinaface', confidence_threshold=0.8)

    assert isinstance(faces, list), 'detect_faces should return a list'

    # All detections should respect threshold
    for face in faces:
        assert face.confidence >= 0.8, 'All detections should meet confidence threshold'


def test_detect_faces_default_method():
    """
    Test detect_faces with default method (should use retinaface).
    """
    mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
    faces = detect_faces(mock_image)  # No method specified

    assert isinstance(faces, list), 'detect_faces should return a list with default method'


def test_detect_faces_empty_image():
    """
    Test detect_faces on a blank image.
    """
    empty_image = np.zeros((640, 640, 3), dtype=np.uint8)
    faces = detect_faces(empty_image, method='retinaface')

    assert isinstance(faces, list), 'Should return a list even for empty image'
    assert len(faces) == 0, 'Should detect no faces in blank image'


# list_available_detectors tests
def test_list_available_detectors():
    """
@@ -280,3 +223,16 @@ def test_factory_returns_correct_types():
    assert isinstance(detector, RetinaFace), 'Should return RetinaFace instance'
    assert isinstance(recognizer, ArcFace), 'Should return ArcFace instance'
    assert isinstance(landmarker, Landmark106), 'Should return Landmark106 instance'


# create_spoofer tests
def test_create_spoofer_default():
    """Test creating a spoofer with default parameters."""
    spoofer = create_spoofer()
    assert isinstance(spoofer, MiniFASNet), 'Should return MiniFASNet instance'


def test_create_spoofer_with_providers():
    """Test that create_spoofer forwards providers kwarg without TypeError."""
    spoofer = create_spoofer(providers=['CPUExecutionProvider'])
    assert isinstance(spoofer, MiniFASNet), 'Should return MiniFASNet instance'

@@ -2,7 +2,6 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

"""Tests for 106-point facial landmark detector."""

from __future__ import annotations

@@ -2,7 +2,6 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

"""Tests for face parsing models (BiSeNet and XSeg)."""

from __future__ import annotations

@@ -209,7 +208,7 @@ def test_xseg_parse_with_landmarks():
    )

    # Parse
    mask = parser.parse(image, landmarks)
    mask = parser.parse(image, landmarks=landmarks)

    assert mask.shape == (480, 640)
    assert mask.dtype == np.float32
@@ -226,7 +225,7 @@ def test_xseg_parse_invalid_landmarks():
    invalid_landmarks = np.array([[0, 0], [1, 1], [2, 2]])

    with pytest.raises(ValueError, match='Landmarks must have shape'):
        parser.parse(image, invalid_landmarks)
        parser.parse(image, landmarks=invalid_landmarks)


def test_xseg_parse_with_inverse():

@@ -2,7 +2,6 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

"""Tests for face recognition models (ArcFace, MobileFace, SphereFace)."""

from __future__ import annotations

@@ -2,7 +2,6 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

"""Tests for RetinaFace detector."""

from __future__ import annotations

@@ -2,7 +2,6 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

"""Tests for SCRFD detector."""

from __future__ import annotations

@@ -2,7 +2,6 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

"""Tests for UniFace type definitions (dataclasses)."""

from __future__ import annotations

@@ -2,7 +2,6 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

"""Tests for utility functions (compute_similarity, face_alignment, etc.)."""

from __future__ import annotations

@@ -6,27 +6,27 @@ CLI utilities for testing and running UniFace features.

| Tool | Description |
|------|-------------|
| `detection.py` | Face detection on image, video, or webcam |
| `face_anonymize.py` | Face anonymization/blurring for privacy |
| `age_gender.py` | Age and gender prediction |
| `face_emotion.py` | Emotion detection (7 or 8 emotions) |
| `gaze_estimation.py` | Gaze direction estimation |
| `detect.py` | Face detection on image, video, or webcam |
| `track.py` | Face tracking on video with ByteTrack |
| `analyze.py` | Complete face analysis (detection + recognition + attributes) |
| `anonymize.py` | Face anonymization/blurring for privacy |
| `emotion.py` | Emotion detection (7 or 8 emotions) |
| `gaze.py` | Gaze direction estimation |
| `landmarks.py` | 106-point facial landmark detection |
| `recognition.py` | Face embedding extraction and comparison |
| `face_analyzer.py` | Complete face analysis (detection + recognition + attributes) |
| `face_search.py` | Real-time face matching against reference |
| `recognize.py` | Face embedding extraction and comparison |
| `search.py` | Real-time face matching against reference |
| `fairface.py` | FairFace attribute prediction (race, gender, age) |
| `attribute.py` | Age and gender prediction |
| `spoofing.py` | Face anti-spoofing detection |
| `face_parsing.py` | Face semantic segmentation (BiSeNet) |
| `parse.py` | Face semantic segmentation (BiSeNet) |
| `xseg.py` | Face segmentation (XSeg) |
| `video_detection.py` | Face detection on video files with progress bar |
| `batch_process.py` | Batch process folder of images |
| `download_model.py` | Download model weights |
| `sha256_generate.py` | Generate SHA256 hash for model files |

## Unified `--source` Pattern

All tools use a unified `--source` argument that accepts:
Most tools use a unified `--source` argument that accepts:
- **Image path**: `--source photo.jpg`
- **Video path**: `--source video.mp4`
- **Camera ID**: `--source 0` (default webcam), `--source 1` (external camera)
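
Internally, each tool resolves the argument with the `get_source_type` helper from `tools/_common.py` (shown later on this page) and dispatches accordingly. A minimal sketch of that pattern (the `print` calls stand in for the tools' actual `process_image`/`process_video`/`run_camera` handlers):

```python
from _common import get_source_type

def run(source: str) -> None:
    kind = get_source_type(source)  # 'image', 'video', 'camera', or 'unknown'
    if kind == 'camera':
        print(f'Opening camera {int(source)} ...')
    elif kind == 'image':
        print(f'Processing image {source} ...')
    elif kind == 'video':
        print(f'Processing video {source} ...')
    else:
        print(f"Error: Unknown source type for '{source}'")

run('assets/test.jpg')
```
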
@@ -35,26 +35,31 @@ All tools use a unified `--source` argument that accepts:

```bash
# Face detection
python tools/detection.py --source assets/test.jpg  # image
python tools/detection.py --source video.mp4        # video
python tools/detection.py --source 0                # webcam
python tools/detect.py --source assets/test.jpg  # image
python tools/detect.py --source video.mp4        # video
python tools/detect.py --source 0                # webcam

# Face tracking
python tools/track.py --source video.mp4
python tools/track.py --source video.mp4 --output tracked.mp4
python tools/track.py --source 0  # webcam

# Face anonymization
python tools/face_anonymize.py --source assets/test.jpg --method pixelate
python tools/face_anonymize.py --source video.mp4 --method gaussian
python tools/face_anonymize.py --source 0 --method pixelate
python tools/anonymize.py --source assets/test.jpg --method pixelate
python tools/anonymize.py --source video.mp4 --method gaussian
python tools/anonymize.py --source 0 --method pixelate

# Age and gender
python tools/age_gender.py --source assets/test.jpg
python tools/age_gender.py --source 0
python tools/attribute.py --source assets/test.jpg
python tools/attribute.py --source 0

# Emotion detection
python tools/face_emotion.py --source assets/test.jpg
python tools/face_emotion.py --source 0
python tools/emotion.py --source assets/test.jpg
python tools/emotion.py --source 0

# Gaze estimation
python tools/gaze_estimation.py --source assets/test.jpg
python tools/gaze_estimation.py --source 0
python tools/gaze.py --source assets/test.jpg
python tools/gaze.py --source 0

# Landmarks
python tools/landmarks.py --source assets/test.jpg
@@ -65,8 +70,8 @@ python tools/fairface.py --source assets/test.jpg
python tools/fairface.py --source 0

# Face parsing (BiSeNet)
python tools/face_parsing.py --source assets/test.jpg
python tools/face_parsing.py --source 0
python tools/parse.py --source assets/test.jpg
python tools/parse.py --source 0

# Face segmentation (XSeg)
python tools/xseg.py --source assets/test.jpg
@@ -77,22 +82,18 @@ python tools/spoofing.py --source assets/test.jpg
python tools/spoofing.py --source 0

# Face analyzer
python tools/face_analyzer.py --source assets/test.jpg
python tools/face_analyzer.py --source 0
python tools/analyze.py --source assets/test.jpg
python tools/analyze.py --source 0

# Face recognition (extract embedding)
python tools/recognition.py --image assets/test.jpg
python tools/recognize.py --image assets/test.jpg

# Face comparison
python tools/recognition.py --image1 face1.jpg --image2 face2.jpg
python tools/recognize.py --image1 face1.jpg --image2 face2.jpg

# Face search (match against reference)
python tools/face_search.py --reference person.jpg --source 0
python tools/face_search.py --reference person.jpg --source video.mp4

# Video processing with progress bar
python tools/video_detection.py --source video.mp4
python tools/video_detection.py --source video.mp4 --output output.mp4
python tools/search.py --reference person.jpg --source 0
python tools/search.py --reference person.jpg --source video.mp4

# Batch processing
python tools/batch_process.py --input images/ --output results/
@@ -122,5 +123,5 @@ python tools/download_model.py # downloads all
## Quick Test

```bash
python tools/detection.py --source assets/test.jpg
python tools/detect.py --source assets/test.jpg
```

tools/_common.py (new file, 29 lines)
@@ -0,0 +1,29 @@
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

from __future__ import annotations

from pathlib import Path

IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}


def get_source_type(source: str) -> str:
    """Determine if source is image, video, or camera.

    Args:
        source: File path or camera ID string (e.g. ``"0"``).

    Returns:
        One of ``"image"``, ``"video"``, ``"camera"``, or ``"unknown"``.
    """
    if source.isdigit():
        return 'camera'
    suffix = Path(source).suffix.lower()
    if suffix in IMAGE_EXTENSIONS:
        return 'image'
    if suffix in VIDEO_EXTENSIONS:
        return 'video'
    return 'unknown'
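
A quick sanity check of the mapping; the expected values follow directly from the extension sets above:

```python
assert get_source_type('0') == 'camera'          # bare digits -> camera ID
assert get_source_type('clip.mp4') == 'video'
assert get_source_type('face.PNG') == 'image'    # suffix is lower-cased first
assert get_source_type('notes.txt') == 'unknown'
```
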
@@ -5,9 +5,9 @@
"""Face analysis using FaceAnalyzer.

Usage:
    python tools/face_analyzer.py --source path/to/image.jpg
    python tools/face_analyzer.py --source path/to/video.mp4
    python tools/face_analyzer.py --source 0  # webcam
    python tools/analyze.py --source path/to/image.jpg
    python tools/analyze.py --source path/to/video.mp4
    python tools/analyze.py --source 0  # webcam
"""

from __future__ import annotations
@@ -16,28 +16,15 @@ import argparse
import os
from pathlib import Path

from _common import get_source_type
import cv2
import numpy as np

from uniface import AgeGender, ArcFace, FaceAnalyzer, RetinaFace
from uniface.visualization import draw_detections

IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}


def get_source_type(source: str) -> str:
    """Determine if source is image, video, or camera."""
    if source.isdigit():
        return 'camera'
    path = Path(source)
    suffix = path.suffix.lower()
    if suffix in IMAGE_EXTENSIONS:
        return 'image'
    elif suffix in VIDEO_EXTENSIONS:
        return 'video'
    else:
        return 'unknown'
from uniface.analyzer import FaceAnalyzer
from uniface.attribute import AgeGender
from uniface.detection import RetinaFace
from uniface.draw import draw_detections
from uniface.recognition import ArcFace


def draw_face_info(image, face, face_id):
@@ -111,7 +98,7 @@ def process_image(analyzer, image_path: str, save_dir: str = 'outputs', show_sim
    bboxes = [f.bbox for f in faces]
    scores = [f.confidence for f in faces]
    landmarks = [f.landmarks for f in faces]
    draw_detections(image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, fancy_bbox=True)
    draw_detections(image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, corner_bbox=True)

    for i, face in enumerate(faces, 1):
        draw_face_info(image, face, i)
@@ -153,7 +140,7 @@ def process_video(analyzer, video_path: str, save_dir: str = 'outputs'):
        bboxes = [f.bbox for f in faces]
        scores = [f.confidence for f in faces]
        landmarks = [f.landmarks for f in faces]
        draw_detections(image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, fancy_bbox=True)
        draw_detections(image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, corner_bbox=True)

        for i, face in enumerate(faces, 1):
            draw_face_info(frame, face, i)
@@ -189,7 +176,7 @@ def run_camera(analyzer, camera_id: int = 0):
        bboxes = [f.bbox for f in faces]
        scores = [f.confidence for f in faces]
        landmarks = [f.landmarks for f in faces]
        draw_detections(image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, fancy_bbox=True)
        draw_detections(image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, corner_bbox=True)

        for i, face in enumerate(faces, 1):
            draw_face_info(frame, face, i)
@@ -5,9 +5,9 @@
"""Face anonymization/blurring for privacy.

Usage:
    python tools/face_anonymize.py --source path/to/image.jpg --method pixelate
    python tools/face_anonymize.py --source path/to/video.mp4 --method gaussian
    python tools/face_anonymize.py --source 0 --method pixelate  # webcam
    python tools/anonymize.py --source path/to/image.jpg --method pixelate
    python tools/anonymize.py --source path/to/video.mp4 --method gaussian
    python tools/anonymize.py --source 0 --method pixelate  # webcam
"""

from __future__ import annotations
@@ -16,28 +16,12 @@ import argparse
import os
from pathlib import Path

from _common import get_source_type
import cv2

from uniface import RetinaFace
from uniface.detection import RetinaFace
from uniface.privacy import BlurFace

IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}


def get_source_type(source: str) -> str:
    """Determine if source is image, video, or camera."""
    if source.isdigit():
        return 'camera'
    path = Path(source)
    suffix = path.suffix.lower()
    if suffix in IMAGE_EXTENSIONS:
        return 'image'
    elif suffix in VIDEO_EXTENSIONS:
        return 'video'
    else:
        return 'unknown'


def process_image(
    detector,
@@ -56,7 +40,7 @@ def process_image(
    print(f'Detected {len(faces)} face(s)')

    if show_detections and faces:
        from uniface.visualization import draw_detections
        from uniface.draw import draw_detections

        preview = image.copy()
        bboxes = [face.bbox for face in faces]
@@ -171,19 +155,19 @@ def main():
        epilog="""
Examples:
    # Anonymize image with pixelation (default)
    python run_anonymization.py --source photo.jpg
    python tools/anonymize.py --source photo.jpg

    # Use Gaussian blur with custom strength
    python run_anonymization.py --source photo.jpg --method gaussian --blur-strength 5.0
    python tools/anonymize.py --source photo.jpg --method gaussian --blur-strength 5.0

    # Real-time webcam anonymization
    python run_anonymization.py --source 0 --method pixelate
    python tools/anonymize.py --source 0 --method pixelate

    # Black boxes for maximum privacy
    python run_anonymization.py --source photo.jpg --method blackout
    python tools/anonymize.py --source photo.jpg --method blackout

    # Custom pixelation intensity
    python run_anonymization.py --source photo.jpg --method pixelate --pixel-blocks 5
    python tools/anonymize.py --source photo.jpg --method pixelate --pixel-blocks 5
""",
    )

@@ -5,9 +5,9 @@
"""Age and gender prediction on detected faces.

Usage:
    python tools/age_gender.py --source path/to/image.jpg
    python tools/age_gender.py --source path/to/video.mp4
    python tools/age_gender.py --source 0  # webcam
    python tools/attribute.py --source path/to/image.jpg
    python tools/attribute.py --source path/to/video.mp4
    python tools/attribute.py --source 0  # webcam
"""

from __future__ import annotations
@@ -16,27 +16,12 @@ import argparse
import os
from pathlib import Path

from _common import get_source_type
import cv2

from uniface import SCRFD, AgeGender, RetinaFace
from uniface.visualization import draw_detections

IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}


def get_source_type(source: str) -> str:
    """Determine if source is image, video, or camera."""
    if source.isdigit():
        return 'camera'
    path = Path(source)
    suffix = path.suffix.lower()
    if suffix in IMAGE_EXTENSIONS:
        return 'image'
    elif suffix in VIDEO_EXTENSIONS:
        return 'video'
    else:
        return 'unknown'
from uniface.attribute import AgeGender
from uniface.detection import SCRFD, RetinaFace
from uniface.draw import draw_detections


def draw_age_gender_label(image, bbox, sex: str, age: int):
@@ -71,7 +56,7 @@ def process_image(
    scores = [f.confidence for f in faces]
    landmarks = [f.landmarks for f in faces]
    draw_detections(
        image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
        image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, corner_bbox=True
    )

    for i, face in enumerate(faces):
@@ -123,7 +108,7 @@ def process_video(
        scores = [f.confidence for f in faces]
        landmarks = [f.landmarks for f in faces]
        draw_detections(
            image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
            image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, corner_bbox=True
        )

        for face in faces:
@@ -162,7 +147,7 @@ def run_camera(detector, age_gender, camera_id: int = 0, threshold: float = 0.6)
        scores = [f.confidence for f in faces]
        landmarks = [f.landmarks for f in faces]
        draw_detections(
            image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
            image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, corner_bbox=True
        )

        for face in faces:
@@ -14,8 +14,8 @@ from pathlib import Path
import cv2
from tqdm import tqdm

from uniface import SCRFD, RetinaFace
from uniface.visualization import draw_detections
from uniface.detection import SCRFD, RetinaFace
from uniface.draw import draw_detections


def get_image_files(input_dir: Path, extensions: tuple) -> list:
@@ -39,7 +39,7 @@ def process_image(detector, image_path: Path, output_path: Path, threshold: floa
    scores = [f.confidence for f in faces]
    landmarks = [f.landmarks for f in faces]
    draw_detections(
        image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
        image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, corner_bbox=True
    )

    cv2.putText(

@@ -5,9 +5,9 @@
"""Face detection on image, video, or webcam.

Usage:
    python tools/detection.py --source path/to/image.jpg
    python tools/detection.py --source path/to/video.mp4
    python tools/detection.py --source 0  # webcam
    python tools/detect.py --source path/to/image.jpg
    python tools/detect.py --source path/to/video.mp4
    python tools/detect.py --source 0  # webcam
"""

from __future__ import annotations
@@ -16,27 +16,12 @@ import argparse
import os
from pathlib import Path

from _common import get_source_type
import cv2
from tqdm import tqdm

from uniface.detection import SCRFD, RetinaFace, YOLOv5Face, YOLOv8Face
from uniface.visualization import draw_detections

IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}


def get_source_type(source: str) -> str:
    """Determine if source is image, video, or camera."""
    if source.isdigit():
        return 'camera'
    path = Path(source)
    suffix = path.suffix.lower()
    if suffix in IMAGE_EXTENSIONS:
        return 'image'
    elif suffix in VIDEO_EXTENSIONS:
        return 'video'
    else:
        return 'unknown'
from uniface.draw import draw_detections


def process_image(detector, image_path: str, threshold: float = 0.6, save_dir: str = 'outputs'):
@@ -52,7 +37,7 @@ def process_image(detector, image_path: str, threshold: float = 0.6, save_dir: s
    bboxes = [face.bbox for face in faces]
    scores = [face.confidence for face in faces]
    landmarks = [face.landmarks for face in faces]
    draw_detections(image, bboxes, scores, landmarks, vis_threshold=threshold)
    draw_detections(image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold)

    os.makedirs(save_dir, exist_ok=True)
    output_path = os.path.join(save_dir, f'{os.path.splitext(os.path.basename(image_path))[0]}_out.jpg')
@@ -60,34 +45,47 @@ def process_image(detector, image_path: str, threshold: float = 0.6, save_dir: s
    print(f'Detected {len(faces)} face(s). Output saved: {output_path}')


def process_video(detector, video_path: str, threshold: float = 0.6, save_dir: str = 'outputs'):
    """Process a video file."""
    cap = cv2.VideoCapture(video_path)
def process_video(
    detector,
    input_path: str,
    output_path: str,
    threshold: float = 0.6,
    show_preview: bool = False,
):
    """Process a video file with progress bar."""
    cap = cv2.VideoCapture(input_path)
    if not cap.isOpened():
        print(f"Error: Cannot open video file '{video_path}'")
        print(f"Error: Cannot open video file '{input_path}'")
        return

    # Get video properties
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    os.makedirs(save_dir, exist_ok=True)
    output_path = os.path.join(save_dir, f'{Path(video_path).stem}_out.mp4')
    print(f'Input: {input_path} ({width}x{height}, {fps:.1f} fps, {total_frames} frames)')
    print(f'Output: {output_path}')

    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    print(f'Processing video: {video_path} ({total_frames} frames)')
    frame_count = 0
    if not out.isOpened():
        print(f"Error: Cannot create output video '{output_path}'")
        cap.release()
        return

    while True:
    frame_count = 0
    total_faces = 0

    for _ in tqdm(range(total_frames), desc='Processing', unit='frames'):
        ret, frame = cap.read()
        if not ret:
            break

        frame_count += 1
        faces = detector.detect(frame)
        total_faces += len(faces)

        bboxes = [f.bbox for f in faces]
        scores = [f.confidence for f in faces]
@@ -99,19 +97,26 @@ def process_video(detector, video_path: str, threshold: float = 0.6, save_dir: s
            landmarks=landmarks,
            vis_threshold=threshold,
            draw_score=True,
            fancy_bbox=True,
            corner_bbox=True,
        )

        cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        out.write(frame)

        # Show progress
        if frame_count % 100 == 0:
            print(f'  Processed {frame_count}/{total_frames} frames...')
        if show_preview:
            cv2.imshow("Processing - Press 'q' to cancel", frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                print('\nCancelled by user')
                break

    cap.release()
    out.release()
    print(f'Done! Output saved: {output_path}')
    if show_preview:
        cv2.destroyAllWindows()

    avg_faces = total_faces / frame_count if frame_count > 0 else 0
    print(f'\nDone! {frame_count} frames, {total_faces} faces ({avg_faces:.1f} avg/frame)')
    print(f'Saved: {output_path}')


def run_camera(detector, camera_id: int = 0, threshold: float = 0.6):
@@ -125,7 +130,7 @@ def run_camera(detector, camera_id: int = 0, threshold: float = 0.6):

    while True:
        ret, frame = cap.read()
        frame = cv2.flip(frame, 1)  # mirror for natural interaction
        frame = cv2.flip(frame, 1)
        if not ret:
            break

@@ -141,7 +146,7 @@ def run_camera(detector, camera_id: int = 0, threshold: float = 0.6):
            landmarks=landmarks,
            vis_threshold=threshold,
            draw_score=True,
            fancy_bbox=True,
            corner_bbox=True,
        )

        cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
@@ -158,18 +163,24 @@ def main():
    parser = argparse.ArgumentParser(description='Run face detection')
    parser.add_argument('--source', type=str, required=True, help='Image/video path or camera ID (0, 1, ...)')
    parser.add_argument(
        '--method', type=str, default='retinaface', choices=['retinaface', 'scrfd', 'yolov5face', 'yolov8face']
        '--detector',
        '--method',
        type=str,
        default='retinaface',
        choices=['retinaface', 'scrfd', 'yolov5face', 'yolov8face'],
    )
    parser.add_argument('--threshold', type=float, default=0.25, help='Visualization threshold')
    parser.add_argument('--preview', action='store_true', help='Show live preview during video processing')
    parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
    parser.add_argument('--output', type=str, default=None, help='Output video path (auto-generated if not specified)')
    args = parser.parse_args()

    # Initialize detector
    if args.method == 'retinaface':
    if args.detector == 'retinaface':
        detector = RetinaFace()
    elif args.method == 'scrfd':
    elif args.detector == 'scrfd':
        detector = SCRFD()
    elif args.method == 'yolov5face':
    elif args.detector == 'yolov5face':
        from uniface.constants import YOLOv5FaceWeights

        detector = YOLOv5Face(model_name=YOLOv5FaceWeights.YOLOV5M)
@@ -178,7 +189,6 @@ def main():

        detector = YOLOv8Face(model_name=YOLOv8FaceWeights.YOLOV8N)

    # Determine source type and process
    source_type = get_source_type(args.source)

    if source_type == 'camera':
@@ -192,7 +202,12 @@ def main():
        if not os.path.exists(args.source):
            print(f'Error: Video not found: {args.source}')
            return
        process_video(detector, args.source, args.threshold, args.save_dir)
        if args.output:
            output_path = args.output
        else:
            os.makedirs(args.save_dir, exist_ok=True)
            output_path = os.path.join(args.save_dir, f'{Path(args.source).stem}_detected.mp4')
        process_video(detector, args.source, output_path, args.threshold, args.preview)
    else:
        print(f"Error: Unknown source type for '{args.source}'")
        print('Supported formats: images (.jpg, .png, ...), videos (.mp4, .avi, ...), or camera ID (0, 1, ...)')
@@ -5,9 +5,9 @@
"""Emotion detection on detected faces.

Usage:
    python tools/face_emotion.py --source path/to/image.jpg
    python tools/face_emotion.py --source path/to/video.mp4
    python tools/face_emotion.py --source 0  # webcam
    python tools/emotion.py --source path/to/image.jpg
    python tools/emotion.py --source path/to/video.mp4
    python tools/emotion.py --source 0  # webcam
"""

from __future__ import annotations
@@ -16,27 +16,12 @@ import argparse
import os
from pathlib import Path

from _common import get_source_type
import cv2

from uniface import SCRFD, Emotion, RetinaFace
from uniface.visualization import draw_detections

IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}


def get_source_type(source: str) -> str:
    """Determine if source is image, video, or camera."""
    if source.isdigit():
        return 'camera'
    path = Path(source)
    suffix = path.suffix.lower()
    if suffix in IMAGE_EXTENSIONS:
        return 'image'
    elif suffix in VIDEO_EXTENSIONS:
        return 'video'
    else:
        return 'unknown'
from uniface.attribute import Emotion
from uniface.detection import SCRFD, RetinaFace
from uniface.draw import draw_detections


def draw_emotion_label(image, bbox, emotion: str, confidence: float):
@@ -71,7 +56,7 @@ def process_image(
    scores = [f.confidence for f in faces]
    landmarks = [f.landmarks for f in faces]
    draw_detections(
        image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
        image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, corner_bbox=True
    )

    for i, face in enumerate(faces):
@@ -123,7 +108,7 @@ def process_video(
        scores = [f.confidence for f in faces]
        landmarks = [f.landmarks for f in faces]
        draw_detections(
            image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
            image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, corner_bbox=True
        )

        for face in faces:
@@ -162,7 +147,7 @@ def run_camera(detector, emotion_predictor, camera_id: int = 0, threshold: float
        scores = [f.confidence for f in faces]
        landmarks = [f.landmarks for f in faces]
        draw_detections(
            image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
            image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, corner_bbox=True
        )

        for face in faces:
@@ -16,28 +16,12 @@ import argparse
import os
from pathlib import Path

from _common import get_source_type
import cv2

from uniface import SCRFD, RetinaFace
from uniface.attribute import FairFace
from uniface.visualization import draw_detections

IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}


def get_source_type(source: str) -> str:
    """Determine if source is image, video, or camera."""
    if source.isdigit():
        return 'camera'
    path = Path(source)
    suffix = path.suffix.lower()
    if suffix in IMAGE_EXTENSIONS:
        return 'image'
    elif suffix in VIDEO_EXTENSIONS:
        return 'video'
    else:
        return 'unknown'
from uniface.detection import SCRFD, RetinaFace
from uniface.draw import draw_detections


def draw_fairface_label(image, bbox, sex: str, age_group: str, race: str):
@@ -72,7 +56,7 @@ def process_image(
    scores = [f.confidence for f in faces]
    landmarks = [f.landmarks for f in faces]
    draw_detections(
        image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
        image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, corner_bbox=True
    )

    for i, face in enumerate(faces):
@@ -124,7 +108,7 @@ def process_video(
        scores = [f.confidence for f in faces]
        landmarks = [f.landmarks for f in faces]
        draw_detections(
            image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
            image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, corner_bbox=True
        )

        for face in faces:
@@ -163,7 +147,7 @@ def run_camera(detector, fairface, camera_id: int = 0, threshold: float = 0.6):
        scores = [f.confidence for f in faces]
        landmarks = [f.landmarks for f in faces]
        draw_detections(
            image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
            image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, corner_bbox=True
        )

        for face in faces:

@@ -5,9 +5,9 @@
"""Gaze estimation on detected faces.

Usage:
    python tools/gaze_estimation.py --source path/to/image.jpg
    python tools/gaze_estimation.py --source path/to/video.mp4
    python tools/gaze_estimation.py --source 0  # webcam
    python tools/gaze.py --source path/to/image.jpg
    python tools/gaze.py --source path/to/video.mp4
    python tools/gaze.py --source 0  # webcam
"""

from __future__ import annotations
@@ -16,29 +16,13 @@ import argparse
import os
from pathlib import Path

from _common import get_source_type
import cv2
import numpy as np

from uniface import RetinaFace
from uniface.detection import RetinaFace
from uniface.draw import draw_gaze
from uniface.gaze import MobileGaze
from uniface.visualization import draw_gaze

IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}


def get_source_type(source: str) -> str:
    """Determine if source is image, video, or camera."""
    if source.isdigit():
        return 'camera'
    path = Path(source)
    suffix = path.suffix.lower()
    if suffix in IMAGE_EXTENSIONS:
        return 'image'
    elif suffix in VIDEO_EXTENSIONS:
        return 'video'
    else:
        return 'unknown'


def process_image(detector, gaze_estimator, image_path: str, save_dir: str = 'outputs'):
@@ -16,26 +16,11 @@ import argparse
import os
from pathlib import Path

from _common import get_source_type
import cv2

from uniface import SCRFD, Landmark106, RetinaFace

IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}


def get_source_type(source: str) -> str:
    """Determine if source is image, video, or camera."""
    if source.isdigit():
        return 'camera'
    path = Path(source)
    suffix = path.suffix.lower()
    if suffix in IMAGE_EXTENSIONS:
        return 'image'
    elif suffix in VIDEO_EXTENSIONS:
        return 'video'
    else:
        return 'unknown'
from uniface.detection import SCRFD, RetinaFace
from uniface.landmark import Landmark106


def process_image(detector, landmarker, image_path: str, save_dir: str = 'outputs'):
@@ -5,9 +5,9 @@
"""Face parsing on detected faces.

Usage:
    python tools/face_parsing.py --source path/to/image.jpg
    python tools/face_parsing.py --source path/to/video.mp4
    python tools/face_parsing.py --source 0  # webcam
    python tools/parse.py --source path/to/image.jpg
    python tools/parse.py --source path/to/video.mp4
    python tools/parse.py --source 0  # webcam
"""

from __future__ import annotations
@@ -16,30 +16,14 @@ import argparse
import os
from pathlib import Path

from _common import get_source_type
import cv2
import numpy as np

from uniface import RetinaFace
from uniface.constants import ParsingWeights
from uniface.detection import RetinaFace
from uniface.draw import vis_parsing_maps
from uniface.parsing import BiSeNet
from uniface.visualization import vis_parsing_maps

IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}


def get_source_type(source: str) -> str:
    """Determine if source is image, video, or camera."""
    if source.isdigit():
        return 'camera'
    path = Path(source)
    suffix = path.suffix.lower()
    if suffix in IMAGE_EXTENSIONS:
        return 'image'
    elif suffix in VIDEO_EXTENSIONS:
        return 'video'
    else:
        return 'unknown'


def expand_bbox(
@@ -225,7 +209,7 @@ def main():
    args = parser_arg.parse_args()

    detector = RetinaFace()
    parser = BiSeNet(model_name=ParsingWeights.RESNET34)
    parser = BiSeNet(model_name=args.model)

    source_type = get_source_type(args.source)
@@ -5,8 +5,8 @@
"""Face recognition: extract embeddings or compare two faces.

Usage:
    python tools/recognition.py --image path/to/image.jpg
    python tools/recognition.py --image1 face1.jpg --image2 face2.jpg
    python tools/recognize.py --image path/to/image.jpg
    python tools/recognize.py --image1 face1.jpg --image2 face2.jpg
"""

import argparse
@@ -41,7 +41,7 @@ def run_inference(detector, recognizer, image_path: str):

    print(f'Detected {len(faces)} face(s). Extracting embedding for the first face...')

    landmarks = faces[0]['landmarks']  # 5-point landmarks for alignment (already np.ndarray)
    landmarks = faces[0].landmarks  # 5-point landmarks for alignment (already np.ndarray)
    embedding = recognizer.get_embedding(image, landmarks)
    norm_embedding = recognizer.get_normalized_embedding(image, landmarks)  # L2 normalized

@@ -65,8 +65,8 @@ def compare_faces(detector, recognizer, image1_path: str, image2_path: str, thre
        print('Error: No faces detected in one or both images')
        return

    landmarks1 = faces1[0]['landmarks']
    landmarks2 = faces2[0]['landmarks']
    landmarks1 = faces1[0].landmarks
    landmarks2 = faces2[0].landmarks

    embedding1 = recognizer.get_normalized_embedding(img1, landmarks1)
    embedding2 = recognizer.get_normalized_embedding(img2, landmarks2)
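The attribute-style access above pairs naturally with embedding comparison. A hedged sketch, assuming `compute_similarity` accepts two embedding vectors (its exact signature is not shown in this changeset) and that `face1.jpg`/`face2.jpg` are placeholder paths:

```python
import cv2

from uniface.detection import RetinaFace
from uniface.face_utils import compute_similarity
from uniface.recognition import ArcFace

detector = RetinaFace()
recognizer = ArcFace()

img1 = cv2.imread('face1.jpg')
img2 = cv2.imread('face2.jpg')

# Take the first detected face in each image, as compare_faces does above
lmk1 = detector.detect(img1)[0].landmarks
lmk2 = detector.detect(img2)[0].landmarks

emb1 = recognizer.get_normalized_embedding(img1, lmk1)
emb2 = recognizer.get_normalized_embedding(img2, lmk2)

# With L2-normalized embeddings, cosine similarity reduces to a dot product
print(f'similarity: {compute_similarity(emb1, emb2):.3f}')
```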
@@ -5,8 +5,8 @@
"""Real-time face search: match faces against a reference image.

Usage:
    python tools/face_search.py --reference person.jpg --source 0  # webcam
    python tools/face_search.py --reference person.jpg --source video.mp4
    python tools/search.py --reference person.jpg --source 0  # webcam
    python tools/search.py --reference person.jpg --source video.mp4
"""

from __future__ import annotations
@@ -15,6 +15,7 @@ import argparse
import os
from pathlib import Path

from _common import get_source_type
import cv2
import numpy as np

@@ -22,23 +23,6 @@ from uniface.detection import SCRFD, RetinaFace
from uniface.face_utils import compute_similarity
from uniface.recognition import ArcFace, MobileFace, SphereFace

IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}


def get_source_type(source: str) -> str:
    """Determine if source is image, video, or camera."""
    if source.isdigit():
        return 'camera'
    path = Path(source)
    suffix = path.suffix.lower()
    if suffix in IMAGE_EXTENSIONS:
        return 'image'
    elif suffix in VIDEO_EXTENSIONS:
        return 'video'
    else:
        return 'unknown'


def get_recognizer(name: str):
    """Get recognizer by name."""
@@ -16,30 +16,14 @@ import argparse
import os
from pathlib import Path

from _common import get_source_type
import cv2
import numpy as np

from uniface import RetinaFace
from uniface.constants import MiniFASNetWeights
from uniface.detection import RetinaFace
from uniface.spoofing import create_spoofer

IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}


def get_source_type(source: str) -> str:
    """Determine if source is image, video, or camera."""
    if source.isdigit():
        return 'camera'
    path = Path(source)
    suffix = path.suffix.lower()
    if suffix in IMAGE_EXTENSIONS:
        return 'image'
    elif suffix in VIDEO_EXTENSIONS:
        return 'video'
    else:
        return 'unknown'


def draw_spoofing_result(
    image: np.ndarray,
tools/track.py (new file, 199 lines)
@@ -0,0 +1,199 @@
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

"""Face tracking on video files using ByteTrack.

Usage:
    python tools/track.py --source video.mp4
    python tools/track.py --source video.mp4 --output outputs/tracked.mp4
    python tools/track.py --source 0  # webcam
"""

from __future__ import annotations

import argparse
import os
from pathlib import Path

from _common import VIDEO_EXTENSIONS
import cv2
import numpy as np
from tqdm import tqdm

from uniface.common import xyxy_to_cxcywh
from uniface.detection import SCRFD, RetinaFace
from uniface.draw import draw_tracks
from uniface.tracking import BYTETracker


def _assign_track_ids(faces, tracks) -> list:
    """Match tracker outputs back to Face objects by center distance."""
    if len(tracks) == 0 or len(faces) == 0:
        return []

    face_bboxes = np.array([f.bbox for f in faces], dtype=np.float32)
    track_ids = tracks[:, 4].astype(int)

    face_centers = xyxy_to_cxcywh(face_bboxes)[:, :2]  # (N, 2) -> [cx, cy]
    track_centers = xyxy_to_cxcywh(tracks[:, :4])[:, :2]  # (M, 2) -> [cx, cy]

    for ti in range(len(tracks)):
        dists = (track_centers[ti, 0] - face_centers[:, 0]) ** 2 + (track_centers[ti, 1] - face_centers[:, 1]) ** 2
        faces[int(np.argmin(dists))].track_id = track_ids[ti]

    return [f for f in faces if f.track_id is not None]
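The nearest-center matching in `_assign_track_ids` is easy to verify on toy arrays. This standalone check mirrors the distance computation above; plain arrays stand in for `Face` objects:

```python
import numpy as np

from uniface.common import xyxy_to_cxcywh

# Two face boxes and one tracker output row [x1, y1, x2, y2, track_id]
face_bboxes = np.array([[10, 10, 50, 50], [200, 200, 240, 240]], dtype=np.float32)
tracks = np.array([[198, 202, 241, 238, 7]], dtype=np.float32)

face_centers = xyxy_to_cxcywh(face_bboxes)[:, :2]
track_centers = xyxy_to_cxcywh(tracks[:, :4])[:, :2]

# Squared center distance from the track to every face; the nearest face wins
dists = ((track_centers[0] - face_centers) ** 2).sum(axis=1)
print(int(np.argmin(dists)))  # 1 -> the second face would inherit track_id 7
```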
def process_video(
    detector,
    tracker: BYTETracker,
    input_path: str,
    output_path: str,
    threshold: float = 0.5,
    show_preview: bool = False,
):
    """Process a video file with face tracking."""
    cap = cv2.VideoCapture(input_path)
    if not cap.isOpened():
        print(f"Error: Cannot open video file '{input_path}'")
        return

    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    print(f'Input: {input_path} ({width}x{height}, {fps:.1f} fps, {total_frames} frames)')
    print(f'Output: {output_path}')

    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    if not out.isOpened():
        print(f"Error: Cannot create output video '{output_path}'")
        cap.release()
        return

    frame_count = 0
    total_tracks = 0

    for _ in tqdm(range(total_frames), desc='Tracking', unit='frames'):
        ret, frame = cap.read()
        if not ret:
            break

        frame_count += 1

        # Detect faces
        faces = detector.detect(frame)
        dets = np.array([[*f.bbox, f.confidence] for f in faces if f.confidence >= threshold])
        dets = dets if len(dets) > 0 else np.empty((0, 5))

        # Update tracker
        tracks = tracker.update(dets)
        tracked_faces = _assign_track_ids(faces, tracks)
        total_tracks += len(tracked_faces)

        # Draw tracked faces
        draw_tracks(image=frame, faces=tracked_faces)

        cv2.putText(frame, f'Tracks: {len(tracked_faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        out.write(frame)

        if show_preview:
            cv2.imshow("Tracking - Press 'q' to cancel", frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                print('\nCancelled by user')
                break

    cap.release()
    out.release()
    if show_preview:
        cv2.destroyAllWindows()

    avg_tracks = total_tracks / frame_count if frame_count > 0 else 0
    print(f'\nDone! {frame_count} frames, {total_tracks} tracks ({avg_tracks:.1f} avg/frame)')
    print(f'Saved: {output_path}')


def run_camera(
    detector,
    tracker: BYTETracker,
    camera_id: int = 0,
    threshold: float = 0.5,
):
    """Run real-time face tracking on webcam."""
    cap = cv2.VideoCapture(camera_id)
    if not cap.isOpened():
        print(f'Cannot open camera {camera_id}')
        return

    print("Press 'q' to quit")

    while True:
        ret, frame = cap.read()
        # Check the read succeeded before mirroring the frame
        if not ret:
            break
        frame = cv2.flip(frame, 1)

        # Detect faces
        faces = detector.detect(frame)
        dets = np.array([[*f.bbox, f.confidence] for f in faces if f.confidence >= threshold])
        dets = dets if len(dets) > 0 else np.empty((0, 5))

        # Update tracker
        tracks = tracker.update(dets)
        tracked_faces = _assign_track_ids(faces, tracks)

        # Draw tracked faces
        draw_tracks(image=frame, faces=tracked_faces)

        cv2.putText(frame, f'Tracks: {len(tracked_faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        cv2.imshow('Face Tracking', frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()


def main():
    parser = argparse.ArgumentParser(description='Face tracking on video using ByteTrack')
    parser.add_argument('--source', type=str, required=True, help='Video path or camera ID (0, 1, ...)')
    parser.add_argument('--output', type=str, default=None, help='Output video path')
    parser.add_argument('--detector', type=str, default='scrfd', choices=['retinaface', 'scrfd'])
    parser.add_argument('--threshold', type=float, default=0.5, help='Detection confidence threshold')
    parser.add_argument('--track-buffer', type=int, default=30, help='Max frames to keep lost tracks')
    parser.add_argument('--preview', action='store_true', help='Show live preview')
    parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
    args = parser.parse_args()

    detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
    tracker = BYTETracker(track_thresh=args.threshold, track_buffer=args.track_buffer)

    if args.source.isdigit():
        run_camera(detector, tracker, int(args.source), args.threshold)
    else:
        if not os.path.exists(args.source):
            print(f'Error: Video not found: {args.source}')
            return

        ext = Path(args.source).suffix.lower()
        if ext not in VIDEO_EXTENSIONS:
            print(f"Error: Unsupported format '{ext}'. Supported: {VIDEO_EXTENSIONS}")
            return

        if args.output:
            output_path = args.output
        else:
            os.makedirs(args.save_dir, exist_ok=True)
            output_path = os.path.join(args.save_dir, f'{Path(args.source).stem}_tracked.mp4')

        process_video(detector, tracker, args.source, output_path, args.threshold, args.preview)


if __name__ == '__main__':
    main()
tools/video_detection.py (deleted file, 180 lines)
@@ -1,180 +0,0 @@
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

"""Face detection on video files with progress tracking.

Usage:
    python tools/video_detection.py --source video.mp4
    python tools/video_detection.py --source video.mp4 --output output.mp4
    python tools/video_detection.py --source 0  # webcam
"""

from __future__ import annotations

import argparse
import os
from pathlib import Path

import cv2
from tqdm import tqdm

from uniface import SCRFD, RetinaFace
from uniface.visualization import draw_detections

IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}


def get_source_type(source: str) -> str:
    """Determine if source is image, video, or camera."""
    if source.isdigit():
        return 'camera'
    path = Path(source)
    suffix = path.suffix.lower()
    if suffix in IMAGE_EXTENSIONS:
        return 'image'
    elif suffix in VIDEO_EXTENSIONS:
        return 'video'
    else:
        return 'unknown'


def process_video(
    detector,
    input_path: str,
    output_path: str,
    threshold: float = 0.6,
    show_preview: bool = False,
):
    """Process a video file with progress bar."""
    cap = cv2.VideoCapture(input_path)
    if not cap.isOpened():
        print(f"Error: Cannot open video file '{input_path}'")
        return

    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    print(f'Input: {input_path} ({width}x{height}, {fps:.1f} fps, {total_frames} frames)')
    print(f'Output: {output_path}')

    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    if not out.isOpened():
        print(f"Error: Cannot create output video '{output_path}'")
        cap.release()
        return

    frame_count = 0
    total_faces = 0

    for _ in tqdm(range(total_frames), desc='Processing', unit='frames'):
        ret, frame = cap.read()
        if not ret:
            break

        frame_count += 1
        faces = detector.detect(frame)
        total_faces += len(faces)

        bboxes = [f.bbox for f in faces]
        scores = [f.confidence for f in faces]
        landmarks = [f.landmarks for f in faces]
        draw_detections(
            image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
        )

        cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        out.write(frame)

        if show_preview:
            cv2.imshow("Processing - Press 'q' to cancel", frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                print('\nCancelled by user')
                break

    cap.release()
    out.release()
    if show_preview:
        cv2.destroyAllWindows()

    avg_faces = total_faces / frame_count if frame_count > 0 else 0
    print(f'\nDone! {frame_count} frames, {total_faces} faces ({avg_faces:.1f} avg/frame)')
    print(f'Saved: {output_path}')


def run_camera(detector, camera_id: int = 0, threshold: float = 0.6):
    """Run real-time detection on webcam."""
    cap = cv2.VideoCapture(camera_id)
    if not cap.isOpened():
        print(f'Cannot open camera {camera_id}')
        return

    print("Press 'q' to quit")

    while True:
        ret, frame = cap.read()
        frame = cv2.flip(frame, 1)
        if not ret:
            break

        faces = detector.detect(frame)

        bboxes = [f.bbox for f in faces]
        scores = [f.confidence for f in faces]
        landmarks = [f.landmarks for f in faces]
        draw_detections(
            image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
        )

        cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        cv2.imshow('Face Detection', frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()


def main():
    parser = argparse.ArgumentParser(description='Process video with face detection')
    parser.add_argument('--source', type=str, required=True, help='Video path or camera ID (0, 1, ...)')
    parser.add_argument('--output', type=str, default=None, help='Output video path (auto-generated if not specified)')
    parser.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
    parser.add_argument('--threshold', type=float, default=0.6, help='Visualization threshold')
    parser.add_argument('--preview', action='store_true', help='Show live preview')
    parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory (if --output not specified)')
    args = parser.parse_args()

    detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()

    source_type = get_source_type(args.source)

    if source_type == 'camera':
        run_camera(detector, int(args.source), args.threshold)
    elif source_type == 'video':
        if not os.path.exists(args.source):
            print(f'Error: Video not found: {args.source}')
            return

        # Determine output path
        if args.output:
            output_path = args.output
        else:
            os.makedirs(args.save_dir, exist_ok=True)
            output_path = os.path.join(args.save_dir, f'{Path(args.source).stem}_detected.mp4')

        process_video(detector, args.source, output_path, args.threshold, args.preview)
    else:
        print(f"Error: Unknown source type for '{args.source}'")
        print('Supported formats: videos (.mp4, .avi, ...) or camera ID (0, 1, ...)')


if __name__ == '__main__':
    main()
@@ -16,27 +16,12 @@ import argparse
import os
from pathlib import Path

from _common import get_source_type
import cv2
import numpy as np

from uniface import RetinaFace, XSeg

IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}


def get_source_type(source: str) -> str:
    """Determine if source is image, video, or camera."""
    if source.isdigit():
        return 'camera'
    path = Path(source)
    suffix = path.suffix.lower()
    if suffix in IMAGE_EXTENSIONS:
        return 'image'
    elif suffix in VIDEO_EXTENSIONS:
        return 'video'
    else:
        return 'unknown'
from uniface.detection import RetinaFace
from uniface.parsing import XSeg


def apply_mask_visualization(image: np.ndarray, mask: np.ndarray, alpha: float = 0.5) -> np.ndarray:
@@ -74,7 +59,7 @@ def process_image(
            print(f'  Face {i + 1}: skipped (no landmarks)')
            continue

        mask = parser.parse(image, face.landmarks)
        mask = parser.parse(image, landmarks=face.landmarks)
        full_mask = np.maximum(full_mask, mask)
        print(f'  Face {i + 1}: done')

@@ -136,7 +121,7 @@ def process_video(
        for face in faces:
            if face.landmarks is None:
                continue
            mask = parser.parse(frame, face.landmarks)
            mask = parser.parse(frame, landmarks=face.landmarks)
            full_mask = np.maximum(full_mask, mask)

        # Apply visualization
@@ -184,7 +169,7 @@ def run_camera(
        for face in faces:
            if face.landmarks is None:
                continue
            mask = parser.parse(frame, face.landmarks)
            mask = parser.parse(frame, landmarks=face.landmarks)
            full_mask = np.maximum(full_mask, mask)

        # Apply visualization
@@ -16,6 +16,7 @@
This library provides unified APIs for:
- Face detection (RetinaFace, SCRFD, YOLOv5Face, YOLOv8Face)
- Face recognition (AdaFace, ArcFace, MobileFace, SphereFace)
- Face tracking (ByteTrack with Kalman filtering)
- Facial landmarks (106-point detection)
- Face parsing (semantic segmentation)
- Gaze estimation
@@ -28,39 +29,31 @@ from __future__ import annotations

__license__ = 'MIT'
__author__ = 'Yakhyokhuja Valikhujaev'
__version__ = '2.3.0'
__version__ = '3.0.0'

from uniface.face_utils import compute_similarity, face_alignment
from uniface.log import Logger, enable_logging
from uniface.model_store import verify_model_weights
from uniface.visualization import draw_detections, vis_parsing_maps
from uniface.model_store import download_models, get_cache_dir, set_cache_dir, verify_model_weights

from .analyzer import FaceAnalyzer
from .attribute import AgeGender, FairFace
from .attribute import AgeGender, Emotion, FairFace
from .detection import (
    SCRFD,
    RetinaFace,
    YOLOv5Face,
    YOLOv8Face,
    create_detector,
    detect_faces,
    list_available_detectors,
)
from .gaze import MobileGaze, create_gaze_estimator
from .landmark import Landmark106, create_landmarker
from .parsing import BiSeNet, XSeg, create_face_parser
from .privacy import BlurFace, anonymize_faces
from .privacy import BlurFace
from .recognition import AdaFace, ArcFace, MobileFace, SphereFace, create_recognizer
from .spoofing import MiniFASNet, create_spoofer
from .tracking import BYTETracker
from .types import AttributeResult, EmotionResult, Face, GazeResult, SpoofingResult

# Optional: Emotion requires PyTorch
Emotion: type | None
try:
    from .attribute import Emotion
except ImportError:
    Emotion = None

__all__ = [
    # Metadata
    '__author__',
@@ -76,7 +69,6 @@ __all__ = [
    'create_landmarker',
    'create_recognizer',
    'create_spoofer',
    'detect_faces',
    'list_available_detectors',
    # Detection models
    'RetinaFace',
@@ -105,15 +97,17 @@ __all__ = [
    # Spoofing models
    'MiniFASNet',
    'SpoofingResult',
    # Tracking
    'BYTETracker',
    # Privacy
    'BlurFace',
    'anonymize_faces',
    # Utilities
    'Logger',
    'compute_similarity',
    'draw_detections',
    'download_models',
    'enable_logging',
    'face_alignment',
    'get_cache_dir',
    'set_cache_dir',
    'verify_model_weights',
    'vis_parsing_maps',
]
@@ -12,18 +12,27 @@ from uniface.attribute.age_gender import AgeGender
from uniface.attribute.base import Attribute
from uniface.attribute.fairface import FairFace
from uniface.constants import AgeGenderWeights, DDAMFNWeights, FairFaceWeights
from uniface.types import AttributeResult, EmotionResult, Face
from uniface.types import AttributeResult, EmotionResult

# Emotion requires PyTorch - make it optional
try:
    from uniface.attribute.emotion import Emotion

    _EMOTION_AVAILABLE = True
except ImportError:
    Emotion = None
    _EMOTION_AVAILABLE = False

# Public API for the attribute module
    class Emotion(Attribute):  # type: ignore[no-redef]
        """Stub for Emotion when PyTorch is not installed."""

        def __init__(self, *args: Any, **kwargs: Any) -> None:
            raise ImportError("Emotion requires optional dependency 'torch'. Install with: pip install torch")

        def _initialize_model(self) -> None: ...
        def preprocess(self, image: np.ndarray, *args: Any) -> Any: ...
        def postprocess(self, prediction: Any) -> Any: ...
        def predict(self, image: np.ndarray, *args: Any) -> Any: ...
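A short, illustrative sketch of what the stub buys downstream code: the import always succeeds, and the `ImportError` is deferred to instantiation, so callers can feature-detect cleanly:

```python
from uniface.attribute import Emotion

try:
    predictor = Emotion()
except ImportError as err:
    # Raised by the stub above when the optional 'torch' dependency is missing
    print(f'Emotion analysis disabled: {err}')
    predictor = None
```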
__all__ = [
    'AgeGender',
    'AttributeResult',
@@ -31,16 +40,13 @@ __all__ = [
    'EmotionResult',
    'FairFace',
    'create_attribute_predictor',
    'predict_attributes',
]

# A mapping from model enums to their corresponding attribute classes
_ATTRIBUTE_MODELS = {
    **dict.fromkeys(AgeGenderWeights, AgeGender),
    **dict.fromkeys(FairFaceWeights, FairFace),
}

# Add Emotion models only if PyTorch is available
if _EMOTION_AVAILABLE:
    _ATTRIBUTE_MODELS.update(dict.fromkeys(DDAMFNWeights, Emotion))

@@ -48,21 +54,16 @@ if _EMOTION_AVAILABLE:
def create_attribute_predictor(
    model_name: AgeGenderWeights | DDAMFNWeights | FairFaceWeights, **kwargs: Any
) -> Attribute:
    """
    Factory function to create an attribute predictor instance.

    This high-level API simplifies the creation of attribute models by
    dynamically selecting the correct class based on the provided model enum.
    """Factory function to create an attribute predictor instance.

    Args:
        model_name: The enum corresponding to the desired attribute model
            (e.g., AgeGenderWeights.DEFAULT, DDAMFNWeights.AFFECNET7,
            or FairFaceWeights.DEFAULT).
        **kwargs: Additional keyword arguments to pass to the model's constructor.
            (e.g., AgeGenderWeights.DEFAULT, DDAMFNWeights.AFFECNET7,
            or FairFaceWeights.DEFAULT).
        **kwargs: Additional keyword arguments passed to the model constructor.

    Returns:
        An initialized instance of an Attribute predictor class
        (e.g., AgeGender, FairFace, or Emotion).
        An initialized Attribute predictor (AgeGender, FairFace, or Emotion).

    Raises:
        ValueError: If the provided model_name is not a supported enum.
@@ -75,40 +76,4 @@ def create_attribute_predictor(
        f'Please choose from AgeGenderWeights, FairFaceWeights, or DDAMFNWeights.'
    )

    # Pass model_name to the constructor, as some classes might need it
    return model_class(model_name=model_name, **kwargs)


def predict_attributes(image: np.ndarray, faces: list[Face], predictor: Attribute) -> list[Face]:
    """
    High-level API to predict attributes for multiple detected faces.

    This function iterates through a list of Face objects, runs the
    specified attribute predictor on each one, and updates the Face
    objects with the predicted attributes.

    Args:
        image (np.ndarray): The full input image in BGR format.
        faces (List[Face]): A list of Face objects from face detection.
        predictor (Attribute): An initialized attribute predictor instance,
            created by `create_attribute_predictor`.

    Returns:
        List[Face]: The list of Face objects with updated attribute fields.
    """
    for face in faces:
        if isinstance(predictor, AgeGender):
            result = predictor(image, face.bbox)
            face.gender = result.gender
            face.age = result.age
        elif isinstance(predictor, FairFace):
            result = predictor(image, face.bbox)
            face.gender = result.gender
            face.age_group = result.age_group
            face.race = result.race
        elif isinstance(predictor, Emotion):
            result = predictor(image, face.landmarks)
            face.emotion = result.emotion
            face.emotion_confidence = result.confidence

    return faces
@@ -28,17 +28,17 @@ class Emotion(Attribute):

    def __init__(
        self,
        model_weights: DDAMFNWeights = DDAMFNWeights.AFFECNET7,
        model_name: DDAMFNWeights = DDAMFNWeights.AFFECNET7,
        input_size: tuple[int, int] = (112, 112),
    ) -> None:
        """
        Initializes the emotion recognition model.

        Args:
            model_weights (DDAMFNWeights): The enum for the model weights to load.
            model_name (DDAMFNWeights): The enum for the model weights to load.
            input_size (Tuple[int, int]): The expected input size for the model.
        """
        Logger.info(f'Initializing Emotion with model={model_weights.name}')
        Logger.info(f'Initializing Emotion with model={model_name.name}')

        if torch.backends.mps.is_available():
            self.device = torch.device('mps')
@@ -48,7 +48,7 @@ class Emotion(Attribute):
            self.device = torch.device('cpu')

        self.input_size = input_size
        self.model_path = verify_model_weights(model_weights)
        self.model_path = verify_model_weights(model_name)

        # Define emotion labels based on the selected model
        self.emotion_labels = [
@@ -60,7 +60,7 @@ class Emotion(Attribute):
            'Disgust',
            'Angry',
        ]
        if model_weights == DDAMFNWeights.AFFECNET8:
        if model_name == DDAMFNWeights.AFFECNET8:
            self.emotion_labels.append('Contempt')

        self._initialize_model()
@@ -18,6 +18,7 @@ __all__ = [
    'generate_anchors',
    'non_max_suppression',
    'resize_image',
    'xyxy_to_cxcywh',
]

@@ -61,6 +62,23 @@ def resize_image(
    return image, resize_factor


def xyxy_to_cxcywh(bboxes: np.ndarray) -> np.ndarray:
    """Convert bounding boxes from ``[x1, y1, x2, y2]`` to ``[cx, cy, w, h]``.

    Args:
        bboxes: Array of shape (N, 4) or (4,) with ``[x1, y1, x2, y2]`` coordinates.

    Returns:
        Array of the same shape with ``[cx, cy, w, h]`` coordinates.
    """
    out = np.empty_like(bboxes)
    out[..., 0] = (bboxes[..., 0] + bboxes[..., 2]) / 2  # cx
    out[..., 1] = (bboxes[..., 1] + bboxes[..., 3]) / 2  # cy
    out[..., 2] = bboxes[..., 2] - bboxes[..., 0]  # w
    out[..., 3] = bboxes[..., 3] - bboxes[..., 1]  # h
    return out
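A quick worked example of the conversion; the values follow directly from the formulas above:

```python
import numpy as np

from uniface.common import xyxy_to_cxcywh

boxes = np.array([[10.0, 20.0, 50.0, 80.0]])
# cx = (10+50)/2 = 30, cy = (20+80)/2 = 50, w = 40, h = 60
print(xyxy_to_cxcywh(boxes))  # [[30. 50. 40. 60.]]
```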
def generate_anchors(image_size: tuple[int, int] = (640, 640)) -> np.ndarray:
    """Generate anchor boxes for a given image size (RetinaFace specific).

@@ -272,11 +272,11 @@ MODEL_SHA256: dict[Enum, str] = {
    # Landmark
    LandmarkWeights.DEFAULT: 'f001b856447c413801ef5c42091ed0cd516fcd21f2d6b79635b1e733a7109dbf',
    # MobileGaze (trained on Gaze360)
    GazeWeights.RESNET18: '23d5d7e4f6f40dce8c35274ce9d08b45b9e22cbaaf5af73182f473229d713d31',
    GazeWeights.RESNET34: '4457ee5f7acd1a5ab02da4b61f02fc3a0b17adbf3844dd0ba3cd4288f2b5e1de',
    GazeWeights.RESNET50: 'e1eaf98f5ec7c89c6abe7cfe39f7be83e747163f98d1ff945c0603b3c521be22',
    GazeWeights.MOBILENET_V2: 'fdcdb84e3e6421b5a79e8f95139f249fc258d7f387eed5ddac2b80a9a15ce076',
    GazeWeights.MOBILEONE_S0: 'c0b5a4f4a0ffd24f76ab3c1452354bb2f60110899fd9a88b464c75bafec0fde8',
    GazeWeights.RESNET18: '404fec1efd07ff49f981e47f461c20c2627119e465ec441bbd1c067d3f16e657',
    GazeWeights.RESNET34: 'c8e6b14f6095d2425241b9302aa663d9a23b7dfb9d43941352b718c91dc7f2cf',
    GazeWeights.RESNET50: 'bb28d421565adc4dfb665742f8fc80bdef36dd8caa0c87e040e0937f9fdca9a6',
    GazeWeights.MOBILENET_V2: 'b81312df85c7ac1c1b5f78c573620d22c2719cb839650e15f12dc7eecb7744a4',
    GazeWeights.MOBILEONE_S0: '8b4fdc4e3da44733c9a82e7776b411e4a39f94e8e285aee0fc85a548a55f7d9f',
    # Face Parsing
    ParsingWeights.RESNET18: '0d9bd318e46987c3bdbfacae9e2c0f461cae1c6ac6ea6d43bbe541a91727e33f',
    ParsingWeights.RESNET34: '5b805bba7b5660ab7070b5a381dcf75e5b3e04199f1e9387232a77a00095102e',
@@ -6,9 +6,12 @@ from __future__ import annotations

from typing import Any

import numpy as np

from uniface.types import Face
from uniface.constants import (
    RetinaFaceWeights,
    SCRFDWeights,
    YOLOv5FaceWeights,
    YOLOv8FaceWeights,
)

from .base import BaseDetector
from .retinaface import RetinaFace
@@ -16,48 +19,6 @@ from .scrfd import SCRFD
from .yolov5 import YOLOv5Face
from .yolov8 import YOLOv8Face

# Global cache for detector instances (keyed by method name + config hash)
_detector_cache: dict[str, BaseDetector] = {}


def detect_faces(image: np.ndarray, method: str = 'retinaface', **kwargs: Any) -> list[Face]:
    """High-level face detection function.

    Detects faces in an image using the specified detection method.
    Results are cached for repeated calls with the same configuration.

    Args:
        image: Input image as numpy array with shape (H, W, C) in BGR format.
        method: Detection method to use. Options: 'retinaface', 'scrfd', 'yolov5face', 'yolov8face'.
        **kwargs: Additional arguments passed to the detector.

    Returns:
        A list of Face objects, each containing:
            - bbox: [x1, y1, x2, y2] bounding box coordinates.
            - confidence: The confidence score of the detection.
            - landmarks: 5-point facial landmarks with shape (5, 2).

    Example:
        >>> from uniface import detect_faces
        >>> import cv2
        >>> image = cv2.imread('your_image.jpg')
        >>> faces = detect_faces(image, method='retinaface', confidence_threshold=0.8)
        >>> for face in faces:
        ...     print(f'Found face with confidence: {face.confidence}')
        ...     print(f'BBox: {face.bbox}')
    """
    method_name = method.lower()

    sorted_kwargs = sorted(kwargs.items())
    cache_key = f'{method_name}_{sorted_kwargs!s}'

    if cache_key not in _detector_cache:
        # Pass kwargs to create the correctly configured detector
        _detector_cache[cache_key] = create_detector(method, **kwargs)

    detector = _detector_cache[cache_key]
    return detector.detect(image)
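For reference, the removed `detect_faces` keyed its cache on the sorted kwargs so that keyword order could not produce duplicate detector instances. A minimal sketch of that idea — `_cache_key` is a hypothetical helper, not part of the library:

```python
def _cache_key(method: str, **kwargs) -> str:
    # Sorting the items makes the key independent of keyword order
    return f'{method.lower()}_{sorted(kwargs.items())!s}'

assert _cache_key('retinaface', a=1, b=2) == _cache_key('retinaface', b=2, a=1)
```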
def create_detector(method: str = 'retinaface', **kwargs: Any) -> BaseDetector:
    """Factory function to create face detectors.
@@ -122,7 +83,7 @@ def list_available_detectors() -> dict[str, dict[str, Any]]:
            'supports_landmarks': True,
            'paper': 'https://arxiv.org/abs/1905.00641',
            'default_params': {
                'model_name': 'mnet_v2',
                'model_name': RetinaFaceWeights.MNET_V2.value,
                'confidence_threshold': 0.5,
                'nms_threshold': 0.4,
                'input_size': (640, 640),
@@ -133,7 +94,7 @@ def list_available_detectors() -> dict[str, dict[str, Any]]:
            'supports_landmarks': True,
            'paper': 'https://arxiv.org/abs/2105.04714',
            'default_params': {
                'model_name': 'scrfd_10g_kps',
                'model_name': SCRFDWeights.SCRFD_10G_KPS.value,
                'confidence_threshold': 0.5,
                'nms_threshold': 0.4,
                'input_size': (640, 640),
@@ -144,9 +105,9 @@ def list_available_detectors() -> dict[str, dict[str, Any]]:
            'supports_landmarks': True,
            'paper': 'https://arxiv.org/abs/2105.12931',
            'default_params': {
                'model_name': 'yolov5s_face',
                'confidence_threshold': 0.25,
                'nms_threshold': 0.45,
                'model_name': YOLOv5FaceWeights.YOLOV5S.value,
                'confidence_threshold': 0.6,
                'nms_threshold': 0.5,
                'input_size': 640,
            },
        },
@@ -155,7 +116,7 @@ def list_available_detectors() -> dict[str, dict[str, Any]]:
            'supports_landmarks': True,
            'paper': 'https://github.com/derronqi/yolov8-face',
            'default_params': {
                'model_name': 'yolov8n_face',
                'model_name': YOLOv8FaceWeights.YOLOV8N.value,
                'confidence_threshold': 0.5,
                'nms_threshold': 0.45,
                'input_size': 640,
@@ -171,6 +132,5 @@ __all__ = [
    'YOLOv5Face',
    'YOLOv8Face',
    'create_detector',
    'detect_faces',
    'list_available_detectors',
]
uniface/draw.py (new file, 475 lines)
@@ -0,0 +1,475 @@
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

from __future__ import annotations

import colorsys
from typing import TYPE_CHECKING

import cv2
import numpy as np

if TYPE_CHECKING:
    from uniface.types import Face

__all__ = [
    'FACE_PARSING_COLORS',
    'FACE_PARSING_LABELS',
    'calculate_optimal_line_thickness',
    'calculate_optimal_text_scale',
    'draw_corner_bbox',
    'draw_detections',
    'draw_gaze',
    'draw_text_label',
    'draw_tracks',
    'vis_parsing_maps',
]

# Face parsing component names (19 classes)
FACE_PARSING_LABELS = [
    'background',
    'skin',
    'l_brow',
    'r_brow',
    'l_eye',
    'r_eye',
    'eye_g',
    'l_ear',
    'r_ear',
    'ear_r',
    'nose',
    'mouth',
    'u_lip',
    'l_lip',
    'neck',
    'neck_l',
    'cloth',
    'hair',
    'hat',
]

# Color palette for face parsing visualization
FACE_PARSING_COLORS = [
    [0, 0, 0],
    [255, 85, 0],
    [255, 170, 0],
    [255, 0, 85],
    [255, 0, 170],
    [0, 255, 0],
    [85, 255, 0],
    [170, 255, 0],
    [0, 255, 85],
    [0, 255, 170],
    [0, 0, 255],
    [85, 0, 255],
    [170, 0, 255],
    [0, 85, 255],
    [0, 170, 255],
    [255, 255, 0],
    [255, 255, 85],
    [255, 255, 170],
    [255, 0, 255],
]

# Per-point colors for 5-point facial landmarks (BGR)
_LANDMARK_COLORS = (
    (0, 0, 255),
    (0, 255, 255),
    (255, 0, 255),
    (0, 255, 0),
    (255, 0, 0),
)


def _get_color(idx: int) -> tuple[int, int, int]:
    """Get a visually distinct BGR color for a given index.

    Uses golden-ratio hue stepping in HSV space to maximize perceptual
    separation between consecutive indices. Works for any non-negative index.

    Args:
        idx: Non-negative integer index (e.g. track ID).

    Returns:
        BGR color tuple suitable for OpenCV drawing functions.
    """
    golden_ratio = 0.618033988749895
    hue = (idx * golden_ratio) % 1.0
    # HSV -> RGB with fixed saturation=0.85 and value=0.95 for vivid colors
    r, g, b = colorsys.hsv_to_rgb(hue, 0.85, 0.95)
    return int(b * 255), int(g * 255), int(r * 255)
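The golden-ratio stepping is easiest to appreciate numerically. A standalone re-implementation of the same scheme, named `get_color` here to avoid implying the private helper is importable:

```python
import colorsys

GOLDEN_RATIO = 0.618033988749895

def get_color(idx: int) -> tuple[int, int, int]:
    # Hue advances by ~0.618 per index, wrapping around the hue wheel,
    # so consecutive IDs land far apart and colors never repeat exactly
    hue = (idx * GOLDEN_RATIO) % 1.0
    r, g, b = colorsys.hsv_to_rgb(hue, 0.85, 0.95)
    return int(b * 255), int(g * 255), int(r * 255)

for track_id in range(3):
    print(track_id, get_color(track_id))
```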
def calculate_optimal_line_thickness(resolution_wh: tuple[int, int]) -> int:
    """Calculate adaptive line thickness based on image resolution.

    Args:
        resolution_wh: Image resolution as ``(width, height)``.

    Returns:
        Recommended line thickness in pixels.

    Example:
        >>> calculate_optimal_line_thickness((1920, 1080))
        4
        >>> calculate_optimal_line_thickness((640, 480))
        2
    """
    return max(round(sum(resolution_wh) / 2 * 0.003), 2)


def calculate_optimal_text_scale(resolution_wh: tuple[int, int]) -> float:
    """Calculate adaptive font scale based on image resolution.

    Args:
        resolution_wh: Image resolution as ``(width, height)``.

    Returns:
        Recommended font scale factor.

    Example:
        >>> calculate_optimal_text_scale((1920, 1080))
        1.08
        >>> calculate_optimal_text_scale((640, 480))
        0.48
    """
    return min(resolution_wh) * 1e-3


def draw_corner_bbox(
    image: np.ndarray,
    bbox: np.ndarray,
    color: tuple[int, int, int] = (0, 255, 0),
    thickness: int = 3,
    proportion: float = 0.2,
) -> None:
    """Draw a bounding box with corner brackets on an image.

    Draws a thin full rectangle with thick corner accents, commonly used in
    face-detection overlays for a clean look.

    Args:
        image: Input image to draw on (modified in-place).
        bbox: Bounding box in xyxy format ``[x1, y1, x2, y2]``.
        color: BGR color of the box. Defaults to green ``(0, 255, 0)``.
        thickness: Thickness of corner bracket lines. Defaults to 3.
        proportion: Corner length as a fraction of the shorter side.
            Defaults to 0.2.
    """
    x1, y1, x2, y2 = map(int, bbox)
    corner_length = int(proportion * min(x2 - x1, y2 - y1))

    # Thin full rectangle
    cv2.rectangle(image, (x1, y1), (x2, y2), color, 1)

    # Top-left corner
    cv2.line(image, (x1, y1), (x1 + corner_length, y1), color, thickness)
    cv2.line(image, (x1, y1), (x1, y1 + corner_length), color, thickness)

    # Top-right corner
    cv2.line(image, (x2, y1), (x2 - corner_length, y1), color, thickness)
    cv2.line(image, (x2, y1), (x2, y1 + corner_length), color, thickness)

    # Bottom-left corner
    cv2.line(image, (x1, y2), (x1, y2 - corner_length), color, thickness)
    cv2.line(image, (x1, y2), (x1 + corner_length, y2), color, thickness)

    # Bottom-right corner
    cv2.line(image, (x2, y2), (x2, y2 - corner_length), color, thickness)
    cv2.line(image, (x2, y2), (x2 - corner_length, y2), color, thickness)


def draw_text_label(
    image: np.ndarray,
    text: str,
    x: int,
    y: int,
    bg_color: tuple[int, int, int],
    text_color: tuple[int, int, int] = (255, 255, 255),
    font_scale: float = 0.5,
    font_thickness: int = 2,
    padding: int = 5,
) -> None:
    """Draw text with a filled background rectangle above a given position.

    The label is placed so that its bottom edge sits at *y*, making it
    suitable for positioning above a bounding box top-left corner.

    Args:
        image: Input image to draw on (modified in-place).
        text: The text string to render.
        x: Left x-coordinate for the label.
        y: Bottom y-coordinate for the label (e.g. ``bbox[1]``).
        bg_color: BGR background fill color.
        text_color: BGR text color. Defaults to white.
        font_scale: OpenCV font scale factor. Defaults to 0.5.
        font_thickness: OpenCV font thickness. Defaults to 2.
        padding: Pixel padding around the text. Defaults to 5.
    """
    (tw, th), baseline = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, font_scale, font_thickness)
    cv2.rectangle(
        image,
        (x, y - th - baseline - padding * 2),
        (x + tw + padding * 2, y),
        bg_color,
        -1,
    )
    cv2.putText(
        image,
        text,
        (x + padding, y - padding),
        cv2.FONT_HERSHEY_SIMPLEX,
        font_scale,
        text_color,
        font_thickness,
    )
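A small usage sketch combining the two helpers above on a blank canvas; the label text and box coordinates are arbitrary:

```python
import numpy as np

from uniface.draw import draw_corner_bbox, draw_text_label

canvas = np.zeros((240, 320, 3), dtype=np.uint8)
bbox = np.array([60, 80, 180, 200])

draw_corner_bbox(canvas, bbox, color=(0, 255, 0), thickness=2)
# The label's bottom edge sits at bbox[1], so it appears just above the box
draw_text_label(canvas, 'face 0.97', int(bbox[0]), int(bbox[1]), bg_color=(0, 255, 0), text_color=(0, 0, 0))
```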
def draw_detections(
    *,
    image: np.ndarray,
    bboxes: list[np.ndarray] | list[list[float]],
    scores: np.ndarray | list[float],
    landmarks: list[np.ndarray] | list[list[list[float]]],
    vis_threshold: float = 0.6,
    draw_score: bool = False,
    corner_bbox: bool = True,
) -> None:
    """Draw bounding boxes, landmarks, and optional scores on an image.

    Modifies the image in-place.

    Args:
        image: Input image to draw on (modified in-place).
        bboxes: List of bounding boxes in xyxy format ``[x1, y1, x2, y2]``.
        scores: List of confidence scores.
        landmarks: List of landmark sets with shape ``(5, 2)``.
        vis_threshold: Confidence threshold for filtering. Defaults to 0.6.
        draw_score: Whether to draw confidence scores. Defaults to False.
        corner_bbox: Use corner-style bounding boxes. Defaults to True.
    """

    # Adaptive line thickness
    line_thickness = max(round(sum(image.shape[:2]) / 2 * 0.003), 2)

    for i, score in enumerate(scores):
        if score < vis_threshold:
            continue

        bbox = np.array(bboxes[i], dtype=np.int32)

        # Draw bounding box
        if corner_bbox:
            draw_corner_bbox(image, bbox, color=(0, 255, 0), thickness=line_thickness, proportion=0.2)
        else:
            cv2.rectangle(image, tuple(bbox[:2]), tuple(bbox[2:]), (0, 255, 0), line_thickness)

        # Draw confidence score label
        if draw_score:
            font_scale = max(0.4, min(0.7, (bbox[3] - bbox[1]) / 200))
            draw_text_label(
                image,
                f'{score:.2f}',
                bbox[0],
                bbox[1],
                bg_color=(0, 255, 0),
                text_color=(0, 0, 0),
                font_scale=font_scale,
            )

        # Draw landmarks
        landmark_set = np.array(landmarks[i], dtype=np.int32)
        for j, point in enumerate(landmark_set):
            cv2.circle(image, tuple(point), line_thickness + 1, _LANDMARK_COLORS[j % len(_LANDMARK_COLORS)], -1)


def draw_gaze(
    image: np.ndarray,
    bbox: np.ndarray,
    pitch: np.ndarray | float,
    yaw: np.ndarray | float,
    *,
    draw_bbox: bool = True,
    corner_bbox: bool = True,
    draw_angles: bool = True,
) -> None:
    """Draw gaze direction with optional bounding box on an image.

    Modifies the image in-place.

    Args:
        image: Input image to draw on (modified in-place).
        bbox: Face bounding box in xyxy format ``[x1, y1, x2, y2]``.
        pitch: Vertical gaze angle in radians.
        yaw: Horizontal gaze angle in radians.
        draw_bbox: Whether to draw the bounding box. Defaults to True.
        corner_bbox: Use corner-style bounding box. Defaults to True.
        draw_angles: Whether to display pitch/yaw values as text. Defaults to True.
    """

    x_min, y_min, x_max, y_max = map(int, bbox[:4])

    # Adaptive line thickness
    line_thickness = max(round(sum(image.shape[:2]) / 2 * 0.003), 2)

    # Draw bounding box if requested
    if draw_bbox:
        if corner_bbox:
            draw_corner_bbox(image, bbox, color=(0, 255, 0), thickness=line_thickness)
        else:
            cv2.rectangle(image, (x_min, y_min), (x_max, y_max), (0, 255, 0), line_thickness)

    # Calculate center of the bounding box
    x_center = (x_min + x_max) // 2
    y_center = (y_min + y_max) // 2

    # Calculate the direction of the gaze
    length = x_max - x_min
    dx = int(-length * np.sin(yaw) * np.cos(pitch))
    dy = int(-length * np.sin(pitch))

    # Draw gaze arrow
    center_radius = max(line_thickness + 1, 4)
    cv2.circle(image, (x_center, y_center), radius=center_radius, color=(0, 0, 255), thickness=-1)
    cv2.arrowedLine(
        image,
        (x_center, y_center),
        (x_center + dx, y_center + dy),
        color=(0, 0, 255),
        thickness=line_thickness,
        line_type=cv2.LINE_AA,
        tipLength=0.25,
    )

    # Draw angle values
    if draw_angles:
        font_scale = max(0.4, min(0.7, (y_max - y_min) / 200))
        draw_text_label(
            image,
            f'P:{np.degrees(pitch):.0f}deg Y:{np.degrees(yaw):.0f}deg',
            x_min,
            y_min,
            bg_color=(0, 0, 255),
            text_color=(255, 255, 255),
            font_scale=font_scale,
        )


def draw_tracks(
    *,
    image: np.ndarray,
    faces: list[Face],
    draw_landmarks: bool = True,
    draw_id: bool = True,
    corner_bbox: bool = True,
) -> None:
    """Draw tracked faces with color-coded track IDs on an image.

    Each track ID is assigned a deterministic color for consistent visualization
    across frames. Faces without a ``track_id`` are drawn in gray.

    Modifies the image in-place.

    Args:
        image: Input image to draw on (modified in-place).
        faces: List of Face objects (with ``track_id`` assigned by BYTETracker).
        draw_landmarks: Whether to draw facial landmarks. Defaults to True.
        draw_id: Whether to draw track ID labels. Defaults to True.
        corner_bbox: Use corner-style bounding boxes. Defaults to True.

    Example:
        >>> from uniface import BYTETracker, RetinaFace
        >>> from uniface.draw import draw_tracks
        >>> detector = RetinaFace()
        >>> tracker = BYTETracker()
        >>> draw_tracks(image=frame, faces=faces)
    """
    untracked_color = (128, 128, 128)

    # Adaptive line thickness
    line_thickness = max(round(sum(image.shape[:2]) / 2 * 0.003), 2)

    for face in faces:
        bbox = np.array(face.bbox, dtype=np.int32)
        track_id = face.track_id

        # Pick color based on track ID
        color = _get_color(track_id) if track_id is not None else untracked_color

        # Draw bounding box
        if corner_bbox:
            draw_corner_bbox(image, bbox, color=color, thickness=line_thickness, proportion=0.2)
        else:
            cv2.rectangle(image, tuple(bbox[:2]), tuple(bbox[2:]), color, line_thickness)

        # Draw track ID label
        if draw_id and track_id is not None:
            font_scale = max(0.4, min(0.7, (bbox[3] - bbox[1]) / 200))
            draw_text_label(
                image,
                f'ID:{track_id}',
                bbox[0],
                bbox[1],
                bg_color=color,
                font_scale=font_scale,
            )

        # Draw landmarks
        if draw_landmarks and face.landmarks is not None:
            landmark_set = np.array(face.landmarks, dtype=np.int32)
            for j, point in enumerate(landmark_set):
                cv2.circle(image, tuple(point), line_thickness + 1, _LANDMARK_COLORS[j % len(_LANDMARK_COLORS)], -1)


def vis_parsing_maps(
    image: np.ndarray,
    segmentation_mask: np.ndarray,
    *,
    save_image: bool = False,
    save_path: str = 'result.png',
) -> np.ndarray:
    """Visualize face parsing segmentation mask by overlaying colored regions.

    Args:
        image: Input face image in RGB format with shape ``(H, W, 3)``.
        segmentation_mask: Segmentation mask with shape ``(H, W)`` where each
            pixel value represents a facial component class (0-18).
        save_image: Whether to save the visualization to disk. Defaults to False.
        save_path: Path to save the visualization if *save_image* is True.

    Returns:
        Blended image with segmentation overlay in BGR format.

    Example:
        >>> import cv2
        >>> from uniface.parsing import BiSeNet
        >>> from uniface.draw import vis_parsing_maps
        >>> parser = BiSeNet()
        >>> face_image = cv2.imread('face.jpg')
        >>> mask = parser.parse(face_image)
        >>> face_rgb = cv2.cvtColor(face_image, cv2.COLOR_BGR2RGB)
        >>> result = vis_parsing_maps(face_rgb, mask)
        >>> cv2.imwrite('parsed_face.jpg', result)
    """
    image = np.array(image).copy().astype(np.uint8)
    segmentation_mask = segmentation_mask.copy().astype(np.uint8)

    # Create a color mask
    segmentation_mask_color = np.zeros((segmentation_mask.shape[0], segmentation_mask.shape[1], 3))

    num_classes = np.max(segmentation_mask)
    for class_index in range(1, num_classes + 1):
        class_pixels = np.where(segmentation_mask == class_index)
        segmentation_mask_color[class_pixels[0], class_pixels[1], :] = FACE_PARSING_COLORS[class_index]

    segmentation_mask_color = segmentation_mask_color.astype(np.uint8)

    # Convert image to BGR format for blending
    bgr_image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    blended_image = cv2.addWeighted(bgr_image, 0.6, segmentation_mask_color, 0.4, 0)

    if save_image:
        cv2.imwrite(save_path, blended_image, [int(cv2.IMWRITE_JPEG_QUALITY), 100])

    return blended_image
@@ -31,7 +31,7 @@ def create_gaze_estimator(method: str = 'mobilegaze', **kwargs) -> BaseGazeEstim
        ValueError: If the specified `method` is not supported.

    Examples:
        >>> # Create the default MobileGaze estimator (ResNet18 backbone)
        >>> # Create the default MobileGaze estimator (ResNet34 backbone)
        >>> estimator = create_gaze_estimator()

        >>> # Create with MobileNetV2 backbone
@@ -106,7 +106,7 @@ class MobileGaze(BaseGazeEstimator):
        self.output_names = [output.name for output in outputs]

        if len(self.output_names) != 2:
            raise ValueError(f'Expected 2 output nodes (pitch, yaw), got {len(self.output_names)}')
            raise ValueError(f'Expected 2 output nodes (yaw, pitch), got {len(self.output_names)}')

        Logger.info(f'MobileGaze initialized with input size {self.input_size}')
@@ -161,19 +161,19 @@ class MobileGaze(BaseGazeEstimator):
        Returns:
            GazeResult: Result containing pitch and yaw angles in radians.
        """
        pitch_logits, yaw_logits = outputs
        yaw_logits, pitch_logits = outputs

        # Convert logits to probabilities
        pitch_probs = self._softmax(pitch_logits)
        yaw_probs = self._softmax(yaw_logits)
        pitch_probs = self._softmax(pitch_logits)

        # Compute expected bin index (soft-argmax)
        pitch_deg = np.sum(pitch_probs * self._idx_tensor, axis=1) * self._binwidth - self._angle_offset
        yaw_deg = np.sum(yaw_probs * self._idx_tensor, axis=1) * self._binwidth - self._angle_offset
        pitch_deg = np.sum(pitch_probs * self._idx_tensor, axis=1) * self._binwidth - self._angle_offset

        # Convert degrees to radians
        pitch = float(np.radians(pitch_deg[0]))
        yaw = float(np.radians(yaw_deg[0]))
        pitch = float(np.radians(pitch_deg[0]))

        return GazeResult(pitch=pitch, yaw=yaw)
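For reference, the soft-argmax decoding used above can be reproduced in isolation. A standalone sketch; the bin count, bin width, and angle offset below are illustrative assumptions, not the model's actual constants:

```python
import numpy as np

def softmax(x: np.ndarray) -> np.ndarray:
    e = np.exp(x - np.max(x, axis=1, keepdims=True))
    return e / e.sum(axis=1, keepdims=True)

# Assumed example configuration: 90 bins, 4 degrees per bin, 180-degree offset.
num_bins, binwidth, angle_offset = 90, 4.0, 180.0
idx_tensor = np.arange(num_bins, dtype=np.float32)

logits = np.random.randn(1, num_bins).astype(np.float32)  # stand-in for one output head
probs = softmax(logits)
deg = np.sum(probs * idx_tensor, axis=1) * binwidth - angle_offset  # expected bin -> degrees
rad = float(np.radians(deg[0]))
```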
@@ -11,17 +11,18 @@ def create_landmarker(method: str = '2d106det', **kwargs) -> BaseLandmarker:
    Factory function to create facial landmark predictors.

    Args:
        method (str): Landmark prediction method. Options: '106'.
        method (str): Landmark prediction method.
            Options: '2d106det' (default), 'landmark106', '106'.
        **kwargs: Model-specific parameters.

    Returns:
        Initialized landmarker instance.
    """
    method = method.lower()
    if method == '2d106det':
    if method in ('2d106det', 'landmark106', '106'):
        return Landmark106(**kwargs)
    else:
        available = ['2d106det']
        available = ['2d106det', 'landmark106', '106']
        raise ValueError(f"Unsupported method: '{method}'. Available: {available}")
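With the widened check, any of the three aliases resolves to `Landmark106`, and unknown names still fail loudly; a short sketch (the import path is an assumption):

```python
from uniface.landmark import create_landmarker  # import path assumed

landmarker = create_landmarker('106')  # same model as '2d106det' or 'landmark106'

try:
    create_landmarker('3d68')
except ValueError as err:
    print(err)  # Unsupported method: '3d68'. Available: ['2d106det', 'landmark106', '106']
```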
@@ -10,6 +10,7 @@ using SHA-256 checksums for integrity validation.

from __future__ import annotations

from concurrent.futures import ThreadPoolExecutor, as_completed
from enum import Enum
import hashlib
import os
@@ -20,10 +21,50 @@ from tqdm import tqdm
import uniface.constants as const
from uniface.log import Logger

__all__ = ['verify_model_weights']
__all__ = ['download_models', 'get_cache_dir', 'set_cache_dir', 'verify_model_weights']

_DEFAULT_CACHE_DIR = '~/.uniface/models'
_ENV_KEY = 'UNIFACE_CACHE_DIR'


def verify_model_weights(model_name: Enum, root: str = '~/.uniface/models') -> str:
def get_cache_dir() -> str:
    """Get the current model cache directory path.

    Resolution order:
    1. ``UNIFACE_CACHE_DIR`` environment variable (set via :func:`set_cache_dir` or directly).
    2. Default: ``~/.uniface/models``.

    Returns:
        Absolute, expanded path to the cache directory.

    Example:
        >>> from uniface import get_cache_dir
        >>> print(get_cache_dir())
        '/home/user/.uniface/models'
    """
    return os.path.expanduser(os.environ.get(_ENV_KEY, _DEFAULT_CACHE_DIR))


def set_cache_dir(path: str) -> None:
    """Set the model cache directory.

    This sets the ``UNIFACE_CACHE_DIR`` environment variable so that all
    subsequent model downloads and lookups use the new path.

    Args:
        path: Directory path for storing model weights.

    Example:
        >>> from uniface import set_cache_dir, get_cache_dir
        >>> set_cache_dir('/data/models')
        >>> print(get_cache_dir())
        '/data/models'
    """
    os.environ[_ENV_KEY] = path
    Logger.info(f'Cache directory set to: {path}')


def verify_model_weights(model_name: Enum, root: str | None = None) -> str:
    """Ensure model weights are present, downloading and verifying them if necessary.

    Given a model identifier from an Enum class (e.g., `RetinaFaceWeights.MNET_V2`),
@@ -34,7 +75,7 @@ def verify_model_weights(model_name: Enum, root: str = '~/.uniface/models') -> s
    Args:
        model_name: Model weight identifier enum (e.g., `RetinaFaceWeights.MNET_V2`).
        root: Directory to store or locate the model weights.
            Defaults to '~/.uniface/models'.
            If None, uses the cache directory from :func:`get_cache_dir`.

    Returns:
        Absolute path to the verified model weights file.
@@ -51,8 +92,7 @@ def verify_model_weights(model_name: Enum, root: str = '~/.uniface/models') -> s
        '/home/user/.uniface/models/retinaface_mnet_v2.onnx'
    """

    root = os.getenv('UNIFACE_CACHE_DIR', root)
    root = os.path.expanduser(root)
    root = os.path.expanduser(root) if root is not None else get_cache_dir()
    os.makedirs(root, exist_ok=True)

    # Keep model_name as enum for dictionary lookup
@@ -122,9 +162,50 @@ def verify_file_hash(file_path: str, expected_hash: str) -> bool:
    return actual_hash == expected_hash


if __name__ == '__main__':
    model_names = [model.value for model in const.RetinaFaceWeights]
def download_models(model_names: list[Enum], max_workers: int = 4) -> dict[Enum, str]:
    """Download and verify multiple models concurrently.

    # Download each model in the list
    for model_name in model_names:
        model_path = verify_model_weights(model_name)
    Uses a thread pool to download models in parallel, which is significantly
    faster when initializing several models at once.

    Args:
        model_names: List of model weight enum identifiers to download.
        max_workers: Maximum number of concurrent download threads. Defaults to 4.

    Returns:
        Mapping of each model enum to its local file path.

    Raises:
        RuntimeError: If any model download or verification fails.

    Example:
        >>> from uniface import download_models
        >>> from uniface.constants import RetinaFaceWeights, ArcFaceWeights
        >>> paths = download_models([RetinaFaceWeights.MNET_V2, ArcFaceWeights.RESNET])
    """
    results: dict[Enum, str] = {}
    errors: list[str] = []

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        future_to_model = {executor.submit(verify_model_weights, name): name for name in model_names}

        for future in as_completed(future_to_model):
            model = future_to_model[future]
            try:
                path = future.result()
                results[model] = path
                Logger.info(f'Ready: {model.value} -> {path}')
            except Exception as e:
                errors.append(f'{model.value}: {e}')
                Logger.error(f'Failed to download {model.value}: {e}')

    if errors:
        raise RuntimeError(f'Failed to download {len(errors)} model(s):\n' + '\n'.join(errors))

    Logger.info(f'All {len(results)} model(s) downloaded and verified')
    return results


if __name__ == '__main__':
    for model in const.RetinaFaceWeights:
        model_path = verify_model_weights(model)
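Taken together, the new helpers let callers redirect the cache before any download happens; a short sketch based on the docstring examples above:

```python
from uniface import download_models, get_cache_dir, set_cache_dir
from uniface.constants import RetinaFaceWeights

set_cache_dir('/data/models')  # exported as UNIFACE_CACHE_DIR for this process
print(get_cache_dir())  # '/data/models'

# Fetch several weight files concurrently (4 worker threads by default).
paths = download_models(list(RetinaFaceWeights))
```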
@@ -2,6 +2,8 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

from __future__ import annotations

from abc import ABC, abstractmethod

import numpy as np
@@ -69,7 +71,7 @@ class BaseFaceParser(ABC):
        raise NotImplementedError('Subclasses must implement the postprocess method.')

    @abstractmethod
    def parse(self, face_image: np.ndarray) -> np.ndarray:
    def parse(self, image: np.ndarray, *, landmarks: np.ndarray | None = None) -> np.ndarray:
        """
        Perform end-to-end face parsing on a face image.

@@ -77,9 +79,11 @@ class BaseFaceParser(ABC):
        running inference, and postprocessing to return the segmentation mask.

        Args:
            face_image (np.ndarray): A face image in BGR format.
                The face should be roughly centered and
                well-framed within the image.
            image (np.ndarray): A face image in BGR format.
                The face should be roughly centered and well-framed within the image.
            landmarks (np.ndarray | None): Optional 5-point facial landmarks with
                shape (5, 2). Required by some parsers (e.g., XSeg) for face alignment.
                Ignored by parsers that do not need landmarks (e.g., BiSeNet).

        Returns:
            np.ndarray: Segmentation mask with the same size as input image,
@@ -92,14 +96,15 @@ class BaseFaceParser(ABC):
        """
        raise NotImplementedError('Subclasses must implement the parse method.')

    def __call__(self, face_image: np.ndarray) -> np.ndarray:
    def __call__(self, image: np.ndarray, *, landmarks: np.ndarray | None = None) -> np.ndarray:
        """
        Provides a convenient, callable shortcut for the `parse` method.

        Args:
            face_image (np.ndarray): A face image in BGR format.
            image (np.ndarray): A face image in BGR format.
            landmarks (np.ndarray | None): Optional 5-point facial landmarks.

        Returns:
            np.ndarray: Segmentation mask with the same size as input image.
        """
        return self.parse(face_image)
        return self.parse(image, landmarks=landmarks)
@@ -2,6 +2,7 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

from __future__ import annotations

import cv2
import numpy as np
@@ -149,21 +150,26 @@ class BiSeNet(BaseFaceParser):

        return restored_mask

    def parse(self, face_image: np.ndarray) -> np.ndarray:
    def parse(self, image: np.ndarray, *, landmarks: np.ndarray | None = None) -> np.ndarray:
        """
        Perform end-to-end face parsing on a face image.

        This method orchestrates the full pipeline: preprocessing the input,
        running inference, and postprocessing to return the segmentation mask.

        BiSeNet operates on face crops and does not require landmarks.
        The ``landmarks`` parameter is accepted for API compatibility but ignored.

        Args:
            face_image (np.ndarray): A face image in BGR format.
            image (np.ndarray): A face image in BGR format.
            landmarks (np.ndarray | None): Ignored. Accepted for interface
                compatibility with :class:`BaseFaceParser`.

        Returns:
            np.ndarray: Segmentation mask with the same size as input image.
        """
        original_size = (face_image.shape[1], face_image.shape[0])  # (width, height)
        input_tensor = self.preprocess(face_image)
        original_size = (image.shape[1], image.shape[0])  # (width, height)
        input_tensor = self.preprocess(image)
        outputs = self.session.run(self.output_names, {self.input_name: input_tensor})

        return self.postprocess(outputs[0], original_size)
@@ -54,7 +54,7 @@ class XSeg(BaseFaceParser):
        >>> faces = detector.detect(image)
        >>> for face in faces:
        ...     if face.landmarks is not None:
        ...         mask = parser.parse(image, face.landmarks)
        ...         mask = parser.parse(image, landmarks=face.landmarks)
        ...         print(f'Mask shape: {mask.shape}')
        """

@@ -151,20 +151,28 @@ class XSeg(BaseFaceParser):

        return mask

    def parse(self, image: np.ndarray, landmarks: np.ndarray) -> np.ndarray:
    def parse(self, image: np.ndarray, *, landmarks: np.ndarray | None = None) -> np.ndarray:
        """
        Perform face segmentation using 5-point landmarks.

        XSeg requires landmarks for face alignment. Unlike BiSeNet, calling
        this method without landmarks will raise a :class:`ValueError`.

        Args:
            image (np.ndarray): Input image in BGR format.
            landmarks (np.ndarray): 5-point facial landmarks with shape (5, 2).
            landmarks (np.ndarray | None): 5-point facial landmarks with shape (5, 2).
                Required for XSeg face alignment.

        Returns:
            np.ndarray: Segmentation mask in original image space, values in [0, 1].

        Raises:
            ValueError: If landmarks shape is not (5, 2).
            ValueError: If landmarks is None or has incorrect shape.
        """
        if landmarks is None:
            raise ValueError(
                'XSeg requires 5-point facial landmarks for face alignment. Pass landmarks=... with shape (5, 2).'
            )
        if landmarks.shape != (5, 2):
            raise ValueError(f'Landmarks must have shape (5, 2), got {landmarks.shape}')
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from __future__ import annotations
|
||||
from .blur import BlurFace, EllipticalBlur
|
||||
|
||||
import numpy as np
|
||||
|
||||
from .blur import BlurFace
|
||||
|
||||
|
||||
def anonymize_faces(
|
||||
image: np.ndarray,
|
||||
detector: object | None = None,
|
||||
method: str = 'pixelate',
|
||||
blur_strength: float = 3.0,
|
||||
pixel_blocks: int = 10,
|
||||
confidence_threshold: float = 0.5,
|
||||
**kwargs,
|
||||
) -> np.ndarray:
|
||||
"""One-line face anonymization with automatic detection.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): Input image (BGR format).
|
||||
detector: Face detector instance. Creates RetinaFace if None.
|
||||
method (str): Blur method name. Defaults to 'pixelate'.
|
||||
blur_strength (float): Blur intensity. Defaults to 3.0.
|
||||
pixel_blocks (int): Block count for pixelate. Defaults to 10.
|
||||
confidence_threshold (float): Detection confidence threshold. Defaults to 0.5.
|
||||
**kwargs: Additional detector arguments.
|
||||
|
||||
Returns:
|
||||
np.ndarray: Anonymized image.
|
||||
|
||||
Example:
|
||||
>>> from uniface.privacy import anonymize_faces
|
||||
>>> anonymized = anonymize_faces(image, method='pixelate')
|
||||
"""
|
||||
if detector is None:
|
||||
try:
|
||||
from uniface import RetinaFace
|
||||
|
||||
detector = RetinaFace(confidence_threshold=confidence_threshold, **kwargs)
|
||||
except ImportError as err:
|
||||
raise ImportError('Could not import RetinaFace. Please ensure UniFace is properly installed.') from err
|
||||
|
||||
faces = detector.detect(image)
|
||||
blurrer = BlurFace(method=method, blur_strength=blur_strength, pixel_blocks=pixel_blocks)
|
||||
return blurrer.anonymize(image, faces)
|
||||
|
||||
|
||||
__all__ = ['BlurFace', 'anonymize_faces']
|
||||
__all__ = ['BlurFace', 'EllipticalBlur']
|
||||
|
||||
@@ -4,6 +4,8 @@

from __future__ import annotations

from typing import Any

from uniface.constants import MiniFASNetWeights
from uniface.types import SpoofingResult

@@ -22,6 +24,7 @@ __all__ = [
def create_spoofer(
    model_name: MiniFASNetWeights = MiniFASNetWeights.V2,
    scale: float | None = None,
    **kwargs: Any,
) -> MiniFASNet:
    """Factory function to create a face anti-spoofing model.

@@ -34,6 +37,8 @@ def create_spoofer(
        - MiniFASNetWeights.V1SE: Squeeze-and-excitation version, uses scale=4.0
        scale: Custom crop scale factor for face region. If None, uses the
            default scale for the selected model variant.
        **kwargs: Additional keyword arguments forwarded to MiniFASNet
            (e.g. ``providers=['CUDAExecutionProvider']``).

    Returns:
        An initialized face anti-spoofing model.

@@ -44,4 +49,4 @@ def create_spoofer(
        >>> result = spoofer.predict(image, face.bbox)
        >>> print(f'Is real: {result.is_real}, Confidence: {result.confidence:.2%}')
    """
    return MiniFASNet(model_name=model_name, scale=scale)
    return MiniFASNet(model_name=model_name, scale=scale, **kwargs)
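With `**kwargs` now forwarded, ONNX Runtime options such as execution providers reach the underlying `MiniFASNet`; a sketch following the docstring (the import path, `image`, and `face` are assumptions):

```python
from uniface.spoofing import create_spoofer  # import path assumed

# The providers kwarg is forwarded to MiniFASNet, per the docstring above.
spoofer = create_spoofer(providers=['CUDAExecutionProvider'])
result = spoofer.predict(image, face.bbox)  # image and face come from a detector (assumed)
print(f'Is real: {result.is_real}, Confidence: {result.confidence:.2%}')
```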
uniface/tracking/__init__.py (new file, 9 lines)
@@ -0,0 +1,9 @@
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

from __future__ import annotations

from .bytetrack import BYTETracker

__all__ = ['BYTETracker']
uniface/tracking/bytetrack/__init__.py (new file, 9 lines)
@@ -0,0 +1,9 @@
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

from .basetrack import BaseTrack, TrackState
from .kalman import KalmanFilter
from .tracker import BYTETracker, STrack

__all__ = ['BYTETracker', 'STrack', 'BaseTrack', 'TrackState', 'KalmanFilter']
uniface/tracking/bytetrack/basetrack.py (new file, 71 lines)
@@ -0,0 +1,71 @@
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

from __future__ import annotations

from collections import OrderedDict

import numpy as np


class TrackState:
    """Track state enumeration."""

    New = 0
    Tracked = 1
    Lost = 2
    Removed = 3


class BaseTrack:
    """Base class for tracked objects."""

    _count = 0

    track_id = 0
    is_activated = False
    state = TrackState.New

    history = OrderedDict()  # noqa: RUF012
    features = []  # noqa: RUF012
    curr_feature = None
    score = 0
    start_frame = 0
    frame_id = 0
    time_since_update = 0

    location = (np.inf, np.inf)

    @property
    def end_frame(self) -> int:
        """Return the last frame ID."""
        return self.frame_id

    @staticmethod
    def next_id() -> int:
        """Generate next unique track ID."""
        BaseTrack._count += 1
        return BaseTrack._count

    @staticmethod
    def reset_id() -> None:
        """Reset the ID counter."""
        BaseTrack._count = 0

    def activate(self, *args):
        raise NotImplementedError

    def predict(self):
        raise NotImplementedError

    def update(self, *args, **kwargs):
        raise NotImplementedError

    def mark_lost(self) -> None:
        """Mark track as lost."""
        self.state = TrackState.Lost

    def mark_removed(self) -> None:
        """Mark track as removed."""
        self.state = TrackState.Removed
uniface/tracking/bytetrack/kalman.py (new file, 166 lines)
@@ -0,0 +1,166 @@
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

from __future__ import annotations

import numpy as np
import scipy.linalg


class KalmanFilter:
    """Kalman filter for tracking bounding boxes in image space.

    State space (8-dim): [x, y, a, h, vx, vy, va, vh]
    - (x, y): bounding box center
    - a: aspect ratio (width / height)
    - h: height
    - vx, vy, va, vh: respective velocities
    """

    def __init__(self) -> None:
        ndim, dt = 4, 1.0

        self._motion_mat = np.eye(2 * ndim, 2 * ndim)
        for i in range(ndim):
            self._motion_mat[i, ndim + i] = dt
        self._update_mat = np.eye(ndim, 2 * ndim)

        self._std_weight_position = 1.0 / 20
        self._std_weight_velocity = 1.0 / 160

    def initiate(self, measurement: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
        """Create track from unassociated measurement.

        Args:
            measurement: Bounding box (x, y, a, h).

        Returns:
            Mean vector (8-dim) and covariance matrix (8x8).
        """
        mean_pos = measurement
        mean_vel = np.zeros_like(mean_pos)
        mean = np.r_[mean_pos, mean_vel]

        std = [
            2 * self._std_weight_position * measurement[3],
            2 * self._std_weight_position * measurement[3],
            1e-2,
            2 * self._std_weight_position * measurement[3],
            10 * self._std_weight_velocity * measurement[3],
            10 * self._std_weight_velocity * measurement[3],
            1e-5,
            10 * self._std_weight_velocity * measurement[3],
        ]
        covariance = np.diag(np.square(std))
        return mean, covariance

    def predict(self, mean: np.ndarray, covariance: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
        """Run Kalman filter prediction step.

        Args:
            mean: 8-dim mean vector of previous state.
            covariance: 8x8 covariance matrix of previous state.

        Returns:
            Predicted mean and covariance.
        """
        std_pos = [
            self._std_weight_position * mean[3],
            self._std_weight_position * mean[3],
            1e-2,
            self._std_weight_position * mean[3],
        ]
        std_vel = [
            self._std_weight_velocity * mean[3],
            self._std_weight_velocity * mean[3],
            1e-5,
            self._std_weight_velocity * mean[3],
        ]
        motion_cov = np.diag(np.square(np.r_[std_pos, std_vel]))

        mean = np.dot(mean, self._motion_mat.T)
        covariance = np.linalg.multi_dot((self._motion_mat, covariance, self._motion_mat.T)) + motion_cov

        return mean, covariance

    def project(self, mean: np.ndarray, covariance: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
        """Project state distribution to measurement space.

        Args:
            mean: 8-dim state mean vector.
            covariance: 8x8 state covariance matrix.

        Returns:
            Projected mean (4-dim) and covariance (4x4).
        """
        std = [
            self._std_weight_position * mean[3],
            self._std_weight_position * mean[3],
            1e-1,
            self._std_weight_position * mean[3],
        ]
        innovation_cov = np.diag(np.square(std))

        mean = np.dot(self._update_mat, mean)
        covariance = np.linalg.multi_dot((self._update_mat, covariance, self._update_mat.T))
        return mean, covariance + innovation_cov

    def multi_predict(self, mean: np.ndarray, covariance: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
        """Vectorized Kalman filter prediction for multiple objects.

        Args:
            mean: Nx8 mean matrix.
            covariance: Nx8x8 covariance matrices.

        Returns:
            Predicted means and covariances.
        """
        std_pos = [
            self._std_weight_position * mean[:, 3],
            self._std_weight_position * mean[:, 3],
            1e-2 * np.ones_like(mean[:, 3]),
            self._std_weight_position * mean[:, 3],
        ]
        std_vel = [
            self._std_weight_velocity * mean[:, 3],
            self._std_weight_velocity * mean[:, 3],
            1e-5 * np.ones_like(mean[:, 3]),
            self._std_weight_velocity * mean[:, 3],
        ]
        sqr = np.square(np.r_[std_pos, std_vel]).T

        motion_cov = np.array([np.diag(sqr[i]) for i in range(len(mean))])

        mean = np.dot(mean, self._motion_mat.T)
        left = np.dot(self._motion_mat, covariance).transpose((1, 0, 2))
        covariance = np.dot(left, self._motion_mat.T) + motion_cov

        return mean, covariance

    def update(
        self, mean: np.ndarray, covariance: np.ndarray, measurement: np.ndarray
    ) -> tuple[np.ndarray, np.ndarray]:
        """Run Kalman filter correction step.

        Args:
            mean: 8-dim predicted state mean.
            covariance: 8x8 predicted state covariance.
            measurement: 4-dim measurement (x, y, a, h).

        Returns:
            Corrected mean and covariance.
        """
        projected_mean, projected_cov = self.project(mean, covariance)

        chol_factor, lower = scipy.linalg.cho_factor(projected_cov, lower=True, check_finite=False)
        kalman_gain = scipy.linalg.cho_solve(
            (chol_factor, lower),
            np.dot(covariance, self._update_mat.T).T,
            check_finite=False,
        ).T
        innovation = measurement - projected_mean

        new_mean = mean + np.dot(innovation, kalman_gain.T)
        new_covariance = covariance - np.linalg.multi_dot((kalman_gain, projected_cov, kalman_gain.T))
        return new_mean, new_covariance
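The filter treats each track as a (mean, covariance) pair threaded through `initiate`, `predict`, and `update`; a minimal cycle with synthetic measurements:

```python
import numpy as np

from uniface.tracking.bytetrack.kalman import KalmanFilter

kf = KalmanFilter()

# Measurements are (center_x, center_y, aspect_ratio, height).
mean, cov = kf.initiate(np.array([320.0, 240.0, 0.5, 100.0]))
mean, cov = kf.predict(mean, cov)  # propagate one frame ahead
mean, cov = kf.update(mean, cov, np.array([324.0, 238.0, 0.5, 102.0]))  # correct with the new detection
```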
uniface/tracking/bytetrack/matching.py (new file, 117 lines)
@@ -0,0 +1,117 @@
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

from __future__ import annotations

import numpy as np
from scipy.optimize import linear_sum_assignment

try:
    import lap

    LAP_AVAILABLE = True
except ImportError:
    LAP_AVAILABLE = False


def linear_assignment(
    cost_matrix: np.ndarray, thresh: float, use_lap: bool = True
) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Perform linear assignment using lap or scipy.

    Args:
        cost_matrix: Cost matrix of shape (N, M).
        thresh: Maximum cost threshold for valid assignment.
        use_lap: Use lap.lapjv (faster) if available.

    Returns:
        Tuple of (matches, unmatched_a, unmatched_b).
    """
    if cost_matrix.size == 0:
        return (
            np.empty((0, 2), dtype=int),
            np.arange(cost_matrix.shape[0]),
            np.arange(cost_matrix.shape[1]),
        )

    if use_lap and LAP_AVAILABLE:
        _, x, y = lap.lapjv(cost_matrix, extend_cost=True, cost_limit=thresh)
        matches = [[ix, mx] for ix, mx in enumerate(x) if mx >= 0]
        unmatched_a = np.where(x < 0)[0]
        unmatched_b = np.where(y < 0)[0]
    else:
        row_ind, col_ind = linear_sum_assignment(cost_matrix)
        matches = [[r, c] for r, c in zip(row_ind, col_ind, strict=False) if cost_matrix[r, c] <= thresh]
        if len(matches) == 0:
            unmatched_a = np.arange(cost_matrix.shape[0])
            unmatched_b = np.arange(cost_matrix.shape[1])
        else:
            matches_arr = np.array(matches)
            unmatched_a = np.array([i for i in range(cost_matrix.shape[0]) if i not in matches_arr[:, 0]])
            unmatched_b = np.array([i for i in range(cost_matrix.shape[1]) if i not in matches_arr[:, 1]])

    matches = np.asarray(matches) if matches else np.empty((0, 2), dtype=int)
    return matches, unmatched_a, unmatched_b


def iou_batch(bboxes1: np.ndarray, bboxes2: np.ndarray) -> np.ndarray:
    """Compute IoU between two sets of bounding boxes.

    Args:
        bboxes1: Array of shape (N, 4) in [x1, y1, x2, y2] format.
        bboxes2: Array of shape (M, 4) in [x1, y1, x2, y2] format.

    Returns:
        IoU matrix of shape (N, M).
    """
    bboxes1 = np.ascontiguousarray(bboxes1, dtype=np.float32)
    bboxes2 = np.ascontiguousarray(bboxes2, dtype=np.float32)

    rows, cols = bboxes1.shape[0], bboxes2.shape[0]
    ious = np.zeros((rows, cols), dtype=np.float32)

    if rows * cols == 0:
        return ious

    x1 = np.maximum(bboxes1[:, None, 0], bboxes2[None, :, 0])
    y1 = np.maximum(bboxes1[:, None, 1], bboxes2[None, :, 1])
    x2 = np.minimum(bboxes1[:, None, 2], bboxes2[None, :, 2])
    y2 = np.minimum(bboxes1[:, None, 3], bboxes2[None, :, 3])

    intersection = np.maximum(0, x2 - x1) * np.maximum(0, y2 - y1)

    area1 = (bboxes1[:, 2] - bboxes1[:, 0]) * (bboxes1[:, 3] - bboxes1[:, 1])
    area2 = (bboxes2[:, 2] - bboxes2[:, 0]) * (bboxes2[:, 3] - bboxes2[:, 1])

    union = area1[:, None] + area2[None, :] - intersection
    ious = intersection / np.maximum(union, 1e-10)

    return ious


def iou_distance(atracks: list, btracks: list) -> np.ndarray:
    """Compute cost matrix based on IoU distance.

    Args:
        atracks: List of tracks or bounding boxes.
        btracks: List of tracks or bounding boxes.

    Returns:
        Cost matrix (1 - IoU) of shape (len(atracks), len(btracks)).
    """
    if len(atracks) > 0 and isinstance(atracks[0], np.ndarray):
        atlbrs = atracks
    else:
        atlbrs = [track.tlbr for track in atracks]

    if len(btracks) > 0 and isinstance(btracks[0], np.ndarray):
        btlbrs = btracks
    else:
        btlbrs = [track.tlbr for track in btracks]

    atlbrs = np.asarray(atlbrs) if len(atlbrs) > 0 else np.empty((0, 4))
    btlbrs = np.asarray(btlbrs) if len(btlbrs) > 0 else np.empty((0, 4))

    ious = iou_batch(atlbrs, btlbrs)
    return 1 - ious
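A tiny worked example of the association helpers above, with hand-picked boxes: the overlapping pair is matched, the distant pair is not:

```python
import numpy as np

from uniface.tracking.bytetrack.matching import iou_distance, linear_assignment

preds = [np.array([0, 0, 10, 10], dtype=np.float32), np.array([50, 50, 60, 60], dtype=np.float32)]
dets = [np.array([1, 1, 11, 11], dtype=np.float32), np.array([100, 100, 110, 110], dtype=np.float32)]

cost = iou_distance(preds, dets)  # 1 - IoU, shape (2, 2)
matches, unmatched_preds, unmatched_dets = linear_assignment(cost, thresh=0.8)
# matches -> [[0, 0]]; box 1 on each side stays unmatched (cost 1.0 > 0.8)
```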
uniface/tracking/bytetrack/tracker.py (new file, 352 lines)
@@ -0,0 +1,352 @@
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

from __future__ import annotations

import numpy as np

from . import matching
from .basetrack import BaseTrack, TrackState
from .kalman import KalmanFilter


class STrack(BaseTrack):
    """Single object track using Kalman filter."""

    shared_kalman = KalmanFilter()

    def __init__(self, tlwh: np.ndarray, score: float) -> None:
        """Initialize STrack.

        Args:
            tlwh: Bounding box in [x, y, w, h] format (top-left).
            score: Detection confidence score.
        """
        super().__init__()
        self._tlwh = np.asarray(tlwh, dtype=np.float32)
        self.kalman_filter = None
        self.mean = None
        self.covariance = None
        self.is_activated = False
        self.score = score
        self.tracklet_len = 0

    def predict(self) -> None:
        """Predict next state using Kalman filter."""
        mean_state = self.mean.copy()
        if self.state != TrackState.Tracked:
            mean_state[7] = 0
        self.mean, self.covariance = self.kalman_filter.predict(mean_state, self.covariance)

    @staticmethod
    def multi_predict(stracks: list) -> None:
        """Predict states for multiple tracks (vectorized).

        Args:
            stracks: List of STrack objects.
        """
        if len(stracks) == 0:
            return

        multi_mean = np.asarray([st.mean.copy() for st in stracks])
        multi_covariance = np.asarray([st.covariance for st in stracks])

        for i, st in enumerate(stracks):
            if st.state != TrackState.Tracked:
                multi_mean[i][7] = 0

        multi_mean, multi_covariance = STrack.shared_kalman.multi_predict(multi_mean, multi_covariance)

        for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance, strict=False)):
            stracks[i].mean = mean
            stracks[i].covariance = cov

    def activate(self, kalman_filter: KalmanFilter, frame_id: int) -> None:
        """Start a new tracklet.

        Args:
            kalman_filter: Kalman filter instance.
            frame_id: Current frame number.
        """
        self.kalman_filter = kalman_filter
        self.track_id = self.next_id()
        self.mean, self.covariance = self.kalman_filter.initiate(self._tlwh_to_xyah(self._tlwh))

        self.tracklet_len = 0
        self.state = TrackState.Tracked
        if frame_id == 1:
            self.is_activated = True
        self.frame_id = frame_id
        self.start_frame = frame_id

    def re_activate(self, new_track: STrack, frame_id: int, new_id: bool = False) -> None:
        """Reactivate a lost track.

        Args:
            new_track: New detection to reactivate with.
            frame_id: Current frame number.
            new_id: Whether to assign a new track ID.
        """
        self.mean, self.covariance = self.kalman_filter.update(
            self.mean, self.covariance, self._tlwh_to_xyah(new_track.tlwh)
        )
        self.tracklet_len = 0
        self.state = TrackState.Tracked
        self.is_activated = True
        self.frame_id = frame_id
        if new_id:
            self.track_id = self.next_id()
        self.score = new_track.score

    def update(self, new_track: STrack, frame_id: int) -> None:
        """Update matched track with new detection.

        Args:
            new_track: Matched detection.
            frame_id: Current frame number.
        """
        self.frame_id = frame_id
        self.tracklet_len += 1

        self.mean, self.covariance = self.kalman_filter.update(
            self.mean, self.covariance, self._tlwh_to_xyah(new_track.tlwh)
        )
        self.state = TrackState.Tracked
        self.is_activated = True
        self.score = new_track.score

    @property
    def tlwh(self) -> np.ndarray:
        """Get bounding box in [x, y, w, h] format (top-left)."""
        if self.mean is None:
            return self._tlwh.copy()
        ret = self.mean[:4].copy()
        ret[2] *= ret[3]
        ret[:2] -= ret[2:] / 2
        return ret

    @property
    def tlbr(self) -> np.ndarray:
        """Get bounding box in [x1, y1, x2, y2] format."""
        ret = self.tlwh.copy()
        ret[2:] += ret[:2]
        return ret

    @staticmethod
    def _tlwh_to_xyah(tlwh: np.ndarray) -> np.ndarray:
        """Convert [x, y, w, h] to [cx, cy, aspect_ratio, height]."""
        ret = np.asarray(tlwh).copy()
        ret[:2] += ret[2:] / 2
        ret[2] /= ret[3]
        return ret

    @staticmethod
    def _tlbr_to_tlwh(tlbr: np.ndarray) -> np.ndarray:
        """Convert [x1, y1, x2, y2] to [x, y, w, h]."""
        ret = np.asarray(tlbr).copy()
        ret[2:] -= ret[:2]
        return ret

    def __repr__(self) -> str:
        return f'OT_{self.track_id}_({self.start_frame}-{self.end_frame})'


class BYTETracker:
    """ByteTrack multi-object tracker.

    Uses two-stage association to match both high and low confidence detections.

    Args:
        track_thresh: High confidence detection threshold.
        track_buffer: Maximum frames to keep lost tracks.
        match_thresh: IoU threshold for first association.
        low_thresh: Low confidence detection threshold.
    """

    def __init__(
        self,
        track_thresh: float = 0.5,
        track_buffer: int = 30,
        match_thresh: float = 0.8,
        low_thresh: float = 0.1,
    ) -> None:
        self.track_thresh = track_thresh
        self.match_thresh = match_thresh
        self.low_thresh = low_thresh
        self.buffer_size = track_buffer

        self.frame_id = 0
        self.kalman_filter = KalmanFilter()
        self.tracked_stracks: list[STrack] = []
        self.lost_stracks: list[STrack] = []
        self.removed_stracks: list[STrack] = []

    def reset(self) -> None:
        """Reset tracker state."""
        self.frame_id = 0
        self.tracked_stracks = []
        self.lost_stracks = []
        self.removed_stracks = []
        BaseTrack.reset_id()

    def update(self, detections: np.ndarray) -> np.ndarray:
        """Update tracker with new detections.

        Args:
            detections: Array of shape (N, 5) with [x1, y1, x2, y2, score].

        Returns:
            Array of shape (M, 5) with [x1, y1, x2, y2, track_id].
        """
        self.frame_id += 1
        activated_stracks = []
        refind_stracks = []
        lost_stracks = []
        removed_stracks = []

        if len(detections) == 0:
            detections = np.empty((0, 5))

        scores = detections[:, 4]
        bboxes = detections[:, :4]

        # Split detections into high and low confidence
        high_inds = scores >= self.track_thresh
        low_inds = (scores > self.low_thresh) & (scores < self.track_thresh)

        dets_high = bboxes[high_inds]
        scores_high = scores[high_inds]
        dets_low = bboxes[low_inds]
        scores_low = scores[low_inds]

        # Create STrack objects
        detections_high = [
            STrack(STrack._tlbr_to_tlwh(tlbr), s) for tlbr, s in zip(dets_high, scores_high, strict=False)
        ]
        detections_low = [STrack(STrack._tlbr_to_tlwh(tlbr), s) for tlbr, s in zip(dets_low, scores_low, strict=False)]

        # Separate confirmed and unconfirmed tracks
        unconfirmed = [t for t in self.tracked_stracks if not t.is_activated]
        tracked_stracks = [t for t in self.tracked_stracks if t.is_activated]

        # Step 1: First association with high confidence detections
        strack_pool = _joint_stracks(tracked_stracks, self.lost_stracks)
        STrack.multi_predict(strack_pool)

        dists = matching.iou_distance(strack_pool, detections_high)
        matches, u_track, u_detection = matching.linear_assignment(dists, thresh=self.match_thresh)

        for itracked, idet in matches:
            track = strack_pool[itracked]
            det = detections_high[idet]
            if track.state == TrackState.Tracked:
                track.update(det, self.frame_id)
                activated_stracks.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_stracks.append(track)

        # Step 2: Second association with low confidence detections
        r_tracked_stracks = [strack_pool[i] for i in u_track if strack_pool[i].state == TrackState.Tracked]
        dists = matching.iou_distance(r_tracked_stracks, detections_low)
        matches, u_track_second, _ = matching.linear_assignment(dists, thresh=0.5)

        for itracked, idet in matches:
            track = r_tracked_stracks[itracked]
            det = detections_low[idet]
            if track.state == TrackState.Tracked:
                track.update(det, self.frame_id)
                activated_stracks.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_stracks.append(track)

        # Mark unmatched tracks as lost
        for it in u_track_second:
            track = r_tracked_stracks[it]
            if track.state != TrackState.Lost:
                track.mark_lost()
                lost_stracks.append(track)

        # Step 3: Associate unconfirmed tracks
        detections_remain = [detections_high[i] for i in u_detection]
        dists = matching.iou_distance(unconfirmed, detections_remain)
        matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7)

        for itracked, idet in matches:
            unconfirmed[itracked].update(detections_remain[idet], self.frame_id)
            activated_stracks.append(unconfirmed[itracked])

        for it in u_unconfirmed:
            track = unconfirmed[it]
            track.mark_removed()
            removed_stracks.append(track)

        # Step 4: Initialize new tracks
        for inew in u_detection:
            track = detections_remain[inew]
            if track.score < self.track_thresh:
                continue
            track.activate(self.kalman_filter, self.frame_id)
            activated_stracks.append(track)

        # Step 5: Remove old lost tracks
        for track in self.lost_stracks:
            if self.frame_id - track.end_frame > self.buffer_size:
                track.mark_removed()
                removed_stracks.append(track)

        # Update track lists
        self.tracked_stracks = [t for t in self.tracked_stracks if t.state == TrackState.Tracked]
        self.tracked_stracks = _joint_stracks(self.tracked_stracks, activated_stracks)
        self.tracked_stracks = _joint_stracks(self.tracked_stracks, refind_stracks)
        self.lost_stracks = _sub_stracks(self.lost_stracks, self.tracked_stracks)
        self.lost_stracks.extend(lost_stracks)
        self.lost_stracks = _sub_stracks(self.lost_stracks, self.removed_stracks)
        self.tracked_stracks, self.lost_stracks = _remove_duplicate_stracks(self.tracked_stracks, self.lost_stracks)
        self.removed_stracks.extend(removed_stracks)

        # Output confirmed tracks
        outputs = []
        for track in self.tracked_stracks:
            if track.is_activated:
                outputs.append(np.concatenate([track.tlbr, [track.track_id]]))

        return np.array(outputs) if outputs else np.empty((0, 5))


def _joint_stracks(tlista: list, tlistb: list) -> list:
    """Merge two track lists without duplicates."""
    exists = {t.track_id: 1 for t in tlista}
    res = list(tlista)
    for t in tlistb:
        if not exists.get(t.track_id, 0):
            exists[t.track_id] = 1
            res.append(t)
    return res


def _sub_stracks(tlista: list, tlistb: list) -> list:
    """Remove tracks in tlistb from tlista."""
    track_ids_b = {t.track_id for t in tlistb}
    return [t for t in tlista if t.track_id not in track_ids_b]


def _remove_duplicate_stracks(stracksa: list, stracksb: list) -> tuple[list, list]:
    """Remove duplicate tracks based on IoU."""
    pdist = matching.iou_distance(stracksa, stracksb)
    pairs = np.where(pdist < 0.15)
    dupa, dupb = [], []

    for p, q in zip(*pairs, strict=False):
        timep = stracksa[p].frame_id - stracksa[p].start_frame
        timeq = stracksb[q].frame_id - stracksb[q].start_frame
        if timep > timeq:
            dupb.append(q)
        else:
            dupa.append(p)

    resa = [t for i, t in enumerate(stracksa) if i not in dupa]
    resb = [t for i, t in enumerate(stracksb) if i not in dupb]
    return resa, resb
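End to end, `BYTETracker.update` consumes per-frame detections as an (N, 5) array and returns confirmed tracks; a sketch, where the detector and frame source are assumptions:

```python
import numpy as np

from uniface.tracking import BYTETracker

tracker = BYTETracker(track_thresh=0.5, track_buffer=30)

for frame in video_frames:  # any iterable of BGR frames (assumed)
    faces = detector.detect(frame)  # detector is an assumed uniface detector instance
    dets = np.array([[*face.bbox, face.confidence] for face in faces], dtype=np.float32)
    tracks = tracker.update(dets)  # (M, 5): [x1, y1, x2, y2, track_id]
    for x1, y1, x2, y2, track_id in tracks:
        print(int(track_id), (x1, y1, x2, y2))
```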
@@ -17,6 +17,7 @@ Note on mutability:
from __future__ import annotations

from dataclasses import dataclass, fields
import json

import numpy as np

@@ -151,6 +152,7 @@ class Face:
        race: Predicted race/ethnicity (optional, from FairFace).
        emotion: Predicted emotion label (optional, from Emotion model).
        emotion_confidence: Confidence score for emotion prediction (optional).
        track_id: Persistent track ID assigned by BYTETracker (optional).

    Properties:
        sex: Gender as a human-readable string ("Female" or "Male").
@@ -171,6 +173,7 @@ class Face:
    race: str | None = None
    emotion: str | None = None
    emotion_confidence: float | None = None
    track_id: int | None = None

    def compute_similarity(self, other: Face) -> float:
        """Compute cosine similarity with another face."""
@@ -182,6 +185,34 @@ class Face:
        """Convert to dictionary."""
        return {f.name: getattr(self, f.name) for f in fields(self)}

    def to_json(self, indent: int | None = None) -> str:
        """Serialize the Face object to a JSON string.

        Converts numpy arrays to Python lists and numpy scalar types to
        native Python types so the result is fully JSON-serializable.

        Args:
            indent: Number of spaces for pretty-printing. None for compact output.

        Returns:
            JSON string representation of this Face object.

        Example:
            >>> face = faces[0]
            >>> print(face.to_json(indent=2))
        """
        data: dict = {}
        for f in fields(self):
            value = getattr(self, f.name)
            if isinstance(value, np.ndarray):
                value = value.tolist()
            elif isinstance(value, np.floating):
                value = float(value)
            elif isinstance(value, np.integer):
                value = int(value)
            data[f.name] = value
        return json.dumps(data, indent=indent)

    @property
    def sex(self) -> str | None:
        """Get gender as a string label (Female or Male)."""
@@ -201,6 +232,8 @@ class Face:

    def __repr__(self) -> str:
        parts = [f'Face(confidence={self.confidence:.3f}']
        if self.track_id is not None:
            parts.append(f'track_id={self.track_id}')
        if self.age is not None:
            parts.append(f'age={self.age}')
        if self.age_group is not None:
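Because arrays and numpy scalars are converted to native types, the output round-trips through the standard `json` module; a quick sketch (`face` is an assumed detection result):

```python
import json

payload = face.to_json(indent=2)  # face: an assumed Face instance from a detector
data = json.loads(payload)  # plain dict of lists / floats / ints
print(data['confidence'], data['track_id'])
```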
@@ -1,341 +0,0 @@
# Copyright 2025-2026 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

"""Visualization utilities for UniFace.

This module provides functions for drawing detection results, gaze directions,
and face parsing segmentation maps on images.
"""

from __future__ import annotations

import cv2
import numpy as np

__all__ = [
    'FACE_PARSING_COLORS',
    'FACE_PARSING_LABELS',
    'draw_detections',
    'draw_fancy_bbox',
    'draw_gaze',
    'vis_parsing_maps',
]

# Face parsing component names (19 classes)
FACE_PARSING_LABELS = [
    'background',
    'skin',
    'l_brow',
    'r_brow',
    'l_eye',
    'r_eye',
    'eye_g',
    'l_ear',
    'r_ear',
    'ear_r',
    'nose',
    'mouth',
    'u_lip',
    'l_lip',
    'neck',
    'neck_l',
    'cloth',
    'hair',
    'hat',
]

# Color palette for face parsing visualization
FACE_PARSING_COLORS = [
    [0, 0, 0],
    [255, 85, 0],
    [255, 170, 0],
    [255, 0, 85],
    [255, 0, 170],
    [0, 255, 0],
    [85, 255, 0],
    [170, 255, 0],
    [0, 255, 85],
    [0, 255, 170],
    [0, 0, 255],
    [85, 0, 255],
    [170, 0, 255],
    [0, 85, 255],
    [0, 170, 255],
    [255, 255, 0],
    [255, 255, 85],
    [255, 255, 170],
    [255, 0, 255],
]


def draw_detections(
    *,
    image: np.ndarray,
    bboxes: list[np.ndarray] | list[list[float]],
    scores: np.ndarray | list[float],
    landmarks: list[np.ndarray] | list[list[list[float]]],
    vis_threshold: float = 0.6,
    draw_score: bool = False,
    fancy_bbox: bool = True,
) -> None:
    """Draw bounding boxes, landmarks, and optional scores on an image.

    Modifies the image in-place.

    Args:
        image: Input image to draw on (modified in-place).
        bboxes: List of bounding boxes as [x1, y1, x2, y2].
        scores: List of confidence scores.
        landmarks: List of landmark sets with shape (5, 2).
        vis_threshold: Confidence threshold for filtering. Defaults to 0.6.
        draw_score: Whether to draw confidence scores. Defaults to False.
        fancy_bbox: Use corner-style bounding boxes. Defaults to True.
    """
    colors = [(0, 0, 255), (0, 255, 255), (255, 0, 255), (0, 255, 0), (255, 0, 0)]

    # Calculate line thickness based on image size
    line_thickness = max(round(sum(image.shape[:2]) / 2 * 0.003), 2)

    # Filter detections by confidence threshold
    keep_indices = [i for i, score in enumerate(scores) if score >= vis_threshold]

    for i in keep_indices:
        bbox = np.array(bboxes[i], dtype=np.int32)
        score = scores[i]
        landmark_set = np.array(landmarks[i], dtype=np.int32)

        # Calculate dynamic font scale based on bbox height
        bbox_h = bbox[3] - bbox[1]
        font_scale = max(0.4, min(0.7, bbox_h / 200))
        font_thickness = 2

        # Draw bounding box
        if fancy_bbox:
            draw_fancy_bbox(image, bbox, color=(0, 255, 0), thickness=line_thickness, proportion=0.2)
        else:
            cv2.rectangle(image, tuple(bbox[:2]), tuple(bbox[2:]), (0, 255, 0), line_thickness)

        # Draw confidence score with background
        if draw_score:
            text = f'{score:.2f}'
            (text_width, text_height), baseline = cv2.getTextSize(
                text, cv2.FONT_HERSHEY_SIMPLEX, font_scale, font_thickness
            )

            # Draw background rectangle
            cv2.rectangle(
                image,
                (bbox[0], bbox[1] - text_height - baseline - 10),
                (bbox[0] + text_width + 10, bbox[1]),
                (0, 255, 0),
                -1,
            )

            # Draw text
            cv2.putText(
                image,
                text,
                (bbox[0] + 5, bbox[1] - 5),
                cv2.FONT_HERSHEY_SIMPLEX,
                font_scale,
                (0, 0, 0),
                font_thickness,
            )

        # Draw landmarks
        for j, point in enumerate(landmark_set):
            cv2.circle(image, tuple(point), line_thickness + 1, colors[j], -1)


def draw_fancy_bbox(
    image: np.ndarray,
    bbox: np.ndarray,
    color: tuple[int, int, int] = (0, 255, 0),
    thickness: int = 3,
    proportion: float = 0.2,
) -> None:
    """Draw a bounding box with fancy corners on an image.

    Args:
        image: Input image to draw on (modified in-place).
        bbox: Bounding box coordinates [x1, y1, x2, y2].
        color: Color of the bounding box in BGR. Defaults to green.
        thickness: Thickness of the corner lines. Defaults to 3.
        proportion: Proportion of corner length to box dimensions. Defaults to 0.2.
    """
    x1, y1, x2, y2 = map(int, bbox)
    width = x2 - x1
    height = y2 - y1

    corner_length = int(proportion * min(width, height))

    # Draw the rectangle
    cv2.rectangle(image, (x1, y1), (x2, y2), color, 1)

    # Top-left corner
    cv2.line(image, (x1, y1), (x1 + corner_length, y1), color, thickness)
    cv2.line(image, (x1, y1), (x1, y1 + corner_length), color, thickness)

    # Top-right corner
    cv2.line(image, (x2, y1), (x2 - corner_length, y1), color, thickness)
    cv2.line(image, (x2, y1), (x2, y1 + corner_length), color, thickness)

    # Bottom-left corner
    cv2.line(image, (x1, y2), (x1, y2 - corner_length), color, thickness)
    cv2.line(image, (x1, y2), (x1 + corner_length, y2), color, thickness)

    # Bottom-right corner
    cv2.line(image, (x2, y2), (x2, y2 - corner_length), color, thickness)
    cv2.line(image, (x2, y2), (x2 - corner_length, y2), color, thickness)


def draw_gaze(
    image: np.ndarray,
    bbox: np.ndarray,
    pitch: np.ndarray | float,
    yaw: np.ndarray | float,
    *,
    draw_bbox: bool = True,
    fancy_bbox: bool = True,
    draw_angles: bool = True,
) -> None:
    """Draw gaze direction with optional bounding box on an image.

    Args:
        image: Input image to draw on (modified in-place).
        bbox: Face bounding box [x1, y1, x2, y2].
        pitch: Vertical gaze angle in radians.
        yaw: Horizontal gaze angle in radians.
        draw_bbox: Whether to draw the bounding box. Defaults to True.
        fancy_bbox: Use fancy corner-style bbox. Defaults to True.
        draw_angles: Whether to display pitch/yaw values as text. Defaults to True.
    """
    x_min, y_min, x_max, y_max = map(int, bbox[:4])

    # Calculate dynamic line thickness based on image size (same as draw_detections)
    line_thickness = max(round(sum(image.shape[:2]) / 2 * 0.003), 2)

    # Calculate dynamic font scale based on bbox height (same as draw_detections)
    bbox_h = y_max - y_min
    font_scale = max(0.4, min(0.7, bbox_h / 200))
    font_thickness = 2

    # Draw bounding box if requested
    if draw_bbox:
        if fancy_bbox:
            draw_fancy_bbox(image, bbox, color=(0, 255, 0), thickness=line_thickness)
        else:
            cv2.rectangle(image, (x_min, y_min), (x_max, y_max), (0, 255, 0), line_thickness)

    # Calculate center of the bounding box
    x_center = (x_min + x_max) // 2
    y_center = (y_min + y_max) // 2

    # Calculate the direction of the gaze
    length = x_max - x_min
    dx = int(-length * np.sin(pitch) * np.cos(yaw))
    dy = int(-length * np.sin(yaw))

    point1 = (x_center, y_center)
    point2 = (x_center + dx, y_center + dy)

    # Calculate dynamic center point radius based on line thickness
    center_radius = max(line_thickness + 1, 4)

    # Draw gaze direction
    cv2.circle(image, (x_center, y_center), radius=center_radius, color=(0, 0, 255), thickness=-1)
    cv2.arrowedLine(
        image,
        point1,
        point2,
        color=(0, 0, 255),
        thickness=line_thickness,
        line_type=cv2.LINE_AA,
        tipLength=0.25,
    )

    # Draw angle values
    if draw_angles:
        text = f'P:{np.degrees(pitch):.0f}deg Y:{np.degrees(yaw):.0f}deg'
        (text_width, text_height), baseline = cv2.getTextSize(
            text, cv2.FONT_HERSHEY_SIMPLEX, font_scale, font_thickness
        )

        # Draw background rectangle for text
        cv2.rectangle(
            image,
            (x_min, y_min - text_height - baseline - 10),
            (x_min + text_width + 10, y_min),
            (0, 0, 255),
            -1,
        )

        # Draw text
        cv2.putText(
            image,
            text,
            (x_min + 5, y_min - 5),
            cv2.FONT_HERSHEY_SIMPLEX,
            font_scale,
            (255, 255, 255),
            font_thickness,
        )


def vis_parsing_maps(
    image: np.ndarray,
    segmentation_mask: np.ndarray,
    *,
    save_image: bool = False,
    save_path: str = 'result.png',
) -> np.ndarray:
    """Visualize face parsing segmentation mask by overlaying colored regions.

    Args:
        image: Input face image in RGB format with shape (H, W, 3).
        segmentation_mask: Segmentation mask with shape (H, W) where each pixel
            value represents a facial component class (0-18).
        save_image: Whether to save the visualization to disk. Defaults to False.
        save_path: Path to save the visualization if save_image is True.

    Returns:
        Blended image with segmentation overlay in BGR format.

    Example:
        >>> import cv2
        >>> from uniface.parsing import BiSeNet
        >>> from uniface.visualization import vis_parsing_maps
        >>> parser = BiSeNet()
        >>> face_image = cv2.imread('face.jpg')
        >>> mask = parser.parse(face_image)
        >>> face_rgb = cv2.cvtColor(face_image, cv2.COLOR_BGR2RGB)
        >>> result = vis_parsing_maps(face_rgb, mask)
        >>> cv2.imwrite('parsed_face.jpg', result)
    """
    # Create numpy arrays for image and segmentation mask
    image = np.array(image).copy().astype(np.uint8)
    segmentation_mask = segmentation_mask.copy().astype(np.uint8)

    # Create a color mask
    segmentation_mask_color = np.zeros((segmentation_mask.shape[0], segmentation_mask.shape[1], 3))

    num_classes = np.max(segmentation_mask)

    for class_index in range(1, num_classes + 1):
        class_pixels = np.where(segmentation_mask == class_index)
        segmentation_mask_color[class_pixels[0], class_pixels[1], :] = FACE_PARSING_COLORS[class_index]

    segmentation_mask_color = segmentation_mask_color.astype(np.uint8)

    # Convert image to BGR format for blending
    bgr_image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

    # Blend the image with the segmentation mask
    blended_image = cv2.addWeighted(bgr_image, 0.6, segmentation_mask_color, 0.4, 0)

    # Save the result if required
    if save_image:
        cv2.imwrite(save_path, blended_image, [int(cv2.IMWRITE_JPEG_QUALITY), 100])

    return blended_image