12 Commits
v1.5.2 ... main

Author SHA1 Message Date
Yakhyokhuja Valikhujaev
cbcd89b167 feat: Common result dataclasses and refactoring several methods. (#50)
* chore: Rename scripts to tools folder and unify argument parser

* refactor: Centralize dataclasses in types.py and add __call__ to all models

- Move Face and result dataclasses to uniface/types.py
- Add GazeResult, SpoofingResult, EmotionResult (frozen=True)
- Add __call__ to BaseDetector, BaseRecognizer, BaseLandmarker
- Add __repr__ to all dataclasses
- Replace print() with Logger in onnx_utils.py
- Update tools and docs to use new dataclass return types
- Add test_types.py with comprehensive dataclass tests

* chore: Rename files under tools folder and unify argument parser for them
2025-12-30 17:05:24 +09:00
Yakhyokhuja Valikhujaev
50226041c9 refactor: Standardize naming conventions (#47)
* refactor: Standardize naming conventions

* chore: Update the version and re-run experiments

* chore: Improve code quality tooling and documentation

- Add pre-commit job to CI workflow for automated linting on PRs
- Update uniface/__init__.py with copyright header, module docstring,
  and logically grouped exports
- Revise CONTRIBUTING.md to reflect pre-commit handles all formatting
- Remove redundant ruff check from CI (now handled by pre-commit)
- Update build job Python version to 3.11 (matches requires-python)
2025-12-30 00:20:34 +09:00
Yakhyokhuja Valikhujaev
64ad0d2f53 feat: Add FairFace model and AttributeResult return type (#46)
* feat: Add FairFace model and unified AttributeResult return type
- Update FaceAnalyzer to support FairFace
- Update documentation (README.md, QUICKSTART.md, MODELS.md)

* docs: Change python3.10 to python3.11 in python badge

* chore: Remove unused import

* fix: Fix test for age gender to reflect AttributeResult type
2025-12-28 21:07:36 +09:00
Yakhyokhuja Valikhujaev
7c98a60d26 fix: Python 3.10 does not support tomllib (#43) 2025-12-24 00:51:36 +09:00
Yakhyokhuja Valikhujaev
d97a3b2cb2 Merge pull request #42 from yakhyo/feat/standardize-outputs
feat: Standardize detection output and several other updates
2025-12-24 00:38:32 +09:00
yakhyo
2200ba063c docs: Update related docs and ruff formatting 2025-12-24 00:34:24 +09:00
yakhyo
9bcbfa65c2 feat: Update detection module output to dataclasses 2025-12-24 00:00:00 +09:00
yakhyo
96306a0910 feat: Update github actions 2025-12-23 23:59:15 +09:00
Yakhyokhuja Valikhujaev
3389aa3e4c feat: Add MiniFasNet for Face Anti Spoofing (#41) 2025-12-20 22:34:47 +09:00
Yakhyokhuja Valikhujaev
b282e6ccc1 docs: Update related docs to face anonymization (#40) 2025-12-20 21:27:26 +09:00
Yakhyokhuja Valikhujaev
d085c6a822 feat: Add face blurring for privacy (#39)
* feat: Add face blurring for privacy

* chore: Revert back the version
2025-12-20 20:57:42 +09:00
yakhyo
13b518e96d chore: Upgrade version to v1.5.3 2025-12-15 15:09:54 +09:00
93 changed files with 6082 additions and 2341 deletions
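The dataclass return types introduced in cbcd89b167 and 9bcbfa65c2 above change how callers consume results (attribute access instead of dict keys). A minimal sketch of the new style, assuming the attribute names shown in the README and QUICKSTART diffs below; exact signatures may differ:

```python
# Sketch only: illustrates the dataclass-style results described in the commits above.
import cv2

from uniface import RetinaFace
from uniface.spoofing import MiniFASNet

detector = RetinaFace()        # per commit cbcd89b167, detectors also expose __call__
spoofer = MiniFASNet()         # anti-spoofing model added in 3389aa3e4c

image = cv2.imread('photo.jpg')
faces = detector.detect(image)  # list of Face dataclasses instead of list of dicts

for face in faces:
    print(face)                                 # dataclasses define __repr__
    result = spoofer.predict(image, face.bbox)  # SpoofingResult(is_real, confidence)
    label = 'Real' if result.is_real else 'Fake'
    print(f'{label}: {result.confidence:.1%}')
```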

BIN
.github/logos/gaze_crop.png vendored Normal file (binary, 716 KiB — not shown)

BIN
.github/logos/gaze_org.png vendored Normal file (binary, 673 KiB — not shown)

View File

@@ -10,14 +10,31 @@ on:
- main
- develop
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
jobs:
test:
lint:
runs-on: ubuntu-latest
timeout-minutes: 5
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: '3.11'
- uses: pre-commit/action@v3.0.1
test:
runs-on: ${{ matrix.os }}
timeout-minutes: 15
needs: lint
strategy:
fail-fast: false
matrix:
python-version: ["3.10", "3.11", "3.12", "3.13"]
os: [ubuntu-latest, macos-latest, windows-latest]
python-version: ["3.11", "3.13"]
steps:
- name: Checkout code
@@ -27,7 +44,7 @@ jobs:
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
cache: 'pip'
cache: "pip"
- name: Install dependencies
run: |
@@ -38,21 +55,15 @@ jobs:
run: |
python -c "import onnxruntime as ort; print('Available providers:', ort.get_available_providers())"
- name: Lint with ruff (if available)
run: |
pip install ruff || true
ruff check . --exit-zero || true
continue-on-error: true
- name: Run tests
run: pytest -v --tb=short
- name: Test package imports
run: |
python -c "from uniface import RetinaFace, ArcFace, Landmark106, AgeGender; print('All imports successful')"
run: python -c "import uniface; print(f'uniface {uniface.__version__} loaded with {len(uniface.__all__)} exports')"
build:
runs-on: ubuntu-latest
timeout-minutes: 10
needs: test
steps:
@@ -62,8 +73,8 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: "3.10"
cache: 'pip'
python-version: "3.11"
cache: "pip"
- name: Install build tools
run: |
@@ -84,4 +95,3 @@ jobs:
name: dist-python-${{ github.sha }}
path: dist/
retention-days: 7

View File

@@ -5,9 +5,14 @@ on:
tags:
- "v*.*.*" # Trigger only on version tags like v0.1.9
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
jobs:
validate:
runs-on: ubuntu-latest
timeout-minutes: 5
outputs:
version: ${{ steps.get_version.outputs.version }}
tag_version: ${{ steps.get_version.outputs.tag_version }}
@@ -16,13 +21,18 @@ jobs:
- name: Checkout code
uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: "3.11"
- name: Get version from tag and pyproject.toml
id: get_version
run: |
TAG_VERSION=${GITHUB_REF#refs/tags/v}
echo "tag_version=$TAG_VERSION" >> $GITHUB_OUTPUT
PYPROJECT_VERSION=$(grep -Po '(?<=^version = ")[^"]*' pyproject.toml)
PYPROJECT_VERSION=$(python -c "import tomllib; print(tomllib.load(open('pyproject.toml','rb'))['project']['version'])")
echo "version=$PYPROJECT_VERSION" >> $GITHUB_OUTPUT
echo "Tag version: v$TAG_VERSION"
@@ -38,12 +48,13 @@ jobs:
test:
runs-on: ubuntu-latest
timeout-minutes: 15
needs: validate
strategy:
fail-fast: false
matrix:
python-version: ["3.10", "3.11", "3.12", "3.13"]
python-version: ["3.11", "3.13"]
steps:
- name: Checkout code
@@ -65,6 +76,7 @@ jobs:
publish:
runs-on: ubuntu-latest
timeout-minutes: 10
needs: [validate, test]
permissions:
contents: write
@@ -105,4 +117,3 @@ jobs:
with:
files: dist/*
generate_release_notes: true

40
.pre-commit-config.yaml Normal file
View File

@@ -0,0 +1,40 @@
# Pre-commit configuration for UniFace
# See https://pre-commit.com for more information
# See https://pre-commit.com/hooks.html for more hooks
repos:
# General file checks
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v6.0.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
- id: check-yaml
- id: check-toml
- id: check-added-large-files
args: ['--maxkb=1000']
- id: check-merge-conflict
- id: debug-statements
- id: check-ast
# Ruff - Fast Python linter and formatter
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.14.10
hooks:
- id: ruff
args: [--fix, --unsafe-fixes, --exit-non-zero-on-fix]
- id: ruff-format
# Security checks
- repo: https://github.com/PyCQA/bandit
rev: 1.9.2
hooks:
- id: bandit
args: [-c, pyproject.toml]
additional_dependencies: ['bandit[toml]']
exclude: ^tests/
# Configuration
ci:
autofix_commit_msg: 'style: auto-fix by pre-commit hooks'
autoupdate_commit_msg: 'chore: update pre-commit hooks'

View File

@@ -16,16 +16,9 @@ Thank you for considering contributing to UniFace! We welcome contributions of a
2. Create a new branch for your feature
3. Write clear, documented code with type hints
4. Add tests for new functionality
5. Ensure all tests pass
5. Ensure all tests pass and pre-commit hooks are satisfied
6. Submit a pull request with a clear description
### Code Style
- Follow PEP8 guidelines
- Use type hints (Python 3.10+)
- Write docstrings for public APIs
- Keep code simple and readable
## Development Setup
```bash
@@ -34,30 +27,164 @@ cd uniface
pip install -e ".[dev]"
```
### Setting Up Pre-commit Hooks
We use [pre-commit](https://pre-commit.com/) to ensure code quality and consistency. Install and configure it:
```bash
# Install pre-commit
pip install pre-commit
# Install the git hooks
pre-commit install
# (Optional) Run against all files
pre-commit run --all-files
```
Once installed, pre-commit will automatically run on every commit to check:
- Code formatting and linting (Ruff)
- Security issues (Bandit)
- General file hygiene (trailing whitespace, YAML/TOML validity, etc.)
**Note:** All PRs are automatically checked by CI. The merge button will only be available after all checks pass.
## Code Style
This project uses [Ruff](https://docs.astral.sh/ruff/) for linting and formatting, following modern Python best practices. Pre-commit handles all formatting automatically.
### Style Guidelines
#### General Rules
- **Line length:** 120 characters maximum
- **Python version:** 3.11+ (use modern syntax)
- **Quote style:** Single quotes for strings, double quotes for docstrings
#### Type Hints
Use modern Python 3.11+ type hints (PEP 585 and PEP 604):
```python
# Preferred (modern)
def process(items: list[str], config: dict[str, int] | None = None) -> tuple[int, str]:
...
# Avoid (legacy)
from typing import List, Dict, Optional, Tuple
def process(items: List[str], config: Optional[Dict[str, int]] = None) -> Tuple[int, str]:
...
```
#### Docstrings
Use [Google-style docstrings](https://google.github.io/styleguide/pyguide.html#38-comments-and-docstrings) for all public APIs:
```python
def detect_faces(image: np.ndarray, threshold: float = 0.5) -> list[Face]:
"""Detect faces in an image.
Args:
image: Input image as a numpy array with shape (H, W, C) in BGR format.
threshold: Confidence threshold for filtering detections. Defaults to 0.5.
Returns:
List of Face objects containing bounding boxes, confidence scores,
and facial landmarks.
Raises:
ValueError: If the input image has invalid dimensions.
Example:
>>> from uniface import detect_faces
>>> faces = detect_faces(image, threshold=0.8)
>>> print(f"Found {len(faces)} faces")
"""
```
#### Import Order
Imports are automatically sorted by Ruff with the following order:
1. **Future** imports (`from __future__ import annotations`)
2. **Standard library** (`os`, `sys`, `typing`, etc.)
3. **Third-party** (`numpy`, `cv2`, `onnxruntime`, etc.)
4. **First-party** (`uniface.*`)
5. **Local** (relative imports like `.base`, `.models`)
```python
from __future__ import annotations
import os
from typing import Any
import cv2
import numpy as np
from uniface.constants import RetinaFaceWeights
from uniface.log import Logger
from .base import BaseDetector
```
#### Code Comments
- Add comments for complex logic, magic numbers, and non-obvious behavior
- Avoid comments that merely restate the code
- Use `# TODO:` with issue links for planned improvements
```python
# RetinaFace FPN strides and corresponding anchor sizes per level
steps = [8, 16, 32]
min_sizes = [[16, 32], [64, 128], [256, 512]]
# Add small epsilon to prevent division by zero
similarity = np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b) + 1e-5)
```
## Running Tests
```bash
# Run all tests
pytest tests/
# Run with verbose output
pytest tests/ -v
# Run specific test file
pytest tests/test_factory.py
# Run with coverage
pytest tests/ --cov=uniface --cov-report=html
```
## Adding New Features
When adding a new model or feature:
1. **Create the model class** in the appropriate submodule (e.g., `uniface/detection/`)
2. **Add weight constants** to `uniface/constants.py` with URLs and SHA256 hashes
3. **Export in `__init__.py`** files at both module and package levels
4. **Write tests** in `tests/` directory
5. **Add example usage** in `tools/` or update existing notebooks
6. **Update documentation** if needed
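As a rough illustration of step 4, a minimal pytest sketch (RetinaFace stands in for the new model class; the assertions are placeholders, not a required checklist):

```python
# Illustrative only: a new model's test would follow the same shape.
import numpy as np

from uniface import RetinaFace  # replace with the newly added class


def test_model_returns_result_objects():
    detector = RetinaFace()  # downloads weights on first use (cached under ~/.uniface/models/)
    blank = np.zeros((640, 640, 3), dtype=np.uint8)  # synthetic image; no faces expected
    faces = detector.detect(blank)
    assert isinstance(faces, list)
    for face in faces:
        assert hasattr(face, 'bbox') and hasattr(face, 'confidence')
```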
## Examples
Example notebooks demonstrating library usage:
| Example | Notebook |
|---------|----------|
| Face Detection | [face_detection.ipynb](examples/face_detection.ipynb) |
| Face Alignment | [face_alignment.ipynb](examples/face_alignment.ipynb) |
| Face Recognition | [face_analyzer.ipynb](examples/face_analyzer.ipynb) |
| Face Verification | [face_verification.ipynb](examples/face_verification.ipynb) |
| Face Search | [face_search.ipynb](examples/face_search.ipynb) |
| Face Detection | [01_face_detection.ipynb](examples/01_face_detection.ipynb) |
| Face Alignment | [02_face_alignment.ipynb](examples/02_face_alignment.ipynb) |
| Face Verification | [03_face_verification.ipynb](examples/03_face_verification.ipynb) |
| Face Search | [04_face_search.ipynb](examples/04_face_search.ipynb) |
| Face Analyzer | [05_face_analyzer.ipynb](examples/05_face_analyzer.ipynb) |
| Face Parsing | [06_face_parsing.ipynb](examples/06_face_parsing.ipynb) |
| Face Anonymization | [07_face_anonymization.ipynb](examples/07_face_anonymization.ipynb) |
| Gaze Estimation | [08_gaze_estimation.ipynb](examples/08_gaze_estimation.ipynb) |
## Questions?
Open an issue or start a discussion on GitHub.

116
MODELS.md
View File

@@ -20,7 +20,7 @@ RetinaFace models are trained on the WIDER FACE dataset and provide excellent ac
| `RESNET34` | 24.8M | 56MB | 94.16% | 93.12% | 88.90% | Maximum accuracy |
**Accuracy**: WIDER FACE validation set (Easy/Medium/Hard subsets) - from [RetinaFace paper](https://arxiv.org/abs/1905.00641)
**Speed**: Benchmark on your own hardware using `scripts/run_detection.py --iterations 100`
**Speed**: Benchmark on your own hardware using `tools/detection.py --source <image> --iterations 100`
#### Usage
@@ -34,7 +34,7 @@ detector = RetinaFace() # Uses MNET_V2
# Specific model
detector = RetinaFace(
model_name=RetinaFaceWeights.MNET_025, # Fastest
conf_thresh=0.5,
confidence_threshold=0.5,
nms_thresh=0.4,
input_size=(640, 640)
)
@@ -52,7 +52,7 @@ SCRFD (Sample and Computation Redistribution for Efficient Face Detection) model
| `SCRFD_10G` ⭐ | 4.2M | 17MB | 95.16% | 93.87% | 83.05% | **High accuracy + speed** |
**Accuracy**: WIDER FACE validation set - from [SCRFD paper](https://arxiv.org/abs/2105.04714)
**Speed**: Benchmark on your own hardware using `scripts/run_detection.py --iterations 100`
**Speed**: Benchmark on your own hardware using `tools/detection.py --source <image> --iterations 100`
#### Usage
@@ -63,14 +63,14 @@ from uniface.constants import SCRFDWeights
# Fast real-time detection
detector = SCRFD(
model_name=SCRFDWeights.SCRFD_500M_KPS,
conf_thresh=0.5,
confidence_threshold=0.5,
input_size=(640, 640)
)
# High accuracy
detector = SCRFD(
model_name=SCRFDWeights.SCRFD_10G_KPS,
conf_thresh=0.5
confidence_threshold=0.5
)
```
@@ -87,7 +87,7 @@ YOLOv5-Face models provide excellent detection accuracy with 5-point facial land
| `YOLOV5M` | 82MB | 95.30% | 93.76% | 85.28% | High accuracy |
**Accuracy**: WIDER FACE validation set - from [YOLOv5-Face paper](https://arxiv.org/abs/2105.12931)
**Speed**: Benchmark on your own hardware using `scripts/run_detection.py --iterations 100`
**Speed**: Benchmark on your own hardware using `tools/detection.py --source <image> --iterations 100`
**Note**: Fixed input size of 640×640. Models exported to ONNX from [deepcam-cn/yolov5-face](https://github.com/deepcam-cn/yolov5-face)
#### Usage
@@ -99,29 +99,29 @@ from uniface.constants import YOLOv5FaceWeights
# Lightweight/Mobile
detector = YOLOv5Face(
model_name=YOLOv5FaceWeights.YOLOV5N,
conf_thresh=0.6,
confidence_threshold=0.6,
nms_thresh=0.5
)
# Real-time detection (recommended)
detector = YOLOv5Face(
model_name=YOLOv5FaceWeights.YOLOV5S,
conf_thresh=0.6,
confidence_threshold=0.6,
nms_thresh=0.5
)
# High accuracy
detector = YOLOv5Face(
model_name=YOLOv5FaceWeights.YOLOV5M,
conf_thresh=0.6
confidence_threshold=0.6
)
# Detect faces with landmarks
faces = detector.detect(image)
for face in faces:
bbox = face['bbox'] # [x1, y1, x2, y2]
confidence = face['confidence']
landmarks = face['landmarks'] # 5-point landmarks (5, 2)
bbox = face.bbox # [x1, y1, x2, y2]
confidence = face.confidence
landmarks = face.landmarks # 5-point landmarks (5, 2)
```
---
@@ -259,9 +259,40 @@ landmarks = landmarker.get_landmarks(image, bbox)
from uniface import AgeGender
predictor = AgeGender()
gender, age = predictor.predict(image, bbox)
# Returns: (gender, age_in_years)
# gender: 0 for Female, 1 for Male
result = predictor.predict(image, bbox)
# Returns: AttributeResult with gender, age, sex property
# result.gender: 0 for Female, 1 for Male
# result.sex: "Female" or "Male"
# result.age: age in years
```
---
### FairFace Attributes
| Model Name | Attributes | Params | Size | Use Case |
| ----------- | --------------------- | ------ | ----- | --------------------------- |
| `DEFAULT` | Race, Gender, Age Group | - | 44MB | Balanced demographic prediction |
**Dataset**: Trained on FairFace dataset with balanced demographics
**Note**: FairFace provides more equitable predictions across different racial and gender groups
**Race Categories (7):** White, Black, Latino Hispanic, East Asian, Southeast Asian, Indian, Middle Eastern
**Age Groups (9):** 0-2, 3-9, 10-19, 20-29, 30-39, 40-49, 50-59, 60-69, 70+
#### Usage
```python
from uniface import FairFace
predictor = FairFace()
result = predictor.predict(image, bbox)
# Returns: AttributeResult with gender, age_group, race, sex property
# result.gender: 0 for Female, 1 for Male
# result.sex: "Female" or "Male"
# result.age_group: "20-29", "30-39", etc.
# result.race: "East Asian", "White", etc.
```
---
@@ -286,7 +317,9 @@ from uniface import Emotion
from uniface.constants import DDAMFNWeights
predictor = Emotion(model_name=DDAMFNWeights.AFFECNET7)
emotion, confidence = predictor.predict(image, landmarks)
result = predictor.predict(image, landmarks)
# result.emotion: predicted emotion label
# result.confidence: confidence score
```
---
@@ -324,8 +357,8 @@ gaze_estimator = MobileGaze() # Uses RESNET34
gaze_estimator = MobileGaze(model_name=GazeWeights.MOBILEONE_S0)
# Estimate gaze from face crop
pitch, yaw = gaze_estimator.estimate(face_crop)
print(f"Pitch: {np.degrees(pitch):.1f}°, Yaw: {np.degrees(yaw):.1f}°")
result = gaze_estimator.estimate(face_crop)
print(f"Pitch: {np.degrees(result.pitch):.1f}°, Yaw: {np.degrees(result.yaw):.1f}°")
```
**Note**: Requires face crop as input. Use face detection first to obtain bounding boxes.
@@ -404,6 +437,47 @@ print(f"Detected {len(np.unique(mask))} facial components")
---
## Anti-Spoofing Models
### MiniFASNet Family
Lightweight face anti-spoofing models for liveness detection. Detect if a face is real (live) or fake (photo, video replay, mask).
| Model Name | Size | Scale | Use Case |
| ---------- | ------ | ----- | ----------------------------- |
| `V1SE` | 1.2 MB | 4.0 | Squeeze-and-excitation variant |
| `V2` ⭐ | 1.2 MB | 2.7 | **Recommended default** |
**Dataset**: Trained on face anti-spoofing datasets
**Output**: Returns `SpoofingResult(is_real, confidence)` where is_real: True=Real, False=Fake
#### Usage
```python
from uniface import RetinaFace
from uniface.spoofing import MiniFASNet
from uniface.constants import MiniFASNetWeights
# Default (V2, recommended)
detector = RetinaFace()
spoofer = MiniFASNet()
# V1SE variant
spoofer = MiniFASNet(model_name=MiniFASNetWeights.V1SE)
# Detect and check liveness
faces = detector.detect(image)
for face in faces:
result = spoofer.predict(image, face.bbox)
# result.is_real: True for real, False for fake
label = 'Real' if result.is_real else 'Fake'
print(f"{label}: {result.confidence:.1%}")
```
**Note**: Requires face bounding box from a detector. Use with RetinaFace, SCRFD, or YOLOv5Face.
---
## Model Updates
Models are automatically downloaded and cached on first use. Cache location: `~/.uniface/models/`
@@ -427,10 +501,10 @@ model_path = verify_model_weights(
```bash
# Using the provided script
python scripts/download_model.py
python tools/download_model.py
# Download specific model
python scripts/download_model.py --model MNET_V2
python tools/download_model.py --model MNET_V2
```
---
@@ -445,6 +519,8 @@ python scripts/download_model.py --model MNET_V2
- **Face Recognition Training**: [yakhyo/face-recognition](https://github.com/yakhyo/face-recognition) - ArcFace, MobileFace, SphereFace training code
- **Gaze Estimation Training**: [yakhyo/gaze-estimation](https://github.com/yakhyo/gaze-estimation) - MobileGaze training code and pretrained weights
- **Face Parsing Training**: [yakhyo/face-parsing](https://github.com/yakhyo/face-parsing) - BiSeNet training code and pretrained weights
- **Face Anti-Spoofing**: [yakhyo/face-anti-spoofing](https://github.com/yakhyo/face-anti-spoofing) - MiniFASNet ONNX inference (weights from [minivision-ai/Silent-Face-Anti-Spoofing](https://github.com/minivision-ai/Silent-Face-Anti-Spoofing))
- **FairFace**: [yakhyo/fairface-onnx](https://github.com/yakhyo/fairface-onnx) - FairFace ONNX inference for race, gender, age prediction
- **InsightFace**: [deepinsight/insightface](https://github.com/deepinsight/insightface) - Model architectures and pretrained weights
### Papers

View File

@@ -39,9 +39,9 @@ faces = detector.detect(image)
# Print results
for i, face in enumerate(faces):
print(f"Face {i+1}:")
print(f" Confidence: {face['confidence']:.2f}")
print(f" BBox: {face['bbox']}")
print(f" Landmarks: {len(face['landmarks'])} points")
print(f" Confidence: {face.confidence:.2f}")
print(f" BBox: {face.bbox}")
print(f" Landmarks: {len(face.landmarks)} points")
```
**Output:**
@@ -70,9 +70,9 @@ image = cv2.imread("photo.jpg")
faces = detector.detect(image)
# Extract visualization data
bboxes = [f['bbox'] for f in faces]
scores = [f['confidence'] for f in faces]
landmarks = [f['landmarks'] for f in faces]
bboxes = [f.bbox for f in faces]
scores = [f.confidence for f in faces]
landmarks = [f.landmarks for f in faces]
# Draw on image
draw_detections(
@@ -113,8 +113,8 @@ faces2 = detector.detect(image2)
if faces1 and faces2:
# Extract embeddings
emb1 = recognizer.get_normalized_embedding(image1, faces1[0]['landmarks'])
emb2 = recognizer.get_normalized_embedding(image2, faces2[0]['landmarks'])
emb1 = recognizer.get_normalized_embedding(image1, faces1[0].landmarks)
emb2 = recognizer.get_normalized_embedding(image2, faces2[0].landmarks)
# Compute similarity (cosine similarity)
similarity = np.dot(emb1, emb2.T)[0][0]
@@ -159,9 +159,9 @@ while True:
faces = detector.detect(frame)
# Draw results
bboxes = [f['bbox'] for f in faces]
scores = [f['confidence'] for f in faces]
landmarks = [f['landmarks'] for f in faces]
bboxes = [f.bbox for f in faces]
scores = [f.confidence for f in faces]
landmarks = [f.landmarks for f in faces]
draw_detections(
image=frame,
bboxes=bboxes,
@@ -199,9 +199,11 @@ faces = detector.detect(image)
# Predict attributes
for i, face in enumerate(faces):
gender, age = age_gender.predict(image, face['bbox'])
gender_str = 'Female' if gender == 0 else 'Male'
print(f"Face {i+1}: {gender_str}, {age} years old")
result = age_gender.predict(image, face.bbox)
print(f"Face {i+1}: {result.sex}, {result.age} years old")
# result.gender: 0=Female, 1=Male
# result.sex: "Female" or "Male"
# result.age: age in years
```
**Output:**
@@ -213,6 +215,45 @@ Face 2: Female, 28 years old
---
## 5b. FairFace Attributes (2 minutes)
Detect race, gender, and age group with balanced demographics:
```python
import cv2
from uniface import RetinaFace, FairFace
# Initialize models
detector = RetinaFace()
fairface = FairFace()
# Load image
image = cv2.imread("photo.jpg")
faces = detector.detect(image)
# Predict attributes
for i, face in enumerate(faces):
result = fairface.predict(image, face.bbox)
print(f"Face {i+1}: {result.sex}, {result.age_group}, {result.race}")
# result.gender: 0=Female, 1=Male
# result.sex: "Female" or "Male"
# result.age_group: "20-29", "30-39", etc.
# result.race: "East Asian", "White", etc.
```
**Output:**
```
Face 1: Male, 30-39, East Asian
Face 2: Female, 20-29, White
```
**Race Categories:** White, Black, Latino Hispanic, East Asian, Southeast Asian, Indian, Middle Eastern
**Age Groups:** 0-2, 3-9, 10-19, 20-29, 30-39, 40-49, 50-59, 60-69, 70+
---
## 6. Facial Landmarks (2 minutes)
Detect 106 facial landmarks:
@@ -230,7 +271,7 @@ image = cv2.imread("photo.jpg")
faces = detector.detect(image)
if faces:
landmarks = landmarker.get_landmarks(image, faces[0]['bbox'])
landmarks = landmarker.get_landmarks(image, faces[0].bbox)
print(f"Detected {len(landmarks)} landmarks")
# Draw landmarks
@@ -262,16 +303,15 @@ faces = detector.detect(image)
# Estimate gaze for each face
for i, face in enumerate(faces):
bbox = face['bbox']
x1, y1, x2, y2 = map(int, bbox[:4])
x1, y1, x2, y2 = map(int, face.bbox[:4])
face_crop = image[y1:y2, x1:x2]
if face_crop.size > 0:
pitch, yaw = gaze_estimator.estimate(face_crop)
print(f"Face {i+1}: pitch={np.degrees(pitch):.1f}°, yaw={np.degrees(yaw):.1f}°")
result = gaze_estimator.estimate(face_crop)
print(f"Face {i+1}: pitch={np.degrees(result.pitch):.1f}°, yaw={np.degrees(result.yaw):.1f}°")
# Draw gaze direction
draw_gaze(image, bbox, pitch, yaw)
draw_gaze(image, face.bbox, result.pitch, result.yaw)
cv2.imwrite("gaze_output.jpg", image)
```
@@ -328,7 +368,138 @@ Detected 12 facial components
---
## 9. Batch Processing (3 minutes)
## 9. Face Anonymization (2 minutes)
Automatically blur faces for privacy protection:
```python
from uniface.privacy import anonymize_faces
import cv2
# One-liner: automatic detection and blurring
image = cv2.imread("group_photo.jpg")
anonymized = anonymize_faces(image, method='pixelate')
cv2.imwrite("anonymized.jpg", anonymized)
print("Faces anonymized successfully!")
```
**Manual control with custom parameters:**
```python
from uniface import RetinaFace
from uniface.privacy import BlurFace
# Initialize detector and blurrer
detector = RetinaFace()
blurrer = BlurFace(method='gaussian', blur_strength=5.0)
# Detect and anonymize
faces = detector.detect(image)
anonymized = blurrer.anonymize(image, faces)
cv2.imwrite("output.jpg", anonymized)
```
**Available blur methods:**
```python
# Pixelation (news media standard)
blurrer = BlurFace(method='pixelate', pixel_blocks=8)
# Gaussian blur (smooth, natural)
blurrer = BlurFace(method='gaussian', blur_strength=4.0)
# Black boxes (maximum privacy)
blurrer = BlurFace(method='blackout', color=(0, 0, 0))
# Elliptical blur (natural face shape)
blurrer = BlurFace(method='elliptical', blur_strength=3.0, margin=30)
# Median blur (edge-preserving)
blurrer = BlurFace(method='median', blur_strength=3.0)
```
**Webcam anonymization:**
```python
import cv2
from uniface import RetinaFace
from uniface.privacy import BlurFace
detector = RetinaFace()
blurrer = BlurFace(method='pixelate')
cap = cv2.VideoCapture(0)
while True:
ret, frame = cap.read()
if not ret:
break
faces = detector.detect(frame)
frame = blurrer.anonymize(frame, faces, inplace=True)
cv2.imshow('Anonymized', frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
cap.release()
cv2.destroyAllWindows()
```
**Command-line tool:**
```bash
# Anonymize image with pixelation
python tools/face_anonymize.py --source photo.jpg
# Real-time webcam anonymization
python tools/face_anonymize.py --source 0 --method gaussian
# Custom blur strength
python tools/face_anonymize.py --source photo.jpg --method gaussian --blur-strength 5.0
```
---
## 10. Face Anti-Spoofing (2 minutes)
Detect if a face is real or fake (photo, video replay, mask):
```python
from uniface import RetinaFace
from uniface.spoofing import MiniFASNet
detector = RetinaFace()
spoofer = MiniFASNet() # Uses V2 by default
image = cv2.imread("photo.jpg")
faces = detector.detect(image)
for i, face in enumerate(faces):
result = spoofer.predict(image, face.bbox)
# result.is_real: True for real, False for fake
label = 'Real' if result.is_real else 'Fake'
print(f"Face {i+1}: {label} ({result.confidence:.1%})")
```
**Output:**
```
Face 1: Real (98.5%)
```
**Command-line tool:**
```bash
# Image
python tools/spoofing.py --source photo.jpg
# Webcam
python tools/spoofing.py --source 0
```
---
## 11. Batch Processing (3 minutes)
Process multiple images:
@@ -361,7 +532,7 @@ print("Done!")
---
## 10. Model Selection
## 12. Model Selection
Choose the right model for your use case:
@@ -374,7 +545,7 @@ from uniface.constants import RetinaFaceWeights, SCRFDWeights, YOLOv5FaceWeights
# Fast detection (mobile/edge devices)
detector = RetinaFace(
model_name=RetinaFaceWeights.MNET_025,
conf_thresh=0.7
confidence_threshold=0.7
)
# Balanced (recommended)
@@ -385,14 +556,14 @@ detector = RetinaFace(
# Real-time with high accuracy
detector = YOLOv5Face(
model_name=YOLOv5FaceWeights.YOLOV5S,
conf_thresh=0.6,
confidence_threshold=0.6,
nms_thresh=0.5
)
# High accuracy (server/GPU)
detector = SCRFD(
model_name=SCRFDWeights.SCRFD_10G_KPS,
conf_thresh=0.5
confidence_threshold=0.5
)
```
@@ -497,13 +668,14 @@ Explore interactive examples for common tasks:
| Example | Description | Notebook |
|---------|-------------|----------|
| **Face Detection** | Detect faces and facial landmarks | [face_detection.ipynb](examples/face_detection.ipynb) |
| **Face Alignment** | Align and crop faces for recognition | [face_alignment.ipynb](examples/face_alignment.ipynb) |
| **Face Recognition** | Extract face embeddings and compare faces | [face_analyzer.ipynb](examples/face_analyzer.ipynb) |
| **Face Verification** | Compare two faces to verify identity | [face_verification.ipynb](examples/face_verification.ipynb) |
| **Face Search** | Find a person in a group photo | [face_search.ipynb](examples/face_search.ipynb) |
| **Face Parsing** | Segment face into semantic components | [face_parsing.ipynb](examples/face_parsing.ipynb) |
| **Gaze Estimation** | Estimate gaze direction | [gaze_estimation.ipynb](examples/gaze_estimation.ipynb) |
| **Face Detection** | Detect faces and facial landmarks | [01_face_detection.ipynb](examples/01_face_detection.ipynb) |
| **Face Alignment** | Align and crop faces for recognition | [02_face_alignment.ipynb](examples/02_face_alignment.ipynb) |
| **Face Verification** | Compare two faces to verify identity | [03_face_verification.ipynb](examples/03_face_verification.ipynb) |
| **Face Search** | Find a person in a group photo | [04_face_search.ipynb](examples/04_face_search.ipynb) |
| **Face Analyzer** | All-in-one detection, recognition & attributes | [05_face_analyzer.ipynb](examples/05_face_analyzer.ipynb) |
| **Face Parsing** | Segment face into semantic components | [06_face_parsing.ipynb](examples/06_face_parsing.ipynb) |
| **Face Anonymization** | Blur or pixelate faces for privacy protection | [07_face_anonymization.ipynb](examples/07_face_anonymization.ipynb) |
| **Gaze Estimation** | Estimate gaze direction | [08_gaze_estimation.ipynb](examples/08_gaze_estimation.ipynb) |
### Additional Resources
@@ -519,4 +691,5 @@ Explore interactive examples for common tasks:
- **Face Recognition Training**: [yakhyo/face-recognition](https://github.com/yakhyo/face-recognition)
- **Gaze Estimation Training**: [yakhyo/gaze-estimation](https://github.com/yakhyo/gaze-estimation)
- **Face Parsing Training**: [yakhyo/face-parsing](https://github.com/yakhyo/face-parsing)
- **FairFace**: [yakhyo/fairface-onnx](https://github.com/yakhyo/fairface-onnx) - Race, gender, age prediction
- **InsightFace**: [deepinsight/insightface](https://github.com/deepinsight/insightface)

195
README.md
View File

@@ -1,11 +1,15 @@
# UniFace: All-in-One Face Analysis Library
<div align="center">
[![License](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)
[![Python](https://img.shields.io/badge/Python-3.10%2B-blue)](https://www.python.org/)
[![Python](https://img.shields.io/badge/Python-3.11%2B-blue)](https://www.python.org/)
[![PyPI](https://img.shields.io/pypi/v/uniface.svg)](https://pypi.org/project/uniface/)
[![CI](https://github.com/yakhyo/uniface/actions/workflows/ci.yml/badge.svg)](https://github.com/yakhyo/uniface/actions)
[![Downloads](https://pepy.tech/badge/uniface)](https://pepy.tech/project/uniface)
[![DeepWiki](https://img.shields.io/badge/DeepWiki-yakhyo%2Funiface-blue.svg?logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACwAAAAyCAYAAAAnWDnqAAAAAXNSR0IArs4c6QAAA05JREFUaEPtmUtyEzEQhtWTQyQLHNak2AB7ZnyXZMEjXMGeK/AIi+QuHrMnbChYY7MIh8g01fJoopFb0uhhEqqcbWTp06/uv1saEDv4O3n3dV60RfP947Mm9/SQc0ICFQgzfc4CYZoTPAswgSJCCUJUnAAoRHOAUOcATwbmVLWdGoH//PB8mnKqScAhsD0kYP3j/Yt5LPQe2KvcXmGvRHcDnpxfL2zOYJ1mFwrryWTz0advv1Ut4CJgf5uhDuDj5eUcAUoahrdY/56ebRWeraTjMt/00Sh3UDtjgHtQNHwcRGOC98BJEAEymycmYcWwOprTgcB6VZ5JK5TAJ+fXGLBm3FDAmn6oPPjR4rKCAoJCal2eAiQp2x0vxTPB3ALO2CRkwmDy5WohzBDwSEFKRwPbknEggCPB/imwrycgxX2NzoMCHhPkDwqYMr9tRcP5qNrMZHkVnOjRMWwLCcr8ohBVb1OMjxLwGCvjTikrsBOiA6fNyCrm8V1rP93iVPpwaE+gO0SsWmPiXB+jikdf6SizrT5qKasx5j8ABbHpFTx+vFXp9EnYQmLx02h1QTTrl6eDqxLnGjporxl3NL3agEvXdT0WmEost648sQOYAeJS9Q7bfUVoMGnjo4AZdUMQku50McDcMWcBPvr0SzbTAFDfvJqwLzgxwATnCgnp4wDl6Aa+Ax283gghmj+vj7feE2KBBRMW3FzOpLOADl0Isb5587h/U4gGvkt5v60Z1VLG8BhYjbzRwyQZemwAd6cCR5/XFWLYZRIMpX39AR0tjaGGiGzLVyhse5C9RKC6ai42ppWPKiBagOvaYk8lO7DajerabOZP46Lby5wKjw1HCRx7p9sVMOWGzb/vA1hwiWc6jm3MvQDTogQkiqIhJV0nBQBTU+3okKCFDy9WwferkHjtxib7t3xIUQtHxnIwtx4mpg26/HfwVNVDb4oI9RHmx5WGelRVlrtiw43zboCLaxv46AZeB3IlTkwouebTr1y2NjSpHz68WNFjHvupy3q8TFn3Hos2IAk4Ju5dCo8B3wP7VPr/FGaKiG+T+v+TQqIrOqMTL1VdWV1DdmcbO8KXBz6esmYWYKPwDL5b5FA1a0hwapHiom0r/cKaoqr+27/XcrS5UwSMbQAAAABJRU5ErkJggg==)](https://deepwiki.com/yakhyo/uniface)
[![Downloads](https://static.pepy.tech/badge/uniface)](https://pepy.tech/project/uniface)
[![DeepWiki](https://img.shields.io/badge/DeepWiki-AI_Docs-blue.svg?logo=bookstack)](https://deepwiki.com/yakhyo/uniface)
</div>
<div align="center">
<img src=".github/logos/logo_web.webp" width=75%>
@@ -22,7 +26,9 @@
- **Face Recognition**: ArcFace, MobileFace, and SphereFace embeddings
- **Face Parsing**: BiSeNet-based semantic segmentation with 19 facial component classes
- **Gaze Estimation**: Real-time gaze direction prediction with MobileGaze
- **Attribute Analysis**: Age, gender, and emotion detection
- **Attribute Analysis**: Age, gender, race (FairFace), and emotion detection
- **Anti-Spoofing**: Face liveness detection with MiniFASNet models
- **Face Anonymization**: Privacy-preserving face blurring with 5 methods (pixelate, gaussian, blackout, elliptical, median)
- **Face Alignment**: Precise alignment for downstream tasks
- **Hardware Acceleration**: ARM64 optimizations (Apple Silicon), CUDA (NVIDIA), CPU fallback
- **Simple API**: Intuitive factory functions and clean interfaces
@@ -99,9 +105,9 @@ faces = detector.detect(image)
# Process results
for face in faces:
bbox = face['bbox'] # [x1, y1, x2, y2]
confidence = face['confidence']
landmarks = face['landmarks'] # 5-point landmarks
bbox = face.bbox # np.ndarray [x1, y1, x2, y2]
confidence = face.confidence
landmarks = face.landmarks # np.ndarray (5, 2) landmarks
print(f"Face detected with confidence: {confidence:.2f}")
```
@@ -119,8 +125,8 @@ recognizer = ArcFace()
faces1 = detector.detect(image1)
faces2 = detector.detect(image2)
embedding1 = recognizer.get_normalized_embedding(image1, faces1[0]['landmarks'])
embedding2 = recognizer.get_normalized_embedding(image2, faces2[0]['landmarks'])
embedding1 = recognizer.get_normalized_embedding(image1, faces1[0].landmarks)
embedding2 = recognizer.get_normalized_embedding(image2, faces2[0].landmarks)
# Compare faces
similarity = compute_similarity(embedding1, embedding2)
@@ -136,7 +142,7 @@ detector = RetinaFace()
landmarker = Landmark106()
faces = detector.detect(image)
landmarks = landmarker.get_landmarks(image, faces[0]['bbox'])
landmarks = landmarker.get_landmarks(image, faces[0].bbox)
# Returns 106 (x, y) landmark points
```
@@ -149,9 +155,28 @@ detector = RetinaFace()
age_gender = AgeGender()
faces = detector.detect(image)
gender, age = age_gender.predict(image, faces[0]['bbox'])
gender_str = 'Female' if gender == 0 else 'Male'
print(f"{gender_str}, {age} years old")
result = age_gender.predict(image, faces[0].bbox)
print(f"{result.sex}, {result.age} years old")
# result.gender: 0=Female, 1=Male
# result.sex: "Female" or "Male"
# result.age: age in years
```
### FairFace Attributes (Race, Gender, Age Group)
```python
from uniface import RetinaFace, FairFace
detector = RetinaFace()
fairface = FairFace()
faces = detector.detect(image)
result = fairface.predict(image, faces[0].bbox)
print(f"{result.sex}, {result.age_group}, {result.race}")
# result.gender: 0=Female, 1=Male
# result.sex: "Female" or "Male"
# result.age_group: "20-29", "30-39", etc.
# result.race: "East Asian", "White", etc.
```
### Gaze Estimation
@@ -166,15 +191,14 @@ gaze_estimator = MobileGaze()
faces = detector.detect(image)
for face in faces:
bbox = face['bbox']
x1, y1, x2, y2 = map(int, bbox[:4])
x1, y1, x2, y2 = map(int, face.bbox[:4])
face_crop = image[y1:y2, x1:x2]
pitch, yaw = gaze_estimator.estimate(face_crop)
print(f"Gaze: pitch={np.degrees(pitch):.1f}°, yaw={np.degrees(yaw):.1f}°")
result = gaze_estimator.estimate(face_crop)
print(f"Gaze: pitch={np.degrees(result.pitch):.1f}°, yaw={np.degrees(result.yaw):.1f}°")
# Visualize
draw_gaze(image, bbox, pitch, yaw)
draw_gaze(image, face.bbox, result.pitch, result.yaw)
```
### Face Parsing
@@ -198,6 +222,78 @@ vis_result = vis_parsing_maps(face_rgb, mask, save_image=False)
print(f"Unique classes: {len(np.unique(mask))}")
```
### Face Anti-Spoofing
Detect if a face is real or fake (photo, video replay, mask):
```python
from uniface import RetinaFace
from uniface.spoofing import MiniFASNet
detector = RetinaFace()
spoofer = MiniFASNet() # Uses V2 by default
faces = detector.detect(image)
for face in faces:
result = spoofer.predict(image, face.bbox)
# result.is_real: True for real, False for fake
# result.confidence: confidence score
label = 'Real' if result.is_real else 'Fake'
print(f"{label}: {result.confidence:.1%}")
```
### Face Anonymization
Protect privacy by blurring or pixelating faces with 5 different methods:
```python
from uniface import RetinaFace
from uniface.privacy import BlurFace, anonymize_faces
import cv2
# Method 1: One-liner with automatic detection
image = cv2.imread("photo.jpg")
anonymized = anonymize_faces(image, method='pixelate')
cv2.imwrite("anonymized.jpg", anonymized)
# Method 2: Manual control with custom parameters
detector = RetinaFace()
blurrer = BlurFace(method='gaussian', blur_strength=5.0)
faces = detector.detect(image)
anonymized = blurrer.anonymize(image, faces)
# Available blur methods:
methods = {
'pixelate': BlurFace(method='pixelate', pixel_blocks=10), # Blocky effect (news media standard)
'gaussian': BlurFace(method='gaussian', blur_strength=3.0), # Smooth, natural blur
'blackout': BlurFace(method='blackout', color=(0, 0, 0)), # Solid color boxes (maximum privacy)
'elliptical': BlurFace(method='elliptical', margin=20), # Soft oval blur (natural face shape)
'median': BlurFace(method='median', blur_strength=3.0) # Edge-preserving blur
}
# Real-time webcam anonymization
cap = cv2.VideoCapture(0)
detector = RetinaFace()
blurrer = BlurFace(method='pixelate')
while True:
ret, frame = cap.read()
if not ret:
break
faces = detector.detect(frame)
frame = blurrer.anonymize(frame, faces, inplace=True)
cv2.imshow('Anonymized', frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
cap.release()
cv2.destroyAllWindows()
```
---
## Documentation
@@ -216,6 +312,7 @@ print(f"Unique classes: {len(np.unique(mask))}")
from uniface.detection import RetinaFace, SCRFD
from uniface.recognition import ArcFace
from uniface.landmark import Landmark106
from uniface.privacy import BlurFace, anonymize_faces
from uniface.constants import SCRFDWeights
@@ -225,7 +322,7 @@ detector = RetinaFace()
# Create with custom config
detector = SCRFD(
model_name=SCRFDWeights.SCRFD_10G_KPS, # SCRFDWeights.SCRFD_500M_KPS
conf_thresh=0.4,
confidence_threshold=0.4,
input_size=(640, 640)
)
# Or with defaults settings: detector = SCRFD()
@@ -244,16 +341,16 @@ from uniface.constants import RetinaFaceWeights, YOLOv5FaceWeights
# Detection
detector = RetinaFace(
model_name=RetinaFaceWeights.MNET_V2,
conf_thresh=0.5,
nms_thresh=0.4
confidence_threshold=0.5,
nms_threshold=0.4
)
# Or detector = RetinaFace()
# YOLOv5-Face detection
detector = YOLOv5Face(
model_name=YOLOv5FaceWeights.YOLOV5S,
conf_thresh=0.6,
nms_thresh=0.5
confidence_threshold=0.6,
nms_threshold=0.5
)
# Or detector = YOLOv5Face
@@ -269,7 +366,7 @@ recognizer = SphereFace() # Angular softmax alternative
from uniface import detect_faces
# One-line face detection
faces = detect_faces(image, method='retinaface', conf_thresh=0.8) # methods: retinaface, scrfd, yolov5face
faces = detect_faces(image, method='retinaface', confidence_threshold=0.8) # methods: retinaface, scrfd, yolov5face
```
### Key Parameters (quick reference)
@@ -278,9 +375,9 @@ faces = detect_faces(image, method='retinaface', conf_thresh=0.8) # methods: re
| Class | Key params (defaults) | Notes |
| -------------- | ------------------------------------------------------------------------------------------------------------------------------------ | ---------------------------------------------- |
| `RetinaFace` | `model_name=RetinaFaceWeights.MNET_V2`, `conf_thresh=0.5`, `nms_thresh=0.4`, `input_size=(640, 640)`, `dynamic_size=False` | Supports 5-point landmarks |
| `SCRFD` | `model_name=SCRFDWeights.SCRFD_10G_KPS`, `conf_thresh=0.5`, `nms_thresh=0.4`, `input_size=(640, 640)` | Supports 5-point landmarks |
| `YOLOv5Face` | `model_name=YOLOv5FaceWeights.YOLOV5S`, `conf_thresh=0.6`, `nms_thresh=0.5`, `input_size=640` (fixed) | Supports 5-point landmarks; models: YOLOV5N/S/M; `input_size` must be 640 |
| `RetinaFace` | `model_name=RetinaFaceWeights.MNET_V2`, `confidence_threshold=0.5`, `nms_threshold=0.4`, `input_size=(640, 640)`, `dynamic_size=False` | Supports 5-point landmarks |
| `SCRFD` | `model_name=SCRFDWeights.SCRFD_10G_KPS`, `confidence_threshold=0.5`, `nms_threshold=0.4`, `input_size=(640, 640)` | Supports 5-point landmarks |
| `YOLOv5Face` | `model_name=YOLOv5FaceWeights.YOLOV5S`, `confidence_threshold=0.6`, `nms_threshold=0.5`, `input_size=640` (fixed) | Supports 5-point landmarks; models: YOLOV5N/S/M; `input_size` must be 640 |
**Recognition**
@@ -295,14 +392,15 @@ faces = detect_faces(image, method='retinaface', conf_thresh=0.8) # methods: re
| Class | Key params (defaults) | Notes |
| --------------- | --------------------------------------------------------------------- | --------------------------------------- |
| `Landmark106` | No required params | 106-point landmarks |
| `AgeGender` | `model_name=AgeGenderWeights.DEFAULT`; `input_size` auto-detected | Requires bbox; ONNXRuntime |
| `AgeGender` | `model_name=AgeGenderWeights.DEFAULT`; `input_size` auto-detected | Returns `AttributeResult` with gender, age |
| `FairFace` | `model_name=FairFaceWeights.DEFAULT`, `input_size=(224, 224)` | Returns `AttributeResult` with gender, age_group, race |
| `Emotion` | `model_weights=DDAMFNWeights.AFFECNET7`, `input_size=(112, 112)` | Requires 5-point landmarks; TorchScript |
**Gaze Estimation**
| Class | Key params (defaults) | Notes |
| ------------- | ------------------------------------------ | ------------------------------------ |
| `MobileGaze` | `model_name=GazeWeights.RESNET34` | Returns (pitch, yaw) angles in radians; trained on Gaze360 |
| `MobileGaze` | `model_name=GazeWeights.RESNET34` | Returns `GazeResult(pitch, yaw)` in radians; trained on Gaze360 |
**Face Parsing**
@@ -310,6 +408,12 @@ faces = detect_faces(image, method='retinaface', conf_thresh=0.8) # methods: re
| ---------- | ---------------------------------------- | ------------------------------------ |
| `BiSeNet` | `model_name=ParsingWeights.RESNET18`, `input_size=(512, 512)` | 19 facial component classes; BiSeNet architecture with ResNet backbone |
**Anti-Spoofing**
| Class | Key params (defaults) | Notes |
| ------------- | ----------------------------------------- | ------------------------------------ |
| `MiniFASNet` | `model_name=MiniFASNetWeights.V2` | Returns `SpoofingResult(is_real, confidence)` |
---
## Model Performance
@@ -332,7 +436,7 @@ _Accuracy values from original papers: [RetinaFace](https://arxiv.org/abs/1905.0
**Benchmark on your hardware:**
```bash
python scripts/run_detection.py --image assets/test.jpg --iterations 100
python tools/detection.py --source assets/test.jpg --iterations 100
```
See [MODELS.md](MODELS.md) for detailed model information and selection guide.
@@ -351,13 +455,14 @@ Interactive examples covering common face analysis tasks:
| Example | Description | Notebook |
|---------|-------------|----------|
| **Face Detection** | Detect faces and facial landmarks | [face_detection.ipynb](examples/face_detection.ipynb) |
| **Face Alignment** | Align and crop faces for recognition | [face_alignment.ipynb](examples/face_alignment.ipynb) |
| **Face Recognition** | Extract face embeddings and compare faces | [face_analyzer.ipynb](examples/face_analyzer.ipynb) |
| **Face Verification** | Compare two faces to verify identity | [face_verification.ipynb](examples/face_verification.ipynb) |
| **Face Search** | Find a person in a group photo | [face_search.ipynb](examples/face_search.ipynb) |
| **Face Parsing** | Segment face into semantic components | [face_parsing.ipynb](examples/face_parsing.ipynb) |
| **Gaze Estimation** | Estimate gaze direction from face images | [gaze_estimation.ipynb](examples/gaze_estimation.ipynb) |
| **Face Detection** | Detect faces and facial landmarks | [01_face_detection.ipynb](examples/01_face_detection.ipynb) |
| **Face Alignment** | Align and crop faces for recognition | [02_face_alignment.ipynb](examples/02_face_alignment.ipynb) |
| **Face Verification** | Compare two faces to verify identity | [03_face_verification.ipynb](examples/03_face_verification.ipynb) |
| **Face Search** | Find a person in a group photo | [04_face_search.ipynb](examples/04_face_search.ipynb) |
| **Face Analyzer** | All-in-one detection, recognition & attributes | [05_face_analyzer.ipynb](examples/05_face_analyzer.ipynb) |
| **Face Parsing** | Segment face into semantic components | [06_face_parsing.ipynb](examples/06_face_parsing.ipynb) |
| **Face Anonymization** | Blur or pixelate faces for privacy protection | [07_face_anonymization.ipynb](examples/07_face_anonymization.ipynb) |
| **Gaze Estimation** | Estimate gaze direction from face images | [08_gaze_estimation.ipynb](examples/08_gaze_estimation.ipynb) |
### Webcam Face Detection
@@ -377,9 +482,9 @@ while True:
faces = detector.detect(frame)
# Extract data for visualization
bboxes = [f['bbox'] for f in faces]
scores = [f['confidence'] for f in faces]
landmarks = [f['landmarks'] for f in faces]
bboxes = [f.bbox for f in faces]
scores = [f.confidence for f in faces]
landmarks = [f.landmarks for f in faces]
draw_detections(
image=frame,
@@ -413,7 +518,7 @@ for person_id, image_path in person_images.items():
faces = detector.detect(image)
if faces:
embedding = recognizer.get_normalized_embedding(
image, faces[0]['landmarks']
image, faces[0].landmarks
)
database[person_id] = embedding
@@ -422,7 +527,7 @@ query_image = cv2.imread("query.jpg")
query_faces = detector.detect(query_image)
if query_faces:
query_embedding = recognizer.get_normalized_embedding(
query_image, query_faces[0]['landmarks']
query_image, query_faces[0].landmarks
)
# Find best match
@@ -551,12 +656,14 @@ uniface/
│ ├── parsing/ # Face parsing
│ ├── gaze/ # Gaze estimation
│ ├── attribute/ # Age, gender, emotion
│ ├── spoofing/ # Face anti-spoofing
│ ├── privacy/ # Face anonymization & blurring
│ ├── onnx_utils.py # ONNX Runtime utilities
│ ├── model_store.py # Model download & caching
│ └── visualization.py # Drawing utilities
├── tests/ # Unit tests
├── examples/ # Example notebooks
└── scripts/ # Utility scripts
└── tools/ # CLI utilities
```
---
@@ -568,6 +675,8 @@ uniface/
- **Face Recognition Training**: [yakhyo/face-recognition](https://github.com/yakhyo/face-recognition) - ArcFace, MobileFace, SphereFace training code
- **Face Parsing Training**: [yakhyo/face-parsing](https://github.com/yakhyo/face-parsing) - BiSeNet face parsing training code and pretrained weights
- **Gaze Estimation Training**: [yakhyo/gaze-estimation](https://github.com/yakhyo/gaze-estimation) - MobileGaze training code and pretrained weights
- **Face Anti-Spoofing**: [yakhyo/face-anti-spoofing](https://github.com/yakhyo/face-anti-spoofing) - MiniFASNet ONNX inference (weights from [minivision-ai/Silent-Face-Anti-Spoofing](https://github.com/minivision-ai/Silent-Face-Anti-Spoofing))
- **FairFace**: [yakhyo/fairface-onnx](https://github.com/yakhyo/fairface-onnx) - FairFace ONNX inference for race, gender, age prediction
- **InsightFace**: [deepinsight/insightface](https://github.com/deepinsight/insightface) - Model architectures and pretrained weights
## Contributing

View File

View File

@@ -44,7 +44,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"1.3.1\n"
"2.0.0\n"
]
}
],
@@ -82,8 +82,8 @@
],
"source": [
"detector = RetinaFace(\n",
" conf_thresh=0.5,\n",
" nms_thresh=0.4,\n",
" confidence_threshold=0.5,\n",
" nms_threshold=0.4,\n",
")"
]
},
@@ -153,14 +153,14 @@
"# Load image\n",
"image = cv2.imread(image_path)\n",
"\n",
"# Detect faces - returns list of face dictionaries\n",
"# Detect faces - returns list of Face objects\n",
"faces = detector.detect(image)\n",
"print(f'Detected {len(faces)} face(s)')\n",
"\n",
"# Unpack face data for visualization\n",
"bboxes = [f['bbox'] for f in faces]\n",
"scores = [f['confidence'] for f in faces]\n",
"landmarks = [f['landmarks'] for f in faces]\n",
"bboxes = [f.bbox for f in faces]\n",
"scores = [f.confidence for f in faces]\n",
"landmarks = [f.landmarks for f in faces]\n",
"\n",
"# Draw detections\n",
"draw_detections(image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=0.6, fancy_bbox=True)\n",
@@ -211,9 +211,9 @@
"faces = detector.detect(image, max_num=2)\n",
"print(f'Detected {len(faces)} face(s)')\n",
"\n",
"bboxes = [f['bbox'] for f in faces]\n",
"scores = [f['confidence'] for f in faces]\n",
"landmarks = [f['landmarks'] for f in faces]\n",
"bboxes = [f.bbox for f in faces]\n",
"scores = [f.confidence for f in faces]\n",
"landmarks = [f.landmarks for f in faces]\n",
"\n",
"draw_detections(image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=0.6, fancy_bbox=True)\n",
"\n",
@@ -258,9 +258,9 @@
"faces = detector.detect(image, max_num=5)\n",
"print(f'Detected {len(faces)} face(s)')\n",
"\n",
"bboxes = [f['bbox'] for f in faces]\n",
"scores = [f['confidence'] for f in faces]\n",
"landmarks = [f['landmarks'] for f in faces]\n",
"bboxes = [f.bbox for f in faces]\n",
"scores = [f.confidence for f in faces]\n",
"landmarks = [f.landmarks for f in faces]\n",
"\n",
"draw_detections(image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=0.6, fancy_bbox=True)\n",
"\n",
@@ -274,7 +274,8 @@
"source": [
"## Notes\n",
"\n",
"- `detect()` returns a list of dictionaries with keys: `bbox`, `confidence`, `landmarks`\n",
"- `detect()` returns a list of `Face` objects with attributes: `bbox`, `confidence`, `landmarks`\n",
"- Access attributes using dot notation: `face.bbox`, `face.confidence`, `face.landmarks`\n",
"- Adjust `conf_thresh` and `nms_thresh` for your use case\n",
"- Use `max_num` to limit detected faces"
]

View File

@@ -48,7 +48,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"1.3.1\n"
"2.0.0\n"
]
}
],
@@ -87,8 +87,8 @@
],
"source": [
"detector = RetinaFace(\n",
" conf_thresh=0.5,\n",
" nms_thresh=0.4,\n",
" confidence_threshold=0.5,\n",
" nms_threshold=0.4,\n",
")"
]
},
@@ -140,13 +140,13 @@
"\n",
" # Draw detections\n",
" bbox_image = image.copy()\n",
" bboxes = [f['bbox'] for f in faces]\n",
" scores = [f['confidence'] for f in faces]\n",
" landmarks = [f['landmarks'] for f in faces]\n",
" bboxes = [f.bbox for f in faces]\n",
" scores = [f.confidence for f in faces]\n",
" landmarks = [f.landmarks for f in faces]\n",
" draw_detections(image=bbox_image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=0.6, fancy_bbox=True)\n",
"\n",
" # Align first detected face (returns aligned image and inverse transform matrix)\n",
" first_landmarks = faces[0]['landmarks']\n",
" first_landmarks = faces[0].landmarks\n",
" aligned_image, _ = face_alignment(image, first_landmarks, image_size=112)\n",
"\n",
" # Convert BGR to RGB for visualization\n",
@@ -202,7 +202,8 @@
"source": [
"## Notes\n",
"\n",
"- `detect()` returns a list of face dictionaries with `bbox`, `confidence`, `landmarks`\n",
"- `detect()` returns a list of `Face` objects with `bbox`, `confidence`, `landmarks` attributes\n",
"- Access attributes using dot notation: `face.bbox`, `face.landmarks`\n",
"- `face_alignment()` uses 5-point landmarks to align and crop the face\n",
"- Default output size is 112x112 (standard for face recognition models)\n"
]

View File

@@ -37,7 +37,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"1.3.1\n"
"2.0.0\n"
]
}
],
@@ -78,7 +78,7 @@
],
"source": [
"analyzer = FaceAnalyzer(\n",
" detector=RetinaFace(conf_thresh=0.5),\n",
" detector=RetinaFace(confidence_threshold=0.5),\n",
" recognizer=ArcFace()\n",
")"
]

View File

@@ -42,7 +42,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"1.3.1\n"
"2.0.0\n"
]
}
],
@@ -74,7 +74,7 @@
],
"source": [
"analyzer = FaceAnalyzer(\n",
" detector=RetinaFace(conf_thresh=0.5),\n",
" detector=RetinaFace(confidence_threshold=0.5),\n",
" recognizer=ArcFace()\n",
")"
]

View File

@@ -44,7 +44,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"1.3.1\n"
"2.0.0\n"
]
}
],
@@ -88,7 +88,7 @@
],
"source": [
"analyzer = FaceAnalyzer(\n",
" detector=RetinaFace(conf_thresh=0.5),\n",
" detector=RetinaFace(confidence_threshold=0.5),\n",
" recognizer=ArcFace(),\n",
" age_gender=AgeGender()\n",
")"

View File

@@ -46,7 +46,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"UniFace version: 1.5.0\n"
"UniFace version: 2.0.0\n"
]
}
],
@@ -365,7 +365,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "base",
"language": "python",
"name": "python3"
},
@@ -379,7 +379,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.0"
"version": "3.13.5"
}
},
"nbformat": 4,

File diff suppressed because one or more lines are too long

View File

@@ -44,7 +44,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"UniFace version: 1.4.0\n"
"UniFace version: 2.0.0\n"
]
}
],
@@ -86,7 +86,7 @@
],
"source": [
"# Initialize face detector\n",
"detector = RetinaFace(conf_thresh=0.5)\n",
"detector = RetinaFace(confidence_threshold=0.5)\n",
"\n",
"# Initialize gaze estimator (uses ResNet34 by default)\n",
"gaze_estimator = MobileGaze()"
@@ -103,7 +103,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": null,
"metadata": {},
"outputs": [
{
@@ -152,8 +152,7 @@
"\n",
" # Estimate gaze for each face\n",
" for i, face in enumerate(faces):\n",
" bbox = face['bbox']\n",
" x1, y1, x2, y2 = map(int, bbox[:4])\n",
" x1, y1, x2, y2 = map(int, face.bbox[:4])\n",
" face_crop = image[y1:y2, x1:x2]\n",
"\n",
" if face_crop.size > 0:\n",
@@ -164,7 +163,7 @@
" print(f' Face {i+1}: pitch={pitch_deg:.1f}°, yaw={yaw_deg:.1f}°')\n",
"\n",
" # Draw gaze without angle text\n",
" draw_gaze(image, bbox, pitch, yaw, draw_angles=False)\n",
" draw_gaze(image, face.bbox, pitch, yaw, draw_angles=False)\n",
"\n",
" # Convert BGR to RGB for display\n",
" original_rgb = cv2.cvtColor(original, cv2.COLOR_BGR2RGB)\n",
@@ -249,7 +248,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "base",
"language": "python",
"name": "python3"
},
@@ -263,7 +262,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.0"
"version": "3.13.5"
}
},
"nbformat": 4,

View File

@@ -1,6 +1,6 @@
[project]
name = "uniface"
version = "1.5.1"
version = "2.0.0"
description = "UniFace: A Comprehensive Library for Face Detection, Recognition, Landmark Analysis, Face Parsing, Gaze Estimation, Age, and Gender Detection"
readme = "README.md"
license = { text = "MIT" }
@@ -9,7 +9,7 @@ maintainers = [
{ name = "Yakhyokhuja Valikhujaev", email = "yakhyo9696@gmail.com" },
]
requires-python = ">=3.10,<3.14"
requires-python = ">=3.11,<3.14"
keywords = [
"face-detection",
"face-recognition",
@@ -34,7 +34,6 @@ classifiers = [
"License :: OSI Approved :: MIT License",
"Operating System :: OS Independent",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
@@ -73,7 +72,7 @@ uniface = ["py.typed"]
[tool.ruff]
line-length = 120
target-version = "py310"
target-version = "py311"
exclude = [
".git",
".ruff_cache",
@@ -90,13 +89,60 @@ exclude = [
[tool.ruff.format]
quote-style = "single"
docstring-code-format = true
[tool.ruff.lint]
select = ["E", "F", "I", "W"]
select = [
"E", # pycodestyle errors
"F", # pyflakes
"I", # isort
"W", # pycodestyle warnings
"UP", # pyupgrade (modern Python syntax)
"B", # flake8-bugbear
"C4", # flake8-comprehensions
"SIM", # flake8-simplify
"RUF", # Ruff-specific rules
]
ignore = [
"E501", # Line too long (handled by formatter)
"B008", # Function call in default argument (common in FastAPI/Click)
"SIM108", # Use ternary operator (can reduce readability)
"RUF022", # Allow logical grouping in __all__ instead of alphabetical sorting
]
[tool.ruff.lint.flake8-quotes]
docstring-quotes = "double"
[tool.ruff.lint.isort]
force-single-line = false
force-sort-within-sections = true
known-first-party = ["uniface"]
section-order = [
"future",
"standard-library",
"third-party",
"first-party",
"local-folder",
]
[tool.ruff.lint.pydocstyle]
convention = "google"
[tool.mypy]
python_version = "3.11"
warn_return_any = false
warn_unused_ignores = true
ignore_missing_imports = true
exclude = ["tests/", "scripts/", "examples/"]
# Disable strict return type checking for numpy operations
disable_error_code = ["no-any-return"]
[tool.bandit]
exclude_dirs = ["tests", "scripts", "examples"]
skips = ["B101", "B614"] # B101: assert, B614: torch.jit.load (models are SHA256 verified)
[tool.pytest.ini_options]
testpaths = ["tests"]
python_files = ["test_*.py"]
python_functions = ["test_*"]
addopts = "-v --tb=short"
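A side note on the expanded ruff selection above: the `UP` (pyupgrade) rules favor modern, PEP 604-style syntax on Python 3.11, which is the style behind the `isinstance` update that appears later in this diff (tests/test_utils.py). A minimal, illustrative example, not taken from the project sources:

```python
import numpy as np

similarity = np.float32(0.42)

# Pre-PEP 604 spelling (tuple of types):
assert isinstance(similarity, (float, np.floating))

# PEP 604 union spelling, as used in tests/test_utils.py after this change:
assert isinstance(similarity, float | np.floating)
```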


@@ -1,79 +0,0 @@
# Scripts
Scripts for testing UniFace features.
## Available Scripts
| Script | Description |
|--------|-------------|
| `run_detection.py` | Face detection on image or webcam |
| `run_age_gender.py` | Age and gender prediction |
| `run_emotion.py` | Emotion detection (7 or 8 emotions) |
| `run_gaze_estimation.py` | Gaze direction estimation |
| `run_landmarks.py` | 106-point facial landmark detection |
| `run_recognition.py` | Face embedding extraction and comparison |
| `run_face_analyzer.py` | Complete face analysis (detection + recognition + attributes) |
| `run_face_search.py` | Real-time face matching against reference |
| `run_video_detection.py` | Face detection on video files |
| `batch_process.py` | Batch process folder of images |
| `download_model.py` | Download model weights |
| `sha256_generate.py` | Generate SHA256 hash for model files |
## Usage Examples
```bash
# Face detection
python scripts/run_detection.py --image assets/test.jpg
python scripts/run_detection.py --webcam
# Age and gender
python scripts/run_age_gender.py --image assets/test.jpg
python scripts/run_age_gender.py --webcam
# Emotion detection
python scripts/run_emotion.py --image assets/test.jpg
python scripts/run_emotion.py --webcam
# Gaze estimation
python scripts/run_gaze_estimation.py --image assets/test.jpg
python scripts/run_gaze_estimation.py --webcam
# Landmarks
python scripts/run_landmarks.py --image assets/test.jpg
python scripts/run_landmarks.py --webcam
# Face recognition (extract embedding)
python scripts/run_recognition.py --image assets/test.jpg
# Face comparison
python scripts/run_recognition.py --image1 face1.jpg --image2 face2.jpg
# Face search (match webcam against reference)
python scripts/run_face_search.py --image reference.jpg
# Video processing
python scripts/run_video_detection.py --input video.mp4 --output output.mp4
# Batch processing
python scripts/batch_process.py --input images/ --output results/
# Download models
python scripts/download_model.py --model-type retinaface
python scripts/download_model.py # downloads all
```
## Common Options
| Option | Description |
|--------|-------------|
| `--image` | Path to input image |
| `--webcam` | Use webcam instead of image |
| `--method` | Choose detector: `retinaface`, `scrfd`, `yolov5face` |
| `--threshold` | Visualization confidence threshold (default: 0.25) |
| `--save_dir` | Output directory (default: `outputs`) |
## Quick Test
```bash
python scripts/run_detection.py --image assets/test.jpg
```


@@ -1,130 +0,0 @@
# Age and gender prediction on detected faces
# Usage: python run_age_gender.py --image path/to/image.jpg
# python run_age_gender.py --webcam
import argparse
import os
from pathlib import Path
import cv2
from uniface import SCRFD, AgeGender, RetinaFace
from uniface.visualization import draw_detections
def draw_age_gender_label(image, bbox, gender_id: int, age: int):
"""Draw age/gender label above the bounding box."""
x1, y1 = int(bbox[0]), int(bbox[1])
gender_str = 'Female' if gender_id == 0 else 'Male'
text = f'{gender_str}, {age}y'
(tw, th), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
cv2.rectangle(image, (x1, y1 - th - 10), (x1 + tw + 10, y1), (0, 255, 0), -1)
cv2.putText(image, text, (x1 + 5, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 2)
def process_image(
detector,
age_gender,
image_path: str,
save_dir: str = 'outputs',
threshold: float = 0.6,
):
image = cv2.imread(image_path)
if image is None:
print(f"Error: Failed to load image from '{image_path}'")
return
faces = detector.detect(image)
print(f'Detected {len(faces)} face(s)')
if not faces:
return
bboxes = [f['bbox'] for f in faces]
scores = [f['confidence'] for f in faces]
landmarks = [f['landmarks'] for f in faces]
draw_detections(
image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
)
for i, face in enumerate(faces):
gender_id, age = age_gender.predict(image, face['bbox'])
gender_str = 'Female' if gender_id == 0 else 'Male'
print(f' Face {i + 1}: {gender_str}, {age} years old')
draw_age_gender_label(image, face['bbox'], gender_id, age)
os.makedirs(save_dir, exist_ok=True)
output_path = os.path.join(save_dir, f'{Path(image_path).stem}_age_gender.jpg')
cv2.imwrite(output_path, image)
print(f'Output saved: {output_path}')
def run_webcam(detector, age_gender, threshold: float = 0.6):
cap = cv2.VideoCapture(0) # 0 = default webcam
if not cap.isOpened():
print('Cannot open webcam')
return
print("Press 'q' to quit")
while True:
ret, frame = cap.read()
frame = cv2.flip(frame, 1) # mirror for natural interaction
if not ret:
break
faces = detector.detect(frame)
# unpack face data for visualization
bboxes = [f['bbox'] for f in faces]
scores = [f['confidence'] for f in faces]
landmarks = [f['landmarks'] for f in faces]
draw_detections(
image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
)
for face in faces:
gender_id, age = age_gender.predict(frame, face['bbox']) # predict per face
draw_age_gender_label(frame, face['bbox'], gender_id, age)
cv2.putText(
frame,
f'Faces: {len(faces)}',
(10, 30),
cv2.FONT_HERSHEY_SIMPLEX,
1,
(0, 255, 0),
2,
)
cv2.imshow('Age & Gender Detection', frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
cap.release()
cv2.destroyAllWindows()
def main():
parser = argparse.ArgumentParser(description='Run age and gender detection')
parser.add_argument('--image', type=str, help='Path to input image')
parser.add_argument('--webcam', action='store_true', help='Use webcam')
parser.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
parser.add_argument('--threshold', type=float, default=0.6, help='Visualization threshold')
parser.add_argument('--save_dir', type=str, default='outputs')
args = parser.parse_args()
if not args.image and not args.webcam:
parser.error('Either --image or --webcam must be specified')
detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
age_gender = AgeGender()
if args.webcam:
run_webcam(detector, age_gender, args.threshold)
else:
process_image(detector, age_gender, args.image, args.save_dir, args.threshold)
if __name__ == '__main__':
main()


@@ -1,110 +0,0 @@
# Face detection on image or webcam
# Usage: python run_detection.py --image path/to/image.jpg
# python run_detection.py --webcam
import argparse
import os
import cv2
from uniface.detection import SCRFD, RetinaFace, YOLOv5Face
from uniface.visualization import draw_detections
def process_image(detector, image_path: str, threshold: float = 0.6, save_dir: str = 'outputs'):
image = cv2.imread(image_path)
if image is None:
print(f"Error: Failed to load image from '{image_path}'")
return
faces = detector.detect(image)
if faces:
bboxes = [face['bbox'] for face in faces]
scores = [face['confidence'] for face in faces]
landmarks = [face['landmarks'] for face in faces]
draw_detections(image, bboxes, scores, landmarks, vis_threshold=threshold)
os.makedirs(save_dir, exist_ok=True)
output_path = os.path.join(save_dir, f'{os.path.splitext(os.path.basename(image_path))[0]}_out.jpg')
cv2.imwrite(output_path, image)
print(f'Output saved: {output_path}')
def run_webcam(detector, threshold: float = 0.6):
cap = cv2.VideoCapture(0) # 0 = default webcam
if not cap.isOpened():
print('Cannot open webcam')
return
print("Press 'q' to quit")
while True:
ret, frame = cap.read()
frame = cv2.flip(frame, 1) # mirror for natural interaction
if not ret:
break
faces = detector.detect(frame)
# unpack face data for visualization
bboxes = [f['bbox'] for f in faces]
scores = [f['confidence'] for f in faces]
landmarks = [f['landmarks'] for f in faces]
draw_detections(
image=frame,
bboxes=bboxes,
scores=scores,
landmarks=landmarks,
vis_threshold=threshold,
draw_score=True,
fancy_bbox=True,
)
cv2.putText(
frame,
f'Faces: {len(faces)}',
(10, 30),
cv2.FONT_HERSHEY_SIMPLEX,
1,
(0, 255, 0),
2,
)
cv2.imshow('Face Detection', frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
cap.release()
cv2.destroyAllWindows()
def main():
parser = argparse.ArgumentParser(description='Run face detection')
parser.add_argument('--image', type=str, help='Path to input image')
parser.add_argument('--webcam', action='store_true', help='Use webcam')
parser.add_argument('--method', type=str, default='retinaface', choices=['retinaface', 'scrfd', 'yolov5face'])
parser.add_argument('--threshold', type=float, default=0.25, help='Visualization threshold')
parser.add_argument('--save_dir', type=str, default='outputs')
args = parser.parse_args()
if not args.image and not args.webcam:
parser.error('Either --image or --webcam must be specified')
if args.method == 'retinaface':
detector = RetinaFace()
elif args.method == 'scrfd':
detector = SCRFD()
else:
from uniface.constants import YOLOv5FaceWeights
detector = YOLOv5Face(model_name=YOLOv5FaceWeights.YOLOV5M)
if args.webcam:
run_webcam(detector, args.threshold)
else:
process_image(detector, args.image, args.threshold, args.save_dir)
if __name__ == '__main__':
main()


@@ -1,126 +0,0 @@
# Emotion detection on detected faces
# Usage: python run_emotion.py --image path/to/image.jpg
# python run_emotion.py --webcam
import argparse
import os
from pathlib import Path
import cv2
from uniface import SCRFD, Emotion, RetinaFace
from uniface.visualization import draw_detections
def draw_emotion_label(image, bbox, emotion: str, confidence: float):
"""Draw emotion label above the bounding box."""
x1, y1 = int(bbox[0]), int(bbox[1])
text = f'{emotion} ({confidence:.2f})'
(tw, th), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
cv2.rectangle(image, (x1, y1 - th - 10), (x1 + tw + 10, y1), (255, 0, 0), -1)
cv2.putText(image, text, (x1 + 5, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
def process_image(
detector,
emotion_predictor,
image_path: str,
save_dir: str = 'outputs',
threshold: float = 0.6,
):
image = cv2.imread(image_path)
if image is None:
print(f"Error: Failed to load image from '{image_path}'")
return
faces = detector.detect(image)
print(f'Detected {len(faces)} face(s)')
if not faces:
return
bboxes = [f['bbox'] for f in faces]
scores = [f['confidence'] for f in faces]
landmarks = [f['landmarks'] for f in faces]
draw_detections(
image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
)
for i, face in enumerate(faces):
emotion, confidence = emotion_predictor.predict(image, face['landmarks'])
print(f' Face {i + 1}: {emotion} (confidence: {confidence:.3f})')
draw_emotion_label(image, face['bbox'], emotion, confidence)
os.makedirs(save_dir, exist_ok=True)
output_path = os.path.join(save_dir, f'{Path(image_path).stem}_emotion.jpg')
cv2.imwrite(output_path, image)
print(f'Output saved: {output_path}')
def run_webcam(detector, emotion_predictor, threshold: float = 0.6):
cap = cv2.VideoCapture(0) # 0 = default webcam
if not cap.isOpened():
print('Cannot open webcam')
return
print("Press 'q' to quit")
while True:
ret, frame = cap.read()
frame = cv2.flip(frame, 1) # mirror for natural interaction
if not ret:
break
faces = detector.detect(frame)
# unpack face data for visualization
bboxes = [f['bbox'] for f in faces]
scores = [f['confidence'] for f in faces]
landmarks = [f['landmarks'] for f in faces]
draw_detections(frame, bboxes, scores, landmarks, vis_threshold=threshold)
for face in faces:
emotion, confidence = emotion_predictor.predict(frame, face['landmarks'])
draw_emotion_label(frame, face['bbox'], emotion, confidence)
cv2.putText(
frame,
f'Faces: {len(faces)}',
(10, 30),
cv2.FONT_HERSHEY_SIMPLEX,
1,
(0, 255, 0),
2,
)
cv2.imshow('Emotion Detection', frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
cap.release()
cv2.destroyAllWindows()
def main():
parser = argparse.ArgumentParser(description='Run emotion detection')
parser.add_argument('--image', type=str, help='Path to input image')
parser.add_argument('--webcam', action='store_true', help='Use webcam')
parser.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
parser.add_argument('--threshold', type=float, default=0.6, help='Visualization threshold')
parser.add_argument('--save_dir', type=str, default='outputs')
args = parser.parse_args()
if not args.image and not args.webcam:
parser.error('Either --image or --webcam must be specified')
detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
emotion_predictor = Emotion()
if args.webcam:
run_webcam(detector, emotion_predictor, args.threshold)
else:
process_image(detector, emotion_predictor, args.image, args.save_dir, args.threshold)
if __name__ == '__main__':
main()


@@ -1,116 +0,0 @@
# Face analysis using FaceAnalyzer
# Usage: python run_face_analyzer.py --image path/to/image.jpg
import argparse
import os
from pathlib import Path
import cv2
import numpy as np
from uniface import AgeGender, ArcFace, FaceAnalyzer, RetinaFace
from uniface.visualization import draw_detections
def draw_face_info(image, face, face_id):
"""Draw face ID and attributes above bounding box."""
x1, y1, x2, y2 = map(int, face.bbox)
lines = [f'ID: {face_id}', f'Conf: {face.confidence:.2f}']
if face.age and face.sex:
lines.append(f'{face.sex}, {face.age}y')
for i, line in enumerate(lines):
y_pos = y1 - 10 - (len(lines) - 1 - i) * 25
if y_pos < 20:
y_pos = y2 + 20 + i * 25
(tw, th), _ = cv2.getTextSize(line, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
cv2.rectangle(image, (x1, y_pos - th - 5), (x1 + tw + 10, y_pos + 5), (0, 255, 0), -1)
cv2.putText(image, line, (x1 + 5, y_pos), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 2)
def process_image(analyzer, image_path: str, save_dir: str = 'outputs', show_similarity: bool = True):
image = cv2.imread(image_path)
if image is None:
print(f"Error: Failed to load image from '{image_path}'")
return
faces = analyzer.analyze(image)
print(f'Detected {len(faces)} face(s)')
if not faces:
return
for i, face in enumerate(faces, 1):
info = f' Face {i}: {face.sex}, {face.age}y' if face.age and face.sex else f' Face {i}'
if face.embedding is not None:
info += f' (embedding: {face.embedding.shape})'
print(info)
if show_similarity and len(faces) >= 2:
print('\nSimilarity Matrix:')
n = len(faces)
sim_matrix = np.zeros((n, n))
for i in range(n):
for j in range(i, n):
if i == j:
sim_matrix[i][j] = 1.0
else:
sim = faces[i].compute_similarity(faces[j])
sim_matrix[i][j] = sim
sim_matrix[j][i] = sim
print(' ', end='')
for i in range(n):
print(f' F{i + 1:2d} ', end='')
print('\n ' + '-' * (7 * n))
for i in range(n):
print(f'F{i + 1:2d} | ', end='')
for j in range(n):
print(f'{sim_matrix[i][j]:6.3f} ', end='')
print()
pairs = [(i, j, sim_matrix[i][j]) for i in range(n) for j in range(i + 1, n)]
pairs.sort(key=lambda x: x[2], reverse=True)
print('\nTop matches (>0.4 = same person):')
for i, j, sim in pairs[:3]:
status = 'Same' if sim > 0.4 else 'Different'
print(f' Face {i + 1} ↔ Face {j + 1}: {sim:.3f} ({status})')
bboxes = [f.bbox for f in faces]
scores = [f.confidence for f in faces]
landmarks = [f.landmarks for f in faces]
draw_detections(image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, fancy_bbox=True)
for i, face in enumerate(faces, 1):
draw_face_info(image, face, i)
os.makedirs(save_dir, exist_ok=True)
output_path = os.path.join(save_dir, f'{Path(image_path).stem}_analysis.jpg')
cv2.imwrite(output_path, image)
print(f'Output saved: {output_path}')
def main():
parser = argparse.ArgumentParser(description='Face analysis with detection, recognition, and attributes')
parser.add_argument('--image', type=str, required=True, help='Path to input image')
parser.add_argument('--save_dir', type=str, default='outputs', help='Output directory')
parser.add_argument('--no-similarity', action='store_true', help='Skip similarity matrix computation')
args = parser.parse_args()
if not os.path.exists(args.image):
print(f'Error: Image not found: {args.image}')
return
detector = RetinaFace()
recognizer = ArcFace()
age_gender = AgeGender()
analyzer = FaceAnalyzer(detector, recognizer, age_gender)
process_image(analyzer, args.image, args.save_dir, show_similarity=not args.no_similarity)
if __name__ == '__main__':
main()


@@ -1,126 +0,0 @@
# Face parsing on detected faces
# Usage: python run_face_parsing.py --image path/to/image.jpg
# python run_face_parsing.py --webcam
import argparse
import os
from pathlib import Path
import cv2
from uniface import RetinaFace
from uniface.constants import ParsingWeights
from uniface.parsing import BiSeNet
from uniface.visualization import vis_parsing_maps
def process_image(detector, parser, image_path: str, save_dir: str = 'outputs'):
image = cv2.imread(image_path)
if image is None:
print(f"Error: Failed to load image from '{image_path}'")
return
faces = detector.detect(image)
print(f'Detected {len(faces)} face(s)')
result_image = image.copy()
for i, face in enumerate(faces):
bbox = face['bbox']
x1, y1, x2, y2 = map(int, bbox[:4])
face_crop = image[y1:y2, x1:x2]
if face_crop.size == 0:
continue
# Parse the face
mask = parser.parse(face_crop)
print(f' Face {i + 1}: parsed with {len(set(mask.flatten()))} unique classes')
# Visualize the parsing result
face_crop_rgb = cv2.cvtColor(face_crop, cv2.COLOR_BGR2RGB)
vis_result = vis_parsing_maps(face_crop_rgb, mask, save_image=False)
# Place the visualization back on the original image
result_image[y1:y2, x1:x2] = vis_result
# Draw bounding box
cv2.rectangle(result_image, (x1, y1), (x2, y2), (0, 255, 0), 2)
os.makedirs(save_dir, exist_ok=True)
output_path = os.path.join(save_dir, f'{Path(image_path).stem}_parsing.jpg')
cv2.imwrite(output_path, result_image)
print(f'Output saved: {output_path}')
def run_webcam(detector, parser):
cap = cv2.VideoCapture(0)
if not cap.isOpened():
print('Cannot open webcam')
return
print("Press 'q' to quit")
while True:
ret, frame = cap.read()
if not ret:
break
frame = cv2.flip(frame, 1)
faces = detector.detect(frame)
for face in faces:
bbox = face['bbox']
x1, y1, x2, y2 = map(int, bbox[:4])
face_crop = frame[y1:y2, x1:x2]
if face_crop.size == 0:
continue
# Parse the face
mask = parser.parse(face_crop)
# Visualize the parsing result
face_crop_rgb = cv2.cvtColor(face_crop, cv2.COLOR_BGR2RGB)
vis_result = vis_parsing_maps(face_crop_rgb, mask, save_image=False)
# Place the visualization back on the frame
frame[y1:y2, x1:x2] = vis_result
# Draw bounding box
cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
cv2.imshow('Face Parsing', frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
cap.release()
cv2.destroyAllWindows()
def main():
parser_arg = argparse.ArgumentParser(description='Run face parsing')
parser_arg.add_argument('--image', type=str, help='Path to input image')
parser_arg.add_argument('--webcam', action='store_true', help='Use webcam')
parser_arg.add_argument('--save_dir', type=str, default='outputs')
parser_arg.add_argument(
'--model', type=str, default=ParsingWeights.RESNET18, choices=[ParsingWeights.RESNET18, ParsingWeights.RESNET34]
)
args = parser_arg.parse_args()
if not args.image and not args.webcam:
parser_arg.error('Either --image or --webcam must be specified')
detector = RetinaFace()
parser = BiSeNet(model_name=ParsingWeights.RESNET34)
if args.webcam:
run_webcam(detector, parser)
else:
process_image(detector, parser, args.image, args.save_dir)
if __name__ == '__main__':
main()


@@ -1,97 +0,0 @@
# Real-time face search: match webcam faces against a reference image
# Usage: python run_face_search.py --image reference.jpg
import argparse
import cv2
import numpy as np
from uniface.detection import SCRFD, RetinaFace
from uniface.face_utils import compute_similarity
from uniface.recognition import ArcFace, MobileFace, SphereFace
def get_recognizer(name: str):
if name == 'arcface':
return ArcFace()
elif name == 'mobileface':
return MobileFace()
else:
return SphereFace()
def extract_reference_embedding(detector, recognizer, image_path: str) -> np.ndarray:
image = cv2.imread(image_path)
if image is None:
raise RuntimeError(f'Failed to load image: {image_path}')
faces = detector.detect(image)
if not faces:
raise RuntimeError('No faces found in reference image.')
landmarks = faces[0]['landmarks']
return recognizer.get_normalized_embedding(image, landmarks)
def run_webcam(detector, recognizer, ref_embedding: np.ndarray, threshold: float = 0.4):
cap = cv2.VideoCapture(0) # 0 = default webcam
if not cap.isOpened():
raise RuntimeError('Webcam could not be opened.')
print("Press 'q' to quit")
while True:
ret, frame = cap.read()
frame = cv2.flip(frame, 1) # mirror for natural interaction
if not ret:
break
faces = detector.detect(frame)
for face in faces:
bbox = face['bbox']
landmarks = face['landmarks']
x1, y1, x2, y2 = map(int, bbox)
embedding = recognizer.get_normalized_embedding(frame, landmarks)
sim = compute_similarity(ref_embedding, embedding) # compare with reference
# green = match, red = unknown
label = f'Match ({sim:.2f})' if sim > threshold else f'Unknown ({sim:.2f})'
color = (0, 255, 0) if sim > threshold else (0, 0, 255)
cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)
cv2.imshow('Face Recognition', frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
cap.release()
cv2.destroyAllWindows()
def main():
parser = argparse.ArgumentParser(description='Face search using a reference image')
parser.add_argument('--image', type=str, required=True, help='Reference face image')
parser.add_argument('--threshold', type=float, default=0.4, help='Match threshold')
parser.add_argument('--detector', type=str, default='scrfd', choices=['retinaface', 'scrfd'])
parser.add_argument(
'--recognizer',
type=str,
default='arcface',
choices=['arcface', 'mobileface', 'sphereface'],
)
args = parser.parse_args()
detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
recognizer = get_recognizer(args.recognizer)
print(f'Loading reference: {args.image}')
ref_embedding = extract_reference_embedding(detector, recognizer, args.image)
run_webcam(detector, recognizer, ref_embedding, args.threshold)
if __name__ == '__main__':
main()


@@ -1,104 +0,0 @@
# Gaze estimation on detected faces
# Usage: python run_gaze_estimation.py --image path/to/image.jpg
# python run_gaze_estimation.py --webcam
import argparse
import os
from pathlib import Path
import cv2
import numpy as np
from uniface import RetinaFace
from uniface.gaze import MobileGaze
from uniface.visualization import draw_gaze
def process_image(detector, gaze_estimator, image_path: str, save_dir: str = 'outputs'):
image = cv2.imread(image_path)
if image is None:
print(f"Error: Failed to load image from '{image_path}'")
return
faces = detector.detect(image)
print(f'Detected {len(faces)} face(s)')
for i, face in enumerate(faces):
bbox = face['bbox']
x1, y1, x2, y2 = map(int, bbox[:4])
face_crop = image[y1:y2, x1:x2]
if face_crop.size == 0:
continue
pitch, yaw = gaze_estimator.estimate(face_crop)
print(f' Face {i + 1}: pitch={np.degrees(pitch):.1f}°, yaw={np.degrees(yaw):.1f}°')
# Draw both bbox and gaze arrow with angle text
draw_gaze(image, bbox, pitch, yaw, draw_angles=True)
os.makedirs(save_dir, exist_ok=True)
output_path = os.path.join(save_dir, f'{Path(image_path).stem}_gaze.jpg')
cv2.imwrite(output_path, image)
print(f'Output saved: {output_path}')
def run_webcam(detector, gaze_estimator):
cap = cv2.VideoCapture(0)
if not cap.isOpened():
print('Cannot open webcam')
return
print("Press 'q' to quit")
while True:
ret, frame = cap.read()
if not ret:
break
frame = cv2.flip(frame, 1)
faces = detector.detect(frame)
for face in faces:
bbox = face['bbox']
x1, y1, x2, y2 = map(int, bbox[:4])
face_crop = frame[y1:y2, x1:x2]
if face_crop.size == 0:
continue
pitch, yaw = gaze_estimator.estimate(face_crop)
# Draw both bbox and gaze arrow
draw_gaze(frame, bbox, pitch, yaw)
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
cv2.imshow('Gaze Estimation', frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
cap.release()
cv2.destroyAllWindows()
def main():
parser = argparse.ArgumentParser(description='Run gaze estimation')
parser.add_argument('--image', type=str, help='Path to input image')
parser.add_argument('--webcam', action='store_true', help='Use webcam')
parser.add_argument('--save_dir', type=str, default='outputs')
args = parser.parse_args()
if not args.image and not args.webcam:
parser.error('Either --image or --webcam must be specified')
detector = RetinaFace()
gaze_estimator = MobileGaze()
if args.webcam:
run_webcam(detector, gaze_estimator)
else:
process_image(detector, gaze_estimator, args.image, args.save_dir)
if __name__ == '__main__':
main()


@@ -1,117 +0,0 @@
# 106-point facial landmark detection
# Usage: python run_landmarks.py --image path/to/image.jpg
# python run_landmarks.py --webcam
import argparse
import os
from pathlib import Path
import cv2
from uniface import SCRFD, Landmark106, RetinaFace
def process_image(detector, landmarker, image_path: str, save_dir: str = 'outputs'):
image = cv2.imread(image_path)
if image is None:
print(f"Error: Failed to load image from '{image_path}'")
return
faces = detector.detect(image)
print(f'Detected {len(faces)} face(s)')
if not faces:
return
for i, face in enumerate(faces):
bbox = face['bbox']
x1, y1, x2, y2 = map(int, bbox)
cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
landmarks = landmarker.get_landmarks(image, bbox)
print(f' Face {i + 1}: {len(landmarks)} landmarks')
for x, y in landmarks.astype(int):
cv2.circle(image, (x, y), 1, (0, 255, 0), -1)
cv2.putText(
image,
f'Face {i + 1}',
(x1, y1 - 10),
cv2.FONT_HERSHEY_SIMPLEX,
0.5,
(0, 255, 0),
2,
)
os.makedirs(save_dir, exist_ok=True)
output_path = os.path.join(save_dir, f'{Path(image_path).stem}_landmarks.jpg')
cv2.imwrite(output_path, image)
print(f'Output saved: {output_path}')
def run_webcam(detector, landmarker):
cap = cv2.VideoCapture(0) # 0 = default webcam
if not cap.isOpened():
print('Cannot open webcam')
return
print("Press 'q' to quit")
while True:
ret, frame = cap.read()
frame = cv2.flip(frame, 1) # mirror for natural interaction
if not ret:
break
faces = detector.detect(frame)
for face in faces:
bbox = face['bbox']
x1, y1, x2, y2 = map(int, bbox)
cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
landmarks = landmarker.get_landmarks(frame, bbox) # 106 points
for x, y in landmarks.astype(int):
cv2.circle(frame, (x, y), 1, (0, 255, 0), -1)
cv2.putText(
frame,
f'Faces: {len(faces)}',
(10, 30),
cv2.FONT_HERSHEY_SIMPLEX,
1,
(0, 255, 0),
2,
)
cv2.imshow('106-Point Landmarks', frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
cap.release()
cv2.destroyAllWindows()
def main():
parser = argparse.ArgumentParser(description='Run facial landmark detection')
parser.add_argument('--image', type=str, help='Path to input image')
parser.add_argument('--webcam', action='store_true', help='Use webcam')
parser.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
parser.add_argument('--save_dir', type=str, default='outputs')
args = parser.parse_args()
if not args.image and not args.webcam:
parser.error('Either --image or --webcam must be specified')
detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
landmarker = Landmark106()
if args.webcam:
run_webcam(detector, landmarker)
else:
process_image(detector, landmarker, args.image, args.save_dir)
if __name__ == '__main__':
main()


@@ -1,109 +0,0 @@
# Face detection on video files
# Usage: python run_video_detection.py --input video.mp4 --output output.mp4
import argparse
from pathlib import Path
import cv2
from tqdm import tqdm
from uniface import SCRFD, RetinaFace
from uniface.visualization import draw_detections
def process_video(
detector,
input_path: str,
output_path: str,
threshold: float = 0.6,
show_preview: bool = False,
):
cap = cv2.VideoCapture(input_path)
if not cap.isOpened():
print(f"Error: Cannot open video file '{input_path}'")
return
# get video properties
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
fps = cap.get(cv2.CAP_PROP_FPS)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
print(f'Input: {input_path} ({width}x{height}, {fps:.1f} fps, {total_frames} frames)')
print(f'Output: {output_path}')
fourcc = cv2.VideoWriter_fourcc(*'mp4v') # codec for .mp4
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
if not out.isOpened():
print(f"Error: Cannot create output video '{output_path}'")
cap.release()
return
frame_count = 0
total_faces = 0
for _ in tqdm(range(total_frames), desc='Processing', unit='frames'):
ret, frame = cap.read()
if not ret:
break
frame_count += 1
faces = detector.detect(frame)
total_faces += len(faces)
bboxes = [f['bbox'] for f in faces]
scores = [f['confidence'] for f in faces]
landmarks = [f['landmarks'] for f in faces]
draw_detections(
image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
)
cv2.putText(
frame,
f'Faces: {len(faces)}',
(10, 30),
cv2.FONT_HERSHEY_SIMPLEX,
1,
(0, 255, 0),
2,
)
out.write(frame)
if show_preview:
cv2.imshow("Processing - Press 'q' to cancel", frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
print('\nCancelled by user')
break
cap.release()
out.release()
if show_preview:
cv2.destroyAllWindows()
avg_faces = total_faces / frame_count if frame_count > 0 else 0
print(f'\nDone! {frame_count} frames, {total_faces} faces ({avg_faces:.1f} avg/frame)')
print(f'Saved: {output_path}')
def main():
parser = argparse.ArgumentParser(description='Process video with face detection')
parser.add_argument('--input', type=str, required=True, help='Input video path')
parser.add_argument('--output', type=str, required=True, help='Output video path')
parser.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
parser.add_argument('--threshold', type=float, default=0.6, help='Visualization threshold')
parser.add_argument('--preview', action='store_true', help='Show live preview')
args = parser.parse_args()
if not Path(args.input).exists():
print(f"Error: Input file '{args.input}' does not exist")
return
Path(args.output).parent.mkdir(parents=True, exist_ok=True)
detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
process_video(detector, args.input, args.output, args.threshold, args.preview)
if __name__ == '__main__':
main()


@@ -1,7 +1,15 @@
# Copyright 2025 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
"""Tests for AgeGender attribute predictor."""
from __future__ import annotations
import numpy as np
import pytest
from uniface.attribute import AgeGender
from uniface.attribute import AgeGender, AttributeResult
@pytest.fixture
@@ -24,19 +32,22 @@ def test_model_initialization(age_gender_model):
def test_prediction_output_format(age_gender_model, mock_image, mock_bbox):
gender_id, age = age_gender_model.predict(mock_image, mock_bbox)
assert isinstance(gender_id, int), f'Gender ID should be int, got {type(gender_id)}'
assert isinstance(age, int), f'Age should be int, got {type(age)}'
result = age_gender_model.predict(mock_image, mock_bbox)
assert isinstance(result, AttributeResult), f'Result should be AttributeResult, got {type(result)}'
assert isinstance(result.gender, int), f'Gender should be int, got {type(result.gender)}'
assert isinstance(result.age, int), f'Age should be int, got {type(result.age)}'
assert isinstance(result.sex, str), f'Sex should be str, got {type(result.sex)}'
def test_gender_values(age_gender_model, mock_image, mock_bbox):
gender_id, age = age_gender_model.predict(mock_image, mock_bbox)
assert gender_id in [0, 1], f'Gender ID should be 0 (Female) or 1 (Male), got {gender_id}'
result = age_gender_model.predict(mock_image, mock_bbox)
assert result.gender in [0, 1], f'Gender should be 0 (Female) or 1 (Male), got {result.gender}'
assert result.sex in ['Female', 'Male'], f'Sex should be Female or Male, got {result.sex}'
def test_age_range(age_gender_model, mock_image, mock_bbox):
gender_id, age = age_gender_model.predict(mock_image, mock_bbox)
assert 0 <= age <= 120, f'Age should be between 0 and 120, got {age}'
result = age_gender_model.predict(mock_image, mock_bbox)
assert 0 <= result.age <= 120, f'Age should be between 0 and 120, got {result.age}'
def test_different_bbox_sizes(age_gender_model, mock_image):
@@ -47,9 +58,9 @@ def test_different_bbox_sizes(age_gender_model, mock_image):
]
for bbox in test_bboxes:
gender_id, age = age_gender_model.predict(mock_image, bbox)
assert gender_id in [0, 1], f'Failed for bbox {bbox}'
assert 0 <= age <= 120, f'Age out of range for bbox {bbox}'
result = age_gender_model.predict(mock_image, bbox)
assert result.gender in [0, 1], f'Failed for bbox {bbox}'
assert 0 <= result.age <= 120, f'Age out of range for bbox {bbox}'
def test_different_image_sizes(age_gender_model, mock_bbox):
@@ -57,31 +68,31 @@ def test_different_image_sizes(age_gender_model, mock_bbox):
for size in test_sizes:
mock_image = np.random.randint(0, 255, size, dtype=np.uint8)
gender_id, age = age_gender_model.predict(mock_image, mock_bbox)
assert gender_id in [0, 1], f'Failed for image size {size}'
assert 0 <= age <= 120, f'Age out of range for image size {size}'
result = age_gender_model.predict(mock_image, mock_bbox)
assert result.gender in [0, 1], f'Failed for image size {size}'
assert 0 <= result.age <= 120, f'Age out of range for image size {size}'
def test_consistency(age_gender_model, mock_image, mock_bbox):
gender_id1, age1 = age_gender_model.predict(mock_image, mock_bbox)
gender_id2, age2 = age_gender_model.predict(mock_image, mock_bbox)
result1 = age_gender_model.predict(mock_image, mock_bbox)
result2 = age_gender_model.predict(mock_image, mock_bbox)
assert gender_id1 == gender_id2, 'Same input should produce same gender prediction'
assert age1 == age2, 'Same input should produce same age prediction'
assert result1.gender == result2.gender, 'Same input should produce same gender prediction'
assert result1.age == result2.age, 'Same input should produce same age prediction'
def test_bbox_list_format(age_gender_model, mock_image):
bbox_list = [100, 100, 300, 300]
gender_id, age = age_gender_model.predict(mock_image, bbox_list)
assert gender_id in [0, 1], 'Should work with bbox as list'
assert 0 <= age <= 120, 'Age should be in valid range'
result = age_gender_model.predict(mock_image, bbox_list)
assert result.gender in [0, 1], 'Should work with bbox as list'
assert 0 <= result.age <= 120, 'Age should be in valid range'
def test_bbox_array_format(age_gender_model, mock_image):
bbox_array = np.array([100, 100, 300, 300])
gender_id, age = age_gender_model.predict(mock_image, bbox_array)
assert gender_id in [0, 1], 'Should work with bbox as numpy array'
assert 0 <= age <= 120, 'Age should be in valid range'
result = age_gender_model.predict(mock_image, bbox_array)
assert result.gender in [0, 1], 'Should work with bbox as numpy array'
assert 0 <= result.age <= 120, 'Age should be in valid range'
def test_multiple_predictions(age_gender_model, mock_image):
@@ -93,25 +104,37 @@ def test_multiple_predictions(age_gender_model, mock_image):
results = []
for bbox in bboxes:
gender_id, age = age_gender_model.predict(mock_image, bbox)
results.append((gender_id, age))
result = age_gender_model.predict(mock_image, bbox)
results.append(result)
assert len(results) == 3, 'Should have 3 predictions'
for gender_id, age in results:
assert gender_id in [0, 1]
assert 0 <= age <= 120
for result in results:
assert result.gender in [0, 1]
assert 0 <= result.age <= 120
def test_age_is_positive(age_gender_model, mock_image, mock_bbox):
for _ in range(5):
gender_id, age = age_gender_model.predict(mock_image, mock_bbox)
assert age >= 0, f'Age should be non-negative, got {age}'
result = age_gender_model.predict(mock_image, mock_bbox)
assert result.age >= 0, f'Age should be non-negative, got {result.age}'
def test_output_format_for_visualization(age_gender_model, mock_image, mock_bbox):
gender_id, age = age_gender_model.predict(mock_image, mock_bbox)
gender_str = 'Female' if gender_id == 0 else 'Male'
text = f'{gender_str}, {age}y'
result = age_gender_model.predict(mock_image, mock_bbox)
text = f'{result.sex}, {result.age}y'
assert isinstance(text, str), 'Should be able to format as string'
assert 'Male' in text or 'Female' in text, 'Text should contain gender'
assert 'y' in text, "Text should contain 'y' for years"
def test_attribute_result_fields(age_gender_model, mock_image, mock_bbox):
"""Test that AttributeResult has correct fields for AgeGender model."""
result = age_gender_model.predict(mock_image, mock_bbox)
# AgeGender should set gender and age
assert result.gender is not None
assert result.age is not None
# AgeGender should NOT set race and age_group (FairFace only)
assert result.race is None
assert result.age_group is None


@@ -1,3 +1,11 @@
# Copyright 2025 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
"""Tests for factory functions (create_detector, create_recognizer, etc.)."""
from __future__ import annotations
import numpy as np
import pytest
@@ -35,8 +43,8 @@ def test_create_detector_with_config():
detector = create_detector(
'retinaface',
model_name=RetinaFaceWeights.MNET_V2,
conf_thresh=0.8,
nms_thresh=0.3,
confidence_threshold=0.8,
nms_threshold=0.3,
)
assert detector is not None, 'Failed to create detector with custom config'
@@ -53,7 +61,7 @@ def test_create_detector_scrfd_with_model():
"""
Test creating SCRFD detector with specific model.
"""
detector = create_detector('scrfd', model_name=SCRFDWeights.SCRFD_10G_KPS, conf_thresh=0.5)
detector = create_detector('scrfd', model_name=SCRFDWeights.SCRFD_10G_KPS, confidence_threshold=0.5)
assert detector is not None, 'Failed to create SCRFD with specific model'
@@ -141,13 +149,13 @@ def test_detect_faces_with_threshold():
Test detect_faces with custom confidence threshold.
"""
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
faces = detect_faces(mock_image, method='retinaface', conf_thresh=0.8)
faces = detect_faces(mock_image, method='retinaface', confidence_threshold=0.8)
assert isinstance(faces, list), 'detect_faces should return a list'
# All detections should respect threshold
for face in faces:
assert face['confidence'] >= 0.8, 'All detections should meet confidence threshold'
assert face.confidence >= 0.8, 'All detections should meet confidence threshold'
def test_detect_faces_default_method():
@@ -246,8 +254,8 @@ def test_detector_with_different_configs():
"""
Test creating multiple detectors with different configurations.
"""
detector_high_thresh = create_detector('retinaface', conf_thresh=0.9)
detector_low_thresh = create_detector('retinaface', conf_thresh=0.3)
detector_high_thresh = create_detector('retinaface', confidence_threshold=0.9)
detector_low_thresh = create_detector('retinaface', confidence_threshold=0.3)
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)


@@ -1,3 +1,11 @@
# Copyright 2025 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
"""Tests for 106-point facial landmark detector."""
from __future__ import annotations
import numpy as np
import pytest


@@ -2,6 +2,10 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
"""Tests for BiSeNet face parsing model."""
from __future__ import annotations
import numpy as np
import pytest


@@ -1,3 +1,11 @@
# Copyright 2025 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
"""Tests for face recognition models (ArcFace, MobileFace, SphereFace)."""
from __future__ import annotations
import numpy as np
import pytest


@@ -1,3 +1,11 @@
# Copyright 2025 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
"""Tests for RetinaFace detector."""
from __future__ import annotations
import numpy as np
import pytest
@@ -9,9 +17,9 @@ from uniface.detection import RetinaFace
def retinaface_model():
return RetinaFace(
model_name=RetinaFaceWeights.MNET_V2,
conf_thresh=0.5,
confidence_threshold=0.5,
pre_nms_topk=5000,
nms_thresh=0.4,
nms_threshold=0.4,
post_nms_topk=750,
)
@@ -27,15 +35,15 @@ def test_inference_on_640x640_image(retinaface_model):
assert isinstance(faces, list), 'Detections should be a list.'
for face in faces:
assert isinstance(face, dict), 'Each detection should be a dictionary.'
assert 'bbox' in face, "Each detection should have a 'bbox' key."
assert 'confidence' in face, "Each detection should have a 'confidence' key."
assert 'landmarks' in face, "Each detection should have a 'landmarks' key."
# Face is a dataclass, check attributes exist
assert hasattr(face, 'bbox'), "Each detection should have a 'bbox' attribute."
assert hasattr(face, 'confidence'), "Each detection should have a 'confidence' attribute."
assert hasattr(face, 'landmarks'), "Each detection should have a 'landmarks' attribute."
bbox = face['bbox']
bbox = face.bbox
assert len(bbox) == 4, 'BBox should have 4 values (x1, y1, x2, y2).'
landmarks = face['landmarks']
landmarks = face.landmarks
assert len(landmarks) == 5, 'Should have 5 landmark points.'
assert all(len(pt) == 2 for pt in landmarks), 'Each landmark should be (x, y).'
@@ -45,7 +53,7 @@ def test_confidence_threshold(retinaface_model):
faces = retinaface_model.detect(mock_image)
for face in faces:
confidence = face['confidence']
confidence = face.confidence
assert confidence >= 0.5, f'Detection has confidence {confidence} below threshold 0.5'


@@ -1,3 +1,11 @@
# Copyright 2025 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
"""Tests for SCRFD detector."""
from __future__ import annotations
import numpy as np
import pytest
@@ -9,8 +17,8 @@ from uniface.detection import SCRFD
def scrfd_model():
return SCRFD(
model_name=SCRFDWeights.SCRFD_500M_KPS,
conf_thresh=0.5,
nms_thresh=0.4,
confidence_threshold=0.5,
nms_threshold=0.4,
)
@@ -25,15 +33,15 @@ def test_inference_on_640x640_image(scrfd_model):
assert isinstance(faces, list), 'Detections should be a list.'
for face in faces:
assert isinstance(face, dict), 'Each detection should be a dictionary.'
assert 'bbox' in face, "Each detection should have a 'bbox' key."
assert 'confidence' in face, "Each detection should have a 'confidence' key."
assert 'landmarks' in face, "Each detection should have a 'landmarks' key."
# Face is a dataclass, check attributes exist
assert hasattr(face, 'bbox'), "Each detection should have a 'bbox' attribute."
assert hasattr(face, 'confidence'), "Each detection should have a 'confidence' attribute."
assert hasattr(face, 'landmarks'), "Each detection should have a 'landmarks' attribute."
bbox = face['bbox']
bbox = face.bbox
assert len(bbox) == 4, 'BBox should have 4 values (x1, y1, x2, y2).'
landmarks = face['landmarks']
landmarks = face.landmarks
assert len(landmarks) == 5, 'Should have 5 landmark points.'
assert all(len(pt) == 2 for pt in landmarks), 'Each landmark should be (x, y).'
@@ -43,7 +51,7 @@ def test_confidence_threshold(scrfd_model):
faces = scrfd_model.detect(mock_image)
for face in faces:
confidence = face['confidence']
confidence = face.confidence
assert confidence >= 0.5, f'Detection has confidence {confidence} below threshold 0.5'
@@ -63,7 +71,7 @@ def test_different_input_sizes(scrfd_model):
def test_scrfd_10g_model():
model = SCRFD(model_name=SCRFDWeights.SCRFD_10G_KPS, conf_thresh=0.5)
model = SCRFD(model_name=SCRFDWeights.SCRFD_10G_KPS, confidence_threshold=0.5)
assert model is not None, 'SCRFD 10G model initialization failed.'
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)

tests/test_types.py (new file, 282 lines)

@@ -0,0 +1,282 @@
# Copyright 2025 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
"""Tests for UniFace type definitions (dataclasses)."""
from __future__ import annotations
import numpy as np
import pytest
from uniface.types import AttributeResult, EmotionResult, Face, GazeResult, SpoofingResult
class TestGazeResult:
"""Tests for GazeResult dataclass."""
def test_creation(self):
result = GazeResult(pitch=0.1, yaw=-0.2)
assert result.pitch == 0.1
assert result.yaw == -0.2
def test_immutability(self):
result = GazeResult(pitch=0.1, yaw=-0.2)
with pytest.raises(AttributeError):
result.pitch = 0.5 # type: ignore
def test_repr(self):
result = GazeResult(pitch=0.1234, yaw=-0.5678)
repr_str = repr(result)
assert 'GazeResult' in repr_str
assert '0.1234' in repr_str
assert '-0.5678' in repr_str
def test_equality(self):
result1 = GazeResult(pitch=0.1, yaw=-0.2)
result2 = GazeResult(pitch=0.1, yaw=-0.2)
assert result1 == result2
def test_hashable(self):
"""Frozen dataclasses should be hashable."""
result = GazeResult(pitch=0.1, yaw=-0.2)
# Should not raise
hash(result)
# Can be used in sets/dicts
result_set = {result}
assert result in result_set
class TestSpoofingResult:
"""Tests for SpoofingResult dataclass."""
def test_creation_real(self):
result = SpoofingResult(is_real=True, confidence=0.95)
assert result.is_real is True
assert result.confidence == 0.95
def test_creation_fake(self):
result = SpoofingResult(is_real=False, confidence=0.87)
assert result.is_real is False
assert result.confidence == 0.87
def test_immutability(self):
result = SpoofingResult(is_real=True, confidence=0.95)
with pytest.raises(AttributeError):
result.is_real = False # type: ignore
def test_repr_real(self):
result = SpoofingResult(is_real=True, confidence=0.9512)
repr_str = repr(result)
assert 'SpoofingResult' in repr_str
assert 'Real' in repr_str
assert '0.9512' in repr_str
def test_repr_fake(self):
result = SpoofingResult(is_real=False, confidence=0.8765)
repr_str = repr(result)
assert 'Fake' in repr_str
def test_hashable(self):
result = SpoofingResult(is_real=True, confidence=0.95)
hash(result)
class TestEmotionResult:
"""Tests for EmotionResult dataclass."""
def test_creation(self):
result = EmotionResult(emotion='Happy', confidence=0.92)
assert result.emotion == 'Happy'
assert result.confidence == 0.92
def test_immutability(self):
result = EmotionResult(emotion='Sad', confidence=0.75)
with pytest.raises(AttributeError):
result.emotion = 'Happy' # type: ignore
def test_repr(self):
result = EmotionResult(emotion='Angry', confidence=0.8123)
repr_str = repr(result)
assert 'EmotionResult' in repr_str
assert 'Angry' in repr_str
assert '0.8123' in repr_str
def test_various_emotions(self):
emotions = ['Neutral', 'Happy', 'Sad', 'Surprise', 'Fear', 'Disgust', 'Angry']
for emotion in emotions:
result = EmotionResult(emotion=emotion, confidence=0.5)
assert result.emotion == emotion
def test_hashable(self):
result = EmotionResult(emotion='Happy', confidence=0.92)
hash(result)
class TestAttributeResult:
"""Tests for AttributeResult dataclass."""
def test_age_gender_result(self):
result = AttributeResult(gender=1, age=25)
assert result.gender == 1
assert result.age == 25
assert result.age_group is None
assert result.race is None
assert result.sex == 'Male'
def test_fairface_result(self):
result = AttributeResult(gender=0, age_group='20-29', race='East Asian')
assert result.gender == 0
assert result.age is None
assert result.age_group == '20-29'
assert result.race == 'East Asian'
assert result.sex == 'Female'
def test_sex_property_female(self):
result = AttributeResult(gender=0)
assert result.sex == 'Female'
def test_sex_property_male(self):
result = AttributeResult(gender=1)
assert result.sex == 'Male'
def test_immutability(self):
result = AttributeResult(gender=1, age=30)
with pytest.raises(AttributeError):
result.age = 31 # type: ignore
def test_repr_age_gender(self):
result = AttributeResult(gender=1, age=25)
repr_str = repr(result)
assert 'AttributeResult' in repr_str
assert 'Male' in repr_str
assert 'age=25' in repr_str
def test_repr_fairface(self):
result = AttributeResult(gender=0, age_group='30-39', race='White')
repr_str = repr(result)
assert 'Female' in repr_str
assert 'age_group=30-39' in repr_str
assert 'race=White' in repr_str
def test_hashable(self):
result = AttributeResult(gender=1, age=25)
hash(result)
class TestFace:
"""Tests for Face dataclass."""
@pytest.fixture
def sample_face(self):
return Face(
bbox=np.array([100, 100, 200, 200]),
confidence=0.95,
landmarks=np.array([[120, 130], [180, 130], [150, 160], [130, 180], [170, 180]]),
)
def test_creation(self, sample_face):
assert sample_face.confidence == 0.95
assert sample_face.bbox.shape == (4,)
assert sample_face.landmarks.shape == (5, 2)
def test_optional_attributes_default_none(self, sample_face):
assert sample_face.embedding is None
assert sample_face.gender is None
assert sample_face.age is None
assert sample_face.age_group is None
assert sample_face.race is None
assert sample_face.emotion is None
assert sample_face.emotion_confidence is None
def test_mutability(self, sample_face):
"""Face should be mutable for FaceAnalyzer enrichment."""
sample_face.gender = 1
sample_face.age = 25
sample_face.embedding = np.random.randn(512)
assert sample_face.gender == 1
assert sample_face.age == 25
assert sample_face.embedding.shape == (512,)
def test_sex_property_none(self, sample_face):
assert sample_face.sex is None
def test_sex_property_female(self, sample_face):
sample_face.gender = 0
assert sample_face.sex == 'Female'
def test_sex_property_male(self, sample_face):
sample_face.gender = 1
assert sample_face.sex == 'Male'
def test_bbox_xyxy(self, sample_face):
bbox_xyxy = sample_face.bbox_xyxy
np.testing.assert_array_equal(bbox_xyxy, [100, 100, 200, 200])
def test_bbox_xywh(self, sample_face):
bbox_xywh = sample_face.bbox_xywh
np.testing.assert_array_equal(bbox_xywh, [100, 100, 100, 100])
def test_to_dict(self, sample_face):
result = sample_face.to_dict()
assert isinstance(result, dict)
assert 'bbox' in result
assert 'confidence' in result
assert 'landmarks' in result
def test_repr_minimal(self, sample_face):
repr_str = repr(sample_face)
assert 'Face' in repr_str
assert 'confidence=0.950' in repr_str
def test_repr_with_attributes(self, sample_face):
sample_face.gender = 1
sample_face.age = 30
sample_face.emotion = 'Happy'
repr_str = repr(sample_face)
assert 'age=30' in repr_str
assert 'sex=Male' in repr_str
assert 'emotion=Happy' in repr_str
def test_compute_similarity_no_embeddings(self, sample_face):
other_face = Face(
bbox=np.array([50, 50, 150, 150]),
confidence=0.90,
landmarks=np.random.randn(5, 2),
)
with pytest.raises(ValueError, match='Both faces must have embeddings'):
sample_face.compute_similarity(other_face)
def test_compute_similarity_with_embeddings(self, sample_face):
# Create normalized embeddings
sample_face.embedding = np.random.randn(512)
sample_face.embedding /= np.linalg.norm(sample_face.embedding)
other_face = Face(
bbox=np.array([50, 50, 150, 150]),
confidence=0.90,
landmarks=np.random.randn(5, 2),
)
other_face.embedding = np.random.randn(512)
other_face.embedding /= np.linalg.norm(other_face.embedding)
similarity = sample_face.compute_similarity(other_face)
assert isinstance(similarity, float)
assert -1 <= similarity <= 1
def test_compute_similarity_same_embedding(self, sample_face):
embedding = np.random.randn(512)
embedding /= np.linalg.norm(embedding)
sample_face.embedding = embedding.copy()
other_face = Face(
bbox=np.array([50, 50, 150, 150]),
confidence=0.90,
landmarks=np.random.randn(5, 2),
embedding=embedding.copy(),
)
similarity = sample_face.compute_similarity(other_face)
assert similarity == pytest.approx(1.0, abs=1e-5)
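For orientation, here is a minimal sketch of how these `Face` fields are populated and compared outside the tests, assembled from the `FaceAnalyzer` usage shown elsewhere in this diff (constructor arguments mirror the notebook snippet above; the image path is illustrative):

```python
import cv2

from uniface import AgeGender, ArcFace, FaceAnalyzer, RetinaFace

analyzer = FaceAnalyzer(
    detector=RetinaFace(confidence_threshold=0.5),
    recognizer=ArcFace(),
    age_gender=AgeGender(),
)

image = cv2.imread('assets/test.jpg')  # illustrative path
faces = analyzer.analyze(image)  # Face objects with embedding/age/gender filled in

for face in faces:
    print(face.bbox, face.confidence, face.sex, face.age)

if len(faces) >= 2:
    # compute_similarity uses the normalized embeddings set by the analyzer
    print(faces[0].compute_similarity(faces[1]))
```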


@@ -1,3 +1,11 @@
# Copyright 2025 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
"""Tests for utility functions (compute_similarity, face_alignment, etc.)."""
from __future__ import annotations
import numpy as np
import pytest
@@ -116,7 +124,7 @@ def test_compute_similarity_dtype():
emb2 = emb2 / np.linalg.norm(emb2)
similarity = compute_similarity(emb1, emb2)
assert isinstance(similarity, (float, np.floating)), f'Similarity should be float, got {type(similarity)}'
assert isinstance(similarity, float | np.floating), f'Similarity should be float, got {type(similarity)}'
# face_alignment tests
@@ -259,4 +267,4 @@ def test_compute_similarity_with_recognition_embeddings():
# Should be a valid similarity score
assert -1.0 <= similarity <= 1.0
assert isinstance(similarity, (float, np.floating))
assert isinstance(similarity, float | np.floating)

tools/README.md (new file, 121 lines)

@@ -0,0 +1,121 @@
# Tools
CLI utilities for testing and running UniFace features.
## Available Tools
| Tool | Description |
|------|-------------|
| `detection.py` | Face detection on image, video, or webcam |
| `face_anonymize.py` | Face anonymization/blurring for privacy |
| `age_gender.py` | Age and gender prediction |
| `face_emotion.py` | Emotion detection (7 or 8 emotions) |
| `gaze_estimation.py` | Gaze direction estimation |
| `landmarks.py` | 106-point facial landmark detection |
| `recognition.py` | Face embedding extraction and comparison |
| `face_analyzer.py` | Complete face analysis (detection + recognition + attributes) |
| `face_search.py` | Real-time face matching against reference |
| `fairface.py` | FairFace attribute prediction (race, gender, age) |
| `spoofing.py` | Face anti-spoofing detection |
| `face_parsing.py` | Face semantic segmentation |
| `video_detection.py` | Face detection on video files with progress bar |
| `batch_process.py` | Batch process folder of images |
| `download_model.py` | Download model weights |
| `sha256_generate.py` | Generate SHA256 hash for model files |
## Unified `--source` Pattern
All tools use a unified `--source` argument (resolved as sketched below) that accepts:
- **Image path**: `--source photo.jpg`
- **Video path**: `--source video.mp4`
- **Camera ID**: `--source 0` (default webcam), `--source 1` (external camera)
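Internally, each tool resolves the source type from this single argument. A condensed sketch of the dispatch logic (the full helper, `get_source_type`, appears in `tools/age_gender.py` later in this diff):

```python
from pathlib import Path

IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}


def get_source_type(source: str) -> str:
    """Classify --source as 'camera', 'image', 'video', or 'unknown'."""
    if source.isdigit():  # '0', '1', ... -> camera index
        return 'camera'
    suffix = Path(source).suffix.lower()
    if suffix in IMAGE_EXTENSIONS:
        return 'image'
    if suffix in VIDEO_EXTENSIONS:
        return 'video'
    return 'unknown'
```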
## Usage Examples
```bash
# Face detection
python tools/detection.py --source assets/test.jpg # image
python tools/detection.py --source video.mp4 # video
python tools/detection.py --source 0 # webcam
# Face anonymization
python tools/face_anonymize.py --source assets/test.jpg --method pixelate
python tools/face_anonymize.py --source video.mp4 --method gaussian
python tools/face_anonymize.py --source 0 --method pixelate
# Age and gender
python tools/age_gender.py --source assets/test.jpg
python tools/age_gender.py --source 0
# Emotion detection
python tools/face_emotion.py --source assets/test.jpg
python tools/face_emotion.py --source 0
# Gaze estimation
python tools/gaze_estimation.py --source assets/test.jpg
python tools/gaze_estimation.py --source 0
# Landmarks
python tools/landmarks.py --source assets/test.jpg
python tools/landmarks.py --source 0
# FairFace attributes
python tools/fairface.py --source assets/test.jpg
python tools/fairface.py --source 0
# Face parsing
python tools/face_parsing.py --source assets/test.jpg
python tools/face_parsing.py --source 0
# Face anti-spoofing
python tools/spoofing.py --source assets/test.jpg
python tools/spoofing.py --source 0
# Face analyzer
python tools/face_analyzer.py --source assets/test.jpg
python tools/face_analyzer.py --source 0
# Face recognition (extract embedding)
python tools/recognition.py --image assets/test.jpg
# Face comparison
python tools/recognition.py --image1 face1.jpg --image2 face2.jpg
# Face search (match against reference)
python tools/face_search.py --reference person.jpg --source 0
python tools/face_search.py --reference person.jpg --source video.mp4
# Video processing with progress bar
python tools/video_detection.py --source video.mp4
python tools/video_detection.py --source video.mp4 --output output.mp4
# Batch processing
python tools/batch_process.py --input images/ --output results/
# Download models
python tools/download_model.py --model-type retinaface
python tools/download_model.py # downloads all
```
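For programmatic use, the embedding comparison behind `recognition.py` and `face_search.py` boils down to the sketch below. It only uses calls that appear in `tools/face_search.py`; treat it as an illustration rather than the exact `recognition.py` implementation.

```python
import cv2

from uniface.detection import RetinaFace
from uniface.face_utils import compute_similarity
from uniface.recognition import ArcFace

detector = RetinaFace()
recognizer = ArcFace()


def embed(path: str):
    """Return the normalized embedding of the first face found in an image."""
    image = cv2.imread(path)
    faces = detector.detect(image)
    if not faces:
        raise RuntimeError(f'No face found in {path}')
    return recognizer.get_normalized_embedding(image, faces[0].landmarks)


similarity = compute_similarity(embed('face1.jpg'), embed('face2.jpg'))
print('Same person' if similarity > 0.4 else 'Different person', f'({similarity:.3f})')
```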
## Common Options
| Option | Description |
|--------|-------------|
| `--source` | Input source: image/video path or camera ID (0, 1, ...) |
| `--detector` | Detector backend: `retinaface` or `scrfd` (`detection.py` uses `--method` and also accepts `yolov5face`) |
| `--threshold` | Visualization confidence threshold (default: varies) |
| `--save-dir` | Output directory (default: `outputs`) |
## Supported Formats
**Images:** `.jpg`, `.jpeg`, `.png`, `.bmp`, `.webp`, `.tiff`
**Videos:** `.mp4`, `.avi`, `.mov`, `.mkv`, `.webm`, `.flv`
**Camera:** Use integer IDs (`0`, `1`, `2`, ...)
## Quick Test
```bash
python tools/detection.py --source assets/test.jpg
```

213
tools/age_gender.py Normal file
View File

@@ -0,0 +1,213 @@
# Copyright 2025 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
"""Age and gender prediction on detected faces.
Usage:
python tools/age_gender.py --source path/to/image.jpg
python tools/age_gender.py --source path/to/video.mp4
python tools/age_gender.py --source 0 # webcam
"""
from __future__ import annotations
import argparse
import os
from pathlib import Path
import cv2
from uniface import SCRFD, AgeGender, RetinaFace
from uniface.visualization import draw_detections
IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}
def get_source_type(source: str) -> str:
"""Determine if source is image, video, or camera."""
if source.isdigit():
return 'camera'
path = Path(source)
suffix = path.suffix.lower()
if suffix in IMAGE_EXTENSIONS:
return 'image'
elif suffix in VIDEO_EXTENSIONS:
return 'video'
else:
return 'unknown'
def draw_age_gender_label(image, bbox, sex: str, age: int):
"""Draw age/gender label above the bounding box."""
x1, y1 = int(bbox[0]), int(bbox[1])
text = f'{sex}, {age}y'
(tw, th), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
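# filled background rectangle keeps the label readable over any background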
cv2.rectangle(image, (x1, y1 - th - 10), (x1 + tw + 10, y1), (0, 255, 0), -1)
cv2.putText(image, text, (x1 + 5, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 2)
def process_image(
detector,
age_gender,
image_path: str,
save_dir: str = 'outputs',
threshold: float = 0.6,
):
"""Process a single image."""
image = cv2.imread(image_path)
if image is None:
print(f"Error: Failed to load image from '{image_path}'")
return
faces = detector.detect(image)
print(f'Detected {len(faces)} face(s)')
if not faces:
return
bboxes = [f.bbox for f in faces]
scores = [f.confidence for f in faces]
landmarks = [f.landmarks for f in faces]
draw_detections(
image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
)
for i, face in enumerate(faces):
result = age_gender.predict(image, face.bbox)
print(f' Face {i + 1}: {result.sex}, {result.age} years old')
draw_age_gender_label(image, face.bbox, result.sex, result.age)
os.makedirs(save_dir, exist_ok=True)
output_path = os.path.join(save_dir, f'{Path(image_path).stem}_age_gender.jpg')
cv2.imwrite(output_path, image)
print(f'Output saved: {output_path}')
def process_video(
detector,
age_gender,
video_path: str,
save_dir: str = 'outputs',
threshold: float = 0.6,
):
"""Process a video file."""
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
print(f"Error: Cannot open video file '{video_path}'")
return
fps = cap.get(cv2.CAP_PROP_FPS)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
os.makedirs(save_dir, exist_ok=True)
output_path = os.path.join(save_dir, f'{Path(video_path).stem}_age_gender.mp4')
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
print(f'Processing video: {video_path} ({total_frames} frames)')
frame_count = 0
while True:
ret, frame = cap.read()
if not ret:
break
frame_count += 1
faces = detector.detect(frame)
bboxes = [f.bbox for f in faces]
scores = [f.confidence for f in faces]
landmarks = [f.landmarks for f in faces]
draw_detections(
image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
)
for face in faces:
result = age_gender.predict(frame, face.bbox)
draw_age_gender_label(frame, face.bbox, result.sex, result.age)
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
out.write(frame)
if frame_count % 100 == 0:
print(f' Processed {frame_count}/{total_frames} frames...')
cap.release()
out.release()
print(f'Done! Output saved: {output_path}')
def run_camera(detector, age_gender, camera_id: int = 0, threshold: float = 0.6):
"""Run real-time detection on webcam."""
cap = cv2.VideoCapture(camera_id)
if not cap.isOpened():
print(f'Cannot open camera {camera_id}')
return
print("Press 'q' to quit")
while True:
ret, frame = cap.read()
if not ret:
break
frame = cv2.flip(frame, 1)
faces = detector.detect(frame)
bboxes = [f.bbox for f in faces]
scores = [f.confidence for f in faces]
landmarks = [f.landmarks for f in faces]
draw_detections(
image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
)
for face in faces:
result = age_gender.predict(frame, face.bbox)
draw_age_gender_label(frame, face.bbox, result.sex, result.age)
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
cv2.imshow('Age & Gender Detection', frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
cap.release()
cv2.destroyAllWindows()
def main():
parser = argparse.ArgumentParser(description='Run age and gender detection')
parser.add_argument('--source', type=str, required=True, help='Image/video path or camera ID (0, 1, ...)')
parser.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
parser.add_argument('--threshold', type=float, default=0.6, help='Visualization threshold')
parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
args = parser.parse_args()
detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
age_gender = AgeGender()
source_type = get_source_type(args.source)
if source_type == 'camera':
run_camera(detector, age_gender, int(args.source), args.threshold)
elif source_type == 'image':
if not os.path.exists(args.source):
print(f'Error: Image not found: {args.source}')
return
process_image(detector, age_gender, args.source, args.save_dir, args.threshold)
elif source_type == 'video':
if not os.path.exists(args.source):
print(f'Error: Video not found: {args.source}')
return
process_video(detector, age_gender, args.source, args.save_dir, args.threshold)
else:
print(f"Error: Unknown source type for '{args.source}'")
print('Supported formats: images (.jpg, .png, ...), videos (.mp4, .avi, ...), or camera ID (0, 1, ...)')
if __name__ == '__main__':
main()

tools/batch_process.py
View File

@@ -1,5 +1,12 @@
# Batch face detection on a folder of images
# Usage: python batch_process.py --input images/ --output results/
# Copyright 2025 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
"""Batch face detection on a folder of images.
Usage:
python tools/batch_process.py --input images/ --output results/
"""
import argparse
from pathlib import Path
@@ -28,9 +35,9 @@ def process_image(detector, image_path: Path, output_path: Path, threshold: floa
faces = detector.detect(image)
# unpack face data for visualization
bboxes = [f['bbox'] for f in faces]
scores = [f['confidence'] for f in faces]
landmarks = [f['landmarks'] for f in faces]
bboxes = [f.bbox for f in faces]
scores = [f.confidence for f in faces]
landmarks = [f.landmarks for f in faces]
draw_detections(
image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
)

196
tools/detection.py Normal file
View File

@@ -0,0 +1,196 @@
# Copyright 2025 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
"""Face detection on image, video, or webcam.
Usage:
python tools/detection.py --source path/to/image.jpg
python tools/detection.py --source path/to/video.mp4
python tools/detection.py --source 0 # webcam
"""
from __future__ import annotations
import argparse
import os
from pathlib import Path
import cv2
from uniface.detection import SCRFD, RetinaFace, YOLOv5Face
from uniface.visualization import draw_detections
IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}
def get_source_type(source: str) -> str:
"""Determine if source is image, video, or camera."""
if source.isdigit():
return 'camera'
path = Path(source)
suffix = path.suffix.lower()
if suffix in IMAGE_EXTENSIONS:
return 'image'
elif suffix in VIDEO_EXTENSIONS:
return 'video'
else:
return 'unknown'
def process_image(detector, image_path: str, threshold: float = 0.6, save_dir: str = 'outputs'):
"""Process a single image."""
image = cv2.imread(image_path)
if image is None:
print(f"Error: Failed to load image from '{image_path}'")
return
faces = detector.detect(image)
if faces:
bboxes = [face.bbox for face in faces]
scores = [face.confidence for face in faces]
landmarks = [face.landmarks for face in faces]
draw_detections(image, bboxes, scores, landmarks, vis_threshold=threshold)
os.makedirs(save_dir, exist_ok=True)
output_path = os.path.join(save_dir, f'{os.path.splitext(os.path.basename(image_path))[0]}_out.jpg')
cv2.imwrite(output_path, image)
print(f'Detected {len(faces)} face(s). Output saved: {output_path}')
def process_video(detector, video_path: str, threshold: float = 0.6, save_dir: str = 'outputs'):
"""Process a video file."""
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
print(f"Error: Cannot open video file '{video_path}'")
return
# Get video properties
fps = cap.get(cv2.CAP_PROP_FPS)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
os.makedirs(save_dir, exist_ok=True)
output_path = os.path.join(save_dir, f'{Path(video_path).stem}_out.mp4')
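# 'mp4v' selects an MPEG-4 codec, matching the .mp4 output extension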
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
print(f'Processing video: {video_path} ({total_frames} frames)')
frame_count = 0
while True:
ret, frame = cap.read()
if not ret:
break
frame_count += 1
faces = detector.detect(frame)
bboxes = [f.bbox for f in faces]
scores = [f.confidence for f in faces]
landmarks = [f.landmarks for f in faces]
draw_detections(
image=frame,
bboxes=bboxes,
scores=scores,
landmarks=landmarks,
vis_threshold=threshold,
draw_score=True,
fancy_bbox=True,
)
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
out.write(frame)
# Show progress
if frame_count % 100 == 0:
print(f' Processed {frame_count}/{total_frames} frames...')
cap.release()
out.release()
print(f'Done! Output saved: {output_path}')
def run_camera(detector, camera_id: int = 0, threshold: float = 0.6):
"""Run real-time detection on webcam."""
cap = cv2.VideoCapture(camera_id)
if not cap.isOpened():
print(f'Cannot open camera {camera_id}')
return
print("Press 'q' to quit")
while True:
ret, frame = cap.read()
if not ret:
break
frame = cv2.flip(frame, 1)  # mirror for natural interaction
faces = detector.detect(frame)
bboxes = [f.bbox for f in faces]
scores = [f.confidence for f in faces]
landmarks = [f.landmarks for f in faces]
draw_detections(
image=frame,
bboxes=bboxes,
scores=scores,
landmarks=landmarks,
vis_threshold=threshold,
draw_score=True,
fancy_bbox=True,
)
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
cv2.imshow('Face Detection', frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
cap.release()
cv2.destroyAllWindows()
def main():
parser = argparse.ArgumentParser(description='Run face detection')
parser.add_argument('--source', type=str, required=True, help='Image/video path or camera ID (0, 1, ...)')
parser.add_argument('--method', type=str, default='retinaface', choices=['retinaface', 'scrfd', 'yolov5face'])
parser.add_argument('--threshold', type=float, default=0.25, help='Visualization threshold')
parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
args = parser.parse_args()
# Initialize detector
if args.method == 'retinaface':
detector = RetinaFace()
elif args.method == 'scrfd':
detector = SCRFD()
else:
from uniface.constants import YOLOv5FaceWeights
detector = YOLOv5Face(model_name=YOLOv5FaceWeights.YOLOV5M)
# Determine source type and process
source_type = get_source_type(args.source)
if source_type == 'camera':
run_camera(detector, int(args.source), args.threshold)
elif source_type == 'image':
if not os.path.exists(args.source):
print(f'Error: Image not found: {args.source}')
return
process_image(detector, args.source, args.threshold, args.save_dir)
elif source_type == 'video':
if not os.path.exists(args.source):
print(f'Error: Video not found: {args.source}')
return
process_video(detector, args.source, args.threshold, args.save_dir)
else:
print(f"Error: Unknown source type for '{args.source}'")
print('Supported formats: images (.jpg, .png, ...), videos (.mp4, .avi, ...), or camera ID (0, 1, ...)')
if __name__ == '__main__':
main()

239
tools/face_analyzer.py Normal file
View File

@@ -0,0 +1,239 @@
# Copyright 2025 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
"""Face analysis using FaceAnalyzer.
Usage:
python tools/face_analyzer.py --source path/to/image.jpg
python tools/face_analyzer.py --source path/to/video.mp4
python tools/face_analyzer.py --source 0 # webcam
"""
from __future__ import annotations
import argparse
import os
from pathlib import Path
import cv2
import numpy as np
from uniface import AgeGender, ArcFace, FaceAnalyzer, RetinaFace
from uniface.visualization import draw_detections
IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}
def get_source_type(source: str) -> str:
"""Determine if source is image, video, or camera."""
if source.isdigit():
return 'camera'
path = Path(source)
suffix = path.suffix.lower()
if suffix in IMAGE_EXTENSIONS:
return 'image'
elif suffix in VIDEO_EXTENSIONS:
return 'video'
else:
return 'unknown'
def draw_face_info(image, face, face_id):
"""Draw face ID and attributes above bounding box."""
x1, y1, _x2, y2 = map(int, face.bbox)
lines = [f'ID: {face_id}', f'Conf: {face.confidence:.2f}']
if face.age and face.sex:
lines.append(f'{face.sex}, {face.age}y')
for i, line in enumerate(lines):
y_pos = y1 - 10 - (len(lines) - 1 - i) * 25
if y_pos < 20:
y_pos = y2 + 20 + i * 25
(tw, th), _ = cv2.getTextSize(line, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
cv2.rectangle(image, (x1, y_pos - th - 5), (x1 + tw + 10, y_pos + 5), (0, 255, 0), -1)
cv2.putText(image, line, (x1 + 5, y_pos), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 2)
def process_image(analyzer, image_path: str, save_dir: str = 'outputs', show_similarity: bool = True):
"""Process a single image."""
image = cv2.imread(image_path)
if image is None:
print(f"Error: Failed to load image from '{image_path}'")
return
faces = analyzer.analyze(image)
print(f'Detected {len(faces)} face(s)')
if not faces:
return
for i, face in enumerate(faces, 1):
info = f' Face {i}: {face.sex}, {face.age}y' if face.age and face.sex else f' Face {i}'
if face.embedding is not None:
info += f' (embedding: {face.embedding.shape})'
print(info)
if show_similarity and len(faces) >= 2:
print('\nSimilarity Matrix:')
n = len(faces)
sim_matrix = np.zeros((n, n))
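# similarity is symmetric, so compute the upper triangle and mirror it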
for i in range(n):
for j in range(i, n):
if i == j:
sim_matrix[i][j] = 1.0
else:
sim = faces[i].compute_similarity(faces[j])
sim_matrix[i][j] = sim
sim_matrix[j][i] = sim
print(' ', end='')
for i in range(n):
print(f' F{i + 1:2d} ', end='')
print('\n ' + '-' * (7 * n))
for i in range(n):
print(f'F{i + 1:2d} | ', end='')
for j in range(n):
print(f'{sim_matrix[i][j]:6.3f} ', end='')
print()
pairs = [(i, j, sim_matrix[i][j]) for i in range(n) for j in range(i + 1, n)]
pairs.sort(key=lambda x: x[2], reverse=True)
print('\nTop matches (>0.4 = same person):')
for i, j, sim in pairs[:3]:
status = 'Same' if sim > 0.4 else 'Different'
print(f' Face {i + 1} ↔ Face {j + 1}: {sim:.3f} ({status})')
bboxes = [f.bbox for f in faces]
scores = [f.confidence for f in faces]
landmarks = [f.landmarks for f in faces]
draw_detections(image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, fancy_bbox=True)
for i, face in enumerate(faces, 1):
draw_face_info(image, face, i)
os.makedirs(save_dir, exist_ok=True)
output_path = os.path.join(save_dir, f'{Path(image_path).stem}_analysis.jpg')
cv2.imwrite(output_path, image)
print(f'Output saved: {output_path}')
def process_video(analyzer, video_path: str, save_dir: str = 'outputs'):
"""Process a video file."""
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
print(f"Error: Cannot open video file '{video_path}'")
return
fps = cap.get(cv2.CAP_PROP_FPS)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
os.makedirs(save_dir, exist_ok=True)
output_path = os.path.join(save_dir, f'{Path(video_path).stem}_analysis.mp4')
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
print(f'Processing video: {video_path} ({total_frames} frames)')
frame_count = 0
while True:
ret, frame = cap.read()
if not ret:
break
frame_count += 1
faces = analyzer.analyze(frame)
bboxes = [f.bbox for f in faces]
scores = [f.confidence for f in faces]
landmarks = [f.landmarks for f in faces]
draw_detections(image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, fancy_bbox=True)
for i, face in enumerate(faces, 1):
draw_face_info(frame, face, i)
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
out.write(frame)
if frame_count % 100 == 0:
print(f' Processed {frame_count}/{total_frames} frames...')
cap.release()
out.release()
print(f'Done! Output saved: {output_path}')
def run_camera(analyzer, camera_id: int = 0):
"""Run real-time analysis on webcam."""
cap = cv2.VideoCapture(camera_id)
if not cap.isOpened():
print(f'Cannot open camera {camera_id}')
return
print("Press 'q' to quit")
while True:
ret, frame = cap.read()
if not ret:
break
frame = cv2.flip(frame, 1)
faces = analyzer.analyze(frame)
bboxes = [f.bbox for f in faces]
scores = [f.confidence for f in faces]
landmarks = [f.landmarks for f in faces]
draw_detections(image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, fancy_bbox=True)
for i, face in enumerate(faces, 1):
draw_face_info(frame, face, i)
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
cv2.imshow('Face Analyzer', frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
cap.release()
cv2.destroyAllWindows()
def main():
parser = argparse.ArgumentParser(description='Face analysis with detection, recognition, and attributes')
parser.add_argument('--source', type=str, required=True, help='Image/video path or camera ID (0, 1, ...)')
parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
parser.add_argument('--no-similarity', action='store_true', help='Skip similarity matrix computation')
args = parser.parse_args()
detector = RetinaFace()
recognizer = ArcFace()
age_gender = AgeGender()
analyzer = FaceAnalyzer(detector, recognizer, age_gender)
source_type = get_source_type(args.source)
if source_type == 'camera':
run_camera(analyzer, int(args.source))
elif source_type == 'image':
if not os.path.exists(args.source):
print(f'Error: Image not found: {args.source}')
return
process_image(analyzer, args.source, args.save_dir, show_similarity=not args.no_similarity)
elif source_type == 'video':
if not os.path.exists(args.source):
print(f'Error: Video not found: {args.source}')
return
process_video(analyzer, args.source, args.save_dir)
else:
print(f"Error: Unknown source type for '{args.source}'")
print('Supported formats: images (.jpg, .png, ...), videos (.mp4, .avi, ...), or camera ID (0, 1, ...)')
if __name__ == '__main__':
main()

281
tools/face_anonymize.py Normal file
View File

@@ -0,0 +1,281 @@
# Copyright 2025 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
"""Face anonymization/blurring for privacy.
Usage:
python tools/face_anonymize.py --source path/to/image.jpg --method pixelate
python tools/face_anonymize.py --source path/to/video.mp4 --method gaussian
python tools/face_anonymize.py --source 0 --method pixelate # webcam
"""
from __future__ import annotations
import argparse
import os
from pathlib import Path
import cv2
from uniface import RetinaFace
from uniface.privacy import BlurFace
IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}
def get_source_type(source: str) -> str:
"""Determine if source is image, video, or camera."""
if source.isdigit():
return 'camera'
path = Path(source)
suffix = path.suffix.lower()
if suffix in IMAGE_EXTENSIONS:
return 'image'
elif suffix in VIDEO_EXTENSIONS:
return 'video'
else:
return 'unknown'
def process_image(
detector,
blurrer: BlurFace,
image_path: str,
save_dir: str = 'outputs',
show_detections: bool = False,
):
"""Process a single image."""
image = cv2.imread(image_path)
if image is None:
print(f"Error: Failed to load image from '{image_path}'")
return
faces = detector.detect(image)
print(f'Detected {len(faces)} face(s)')
if show_detections and faces:
from uniface.visualization import draw_detections
preview = image.copy()
bboxes = [face.bbox for face in faces]
scores = [face.confidence for face in faces]
landmarks = [face.landmarks for face in faces]
draw_detections(preview, bboxes, scores, landmarks)
cv2.imshow('Detections (Press any key to continue)', preview)
cv2.waitKey(0)
cv2.destroyAllWindows()
if faces:
anonymized = blurrer.anonymize(image, faces)
else:
anonymized = image
os.makedirs(save_dir, exist_ok=True)
basename = os.path.splitext(os.path.basename(image_path))[0]
output_path = os.path.join(save_dir, f'{basename}_anonymized.jpg')
cv2.imwrite(output_path, anonymized)
print(f'Output saved: {output_path}')
def process_video(
detector,
blurrer: BlurFace,
video_path: str,
save_dir: str = 'outputs',
):
"""Process a video file."""
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
print(f"Error: Cannot open video file '{video_path}'")
return
fps = cap.get(cv2.CAP_PROP_FPS)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
os.makedirs(save_dir, exist_ok=True)
output_path = os.path.join(save_dir, f'{Path(video_path).stem}_anonymized.mp4')
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
print(f'Processing video: {video_path} ({total_frames} frames)')
frame_count = 0
while True:
ret, frame = cap.read()
if not ret:
break
frame_count += 1
faces = detector.detect(frame)
if faces:
frame = blurrer.anonymize(frame, faces, inplace=True)
out.write(frame)
if frame_count % 100 == 0:
print(f' Processed {frame_count}/{total_frames} frames...')
cap.release()
out.release()
print(f'Done! Output saved: {output_path}')
def run_camera(detector, blurrer: BlurFace, camera_id: int = 0):
"""Run real-time anonymization on webcam."""
cap = cv2.VideoCapture(camera_id)
if not cap.isOpened():
print(f'Cannot open camera {camera_id}')
return
print("Press 'q' to quit")
while True:
ret, frame = cap.read()
if not ret:
break
frame = cv2.flip(frame, 1)
faces = detector.detect(frame)
if faces:
frame = blurrer.anonymize(frame, faces, inplace=True)
cv2.putText(
frame,
f'Faces blurred: {len(faces)} | Method: {blurrer.method}',
(10, 30),
cv2.FONT_HERSHEY_SIMPLEX,
0.7,
(0, 255, 0),
2,
)
cv2.imshow('Face Anonymization (Press q to quit)', frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
cap.release()
cv2.destroyAllWindows()
def main():
parser = argparse.ArgumentParser(
description='Face anonymization using various blur methods',
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog="""
Examples:
# Anonymize image with pixelation (default)
python tools/face_anonymize.py --source photo.jpg
# Use Gaussian blur with custom strength
python tools/face_anonymize.py --source photo.jpg --method gaussian --blur-strength 5.0
# Real-time webcam anonymization
python tools/face_anonymize.py --source 0 --method pixelate
# Black boxes for maximum privacy
python tools/face_anonymize.py --source photo.jpg --method blackout
# Custom pixelation intensity
python tools/face_anonymize.py --source photo.jpg --method pixelate --pixel-blocks 5
""",
)
# Input/output
parser.add_argument('--source', type=str, required=True, help='Image/video path or camera ID (0, 1, ...)')
parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
# Blur method
parser.add_argument(
'--method',
type=str,
default='pixelate',
choices=['gaussian', 'pixelate', 'blackout', 'elliptical', 'median'],
help='Blur method (default: pixelate)',
)
# Method-specific parameters
parser.add_argument(
'--blur-strength',
type=float,
default=3.0,
help='Blur strength for gaussian/elliptical/median (default: 3.0)',
)
parser.add_argument(
'--pixel-blocks',
type=int,
default=20,
help='Number of pixel blocks for pixelate (default: 20, lower=more pixelated)',
)
parser.add_argument(
'--color',
type=str,
default='0,0,0',
help='Fill color for blackout as R,G,B (default: 0,0,0 for black)',
)
parser.add_argument('--margin', type=int, default=20, help='Margin for elliptical blur (default: 20)')
# Detection
parser.add_argument(
'--confidence-threshold',
type=float,
default=0.5,
help='Detection confidence threshold (default: 0.5)',
)
# Visualization
parser.add_argument(
'--show-detections',
action='store_true',
help='Show detection boxes before blurring (image mode only)',
)
args = parser.parse_args()
# Parse color
color_values = [int(x) for x in args.color.split(',')]
if len(color_values) != 3:
parser.error('--color must be in format R,G,B (e.g., 0,0,0)')
color = tuple(color_values)
# Initialize detector
print(f'Initializing face detector (confidence_threshold={args.confidence_threshold})...')
detector = RetinaFace(confidence_threshold=args.confidence_threshold)
# Initialize blurrer
print(f'Initializing blur method: {args.method}')
blurrer = BlurFace(
method=args.method,
blur_strength=args.blur_strength,
pixel_blocks=args.pixel_blocks,
color=color,
margin=args.margin,
)
source_type = get_source_type(args.source)
if source_type == 'camera':
run_camera(detector, blurrer, int(args.source))
elif source_type == 'image':
if not os.path.exists(args.source):
print(f'Error: Image not found: {args.source}')
return
process_image(detector, blurrer, args.source, args.save_dir, args.show_detections)
elif source_type == 'video':
if not os.path.exists(args.source):
print(f'Error: Video not found: {args.source}')
return
process_video(detector, blurrer, args.source, args.save_dir)
else:
print(f"Error: Unknown source type for '{args.source}'")
print('Supported formats: images (.jpg, .png, ...), videos (.mp4, .avi, ...), or camera ID (0, 1, ...)')
if __name__ == '__main__':
main()

213
tools/face_emotion.py Normal file
View File

@@ -0,0 +1,213 @@
# Copyright 2025 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
"""Emotion detection on detected faces.
Usage:
python tools/face_emotion.py --source path/to/image.jpg
python tools/face_emotion.py --source path/to/video.mp4
python tools/face_emotion.py --source 0 # webcam
"""
from __future__ import annotations
import argparse
import os
from pathlib import Path
import cv2
from uniface import SCRFD, Emotion, RetinaFace
from uniface.visualization import draw_detections
IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}
def get_source_type(source: str) -> str:
"""Determine if source is image, video, or camera."""
if source.isdigit():
return 'camera'
path = Path(source)
suffix = path.suffix.lower()
if suffix in IMAGE_EXTENSIONS:
return 'image'
elif suffix in VIDEO_EXTENSIONS:
return 'video'
else:
return 'unknown'
def draw_emotion_label(image, bbox, emotion: str, confidence: float):
"""Draw emotion label above the bounding box."""
x1, y1 = int(bbox[0]), int(bbox[1])
text = f'{emotion} ({confidence:.2f})'
(tw, th), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
cv2.rectangle(image, (x1, y1 - th - 10), (x1 + tw + 10, y1), (255, 0, 0), -1)
cv2.putText(image, text, (x1 + 5, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
def process_image(
detector,
emotion_predictor,
image_path: str,
save_dir: str = 'outputs',
threshold: float = 0.6,
):
"""Process a single image."""
image = cv2.imread(image_path)
if image is None:
print(f"Error: Failed to load image from '{image_path}'")
return
faces = detector.detect(image)
print(f'Detected {len(faces)} face(s)')
if not faces:
return
bboxes = [f.bbox for f in faces]
scores = [f.confidence for f in faces]
landmarks = [f.landmarks for f in faces]
draw_detections(
image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
)
for i, face in enumerate(faces):
result = emotion_predictor.predict(image, face.landmarks)
print(f' Face {i + 1}: {result.emotion} (confidence: {result.confidence:.3f})')
draw_emotion_label(image, face.bbox, result.emotion, result.confidence)
os.makedirs(save_dir, exist_ok=True)
output_path = os.path.join(save_dir, f'{Path(image_path).stem}_emotion.jpg')
cv2.imwrite(output_path, image)
print(f'Output saved: {output_path}')
def process_video(
detector,
emotion_predictor,
video_path: str,
save_dir: str = 'outputs',
threshold: float = 0.6,
):
"""Process a video file."""
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
print(f"Error: Cannot open video file '{video_path}'")
return
fps = cap.get(cv2.CAP_PROP_FPS)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
os.makedirs(save_dir, exist_ok=True)
output_path = os.path.join(save_dir, f'{Path(video_path).stem}_emotion.mp4')
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
print(f'Processing video: {video_path} ({total_frames} frames)')
frame_count = 0
while True:
ret, frame = cap.read()
if not ret:
break
frame_count += 1
faces = detector.detect(frame)
bboxes = [f.bbox for f in faces]
scores = [f.confidence for f in faces]
landmarks = [f.landmarks for f in faces]
draw_detections(
image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
)
for face in faces:
result = emotion_predictor.predict(frame, face.landmarks)
draw_emotion_label(frame, face.bbox, result.emotion, result.confidence)
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
out.write(frame)
if frame_count % 100 == 0:
print(f' Processed {frame_count}/{total_frames} frames...')
cap.release()
out.release()
print(f'Done! Output saved: {output_path}')
def run_camera(detector, emotion_predictor, camera_id: int = 0, threshold: float = 0.6):
"""Run real-time detection on webcam."""
cap = cv2.VideoCapture(camera_id)
if not cap.isOpened():
print(f'Cannot open camera {camera_id}')
return
print("Press 'q' to quit")
while True:
ret, frame = cap.read()
if not ret:
break
frame = cv2.flip(frame, 1)
faces = detector.detect(frame)
bboxes = [f.bbox for f in faces]
scores = [f.confidence for f in faces]
landmarks = [f.landmarks for f in faces]
draw_detections(
image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
)
for face in faces:
result = emotion_predictor.predict(frame, face.landmarks)
draw_emotion_label(frame, face.bbox, result.emotion, result.confidence)
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
cv2.imshow('Emotion Detection', frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
cap.release()
cv2.destroyAllWindows()
def main():
parser = argparse.ArgumentParser(description='Run emotion detection')
parser.add_argument('--source', type=str, required=True, help='Image/video path or camera ID (0, 1, ...)')
parser.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
parser.add_argument('--threshold', type=float, default=0.6, help='Visualization threshold')
parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
args = parser.parse_args()
detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
emotion_predictor = Emotion()
source_type = get_source_type(args.source)
if source_type == 'camera':
run_camera(detector, emotion_predictor, int(args.source), args.threshold)
elif source_type == 'image':
if not os.path.exists(args.source):
print(f'Error: Image not found: {args.source}')
return
process_image(detector, emotion_predictor, args.source, args.save_dir, args.threshold)
elif source_type == 'video':
if not os.path.exists(args.source):
print(f'Error: Video not found: {args.source}')
return
process_video(detector, emotion_predictor, args.source, args.save_dir, args.threshold)
else:
print(f"Error: Unknown source type for '{args.source}'")
print('Supported formats: images (.jpg, .png, ...), videos (.mp4, .avi, ...), or camera ID (0, 1, ...)')
if __name__ == '__main__':
main()

250
tools/face_parsing.py Normal file
View File

@@ -0,0 +1,250 @@
# Copyright 2025 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
"""Face parsing on detected faces.
Usage:
python tools/face_parsing.py --source path/to/image.jpg
python tools/face_parsing.py --source path/to/video.mp4
python tools/face_parsing.py --source 0 # webcam
"""
from __future__ import annotations
import argparse
import os
from pathlib import Path
import cv2
import numpy as np
from uniface import RetinaFace
from uniface.constants import ParsingWeights
from uniface.parsing import BiSeNet
from uniface.visualization import vis_parsing_maps
IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}
def get_source_type(source: str) -> str:
"""Determine if source is image, video, or camera."""
if source.isdigit():
return 'camera'
path = Path(source)
suffix = path.suffix.lower()
if suffix in IMAGE_EXTENSIONS:
return 'image'
elif suffix in VIDEO_EXTENSIONS:
return 'video'
else:
return 'unknown'
def expand_bbox(
bbox: np.ndarray,
image_shape: tuple[int, int],
expand_ratio: float = 0.2,
expand_top_ratio: float = 0.4,
) -> tuple[int, int, int, int]:
"""
Expand bounding box to include full head region for face parsing.
Face detection typically returns tight face boxes, but face parsing
requires the full head including hair, ears, and neck.
Args:
bbox: Original bounding box [x1, y1, x2, y2].
image_shape: Image dimensions as (height, width).
expand_ratio: Expansion ratio for left, right, and bottom (default: 0.2 = 20%).
expand_top_ratio: Expansion ratio for top to capture hair/forehead (default: 0.4 = 40%).
Returns:
Tuple[int, int, int, int]: Expanded bbox (x1, y1, x2, y2) clamped to image bounds.
"""
x1, y1, x2, y2 = map(int, bbox[:4])
height, width = image_shape[:2]
face_width = x2 - x1
face_height = y2 - y1
expand_x = int(face_width * expand_ratio)
expand_y_bottom = int(face_height * expand_ratio)
expand_y_top = int(face_height * expand_top_ratio)
new_x1 = max(0, x1 - expand_x)
new_y1 = max(0, y1 - expand_y_top)
new_x2 = min(width, x2 + expand_x)
new_y2 = min(height, y2 + expand_y_bottom)
return new_x1, new_y1, new_x2, new_y2
def process_image(detector, parser, image_path: str, save_dir: str = 'outputs', expand_ratio: float = 0.2):
"""Process a single image."""
image = cv2.imread(image_path)
if image is None:
print(f"Error: Failed to load image from '{image_path}'")
return
faces = detector.detect(image)
print(f'Detected {len(faces)} face(s)')
result_image = image.copy()
for i, face in enumerate(faces):
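# expand the tight detection box so the crop also covers hair, ears, and neck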
x1, y1, x2, y2 = expand_bbox(face.bbox, image.shape, expand_ratio=expand_ratio)
face_crop = image[y1:y2, x1:x2]
if face_crop.size == 0:
continue
mask = parser.parse(face_crop)
print(f' Face {i + 1}: parsed with {len(set(mask.flatten()))} unique classes')
face_crop_rgb = cv2.cvtColor(face_crop, cv2.COLOR_BGR2RGB)
vis_result = vis_parsing_maps(face_crop_rgb, mask, save_image=False)
result_image[y1:y2, x1:x2] = vis_result
cv2.rectangle(result_image, (x1, y1), (x2, y2), (0, 255, 0), 2)
os.makedirs(save_dir, exist_ok=True)
output_path = os.path.join(save_dir, f'{Path(image_path).stem}_parsing.jpg')
cv2.imwrite(output_path, result_image)
print(f'Output saved: {output_path}')
def process_video(detector, parser, video_path: str, save_dir: str = 'outputs', expand_ratio: float = 0.2):
"""Process a video file."""
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
print(f"Error: Cannot open video file '{video_path}'")
return
fps = cap.get(cv2.CAP_PROP_FPS)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
os.makedirs(save_dir, exist_ok=True)
output_path = os.path.join(save_dir, f'{Path(video_path).stem}_parsing.mp4')
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
print(f'Processing video: {video_path} ({total_frames} frames)')
frame_count = 0
while True:
ret, frame = cap.read()
if not ret:
break
frame_count += 1
faces = detector.detect(frame)
for face in faces:
x1, y1, x2, y2 = expand_bbox(face.bbox, frame.shape, expand_ratio=expand_ratio)
face_crop = frame[y1:y2, x1:x2]
if face_crop.size == 0:
continue
mask = parser.parse(face_crop)
face_crop_rgb = cv2.cvtColor(face_crop, cv2.COLOR_BGR2RGB)
vis_result = vis_parsing_maps(face_crop_rgb, mask, save_image=False)
frame[y1:y2, x1:x2] = vis_result
cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
out.write(frame)
if frame_count % 100 == 0:
print(f' Processed {frame_count}/{total_frames} frames...')
cap.release()
out.release()
print(f'Done! Output saved: {output_path}')
def run_camera(detector, parser, camera_id: int = 0, expand_ratio: float = 0.2):
"""Run real-time detection on webcam."""
cap = cv2.VideoCapture(camera_id)
if not cap.isOpened():
print(f'Cannot open camera {camera_id}')
return
print("Press 'q' to quit")
while True:
ret, frame = cap.read()
if not ret:
break
frame = cv2.flip(frame, 1)
faces = detector.detect(frame)
for face in faces:
x1, y1, x2, y2 = expand_bbox(face.bbox, frame.shape, expand_ratio=expand_ratio)
face_crop = frame[y1:y2, x1:x2]
if face_crop.size == 0:
continue
mask = parser.parse(face_crop)
face_crop_rgb = cv2.cvtColor(face_crop, cv2.COLOR_BGR2RGB)
vis_result = vis_parsing_maps(face_crop_rgb, mask, save_image=False)
frame[y1:y2, x1:x2] = vis_result
cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
cv2.imshow('Face Parsing', frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
cap.release()
cv2.destroyAllWindows()
def main():
parser_arg = argparse.ArgumentParser(description='Run face parsing')
parser_arg.add_argument('--source', type=str, required=True, help='Image/video path or camera ID (0, 1, ...)')
parser_arg.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
parser_arg.add_argument(
'--model', type=str, default=ParsingWeights.RESNET18, choices=[ParsingWeights.RESNET18, ParsingWeights.RESNET34]
)
parser_arg.add_argument(
'--expand-ratio',
type=float,
default=0.2,
help='Bbox expansion ratio for full head coverage (default: 0.2 = 20%%)',
)
args = parser_arg.parse_args()
detector = RetinaFace()
parser = BiSeNet(model_name=args.model)
source_type = get_source_type(args.source)
if source_type == 'camera':
run_camera(detector, parser, int(args.source), expand_ratio=args.expand_ratio)
elif source_type == 'image':
if not os.path.exists(args.source):
print(f'Error: Image not found: {args.source}')
return
process_image(detector, parser, args.source, args.save_dir, expand_ratio=args.expand_ratio)
elif source_type == 'video':
if not os.path.exists(args.source):
print(f'Error: Video not found: {args.source}')
return
process_video(detector, parser, args.source, args.save_dir, expand_ratio=args.expand_ratio)
else:
print(f"Error: Unknown source type for '{args.source}'")
print('Supported formats: images (.jpg, .png, ...), videos (.mp4, .avi, ...), or camera ID (0, 1, ...)')
if __name__ == '__main__':
main()

190
tools/face_search.py Normal file
View File

@@ -0,0 +1,190 @@
# Copyright 2025 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
"""Real-time face search: match faces against a reference image.
Usage:
python tools/face_search.py --reference person.jpg --source 0 # webcam
python tools/face_search.py --reference person.jpg --source video.mp4
"""
from __future__ import annotations
import argparse
import os
from pathlib import Path
import cv2
import numpy as np
from uniface.detection import SCRFD, RetinaFace
from uniface.face_utils import compute_similarity
from uniface.recognition import ArcFace, MobileFace, SphereFace
IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}
def get_source_type(source: str) -> str:
"""Determine if source is image, video, or camera."""
if source.isdigit():
return 'camera'
path = Path(source)
suffix = path.suffix.lower()
if suffix in IMAGE_EXTENSIONS:
return 'image'
elif suffix in VIDEO_EXTENSIONS:
return 'video'
else:
return 'unknown'
def get_recognizer(name: str):
"""Get recognizer by name."""
if name == 'arcface':
return ArcFace()
elif name == 'mobileface':
return MobileFace()
else:
return SphereFace()
def extract_reference_embedding(detector, recognizer, image_path: str) -> np.ndarray:
"""Extract embedding from reference image."""
image = cv2.imread(image_path)
if image is None:
raise RuntimeError(f'Failed to load image: {image_path}')
faces = detector.detect(image)
if not faces:
raise RuntimeError('No faces found in reference image.')
landmarks = faces[0].landmarks
return recognizer.get_normalized_embedding(image, landmarks)
def process_frame(frame, detector, recognizer, ref_embedding: np.ndarray, threshold: float = 0.4):
"""Process a single frame and return annotated frame."""
faces = detector.detect(frame)
for face in faces:
bbox = face.bbox
landmarks = face.landmarks
x1, y1, x2, y2 = map(int, bbox)
embedding = recognizer.get_normalized_embedding(frame, landmarks)
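# similarity of normalized embeddings; scores above the threshold count as a match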
sim = compute_similarity(ref_embedding, embedding)
label = f'Match ({sim:.2f})' if sim > threshold else f'Unknown ({sim:.2f})'
color = (0, 255, 0) if sim > threshold else (0, 0, 255)
cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)
return frame
def process_video(detector, recognizer, ref_embedding: np.ndarray, video_path: str, save_dir: str, threshold: float):
"""Process a video file."""
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
print(f"Error: Cannot open video file '{video_path}'")
return
fps = cap.get(cv2.CAP_PROP_FPS)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
os.makedirs(save_dir, exist_ok=True)
output_path = os.path.join(save_dir, f'{Path(video_path).stem}_search.mp4')
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
print(f'Processing video: {video_path} ({total_frames} frames)')
frame_count = 0
while True:
ret, frame = cap.read()
if not ret:
break
frame_count += 1
frame = process_frame(frame, detector, recognizer, ref_embedding, threshold)
out.write(frame)
if frame_count % 100 == 0:
print(f' Processed {frame_count}/{total_frames} frames...')
cap.release()
out.release()
print(f'Done! Output saved: {output_path}')
def run_camera(detector, recognizer, ref_embedding: np.ndarray, camera_id: int = 0, threshold: float = 0.4):
"""Run real-time face search on webcam."""
cap = cv2.VideoCapture(camera_id)
if not cap.isOpened():
print(f'Cannot open camera {camera_id}')
return
print("Press 'q' to quit")
while True:
ret, frame = cap.read()
if not ret:
break
frame = cv2.flip(frame, 1)
frame = process_frame(frame, detector, recognizer, ref_embedding, threshold)
cv2.imshow('Face Recognition', frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
cap.release()
cv2.destroyAllWindows()
def main():
parser = argparse.ArgumentParser(description='Face search using a reference image')
parser.add_argument('--reference', type=str, required=True, help='Reference face image')
parser.add_argument('--source', type=str, required=True, help='Video path or camera ID (0, 1, ...)')
parser.add_argument('--threshold', type=float, default=0.4, help='Match threshold')
parser.add_argument('--detector', type=str, default='scrfd', choices=['retinaface', 'scrfd'])
parser.add_argument(
'--recognizer',
type=str,
default='arcface',
choices=['arcface', 'mobileface', 'sphereface'],
)
parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
args = parser.parse_args()
if not os.path.exists(args.reference):
print(f'Error: Reference image not found: {args.reference}')
return
detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
recognizer = get_recognizer(args.recognizer)
print(f'Loading reference: {args.reference}')
ref_embedding = extract_reference_embedding(detector, recognizer, args.reference)
source_type = get_source_type(args.source)
if source_type == 'camera':
run_camera(detector, recognizer, ref_embedding, int(args.source), args.threshold)
elif source_type == 'video':
if not os.path.exists(args.source):
print(f'Error: Video not found: {args.source}')
return
process_video(detector, recognizer, ref_embedding, args.source, args.save_dir, args.threshold)
else:
print(f"Error: Source must be a video file or camera ID, not '{args.source}'")
print('Supported formats: videos (.mp4, .avi, ...) or camera ID (0, 1, ...)')
if __name__ == '__main__':
main()

214
tools/fairface.py Normal file
View File

@@ -0,0 +1,214 @@
# Copyright 2025 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
"""FairFace attribute prediction (race, gender, age) on detected faces.
Usage:
python tools/fairface.py --source path/to/image.jpg
python tools/fairface.py --source path/to/video.mp4
python tools/fairface.py --source 0 # webcam
"""
from __future__ import annotations
import argparse
import os
from pathlib import Path
import cv2
from uniface import SCRFD, RetinaFace
from uniface.attribute import FairFace
from uniface.visualization import draw_detections
IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}
def get_source_type(source: str) -> str:
"""Determine if source is image, video, or camera."""
if source.isdigit():
return 'camera'
path = Path(source)
suffix = path.suffix.lower()
if suffix in IMAGE_EXTENSIONS:
return 'image'
elif suffix in VIDEO_EXTENSIONS:
return 'video'
else:
return 'unknown'
def draw_fairface_label(image, bbox, sex: str, age_group: str, race: str):
"""Draw FairFace attributes above the bounding box."""
x1, y1 = int(bbox[0]), int(bbox[1])
text = f'{sex}, {age_group}, {race}'
(tw, th), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)
cv2.rectangle(image, (x1, y1 - th - 10), (x1 + tw + 10, y1), (0, 255, 0), -1)
cv2.putText(image, text, (x1 + 5, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 2)
def process_image(
detector,
fairface,
image_path: str,
save_dir: str = 'outputs',
threshold: float = 0.6,
):
"""Process a single image."""
image = cv2.imread(image_path)
if image is None:
print(f"Error: Failed to load image from '{image_path}'")
return
faces = detector.detect(image)
print(f'Detected {len(faces)} face(s)')
if not faces:
return
bboxes = [f.bbox for f in faces]
scores = [f.confidence for f in faces]
landmarks = [f.landmarks for f in faces]
draw_detections(
image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
)
for i, face in enumerate(faces):
result = fairface.predict(image, face.bbox)
print(f' Face {i + 1}: {result.sex}, {result.age_group}, {result.race}')
draw_fairface_label(image, face.bbox, result.sex, result.age_group, result.race)
os.makedirs(save_dir, exist_ok=True)
output_path = os.path.join(save_dir, f'{Path(image_path).stem}_fairface.jpg')
cv2.imwrite(output_path, image)
print(f'Output saved: {output_path}')
def process_video(
detector,
fairface,
video_path: str,
save_dir: str = 'outputs',
threshold: float = 0.6,
):
"""Process a video file."""
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
print(f"Error: Cannot open video file '{video_path}'")
return
fps = cap.get(cv2.CAP_PROP_FPS)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
os.makedirs(save_dir, exist_ok=True)
output_path = os.path.join(save_dir, f'{Path(video_path).stem}_fairface.mp4')
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
print(f'Processing video: {video_path} ({total_frames} frames)')
frame_count = 0
while True:
ret, frame = cap.read()
if not ret:
break
frame_count += 1
faces = detector.detect(frame)
bboxes = [f.bbox for f in faces]
scores = [f.confidence for f in faces]
landmarks = [f.landmarks for f in faces]
draw_detections(
image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
)
for face in faces:
result = fairface.predict(frame, face.bbox)
draw_fairface_label(frame, face.bbox, result.sex, result.age_group, result.race)
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
out.write(frame)
if frame_count % 100 == 0:
print(f' Processed {frame_count}/{total_frames} frames...')
cap.release()
out.release()
print(f'Done! Output saved: {output_path}')
def run_camera(detector, fairface, camera_id: int = 0, threshold: float = 0.6):
"""Run real-time detection on webcam."""
cap = cv2.VideoCapture(camera_id)
if not cap.isOpened():
print(f'Cannot open camera {camera_id}')
return
print("Press 'q' to quit")
while True:
ret, frame = cap.read()
if not ret:
break
frame = cv2.flip(frame, 1)
faces = detector.detect(frame)
bboxes = [f.bbox for f in faces]
scores = [f.confidence for f in faces]
landmarks = [f.landmarks for f in faces]
draw_detections(
image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
)
for face in faces:
result = fairface.predict(frame, face.bbox)
draw_fairface_label(frame, face.bbox, result.sex, result.age_group, result.race)
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
cv2.imshow('FairFace Detection', frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
cap.release()
cv2.destroyAllWindows()
def main():
parser = argparse.ArgumentParser(description='Run FairFace attribute prediction (race, gender, age)')
parser.add_argument('--source', type=str, required=True, help='Image/video path or camera ID (0, 1, ...)')
parser.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
parser.add_argument('--threshold', type=float, default=0.6, help='Visualization threshold')
parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
args = parser.parse_args()
detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
fairface = FairFace()
source_type = get_source_type(args.source)
if source_type == 'camera':
run_camera(detector, fairface, int(args.source), args.threshold)
elif source_type == 'image':
if not os.path.exists(args.source):
print(f'Error: Image not found: {args.source}')
return
process_image(detector, fairface, args.source, args.save_dir, args.threshold)
elif source_type == 'video':
if not os.path.exists(args.source):
print(f'Error: Video not found: {args.source}')
return
process_video(detector, fairface, args.source, args.save_dir, args.threshold)
else:
print(f"Error: Unknown source type for '{args.source}'")
print('Supported formats: images (.jpg, .png, ...), videos (.mp4, .avi, ...), or camera ID (0, 1, ...)')
if __name__ == '__main__':
main()

190
tools/gaze_estimation.py Normal file
View File

@@ -0,0 +1,190 @@
# Copyright 2025 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
"""Gaze estimation on detected faces.
Usage:
python tools/gaze_estimation.py --source path/to/image.jpg
python tools/gaze_estimation.py --source path/to/video.mp4
python tools/gaze_estimation.py --source 0 # webcam
"""
from __future__ import annotations
import argparse
import os
from pathlib import Path
import cv2
import numpy as np
from uniface import RetinaFace
from uniface.gaze import MobileGaze
from uniface.visualization import draw_gaze
IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}
def get_source_type(source: str) -> str:
"""Determine if source is image, video, or camera."""
if source.isdigit():
return 'camera'
path = Path(source)
suffix = path.suffix.lower()
if suffix in IMAGE_EXTENSIONS:
return 'image'
elif suffix in VIDEO_EXTENSIONS:
return 'video'
else:
return 'unknown'
def process_image(detector, gaze_estimator, image_path: str, save_dir: str = 'outputs'):
"""Process a single image."""
image = cv2.imread(image_path)
if image is None:
print(f"Error: Failed to load image from '{image_path}'")
return
faces = detector.detect(image)
print(f'Detected {len(faces)} face(s)')
for i, face in enumerate(faces):
bbox = face.bbox
x1, y1, x2, y2 = map(int, bbox[:4])
face_crop = image[y1:y2, x1:x2]
if face_crop.size == 0:
continue
result = gaze_estimator.estimate(face_crop)
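# pitch and yaw are returned in radians; convert to degrees for display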
print(f' Face {i + 1}: pitch={np.degrees(result.pitch):.1f}°, yaw={np.degrees(result.yaw):.1f}°')
draw_gaze(image, bbox, result.pitch, result.yaw, draw_angles=True)
os.makedirs(save_dir, exist_ok=True)
output_path = os.path.join(save_dir, f'{Path(image_path).stem}_gaze.jpg')
cv2.imwrite(output_path, image)
print(f'Output saved: {output_path}')
def process_video(detector, gaze_estimator, video_path: str, save_dir: str = 'outputs'):
"""Process a video file."""
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
print(f"Error: Cannot open video file '{video_path}'")
return
fps = cap.get(cv2.CAP_PROP_FPS)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
os.makedirs(save_dir, exist_ok=True)
output_path = os.path.join(save_dir, f'{Path(video_path).stem}_gaze.mp4')
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
print(f'Processing video: {video_path} ({total_frames} frames)')
frame_count = 0
while True:
ret, frame = cap.read()
if not ret:
break
frame_count += 1
faces = detector.detect(frame)
for face in faces:
bbox = face.bbox
x1, y1, x2, y2 = map(int, bbox[:4])
face_crop = frame[y1:y2, x1:x2]
if face_crop.size == 0:
continue
result = gaze_estimator.estimate(face_crop)
draw_gaze(frame, bbox, result.pitch, result.yaw)
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
out.write(frame)
if frame_count % 100 == 0:
print(f' Processed {frame_count}/{total_frames} frames...')
cap.release()
out.release()
print(f'Done! Output saved: {output_path}')
def run_camera(detector, gaze_estimator, camera_id: int = 0):
"""Run real-time detection on webcam."""
cap = cv2.VideoCapture(camera_id)
if not cap.isOpened():
print(f'Cannot open camera {camera_id}')
return
print("Press 'q' to quit")
while True:
ret, frame = cap.read()
if not ret:
break
frame = cv2.flip(frame, 1)
faces = detector.detect(frame)
for face in faces:
bbox = face.bbox
x1, y1, x2, y2 = map(int, bbox[:4])
face_crop = frame[y1:y2, x1:x2]
if face_crop.size == 0:
continue
result = gaze_estimator.estimate(face_crop)
draw_gaze(frame, bbox, result.pitch, result.yaw)
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
cv2.imshow('Gaze Estimation', frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
cap.release()
cv2.destroyAllWindows()
def main():
parser = argparse.ArgumentParser(description='Run gaze estimation')
parser.add_argument('--source', type=str, required=True, help='Image/video path or camera ID (0, 1, ...)')
parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
args = parser.parse_args()
detector = RetinaFace()
gaze_estimator = MobileGaze()
source_type = get_source_type(args.source)
if source_type == 'camera':
run_camera(detector, gaze_estimator, int(args.source))
elif source_type == 'image':
if not os.path.exists(args.source):
print(f'Error: Image not found: {args.source}')
return
process_image(detector, gaze_estimator, args.source, args.save_dir)
elif source_type == 'video':
if not os.path.exists(args.source):
print(f'Error: Video not found: {args.source}')
return
process_video(detector, gaze_estimator, args.source, args.save_dir)
else:
print(f"Error: Unknown source type for '{args.source}'")
print('Supported formats: images (.jpg, .png, ...), videos (.mp4, .avi, ...), or camera ID (0, 1, ...)')
if __name__ == '__main__':
main()
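For reference, a minimal sketch of the same pipeline through the library API (a sketch assuming the RetinaFace/MobileGaze interfaces used by the tool above; 'face.jpg' is a placeholder path):

import cv2
import numpy as np

from uniface import RetinaFace
from uniface.gaze import MobileGaze

detector = RetinaFace()
gaze = MobileGaze()

image = cv2.imread('face.jpg')  # placeholder path
for face in detector.detect(image):
    x1, y1, x2, y2 = map(int, face.bbox[:4])
    crop = image[y1:y2, x1:x2]
    if crop.size == 0:
        continue
    result = gaze.estimate(crop)  # GazeResult; pitch/yaw in radians, as used above
    print(f'pitch={np.degrees(result.pitch):.1f}, yaw={np.degrees(result.yaw):.1f}')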

187
tools/landmarks.py Normal file
View File

@@ -0,0 +1,187 @@
# Copyright 2025 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
"""106-point facial landmark detection.
Usage:
python tools/landmarks.py --source path/to/image.jpg
python tools/landmarks.py --source path/to/video.mp4
python tools/landmarks.py --source 0 # webcam
"""
from __future__ import annotations
import argparse
import os
from pathlib import Path
import cv2
from uniface import SCRFD, Landmark106, RetinaFace
IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}
def get_source_type(source: str) -> str:
"""Determine if source is image, video, or camera."""
if source.isdigit():
return 'camera'
path = Path(source)
suffix = path.suffix.lower()
if suffix in IMAGE_EXTENSIONS:
return 'image'
elif suffix in VIDEO_EXTENSIONS:
return 'video'
else:
return 'unknown'
def process_image(detector, landmarker, image_path: str, save_dir: str = 'outputs'):
"""Process a single image."""
image = cv2.imread(image_path)
if image is None:
print(f"Error: Failed to load image from '{image_path}'")
return
faces = detector.detect(image)
print(f'Detected {len(faces)} face(s)')
if not faces:
return
for i, face in enumerate(faces):
bbox = face.bbox
x1, y1, x2, y2 = map(int, bbox)
cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
landmarks = landmarker.get_landmarks(image, bbox)
print(f' Face {i + 1}: {len(landmarks)} landmarks')
for x, y in landmarks.astype(int):
cv2.circle(image, (x, y), 1, (0, 255, 0), -1)
cv2.putText(image, f'Face {i + 1}', (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
os.makedirs(save_dir, exist_ok=True)
output_path = os.path.join(save_dir, f'{Path(image_path).stem}_landmarks.jpg')
cv2.imwrite(output_path, image)
print(f'Output saved: {output_path}')
def process_video(detector, landmarker, video_path: str, save_dir: str = 'outputs'):
"""Process a video file."""
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
print(f"Error: Cannot open video file '{video_path}'")
return
fps = cap.get(cv2.CAP_PROP_FPS)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
os.makedirs(save_dir, exist_ok=True)
output_path = os.path.join(save_dir, f'{Path(video_path).stem}_landmarks.mp4')
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
print(f'Processing video: {video_path} ({total_frames} frames)')
frame_count = 0
while True:
ret, frame = cap.read()
if not ret:
break
frame_count += 1
faces = detector.detect(frame)
for face in faces:
bbox = face.bbox
x1, y1, x2, y2 = map(int, bbox)
cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
landmarks = landmarker.get_landmarks(frame, bbox)
for x, y in landmarks.astype(int):
cv2.circle(frame, (x, y), 1, (0, 255, 0), -1)
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
out.write(frame)
if frame_count % 100 == 0:
print(f' Processed {frame_count}/{total_frames} frames...')
cap.release()
out.release()
print(f'Done! Output saved: {output_path}')
def run_camera(detector, landmarker, camera_id: int = 0):
"""Run real-time detection on webcam."""
cap = cv2.VideoCapture(camera_id)
if not cap.isOpened():
print(f'Cannot open camera {camera_id}')
return
print("Press 'q' to quit")
while True:
ret, frame = cap.read()
if not ret:
break
frame = cv2.flip(frame, 1)
faces = detector.detect(frame)
for face in faces:
bbox = face.bbox
x1, y1, x2, y2 = map(int, bbox)
cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
landmarks = landmarker.get_landmarks(frame, bbox)
for x, y in landmarks.astype(int):
cv2.circle(frame, (x, y), 1, (0, 255, 0), -1)
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
cv2.imshow('106-Point Landmarks', frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
cap.release()
cv2.destroyAllWindows()
def main():
parser = argparse.ArgumentParser(description='Run facial landmark detection')
parser.add_argument('--source', type=str, required=True, help='Image/video path or camera ID (0, 1, ...)')
parser.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
args = parser.parse_args()
detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
landmarker = Landmark106()
source_type = get_source_type(args.source)
if source_type == 'camera':
run_camera(detector, landmarker, int(args.source))
elif source_type == 'image':
if not os.path.exists(args.source):
print(f'Error: Image not found: {args.source}')
return
process_image(detector, landmarker, args.source, args.save_dir)
elif source_type == 'video':
if not os.path.exists(args.source):
print(f'Error: Video not found: {args.source}')
return
process_video(detector, landmarker, args.source, args.save_dir)
else:
print(f"Error: Unknown source type for '{args.source}'")
print('Supported formats: images (.jpg, .png, ...), videos (.mp4, .avi, ...), or camera ID (0, 1, ...)')
if __name__ == '__main__':
main()
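A minimal sketch of the landmark API this tool drives (assuming the Landmark106.get_landmarks signature shown above; 'face.jpg' is a placeholder path):

import cv2

from uniface import Landmark106, RetinaFace

detector = RetinaFace()
landmarker = Landmark106()

image = cv2.imread('face.jpg')  # placeholder path
for face in detector.detect(image):
    points = landmarker.get_landmarks(image, face.bbox)  # array of (x, y) points
    for x, y in points.astype(int):
        cv2.circle(image, (x, y), 1, (0, 255, 0), -1)
cv2.imwrite('face_landmarks.jpg', image)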

View File

@@ -1,6 +1,13 @@
# Face recognition: extract embeddings or compare two faces
# Usage: python run_recognition.py --image path/to/image.jpg
# python run_recognition.py --image1 face1.jpg --image2 face2.jpg
# Copyright 2025 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
"""Face recognition: extract embeddings or compare two faces.
Usage:
python tools/recognition.py --image path/to/image.jpg
python tools/recognition.py --image1 face1.jpg --image2 face2.jpg
"""
import argparse

214
tools/spoofing.py Normal file
View File

@@ -0,0 +1,214 @@
# Copyright 2025 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
"""Face Anti-Spoofing Detection.
Usage:
python tools/spoofing.py --source path/to/image.jpg
python tools/spoofing.py --source path/to/video.mp4
python tools/spoofing.py --source 0 # webcam
"""
from __future__ import annotations
import argparse
import os
from pathlib import Path
import cv2
import numpy as np
from uniface import RetinaFace
from uniface.constants import MiniFASNetWeights
from uniface.spoofing import create_spoofer
IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}
def get_source_type(source: str) -> str:
"""Determine if source is image, video, or camera."""
if source.isdigit():
return 'camera'
path = Path(source)
suffix = path.suffix.lower()
if suffix in IMAGE_EXTENSIONS:
return 'image'
elif suffix in VIDEO_EXTENSIONS:
return 'video'
else:
return 'unknown'
def draw_spoofing_result(
image: np.ndarray,
bbox: list,
is_real: bool,
confidence: float,
thickness: int = 2,
) -> None:
"""Draw bounding box with anti-spoofing result.
Args:
image: Input image to draw on.
bbox: Bounding box in [x1, y1, x2, y2] format.
is_real: True if real face, False if fake.
confidence: Confidence score (0.0 to 1.0).
thickness: Line thickness for bounding box.
"""
x1, y1, x2, y2 = map(int, bbox[:4])
color = (0, 255, 0) if is_real else (0, 0, 255)
cv2.rectangle(image, (x1, y1), (x2, y2), color, thickness)
label = 'Real' if is_real else 'Fake'
text = f'{label}: {confidence:.1%}'
(tw, th), _baseline = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2)
cv2.rectangle(image, (x1, y1 - th - 10), (x1 + tw + 10, y1), color, -1)
cv2.putText(image, text, (x1 + 5, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
def process_image(detector, spoofer, image_path: str, save_dir: str = 'outputs') -> None:
"""Process a single image for face anti-spoofing detection."""
image = cv2.imread(image_path)
if image is None:
print(f"Error: Failed to load image from '{image_path}'")
return
faces = detector.detect(image)
print(f'Detected {len(faces)} face(s)')
if not faces:
print('No faces detected in the image.')
return
for i, face in enumerate(faces, 1):
result = spoofer.predict(image, face.bbox)
label = 'Real' if result.is_real else 'Fake'
print(f' Face {i}: {label} ({result.confidence:.1%})')
draw_spoofing_result(image, face.bbox, result.is_real, result.confidence)
os.makedirs(save_dir, exist_ok=True)
output_path = os.path.join(save_dir, f'{Path(image_path).stem}_spoofing.jpg')
cv2.imwrite(output_path, image)
print(f'Output saved: {output_path}')
def process_video(detector, spoofer, video_path: str, save_dir: str = 'outputs') -> None:
"""Process a video file for face anti-spoofing detection."""
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
print(f"Error: Cannot open video file '{video_path}'")
return
fps = cap.get(cv2.CAP_PROP_FPS)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
os.makedirs(save_dir, exist_ok=True)
output_path = os.path.join(save_dir, f'{Path(video_path).stem}_spoofing.mp4')
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
print(f'Processing video: {video_path} ({total_frames} frames)')
frame_count = 0
while True:
ret, frame = cap.read()
if not ret:
break
frame_count += 1
faces = detector.detect(frame)
for face in faces:
result = spoofer.predict(frame, face.bbox)
draw_spoofing_result(frame, face.bbox, result.is_real, result.confidence)
out.write(frame)
if frame_count % 100 == 0:
print(f' Processed {frame_count}/{total_frames} frames...')
cap.release()
out.release()
print(f'Done! Output saved: {output_path}')
def run_camera(detector, spoofer, camera_id: int = 0) -> None:
"""Run real-time anti-spoofing detection on webcam."""
cap = cv2.VideoCapture(camera_id)
if not cap.isOpened():
print(f'Cannot open camera {camera_id}')
return
print("Press 'q' to quit")
while True:
ret, frame = cap.read()
if not ret:
break
frame = cv2.flip(frame, 1)
faces = detector.detect(frame)
for face in faces:
result = spoofer.predict(frame, face.bbox)
draw_spoofing_result(frame, face.bbox, result.is_real, result.confidence)
cv2.imshow('Face Anti-Spoofing', frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
cap.release()
cv2.destroyAllWindows()
def main():
parser = argparse.ArgumentParser(description='Face Anti-Spoofing Detection')
parser.add_argument('--source', type=str, required=True, help='Image/video path or camera ID (0, 1, ...)')
parser.add_argument(
'--model',
type=str,
default='v2',
choices=['v1se', 'v2'],
help='Model variant: v1se or v2 (default: v2)',
)
parser.add_argument('--scale', type=float, default=None, help='Custom crop scale (default: auto)')
parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
args = parser.parse_args()
# Select model variant
model_name = MiniFASNetWeights.V1SE if args.model == 'v1se' else MiniFASNetWeights.V2
# Initialize models
print(f'Initializing models (MiniFASNet {args.model.upper()})...')
detector = RetinaFace()
spoofer = create_spoofer(model_name=model_name, scale=args.scale)
source_type = get_source_type(args.source)
if source_type == 'camera':
run_camera(detector, spoofer, int(args.source))
elif source_type == 'image':
if not os.path.exists(args.source):
print(f'Error: Image not found: {args.source}')
return
process_image(detector, spoofer, args.source, args.save_dir)
elif source_type == 'video':
if not os.path.exists(args.source):
print(f'Error: Video not found: {args.source}')
return
process_video(detector, spoofer, args.source, args.save_dir)
else:
print(f"Error: Unknown source type for '{args.source}'")
print('Supported formats: images (.jpg, .png, ...), videos (.mp4, .avi, ...), or camera ID (0, 1, ...)')
if __name__ == '__main__':
main()
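A minimal sketch of the anti-spoofing API exercised by this tool (assuming the create_spoofer/SpoofingResult interfaces shown above; 'face.jpg' is a placeholder path):

import cv2

from uniface import RetinaFace
from uniface.constants import MiniFASNetWeights
from uniface.spoofing import create_spoofer

detector = RetinaFace()
spoofer = create_spoofer(model_name=MiniFASNetWeights.V2, scale=None)  # None = auto crop scale

image = cv2.imread('face.jpg')  # placeholder path
for face in detector.detect(image):
    result = spoofer.predict(image, face.bbox)  # SpoofingResult
    print('Real' if result.is_real else 'Fake', f'{result.confidence:.1%}')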

180
tools/video_detection.py Normal file
View File

@@ -0,0 +1,180 @@
# Copyright 2025 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
"""Face detection on video files with progress tracking.
Usage:
python tools/video_detection.py --source video.mp4
python tools/video_detection.py --source video.mp4 --output output.mp4
python tools/video_detection.py --source 0 # webcam
"""
from __future__ import annotations
import argparse
import os
from pathlib import Path
import cv2
from tqdm import tqdm
from uniface import SCRFD, RetinaFace
from uniface.visualization import draw_detections
IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}
def get_source_type(source: str) -> str:
"""Determine if source is image, video, or camera."""
if source.isdigit():
return 'camera'
path = Path(source)
suffix = path.suffix.lower()
if suffix in IMAGE_EXTENSIONS:
return 'image'
elif suffix in VIDEO_EXTENSIONS:
return 'video'
else:
return 'unknown'
def process_video(
detector,
input_path: str,
output_path: str,
threshold: float = 0.6,
show_preview: bool = False,
):
"""Process a video file with progress bar."""
cap = cv2.VideoCapture(input_path)
if not cap.isOpened():
print(f"Error: Cannot open video file '{input_path}'")
return
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
fps = cap.get(cv2.CAP_PROP_FPS)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
print(f'Input: {input_path} ({width}x{height}, {fps:.1f} fps, {total_frames} frames)')
print(f'Output: {output_path}')
Path(output_path).parent.mkdir(parents=True, exist_ok=True)
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
if not out.isOpened():
print(f"Error: Cannot create output video '{output_path}'")
cap.release()
return
frame_count = 0
total_faces = 0
for _ in tqdm(range(total_frames), desc='Processing', unit='frames'):
ret, frame = cap.read()
if not ret:
break
frame_count += 1
faces = detector.detect(frame)
total_faces += len(faces)
bboxes = [f.bbox for f in faces]
scores = [f.confidence for f in faces]
landmarks = [f.landmarks for f in faces]
draw_detections(
image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
)
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
out.write(frame)
if show_preview:
cv2.imshow("Processing - Press 'q' to cancel", frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
print('\nCancelled by user')
break
cap.release()
out.release()
if show_preview:
cv2.destroyAllWindows()
avg_faces = total_faces / frame_count if frame_count > 0 else 0
print(f'\nDone! {frame_count} frames, {total_faces} faces ({avg_faces:.1f} avg/frame)')
print(f'Saved: {output_path}')
def run_camera(detector, camera_id: int = 0, threshold: float = 0.6):
"""Run real-time detection on webcam."""
cap = cv2.VideoCapture(camera_id)
if not cap.isOpened():
print(f'Cannot open camera {camera_id}')
return
print("Press 'q' to quit")
while True:
ret, frame = cap.read()
if not ret:
break
frame = cv2.flip(frame, 1)
faces = detector.detect(frame)
bboxes = [f.bbox for f in faces]
scores = [f.confidence for f in faces]
landmarks = [f.landmarks for f in faces]
draw_detections(
image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
)
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
cv2.imshow('Face Detection', frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
cap.release()
cv2.destroyAllWindows()
def main():
parser = argparse.ArgumentParser(description='Process video with face detection')
parser.add_argument('--source', type=str, required=True, help='Video path or camera ID (0, 1, ...)')
parser.add_argument('--output', type=str, default=None, help='Output video path (auto-generated if not specified)')
parser.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
parser.add_argument('--threshold', type=float, default=0.6, help='Visualization threshold')
parser.add_argument('--preview', action='store_true', help='Show live preview')
parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory (if --output not specified)')
args = parser.parse_args()
detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
source_type = get_source_type(args.source)
if source_type == 'camera':
run_camera(detector, int(args.source), args.threshold)
elif source_type == 'video':
if not os.path.exists(args.source):
print(f'Error: Video not found: {args.source}')
return
# Determine output path
if args.output:
output_path = args.output
else:
os.makedirs(args.save_dir, exist_ok=True)
output_path = os.path.join(args.save_dir, f'{Path(args.source).stem}_detected.mp4')
process_video(detector, args.source, output_path, args.threshold, args.preview)
else:
print(f"Error: Unknown source type for '{args.source}'")
print('Supported formats: videos (.mp4, .avi, ...) or camera ID (0, 1, ...)')
if __name__ == '__main__':
main()

View File

@@ -11,10 +11,24 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""UniFace: A comprehensive library for face analysis.
This library provides unified APIs for:
- Face detection (RetinaFace, SCRFD, YOLOv5Face)
- Face recognition (ArcFace, MobileFace, SphereFace)
- Facial landmarks (106-point detection)
- Face parsing (semantic segmentation)
- Gaze estimation
- Age, gender, and emotion prediction
- Face anti-spoofing
- Privacy/anonymization
"""
from __future__ import annotations
__license__ = 'MIT'
__author__ = 'Yakhyokhuja Valikhujaev'
__version__ = '1.5.1'
__version__ = '2.0.0'
from uniface.face_utils import compute_similarity, face_alignment
from uniface.log import Logger, enable_logging
@@ -22,13 +36,7 @@ from uniface.model_store import verify_model_weights
from uniface.visualization import draw_detections, vis_parsing_maps
from .analyzer import FaceAnalyzer
from .attribute import AgeGender
from .face import Face
try:
from .attribute import Emotion
except ImportError:
Emotion = None # PyTorch not installed
from .attribute import AgeGender, FairFace
from .detection import (
SCRFD,
RetinaFace,
@@ -40,9 +48,20 @@ from .detection import (
from .gaze import MobileGaze, create_gaze_estimator
from .landmark import Landmark106, create_landmarker
from .parsing import BiSeNet, create_face_parser
from .privacy import BlurFace, anonymize_faces
from .recognition import ArcFace, MobileFace, SphereFace, create_recognizer
from .spoofing import MiniFASNet, create_spoofer
from .types import AttributeResult, EmotionResult, Face, GazeResult, SpoofingResult
# Optional: Emotion requires PyTorch
Emotion: type | None
try:
from .attribute import Emotion
except ImportError:
Emotion = None
__all__ = [
# Metadata
'__author__',
'__license__',
'__version__',
@@ -55,6 +74,7 @@ __all__ = [
'create_gaze_estimator',
'create_landmarker',
'create_recognizer',
'create_spoofer',
'detect_faces',
'list_available_detectors',
# Detection models
@@ -68,18 +88,28 @@ __all__ = [
# Landmark models
'Landmark106',
# Gaze models
'GazeResult',
'MobileGaze',
# Parsing models
'BiSeNet',
# Attribute models
'AgeGender',
'AttributeResult',
'Emotion',
'EmotionResult',
'FairFace',
# Spoofing models
'MiniFASNet',
'SpoofingResult',
# Privacy
'BlurFace',
'anonymize_faces',
# Utilities
'Logger',
'compute_similarity',
'draw_detections',
'vis_parsing_maps',
'enable_logging',
'face_alignment',
'verify_model_weights',
'Logger',
'enable_logging',
'vis_parsing_maps',
]
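Based on the exports above, a typical v2.0.0 import pattern looks roughly like this (illustrative, not exhaustive):

from uniface import (
    AgeGender,
    ArcFace,
    AttributeResult,
    Face,
    FaceAnalyzer,
    FairFace,
    RetinaFace,
    SpoofingResult,
    compute_similarity,
    create_spoofer,
)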

View File

@@ -2,74 +2,102 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
from typing import List, Optional
from __future__ import annotations
import numpy as np
from uniface.attribute.age_gender import AgeGender
from uniface.attribute.fairface import FairFace
from uniface.detection.base import BaseDetector
from uniface.face import Face
from uniface.log import Logger
from uniface.recognition.base import BaseRecognizer
from uniface.types import Face
__all__ = ['FaceAnalyzer']
class FaceAnalyzer:
"""Unified face analyzer combining detection, recognition, and attributes."""
"""Unified face analyzer combining detection, recognition, and attributes.
This class provides a high-level interface for face analysis by combining
multiple components: face detection, recognition (embedding extraction),
and attribute prediction (age, gender, race).
Args:
detector: Face detector instance for detecting faces in images.
recognizer: Optional face recognizer for extracting embeddings.
age_gender: Optional age/gender predictor.
fairface: Optional FairFace predictor for demographics.
Example:
>>> from uniface import RetinaFace, ArcFace, FaceAnalyzer
>>> detector = RetinaFace()
>>> recognizer = ArcFace()
>>> analyzer = FaceAnalyzer(detector, recognizer=recognizer)
>>> faces = analyzer.analyze(image)
"""
def __init__(
self,
detector: BaseDetector,
recognizer: Optional[BaseRecognizer] = None,
age_gender: Optional[AgeGender] = None,
recognizer: BaseRecognizer | None = None,
age_gender: AgeGender | None = None,
fairface: FairFace | None = None,
) -> None:
self.detector = detector
self.recognizer = recognizer
self.age_gender = age_gender
self.fairface = fairface
Logger.info(f'Initialized FaceAnalyzer with detector={detector.__class__.__name__}')
if recognizer:
Logger.info(f' - Recognition enabled: {recognizer.__class__.__name__}')
if age_gender:
Logger.info(f' - Age/Gender enabled: {age_gender.__class__.__name__}')
if fairface:
Logger.info(f' - FairFace enabled: {fairface.__class__.__name__}')
def analyze(self, image: np.ndarray) -> List[Face]:
"""Analyze faces in an image."""
detections = self.detector.detect(image)
Logger.debug(f'Detected {len(detections)} face(s)')
def analyze(self, image: np.ndarray) -> list[Face]:
"""Analyze faces in an image.
faces = []
for idx, detection in enumerate(detections):
bbox = detection['bbox']
confidence = detection['confidence']
landmarks = detection['landmarks']
Performs face detection and optionally extracts embeddings and
predicts attributes for each detected face.
embedding = None
Args:
image: Input image as numpy array with shape (H, W, C) in BGR format.
Returns:
List of Face objects with detection results and any predicted attributes.
"""
faces = self.detector.detect(image)
Logger.debug(f'Detected {len(faces)} face(s)')
for idx, face in enumerate(faces):
if self.recognizer is not None:
try:
embedding = self.recognizer.get_normalized_embedding(image, landmarks)
Logger.debug(f' Face {idx + 1}: Extracted embedding with shape {embedding.shape}')
face.embedding = self.recognizer.get_normalized_embedding(image, face.landmarks)
Logger.debug(f' Face {idx + 1}: Extracted embedding with shape {face.embedding.shape}')
except Exception as e:
Logger.warning(f' Face {idx + 1}: Failed to extract embedding: {e}')
age, gender = None, None
if self.age_gender is not None:
try:
gender, age = self.age_gender.predict(image, bbox)
Logger.debug(f' Face {idx + 1}: Age={age}, Gender={gender}')
result = self.age_gender.predict(image, face.bbox)
face.gender = result.gender
face.age = result.age
Logger.debug(f' Face {idx + 1}: Age={face.age}, Gender={face.sex}')
except Exception as e:
Logger.warning(f' Face {idx + 1}: Failed to predict age/gender: {e}')
face = Face(
bbox=bbox,
confidence=confidence,
landmarks=landmarks,
embedding=embedding,
age=age,
gender=gender,
)
faces.append(face)
if self.fairface is not None:
try:
result = self.fairface.predict(image, face.bbox)
face.gender = result.gender
face.age_group = result.age_group
face.race = result.race
Logger.debug(f' Face {idx + 1}: AgeGroup={face.age_group}, Gender={face.sex}, Race={face.race}')
except Exception as e:
Logger.warning(f' Face {idx + 1}: Failed to predict FairFace attributes: {e}')
Logger.info(f'Analysis complete: {len(faces)} face(s) processed')
return faces
@@ -80,4 +108,6 @@ class FaceAnalyzer:
parts.append(f'recognizer={self.recognizer.__class__.__name__}')
if self.age_gender:
parts.append(f'age_gender={self.age_gender.__class__.__name__}')
if self.fairface:
parts.append(f'fairface={self.fairface.__class__.__name__}')
return ', '.join(parts) + ')'
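A short usage sketch for the refactored analyzer (assuming the constructor arguments and Face fields shown above; 'group.jpg' is a placeholder path):

import cv2

from uniface import AgeGender, FaceAnalyzer, FairFace, RetinaFace

analyzer = FaceAnalyzer(
    detector=RetinaFace(),
    age_gender=AgeGender(),
    fairface=FairFace(),
)

image = cv2.imread('group.jpg')  # placeholder path
for face in analyzer.analyze(image):
    # Attributes are written onto each Face in place by the enabled components
    print(face.bbox, face.age, face.age_group, face.race)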

View File

@@ -2,13 +2,17 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
from typing import Any, Dict, List, Union
from __future__ import annotations
from typing import Any
import numpy as np
from uniface.attribute.age_gender import AgeGender
from uniface.attribute.base import Attribute
from uniface.constants import AgeGenderWeights, DDAMFNWeights
from uniface.attribute.fairface import FairFace
from uniface.constants import AgeGenderWeights, DDAMFNWeights, FairFaceWeights
from uniface.types import AttributeResult, EmotionResult, Face
# Emotion requires PyTorch - make it optional
try:
@@ -20,19 +24,30 @@ except ImportError:
_EMOTION_AVAILABLE = False
# Public API for the attribute module
__all__ = ['AgeGender', 'Emotion', 'create_attribute_predictor', 'predict_attributes']
__all__ = [
'AgeGender',
'AttributeResult',
'Emotion',
'EmotionResult',
'FairFace',
'create_attribute_predictor',
'predict_attributes',
]
# A mapping from model enums to their corresponding attribute classes
_ATTRIBUTE_MODELS = {
**{model: AgeGender for model in AgeGenderWeights},
**dict.fromkeys(AgeGenderWeights, AgeGender),
**dict.fromkeys(FairFaceWeights, FairFace),
}
# Add Emotion models only if PyTorch is available
if _EMOTION_AVAILABLE:
_ATTRIBUTE_MODELS.update({model: Emotion for model in DDAMFNWeights})
_ATTRIBUTE_MODELS.update(dict.fromkeys(DDAMFNWeights, Emotion))
def create_attribute_predictor(model_name: Union[AgeGenderWeights, DDAMFNWeights], **kwargs: Any) -> Attribute:
def create_attribute_predictor(
model_name: AgeGenderWeights | DDAMFNWeights | FairFaceWeights, **kwargs: Any
) -> Attribute:
"""
Factory function to create an attribute predictor instance.
@@ -41,11 +56,13 @@ def create_attribute_predictor(model_name: Union[AgeGenderWeights, DDAMFNWeights
Args:
model_name: The enum corresponding to the desired attribute model
(e.g., AgeGenderWeights.DEFAULT or DDAMFNWeights.AFFECNET7).
(e.g., AgeGenderWeights.DEFAULT, DDAMFNWeights.AFFECNET7,
or FairFaceWeights.DEFAULT).
**kwargs: Additional keyword arguments to pass to the model's constructor.
Returns:
An initialized instance of an Attribute predictor class (e.g., AgeGender).
An initialized instance of an Attribute predictor class
(e.g., AgeGender, FairFace, or Emotion).
Raises:
ValueError: If the provided model_name is not a supported enum.
@@ -54,46 +71,44 @@ def create_attribute_predictor(model_name: Union[AgeGenderWeights, DDAMFNWeights
if model_class is None:
raise ValueError(
f'Unsupported attribute model: {model_name}. Please choose from AgeGenderWeights or DDAMFNWeights.'
f'Unsupported attribute model: {model_name}. '
f'Please choose from AgeGenderWeights, FairFaceWeights, or DDAMFNWeights.'
)
# Pass model_name to the constructor, as some classes might need it
return model_class(model_name=model_name, **kwargs)
def predict_attributes(
image: np.ndarray, detections: List[Dict[str, np.ndarray]], predictor: Attribute
) -> List[Dict[str, Any]]:
def predict_attributes(image: np.ndarray, faces: list[Face], predictor: Attribute) -> list[Face]:
"""
High-level API to predict attributes for multiple detected faces.
This function iterates through a list of face detections, runs the
specified attribute predictor on each one, and appends the results back
into the detection dictionary.
This function iterates through a list of Face objects, runs the
specified attribute predictor on each one, and updates the Face
objects with the predicted attributes.
Args:
image (np.ndarray): The full input image in BGR format.
detections (List[Dict]): A list of detection results, where each dict
must contain a 'bbox' and optionally 'landmark'.
faces (List[Face]): A list of Face objects from face detection.
predictor (Attribute): An initialized attribute predictor instance,
created by `create_attribute_predictor`.
Returns:
The list of detections, where each dictionary is updated with a new
'attributes' key containing the prediction result.
List[Face]: The list of Face objects with updated attribute fields.
"""
for face in detections:
# Initialize attributes dict if it doesn't exist
if 'attributes' not in face:
face['attributes'] = {}
for face in faces:
if isinstance(predictor, AgeGender):
gender_id, age = predictor(image, face['bbox'])
face['attributes']['gender_id'] = gender_id
face['attributes']['age'] = age
result = predictor(image, face.bbox)
face.gender = result.gender
face.age = result.age
elif isinstance(predictor, FairFace):
result = predictor(image, face.bbox)
face.gender = result.gender
face.age_group = result.age_group
face.race = result.race
elif isinstance(predictor, Emotion):
emotion, confidence = predictor(image, face['landmark'])
face['attributes']['emotion'] = emotion
face['attributes']['confidence'] = confidence
result = predictor(image, face.landmarks)
face.emotion = result.emotion
face.emotion_confidence = result.confidence
return detections
return faces
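A sketch of the new Face-based flow for the module-level helpers (assuming the factory and helper signatures above; 'people.jpg' is a placeholder path):

import cv2

from uniface import RetinaFace
from uniface.attribute import create_attribute_predictor, predict_attributes
from uniface.constants import FairFaceWeights

detector = RetinaFace()
predictor = create_attribute_predictor(FairFaceWeights.DEFAULT)

image = cv2.imread('people.jpg')  # placeholder path
faces = detector.detect(image)
faces = predict_attributes(image, faces, predictor)  # updates the Face objects in place
for face in faces:
    print(face.gender, face.age_group, face.race)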

View File

@@ -2,7 +2,6 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
from typing import List, Optional, Tuple, Union
import cv2
import numpy as np
@@ -13,6 +12,7 @@ from uniface.face_utils import bbox_center_alignment
from uniface.log import Logger
from uniface.model_store import verify_model_weights
from uniface.onnx_utils import create_onnx_session
from uniface.types import AttributeResult
__all__ = ['AgeGender']
@@ -35,7 +35,7 @@ class AgeGender(Attribute):
def __init__(
self,
model_name: AgeGenderWeights = AgeGenderWeights.DEFAULT,
input_size: Optional[Tuple[int, int]] = None,
input_size: tuple[int, int] | None = None,
) -> None:
"""
Initializes the AgeGender prediction model.
@@ -81,7 +81,7 @@ class AgeGender(Attribute):
)
raise RuntimeError(f'Failed to initialize AgeGender model: {e}') from e
def preprocess(self, image: np.ndarray, bbox: Union[List, np.ndarray]) -> np.ndarray:
def preprocess(self, image: np.ndarray, bbox: list | np.ndarray) -> np.ndarray:
"""
Aligns the face based on the bounding box and preprocesses it for inference.
@@ -111,7 +111,7 @@ class AgeGender(Attribute):
)
return blob
def postprocess(self, prediction: np.ndarray) -> Tuple[int, int]:
def postprocess(self, prediction: np.ndarray) -> AttributeResult:
"""
Processes the raw model output to extract gender and age.
@@ -119,16 +119,15 @@ class AgeGender(Attribute):
prediction (np.ndarray): The raw output from the model inference.
Returns:
Tuple[int, int]: A tuple containing the predicted gender ID (0 for Female, 1 for Male)
and age (in years).
AttributeResult: Result containing gender (0=Female, 1=Male) and age (in years).
"""
# First two values are gender logits
gender_id = int(np.argmax(prediction[:2]))
gender = int(np.argmax(prediction[:2]))
# Third value is normalized age, scaled by 100
age = int(np.round(prediction[2] * 100))
return gender_id, age
return AttributeResult(gender=gender, age=age)
def predict(self, image: np.ndarray, bbox: Union[List, np.ndarray]) -> Tuple[int, int]:
def predict(self, image: np.ndarray, bbox: list | np.ndarray) -> AttributeResult:
"""
Predicts age and gender for a single face specified by a bounding box.
@@ -137,75 +136,8 @@ class AgeGender(Attribute):
bbox (Union[List, np.ndarray]): The face bounding box coordinates [x1, y1, x2, y2].
Returns:
Tuple[int, int]: A tuple containing the predicted gender ID (0 for Female, 1 for Male) and age.
AttributeResult: Result containing gender (0=Female, 1=Male) and age (in years).
"""
face_blob = self.preprocess(image, bbox)
prediction = self.session.run(self.output_names, {self.input_name: face_blob})[0][0]
gender_id, age = self.postprocess(prediction)
return gender_id, age
# TODO: below is only for testing, remove it later
if __name__ == '__main__':
# To run this script, you need to have uniface.detection installed
# or available in your path.
from uniface.constants import RetinaFaceWeights
from uniface.detection import create_detector
print('Initializing models for live inference...')
# 1. Initialize the face detector
# Using a smaller model for faster real-time performance
detector = create_detector(model_name=RetinaFaceWeights.MNET_V2)
# 2. Initialize the attribute predictor
age_gender_predictor = AgeGender()
# 3. Start webcam capture
cap = cv2.VideoCapture(0)
if not cap.isOpened():
print('Error: Could not open webcam.')
exit()
print("Starting webcam feed. Press 'q' to quit.")
while True:
ret, frame = cap.read()
if not ret:
print('Error: Failed to capture frame.')
break
# Detect faces in the current frame
detections = detector.detect(frame)
# For each detected face, predict age and gender
for detection in detections:
box = detection['bbox']
x1, y1, x2, y2 = map(int, box)
# Predict attributes
gender_id, age = age_gender_predictor.predict(frame, box)
gender_str = 'Female' if gender_id == 0 else 'Male'
# Prepare text and draw on the frame
label = f'{gender_str}, {age}'
cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
cv2.putText(
frame,
label,
(x1, y1 - 10),
cv2.FONT_HERSHEY_SIMPLEX,
0.8,
(0, 255, 0),
2,
)
# Display the resulting frame
cv2.imshow("Age and Gender Inference (Press 'q' to quit)", frame)
# Break the loop if 'q' is pressed
if cv2.waitKey(1) & 0xFF == ord('q'):
break
# Release resources
cap.release()
cv2.destroyAllWindows()
print('Inference stopped.')
return self.postprocess(prediction)
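With the inline test block removed, a minimal sketch of the equivalent single-image usage (assuming the AttributeResult fields documented above; 'face.jpg' is a placeholder path):

import cv2

from uniface import AgeGender, RetinaFace

detector = RetinaFace()
age_gender = AgeGender()

image = cv2.imread('face.jpg')  # placeholder path
for face in detector.detect(image):
    result = age_gender.predict(image, face.bbox)  # AttributeResult
    print('Female' if result.gender == 0 else 'Male', result.age)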

View File

@@ -7,6 +7,10 @@ from typing import Any
import numpy as np
from uniface.types import AttributeResult, EmotionResult
__all__ = ['Attribute', 'AttributeResult', 'EmotionResult']
class Attribute(ABC):
"""

View File

@@ -2,7 +2,6 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
from typing import List, Tuple, Union
import cv2
import numpy as np
@@ -13,6 +12,7 @@ from uniface.constants import DDAMFNWeights
from uniface.face_utils import face_alignment
from uniface.log import Logger
from uniface.model_store import verify_model_weights
from uniface.types import EmotionResult
__all__ = ['Emotion']
@@ -29,7 +29,7 @@ class Emotion(Attribute):
def __init__(
self,
model_weights: DDAMFNWeights = DDAMFNWeights.AFFECNET7,
input_size: Tuple[int, int] = (112, 112),
input_size: tuple[int, int] = (112, 112),
) -> None:
"""
Initializes the emotion recognition model.
@@ -81,7 +81,7 @@ class Emotion(Attribute):
Logger.error(f"Failed to load Emotion model from '{self.model_path}'", exc_info=True)
raise RuntimeError(f'Failed to initialize Emotion model: {e}') from e
def preprocess(self, image: np.ndarray, landmark: Union[List, np.ndarray]) -> torch.Tensor:
def preprocess(self, image: np.ndarray, landmark: list | np.ndarray) -> torch.Tensor:
"""
Aligns the face using landmarks and preprocesses it into a tensor.
@@ -106,7 +106,7 @@ class Emotion(Attribute):
return torch.from_numpy(transposed_image).unsqueeze(0).to(self.device)
def postprocess(self, prediction: torch.Tensor) -> Tuple[str, float]:
def postprocess(self, prediction: torch.Tensor) -> EmotionResult:
"""
Processes the raw model output to get the emotion label and confidence score.
"""
@@ -114,9 +114,9 @@ class Emotion(Attribute):
pred_index = np.argmax(probabilities)
emotion_label = self.emotion_labels[pred_index]
confidence = float(probabilities[pred_index])
return emotion_label, confidence
return EmotionResult(emotion=emotion_label, confidence=confidence)
def predict(self, image: np.ndarray, landmark: Union[List, np.ndarray]) -> Tuple[str, float]:
def predict(self, image: np.ndarray, landmark: list | np.ndarray) -> EmotionResult:
"""
Predicts the emotion from a single face specified by its landmarks.
"""
@@ -127,68 +127,3 @@ class Emotion(Attribute):
output = output[0]
return self.postprocess(output)
# TODO: below is only for testing, remove it later
if __name__ == '__main__':
from uniface.constants import RetinaFaceWeights
from uniface.detection import create_detector
print('Initializing models for live inference...')
# 1. Initialize the face detector
# Using a smaller model for faster real-time performance
detector = create_detector(model_name=RetinaFaceWeights.MNET_V2)
# 2. Initialize the attribute predictor
emotion_predictor = Emotion()
# 3. Start webcam capture
cap = cv2.VideoCapture(0)
if not cap.isOpened():
print('Error: Could not open webcam.')
exit()
print("Starting webcam feed. Press 'q' to quit.")
while True:
ret, frame = cap.read()
if not ret:
print('Error: Failed to capture frame.')
break
# Detect faces in the current frame.
# This method returns a list of dictionaries for each detected face.
detections = detector.detect(frame)
# For each detected face, predict the emotion
for detection in detections:
box = detection['bbox']
landmark = detection['landmarks']
x1, y1, x2, y2 = map(int, box)
# Predict attributes using the landmark
emotion, confidence = emotion_predictor.predict(frame, landmark)
# Prepare text and draw on the frame
label = f'{emotion} ({confidence:.2f})'
cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)
cv2.putText(
frame,
label,
(x1, y1 - 10),
cv2.FONT_HERSHEY_SIMPLEX,
0.8,
(255, 0, 0),
2,
)
# Display the resulting frame
cv2.imshow("Emotion Inference (Press 'q' to quit)", frame)
# Break the loop if 'q' is pressed
if cv2.waitKey(1) & 0xFF == ord('q'):
break
# Release resources
cap.release()
cv2.destroyAllWindows()
print('Inference stopped.')
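Likewise for the emotion model, a sketch of the post-refactor call (requires PyTorch; assumes the EmotionResult fields above; 'face.jpg' is a placeholder path):

import cv2

from uniface import RetinaFace
from uniface.attribute import Emotion  # import fails if PyTorch is not installed

detector = RetinaFace()
emotion = Emotion()

image = cv2.imread('face.jpg')  # placeholder path
for face in detector.detect(image):
    result = emotion.predict(image, face.landmarks)  # EmotionResult
    print(result.emotion, f'{result.confidence:.2f}')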

View File

@@ -0,0 +1,193 @@
# Copyright 2025 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
import cv2
import numpy as np
from uniface.attribute.base import Attribute
from uniface.constants import FairFaceWeights
from uniface.log import Logger
from uniface.model_store import verify_model_weights
from uniface.onnx_utils import create_onnx_session
from uniface.types import AttributeResult
__all__ = ['AGE_LABELS', 'RACE_LABELS', 'FairFace']
# Label definitions
RACE_LABELS = [
'White',
'Black',
'Latino Hispanic',
'East Asian',
'Southeast Asian',
'Indian',
'Middle Eastern',
]
AGE_LABELS = ['0-2', '3-9', '10-19', '20-29', '30-39', '40-49', '50-59', '60-69', '70+']
class FairFace(Attribute):
"""
FairFace attribute prediction model using ONNX Runtime.
This class inherits from the base `Attribute` class and implements the
functionality for predicting race (7 categories), gender (2 categories),
and age (9 groups) from a face image. It requires a bounding box to locate the face.
The model is trained on the FairFace dataset which provides balanced demographics
for more equitable predictions across different racial and gender groups.
Args:
model_name (FairFaceWeights): The enum specifying the model weights to load.
Defaults to `FairFaceWeights.DEFAULT`.
input_size (Optional[Tuple[int, int]]): Input size (height, width).
If None, defaults to (224, 224). Defaults to None.
"""
def __init__(
self,
model_name: FairFaceWeights = FairFaceWeights.DEFAULT,
input_size: tuple[int, int] | None = None,
) -> None:
"""
Initializes the FairFace prediction model.
Args:
model_name (FairFaceWeights): The enum specifying the model weights to load.
input_size (Optional[Tuple[int, int]]): Input size (height, width).
If None, defaults to (224, 224).
"""
Logger.info(f'Initializing FairFace with model={model_name.name}')
self.model_path = verify_model_weights(model_name)
self.input_size = input_size if input_size is not None else (224, 224)
self._initialize_model()
def _initialize_model(self) -> None:
"""
Initializes the ONNX model and creates an inference session.
"""
try:
self.session = create_onnx_session(self.model_path)
# Get model input details from the loaded model
input_meta = self.session.get_inputs()[0]
self.input_name = input_meta.name
self.output_names = [output.name for output in self.session.get_outputs()]
Logger.info(f'Successfully initialized FairFace model with input size {self.input_size}')
except Exception as e:
Logger.error(
f"Failed to load FairFace model from '{self.model_path}'",
exc_info=True,
)
raise RuntimeError(f'Failed to initialize FairFace model: {e}') from e
def preprocess(self, image: np.ndarray, bbox: list | np.ndarray | None = None) -> np.ndarray:
"""
Preprocesses the face image for inference.
Args:
image (np.ndarray): The input image in BGR format.
bbox (Optional[Union[List, np.ndarray]]): Face bounding box [x1, y1, x2, y2].
If None, uses the entire image.
Returns:
np.ndarray: The preprocessed image blob ready for inference.
"""
# Crop face if bbox provided
if bbox is not None:
bbox = np.asarray(bbox, dtype=int)
x1, y1, x2, y2 = bbox[:4]
# Add padding (25% of face size)
w, h = x2 - x1, y2 - y1
padding = 0.25
x_pad = int(w * padding)
y_pad = int(h * padding)
x1 = max(0, x1 - x_pad)
y1 = max(0, y1 - y_pad)
x2 = min(image.shape[1], x2 + x_pad)
y2 = min(image.shape[0], y2 + y_pad)
image = image[y1:y2, x1:x2]
# Resize to input size (width, height for cv2.resize)
image = cv2.resize(image, self.input_size[::-1])
# Convert BGR to RGB
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# Normalize with ImageNet mean and std
image = image.astype(np.float32) / 255.0
mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
std = np.array([0.229, 0.224, 0.225], dtype=np.float32)
image = (image - mean) / std
# Transpose to CHW format and add batch dimension
image = np.transpose(image, (2, 0, 1))
image = np.expand_dims(image, axis=0)
return image
def postprocess(self, prediction: tuple[np.ndarray, np.ndarray, np.ndarray]) -> AttributeResult:
"""
Processes the raw model output to extract race, gender, and age.
Args:
prediction (Tuple[np.ndarray, np.ndarray, np.ndarray]): Raw outputs from model
(race_logits, gender_logits, age_logits).
Returns:
AttributeResult: Result containing gender (0=Female, 1=Male), age_group, and race.
"""
race_logits, gender_logits, age_logits = prediction
# Apply softmax
race_probs = self._softmax(race_logits[0])
gender_probs = self._softmax(gender_logits[0])
age_probs = self._softmax(age_logits[0])
# Get predictions
race_idx = int(np.argmax(race_probs))
raw_gender_idx = int(np.argmax(gender_probs))
age_idx = int(np.argmax(age_probs))
# Normalize gender: model outputs 0=Male, 1=Female → standard 0=Female, 1=Male
gender = 1 - raw_gender_idx
return AttributeResult(
gender=gender,
age_group=AGE_LABELS[age_idx],
race=RACE_LABELS[race_idx],
)
def predict(self, image: np.ndarray, bbox: list | np.ndarray | None = None) -> AttributeResult:
"""
Predicts race, gender, and age for a face.
Args:
image (np.ndarray): The input image in BGR format.
bbox (Optional[Union[List, np.ndarray]]): Face bounding box [x1, y1, x2, y2].
If None, uses the entire image.
Returns:
AttributeResult: Result containing:
- gender: 0=Female, 1=Male
- age_group: Age range string like "20-29"
- race: Race/ethnicity label
"""
# Preprocess
input_blob = self.preprocess(image, bbox)
# Inference
outputs = self.session.run(self.output_names, {self.input_name: input_blob})
# Postprocess
return self.postprocess(outputs)
@staticmethod
def _softmax(x: np.ndarray) -> np.ndarray:
"""Compute softmax values for numerical stability."""
exp_x = np.exp(x - np.max(x))
return exp_x / np.sum(exp_x)
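A minimal sketch of using the predictor directly (assuming the AttributeResult fields documented above; 'face.jpg' is a placeholder path):

import cv2

from uniface import FairFace, RetinaFace

detector = RetinaFace()
fairface = FairFace()

image = cv2.imread('face.jpg')  # placeholder path
for face in detector.detect(image):
    result = fairface.predict(image, face.bbox)
    print('Female' if result.gender == 0 else 'Male', result.age_group, result.race)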

View File

@@ -2,34 +2,42 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
from __future__ import annotations
import itertools
import math
from typing import List, Optional, Tuple
import cv2
import numpy as np
__all__ = [
'resize_image',
'generate_anchors',
'non_max_suppression',
'decode_boxes',
'decode_landmarks',
'distance2bbox',
'distance2kps',
'generate_anchors',
'non_max_suppression',
'resize_image',
]
def resize_image(frame, target_shape: Tuple[int, int] = (640, 640)) -> Tuple[np.ndarray, float]:
"""
Resize an image to fit within a target shape while keeping its aspect ratio.
def resize_image(
frame: np.ndarray,
target_shape: tuple[int, int] = (640, 640),
) -> tuple[np.ndarray, float]:
"""Resize an image to fit within a target shape while keeping its aspect ratio.
The image is resized to fit within the target dimensions and placed on a
blank canvas (zero-padded to target size).
Args:
frame (np.ndarray): Input image.
target_shape (Tuple[int, int]): Target size (width, height). Defaults to (640, 640).
frame: Input image with shape (H, W, C).
target_shape: Target size as (width, height). Defaults to (640, 640).
Returns:
Tuple[np.ndarray, float]: Resized image on a blank canvas and the resize factor.
A tuple containing:
- Resized image on a blank canvas with shape (height, width, 3).
- The resize factor as a float.
"""
width, height = target_shape
@@ -53,16 +61,16 @@ def resize_image(frame, target_shape: Tuple[int, int] = (640, 640)) -> Tuple[np.
return image, resize_factor
def generate_anchors(image_size: Tuple[int, int] = (640, 640)) -> np.ndarray:
"""
Generate anchor boxes for a given image size (RetinaFace specific).
def generate_anchors(image_size: tuple[int, int] = (640, 640)) -> np.ndarray:
"""Generate anchor boxes for a given image size (RetinaFace specific).
Args:
image_size (Tuple[int, int]): Input image size (width, height). Defaults to (640, 640).
image_size: Input image size as (width, height). Defaults to (640, 640).
Returns:
np.ndarray: Anchor box coordinates as a NumPy array with shape (num_anchors, 4).
Anchor box coordinates as a numpy array with shape (num_anchors, 4).
"""
# RetinaFace FPN strides and corresponding anchor sizes per level
steps = [8, 16, 32]
min_sizes = [[16, 32], [64, 128], [256, 512]]
@@ -85,16 +93,15 @@ def generate_anchors(image_size: Tuple[int, int] = (640, 640)) -> np.ndarray:
return output
def non_max_suppression(dets: np.ndarray, threshold: float) -> List[int]:
"""
Apply Non-Maximum Suppression (NMS) to reduce overlapping bounding boxes based on a threshold.
def non_max_suppression(dets: np.ndarray, threshold: float) -> list[int]:
"""Apply Non-Maximum Suppression (NMS) to reduce overlapping bounding boxes.
Args:
dets (np.ndarray): Array of detections with each row as [x1, y1, x2, y2, score].
threshold (float): IoU threshold for suppression.
dets: Array of detections with each row as [x1, y1, x2, y2, score].
threshold: IoU threshold for suppression.
Returns:
List[int]: Indices of bounding boxes retained after suppression.
Indices of bounding boxes retained after suppression.
"""
x1 = dets[:, 0]
y1 = dets[:, 1]
@@ -125,18 +132,22 @@ def non_max_suppression(dets: np.ndarray, threshold: float) -> List[int]:
return keep
def decode_boxes(loc: np.ndarray, priors: np.ndarray, variances: Optional[List[float]] = None) -> np.ndarray:
"""
Decode locations from predictions using priors to undo
the encoding done for offset regression at train time (RetinaFace specific).
def decode_boxes(
loc: np.ndarray,
priors: np.ndarray,
variances: list[float] | None = None,
) -> np.ndarray:
"""Decode locations from predictions using priors (RetinaFace specific).
Undoes the encoding done for offset regression at train time.
Args:
loc (np.ndarray): Location predictions for loc layers, shape: [num_priors, 4]
priors (np.ndarray): Prior boxes in center-offset form, shape: [num_priors, 4]
variances (Optional[List[float]]): Variances of prior boxes. Defaults to [0.1, 0.2].
loc: Location predictions for loc layers, shape: [num_priors, 4].
priors: Prior boxes in center-offset form, shape: [num_priors, 4].
variances: Variances of prior boxes. Defaults to [0.1, 0.2].
Returns:
np.ndarray: Decoded bounding box predictions with shape [num_priors, 4]
Decoded bounding box predictions with shape [num_priors, 4].
"""
if variances is None:
variances = [0.1, 0.2]
@@ -155,18 +166,19 @@ def decode_boxes(loc: np.ndarray, priors: np.ndarray, variances: Optional[List[f
def decode_landmarks(
predictions: np.ndarray, priors: np.ndarray, variances: Optional[List[float]] = None
predictions: np.ndarray,
priors: np.ndarray,
variances: list[float] | None = None,
) -> np.ndarray:
"""
Decode landmark predictions using prior boxes (RetinaFace specific).
"""Decode landmark predictions using prior boxes (RetinaFace specific).
Args:
predictions (np.ndarray): Landmark predictions, shape: [num_priors, 10]
priors (np.ndarray): Prior boxes, shape: [num_priors, 4]
variances (Optional[List[float]]): Scaling factors for landmark offsets. Defaults to [0.1, 0.2].
predictions: Landmark predictions, shape: [num_priors, 10].
priors: Prior boxes, shape: [num_priors, 4].
variances: Scaling factors for landmark offsets. Defaults to [0.1, 0.2].
Returns:
np.ndarray: Decoded landmarks, shape: [num_priors, 10]
Decoded landmarks, shape: [num_priors, 10].
"""
if variances is None:
variances = [0.1, 0.2]
@@ -187,18 +199,21 @@ def decode_landmarks(
return landmarks
def distance2bbox(points: np.ndarray, distance: np.ndarray, max_shape: Optional[Tuple[int, int]] = None) -> np.ndarray:
"""
Decode distance prediction to bounding box (SCRFD specific).
def distance2bbox(
points: np.ndarray,
distance: np.ndarray,
max_shape: tuple[int, int] | None = None,
) -> np.ndarray:
"""Decode distance prediction to bounding box (SCRFD specific).
Args:
points (np.ndarray): Anchor points with shape (n, 2), [x, y].
distance (np.ndarray): Distance from the given point to 4
boundaries (left, top, right, bottom) with shape (n, 4).
max_shape (Optional[Tuple[int, int]]): Shape of the image (height, width) for clipping.
points: Anchor points with shape (n, 2), [x, y].
distance: Distance from the given point to 4 boundaries
(left, top, right, bottom) with shape (n, 4).
max_shape: Shape of the image (height, width) for clipping.
Returns:
np.ndarray: Decoded bounding boxes with shape (n, 4) as [x1, y1, x2, y2].
Decoded bounding boxes with shape (n, 4) as [x1, y1, x2, y2].
"""
x1 = points[:, 0] - distance[:, 0]
y1 = points[:, 1] - distance[:, 1]
@@ -219,17 +234,20 @@ def distance2bbox(points: np.ndarray, distance: np.ndarray, max_shape: Optional[
return np.stack([x1, y1, x2, y2], axis=-1)
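To make the decoding concrete, a self-contained numpy sketch of the distance-to-box formula documented above (illustrative only; it re-implements the math rather than importing the module):

import numpy as np

# Anchor centers (n, 2) and predicted distances to the four sides (n, 4)
points = np.array([[100.0, 100.0], [200.0, 150.0]])
distance = np.array([[10.0, 20.0, 30.0, 40.0], [5.0, 5.0, 5.0, 5.0]])

x1 = points[:, 0] - distance[:, 0]
y1 = points[:, 1] - distance[:, 1]
x2 = points[:, 0] + distance[:, 2]
y2 = points[:, 1] + distance[:, 3]
boxes = np.stack([x1, y1, x2, y2], axis=-1)
print(boxes)
# [[ 90.  80. 130. 140.]
#  [195. 145. 205. 155.]]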
def distance2kps(points: np.ndarray, distance: np.ndarray, max_shape: Optional[Tuple[int, int]] = None) -> np.ndarray:
"""
Decode distance prediction to keypoints (SCRFD specific).
def distance2kps(
points: np.ndarray,
distance: np.ndarray,
max_shape: tuple[int, int] | None = None,
) -> np.ndarray:
"""Decode distance prediction to keypoints (SCRFD specific).
Args:
points (np.ndarray): Anchor points with shape (n, 2), [x, y].
distance (np.ndarray): Distance from the given point to keypoints with shape (n, 2k).
max_shape (Optional[Tuple[int, int]]): Shape of the image (height, width) for clipping.
points: Anchor points with shape (n, 2), [x, y].
distance: Distance from the given point to keypoints with shape (n, 2k).
max_shape: Shape of the image (height, width) for clipping.
Returns:
np.ndarray: Decoded keypoints with shape (n, 2k).
Decoded keypoints with shape (n, 2k).
"""
preds = []
for i in range(0, distance.shape[1], 2):

View File

@@ -3,7 +3,6 @@
# GitHub: https://github.com/yakhyo
from enum import Enum
from typing import Dict
# fmt: off
@@ -88,6 +87,15 @@ class AgeGenderWeights(str, Enum):
DEFAULT = "age_gender"
class FairFaceWeights(str, Enum):
"""
FairFace attribute prediction (race, gender, age).
Trained on FairFace dataset with balanced demographics.
https://github.com/yakhyo/fairface-onnx
"""
DEFAULT = "fairface"
class LandmarkWeights(str, Enum):
"""
MobileNet 0.5 from Insightface
@@ -119,7 +127,21 @@ class ParsingWeights(str, Enum):
RESNET34 = "parsing_resnet34"
MODEL_URLS: Dict[Enum, str] = {
class MiniFASNetWeights(str, Enum):
"""
MiniFASNet: Lightweight Face Anti-Spoofing models.
Trained on face anti-spoofing datasets.
https://github.com/yakhyo/face-anti-spoofing
Model Variants:
- V1SE: Uses scale=4.0 for face crop (squeeze-and-excitation version)
- V2: Uses scale=2.7 for face crop (improved version)
"""
V1SE = "minifasnet_v1se"
V2 = "minifasnet_v2"
MODEL_URLS: dict[Enum, str] = {
# RetinaFace
RetinaFaceWeights.MNET_025: 'https://github.com/yakhyo/uniface/releases/download/weights/retinaface_mv1_0.25.onnx',
RetinaFaceWeights.MNET_050: 'https://github.com/yakhyo/uniface/releases/download/weights/retinaface_mv1_0.50.onnx',
@@ -150,6 +172,8 @@ MODEL_URLS: Dict[Enum, str] = {
DDAMFNWeights.AFFECNET8: 'https://github.com/yakhyo/uniface/releases/download/weights/affecnet8.script',
# AgeGender
AgeGenderWeights.DEFAULT: 'https://github.com/yakhyo/uniface/releases/download/weights/genderage.onnx',
# FairFace
FairFaceWeights.DEFAULT: 'https://github.com/yakhyo/fairface-onnx/releases/download/weights/fairface.onnx',
# Landmarks
LandmarkWeights.DEFAULT: 'https://github.com/yakhyo/uniface/releases/download/weights/2d106det.onnx',
# Gaze (MobileGaze)
@@ -161,9 +185,12 @@ MODEL_URLS: Dict[Enum, str] = {
# Parsing
ParsingWeights.RESNET18: 'https://github.com/yakhyo/face-parsing/releases/download/weights/resnet18.onnx',
ParsingWeights.RESNET34: 'https://github.com/yakhyo/face-parsing/releases/download/weights/resnet34.onnx',
# Anti-Spoofing (MiniFASNet)
MiniFASNetWeights.V1SE: 'https://github.com/yakhyo/face-anti-spoofing/releases/download/weights/MiniFASNetV1SE.onnx',
MiniFASNetWeights.V2: 'https://github.com/yakhyo/face-anti-spoofing/releases/download/weights/MiniFASNetV2.onnx',
}
MODEL_SHA256: Dict[Enum, str] = {
MODEL_SHA256: dict[Enum, str] = {
# RetinaFace
RetinaFaceWeights.MNET_025: 'b7a7acab55e104dce6f32cdfff929bd83946da5cd869b9e2e9bdffafd1b7e4a5',
RetinaFaceWeights.MNET_050: 'd8977186f6037999af5b4113d42ba77a84a6ab0c996b17c713cc3d53b88bfc37',
@@ -194,6 +221,8 @@ MODEL_SHA256: Dict[Enum, str] = {
DDAMFNWeights.AFFECNET8: '8c66963bc71db42796a14dfcbfcd181b268b65a3fc16e87147d6a3a3d7e0f487',
# AgeGender
AgeGenderWeights.DEFAULT: '4fde69b1c810857b88c64a335084f1c3fe8f01246c9a191b48c7bb756d6652fb',
# FairFace
FairFaceWeights.DEFAULT: '9c8c47d437cd310538d233f2465f9ed0524cb7fb51882a37f74e8bc22437fdbf',
# Landmark
LandmarkWeights.DEFAULT: 'f001b856447c413801ef5c42091ed0cd516fcd21f2d6b79635b1e733a7109dbf',
# MobileGaze (trained on Gaze360)
@@ -205,6 +234,9 @@ MODEL_SHA256: Dict[Enum, str] = {
# Face Parsing
ParsingWeights.RESNET18: '0d9bd318e46987c3bdbfacae9e2c0f461cae1c6ac6ea6d43bbe541a91727e33f',
ParsingWeights.RESNET34: '5b805bba7b5660ab7070b5a381dcf75e5b3e04199f1e9387232a77a00095102e',
# Anti-Spoofing (MiniFASNet)
MiniFASNetWeights.V1SE: 'ebab7f90c7833fbccd46d3a555410e78d969db5438e169b6524be444862b3676',
MiniFASNetWeights.V2: 'b32929adc2d9c34b9486f8c4c7bc97c1b69bc0ea9befefc380e4faae4e463907',
}
CHUNK_SIZE = 8192

View File

@@ -2,47 +2,53 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
from __future__ import annotations
from typing import Any, Dict, List
from typing import Any
import numpy as np
from uniface.types import Face
from .base import BaseDetector
from .retinaface import RetinaFace
from .scrfd import SCRFD
from .yolov5 import YOLOv5Face
# Global cache for detector instances
_detector_cache: Dict[str, BaseDetector] = {}
# Global cache for detector instances (keyed by method name + config hash)
_detector_cache: dict[str, BaseDetector] = {}
def detect_faces(image: np.ndarray, method: str = 'retinaface', **kwargs) -> List[Dict[str, Any]]:
"""
High-level face detection function.
def detect_faces(image: np.ndarray, method: str = 'retinaface', **kwargs: Any) -> list[Face]:
"""High-level face detection function.
Detects faces in an image using the specified detection method.
Results are cached for repeated calls with the same configuration.
Args:
image (np.ndarray): Input image as numpy array.
method (str): Detection method to use. Options: 'retinaface', 'scrfd', 'yolov5face'.
image: Input image as numpy array with shape (H, W, C) in BGR format.
method: Detection method to use. Options: 'retinaface', 'scrfd', 'yolov5face'.
**kwargs: Additional arguments passed to the detector.
Returns:
List[Dict[str, Any]]: A list of dictionaries, where each dictionary represents a detected face and contains:
- 'bbox' (List[float]): [x1, y1, x2, y2] bounding box coordinates.
- 'confidence' (float): The confidence score of the detection.
- 'landmarks' (List[List[float]]): 5-point facial landmarks.
A list of Face objects, each containing:
- bbox: [x1, y1, x2, y2] bounding box coordinates.
- confidence: The confidence score of the detection.
- landmarks: 5-point facial landmarks with shape (5, 2).
Example:
>>> from uniface import detect_faces
>>> image = cv2.imread("your_image.jpg")
>>> faces = detect_faces(image, method='retinaface', conf_thresh=0.8)
>>> import cv2
>>> image = cv2.imread('your_image.jpg')
>>> faces = detect_faces(image, method='retinaface', confidence_threshold=0.8)
>>> for face in faces:
... print(f"Found face with confidence: {face['confidence']}")
... print(f"BBox: {face['bbox']}")
... print(f'Found face with confidence: {face.confidence}')
... print(f'BBox: {face.bbox}')
"""
method_name = method.lower()
sorted_kwargs = sorted(kwargs.items())
cache_key = f'{method_name}_{str(sorted_kwargs)}'
cache_key = f'{method_name}_{sorted_kwargs!s}'
if cache_key not in _detector_cache:
# Pass kwargs to create the correctly configured detector
@@ -52,49 +58,36 @@ def detect_faces(image: np.ndarray, method: str = 'retinaface', **kwargs) -> Lis
return detector.detect(image)
def create_detector(method: str = 'retinaface', **kwargs) -> BaseDetector:
"""
Factory function to create face detectors.
def create_detector(method: str = 'retinaface', **kwargs: Any) -> BaseDetector:
"""Factory function to create face detectors.
Args:
method (str): Detection method. Options:
method: Detection method. Options:
- 'retinaface': RetinaFace detector (default)
- 'scrfd': SCRFD detector (fast and accurate)
- 'yolov5face': YOLOv5-Face detector (accurate with landmarks)
**kwargs: Detector-specific parameters
**kwargs: Detector-specific parameters.
Returns:
BaseDetector: Initialized detector instance
Initialized detector instance.
Raises:
ValueError: If method is not supported
ValueError: If method is not supported.
Examples:
Example:
>>> # Basic usage
>>> detector = create_detector('retinaface')
>>> # SCRFD detector with custom parameters
>>> from uniface.constants import SCRFDWeights
>>> detector = create_detector(
... 'scrfd',
... model_name=SCRFDWeights.SCRFD_10G_KPS,
... conf_thresh=0.8,
... input_size=(640, 640)
... 'scrfd', model_name=SCRFDWeights.SCRFD_10G_KPS, confidence_threshold=0.8, input_size=(640, 640)
... )
>>> # RetinaFace detector
>>> from uniface.constants import RetinaFaceWeights
>>> detector = create_detector(
... 'retinaface',
... model_name=RetinaFaceWeights.MNET_V2,
... conf_thresh=0.8,
... nms_thresh=0.4
... )
>>> # YOLOv5-Face detector
>>> detector = create_detector(
... 'yolov5face',
... model_name=YOLOv5FaceWeights.YOLOV5S,
... conf_thresh=0.25,
... nms_thresh=0.45
... 'retinaface', model_name=RetinaFaceWeights.MNET_V2, confidence_threshold=0.8, nms_threshold=0.4
... )
"""
method = method.lower()
@@ -113,12 +106,12 @@ def create_detector(method: str = 'retinaface', **kwargs) -> BaseDetector:
raise ValueError(f"Unsupported detection method: '{method}'. Available methods: {available_methods}")
def list_available_detectors() -> Dict[str, Dict[str, Any]]:
"""
List all available detection methods with their descriptions and parameters.
def list_available_detectors() -> dict[str, dict[str, Any]]:
"""List all available detection methods with their descriptions and parameters.
Returns:
Dict[str, Dict[str, Any]]: Dictionary of detector information
Dictionary mapping detector names to their information including
description, landmark support, paper reference, and default parameters.
"""
return {
'retinaface': {
@@ -127,8 +120,8 @@ def list_available_detectors() -> Dict[str, Dict[str, Any]]:
'paper': 'https://arxiv.org/abs/1905.00641',
'default_params': {
'model_name': 'mnet_v2',
'conf_thresh': 0.5,
'nms_thresh': 0.4,
'confidence_threshold': 0.5,
'nms_threshold': 0.4,
'input_size': (640, 640),
},
},
@@ -138,8 +131,8 @@ def list_available_detectors() -> Dict[str, Dict[str, Any]]:
'paper': 'https://arxiv.org/abs/2105.04714',
'default_params': {
'model_name': 'scrfd_10g_kps',
'conf_thresh': 0.5,
'nms_thresh': 0.4,
'confidence_threshold': 0.5,
'nms_threshold': 0.4,
'input_size': (640, 640),
},
},
@@ -149,8 +142,8 @@ def list_available_detectors() -> Dict[str, Dict[str, Any]]:
'paper': 'https://arxiv.org/abs/2105.12931',
'default_params': {
'model_name': 'yolov5s_face',
'conf_thresh': 0.25,
'nms_thresh': 0.45,
'confidence_threshold': 0.25,
'nms_threshold': 0.45,
'input_size': 640,
},
},
@@ -158,11 +151,11 @@ def list_available_detectors() -> Dict[str, Dict[str, Any]]:
__all__ = [
'detect_faces',
'create_detector',
'list_available_detectors',
'SCRFD',
'BaseDetector',
'RetinaFace',
'YOLOv5Face',
'BaseDetector',
'create_detector',
'detect_faces',
'list_available_detectors',
]
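Taken together, the module exposes a one-call helper (detect_faces), a factory (create_detector), and an introspection helper (list_available_detectors). A short usage sketch based on the signatures above; the image path is a placeholder:

import cv2

from uniface.detection import create_detector, detect_faces, list_available_detectors

image = cv2.imread('your_image.jpg')  # placeholder path

# One-liner: the detector instance is cached by method name + kwargs, so repeated
# calls with the same configuration reuse the same underlying ONNX session.
faces = detect_faces(image, method='scrfd', confidence_threshold=0.6)

# Equivalent explicit form when you want to hold on to the detector yourself.
detector = create_detector('scrfd', confidence_threshold=0.6)
faces = detector(image)  # __call__ forwards to detect()

for name, info in list_available_detectors().items():
    print(name, info['default_params'])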

View File

@@ -2,75 +2,82 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
from __future__ import annotations
from abc import ABC, abstractmethod
from typing import Any, Dict, List
from typing import Any
import numpy as np
from uniface.types import Face
__all__ = ['BaseDetector']
class BaseDetector(ABC):
"""
Abstract base class for all face detectors.
"""Abstract base class for all face detectors.
This class defines the interface that all face detectors must implement,
ensuring consistency across different detection methods.
Attributes:
config: Dictionary containing detector configuration parameters.
_supports_landmarks: Flag indicating if detector supports landmark detection.
"""
def __init__(self, **kwargs):
"""Initialize the detector with configuration parameters."""
self.config = kwargs
@abstractmethod
def detect(self, image: np.ndarray, **kwargs) -> List[Dict[str, Any]]:
"""
Detect faces in an image.
def __init__(self, **kwargs: Any) -> None:
"""Initialize the detector with configuration parameters.
Args:
image (np.ndarray): Input image as numpy array with shape (H, W, C)
**kwargs: Additional detection parameters
**kwargs: Detector-specific configuration parameters.
"""
self.config: dict[str, Any] = kwargs
self._supports_landmarks: bool = False
@abstractmethod
def detect(self, image: np.ndarray, **kwargs: Any) -> list[Face]:
"""Detect faces in an image.
Args:
image: Input image as numpy array with shape (H, W, C) in BGR format.
**kwargs: Additional detection parameters.
Returns:
List[Dict[str, Any]]: List of detected faces, where each dictionary contains:
- 'bbox' (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
- 'confidence' (float): Detection confidence score (0.0 to 1.0)
- 'landmarks' (np.ndarray): Facial landmarks with shape (5, 2) for 5-point landmarks
or (68, 2) for 68-point landmarks. Empty array if not supported.
List of detected Face objects, each containing:
- bbox: Bounding box coordinates with shape (4,) as [x1, y1, x2, y2].
- confidence: Detection confidence score (0.0 to 1.0).
- landmarks: Facial landmarks with shape (5, 2) for 5-point landmarks.
Example:
>>> faces = detector.detect(image)
>>> for face in faces:
... bbox = face['bbox'] # np.ndarray with shape (4,)
... confidence = face['confidence'] # float
... landmarks = face['landmarks'] # np.ndarray with shape (5, 2)
... bbox = face.bbox # np.ndarray with shape (4,)
... confidence = face.confidence # float
... landmarks = face.landmarks # np.ndarray with shape (5, 2)
"""
pass
@abstractmethod
def preprocess(self, image: np.ndarray) -> np.ndarray:
"""
Preprocess input image for detection.
"""Preprocess input image for detection.
Args:
image (np.ndarray): Input image
image: Input image with shape (H, W, C).
Returns:
np.ndarray: Preprocessed image tensor
Preprocessed image tensor ready for inference.
"""
pass
@abstractmethod
def postprocess(self, outputs, **kwargs) -> Any:
"""
Postprocess model outputs to get final detections.
def postprocess(self, outputs: Any, **kwargs: Any) -> Any:
"""Postprocess model outputs to get final detections.
Args:
outputs: Raw model outputs
**kwargs: Additional postprocessing parameters
outputs: Raw model outputs.
**kwargs: Additional postprocessing parameters.
Returns:
Any: Processed outputs (implementation-specific format, typically tuple of arrays)
Processed outputs (implementation-specific format, typically tuple of arrays).
"""
pass
def __str__(self) -> str:
"""String representation of the detector."""
@@ -82,23 +89,33 @@ class BaseDetector(ABC):
@property
def supports_landmarks(self) -> bool:
"""
Whether this detector supports landmark detection.
"""Whether this detector supports landmark detection.
Returns:
bool: True if landmarks are supported, False otherwise
True if landmarks are supported, False otherwise.
"""
return hasattr(self, '_supports_landmarks') and self._supports_landmarks
def get_info(self) -> Dict[str, Any]:
"""
Get detector information and configuration.
def get_info(self) -> dict[str, Any]:
"""Get detector information and configuration.
Returns:
Dict[str, Any]: Detector information
Dictionary containing detector name, landmark support, and config.
"""
return {
'name': self.__class__.__name__,
'supports_landmarks': self._supports_landmarks,
'config': self.config,
}
def __call__(self, image: np.ndarray, **kwargs: Any) -> list[Face]:
"""Callable shortcut for the `detect` method.
Args:
image: Input image as numpy array with shape (H, W, C) in BGR format.
**kwargs: Additional detection parameters.
Returns:
List of detected Face objects.
"""
return self.detect(image, **kwargs)
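For reference, a minimal subclass satisfying this interface could look like the sketch below; the detector name and its constant output are invented purely for illustration.

import numpy as np

from uniface.detection import BaseDetector
from uniface.types import Face


class DummyDetector(BaseDetector):
    """Toy detector that always reports one face covering the whole image."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self._supports_landmarks = False

    def preprocess(self, image: np.ndarray) -> np.ndarray:
        return image.astype(np.float32)[None]  # add a batch dimension

    def postprocess(self, outputs, **kwargs):
        return outputs  # nothing to decode for this toy example

    def detect(self, image: np.ndarray, **kwargs) -> list[Face]:
        h, w = image.shape[:2]
        bbox = np.array([0, 0, w, h], dtype=np.float32)
        return [Face(bbox=bbox, confidence=1.0, landmarks=np.zeros((5, 2), dtype=np.float32))]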

View File

@@ -2,7 +2,9 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
from typing import Any, Dict, List, Literal, Tuple
from __future__ import annotations
from typing import Any, Literal
import numpy as np
@@ -17,6 +19,7 @@ from uniface.constants import RetinaFaceWeights
from uniface.log import Logger
from uniface.model_store import verify_model_weights
from uniface.onnx_utils import create_onnx_session
from uniface.types import Face
from .base import BaseDetector
@@ -31,8 +34,8 @@ class RetinaFace(BaseDetector):
Args:
model_name (RetinaFaceWeights): Model weights to use. Defaults to `RetinaFaceWeights.MNET_V2`.
conf_thresh (float): Confidence threshold for filtering detections. Defaults to 0.5.
nms_thresh (float): Non-maximum suppression (NMS) IoU threshold. Defaults to 0.4.
confidence_threshold (float): Confidence threshold for filtering detections. Defaults to 0.5.
nms_threshold (float): Non-maximum suppression (NMS) IoU threshold. Defaults to 0.4.
input_size (Tuple[int, int]): Fixed input size (width, height) if `dynamic_size=False`.
Defaults to (640, 640).
Note: Non-default sizes may cause slower inference and CoreML compatibility issues.
@@ -43,8 +46,8 @@ class RetinaFace(BaseDetector):
Attributes:
model_name (RetinaFaceWeights): Selected model variant.
conf_thresh (float): Threshold for confidence-based filtering.
nms_thresh (float): IoU threshold used for NMS.
confidence_threshold (float): Threshold for confidence-based filtering.
nms_threshold (float): IoU threshold used for NMS.
pre_nms_topk (int): Limit on proposals before applying NMS.
post_nms_topk (int): Limit on retained detections after NMS.
dynamic_size (bool): Flag indicating dynamic or static input sizing.
@@ -62,23 +65,23 @@ class RetinaFace(BaseDetector):
self,
*,
model_name: RetinaFaceWeights = RetinaFaceWeights.MNET_V2,
conf_thresh: float = 0.5,
nms_thresh: float = 0.4,
input_size: Tuple[int, int] = (640, 640),
confidence_threshold: float = 0.5,
nms_threshold: float = 0.4,
input_size: tuple[int, int] = (640, 640),
**kwargs: Any,
) -> None:
super().__init__(
model_name=model_name,
conf_thresh=conf_thresh,
nms_thresh=nms_thresh,
confidence_threshold=confidence_threshold,
nms_threshold=nms_threshold,
input_size=input_size,
**kwargs,
)
self._supports_landmarks = True # RetinaFace supports landmarks
self.model_name = model_name
self.conf_thresh = conf_thresh
self.nms_thresh = nms_thresh
self.confidence_threshold = confidence_threshold
self.nms_threshold = nms_threshold
self.input_size = input_size
# Advanced options from kwargs
@@ -87,8 +90,8 @@ class RetinaFace(BaseDetector):
self.dynamic_size = kwargs.get('dynamic_size', False)
Logger.info(
f'Initializing RetinaFace with model={self.model_name}, conf_thresh={self.conf_thresh}, '
f'nms_thresh={self.nms_thresh}, input_size={self.input_size}'
f'Initializing RetinaFace with model={self.model_name}, confidence_threshold={self.confidence_threshold}, '
f'nms_threshold={self.nms_threshold}, input_size={self.input_size}'
)
# Get path to model weights
@@ -104,14 +107,13 @@ class RetinaFace(BaseDetector):
self._initialize_model(self._model_path)
def _initialize_model(self, model_path: str) -> None:
"""
Initializes an ONNX model session from the given path.
"""Initialize an ONNX model session from the given path.
Args:
model_path (str): The file path to the ONNX model.
model_path: The file path to the ONNX model.
Raises:
RuntimeError: If the model fails to load, logs an error and raises an exception.
RuntimeError: If the model fails to load.
"""
try:
self.session = create_onnx_session(model_path)
@@ -136,14 +138,14 @@ class RetinaFace(BaseDetector):
image = np.expand_dims(image, axis=0) # Add batch dimension (1, C, H, W)
return image
def inference(self, input_tensor: np.ndarray) -> List[np.ndarray]:
def inference(self, input_tensor: np.ndarray) -> list[np.ndarray]:
"""Perform model inference on the preprocessed image tensor.
Args:
input_tensor (np.ndarray): Preprocessed input tensor.
input_tensor: Preprocessed input tensor with shape (1, C, H, W).
Returns:
Tuple[np.ndarray, np.ndarray]: Raw model outputs.
List of raw model outputs (location, confidence, landmarks).
"""
return self.session.run(self.output_names, {self.input_names: input_tensor})
@@ -154,7 +156,7 @@ class RetinaFace(BaseDetector):
max_num: int = 0,
metric: Literal['default', 'max'] = 'max',
center_weight: float = 2.0,
) -> List[Dict[str, Any]]:
) -> list[Face]:
"""
Perform face detection on an input image and return bounding boxes and facial landmarks.
@@ -168,19 +170,19 @@ class RetinaFace(BaseDetector):
when using the "default" metric. Defaults to 2.0.
Returns:
List[Dict[str, Any]]: List of face detection dictionaries, each containing:
- 'bbox' (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
- 'confidence' (float): Detection confidence score (0.0 to 1.0)
- 'landmarks' (np.ndarray): 5-point facial landmarks with shape (5, 2)
List[Face]: List of Face objects, each containing:
- bbox (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
- confidence (float): Detection confidence score (0.0 to 1.0)
- landmarks (np.ndarray): 5-point facial landmarks with shape (5, 2)
Example:
>>> faces = detector.detect(image)
>>> for face in faces:
... bbox = face['bbox'] # np.ndarray with shape (4,)
... confidence = face['confidence'] # float
... landmarks = face['landmarks'] # np.ndarray with shape (5, 2)
... bbox = face.bbox # np.ndarray with shape (4,)
... confidence = face.confidence # float
... landmarks = face.landmarks # np.ndarray with shape (5, 2)
... # Can pass landmarks directly to recognition
... embedding = recognizer.get_normalized_embedding(image, landmarks)
... embedding = recognizer.get_normalized_embedding(image, face.landmarks)
"""
original_height, original_width = image.shape[:2]
@@ -229,51 +231,53 @@ class RetinaFace(BaseDetector):
faces = []
for i in range(detections.shape[0]):
face_dict = {
'bbox': detections[i, :4],
'confidence': float(detections[i, 4]),
'landmarks': landmarks[i],
}
faces.append(face_dict)
face = Face(
bbox=detections[i, :4],
confidence=float(detections[i, 4]),
landmarks=landmarks[i],
)
faces.append(face)
return faces
def postprocess(
self, outputs: List[np.ndarray], resize_factor: float, shape: Tuple[int, int]
) -> Tuple[np.ndarray, np.ndarray]:
"""
Process the model outputs into final detection results.
self,
outputs: list[np.ndarray],
resize_factor: float,
shape: tuple[int, int],
) -> tuple[np.ndarray, np.ndarray]:
"""Process the model outputs into final detection results.
Args:
outputs (List[np.ndarray]): Raw outputs from the detection model.
outputs: Raw outputs from the detection model containing:
- outputs[0]: Location predictions (bounding box coordinates).
- outputs[1]: Class confidence scores.
- outputs[2]: Landmark predictions.
resize_factor (float): Factor used to resize the input image during preprocessing.
shape (Tuple[int, int]): Original shape of the image as (height, width).
resize_factor: Factor used to resize the input image during preprocessing.
shape: Original shape of the image as (width, height).
Returns:
Tuple[np.ndarray, np.ndarray]: Processed results containing:
- detections (np.ndarray): Array of detected bounding boxes with confidence scores.
Shape: (num_detections, 5), where each row is [x_min, y_min, x_max, y_max, score].
- landmarks (np.ndarray): Array of detected facial landmarks.
Shape: (num_detections, 5, 2), where each row contains 5 landmark points (x, y).
A tuple containing:
- detections: Array of detected bounding boxes with confidence scores,
shape (num_detections, 5), each row is [x1, y1, x2, y2, score].
- landmarks: Array of detected facial landmarks,
shape (num_detections, 5, 2), each row contains 5 landmark points (x, y).
"""
loc, conf, landmarks = (
location_predictions, confidence_scores, landmark_predictions = (
outputs[0].squeeze(0),
outputs[1].squeeze(0),
outputs[2].squeeze(0),
)
# Decode boxes and landmarks
boxes = decode_boxes(loc, self._priors)
landmarks = decode_landmarks(landmarks, self._priors)
boxes = decode_boxes(location_predictions, self._priors)
landmarks = decode_landmarks(landmark_predictions, self._priors)
boxes, landmarks = self._scale_detections(boxes, landmarks, resize_factor, shape=(shape[0], shape[1]))
# Extract confidence scores for the face class
scores = conf[:, 1]
mask = scores > self.conf_thresh
scores = confidence_scores[:, 1]
mask = scores > self.confidence_threshold
# Filter by confidence threshold
boxes, landmarks, scores = boxes[mask], landmarks[mask], scores[mask]
@@ -284,7 +288,7 @@ class RetinaFace(BaseDetector):
# Apply NMS
detections = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
keep = non_max_suppression(detections, self.nms_thresh)
keep = non_max_suppression(detections, self.nms_threshold)
detections, landmarks = detections[keep], landmarks[keep]
# Keep top-k detections
@@ -302,9 +306,9 @@ class RetinaFace(BaseDetector):
boxes: np.ndarray,
landmarks: np.ndarray,
resize_factor: float,
shape: Tuple[int, int],
) -> Tuple[np.ndarray, np.ndarray]:
# Scale bounding boxes and landmarks to the original image size.
shape: tuple[int, int],
) -> tuple[np.ndarray, np.ndarray]:
"""Scale bounding boxes and landmarks to the original image size."""
bbox_scale = np.array([shape[0], shape[1]] * 2)
boxes = boxes * bbox_scale / resize_factor
@@ -312,72 +316,3 @@ class RetinaFace(BaseDetector):
landmarks = landmarks * landmark_scale / resize_factor
return boxes, landmarks
# TODO: below is only for testing, remove it later
def draw_bbox(frame, bbox, score, color=(0, 255, 0), thickness=2):
x1, y1, x2, y2 = map(int, bbox) # Unpack 4 bbox values
cv2.rectangle(frame, (x1, y1), (x2, y2), color, thickness)
cv2.putText(frame, f'{score:.2f}', (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)
def draw_keypoints(frame, points, color=(0, 0, 255), radius=2):
for x, y in points.astype(np.int32):
cv2.circle(frame, (int(x), int(y)), radius, color, -1)
if __name__ == '__main__':
import cv2
detector = RetinaFace(model_name=RetinaFaceWeights.MNET_050)
print(detector.get_info())
cap = cv2.VideoCapture(0)
if not cap.isOpened():
print('Failed to open webcam.')
exit()
print("Webcam started. Press 'q' to exit.")
while True:
ret, frame = cap.read()
if not ret:
print('Failed to read frame.')
break
# Get face detections as list of dictionaries
faces = detector.detect(frame)
# Process each detected face
for face in faces:
# Extract bbox and landmarks from dictionary
bbox = face['bbox'] # [x1, y1, x2, y2]
landmarks = face['landmarks'] # [[x1, y1], [x2, y2], ...]
confidence = face['confidence']
# Pass bbox and confidence separately
draw_bbox(frame, bbox, confidence)
# Convert landmarks to numpy array format if needed
if landmarks is not None and len(landmarks) > 0:
# Convert list of [x, y] pairs to numpy array
points = np.array(landmarks, dtype=np.float32) # Shape: (5, 2)
draw_keypoints(frame, points)
# Display face count
cv2.putText(
frame,
f'Faces: {len(faces)}',
(10, 30),
cv2.FONT_HERSHEY_SIMPLEX,
0.7,
(255, 255, 255),
2,
)
cv2.imshow('FaceDetection', frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
cap.release()
cv2.destroyAllWindows()
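Since the dictionary-based webcam demo above is being removed, here is an equivalent single-image sketch against the updated Face-dataclass API (the image path is a placeholder):

import cv2

from uniface.constants import RetinaFaceWeights
from uniface.detection import RetinaFace

detector = RetinaFace(
    model_name=RetinaFaceWeights.MNET_V2,
    confidence_threshold=0.5,
    nms_threshold=0.4,
)

image = cv2.imread('your_image.jpg')  # placeholder path
for face in detector.detect(image):
    x1, y1, x2, y2 = map(int, face.bbox)
    cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
    for x, y in face.landmarks.astype(int):
        cv2.circle(image, (int(x), int(y)), 2, (0, 0, 255), -1)
    print(f'confidence={face.confidence:.2f}')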

View File

@@ -2,9 +2,10 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
from typing import Any, Dict, List, Literal, Tuple
from __future__ import annotations
from typing import Any, Literal
import cv2
import numpy as np
from uniface.common import distance2bbox, distance2kps, non_max_suppression, resize_image
@@ -12,6 +13,7 @@ from uniface.constants import SCRFDWeights
from uniface.log import Logger
from uniface.model_store import verify_model_weights
from uniface.onnx_utils import create_onnx_session
from uniface.types import Face
from .base import BaseDetector
@@ -29,8 +31,8 @@ class SCRFD(BaseDetector):
Args:
model_name (SCRFDWeights): Predefined model enum (e.g., `SCRFD_10G_KPS`).
Specifies the SCRFD variant to load. Defaults to SCRFD_10G_KPS.
conf_thresh (float): Confidence threshold for filtering detections. Defaults to 0.5.
nms_thresh (float): Non-Maximum Suppression threshold. Defaults to 0.4.
confidence_threshold (float): Confidence threshold for filtering detections. Defaults to 0.5.
nms_threshold (float): Non-Maximum Suppression threshold. Defaults to 0.4.
input_size (Tuple[int, int]): Input image size (width, height).
Defaults to (640, 640).
Note: Non-default sizes may cause slower inference and CoreML compatibility issues.
@@ -38,10 +40,10 @@ class SCRFD(BaseDetector):
Attributes:
model_name (SCRFDWeights): Selected model variant.
conf_thresh (float): Threshold used to filter low-confidence detections.
nms_thresh (float): Threshold used during NMS to suppress overlapping boxes.
confidence_threshold (float): Threshold used to filter low-confidence detections.
nms_threshold (float): Threshold used during NMS to suppress overlapping boxes.
input_size (Tuple[int, int]): Image size to which inputs are resized before inference.
_fmc (int): Number of feature map levels used in the model.
_num_feature_maps (int): Number of feature map levels used in the model.
_feat_stride_fpn (List[int]): Feature map strides corresponding to each detection level.
_num_anchors (int): Number of anchors per feature location.
_center_cache (Dict): Cached anchor centers for efficient forward passes.
@@ -56,35 +58,35 @@ class SCRFD(BaseDetector):
self,
*,
model_name: SCRFDWeights = SCRFDWeights.SCRFD_10G_KPS,
conf_thresh: float = 0.5,
nms_thresh: float = 0.4,
input_size: Tuple[int, int] = (640, 640),
confidence_threshold: float = 0.5,
nms_threshold: float = 0.4,
input_size: tuple[int, int] = (640, 640),
**kwargs: Any,
) -> None:
super().__init__(
model_name=model_name,
conf_thresh=conf_thresh,
nms_thresh=nms_thresh,
confidence_threshold=confidence_threshold,
nms_threshold=nms_threshold,
input_size=input_size,
**kwargs,
)
self._supports_landmarks = True # SCRFD supports landmarks
self.model_name = model_name
self.conf_thresh = conf_thresh
self.nms_thresh = nms_thresh
self.confidence_threshold = confidence_threshold
self.nms_threshold = nms_threshold
self.input_size = input_size
# ------- SCRFD model params ------
self._fmc = 3
self._num_feature_maps = 3
self._feat_stride_fpn = [8, 16, 32]
self._num_anchors = 2
self._center_cache = {}
# ---------------------------------
Logger.info(
f'Initializing SCRFD with model={self.model_name}, conf_thresh={self.conf_thresh}, '
f'nms_thresh={self.nms_thresh}, input_size={self.input_size}'
f'Initializing SCRFD with model={self.model_name}, confidence_threshold={self.confidence_threshold}, '
f'nms_threshold={self.nms_threshold}, input_size={self.input_size}'
)
# Get path to model weights
@@ -95,14 +97,13 @@ class SCRFD(BaseDetector):
self._initialize_model(self._model_path)
def _initialize_model(self, model_path: str) -> None:
"""
Initializes an ONNX model session from the given path.
"""Initialize an ONNX model session from the given path.
Args:
model_path (str): The file path to the ONNX model.
model_path: The file path to the ONNX model.
Raises:
RuntimeError: If the model fails to load, logs an error and raises an exception.
RuntimeError: If the model fails to load.
"""
try:
self.session = create_onnx_session(model_path)
@@ -113,14 +114,14 @@ class SCRFD(BaseDetector):
Logger.error(f"Failed to load model from '{model_path}': {e}", exc_info=True)
raise RuntimeError(f"Failed to initialize model session for '{model_path}'") from e
def preprocess(self, image: np.ndarray) -> Tuple[np.ndarray, Tuple[int, int]]:
def preprocess(self, image: np.ndarray) -> np.ndarray:
"""Preprocess image for inference.
Args:
image (np.ndarray): Input image
image: Input image with shape (H, W, C).
Returns:
Tuple[np.ndarray, Tuple[int, int]]: Preprocessed blob and input size
Preprocessed image tensor with shape (1, C, H, W).
"""
image = image.astype(np.float32)
image = (image - 127.5) / 127.5
@@ -129,29 +130,42 @@ class SCRFD(BaseDetector):
return image
def inference(self, input_tensor: np.ndarray) -> List[np.ndarray]:
def inference(self, input_tensor: np.ndarray) -> list[np.ndarray]:
"""Perform model inference on the preprocessed image tensor.
Args:
input_tensor (np.ndarray): Preprocessed input tensor.
input_tensor: Preprocessed input tensor with shape (1, C, H, W).
Returns:
Tuple[np.ndarray, np.ndarray]: Raw model outputs.
List of raw model outputs.
"""
return self.session.run(self.output_names, {self.input_names: input_tensor})
def postprocess(self, outputs: List[np.ndarray], image_size: Tuple[int, int]):
scores_list = []
def postprocess(
self,
outputs: list[np.ndarray],
image_size: tuple[int, int],
) -> tuple[list[np.ndarray], list[np.ndarray], list[np.ndarray]]:
"""Process model outputs into detection results.
Args:
outputs: Raw outputs from the detection model.
image_size: Size of the input image as (height, width).
Returns:
Tuple of (scores_list, bboxes_list, landmarks_list).
"""
scores_list: list[np.ndarray] = []
bboxes_list = []
kpss_list = []
image_size = image_size
fmc = self._fmc
num_feature_maps = self._num_feature_maps
for idx, stride in enumerate(self._feat_stride_fpn):
scores = outputs[idx]
bbox_preds = outputs[fmc + idx] * stride
kps_preds = outputs[2 * fmc + idx] * stride
bbox_preds = outputs[num_feature_maps + idx] * stride
kps_preds = outputs[2 * num_feature_maps + idx] * stride
# Generate anchors
fm_height = image_size[0] // stride
@@ -171,7 +185,7 @@ class SCRFD(BaseDetector):
if len(self._center_cache) < 100:
self._center_cache[cache_key] = anchor_centers
pos_indices = np.where(scores >= self.conf_thresh)[0]
pos_indices = np.where(scores >= self.confidence_threshold)[0]
if len(pos_indices) == 0:
continue
@@ -193,7 +207,7 @@ class SCRFD(BaseDetector):
max_num: int = 0,
metric: Literal['default', 'max'] = 'max',
center_weight: float = 2.0,
) -> List[Dict[str, Any]]:
) -> list[Face]:
"""
Perform face detection on an input image and return bounding boxes and facial landmarks.
@@ -207,19 +221,19 @@ class SCRFD(BaseDetector):
when using the "default" metric. Defaults to 2.0.
Returns:
List[Dict[str, Any]]: List of face detection dictionaries, each containing:
- 'bbox' (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
- 'confidence' (float): Detection confidence score (0.0 to 1.0)
- 'landmarks' (np.ndarray): 5-point facial landmarks with shape (5, 2)
List[Face]: List of Face objects, each containing:
- bbox (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
- confidence (float): Detection confidence score (0.0 to 1.0)
- landmarks (np.ndarray): 5-point facial landmarks with shape (5, 2)
Example:
>>> faces = detector.detect(image)
>>> for face in faces:
... bbox = face['bbox'] # np.ndarray with shape (4,)
... confidence = face['confidence'] # float
... landmarks = face['landmarks'] # np.ndarray with shape (5, 2)
... bbox = face.bbox # np.ndarray with shape (4,)
... confidence = face.confidence # float
... landmarks = face.landmarks # np.ndarray with shape (5, 2)
... # Can pass landmarks directly to recognition
... embedding = recognizer.get_normalized_embedding(image, landmarks)
... embedding = recognizer.get_normalized_embedding(image, face.landmarks)
"""
original_height, original_width = image.shape[:2]
@@ -247,7 +261,7 @@ class SCRFD(BaseDetector):
pre_det = np.hstack((bboxes, scores)).astype(np.float32, copy=False)
pre_det = pre_det[order, :]
keep = non_max_suppression(pre_det, threshold=self.nms_thresh)
keep = non_max_suppression(pre_det, threshold=self.nms_threshold)
detections = pre_det[keep, :]
landmarks = landmarks[order, :, :]
@@ -280,78 +294,11 @@ class SCRFD(BaseDetector):
faces = []
for i in range(detections.shape[0]):
face_dict = {
'bbox': detections[i, :4],
'confidence': float(detections[i, 4]),
'landmarks': landmarks[i],
}
faces.append(face_dict)
face = Face(
bbox=detections[i, :4],
confidence=float(detections[i, 4]),
landmarks=landmarks[i],
)
faces.append(face)
return faces
# TODO: below is only for testing, remove it later
def draw_bbox(frame, bbox, score, color=(0, 255, 0), thickness=2):
x1, y1, x2, y2 = map(int, bbox) # Unpack 4 bbox values
cv2.rectangle(frame, (x1, y1), (x2, y2), color, thickness)
cv2.putText(frame, f'{score:.2f}', (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)
def draw_keypoints(frame, points, color=(0, 0, 255), radius=2):
for x, y in points.astype(np.int32):
cv2.circle(frame, (int(x), int(y)), radius, color, -1)
if __name__ == '__main__':
detector = SCRFD(model_name=SCRFDWeights.SCRFD_500M_KPS)
print(detector.get_info())
cap = cv2.VideoCapture(0)
if not cap.isOpened():
print('Failed to open webcam.')
exit()
print("Webcam started. Press 'q' to exit.")
while True:
ret, frame = cap.read()
if not ret:
print('Failed to read frame.')
break
# Get face detections as list of dictionaries
faces = detector.detect(frame)
# Process each detected face
for face in faces:
# Extract bbox and landmarks from dictionary
bbox = face['bbox'] # [x1, y1, x2, y2]
landmarks = face['landmarks'] # [[x1, y1], [x2, y2], ...]
confidence = face['confidence']
# Pass bbox and confidence separately
draw_bbox(frame, bbox, confidence)
# Convert landmarks to numpy array format if needed
if landmarks is not None and len(landmarks) > 0:
# Convert list of [x, y] pairs to numpy array
points = np.array(landmarks, dtype=np.float32) # Shape: (5, 2)
draw_keypoints(frame, points)
# Display face count
cv2.putText(
frame,
f'Faces: {len(faces)}',
(10, 30),
cv2.FONT_HERSHEY_SIMPLEX,
0.7,
(255, 255, 255),
2,
)
cv2.imshow('FaceDetection', frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
cap.release()
cv2.destroyAllWindows()
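To make the anchor bookkeeping in postprocess concrete, here is a standalone sketch of generating anchor centers for a single stride level; the exact tiling order inside SCRFD may differ, so treat this as illustrative rather than the library's implementation.

import numpy as np


def anchor_centers_for_stride(height: int, width: int, stride: int, num_anchors: int = 2) -> np.ndarray:
    """Grid of anchor centers, in input-image pixels, for one FPN level."""
    fm_h, fm_w = height // stride, width // stride
    ys, xs = np.mgrid[:fm_h, :fm_w]
    centers = np.stack([xs, ys], axis=-1).reshape(-1, 2) * stride
    # SCRFD predicts num_anchors boxes per location, so each center is repeated.
    return np.repeat(centers, num_anchors, axis=0).astype(np.float32)


centers = anchor_centers_for_stride(640, 640, stride=8)  # shape (2 * 80 * 80, 2)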

View File

@@ -2,7 +2,7 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
from typing import Any, Dict, List, Literal, Tuple
from typing import Any, Literal
import cv2
import numpy as np
@@ -12,6 +12,7 @@ from uniface.constants import YOLOv5FaceWeights
from uniface.log import Logger
from uniface.model_store import verify_model_weights
from uniface.onnx_utils import create_onnx_session
from uniface.types import Face
from .base import BaseDetector
@@ -29,8 +30,8 @@ class YOLOv5Face(BaseDetector):
Args:
model_name (YOLOv5FaceWeights): Predefined model enum (e.g., `YOLOV5S`).
Specifies the YOLOv5-Face variant to load. Defaults to YOLOV5S.
conf_thresh (float): Confidence threshold for filtering detections. Defaults to 0.6.
nms_thresh (float): Non-Maximum Suppression threshold. Defaults to 0.5.
confidence_threshold (float): Confidence threshold for filtering detections. Defaults to 0.6.
nms_threshold (float): Non-Maximum Suppression threshold. Defaults to 0.5.
input_size (int): Input image size. Defaults to 640.
Note: ONNX model is fixed at 640. Changing this will cause inference errors.
**kwargs: Advanced options:
@@ -38,8 +39,8 @@ class YOLOv5Face(BaseDetector):
Attributes:
model_name (YOLOv5FaceWeights): Selected model variant.
conf_thresh (float): Threshold used to filter low-confidence detections.
nms_thresh (float): Threshold used during NMS to suppress overlapping boxes.
confidence_threshold (float): Threshold used to filter low-confidence detections.
nms_threshold (float): Threshold used during NMS to suppress overlapping boxes.
input_size (int): Image size to which inputs are resized before inference.
max_det (int): Maximum number of detections to return.
_model_path (str): Absolute path to the downloaded/verified model weights.
@@ -53,15 +54,15 @@ class YOLOv5Face(BaseDetector):
self,
*,
model_name: YOLOv5FaceWeights = YOLOv5FaceWeights.YOLOV5S,
conf_thresh: float = 0.6,
nms_thresh: float = 0.5,
confidence_threshold: float = 0.6,
nms_threshold: float = 0.5,
input_size: int = 640,
**kwargs: Any,
) -> None:
super().__init__(
model_name=model_name,
conf_thresh=conf_thresh,
nms_thresh=nms_thresh,
confidence_threshold=confidence_threshold,
nms_threshold=nms_threshold,
input_size=input_size,
**kwargs,
)
@@ -74,16 +75,16 @@ class YOLOv5Face(BaseDetector):
)
self.model_name = model_name
self.conf_thresh = conf_thresh
self.nms_thresh = nms_thresh
self.confidence_threshold = confidence_threshold
self.nms_threshold = nms_threshold
self.input_size = input_size
# Advanced options from kwargs
self.max_det = kwargs.get('max_det', 750)
Logger.info(
f'Initializing YOLOv5Face with model={self.model_name}, conf_thresh={self.conf_thresh}, '
f'nms_thresh={self.nms_thresh}, input_size={self.input_size}'
f'Initializing YOLOv5Face with model={self.model_name}, confidence_threshold={self.confidence_threshold}, '
f'nms_threshold={self.nms_threshold}, input_size={self.input_size}'
)
# Get path to model weights
@@ -112,7 +113,7 @@ class YOLOv5Face(BaseDetector):
Logger.error(f"Failed to load model from '{model_path}': {e}", exc_info=True)
raise RuntimeError(f"Failed to initialize model session for '{model_path}'") from e
def preprocess(self, image: np.ndarray) -> Tuple[np.ndarray, float, Tuple[int, int]]:
def preprocess(self, image: np.ndarray) -> tuple[np.ndarray, float, tuple[int, int]]:
"""
Preprocess image for inference.
@@ -153,7 +154,7 @@ class YOLOv5Face(BaseDetector):
return img_batch, scale, (pad_w, pad_h)
def inference(self, input_tensor: np.ndarray) -> List[np.ndarray]:
def inference(self, input_tensor: np.ndarray) -> list[np.ndarray]:
"""Perform model inference on the preprocessed image tensor.
Args:
@@ -168,8 +169,8 @@ class YOLOv5Face(BaseDetector):
self,
predictions: np.ndarray,
scale: float,
padding: Tuple[int, int],
) -> Tuple[np.ndarray, np.ndarray]:
padding: tuple[int, int],
) -> tuple[np.ndarray, np.ndarray]:
"""
Postprocess model predictions.
@@ -189,7 +190,7 @@ class YOLOv5Face(BaseDetector):
predictions = predictions[0] # Remove batch dimension
# Filter by confidence
mask = predictions[:, 4] >= self.conf_thresh
mask = predictions[:, 4] >= self.confidence_threshold
predictions = predictions[mask]
if len(predictions) == 0:
@@ -206,7 +207,7 @@ class YOLOv5Face(BaseDetector):
# Apply NMS
detections_for_nms = np.hstack((boxes, scores[:, None])).astype(np.float32, copy=False)
keep = non_max_suppression(detections_for_nms, self.nms_thresh)
keep = non_max_suppression(detections_for_nms, self.nms_threshold)
if len(keep) == 0:
return np.array([]), np.array([])
@@ -259,7 +260,7 @@ class YOLOv5Face(BaseDetector):
max_num: int = 0,
metric: Literal['default', 'max'] = 'max',
center_weight: float = 2.0,
) -> List[Dict[str, Any]]:
) -> list[Face]:
"""
Perform face detection on an input image and return bounding boxes and facial landmarks.
@@ -273,19 +274,19 @@ class YOLOv5Face(BaseDetector):
when using the "default" metric. Defaults to 2.0.
Returns:
List[Dict[str, Any]]: List of face detection dictionaries, each containing:
- 'bbox' (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
- 'confidence' (float): Detection confidence score (0.0 to 1.0)
- 'landmarks' (np.ndarray): 5-point facial landmarks with shape (5, 2)
List[Face]: List of Face objects, each containing:
- bbox (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
- confidence (float): Detection confidence score (0.0 to 1.0)
- landmarks (np.ndarray): 5-point facial landmarks with shape (5, 2)
Example:
>>> faces = detector.detect(image)
>>> for face in faces:
... bbox = face['bbox'] # np.ndarray with shape (4,)
... confidence = face['confidence'] # float
... landmarks = face['landmarks'] # np.ndarray with shape (5, 2)
... bbox = face.bbox # np.ndarray with shape (4,)
... confidence = face.confidence # float
... landmarks = face.landmarks # np.ndarray with shape (5, 2)
... # Can pass landmarks directly to recognition
... embedding = recognizer.get_normalized_embedding(image, landmarks)
... embedding = recognizer.get_normalized_embedding(image, face.landmarks)
"""
original_height, original_width = image.shape[:2]
@@ -330,11 +331,11 @@ class YOLOv5Face(BaseDetector):
faces = []
for i in range(detections.shape[0]):
face_dict = {
'bbox': detections[i, :4],
'confidence': float(detections[i, 4]),
'landmarks': landmarks[i],
}
faces.append(face_dict)
face = Face(
bbox=detections[i, :4],
confidence=float(detections[i, 4]),
landmarks=landmarks[i],
)
faces.append(face)
return faces
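The preprocess step letterboxes the input and returns (tensor, scale, (pad_w, pad_h)); the sketch below shows the usual way such predictions are mapped back to original-image coordinates. It mirrors the idea used in postprocess, not its exact code.

import numpy as np


def unletterbox_boxes(boxes: np.ndarray, scale: float, pad_w: float, pad_h: float) -> np.ndarray:
    """Map [x1, y1, x2, y2] boxes from the padded model input back to the source image."""
    out = boxes.astype(np.float32).copy()
    out[:, [0, 2]] -= pad_w  # undo horizontal padding
    out[:, [1, 3]] -= pad_h  # undo vertical padding
    out /= scale             # undo the resize
    return out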

View File

@@ -1,66 +0,0 @@
# Copyright 2025 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
from dataclasses import asdict, dataclass
from typing import Optional
import numpy as np
from uniface.face_utils import compute_similarity
__all__ = ['Face']
@dataclass
class Face:
"""
Detected face with analysis results.
"""
# Required attributes
bbox: np.ndarray
confidence: float
landmarks: np.ndarray
# Optional attributes
embedding: Optional[np.ndarray] = None
age: Optional[int] = None
gender: Optional[int] = None # 0 or 1
def compute_similarity(self, other: 'Face') -> float:
"""Compute cosine similarity with another face."""
if self.embedding is None or other.embedding is None:
raise ValueError('Both faces must have embeddings for similarity computation')
return float(compute_similarity(self.embedding, other.embedding))
def to_dict(self) -> dict:
"""Convert to dictionary."""
return asdict(self)
@property
def sex(self) -> str:
"""Get gender as a string label (Female or Male)."""
if self.gender is None:
return None
return 'Female' if self.gender == 0 else 'Male'
@property
def bbox_xyxy(self) -> np.ndarray:
"""Get bounding box coordinates in (x1, y1, x2, y2) format."""
return self.bbox.copy()
@property
def bbox_xywh(self) -> np.ndarray:
"""Get bounding box coordinates in (x1, y1, w, h) format."""
return np.array([self.bbox[0], self.bbox[1], self.bbox[2] - self.bbox[0], self.bbox[3] - self.bbox[1]])
def __repr__(self) -> str:
parts = [f'Face(confidence={self.confidence:.3f}']
if self.age is not None:
parts.append(f'age={self.age}')
if self.gender is not None:
parts.append(f'sex={self.sex}')
if self.embedding is not None:
parts.append(f'embedding_dim={self.embedding.shape[0]}')
return ', '.join(parts) + ')'
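Per the PR description, this dataclass now lives in uniface/types.py. Assuming the moved class keeps the same fields and helpers shown above, a small usage sketch (the 512-dimensional embeddings are illustrative placeholders):

import numpy as np

from uniface.types import Face

a = Face(bbox=np.array([10, 20, 110, 220]), confidence=0.98,
         landmarks=np.zeros((5, 2)), embedding=np.random.rand(512).astype(np.float32))
b = Face(bbox=np.array([15, 25, 120, 230]), confidence=0.95,
         landmarks=np.zeros((5, 2)), embedding=np.random.rand(512).astype(np.float32))

print(a.bbox_xywh)              # [10, 20, 100, 200]
print(a.sex)                    # None, since gender was not set
print(a.compute_similarity(b))  # cosine similarity of the two embeddings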

View File

@@ -2,21 +2,21 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
from typing import Tuple, Union
from __future__ import annotations
import cv2
import numpy as np
from skimage.transform import SimilarityTransform
__all__ = [
'face_alignment',
'compute_similarity',
'bbox_center_alignment',
'compute_similarity',
'face_alignment',
'transform_points_2d',
]
# Reference alignment for facial landmarks (ArcFace)
# Standard 5-point facial landmark reference for ArcFace alignment (112x112)
reference_alignment: np.ndarray = np.array(
[
[38.2946, 51.6963],
@@ -29,22 +29,25 @@ reference_alignment: np.ndarray = np.array(
)
def estimate_norm(landmark: np.ndarray, image_size: Union[int, Tuple[int, int]] = 112) -> Tuple[np.ndarray, np.ndarray]:
"""
Estimate the normalization transformation matrix for facial landmarks.
def estimate_norm(
landmark: np.ndarray,
image_size: int | tuple[int, int] = 112,
) -> tuple[np.ndarray, np.ndarray]:
"""Estimate the normalization transformation matrix for facial landmarks.
Args:
landmark (np.ndarray): Array of shape (5, 2) representing the coordinates of the facial landmarks.
image_size (Union[int, Tuple[int, int]], optional): The size of the output image.
Can be an integer (for square images) or a tuple (width, height). Default is 112.
landmark: Array of shape (5, 2) representing the coordinates of the facial landmarks.
image_size: The size of the output image. Can be an integer (for square images)
or a tuple (width, height). Default is 112.
Returns:
np.ndarray: The 2x3 transformation matrix for aligning the landmarks.
np.ndarray: The 2x3 inverse transformation matrix for aligning the landmarks.
A tuple containing:
- The 2x3 transformation matrix for aligning the landmarks.
- The 2x3 inverse transformation matrix.
Raises:
AssertionError: If the input landmark array does not have the shape (5, 2)
or if image_size is not a multiple of 112 or 128.
or if image_size is not a multiple of 112 or 128.
"""
assert landmark.shape == (5, 2), 'Landmark array must have shape (5, 2).'
@@ -80,23 +83,23 @@ def estimate_norm(landmark: np.ndarray, image_size: Union[int, Tuple[int, int]]
def face_alignment(
image: np.ndarray,
landmark: np.ndarray,
image_size: Union[int, Tuple[int, int]] = 112,
) -> Tuple[np.ndarray, np.ndarray]:
"""
Align the face in the input image based on the given facial landmarks.
image_size: int | tuple[int, int] = 112,
) -> tuple[np.ndarray, np.ndarray]:
"""Align the face in the input image based on the given facial landmarks.
Args:
image (np.ndarray): Input image as a NumPy array.
landmark (np.ndarray): Array of shape (5, 2) representing the coordinates of the facial landmarks.
image_size (Union[int, Tuple[int, int]], optional): The size of the aligned output image.
Can be an integer (for square images) or a tuple (width, height). Default is 112.
image: Input image as a NumPy array with shape (H, W, C).
landmark: Array of shape (5, 2) representing the facial landmark coordinates.
image_size: The size of the aligned output image. Can be an integer
(for square images) or a tuple (width, height). Default is 112.
Returns:
np.ndarray: The aligned face as a NumPy array.
np.ndarray: The 2x3 transformation matrix used for alignment.
A tuple containing:
- The aligned face as a NumPy array.
- The 2x3 inverse transformation matrix used for alignment.
"""
# Get the transformation matrix
M, M_inv = estimate_norm(landmark, image_size)
transform_matrix, inverse_transform = estimate_norm(landmark, image_size)
# Handle both int and tuple for warpAffine output size
if isinstance(image_size, int):
@@ -105,44 +108,50 @@ def face_alignment(
output_size = image_size
# Warp the input image to align the face
warped = cv2.warpAffine(image, M, output_size, borderValue=0.0)
warped = cv2.warpAffine(image, transform_matrix, output_size, borderValue=0.0)
return warped, M_inv
return warped, inverse_transform
def compute_similarity(feat1: np.ndarray, feat2: np.ndarray, normalized: bool = False) -> np.float32:
"""Computing Similarity between two faces.
"""Compute cosine similarity between two face embeddings.
Args:
feat1 (np.ndarray): First embedding.
feat2 (np.ndarray): Second embedding.
normalized (bool): Set True if the embeddings are already L2 normalized.
feat1: First embedding vector.
feat2: Second embedding vector.
normalized: Set True if the embeddings are already L2 normalized.
Returns:
np.float32: Cosine similarity.
Cosine similarity score in range [-1, 1].
"""
feat1 = feat1.ravel()
feat2 = feat2.ravel()
if normalized:
return np.dot(feat1, feat2)
else:
return np.dot(feat1, feat2) / (np.linalg.norm(feat1) * np.linalg.norm(feat2) + 1e-5)
# Add small epsilon to prevent division by zero
return np.dot(feat1, feat2) / (np.linalg.norm(feat1) * np.linalg.norm(feat2) + 1e-5)
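Putting face_alignment and compute_similarity together, a hedged sketch of the usual verify-two-faces flow; the image path and the five landmark points are dummies, and the embeddings stand in for the output of a recognizer such as the get_normalized_embedding call shown in the detector docstrings above.

import cv2
import numpy as np

from uniface.face_utils import compute_similarity, face_alignment

image = cv2.imread('person.jpg')  # placeholder path
landmarks = np.array([[38.0, 52.0], [74.0, 52.0], [56.0, 72.0],
                      [42.0, 92.0], [70.0, 92.0]], dtype=np.float32)  # dummy 5-point layout

aligned, inverse_transform = face_alignment(image, landmarks, image_size=112)

emb1 = np.random.rand(512).astype(np.float32)  # placeholders for real embeddings
emb2 = np.random.rand(512).astype(np.float32)
print(f'similarity={compute_similarity(emb1, emb2):.3f}')  # cosine similarity in [-1, 1]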
def bbox_center_alignment(image, center, output_size, scale, rotation):
"""
Apply center-based alignment, scaling, and rotation to an image.
def bbox_center_alignment(
image: np.ndarray,
center: tuple[float, float],
output_size: int,
scale: float,
rotation: float,
) -> tuple[np.ndarray, np.ndarray]:
"""Apply center-based alignment, scaling, and rotation to an image.
Args:
image (np.ndarray): Input image.
center (Tuple[float, float]): Center point (e.g., face center from bbox).
output_size (int): Desired output image size (square).
scale (float): Scaling factor to zoom in/out.
rotation (float): Rotation angle in degrees (clockwise).
image: Input image with shape (H, W, C).
center: Center point (x, y), e.g., face center from bbox.
output_size: Desired output image size (square).
scale: Scaling factor to zoom in/out.
rotation: Rotation angle in degrees (clockwise).
Returns:
cropped (np.ndarray): Aligned and cropped image.
M (np.ndarray): 2x3 affine transform matrix used.
A tuple containing:
- Aligned and cropped image with shape (output_size, output_size, C).
- 2x3 affine transform matrix used.
"""
# Convert rotation from degrees to radians
@@ -175,15 +184,14 @@ def bbox_center_alignment(image, center, output_size, scale, rotation):
def transform_points_2d(points: np.ndarray, transform: np.ndarray) -> np.ndarray:
"""
Apply a 2D affine transformation to an array of 2D points.
"""Apply a 2D affine transformation to an array of 2D points.
Args:
points (np.ndarray): An (N, 2) array of 2D points.
transform (np.ndarray): A (2, 3) affine transformation matrix.
points: An (N, 2) array of 2D points.
transform: A (2, 3) affine transformation matrix.
Returns:
np.ndarray: Transformed (N, 2) array of points.
Transformed (N, 2) array of points.
"""
transformed = np.zeros_like(points, dtype=np.float32)
for i in range(points.shape[0]):

View File

@@ -2,6 +2,8 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
from uniface.types import GazeResult
from .base import BaseGazeEstimator
from .models import MobileGaze
@@ -34,13 +36,11 @@ def create_gaze_estimator(method: str = 'mobilegaze', **kwargs) -> BaseGazeEstim
>>> # Create with MobileNetV2 backbone
>>> from uniface.constants import GazeWeights
>>> estimator = create_gaze_estimator(
... 'mobilegaze',
... model_name=GazeWeights.MOBILENET_V2
... )
>>> estimator = create_gaze_estimator('mobilegaze', model_name=GazeWeights.MOBILENET_V2)
>>> # Use the estimator
>>> pitch, yaw = estimator.estimate(face_crop)
>>> result = estimator.estimate(face_crop)
>>> print(f'Pitch: {result.pitch}, Yaw: {result.yaw}')
"""
method = method.lower()
@@ -51,8 +51,4 @@ def create_gaze_estimator(method: str = 'mobilegaze', **kwargs) -> BaseGazeEstim
raise ValueError(f"Unsupported gaze estimation method: '{method}'. Available: {available}")
__all__ = [
'create_gaze_estimator',
'MobileGaze',
'BaseGazeEstimator',
]
__all__ = ['BaseGazeEstimator', 'GazeResult', 'MobileGaze', 'create_gaze_estimator']

View File

@@ -2,11 +2,16 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
from __future__ import annotations
from abc import ABC, abstractmethod
from typing import Tuple
import numpy as np
from uniface.types import GazeResult
__all__ = ['BaseGazeEstimator', 'GazeResult']
class BaseGazeEstimator(ABC):
"""
@@ -54,7 +59,7 @@ class BaseGazeEstimator(ABC):
raise NotImplementedError('Subclasses must implement the preprocess method.')
@abstractmethod
def postprocess(self, outputs: Tuple[np.ndarray, np.ndarray]) -> Tuple[float, float]:
def postprocess(self, outputs: tuple[np.ndarray, np.ndarray]) -> GazeResult:
"""
Postprocess raw model outputs into gaze angles.
@@ -66,12 +71,12 @@ class BaseGazeEstimator(ABC):
on the specific model architecture.
Returns:
Tuple[float, float]: A tuple of (pitch, yaw) angles in radians.
GazeResult: Result containing pitch and yaw angles in radians.
"""
raise NotImplementedError('Subclasses must implement the postprocess method.')
@abstractmethod
def estimate(self, face_image: np.ndarray) -> Tuple[float, float]:
def estimate(self, face_image: np.ndarray) -> GazeResult:
"""
Perform end-to-end gaze estimation on a face image.
@@ -84,18 +89,18 @@ class BaseGazeEstimator(ABC):
well-framed within the image.
Returns:
Tuple[float, float]: A tuple of (pitch, yaw) angles in radians:
GazeResult: Result containing pitch and yaw angles in radians:
- pitch: Vertical gaze angle (positive = up, negative = down)
- yaw: Horizontal gaze angle (positive = right, negative = left)
Example:
>>> estimator = create_gaze_estimator()
>>> pitch, yaw = estimator.estimate(face_crop)
>>> print(f"Looking: pitch={np.degrees(pitch):.1f}°, yaw={np.degrees(yaw):.1f}°")
>>> result = estimator.estimate(face_crop)
>>> print(f'Looking: pitch={np.degrees(result.pitch):.1f}°, yaw={np.degrees(result.yaw):.1f}°')
"""
raise NotImplementedError('Subclasses must implement the estimate method.')
def __call__(self, face_image: np.ndarray) -> Tuple[float, float]:
def __call__(self, face_image: np.ndarray) -> GazeResult:
"""
Provides a convenient, callable shortcut for the `estimate` method.
@@ -103,6 +108,6 @@ class BaseGazeEstimator(ABC):
face_image (np.ndarray): A cropped face image in BGR format.
Returns:
Tuple[float, float]: A tuple of (pitch, yaw) angles in radians.
GazeResult: Result containing pitch and yaw angles in radians.
"""
return self.estimate(face_image)
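Because GazeResult carries pitch and yaw in radians, a common follow-up is projecting them to a 2D arrow for visualization. The projection convention below is an assumption for illustration, not part of the library:

import numpy as np


def gaze_to_arrow(pitch: float, yaw: float, length: float = 100.0) -> tuple[float, float]:
    """Approximate (dx, dy) image-plane offset for drawing a gaze arrow."""
    dx = -length * np.sin(yaw) * np.cos(pitch)
    dy = -length * np.sin(pitch)
    return float(dx), float(dy)


# result = estimator.estimate(face_crop)   # GazeResult with .pitch / .yaw
# dx, dy = gaze_to_arrow(result.pitch, result.yaw)
# cv2.arrowedLine(frame, (cx, cy), (int(cx + dx), int(cy + dy)), (0, 255, 0), 2)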

View File

@@ -2,7 +2,6 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
from typing import Tuple
import cv2
import numpy as np
@@ -11,6 +10,7 @@ from uniface.constants import GazeWeights
from uniface.log import Logger
from uniface.model_store import verify_model_weights
from uniface.onnx_utils import create_onnx_session
from uniface.types import GazeResult
from .base import BaseGazeEstimator
@@ -54,17 +54,17 @@ class MobileGaze(BaseGazeEstimator):
>>> # Detect faces and estimate gaze for each
>>> faces = detector.detect(image)
>>> for face in faces:
... bbox = face['bbox']
... bbox = face.bbox
... x1, y1, x2, y2 = map(int, bbox[:4])
... face_crop = image[y1:y2, x1:x2]
... pitch, yaw = gaze_estimator.estimate(face_crop)
... print(f"Gaze: pitch={np.degrees(pitch):.1f}°, yaw={np.degrees(yaw):.1f}°")
... result = gaze_estimator.estimate(face_crop)
... print(f'Gaze: pitch={np.degrees(result.pitch):.1f}°, yaw={np.degrees(result.yaw):.1f}°')
"""
def __init__(
self,
model_name: GazeWeights = GazeWeights.RESNET34,
input_size: Tuple[int, int] = (448, 448),
input_size: tuple[int, int] = (448, 448),
) -> None:
Logger.info(f'Initializing MobileGaze with model={model_name}, input_size={input_size}')
@@ -143,7 +143,7 @@ class MobileGaze(BaseGazeEstimator):
e_x = np.exp(x - np.max(x, axis=1, keepdims=True))
return e_x / e_x.sum(axis=1, keepdims=True)
def postprocess(self, outputs: Tuple[np.ndarray, np.ndarray]) -> Tuple[np.ndarray, np.ndarray]:
def postprocess(self, outputs: tuple[np.ndarray, np.ndarray]) -> GazeResult:
"""
Postprocess raw model outputs into gaze angles.
@@ -155,7 +155,7 @@ class MobileGaze(BaseGazeEstimator):
on the specific model architecture.
Returns:
Tuple[np.ndarray, np.ndarray]: A tuple of (pitch, yaw) angles in radians.
GazeResult: Result containing pitch and yaw angles in radians.
"""
pitch_logits, yaw_logits = outputs
@@ -168,12 +168,12 @@ class MobileGaze(BaseGazeEstimator):
yaw_deg = np.sum(yaw_probs * self._idx_tensor, axis=1) * self._binwidth - self._angle_offset
# Convert degrees to radians
pitch = np.radians(pitch_deg[0])
yaw = np.radians(yaw_deg[0])
pitch = float(np.radians(pitch_deg[0]))
yaw = float(np.radians(yaw_deg[0]))
return pitch, yaw
return GazeResult(pitch=pitch, yaw=yaw)
def estimate(self, face_image: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
def estimate(self, face_image: np.ndarray) -> GazeResult:
"""
Perform end-to-end gaze estimation on a face image.
@@ -182,6 +182,5 @@ class MobileGaze(BaseGazeEstimator):
"""
input_tensor = self.preprocess(face_image)
outputs = self.session.run(self.output_names, {self.input_name: input_tensor})
pitch, yaw = self.postprocess((outputs[0], outputs[1]))
return pitch, yaw
return self.postprocess((outputs[0], outputs[1]))
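The decoding in postprocess is a soft-argmax over angle bins: softmax probabilities weighted by their bin index, scaled by the bin width and shifted by an offset, then converted to radians. A standalone numeric sketch; the 90 bins, 4-degree width, and 180-degree offset are assumed values for illustration, the real constants come from the model configuration:

import numpy as np

num_bins, binwidth, angle_offset = 90, 4.0, 180.0  # illustrative values only
idx_tensor = np.arange(num_bins, dtype=np.float32)

logits = np.random.randn(1, num_bins).astype(np.float32)  # stand-in for pitch_logits
probs = np.exp(logits - logits.max(axis=1, keepdims=True))
probs /= probs.sum(axis=1, keepdims=True)                  # softmax over bins

angle_deg = np.sum(probs * idx_tensor, axis=1) * binwidth - angle_offset
angle_rad = float(np.radians(angle_deg[0]))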

View File

@@ -25,4 +25,4 @@ def create_landmarker(method: str = '2d106det', **kwargs) -> BaseLandmarker:
raise ValueError(f"Unsupported method: '{method}'. Available: {available}")
__all__ = ['create_landmarker', 'Landmark106', 'BaseLandmarker']
__all__ = ['BaseLandmarker', 'Landmark106', 'create_landmarker']

View File

@@ -30,3 +30,15 @@ class BaseLandmarker(ABC):
where N is the number of landmarks.
"""
raise NotImplementedError
def __call__(self, image: np.ndarray, bbox: np.ndarray) -> np.ndarray:
"""Callable shortcut for the `get_landmarks` method.
Args:
image (np.ndarray): The full source image in BGR format.
bbox (np.ndarray): A bounding box of a face [x1, y1, x2, y2].
Returns:
np.ndarray: An array of predicted landmark points with shape (N, 2).
"""
return self.get_landmarks(image, bbox)

View File

@@ -2,7 +2,6 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
from typing import Tuple
import cv2
import numpy as np
@@ -46,7 +45,7 @@ class Landmark106(BaseLandmarker):
def __init__(
self,
model_name: LandmarkWeights = LandmarkWeights.DEFAULT,
input_size: Tuple[int, int] = (192, 192),
input_size: tuple[int, int] = (192, 192),
) -> None:
Logger.info(f'Initializing Facial Landmark with model={model_name}, input_size={input_size}')
self.input_size = input_size
@@ -85,7 +84,7 @@ class Landmark106(BaseLandmarker):
Logger.error(f"Failed to load landmark model from '{self.model_path}'", exc_info=True)
raise RuntimeError(f'Failed to initialize landmark model: {e}') from e
def preprocess(self, image: np.ndarray, bbox: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
def preprocess(self, image: np.ndarray, bbox: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
"""Prepares a face crop for inference.
This method takes a face bounding box, performs a center alignment to
@@ -155,58 +154,3 @@ class Landmark106(BaseLandmarker):
raw_predictions = self.session.run(self.output_names, {self.input_names[0]: face_blob})[0][0]
landmarks = self.postprocess(raw_predictions, transform_matrix)
return landmarks
# Testing code
if __name__ == '__main__':
from uniface.detection import RetinaFace
from uniface.landmark import Landmark106
face_detector = RetinaFace()
landmarker = Landmark106()
cap = cv2.VideoCapture(0)
if not cap.isOpened():
print('Webcam not available.')
exit()
print("Press 'q' to quit.")
while True:
ret, frame = cap.read()
if not ret:
print('Frame capture failed.')
break
# 2. The detect method returns a list of dictionaries
faces = face_detector.detect(frame)
if not faces:
cv2.imshow('Facial Landmark Detection', frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
continue
# 3. Loop through the list of face dictionaries
for face in faces:
# Extract the bounding box
bbox = face['bbox']
# 4. Get landmarks for the current face using its bounding box
landmarks = landmarker.get_landmarks(frame, bbox)
# --- Drawing Logic ---
# Draw the landmarks
for x, y in landmarks.astype(int):
cv2.circle(frame, (x, y), 2, (0, 255, 0), -1)
# Draw the bounding box
x1, y1, x2, y2 = map(int, bbox)
cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)
cv2.imshow('Facial Landmark Detection', frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
cap.release()
cv2.destroyAllWindows()
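An updated version of the removed demo above, using the Face dataclass and the landmarker's new callable shortcut (the image path is a placeholder):

import cv2

from uniface.detection import RetinaFace
from uniface.landmark import Landmark106

detector = RetinaFace()
landmarker = Landmark106()

frame = cv2.imread('your_image.jpg')  # placeholder path
for face in detector.detect(frame):
    points = landmarker(frame, face.bbox)  # __call__ forwards to get_landmarks()
    for x, y in points.astype(int):
        cv2.circle(frame, (int(x), int(y)), 2, (0, 255, 0), -1)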

View File

@@ -1,21 +1,41 @@
# Copyright 2025 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
"""Logging utilities for UniFace.
This module provides a centralized logger for the UniFace library,
allowing users to enable verbose logging when debugging or developing.
"""
from __future__ import annotations
import logging
__all__ = ['Logger', 'enable_logging']
# Create logger for uniface
Logger = logging.getLogger('uniface')
Logger.setLevel(logging.WARNING) # Only show warnings/errors by default
Logger.addHandler(logging.NullHandler())
def enable_logging(level=logging.INFO):
"""
Enable verbose logging for uniface.
def enable_logging(level: int = logging.INFO) -> None:
"""Enable verbose logging for uniface.
Configures the logger to output messages to stdout with timestamps.
Call this function to see informational messages during model loading
and inference.
Args:
level: Logging level (logging.DEBUG, logging.INFO, etc.)
level: Logging level. Defaults to logging.INFO.
Common values: logging.DEBUG, logging.INFO, logging.WARNING.
Example:
>>> from uniface import enable_logging
>>> import logging
>>> enable_logging() # Show INFO logs
>>> enable_logging(level=logging.DEBUG) # Show DEBUG logs
"""
Logger.handlers.clear()
handler = logging.StreamHandler()

View File

@@ -2,6 +2,15 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
"""Model weight management for UniFace.
This module handles downloading, caching, and verifying model weights
using SHA-256 checksums for integrity validation.
"""
from __future__ import annotations
from enum import Enum
import hashlib
import os
@@ -14,33 +23,32 @@ from uniface.log import Logger
__all__ = ['verify_model_weights']
def verify_model_weights(model_name: str, root: str = '~/.uniface/models') -> str:
"""
Ensure model weights are present, downloading and verifying them using SHA-256 if necessary.
def verify_model_weights(model_name: Enum, root: str = '~/.uniface/models') -> str:
"""Ensure model weights are present, downloading and verifying them if necessary.
Given a model identifier from an Enum class (e.g., `RetinaFaceWeights.MNET_V2`), this function checks if
the corresponding `.onnx` weight file exists locally. If not, it downloads the file from a predefined URL.
After download, the file's integrity is verified using a SHA-256 hash. If verification fails, the file is deleted
and an error is raised.
Given a model identifier from an Enum class (e.g., `RetinaFaceWeights.MNET_V2`),
this function checks if the corresponding weight file exists locally. If not,
it downloads the file from a predefined URL and verifies its integrity using
a SHA-256 hash.
Args:
model_name (Enum): Model weight identifier (e.g., `RetinaFaceWeights.MNET_V2`, `ArcFaceWeights.RESNET`, etc.).
root (str, optional): Directory to store or locate the model weights. Defaults to '~/.uniface/models'.
model_name: Model weight identifier enum (e.g., `RetinaFaceWeights.MNET_V2`).
root: Directory to store or locate the model weights.
Defaults to '~/.uniface/models'.
Returns:
str: Absolute path to the verified model weights file.
Absolute path to the verified model weights file.
Raises:
ValueError: If the model is unknown or SHA-256 verification fails.
ConnectionError: If downloading the file fails.
Examples:
>>> from uniface.models import RetinaFaceWeights, verify_model_weights
>>> verify_model_weights(RetinaFaceWeights.MNET_V2)
Example:
>>> from uniface.constants import RetinaFaceWeights
>>> from uniface.model_store import verify_model_weights
>>> path = verify_model_weights(RetinaFaceWeights.MNET_V2)
>>> print(path)
'/home/user/.uniface/models/retinaface_mnet_v2.onnx'
>>> verify_model_weights(RetinaFaceWeights.RESNET34, root='/custom/dir')
'/custom/dir/retinaface_r34.onnx'
"""
root = os.path.expanduser(root)
@@ -73,10 +81,16 @@ def verify_model_weights(model_name: str, root: str = '~/.uniface/models') -> st
return model_path
def download_file(url: str, dest_path: str) -> None:
"""Download a file from a URL in chunks and save it to the destination path."""
def download_file(url: str, dest_path: str, timeout: int = 30) -> None:
"""Download a file from a URL in chunks and save it to the destination path.
Args:
url: URL to download from.
dest_path: Local file path to save to.
timeout: Connection timeout in seconds. Defaults to 30.
"""
try:
response = requests.get(url, stream=True)
response = requests.get(url, stream=True, timeout=timeout)
response.raise_for_status()
with (
open(dest_path, 'wb') as file,

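The hunk above only shows the chunked download helper; the SHA-256 check itself is not reproduced here. As a rough standalone sketch of what such a verification amounts to (the `sha256_of` helper and the `EXPECTED` digest are illustrative, not the library's actual internals):

import hashlib

def sha256_of(path: str, chunk_size: int = 8192) -> str:
    """Compute the SHA-256 hex digest of a file, reading it in chunks."""
    digest = hashlib.sha256()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()

# Hypothetical expected digest -- the real values live inside uniface.model_store.
EXPECTED = 'aabbcc...'
model_path = '/home/user/.uniface/models/retinaface_mnet_v2.onnx'
if sha256_of(model_path) != EXPECTED:
    raise ValueError('SHA-256 verification failed; the downloaded file may be corrupted')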
View File

@@ -2,16 +2,23 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
from typing import List
"""ONNX Runtime utilities for UniFace.
This module provides helper functions for creating and managing ONNX Runtime
inference sessions with automatic hardware acceleration detection.
"""
from __future__ import annotations
import onnxruntime as ort
from uniface.log import Logger
__all__ = ['create_onnx_session', 'get_available_providers']
def get_available_providers() -> List[str]:
"""
Get list of available ONNX Runtime execution providers for the current platform.
def get_available_providers() -> list[str]:
"""Get list of available ONNX Runtime execution providers.
Automatically detects and prioritizes hardware acceleration:
- CoreML on Apple Silicon (M1/M2/M3/M4)
@@ -19,13 +26,12 @@ def get_available_providers() -> List[str]:
- CPU as fallback (always available)
Returns:
List[str]: Ordered list of execution providers to use
Ordered list of execution providers to use.
Examples:
Example:
>>> providers = get_available_providers()
>>> # On M4 Mac: ['CoreMLExecutionProvider', 'CPUExecutionProvider']
>>> # On Linux with CUDA: ['CUDAExecutionProvider', 'CPUExecutionProvider']
>>> # On CPU-only: ['CPUExecutionProvider']
"""
available = ort.get_available_providers()
providers = []
@@ -48,26 +54,28 @@ def get_available_providers() -> List[str]:
return providers
def create_onnx_session(model_path: str, providers: List[str] = None) -> ort.InferenceSession:
"""
Create an ONNX Runtime inference session with optimal provider selection.
def create_onnx_session(
model_path: str,
providers: list[str] | None = None,
) -> ort.InferenceSession:
"""Create an ONNX Runtime inference session with optimal provider selection.
Args:
model_path (str): Path to the ONNX model file
providers (List[str], optional): List of providers to use.
If None, automatically detects best available providers.
model_path: Path to the ONNX model file.
providers: List of execution providers to use. If None, automatically
detects best available providers.
Returns:
ort.InferenceSession: Configured ONNX Runtime session
Configured ONNX Runtime session.
Raises:
RuntimeError: If session creation fails
RuntimeError: If session creation fails.
Examples:
>>> session = create_onnx_session("model.onnx")
Example:
>>> session = create_onnx_session('model.onnx')
>>> # Automatically uses best available providers
>>> session = create_onnx_session("model.onnx", providers=["CPUExecutionProvider"])
>>> session = create_onnx_session('model.onnx', providers=['CPUExecutionProvider'])
>>> # Force CPU-only execution
"""
if providers is None:
@@ -90,8 +98,7 @@ def create_onnx_session(model_path: str, providers: List[str] = None) -> ort.Inf
'CPUExecutionProvider': 'CPU',
}
provider_display = provider_names.get(active_provider, active_provider)
Logger.debug(f'Model loaded with provider: {active_provider}')
print(f'✓ Model loaded ({provider_display})')
Logger.info(f'Model loaded ({provider_display})')
return session
except Exception as e:

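Taken together, the two helpers can be exercised with a short hedged sketch; 'model.onnx' is a placeholder path, and the providers reported depend entirely on the local onnxruntime build:

from uniface.onnx_utils import create_onnx_session, get_available_providers

# See what the current machine offers before loading anything.
print(get_available_providers())

# 'model.onnx' is a placeholder; point this at any local ONNX model.
session = create_onnx_session('model.onnx')

# onnxruntime reports which providers the session actually ended up using.
print(session.get_providers())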
View File

@@ -2,7 +2,7 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
from typing import Union
from __future__ import annotations
from uniface.constants import ParsingWeights
@@ -13,38 +13,29 @@ __all__ = ['BaseFaceParser', 'BiSeNet', 'create_face_parser']
def create_face_parser(
model_name: Union[str, ParsingWeights] = ParsingWeights.RESNET18,
model_name: str | ParsingWeights = ParsingWeights.RESNET18,
) -> BaseFaceParser:
"""
Factory function to create a face parsing model instance.
"""Factory function to create a face parsing model instance.
This function provides a convenient way to instantiate face parsing models
without directly importing the specific model classes. It supports both
string-based and enum-based model selection.
without directly importing the specific model classes.
Args:
model_name (Union[str, ParsingWeights]): The face parsing model to create.
Can be either a string or a ParsingWeights enum value.
Available options:
model_name: The face parsing model to create. Can be either a string
or a ParsingWeights enum value. Available options:
- 'parsing_resnet18' or ParsingWeights.RESNET18 (default)
- 'parsing_resnet34' or ParsingWeights.RESNET34
Returns:
BaseFaceParser: An instance of the requested face parsing model.
An instance of the requested face parsing model.
Raises:
ValueError: If the model_name is not recognized.
Examples:
>>> # Using enum
Example:
>>> from uniface.parsing import create_face_parser
>>> from uniface.constants import ParsingWeights
>>> parser = create_face_parser(ParsingWeights.RESNET18)
>>>
>>> # Using string
>>> parser = create_face_parser('parsing_resnet18')
>>>
>>> # Parse a face image
>>> mask = parser.parse(face_crop)
"""
# Convert string to enum if necessary

View File

@@ -3,7 +3,6 @@
# GitHub: https://github.com/yakhyo
from abc import ABC, abstractmethod
from typing import Tuple
import numpy as np
@@ -53,7 +52,7 @@ class BaseFaceParser(ABC):
raise NotImplementedError('Subclasses must implement the preprocess method.')
@abstractmethod
def postprocess(self, outputs: np.ndarray, original_size: Tuple[int, int]) -> np.ndarray:
def postprocess(self, outputs: np.ndarray, original_size: tuple[int, int]) -> np.ndarray:
"""
Postprocess raw model outputs into a segmentation mask.
@@ -89,7 +88,7 @@ class BaseFaceParser(ABC):
Example:
>>> parser = create_face_parser()
>>> mask = parser.parse(face_crop)
>>> print(f"Mask shape: {mask.shape}, unique classes: {np.unique(mask)}")
>>> print(f'Mask shape: {mask.shape}, unique classes: {np.unique(mask)}')
"""
raise NotImplementedError('Subclasses must implement the parse method.')

View File

@@ -2,7 +2,6 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
from typing import Tuple
import cv2
import numpy as np
@@ -54,17 +53,17 @@ class BiSeNet(BaseFaceParser):
>>> # Detect faces and parse each face
>>> faces = detector.detect(image)
>>> for face in faces:
... bbox = face['bbox']
... bbox = face.bbox
... x1, y1, x2, y2 = map(int, bbox[:4])
... face_crop = image[y1:y2, x1:x2]
... mask = parser.parse(face_crop)
... print(f"Mask shape: {mask.shape}, unique classes: {np.unique(mask)}")
... print(f'Mask shape: {mask.shape}, unique classes: {np.unique(mask)}')
"""
def __init__(
self,
model_name: ParsingWeights = ParsingWeights.RESNET18,
input_size: Tuple[int, int] = (512, 512),
input_size: tuple[int, int] = (512, 512),
) -> None:
Logger.info(f'Initializing BiSeNet with model={model_name}, input_size={input_size}')
@@ -127,7 +126,7 @@ class BiSeNet(BaseFaceParser):
return image
def postprocess(self, outputs: np.ndarray, original_size: Tuple[int, int]) -> np.ndarray:
def postprocess(self, outputs: np.ndarray, original_size: tuple[int, int]) -> np.ndarray:
"""
Postprocess model output to segmentation mask.

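Combining the parser with a detector and the `vis_parsing_maps` helper from `uniface.visualization` gives the usual end-to-end flow. A hedged sketch; 'image.jpg' and the output paths are placeholders:

import cv2

from uniface import RetinaFace
from uniface.parsing import BiSeNet
from uniface.visualization import vis_parsing_maps

detector = RetinaFace()
parser = BiSeNet()

image = cv2.imread('image.jpg')  # placeholder path
faces = detector.detect(image)

for i, face in enumerate(faces):
    x1, y1, x2, y2 = map(int, face.bbox[:4])
    face_crop = image[y1:y2, x1:x2]
    mask = parser.parse(face_crop)

    # vis_parsing_maps expects an RGB image and returns a BGR overlay.
    face_rgb = cv2.cvtColor(face_crop, cv2.COLOR_BGR2RGB)
    overlay = vis_parsing_maps(face_rgb, mask)
    cv2.imwrite(f'parsed_face_{i}.jpg', overlay)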
View File

@@ -0,0 +1,52 @@
# Copyright 2025 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
from __future__ import annotations
import numpy as np
from .blur import BlurFace
def anonymize_faces(
image: np.ndarray,
detector: object | None = None,
method: str = 'pixelate',
blur_strength: float = 3.0,
pixel_blocks: int = 10,
confidence_threshold: float = 0.5,
**kwargs,
) -> np.ndarray:
"""One-line face anonymization with automatic detection.
Args:
image (np.ndarray): Input image (BGR format).
detector: Face detector instance. Creates RetinaFace if None.
method (str): Blur method name. Defaults to 'pixelate'.
blur_strength (float): Blur intensity. Defaults to 3.0.
pixel_blocks (int): Block count for pixelate. Defaults to 10.
confidence_threshold (float): Detection confidence threshold. Defaults to 0.5.
**kwargs: Additional detector arguments.
Returns:
np.ndarray: Anonymized image.
Example:
>>> from uniface.privacy import anonymize_faces
>>> anonymized = anonymize_faces(image, method='pixelate')
"""
if detector is None:
try:
from uniface import RetinaFace
detector = RetinaFace(confidence_threshold=confidence_threshold, **kwargs)
except ImportError as err:
raise ImportError('Could not import RetinaFace. Please ensure UniFace is properly installed.') from err
faces = detector.detect(image)
blurrer = BlurFace(method=method, blur_strength=blur_strength, pixel_blocks=pixel_blocks)
return blurrer.anonymize(image, faces)
__all__ = ['BlurFace', 'anonymize_faces']

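A hedged usage sketch for the helper above: the video path is a placeholder, and passing a pre-built detector simply avoids re-creating RetinaFace on every frame.

import cv2

from uniface import RetinaFace
from uniface.privacy import anonymize_faces

detector = RetinaFace()  # created once, reused for every frame

cap = cv2.VideoCapture('input.mp4')  # placeholder path
while True:
    ret, frame = cap.read()
    if not ret:
        break
    blurred = anonymize_faces(frame, detector=detector, method='elliptical')
    cv2.imshow('Anonymized', blurred)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()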
200
uniface/privacy/blur.py Normal file
View File

@@ -0,0 +1,200 @@
# Copyright 2025 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
from __future__ import annotations
from typing import TYPE_CHECKING, ClassVar
import cv2
import numpy as np
if TYPE_CHECKING:
pass
__all__ = ['BlurFace', 'EllipticalBlur']
def _gaussian_blur(region: np.ndarray, strength: float = 3.0) -> np.ndarray:
"""Apply Gaussian blur to a region."""
h, w = region.shape[:2]
kernel_size = max(3, int((min(h, w) / 7) * strength)) | 1  # OR with 1 keeps the kernel size odd, as cv2.GaussianBlur requires
return cv2.GaussianBlur(region, (kernel_size, kernel_size), 0)
def _median_blur(region: np.ndarray, strength: float = 3.0) -> np.ndarray:
"""Apply median blur to a region."""
h, w = region.shape[:2]
kernel_size = max(3, int((min(h, w) / 7) * strength)) | 1
return cv2.medianBlur(region, kernel_size)
def _pixelate_blur(region: np.ndarray, blocks: int = 10) -> np.ndarray:
"""Apply pixelation to a region."""
h, w = region.shape[:2]
temp_h, temp_w = max(1, h // blocks), max(1, w // blocks)
temp = cv2.resize(region, (temp_w, temp_h), interpolation=cv2.INTER_LINEAR)
return cv2.resize(temp, (w, h), interpolation=cv2.INTER_NEAREST)
def _blackout_blur(region: np.ndarray, color: tuple[int, int, int] = (0, 0, 0)) -> np.ndarray:
"""Replace region with solid color."""
return np.full_like(region, color)
class EllipticalBlur:
"""Elliptical blur with soft, feathered edges.
This blur applies Gaussian blur within an elliptical mask that follows
the natural oval shape of faces, requiring full image context for proper blending.
Args:
blur_strength (float): Blur intensity multiplier. Defaults to 3.0.
margin (int): Extra pixels to extend ellipse beyond bbox. Defaults to 20.
"""
def __init__(self, blur_strength: float = 3.0, margin: int = 20):
self.blur_strength = blur_strength
self.margin = margin
def __call__(
self,
image: np.ndarray,
bboxes: list[tuple | list],
inplace: bool = False,
) -> np.ndarray:
if not inplace:
image = image.copy()
h, w = image.shape[:2]
for bbox in bboxes:
x1, y1, x2, y2 = map(int, bbox)
center_x, center_y = (x1 + x2) // 2, (y1 + y2) // 2
axes_x = (x2 - x1) // 2 + self.margin
axes_y = (y2 - y1) // 2 + self.margin
# Create soft elliptical mask
mask = np.zeros((h, w), dtype=np.float32)
cv2.ellipse(mask, (center_x, center_y), (axes_x, axes_y), 0, 0, 360, 255, -1)
mask = cv2.GaussianBlur(mask, (51, 51), 0) / 255.0
mask = mask[:, :, np.newaxis]
kernel_size = max(3, int((min(axes_y, axes_x) * 2 / 7) * self.blur_strength)) | 1
blurred = cv2.GaussianBlur(image, (kernel_size, kernel_size), 0)
image = (blurred * mask + image * (1 - mask)).astype(np.uint8)
return image
class BlurFace:
"""Face blurring with multiple anonymization methods.
Args:
method (str): Blur method - 'gaussian', 'pixelate', 'blackout', 'elliptical', or 'median'.
Defaults to 'pixelate'.
blur_strength (float): Intensity for gaussian/elliptical/median. Defaults to 3.0.
pixel_blocks (int): Block count for pixelate. Defaults to 15.
color (Tuple[int, int, int]): Fill color (BGR) for blackout. Defaults to (0, 0, 0).
margin (int): Edge margin for elliptical. Defaults to 20.
Example:
>>> blurrer = BlurFace(method='pixelate')
>>> anonymized = blurrer.anonymize(image, faces)
"""
VALID_METHODS: ClassVar[set[str]] = {'gaussian', 'pixelate', 'blackout', 'elliptical', 'median'}
def __init__(
self,
method: str = 'pixelate',
blur_strength: float = 3.0,
pixel_blocks: int = 15,
color: tuple[int, int, int] = (0, 0, 0),
margin: int = 20,
):
self.method = method.lower()
self._blur_strength = blur_strength
self._pixel_blocks = pixel_blocks
self._color = color
self._margin = margin
if self.method not in self.VALID_METHODS:
raise ValueError(f"Invalid blur method: '{method}'. Choose from: {sorted(self.VALID_METHODS)}")
if self.method == 'elliptical':
self._elliptical = EllipticalBlur(blur_strength, margin)
def _blur_region(self, region: np.ndarray) -> np.ndarray:
"""Apply blur to a single region based on the configured method."""
if self.method == 'gaussian':
return _gaussian_blur(region, self._blur_strength)
elif self.method == 'median':
return _median_blur(region, self._blur_strength)
elif self.method == 'pixelate':
return _pixelate_blur(region, self._pixel_blocks)
elif self.method == 'blackout':
return _blackout_blur(region, self._color)
return region # Fallback (should not reach here)
def anonymize(
self,
image: np.ndarray,
faces: list,
inplace: bool = False,
) -> np.ndarray:
"""Anonymize faces in an image.
Args:
image (np.ndarray): Input image (BGR format).
faces (list): Detected faces; each item must expose a `bbox` attribute with [x1, y1, x2, y2] coordinates (e.g. the Face dataclass).
inplace (bool): Modify image in-place if True. Defaults to False.
Returns:
np.ndarray: Image with anonymized faces.
"""
if not faces:
return image if inplace else image.copy()
bboxes = [face.bbox for face in faces]
return self.blur_regions(image, bboxes, inplace)
def blur_regions(
self,
image: np.ndarray,
bboxes: list[tuple | list],
inplace: bool = False,
) -> np.ndarray:
"""Blur specific rectangular regions in an image.
Args:
image (np.ndarray): Input image (BGR format).
bboxes (List): Bounding boxes as [x1, y1, x2, y2].
inplace (bool): Modify image in-place if True. Defaults to False.
Returns:
np.ndarray: Image with blurred regions.
"""
if not bboxes:
return image if inplace else image.copy()
if self.method == 'elliptical':
return self._elliptical(image, bboxes, inplace)
if not inplace:
image = image.copy()
h, w = image.shape[:2]
for bbox in bboxes:
x1, y1, x2, y2 = map(int, bbox)
x1, y1 = max(0, x1), max(0, y1)
x2, y2 = min(w, x2), min(h, y2)
if x2 > x1 and y2 > y1:
image[y1:y2, x1:x2] = self._blur_region(image[y1:y2, x1:x2])
return image
def __repr__(self) -> str:
return f"BlurFace(method='{self.method}')"

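BlurFace can also be used without a detector when the regions are already known; a small sketch with hand-picked boxes (the image path and coordinates are arbitrary examples):

import cv2

from uniface.privacy import BlurFace

image = cv2.imread('group_photo.jpg')  # placeholder path
blurrer = BlurFace(method='pixelate', pixel_blocks=12)

# Arbitrary example regions in [x1, y1, x2, y2] format.
regions = [(40, 60, 180, 220), (300, 80, 430, 230)]

anonymized = blurrer.blur_regions(image, regions)
cv2.imwrite('group_photo_blurred.jpg', anonymized)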
View File

@@ -34,10 +34,7 @@ def create_recognizer(method: str = 'arcface', **kwargs) -> BaseRecognizer:
>>> # Create a specific MobileFace recognizer
>>> from uniface.constants import MobileFaceWeights
>>> recognizer = create_recognizer(
... 'mobileface',
... model_name=MobileFaceWeights.MNET_V2
... )
>>> recognizer = create_recognizer('mobileface', model_name=MobileFaceWeights.MNET_V2)
>>> # Create a SphereFace recognizer
>>> recognizer = create_recognizer('sphereface')
@@ -55,10 +52,4 @@ def create_recognizer(method: str = 'arcface', **kwargs) -> BaseRecognizer:
raise ValueError(f"Unsupported method: '{method}'. Available: {available}")
__all__ = [
'create_recognizer',
'ArcFace',
'MobileFace',
'SphereFace',
'BaseRecognizer',
]
__all__ = ['ArcFace', 'BaseRecognizer', 'MobileFace', 'SphereFace', 'create_recognizer']

View File

@@ -2,9 +2,10 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
from __future__ import annotations
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import List, Tuple, Union
import cv2
import numpy as np
@@ -13,16 +14,22 @@ from uniface.face_utils import face_alignment
from uniface.log import Logger
from uniface.onnx_utils import create_onnx_session
__all__ = ['BaseRecognizer', 'PreprocessConfig']
@dataclass
class PreprocessConfig:
"""
Configuration for preprocessing images before feeding them into the model.
"""Configuration for preprocessing images before feeding them into the model.
Attributes:
input_mean: Mean value(s) for normalization.
input_std: Standard deviation value(s) for normalization.
input_size: Target image size as (height, width).
"""
input_mean: Union[float, List[float]] = 127.5
input_std: Union[float, List[float]] = 127.5
input_size: Tuple[int, int] = (112, 112)
input_mean: float | list[float] = 127.5
input_std: float | list[float] = 127.5
input_size: tuple[int, int] = (112, 112)
class BaseRecognizer(ABC):
@@ -94,7 +101,7 @@ class BaseRecognizer(ABC):
"""
resized_img = cv2.resize(face_img, self.input_size)
if isinstance(self.input_std, (list, tuple)):
if isinstance(self.input_std, list | tuple):
# Per-channel normalization
rgb_img = cv2.cvtColor(resized_img, cv2.COLOR_BGR2RGB).astype(np.float32)
normalized_img = (rgb_img - np.array(self.input_mean, dtype=np.float32)) / np.array(
@@ -116,13 +123,14 @@ class BaseRecognizer(ABC):
return blob
def get_embedding(self, image: np.ndarray, landmarks: np.ndarray = None) -> np.ndarray:
"""
Extracts face embedding from an image.
def get_embedding(self, image: np.ndarray, landmarks: np.ndarray | None = None) -> np.ndarray:
"""Extract face embedding from an image.
Args:
image: Input face image (BGR format). If already aligned (112x112), landmarks can be None.
landmarks: Facial landmarks (5 points for alignment). Optional if image is already aligned.
image: Input face image in BGR format. If already aligned (112x112),
landmarks can be None.
landmarks: Facial landmarks (5 points for alignment). Optional if
image is already aligned.
Returns:
Face embedding vector (typically 512-dimensional).
@@ -141,16 +149,27 @@ class BaseRecognizer(ABC):
return embedding
def get_normalized_embedding(self, image: np.ndarray, landmarks: np.ndarray) -> np.ndarray:
"""
Extracts a l2 normalized face embedding vector from an image.
"""Extract an L2-normalized face embedding vector from an image.
Args:
image: Input face image (BGR format).
image: Input face image in BGR format.
landmarks: Facial landmarks (5 points for alignment).
Returns:
Normalized face embedding vector (typically 512-dimensional).
L2-normalized face embedding vector (typically 512-dimensional).
"""
embedding = self.get_embedding(image, landmarks)
norm = np.linalg.norm(embedding)
return embedding / norm if norm > 0 else embedding
def __call__(self, image: np.ndarray, landmarks: np.ndarray) -> np.ndarray:
"""Callable shortcut for the `get_normalized_embedding` method.
Args:
image: Input face image in BGR format.
landmarks: Facial landmarks (5 points for alignment).
Returns:
L2-normalized face embedding vector (typically 512-dimensional).
"""
return self.get_normalized_embedding(image, landmarks)

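With the `__call__` shortcut above, face verification reduces to a dot product of normalized embeddings. A hedged sketch, assuming one detectable face per image and placeholder image paths:

import cv2
import numpy as np

from uniface import RetinaFace
from uniface.recognition import ArcFace

detector = RetinaFace()
recognizer = ArcFace()

img_a = cv2.imread('person_a.jpg')  # placeholder paths
img_b = cv2.imread('person_b.jpg')

face_a = detector.detect(img_a)[0]
face_b = detector.detect(img_b)[0]

# __call__ is a shortcut for get_normalized_embedding().
emb_a = recognizer(img_a, face_a.landmarks)
emb_b = recognizer(img_b, face_b.landmarks)

# For L2-normalized vectors the dot product equals the cosine similarity.
similarity = float(np.dot(emb_a, emb_b))
print(f'Cosine similarity: {similarity:.4f}')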
View File

@@ -2,7 +2,7 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
from typing import Optional
from __future__ import annotations
from uniface.constants import ArcFaceWeights, MobileFaceWeights, SphereFaceWeights
from uniface.model_store import verify_model_weights
@@ -34,7 +34,7 @@ class ArcFace(BaseRecognizer):
def __init__(
self,
model_name: ArcFaceWeights = ArcFaceWeights.MNET,
preprocessing: Optional[PreprocessConfig] = None,
preprocessing: PreprocessConfig | None = None,
) -> None:
if preprocessing is None:
preprocessing = PreprocessConfig(input_mean=127.5, input_std=127.5, input_size=(112, 112))
@@ -64,7 +64,7 @@ class MobileFace(BaseRecognizer):
def __init__(
self,
model_name: MobileFaceWeights = MobileFaceWeights.MNET_V2,
preprocessing: Optional[PreprocessConfig] = None,
preprocessing: PreprocessConfig | None = None,
) -> None:
if preprocessing is None:
preprocessing = PreprocessConfig(input_mean=127.5, input_std=127.5, input_size=(112, 112))
@@ -94,7 +94,7 @@ class SphereFace(BaseRecognizer):
def __init__(
self,
model_name: SphereFaceWeights = SphereFaceWeights.SPHERE20,
preprocessing: Optional[PreprocessConfig] = None,
preprocessing: PreprocessConfig | None = None,
) -> None:
if preprocessing is None:
preprocessing = PreprocessConfig(input_mean=127.5, input_std=127.5, input_size=(112, 112))

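All three recognizers accept an optional PreprocessConfig, so per-channel statistics can be supplied instead of the scalar defaults. A hedged sketch; the values are illustrative only, and the `uniface.recognition.base` import path is an assumption based on the base-class module shown above:

from uniface.recognition import ArcFace
from uniface.recognition.base import PreprocessConfig  # assumed module path

# Per-channel mean/std values trigger the list-based normalization branch.
config = PreprocessConfig(
    input_mean=[127.5, 127.5, 127.5],  # illustrative values only
    input_std=[128.0, 128.0, 128.0],
    input_size=(112, 112),
)

recognizer = ArcFace(preprocessing=config)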
View File

@@ -0,0 +1,47 @@
# Copyright 2025 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
from __future__ import annotations
from uniface.constants import MiniFASNetWeights
from uniface.types import SpoofingResult
from .base import BaseSpoofer
from .minifasnet import MiniFASNet
__all__ = [
'BaseSpoofer',
'MiniFASNet',
'MiniFASNetWeights',
'SpoofingResult',
'create_spoofer',
]
def create_spoofer(
model_name: MiniFASNetWeights = MiniFASNetWeights.V2,
scale: float | None = None,
) -> MiniFASNet:
"""Factory function to create a face anti-spoofing model.
This is a convenience function that creates a MiniFASNet instance
with the specified model variant and optional custom scale.
Args:
model_name: The model variant to use. Options:
- MiniFASNetWeights.V2: Improved version (default), uses scale=2.7
- MiniFASNetWeights.V1SE: Squeeze-and-excitation version, uses scale=4.0
scale: Custom crop scale factor for face region. If None, uses the
default scale for the selected model variant.
Returns:
An initialized face anti-spoofing model.
Example:
>>> from uniface.spoofing import create_spoofer, MiniFASNetWeights
>>> spoofer = create_spoofer()
>>> result = spoofer.predict(image, face.bbox)
>>> print(f'Is real: {result.is_real}, Confidence: {result.confidence:.2%}')
"""
return MiniFASNet(model_name=model_name, scale=scale)

112
uniface/spoofing/base.py Normal file
View File

@@ -0,0 +1,112 @@
# Copyright 2025 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
from __future__ import annotations
from abc import ABC, abstractmethod
import numpy as np
from uniface.types import SpoofingResult
__all__ = ['BaseSpoofer', 'SpoofingResult']
class BaseSpoofer(ABC):
"""
Abstract base class for all face anti-spoofing models.
This class defines the common interface that all anti-spoofing models must implement,
ensuring consistency across different spoofing detection methods. Anti-spoofing models
detect whether a face is real (live person) or fake (photo, video, mask, etc.).
"""
@abstractmethod
def _initialize_model(self) -> None:
"""
Initialize the underlying model for inference.
This method should handle loading model weights, creating the
inference session (e.g., ONNX Runtime), and any necessary
setup procedures to prepare the model for prediction.
Raises:
RuntimeError: If the model fails to load or initialize.
"""
raise NotImplementedError('Subclasses must implement the _initialize_model method.')
@abstractmethod
def preprocess(self, image: np.ndarray, bbox: list | np.ndarray) -> np.ndarray:
"""
Preprocess the input image for model inference.
This method should crop the face region using the bounding box,
resize it to the model's expected input size, and normalize
the pixel values as required by the model.
Args:
image (np.ndarray): Input image in BGR format with shape (H, W, C).
bbox (Union[List, np.ndarray]): Face bounding box in [x1, y1, x2, y2] format.
Returns:
np.ndarray: The preprocessed image tensor ready for inference,
typically with shape (1, C, H, W).
"""
raise NotImplementedError('Subclasses must implement the preprocess method.')
@abstractmethod
def postprocess(self, outputs: np.ndarray) -> SpoofingResult:
"""
Postprocess raw model outputs into prediction result.
This method takes the raw output from the model's inference and
converts it into a SpoofingResult.
Args:
outputs (np.ndarray): Raw outputs from the model inference (logits).
Returns:
SpoofingResult: Result containing is_real flag and confidence score.
"""
raise NotImplementedError('Subclasses must implement the postprocess method.')
@abstractmethod
def predict(self, image: np.ndarray, bbox: list | np.ndarray) -> SpoofingResult:
"""
Perform end-to-end anti-spoofing prediction on a face.
This method orchestrates the full pipeline: preprocessing the input,
running inference, and postprocessing to return the prediction.
Args:
image (np.ndarray): Input image in BGR format containing the face.
bbox (Union[List, np.ndarray]): Face bounding box in [x1, y1, x2, y2] format.
This is typically obtained from a face detector.
Returns:
SpoofingResult: Result containing is_real flag and confidence score.
Example:
>>> spoofer = MiniFASNet()
>>> detector = RetinaFace()
>>> faces = detector.detect(image)
>>> for face in faces:
... result = spoofer.predict(image, face.bbox)
... label = 'Real' if result.is_real else 'Fake'
... print(f'{label}: {result.confidence:.2%}')
"""
raise NotImplementedError('Subclasses must implement the predict method.')
def __call__(self, image: np.ndarray, bbox: list | np.ndarray) -> SpoofingResult:
"""
Provides a convenient, callable shortcut for the `predict` method.
Args:
image (np.ndarray): Input image in BGR format.
bbox (Union[List, np.ndarray]): Face bounding box in [x1, y1, x2, y2] format.
Returns:
SpoofingResult: Result containing is_real flag and confidence score.
"""
return self.predict(image, bbox)

View File

@@ -0,0 +1,220 @@
# Copyright 2025 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
import cv2
import numpy as np
from uniface.constants import MiniFASNetWeights
from uniface.log import Logger
from uniface.model_store import verify_model_weights
from uniface.onnx_utils import create_onnx_session
from uniface.types import SpoofingResult
from .base import BaseSpoofer
__all__ = ['MiniFASNet']
# Default crop scales for each model variant
DEFAULT_SCALES = {
MiniFASNetWeights.V1SE: 4.0,
MiniFASNetWeights.V2: 2.7,
}
class MiniFASNet(BaseSpoofer):
"""
MiniFASNet: Lightweight Face Anti-Spoofing with ONNX Runtime.
MiniFASNet is a face anti-spoofing model that detects whether a face is real
(live person) or fake (photo, video replay, mask, etc.). It supports two model
variants: V1SE (with squeeze-and-excitation) and V2 (improved version).
The model takes a face region cropped from the image using a bounding box
and predicts whether it's a real or spoofed face.
Reference:
https://github.com/yakhyo/face-anti-spoofing
Args:
model_name (MiniFASNetWeights): The enum specifying the model variant to load.
Options: V1SE (scale=4.0), V2 (scale=2.7).
Defaults to `MiniFASNetWeights.V2`.
scale (Optional[float]): Custom crop scale factor for face region.
If None, uses the default scale for the selected model variant.
V1SE uses 4.0, V2 uses 2.7.
Attributes:
scale (float): Crop scale factor for face region extraction.
input_size (Tuple[int, int]): Model input dimensions (width, height).
Example:
>>> from uniface.spoofing import MiniFASNet
>>> from uniface import RetinaFace
>>>
>>> detector = RetinaFace()
>>> spoofer = MiniFASNet()
>>>
>>> # Detect faces and check if they are real
>>> faces = detector.detect(image)
>>> for face in faces:
... result = spoofer.predict(image, face.bbox)
... label = 'Real' if result.is_real else 'Fake'
... print(f'{label}: {result.confidence:.2%}')
"""
def __init__(
self,
model_name: MiniFASNetWeights = MiniFASNetWeights.V2,
scale: float | None = None,
) -> None:
Logger.info(f'Initializing MiniFASNet with model={model_name.name}')
# Use default scale for the model variant if not specified
self.scale = scale if scale is not None else DEFAULT_SCALES.get(model_name, 2.7)
self.model_path = verify_model_weights(model_name)
self._initialize_model()
def _initialize_model(self) -> None:
"""
Initialize the ONNX model from the stored model path.
Raises:
RuntimeError: If the model fails to load or initialize.
"""
try:
self.session = create_onnx_session(self.model_path)
# Get input configuration
input_cfg = self.session.get_inputs()[0]
self.input_name = input_cfg.name
# Input shape is (batch, channels, height, width) - we need (width, height)
self.input_size = tuple(input_cfg.shape[2:4][::-1]) # (width, height)
# Get output configuration
output_cfg = self.session.get_outputs()[0]
self.output_name = output_cfg.name
Logger.info(f'MiniFASNet initialized with input size {self.input_size}, scale={self.scale}')
except Exception as e:
Logger.error(f"Failed to load MiniFASNet model from '{self.model_path}'", exc_info=True)
raise RuntimeError(f'Failed to initialize MiniFASNet model: {e}') from e
def _xyxy_to_xywh(self, bbox: list | np.ndarray) -> list[int]:
"""Convert bounding box from [x1, y1, x2, y2] to [x, y, w, h] format."""
x1, y1, x2, y2 = bbox[:4]
return [int(x1), int(y1), int(x2 - x1), int(y2 - y1)]
def _crop_face(self, image: np.ndarray, bbox_xywh: list[int]) -> np.ndarray:
"""
Crop and resize face region from image using scale factor.
The crop is centered on the face bounding box and scaled to capture
more context around the face, which is important for anti-spoofing.
Args:
image: Input image in BGR format.
bbox_xywh: Face bounding box in [x, y, w, h] format.
Returns:
Cropped and resized face region.
"""
src_h, src_w = image.shape[:2]
x, y, box_w, box_h = bbox_xywh
# Calculate the scale to apply based on image and face size
scale = min((src_h - 1) / box_h, (src_w - 1) / box_w, self.scale)
new_w = box_w * scale
new_h = box_h * scale
# Calculate center of the bounding box
center_x = x + box_w / 2
center_y = y + box_h / 2
# Calculate new bounding box coordinates
x1 = max(0, int(center_x - new_w / 2))
y1 = max(0, int(center_y - new_h / 2))
x2 = min(src_w - 1, int(center_x + new_w / 2))
y2 = min(src_h - 1, int(center_y + new_h / 2))
# Crop and resize
cropped = image[y1 : y2 + 1, x1 : x2 + 1]
resized = cv2.resize(cropped, self.input_size)
return resized
def preprocess(self, image: np.ndarray, bbox: list | np.ndarray) -> np.ndarray:
"""
Preprocess the input image for model inference.
Crops the face region, converts to float32, and arranges
dimensions for the model (NCHW format).
Args:
image: Input image in BGR format with shape (H, W, C).
bbox: Face bounding box in [x1, y1, x2, y2] format.
Returns:
Preprocessed image tensor with shape (1, C, H, W).
"""
# Convert bbox format
bbox_xywh = self._xyxy_to_xywh(bbox)
# Crop and resize face region
face = self._crop_face(image, bbox_xywh)
# Convert to float32 (no normalization needed for this model)
face = face.astype(np.float32)
# HWC -> CHW -> NCHW
face = np.transpose(face, (2, 0, 1))
face = np.expand_dims(face, axis=0)
return face
def _softmax(self, x: np.ndarray) -> np.ndarray:
"""Apply softmax to logits along axis 1."""
e_x = np.exp(x - np.max(x, axis=1, keepdims=True))
return e_x / e_x.sum(axis=1, keepdims=True)
def postprocess(self, outputs: np.ndarray) -> SpoofingResult:
"""
Postprocess raw model outputs into prediction result.
Applies softmax to convert logits to probabilities and
returns the SpoofingResult with is_real flag and confidence score.
Args:
outputs: Raw outputs from the model inference (logits).
Returns:
SpoofingResult: Result containing is_real flag and confidence score.
"""
probs = self._softmax(outputs)
label_idx = int(np.argmax(probs))
confidence = float(probs[0, label_idx])
return SpoofingResult(is_real=(label_idx == 1), confidence=confidence)
def predict(self, image: np.ndarray, bbox: list | np.ndarray) -> SpoofingResult:
"""
Perform end-to-end anti-spoofing prediction on a face.
Args:
image: Input image in BGR format containing the face.
bbox: Face bounding box in [x1, y1, x2, y2] format.
Returns:
SpoofingResult: Result containing is_real flag and confidence score.
"""
# Preprocess
input_tensor = self.preprocess(image, bbox)
# Run inference
outputs = self.session.run([self.output_name], {self.input_name: input_tensor})[0]
# Postprocess and return
return self.postprocess(outputs)

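The scale clamp in `_crop_face` simply caps the requested context so the crop never leaves the frame. A standalone arithmetic sketch with made-up numbers:

# Made-up example: a 640x480 frame (w x h) and a 100x120 face box.
src_h, src_w = 480, 640
box_w, box_h = 100, 120
requested_scale = 2.7  # default for MiniFASNetWeights.V2

# Same clamp as MiniFASNet._crop_face: stay inside the image on both axes.
scale = min((src_h - 1) / box_h, (src_w - 1) / box_w, requested_scale)
print(scale)                          # 2.7 -- the frame is large enough here
print(box_w * scale, box_h * scale)   # 270.0 324.0 -- crop size before clipping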
216
uniface/types.py Normal file
View File

@@ -0,0 +1,216 @@
# Copyright 2025 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
"""Unified type definitions for UniFace.
This module centralizes all result dataclasses used across the library,
providing consistent and immutable return types for model predictions.
Note on mutability:
- Result dataclasses (GazeResult, SpoofingResult, EmotionResult, AttributeResult)
are frozen (immutable) since they represent computation outputs that shouldn't change.
- Face dataclass is mutable because FaceAnalyzer enriches it with additional
attributes (embedding, age, gender, etc.) after initial detection.
"""
from __future__ import annotations
from dataclasses import dataclass, fields
import numpy as np
from uniface.face_utils import compute_similarity
__all__ = [
'AttributeResult',
'EmotionResult',
'Face',
'GazeResult',
'SpoofingResult',
]
@dataclass(slots=True, frozen=True)
class GazeResult:
"""Result of gaze estimation.
Attributes:
pitch: Vertical gaze angle in radians (positive = up, negative = down).
yaw: Horizontal gaze angle in radians (positive = right, negative = left).
"""
pitch: float
yaw: float
def __repr__(self) -> str:
return f'GazeResult(pitch={self.pitch:.4f}, yaw={self.yaw:.4f})'
@dataclass(slots=True, frozen=True)
class SpoofingResult:
"""Result of face anti-spoofing detection.
Attributes:
is_real: True if the face is real/live, False if fake/spoof.
confidence: Confidence score for the prediction (0.0 to 1.0).
"""
is_real: bool
confidence: float
def __repr__(self) -> str:
label = 'Real' if self.is_real else 'Fake'
return f'SpoofingResult({label}, confidence={self.confidence:.4f})'
@dataclass(slots=True, frozen=True)
class EmotionResult:
"""Result of emotion recognition.
Attributes:
emotion: Predicted emotion label (e.g., 'Happy', 'Sad', 'Angry').
confidence: Confidence score for the prediction (0.0 to 1.0).
"""
emotion: str
confidence: float
def __repr__(self) -> str:
return f"EmotionResult('{self.emotion}', confidence={self.confidence:.4f})"
@dataclass(slots=True, frozen=True)
class AttributeResult:
"""Unified result structure for face attribute prediction.
This dataclass provides a consistent return type across different attribute
prediction models (e.g., AgeGender, FairFace), enabling interoperability
and unified handling of results.
Attributes:
gender: Predicted gender (0=Female, 1=Male).
age: Exact age in years. Provided by AgeGender model, None for FairFace.
age_group: Age range string like "20-29". Provided by FairFace, None for AgeGender.
race: Race/ethnicity label. Provided by FairFace only.
Properties:
sex: Gender as a human-readable string ("Female" or "Male").
Examples:
>>> # AgeGender result
>>> result = AttributeResult(gender=1, age=25)
>>> result.sex
'Male'
>>> # FairFace result
>>> result = AttributeResult(gender=0, age_group='20-29', race='East Asian')
>>> result.sex
'Female'
"""
gender: int
age: int | None = None
age_group: str | None = None
race: str | None = None
@property
def sex(self) -> str:
"""Get gender as a string label (Female or Male)."""
return 'Female' if self.gender == 0 else 'Male'
def __repr__(self) -> str:
parts = [f'gender={self.sex}']
if self.age is not None:
parts.append(f'age={self.age}')
if self.age_group is not None:
parts.append(f'age_group={self.age_group}')
if self.race is not None:
parts.append(f'race={self.race}')
return f'AttributeResult({", ".join(parts)})'
@dataclass(slots=True)
class Face:
"""Detected face with analysis results.
This dataclass represents a single detected face along with optional
analysis results such as embeddings, age, gender, and race predictions.
Note: This dataclass is mutable (not frozen) because FaceAnalyzer enriches
Face objects with additional attributes after initial detection.
Attributes:
bbox: Bounding box coordinates [x1, y1, x2, y2].
confidence: Detection confidence score.
landmarks: Facial landmark coordinates (typically 5 points).
embedding: Face embedding vector for recognition (optional).
gender: Predicted gender, 0=Female, 1=Male (optional).
age: Predicted exact age in years (optional, from AgeGender model).
age_group: Predicted age range like "20-29" (optional, from FairFace).
race: Predicted race/ethnicity (optional, from FairFace).
emotion: Predicted emotion label (optional, from Emotion model).
emotion_confidence: Confidence score for emotion prediction (optional).
Properties:
sex: Gender as a human-readable string ("Female" or "Male").
bbox_xyxy: Bounding box in (x1, y1, x2, y2) format.
bbox_xywh: Bounding box in (x1, y1, width, height) format.
"""
# Required attributes (from detection)
bbox: np.ndarray
confidence: float
landmarks: np.ndarray
# Optional attributes (enriched by analyzers)
embedding: np.ndarray | None = None
gender: int | None = None
age: int | None = None
age_group: str | None = None
race: str | None = None
emotion: str | None = None
emotion_confidence: float | None = None
def compute_similarity(self, other: Face) -> float:
"""Compute cosine similarity with another face."""
if self.embedding is None or other.embedding is None:
raise ValueError('Both faces must have embeddings for similarity computation')
return float(compute_similarity(self.embedding, other.embedding))
def to_dict(self) -> dict:
"""Convert to dictionary."""
return {f.name: getattr(self, f.name) for f in fields(self)}
@property
def sex(self) -> str | None:
"""Get gender as a string label (Female or Male)."""
if self.gender is None:
return None
return 'Female' if self.gender == 0 else 'Male'
@property
def bbox_xyxy(self) -> np.ndarray:
"""Get bounding box coordinates in (x1, y1, x2, y2) format."""
return self.bbox.copy()
@property
def bbox_xywh(self) -> np.ndarray:
"""Get bounding box coordinates in (x1, y1, w, h) format."""
return np.array([self.bbox[0], self.bbox[1], self.bbox[2] - self.bbox[0], self.bbox[3] - self.bbox[1]])
def __repr__(self) -> str:
parts = [f'Face(confidence={self.confidence:.3f}']
if self.age is not None:
parts.append(f'age={self.age}')
if self.age_group is not None:
parts.append(f'age_group={self.age_group}')
if self.gender is not None:
parts.append(f'sex={self.sex}')
if self.race is not None:
parts.append(f'race={self.race}')
if self.emotion is not None:
parts.append(f'emotion={self.emotion}')
if self.embedding is not None:
parts.append(f'embedding_dim={self.embedding.shape[0]}')
return ', '.join(parts) + ')'

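A small hedged sketch of the Face dataclass in isolation; all values are synthetic and exist only to exercise the helpers above:

import numpy as np

from uniface.types import Face

face = Face(
    bbox=np.array([10.0, 20.0, 110.0, 140.0]),
    confidence=0.98,
    landmarks=np.zeros((5, 2), dtype=np.float32),  # synthetic placeholder points
)

# Enrich the mutable dataclass the way FaceAnalyzer would.
face.gender = 1
face.age = 31
face.embedding = np.random.rand(512).astype(np.float32)

print(face.sex)        # 'Male'
print(face.bbox_xywh)  # [ 10.  20. 100. 120.]

other = Face(
    bbox=face.bbox.copy(),
    confidence=0.95,
    landmarks=face.landmarks.copy(),
    embedding=np.random.rand(512).astype(np.float32),
)
print(f'similarity: {face.compute_similarity(other):.3f}')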
View File

@@ -2,11 +2,26 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
from typing import List, Tuple, Union
"""Visualization utilities for UniFace.
This module provides functions for drawing detection results, gaze directions,
and face parsing segmentation maps on images.
"""
from __future__ import annotations
import cv2
import numpy as np
__all__ = [
'FACE_PARSING_COLORS',
'FACE_PARSING_LABELS',
'draw_detections',
'draw_fancy_bbox',
'draw_gaze',
'vis_parsing_maps',
]
# Face parsing component names (19 classes)
FACE_PARSING_LABELS = [
'background',
@@ -57,23 +72,25 @@ FACE_PARSING_COLORS = [
def draw_detections(
*,
image: np.ndarray,
bboxes: Union[List[np.ndarray], List[List[float]]],
scores: Union[np.ndarray, List[float]],
landmarks: Union[List[np.ndarray], List[List[List[float]]]],
bboxes: list[np.ndarray] | list[list[float]],
scores: np.ndarray | list[float],
landmarks: list[np.ndarray] | list[list[list[float]]],
vis_threshold: float = 0.6,
draw_score: bool = False,
fancy_bbox: bool = True,
):
"""
Draws bounding boxes, landmarks, and optional scores on an image.
) -> None:
"""Draw bounding boxes, landmarks, and optional scores on an image.
Modifies the image in-place.
Args:
image: Input image to draw on.
bboxes: List of bounding boxes [x1, y1, x2, y2].
image: Input image to draw on (modified in-place).
bboxes: List of bounding boxes as [x1, y1, x2, y2].
scores: List of confidence scores.
landmarks: List of landmark sets with shape (5, 2).
vis_threshold: Confidence threshold for filtering. Defaults to 0.6.
draw_score: Whether to draw confidence scores. Defaults to False.
fancy_bbox: Use corner-style bounding boxes. Defaults to True.
"""
colors = [(0, 0, 255), (0, 255, 255), (255, 0, 255), (0, 255, 0), (255, 0, 0)]
@@ -134,19 +151,18 @@ def draw_detections(
def draw_fancy_bbox(
image: np.ndarray,
bbox: np.ndarray,
color: Tuple[int, int, int] = (0, 255, 0),
color: tuple[int, int, int] = (0, 255, 0),
thickness: int = 3,
proportion: float = 0.2,
):
"""
Draws a bounding box with fancy corners on an image.
) -> None:
"""Draw a bounding box with fancy corners on an image.
Args:
image: Input image to draw on.
image: Input image to draw on (modified in-place).
bbox: Bounding box coordinates [x1, y1, x2, y2].
color: Color of the bounding box. Defaults to green.
thickness: Thickness of the bounding box lines. Defaults to 3.
proportion: Proportion of the corner length to the width/height of the bounding box. Defaults to 0.2.
color: Color of the bounding box in BGR. Defaults to green.
thickness: Thickness of the corner lines. Defaults to 3.
proportion: Proportion of corner length to box dimensions. Defaults to 0.2.
"""
x1, y1, x2, y2 = map(int, bbox)
width = x2 - x1
@@ -177,15 +193,14 @@ def draw_fancy_bbox(
def draw_gaze(
image: np.ndarray,
bbox: np.ndarray,
pitch: np.ndarray,
yaw: np.ndarray,
pitch: np.ndarray | float,
yaw: np.ndarray | float,
*,
draw_bbox: bool = True,
fancy_bbox: bool = True,
draw_angles: bool = True,
):
"""
Draws gaze direction with optional bounding box on an image.
) -> None:
"""Draw gaze direction with optional bounding box on an image.
Args:
image: Input image to draw on (modified in-place).
@@ -194,7 +209,7 @@ def draw_gaze(
yaw: Horizontal gaze angle in radians.
draw_bbox: Whether to draw the bounding box. Defaults to True.
fancy_bbox: Use fancy corner-style bbox. Defaults to True.
draw_angles: Whether to display pitch/yaw values as text. Defaults to False.
draw_angles: Whether to display pitch/yaw values as text. Defaults to True.
"""
x_min, y_min, x_max, y_max = map(int, bbox[:4])
@@ -275,29 +290,25 @@ def vis_parsing_maps(
save_image: bool = False,
save_path: str = 'result.png',
) -> np.ndarray:
"""
Visualizes face parsing segmentation mask by overlaying colored regions on the image.
"""Visualize face parsing segmentation mask by overlaying colored regions.
Args:
image: Input face image in RGB format with shape (H, W, 3).
segmentation_mask: Segmentation mask with shape (H, W) where each pixel
value represents a facial component class (0-18).
value represents a facial component class (0-18).
save_image: Whether to save the visualization to disk. Defaults to False.
save_path: Path to save the visualization if save_image is True.
Returns:
np.ndarray: Blended image with segmentation overlay in BGR format.
Blended image with segmentation overlay in BGR format.
Example:
>>> import cv2
>>> from uniface.parsing import BiSeNet
>>> from uniface.visualization import vis_parsing_maps
>>>
>>> parser = BiSeNet()
>>> face_image = cv2.imread('face.jpg')
>>> mask = parser.parse(face_image)
>>>
>>> # Visualize
>>> face_rgb = cv2.cvtColor(face_image, cv2.COLOR_BGR2RGB)
>>> result = vis_parsing_maps(face_rgb, mask)
>>> cv2.imwrite('parsed_face.jpg', result)