Mirror of https://github.com/yakhyo/uniface.git (synced 2025-12-30 09:02:25 +00:00)

Compare commits (33 commits):

- cbcd89b167
- 50226041c9
- 64ad0d2f53
- 7c98a60d26
- d97a3b2cb2
- 2200ba063c
- 9bcbfa65c2
- 96306a0910
- 3389aa3e4c
- b282e6ccc1
- d085c6a822
- 13b518e96d
- 1b877bc9fc
- bb1d209f3b
- 54b769c0f1
- 4d1921e531
- da8a5cf35b
- 3982d677a9
- f4458f0550
- 637316f077
- 6b1d2a1ce6
- a5e97ac484
- 0c93598007
- 779952e3f8
- 39b50b62bd
- db7532ecf1
- 4b8dc2c0f9
- 0a2a10e165
- 84cda5f56c
- 0771a7959a
- 15947eb605
- 1ccc4f6b77
- 189755a1a6
**.github/logos/gaze_crop.png** (vendored, new file) BIN; binary file not shown; size: 716 KiB

**.github/logos/gaze_org.png** (vendored, new file) BIN; binary file not shown; size: 673 KiB
**.github/workflows/ci.yml** (vendored; 38 lines changed)

```
@@ -10,14 +10,31 @@ on:
      - main
      - develop

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  test:
  lint:
    runs-on: ubuntu-latest
    timeout-minutes: 5
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: '3.11'
      - uses: pre-commit/action@v3.0.1

  test:
    runs-on: ${{ matrix.os }}
    timeout-minutes: 15
    needs: lint

    strategy:
      fail-fast: false
      matrix:
        python-version: ["3.10", "3.11", "3.12", "3.13"]
        os: [ubuntu-latest, macos-latest, windows-latest]
        python-version: ["3.11", "3.13"]

    steps:
      - name: Checkout code
@@ -27,7 +44,7 @@ jobs:
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
          cache: 'pip'
          cache: "pip"

      - name: Install dependencies
        run: |
@@ -38,21 +55,15 @@ jobs:
        run: |
          python -c "import onnxruntime as ort; print('Available providers:', ort.get_available_providers())"

      - name: Lint with ruff (if available)
        run: |
          pip install ruff || true
          ruff check . --exit-zero || true
        continue-on-error: true

      - name: Run tests
        run: pytest -v --tb=short

      - name: Test package imports
        run: |
          python -c "from uniface import RetinaFace, ArcFace, Landmark106, AgeGender; print('All imports successful')"
        run: python -c "import uniface; print(f'uniface {uniface.__version__} loaded with {len(uniface.__all__)} exports')"

  build:
    runs-on: ubuntu-latest
    timeout-minutes: 10
    needs: test

    steps:
@@ -62,8 +73,8 @@ jobs:
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"
          cache: 'pip'
          python-version: "3.11"
          cache: "pip"

      - name: Install build tools
        run: |
@@ -84,4 +95,3 @@ jobs:
          name: dist-python-${{ github.sha }}
          path: dist/
          retention-days: 7
```
**.github/workflows/publish.yml** (vendored; 17 lines changed)

```
@@ -5,9 +5,14 @@ on:
    tags:
      - "v*.*.*" # Trigger only on version tags like v0.1.9

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  validate:
    runs-on: ubuntu-latest
    timeout-minutes: 5
    outputs:
      version: ${{ steps.get_version.outputs.version }}
      tag_version: ${{ steps.get_version.outputs.tag_version }}
@@ -16,13 +21,18 @@ jobs:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"

      - name: Get version from tag and pyproject.toml
        id: get_version
        run: |
          TAG_VERSION=${GITHUB_REF#refs/tags/v}
          echo "tag_version=$TAG_VERSION" >> $GITHUB_OUTPUT

          PYPROJECT_VERSION=$(grep -Po '(?<=^version = ")[^"]*' pyproject.toml)
          PYPROJECT_VERSION=$(python -c "import tomllib; print(tomllib.load(open('pyproject.toml','rb'))['project']['version'])")
          echo "version=$PYPROJECT_VERSION" >> $GITHUB_OUTPUT

          echo "Tag version: v$TAG_VERSION"
@@ -38,12 +48,13 @@ jobs:

  test:
    runs-on: ubuntu-latest
    timeout-minutes: 15
    needs: validate

    strategy:
      fail-fast: false
      matrix:
        python-version: ["3.10", "3.11", "3.12", "3.13"]
        python-version: ["3.11", "3.13"]

    steps:
      - name: Checkout code
@@ -65,6 +76,7 @@ jobs:

  publish:
    runs-on: ubuntu-latest
    timeout-minutes: 10
    needs: [validate, test]
    permissions:
      contents: write
@@ -105,4 +117,3 @@ jobs:
        with:
          files: dist/*
          generate_release_notes: true
```
**.pre-commit-config.yaml** (new file, 40 lines)

```yaml
# Pre-commit configuration for UniFace
# See https://pre-commit.com for more information
# See https://pre-commit.com/hooks.html for more hooks

repos:
  # General file checks
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v6.0.0
    hooks:
      - id: trailing-whitespace
      - id: end-of-file-fixer
      - id: check-yaml
      - id: check-toml
      - id: check-added-large-files
        args: ['--maxkb=1000']
      - id: check-merge-conflict
      - id: debug-statements
      - id: check-ast

  # Ruff - Fast Python linter and formatter
  - repo: https://github.com/astral-sh/ruff-pre-commit
    rev: v0.14.10
    hooks:
      - id: ruff
        args: [--fix, --unsafe-fixes, --exit-non-zero-on-fix]
      - id: ruff-format

  # Security checks
  - repo: https://github.com/PyCQA/bandit
    rev: 1.9.2
    hooks:
      - id: bandit
        args: [-c, pyproject.toml]
        additional_dependencies: ['bandit[toml]']
        exclude: ^tests/

# Configuration
ci:
  autofix_commit_msg: 'style: auto-fix by pre-commit hooks'
  autoupdate_commit_msg: 'chore: update pre-commit hooks'
```
**CONTRIBUTING.md** (new file, 190 lines)

# Contributing to UniFace

Thank you for considering contributing to UniFace! We welcome contributions of all kinds.

## How to Contribute

### Reporting Issues

- Use GitHub Issues to report bugs or suggest features
- Include clear descriptions and reproducible examples
- Check existing issues before creating new ones

### Pull Requests

1. Fork the repository
2. Create a new branch for your feature
3. Write clear, documented code with type hints
4. Add tests for new functionality
5. Ensure all tests pass and pre-commit hooks are satisfied
6. Submit a pull request with a clear description

## Development Setup

```bash
git clone https://github.com/yakhyo/uniface.git
cd uniface
pip install -e ".[dev]"
```

### Setting Up Pre-commit Hooks

We use [pre-commit](https://pre-commit.com/) to ensure code quality and consistency. Install and configure it:

```bash
# Install pre-commit
pip install pre-commit

# Install the git hooks
pre-commit install

# (Optional) Run against all files
pre-commit run --all-files
```

Once installed, pre-commit will automatically run on every commit to check:

- Code formatting and linting (Ruff)
- Security issues (Bandit)
- General file hygiene (trailing whitespace, YAML/TOML validity, etc.)

**Note:** All PRs are automatically checked by CI. The merge button will only be available after all checks pass.

## Code Style

This project uses [Ruff](https://docs.astral.sh/ruff/) for linting and formatting, following modern Python best practices. Pre-commit handles all formatting automatically.

### Style Guidelines

#### General Rules

- **Line length:** 120 characters maximum
- **Python version:** 3.11+ (use modern syntax)
- **Quote style:** Single quotes for strings, double quotes for docstrings

#### Type Hints

Use modern Python 3.11+ type hints (PEP 585 and PEP 604):

```python
# Preferred (modern)
def process(items: list[str], config: dict[str, int] | None = None) -> tuple[int, str]:
    ...

# Avoid (legacy)
from typing import List, Dict, Optional, Tuple
def process(items: List[str], config: Optional[Dict[str, int]] = None) -> Tuple[int, str]:
    ...
```

#### Docstrings

Use [Google-style docstrings](https://google.github.io/styleguide/pyguide.html#38-comments-and-docstrings) for all public APIs:

```python
def detect_faces(image: np.ndarray, threshold: float = 0.5) -> list[Face]:
    """Detect faces in an image.

    Args:
        image: Input image as a numpy array with shape (H, W, C) in BGR format.
        threshold: Confidence threshold for filtering detections. Defaults to 0.5.

    Returns:
        List of Face objects containing bounding boxes, confidence scores,
        and facial landmarks.

    Raises:
        ValueError: If the input image has invalid dimensions.

    Example:
        >>> from uniface import detect_faces
        >>> faces = detect_faces(image, threshold=0.8)
        >>> print(f"Found {len(faces)} faces")
    """
```

#### Import Order

Imports are automatically sorted by Ruff in the following order:

1. **Future** imports (`from __future__ import annotations`)
2. **Standard library** (`os`, `sys`, `typing`, etc.)
3. **Third-party** (`numpy`, `cv2`, `onnxruntime`, etc.)
4. **First-party** (`uniface.*`)
5. **Local** (relative imports like `.base`, `.models`)

```python
from __future__ import annotations

import os
from typing import Any

import cv2
import numpy as np

from uniface.constants import RetinaFaceWeights
from uniface.log import Logger

from .base import BaseDetector
```

#### Code Comments

- Add comments for complex logic, magic numbers, and non-obvious behavior
- Avoid comments that merely restate the code
- Use `# TODO:` with issue links for planned improvements

```python
# RetinaFace FPN strides and corresponding anchor sizes per level
steps = [8, 16, 32]
min_sizes = [[16, 32], [64, 128], [256, 512]]

# Add a small epsilon to prevent division by zero
similarity = np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b) + 1e-5)
```

## Running Tests

```bash
# Run all tests
pytest tests/

# Run with verbose output
pytest tests/ -v

# Run a specific test file
pytest tests/test_factory.py

# Run with coverage
pytest tests/ --cov=uniface --cov-report=html
```

## Adding New Features

When adding a new model or feature:

1. **Create the model class** in the appropriate submodule (e.g., `uniface/detection/`)
2. **Add weight constants** to `uniface/constants.py` with URLs and SHA256 hashes (see the sketch after this list)
3. **Export in `__init__.py`** files at both module and package levels
4. **Write tests** in the `tests/` directory
5. **Add example usage** in `tools/` or update existing notebooks
6. **Update documentation** if needed
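
As a rough illustration of step 2, a weight-constant entry might look like the sketch below. The enum name `MyModelWeights`, the download URL, and the SHA256 value are placeholders, and the exact base class and field layout used in `uniface/constants.py` may differ; mirror the structure of an existing entry such as `RetinaFaceWeights` rather than copying this verbatim.

```python
# Hypothetical sketch only -- names, URL, and hash are placeholders.
# Follow the pattern already used by entries like RetinaFaceWeights.
from enum import Enum


class MyModelWeights(str, Enum):
    """Pretrained weight identifiers for the new model."""

    DEFAULT = 'mymodel_default'


# Download metadata keyed by weight identifier.
MYMODEL_WEIGHTS_URLS = {
    MyModelWeights.DEFAULT: 'https://example.com/path/to/mymodel_default.onnx',  # placeholder URL
}
MYMODEL_WEIGHTS_SHA256 = {
    MyModelWeights.DEFAULT: '<sha256-of-the-onnx-file>',  # placeholder hash
}
```

After adding the constants, export the new class from the submodule's `__init__.py` and from the top-level `uniface/__init__.py` so it is importable as `from uniface import MyModel`.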

## Examples

Example notebooks demonstrating library usage:

| Example | Notebook |
|---------|----------|
| Face Detection | [01_face_detection.ipynb](examples/01_face_detection.ipynb) |
| Face Alignment | [02_face_alignment.ipynb](examples/02_face_alignment.ipynb) |
| Face Verification | [03_face_verification.ipynb](examples/03_face_verification.ipynb) |
| Face Search | [04_face_search.ipynb](examples/04_face_search.ipynb) |
| Face Analyzer | [05_face_analyzer.ipynb](examples/05_face_analyzer.ipynb) |
| Face Parsing | [06_face_parsing.ipynb](examples/06_face_parsing.ipynb) |
| Face Anonymization | [07_face_anonymization.ipynb](examples/07_face_anonymization.ipynb) |
| Gaze Estimation | [08_gaze_estimation.ipynb](examples/08_gaze_estimation.ipynb) |

## Questions?

Open an issue or start a discussion on GitHub.
**MODELS.md** (402 lines changed)
@@ -10,17 +10,17 @@ Complete guide to all available models, their performance characteristics, and s
|
||||
|
||||
RetinaFace models are trained on the WIDER FACE dataset and provide excellent accuracy-speed tradeoffs.
|
||||
|
||||
| Model Name | Params | Size | Easy | Medium | Hard | Use Case |
|
||||
|---------------------|--------|--------|--------|--------|--------|----------------------------|
|
||||
| `MNET_025` | 0.4M | 1.7MB | 88.48% | 87.02% | 80.61% | Mobile/Edge devices |
|
||||
| `MNET_050` | 1.0M | 2.6MB | 89.42% | 87.97% | 82.40% | Mobile/Edge devices |
|
||||
| `MNET_V1` | 3.5M | 3.8MB | 90.59% | 89.14% | 84.13% | Balanced mobile |
|
||||
| `MNET_V2` ⭐ | 3.2M | 3.5MB | 91.70% | 91.03% | 86.60% | **Recommended default** |
|
||||
| `RESNET18` | 11.7M | 27MB | 92.50% | 91.02% | 86.63% | Server/High accuracy |
|
||||
| `RESNET34` | 24.8M | 56MB | 94.16% | 93.12% | 88.90% | Maximum accuracy |
|
||||
| Model Name | Params | Size | Easy | Medium | Hard | Use Case |
|
||||
| -------------- | ------ | ----- | ------ | ------ | ------ | ----------------------------- |
|
||||
| `MNET_025` | 0.4M | 1.7MB | 88.48% | 87.02% | 80.61% | Mobile/Edge devices |
|
||||
| `MNET_050` | 1.0M | 2.6MB | 89.42% | 87.97% | 82.40% | Mobile/Edge devices |
|
||||
| `MNET_V1` | 3.5M | 3.8MB | 90.59% | 89.14% | 84.13% | Balanced mobile |
|
||||
| `MNET_V2` ⭐ | 3.2M | 3.5MB | 91.70% | 91.03% | 86.60% | **Recommended default** |
|
||||
| `RESNET18` | 11.7M | 27MB | 92.50% | 91.02% | 86.63% | Server/High accuracy |
|
||||
| `RESNET34` | 24.8M | 56MB | 94.16% | 93.12% | 88.90% | Maximum accuracy |
|
||||
|
||||
**Accuracy**: WIDER FACE validation set (Easy/Medium/Hard subsets) - from [RetinaFace paper](https://arxiv.org/abs/1905.00641)
|
||||
**Speed**: Benchmark on your own hardware using `scripts/run_detection.py --iterations 100`
|
||||
**Speed**: Benchmark on your own hardware using `tools/detection.py --source <image> --iterations 100`
|
||||
|
||||
#### Usage
|
||||
|
||||
@@ -34,7 +34,7 @@ detector = RetinaFace() # Uses MNET_V2
|
||||
# Specific model
|
||||
detector = RetinaFace(
|
||||
model_name=RetinaFaceWeights.MNET_025, # Fastest
|
||||
conf_thresh=0.5,
|
||||
confidence_threshold=0.5,
|
||||
nms_thresh=0.4,
|
||||
input_size=(640, 640)
|
||||
)
|
||||
@@ -46,13 +46,13 @@ detector = RetinaFace(
|
||||
|
||||
SCRFD (Sample and Computation Redistribution for Efficient Face Detection) models offer state-of-the-art speed-accuracy tradeoffs.
|
||||
|
||||
| Model Name | Params | Size | Easy | Medium | Hard | Use Case |
|
||||
|-----------------|--------|-------|--------|--------|--------|----------------------------|
|
||||
| `SCRFD_500M` | 0.6M | 2.5MB | 90.57% | 88.12% | 68.51% | Real-time applications |
|
||||
| `SCRFD_10G` ⭐ | 4.2M | 17MB | 95.16% | 93.87% | 83.05% | **High accuracy + speed** |
|
||||
| Model Name | Params | Size | Easy | Medium | Hard | Use Case |
|
||||
| ---------------- | ------ | ----- | ------ | ------ | ------ | ------------------------------- |
|
||||
| `SCRFD_500M` | 0.6M | 2.5MB | 90.57% | 88.12% | 68.51% | Real-time applications |
|
||||
| `SCRFD_10G` ⭐ | 4.2M | 17MB | 95.16% | 93.87% | 83.05% | **High accuracy + speed** |
|
||||
|
||||
**Accuracy**: WIDER FACE validation set - from [SCRFD paper](https://arxiv.org/abs/2105.04714)
|
||||
**Speed**: Benchmark on your own hardware using `scripts/run_detection.py --iterations 100`
|
||||
**Speed**: Benchmark on your own hardware using `tools/detection.py --source <image> --iterations 100`
|
||||
|
||||
#### Usage
|
||||
|
||||
@@ -63,29 +63,79 @@ from uniface.constants import SCRFDWeights
|
||||
# Fast real-time detection
|
||||
detector = SCRFD(
|
||||
model_name=SCRFDWeights.SCRFD_500M_KPS,
|
||||
conf_thresh=0.5,
|
||||
confidence_threshold=0.5,
|
||||
input_size=(640, 640)
|
||||
)
|
||||
|
||||
# High accuracy
|
||||
detector = SCRFD(
|
||||
model_name=SCRFDWeights.SCRFD_10G_KPS,
|
||||
conf_thresh=0.5
|
||||
confidence_threshold=0.5
|
||||
)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### YOLOv5-Face Family
|
||||
|
||||
YOLOv5-Face models provide excellent detection accuracy with 5-point facial landmarks, optimized for real-time applications.
|
||||
|
||||
| Model Name | Size | Easy | Medium | Hard | Use Case |
|
||||
| -------------- | ---- | ------ | ------ | ------ | ------------------------------ |
|
||||
| `YOLOV5N` | 11MB | 93.61% | 91.52% | 80.53% | Lightweight/Mobile |
|
||||
| `YOLOV5S` ⭐ | 28MB | 94.33% | 92.61% | 83.15% | **Real-time + accuracy** |
|
||||
| `YOLOV5M` | 82MB | 95.30% | 93.76% | 85.28% | High accuracy |
|
||||
|
||||
**Accuracy**: WIDER FACE validation set - from [YOLOv5-Face paper](https://arxiv.org/abs/2105.12931)
|
||||
**Speed**: Benchmark on your own hardware using `tools/detection.py --source <image> --iterations 100`
|
||||
**Note**: Fixed input size of 640×640. Models exported to ONNX from [deepcam-cn/yolov5-face](https://github.com/deepcam-cn/yolov5-face)
|
||||
|
||||
#### Usage
|
||||
|
||||
```python
|
||||
from uniface import YOLOv5Face
|
||||
from uniface.constants import YOLOv5FaceWeights
|
||||
|
||||
# Lightweight/Mobile
|
||||
detector = YOLOv5Face(
|
||||
model_name=YOLOv5FaceWeights.YOLOV5N,
|
||||
confidence_threshold=0.6,
|
||||
nms_thresh=0.5
|
||||
)
|
||||
|
||||
# Real-time detection (recommended)
|
||||
detector = YOLOv5Face(
|
||||
model_name=YOLOv5FaceWeights.YOLOV5S,
|
||||
confidence_threshold=0.6,
|
||||
nms_thresh=0.5
|
||||
)
|
||||
|
||||
# High accuracy
|
||||
detector = YOLOv5Face(
|
||||
model_name=YOLOv5FaceWeights.YOLOV5M,
|
||||
confidence_threshold=0.6
|
||||
)
|
||||
|
||||
# Detect faces with landmarks
|
||||
faces = detector.detect(image)
|
||||
for face in faces:
|
||||
bbox = face.bbox # [x1, y1, x2, y2]
|
||||
confidence = face.confidence
|
||||
landmarks = face.landmarks # 5-point landmarks (5, 2)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Face Recognition Models
|
||||
|
||||
### ArcFace
|
||||
|
||||
State-of-the-art face recognition using additive angular margin loss.
|
||||
|
||||
| Model Name | Backbone | Params | Size | Use Case |
|
||||
|-------------|-------------|--------|-------|----------------------------|
|
||||
| `MNET` ⭐ | MobileNet | 2.0M | 8MB | **Balanced (recommended)** |
|
||||
| `RESNET` | ResNet50 | 43.6M | 166MB | Maximum accuracy |
|
||||
| Model Name | Backbone | Params | Size | Use Case |
|
||||
| ----------- | --------- | ------ | ----- | -------------------------------- |
|
||||
| `MNET` ⭐ | MobileNet | 2.0M | 8MB | **Balanced (recommended)** |
|
||||
| `RESNET` | ResNet50 | 43.6M | 166MB | Maximum accuracy |
|
||||
|
||||
**Dataset**: Trained on MS1M-V2 (5.8M images, 85K identities)
|
||||
**Accuracy**: Benchmark on your own dataset or use standard face verification benchmarks
|
||||
@@ -113,12 +163,12 @@ embedding = recognizer.get_normalized_embedding(image, landmarks)
|
||||
|
||||
Lightweight face recognition optimized for mobile devices.
|
||||
|
||||
| Model Name | Backbone | Params | Size | LFW | CALFW | CPLFW | AgeDB-30 | Use Case |
|
||||
|-----------------|-----------------|--------|------|-------|-------|-------|----------|--------------------|
|
||||
| `MNET_025` | MobileNetV1 0.25| 0.36M | 1MB | 98.76%| 92.02%| 82.37%| 90.02% | Ultra-lightweight |
|
||||
| `MNET_V2` ⭐ | MobileNetV2 | 2.29M | 4MB | 99.55%| 94.87%| 86.89%| 95.16% | **Mobile/Edge** |
|
||||
| `MNET_V3_SMALL` | MobileNetV3-S | 1.25M | 3MB | 99.30%| 93.77%| 85.29%| 92.79% | Mobile optimized |
|
||||
| `MNET_V3_LARGE` | MobileNetV3-L | 3.52M | 10MB | 99.53%| 94.56%| 86.79%| 95.13% | Balanced mobile |
|
||||
| Model Name | Backbone | Params | Size | LFW | CALFW | CPLFW | AgeDB-30 | Use Case |
|
||||
| ----------------- | ---------------- | ------ | ---- | ------ | ------ | ------ | -------- | --------------------- |
|
||||
| `MNET_025` | MobileNetV1 0.25 | 0.36M | 1MB | 98.76% | 92.02% | 82.37% | 90.02% | Ultra-lightweight |
|
||||
| `MNET_V2` ⭐ | MobileNetV2 | 2.29M | 4MB | 99.55% | 94.87% | 86.89% | 95.16% | **Mobile/Edge** |
|
||||
| `MNET_V3_SMALL` | MobileNetV3-S | 1.25M | 3MB | 99.30% | 93.77% | 85.29% | 92.79% | Mobile optimized |
|
||||
| `MNET_V3_LARGE` | MobileNetV3-L | 3.52M | 10MB | 99.53% | 94.56% | 86.79% | 95.13% | Balanced mobile |
|
||||
|
||||
**Dataset**: Trained on MS1M-V2 (5.8M images, 85K identities)
|
||||
**Accuracy**: Evaluated on LFW, CALFW, CPLFW, and AgeDB-30 benchmarks
|
||||
@@ -140,10 +190,10 @@ recognizer = MobileFace(model_name=MobileFaceWeights.MNET_V2)
|
||||
|
||||
Face recognition using angular softmax loss.
|
||||
|
||||
| Model Name | Backbone | Params | Size | LFW | CALFW | CPLFW | AgeDB-30 | Use Case |
|
||||
|-------------|----------|--------|------|-------|-------|-------|----------|----------------------|
|
||||
| `SPHERE20` | Sphere20 | 24.5M | 50MB | 99.67%| 95.61%| 88.75%| 96.58% | Research/Comparison |
|
||||
| `SPHERE36` | Sphere36 | 34.6M | 92MB | 99.72%| 95.64%| 89.92%| 96.83% | Research/Comparison |
|
||||
| Model Name | Backbone | Params | Size | LFW | CALFW | CPLFW | AgeDB-30 | Use Case |
|
||||
| ------------ | -------- | ------ | ---- | ------ | ------ | ------ | -------- | ------------------- |
|
||||
| `SPHERE20` | Sphere20 | 24.5M | 50MB | 99.67% | 95.61% | 88.75% | 96.58% | Research/Comparison |
|
||||
| `SPHERE36` | Sphere36 | 34.6M | 92MB | 99.72% | 95.64% | 89.92% | 96.83% | Research/Comparison |
|
||||
|
||||
**Dataset**: Trained on MS1M-V2 (5.8M images, 85K identities)
|
||||
**Accuracy**: Evaluated on LFW, CALFW, CPLFW, and AgeDB-30 benchmarks
|
||||
@@ -166,9 +216,9 @@ recognizer = SphereFace(model_name=SphereFaceWeights.SPHERE20)
|
||||
|
||||
High-precision facial landmark localization.
|
||||
|
||||
| Model Name | Points | Params | Size | Use Case |
|
||||
|------------|--------|--------|------|-----------------------------|
|
||||
| `2D106` | 106 | 3.7M | 14MB | Face alignment, analysis |
|
||||
| Model Name | Points | Params | Size | Use Case |
|
||||
| ---------- | ------ | ------ | ---- | ------------------------ |
|
||||
| `2D106` | 106 | 3.7M | 14MB | Face alignment, analysis |
|
||||
|
||||
**Note**: Provides 106 facial keypoints for detailed face analysis and alignment
|
||||
|
||||
@@ -183,6 +233,7 @@ landmarks = landmarker.get_landmarks(image, bbox)
|
||||
```
|
||||
|
||||
**Landmark Groups:**
|
||||
|
||||
- Face contour: 0-32 (33 points)
|
||||
- Eyebrows: 33-50 (18 points)
|
||||
- Nose: 51-62 (12 points)
|
||||
@@ -195,9 +246,9 @@ landmarks = landmarker.get_landmarks(image, bbox)
|
||||
|
||||
### Age & Gender Detection
|
||||
|
||||
| Model Name | Attributes | Params | Size | Use Case |
|
||||
|------------|-------------|--------|------|-------------------|
|
||||
| `DEFAULT` | Age, Gender | 2.1M | 8MB | General purpose |
|
||||
| Model Name | Attributes | Params | Size | Use Case |
|
||||
| ----------- | ----------- | ------ | ---- | --------------- |
|
||||
| `DEFAULT` | Age, Gender | 2.1M | 8MB | General purpose |
|
||||
|
||||
**Dataset**: Trained on CelebA
|
||||
**Note**: Accuracy varies by demographic and image quality. Test on your specific use case.
|
||||
@@ -208,18 +259,50 @@ landmarks = landmarker.get_landmarks(image, bbox)
|
||||
from uniface import AgeGender
|
||||
|
||||
predictor = AgeGender()
|
||||
gender, age = predictor.predict(image, bbox)
|
||||
# Returns: ("Male"/"Female", age_in_years)
|
||||
result = predictor.predict(image, bbox)
|
||||
# Returns: AttributeResult with gender, age, sex property
|
||||
# result.gender: 0 for Female, 1 for Male
|
||||
# result.sex: "Female" or "Male"
|
||||
# result.age: age in years
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### FairFace Attributes
|
||||
|
||||
| Model Name | Attributes | Params | Size | Use Case |
|
||||
| ----------- | --------------------- | ------ | ----- | --------------------------- |
|
||||
| `DEFAULT` | Race, Gender, Age Group | - | 44MB | Balanced demographic prediction |
|
||||
|
||||
**Dataset**: Trained on FairFace dataset with balanced demographics
|
||||
**Note**: FairFace provides more equitable predictions across different racial and gender groups
|
||||
|
||||
**Race Categories (7):** White, Black, Latino Hispanic, East Asian, Southeast Asian, Indian, Middle Eastern
|
||||
|
||||
**Age Groups (9):** 0-2, 3-9, 10-19, 20-29, 30-39, 40-49, 50-59, 60-69, 70+
|
||||
|
||||
#### Usage
|
||||
|
||||
```python
|
||||
from uniface import FairFace
|
||||
|
||||
predictor = FairFace()
|
||||
result = predictor.predict(image, bbox)
|
||||
# Returns: AttributeResult with gender, age_group, race, sex property
|
||||
# result.gender: 0 for Female, 1 for Male
|
||||
# result.sex: "Female" or "Male"
|
||||
# result.age_group: "20-29", "30-39", etc.
|
||||
# result.race: "East Asian", "White", etc.
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Emotion Detection
|
||||
|
||||
| Model Name | Classes | Params | Size | Use Case |
|
||||
|--------------|---------|--------|------|-----------------------|
|
||||
| `AFFECNET7` | 7 | 0.5M | 2MB | 7-class emotion |
|
||||
| `AFFECNET8` | 8 | 0.5M | 2MB | 8-class emotion |
|
||||
| Model Name | Classes | Params | Size | Use Case |
|
||||
| ------------- | ------- | ------ | ---- | --------------- |
|
||||
| `AFFECNET7` | 7 | 0.5M | 2MB | 7-class emotion |
|
||||
| `AFFECNET8` | 8 | 0.5M | 2MB | 8-class emotion |
|
||||
|
||||
**Classes (7)**: Neutral, Happy, Sad, Surprise, Fear, Disgust, Anger
|
||||
**Classes (8)**: Above + Contempt
|
||||
@@ -234,120 +317,164 @@ from uniface import Emotion
|
||||
from uniface.constants import DDAMFNWeights
|
||||
|
||||
predictor = Emotion(model_name=DDAMFNWeights.AFFECNET7)
|
||||
emotion, confidence = predictor.predict(image, landmarks)
|
||||
result = predictor.predict(image, landmarks)
|
||||
# result.emotion: predicted emotion label
|
||||
# result.confidence: confidence score
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Model Selection Guide
|
||||
## Gaze Estimation Models
|
||||
|
||||
### By Use Case
|
||||
### MobileGaze Family
|
||||
|
||||
#### Mobile/Edge Devices
|
||||
- **Detection**: `RetinaFace(MNET_025)` or `SCRFD(SCRFD_500M)`
|
||||
- **Recognition**: `MobileFace(MNET_V2)`
|
||||
- **Priority**: Speed, small model size
|
||||
Real-time gaze direction prediction models trained on Gaze360 dataset. Returns pitch (vertical) and yaw (horizontal) angles in radians.
|
||||
|
||||
#### Real-Time Applications (Webcam, Video)
|
||||
- **Detection**: `RetinaFace(MNET_V2)` or `SCRFD(SCRFD_500M)`
|
||||
- **Recognition**: `ArcFace(MNET)`
|
||||
- **Priority**: Speed-accuracy balance
|
||||
| Model Name | Params | Size | MAE* | Use Case |
|
||||
| -------------- | ------ | ------- | ----- | ----------------------------- |
|
||||
| `RESNET18` | 11.7M | 43 MB | 12.84 | Balanced accuracy/speed |
|
||||
| `RESNET34` ⭐ | 24.8M | 81.6 MB | 11.33 | **Recommended default** |
|
||||
| `RESNET50` | 25.6M | 91.3 MB | 11.34 | High accuracy |
|
||||
| `MOBILENET_V2` | 3.5M | 9.59 MB | 13.07 | Mobile/Edge devices |
|
||||
| `MOBILEONE_S0` | 2.1M | 4.8 MB | 12.58 | Lightweight/Real-time |
|
||||
|
||||
#### High-Accuracy Applications (Security, Verification)
|
||||
- **Detection**: `SCRFD(SCRFD_10G)` or `RetinaFace(RESNET34)`
|
||||
- **Recognition**: `ArcFace(RESNET)`
|
||||
- **Priority**: Maximum accuracy
|
||||
*MAE (Mean Absolute Error) in degrees on Gaze360 test set - lower is better
|
||||
|
||||
#### Server/Cloud Deployment
|
||||
- **Detection**: `SCRFD(SCRFD_10G)`
|
||||
- **Recognition**: `ArcFace(RESNET)`
|
||||
- **Priority**: Accuracy, batch processing
|
||||
**Dataset**: Trained on Gaze360 (indoor/outdoor scenes with diverse head poses)
|
||||
**Training**: 200 epochs with classification-based approach (binned angles)
|
||||
|
||||
#### Usage
|
||||
|
||||
```python
|
||||
from uniface import MobileGaze
|
||||
from uniface.constants import GazeWeights
|
||||
import numpy as np
|
||||
|
||||
# Default (recommended)
|
||||
gaze_estimator = MobileGaze() # Uses RESNET34
|
||||
|
||||
# Lightweight model
|
||||
gaze_estimator = MobileGaze(model_name=GazeWeights.MOBILEONE_S0)
|
||||
|
||||
# Estimate gaze from face crop
|
||||
result = gaze_estimator.estimate(face_crop)
|
||||
print(f"Pitch: {np.degrees(result.pitch):.1f}°, Yaw: {np.degrees(result.yaw):.1f}°")
|
||||
```
|
||||
|
||||
**Note**: Requires face crop as input. Use face detection first to obtain bounding boxes.
|
||||
|
||||
---
|
||||
|
||||
### By Hardware
|
||||
## Face Parsing Models
|
||||
|
||||
#### Apple Silicon (M1/M2/M3/M4)
|
||||
**Recommended**: All models work well with ARM64 optimizations (automatically included)
|
||||
### BiSeNet Family
|
||||
|
||||
```bash
|
||||
pip install uniface
|
||||
BiSeNet (Bilateral Segmentation Network) models for semantic face parsing. Segments face images into 19 facial component classes.
|
||||
|
||||
| Model Name | Params | Size | Classes | Use Case |
|
||||
| -------------- | ------ | ------- | ------- | ----------------------------- |
|
||||
| `RESNET18` ⭐ | 13.3M | 50.7 MB | 19 | **Recommended default** |
|
||||
| `RESNET34` | 24.1M | 89.2 MB | 19 | Higher accuracy |
|
||||
|
||||
**19 Facial Component Classes:**
|
||||
1. Background
|
||||
2. Skin
|
||||
3. Left Eyebrow
|
||||
4. Right Eyebrow
|
||||
5. Left Eye
|
||||
6. Right Eye
|
||||
7. Eye Glasses
|
||||
8. Left Ear
|
||||
9. Right Ear
|
||||
10. Ear Ring
|
||||
11. Nose
|
||||
12. Mouth
|
||||
13. Upper Lip
|
||||
14. Lower Lip
|
||||
15. Neck
|
||||
16. Neck Lace
|
||||
17. Cloth
|
||||
18. Hair
|
||||
19. Hat
|
||||
|
||||
**Dataset**: Trained on CelebAMask-HQ
|
||||
**Architecture**: BiSeNet with ResNet backbone
|
||||
**Input Size**: 512×512 (automatically resized)
|
||||
|
||||
#### Usage
|
||||
|
||||
```python
|
||||
from uniface.parsing import BiSeNet
|
||||
from uniface.constants import ParsingWeights
|
||||
from uniface.visualization import vis_parsing_maps
|
||||
import cv2
|
||||
|
||||
# Default (recommended)
|
||||
parser = BiSeNet() # Uses RESNET18
|
||||
|
||||
# Higher accuracy model
|
||||
parser = BiSeNet(model_name=ParsingWeights.RESNET34)
|
||||
|
||||
# Parse face image (already cropped)
|
||||
mask = parser.parse(face_image)
|
||||
|
||||
# Visualize with overlay
|
||||
face_rgb = cv2.cvtColor(face_image, cv2.COLOR_BGR2RGB)
|
||||
vis_result = vis_parsing_maps(face_rgb, mask, save_image=False)
|
||||
|
||||
# mask shape: (H, W) with values 0-18 representing classes
|
||||
print(f"Detected {len(np.unique(mask))} facial components")
|
||||
```
|
||||
|
||||
**Recommended models**:
|
||||
- **Fast**: `SCRFD(SCRFD_500M)` - Lightweight, real-time capable
|
||||
- **Balanced**: `RetinaFace(MNET_V2)` - Good accuracy/speed tradeoff
|
||||
- **Accurate**: `SCRFD(SCRFD_10G)` - High accuracy
|
||||
**Applications:**
|
||||
- Face makeup and beauty applications
|
||||
- Virtual try-on systems
|
||||
- Face editing and manipulation
|
||||
- Facial feature extraction
|
||||
- Portrait segmentation
|
||||
|
||||
**Benchmark on your M4**: `python scripts/run_detection.py --iterations 100`
|
||||
|
||||
#### NVIDIA GPU (CUDA)
|
||||
**Recommended**: Larger models for maximum throughput
|
||||
|
||||
```bash
|
||||
pip install uniface[gpu]
|
||||
```
|
||||
|
||||
**Recommended models**:
|
||||
- **Fast**: `SCRFD(SCRFD_500M)` - Maximum throughput
|
||||
- **Balanced**: `SCRFD(SCRFD_10G)` - Best overall
|
||||
- **Accurate**: `RetinaFace(RESNET34)` - Highest accuracy
|
||||
|
||||
#### CPU Only
|
||||
**Recommended**: Lightweight models
|
||||
|
||||
**Recommended models**:
|
||||
- **Fast**: `RetinaFace(MNET_025)` - Smallest, fastest
|
||||
- **Balanced**: `RetinaFace(MNET_V2)` - Recommended default
|
||||
- **Accurate**: `SCRFD(SCRFD_10G)` - Best accuracy on CPU
|
||||
|
||||
**Note**: FPS values vary significantly based on image size, number of faces, and hardware. Always benchmark on your specific setup.
|
||||
**Note**: Input should be a cropped face image. For full pipeline, use face detection first to obtain face crops.
|
||||
|
||||
---
|
||||
|
||||
## Benchmark Details
|
||||
## Anti-Spoofing Models
|
||||
|
||||
### How to Benchmark
|
||||
### MiniFASNet Family
|
||||
|
||||
Run benchmarks on your own hardware:
|
||||
Lightweight face anti-spoofing models for liveness detection. Detect if a face is real (live) or fake (photo, video replay, mask).
|
||||
|
||||
```bash
|
||||
# Detection speed
|
||||
python scripts/run_detection.py --image assets/test.jpg --iterations 100
|
||||
| Model Name | Size | Scale | Use Case |
|
||||
| ---------- | ------ | ----- | ----------------------------- |
|
||||
| `V1SE` | 1.2 MB | 4.0 | Squeeze-and-excitation variant |
|
||||
| `V2` ⭐ | 1.2 MB | 2.7 | **Recommended default** |
|
||||
|
||||
# Compare models
|
||||
python scripts/run_detection.py --image assets/test.jpg --method retinaface --iterations 100
|
||||
python scripts/run_detection.py --image assets/test.jpg --method scrfd --iterations 100
|
||||
**Dataset**: Trained on face anti-spoofing datasets
|
||||
**Output**: Returns `SpoofingResult(is_real, confidence)` where is_real: True=Real, False=Fake
|
||||
|
||||
#### Usage
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace
|
||||
from uniface.spoofing import MiniFASNet
|
||||
from uniface.constants import MiniFASNetWeights
|
||||
|
||||
# Default (V2, recommended)
|
||||
detector = RetinaFace()
|
||||
spoofer = MiniFASNet()
|
||||
|
||||
# V1SE variant
|
||||
spoofer = MiniFASNet(model_name=MiniFASNetWeights.V1SE)
|
||||
|
||||
# Detect and check liveness
|
||||
faces = detector.detect(image)
|
||||
for face in faces:
|
||||
result = spoofer.predict(image, face.bbox)
|
||||
# result.is_real: True for real, False for fake
|
||||
label = 'Real' if result.is_real else 'Fake'
|
||||
print(f"{label}: {result.confidence:.1%}")
|
||||
```
|
||||
|
||||
### Accuracy Metrics Explained

- **WIDER FACE**: Standard face detection benchmark with three difficulty levels
  - **Easy**: Large faces (>50 px), clear backgrounds
  - **Medium**: Medium-sized faces (30-50 px), moderate occlusion
  - **Hard**: Small faces (<30 px), heavy occlusion, blur

*Accuracy values are from the original papers - see the references below.*

- **Model Size**: ONNX model file size (affects download time and memory)
- **Params**: Number of model parameters (affects inference speed)

### Important Notes

1. **Speed varies by**:
   - Image resolution
   - Number of faces in the image
   - Hardware (CPU/GPU/CoreML)
   - Batch size
   - Operating system

2. **Accuracy varies by**:
   - Image quality
   - Lighting conditions
   - Face pose and occlusion
   - Demographic factors

3. **Always benchmark on your specific use case** before choosing a model.
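
For a quick number on your own machine, a hand-rolled timing loop like the minimal sketch below also works; it only uses the `RetinaFace` detector shown above, and the image path is a placeholder.

```python
import time

import cv2
from uniface import RetinaFace

detector = RetinaFace()
image = cv2.imread('assets/test.jpg')  # placeholder path -- use any local image

# Warm-up run so one-time initialization is not counted.
detector.detect(image)

iterations = 100
start = time.perf_counter()
for _ in range(iterations):
    detector.detect(image)
elapsed = time.perf_counter() - start

print(f'{iterations / elapsed:.1f} FPS ({1000 * elapsed / iterations:.1f} ms per image)')
```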
|
||||
**Note**: Requires face bounding box from a detector. Use with RetinaFace, SCRFD, or YOLOv5Face.
|
||||
|
||||
---
|
||||
|
||||
@@ -374,10 +501,10 @@ model_path = verify_model_weights(
|
||||
|
||||
```bash
|
||||
# Using the provided script
|
||||
python scripts/download_model.py
|
||||
python tools/download_model.py
|
||||
|
||||
# Download specific model
|
||||
python scripts/download_model.py --model MNET_V2
|
||||
python tools/download_model.py --model MNET_V2
|
||||
```
|
||||
|
||||
---
|
||||
@@ -387,13 +514,20 @@ python scripts/download_model.py --model MNET_V2
|
||||
### Model Training & Architectures
|
||||
|
||||
- **RetinaFace Training**: [yakhyo/retinaface-pytorch](https://github.com/yakhyo/retinaface-pytorch) - PyTorch implementation and training code
|
||||
- **YOLOv5-Face Original**: [deepcam-cn/yolov5-face](https://github.com/deepcam-cn/yolov5-face) - Original PyTorch implementation
|
||||
- **YOLOv5-Face ONNX**: [yakhyo/yolov5-face-onnx-inference](https://github.com/yakhyo/yolov5-face-onnx-inference) - ONNX inference implementation
|
||||
- **Face Recognition Training**: [yakhyo/face-recognition](https://github.com/yakhyo/face-recognition) - ArcFace, MobileFace, SphereFace training code
|
||||
- **Gaze Estimation Training**: [yakhyo/gaze-estimation](https://github.com/yakhyo/gaze-estimation) - MobileGaze training code and pretrained weights
|
||||
- **Face Parsing Training**: [yakhyo/face-parsing](https://github.com/yakhyo/face-parsing) - BiSeNet training code and pretrained weights
|
||||
- **Face Anti-Spoofing**: [yakhyo/face-anti-spoofing](https://github.com/yakhyo/face-anti-spoofing) - MiniFASNet ONNX inference (weights from [minivision-ai/Silent-Face-Anti-Spoofing](https://github.com/minivision-ai/Silent-Face-Anti-Spoofing))
|
||||
- **FairFace**: [yakhyo/fairface-onnx](https://github.com/yakhyo/fairface-onnx) - FairFace ONNX inference for race, gender, age prediction
|
||||
- **InsightFace**: [deepinsight/insightface](https://github.com/deepinsight/insightface) - Model architectures and pretrained weights
|
||||
|
||||
### Papers
|
||||
|
||||
- **RetinaFace**: [Single-Shot Multi-Level Face Localisation in the Wild](https://arxiv.org/abs/1905.00641)
|
||||
- **SCRFD**: [Sample and Computation Redistribution for Efficient Face Detection](https://arxiv.org/abs/2105.04714)
|
||||
- **YOLOv5-Face**: [YOLO5Face: Why Reinventing a Face Detector](https://arxiv.org/abs/2105.12931)
|
||||
- **ArcFace**: [Additive Angular Margin Loss for Deep Face Recognition](https://arxiv.org/abs/1801.07698)
|
||||
- **SphereFace**: [Deep Hypersphere Embedding for Face Recognition](https://arxiv.org/abs/1704.08063)
|
||||
|
||||
- **BiSeNet**: [Bilateral Segmentation Network for Real-time Semantic Segmentation](https://arxiv.org/abs/1808.00897)
|
||||
|
||||
**QUICKSTART.md** (379 lines changed)
@@ -39,12 +39,13 @@ faces = detector.detect(image)
|
||||
# Print results
|
||||
for i, face in enumerate(faces):
|
||||
print(f"Face {i+1}:")
|
||||
print(f" Confidence: {face['confidence']:.2f}")
|
||||
print(f" BBox: {face['bbox']}")
|
||||
print(f" Landmarks: {len(face['landmarks'])} points")
|
||||
print(f" Confidence: {face.confidence:.2f}")
|
||||
print(f" BBox: {face.bbox}")
|
||||
print(f" Landmarks: {len(face.landmarks)} points")
|
||||
```
|
||||
|
||||
**Output:**
|
||||
|
||||
```
|
||||
Face 1:
|
||||
Confidence: 0.99
|
||||
@@ -69,12 +70,18 @@ image = cv2.imread("photo.jpg")
|
||||
faces = detector.detect(image)
|
||||
|
||||
# Extract visualization data
|
||||
bboxes = [f['bbox'] for f in faces]
|
||||
scores = [f['confidence'] for f in faces]
|
||||
landmarks = [f['landmarks'] for f in faces]
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
|
||||
# Draw on image
|
||||
draw_detections(image, bboxes, scores, landmarks, vis_threshold=0.6)
|
||||
draw_detections(
|
||||
image=image,
|
||||
bboxes=bboxes,
|
||||
scores=scores,
|
||||
landmarks=landmarks,
|
||||
vis_threshold=0.6,
|
||||
)
|
||||
|
||||
# Save result
|
||||
cv2.imwrite("output.jpg", image)
|
||||
@@ -106,8 +113,8 @@ faces2 = detector.detect(image2)
|
||||
|
||||
if faces1 and faces2:
|
||||
# Extract embeddings
|
||||
emb1 = recognizer.get_normalized_embedding(image1, faces1[0]['landmarks'])
|
||||
emb2 = recognizer.get_normalized_embedding(image2, faces2[0]['landmarks'])
|
||||
emb1 = recognizer.get_normalized_embedding(image1, faces1[0].landmarks)
|
||||
emb2 = recognizer.get_normalized_embedding(image2, faces2[0].landmarks)
|
||||
|
||||
# Compute similarity (cosine similarity)
|
||||
similarity = np.dot(emb1, emb2.T)[0][0]
|
||||
@@ -122,6 +129,7 @@ else:
|
||||
```
|
||||
|
||||
**Similarity thresholds:**

- `> 0.6`: Same person (high confidence)
- `0.4 - 0.6`: Uncertain (manual review)
- `< 0.4`: Different people
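
A minimal helper that applies these thresholds to the normalized embeddings computed above; the 0.6 cut-off is only a suggested starting point, so tune it on your own data:

```python
import numpy as np


def same_person(emb1: np.ndarray, emb2: np.ndarray, threshold: float = 0.6) -> bool:
    """Return True when two normalized embeddings likely belong to the same person."""
    # For L2-normalized embeddings the dot product equals cosine similarity.
    similarity = float(np.dot(emb1.ravel(), emb2.ravel()))
    return similarity > threshold


# Example with the embeddings from the snippet above:
# print(same_person(emb1, emb2))
```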
@@ -151,10 +159,15 @@ while True:
|
||||
faces = detector.detect(frame)
|
||||
|
||||
# Draw results
|
||||
bboxes = [f['bbox'] for f in faces]
|
||||
scores = [f['confidence'] for f in faces]
|
||||
landmarks = [f['landmarks'] for f in faces]
|
||||
draw_detections(frame, bboxes, scores, landmarks)
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
draw_detections(
|
||||
image=frame,
|
||||
bboxes=bboxes,
|
||||
scores=scores,
|
||||
landmarks=landmarks,
|
||||
)
|
||||
|
||||
# Show frame
|
||||
cv2.imshow("UniFace - Press 'q' to quit", frame)
|
||||
@@ -186,11 +199,15 @@ faces = detector.detect(image)
|
||||
|
||||
# Predict attributes
|
||||
for i, face in enumerate(faces):
|
||||
gender, age = age_gender.predict(image, face['bbox'])
|
||||
print(f"Face {i+1}: {gender}, {age} years old")
|
||||
result = age_gender.predict(image, face.bbox)
|
||||
print(f"Face {i+1}: {result.sex}, {result.age} years old")
|
||||
# result.gender: 0=Female, 1=Male
|
||||
# result.sex: "Female" or "Male"
|
||||
# result.age: age in years
|
||||
```
|
||||
|
||||
**Output:**
|
||||
|
||||
```
|
||||
Face 1: Male, 32 years old
|
||||
Face 2: Female, 28 years old
|
||||
@@ -198,6 +215,45 @@ Face 2: Female, 28 years old
|
||||
|
||||
---
|
||||
|
||||
## 5b. FairFace Attributes (2 minutes)
|
||||
|
||||
Detect race, gender, and age group with balanced demographics:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace, FairFace
|
||||
|
||||
# Initialize models
|
||||
detector = RetinaFace()
|
||||
fairface = FairFace()
|
||||
|
||||
# Load image
|
||||
image = cv2.imread("photo.jpg")
|
||||
faces = detector.detect(image)
|
||||
|
||||
# Predict attributes
|
||||
for i, face in enumerate(faces):
|
||||
result = fairface.predict(image, face.bbox)
|
||||
print(f"Face {i+1}: {result.sex}, {result.age_group}, {result.race}")
|
||||
# result.gender: 0=Female, 1=Male
|
||||
# result.sex: "Female" or "Male"
|
||||
# result.age_group: "20-29", "30-39", etc.
|
||||
# result.race: "East Asian", "White", etc.
|
||||
```
|
||||
|
||||
**Output:**
|
||||
|
||||
```
|
||||
Face 1: Male, 30-39, East Asian
|
||||
Face 2: Female, 20-29, White
|
||||
```
|
||||
|
||||
**Race Categories:** White, Black, Latino Hispanic, East Asian, Southeast Asian, Indian, Middle Eastern
|
||||
|
||||
**Age Groups:** 0-2, 3-9, 10-19, 20-29, 30-39, 40-49, 50-59, 60-69, 70+
|
||||
|
||||
---
|
||||
|
||||
## 6. Facial Landmarks (2 minutes)
|
||||
|
||||
Detect 106 facial landmarks:
|
||||
@@ -215,7 +271,7 @@ image = cv2.imread("photo.jpg")
|
||||
faces = detector.detect(image)
|
||||
|
||||
if faces:
|
||||
landmarks = landmarker.get_landmarks(image, faces[0]['bbox'])
|
||||
landmarks = landmarker.get_landmarks(image, faces[0].bbox)
|
||||
print(f"Detected {len(landmarks)} landmarks")
|
||||
|
||||
# Draw landmarks
|
||||
@@ -227,7 +283,223 @@ if faces:
|
||||
|
||||
---
|
||||
|
||||
## 7. Batch Processing (3 minutes)
|
||||
## 7. Gaze Estimation (2 minutes)
|
||||
|
||||
Estimate where a person is looking:
|
||||
|
||||
```python
|
||||
import cv2
|
||||
import numpy as np
|
||||
from uniface import RetinaFace, MobileGaze
|
||||
from uniface.visualization import draw_gaze
|
||||
|
||||
# Initialize models
|
||||
detector = RetinaFace()
|
||||
gaze_estimator = MobileGaze()
|
||||
|
||||
# Load image
|
||||
image = cv2.imread("photo.jpg")
|
||||
faces = detector.detect(image)
|
||||
|
||||
# Estimate gaze for each face
|
||||
for i, face in enumerate(faces):
|
||||
x1, y1, x2, y2 = map(int, face.bbox[:4])
|
||||
face_crop = image[y1:y2, x1:x2]
|
||||
|
||||
if face_crop.size > 0:
|
||||
result = gaze_estimator.estimate(face_crop)
|
||||
print(f"Face {i+1}: pitch={np.degrees(result.pitch):.1f}°, yaw={np.degrees(result.yaw):.1f}°")
|
||||
|
||||
# Draw gaze direction
|
||||
draw_gaze(image, face.bbox, result.pitch, result.yaw)
|
||||
|
||||
cv2.imwrite("gaze_output.jpg", image)
|
||||
```
|
||||
|
||||
**Output:**
|
||||
|
||||
```
|
||||
Face 1: pitch=5.2°, yaw=-12.3°
|
||||
Face 2: pitch=-8.1°, yaw=15.7°
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 8. Face Parsing (2 minutes)
|
||||
|
||||
Segment face into semantic components (skin, eyes, nose, mouth, hair, etc.):
|
||||
|
||||
```python
|
||||
import cv2
|
||||
import numpy as np
|
||||
from uniface.parsing import BiSeNet
|
||||
from uniface.visualization import vis_parsing_maps
|
||||
|
||||
# Initialize parser
|
||||
parser = BiSeNet() # Uses ResNet18 by default
|
||||
|
||||
# Load face image (already cropped)
|
||||
face_image = cv2.imread("face.jpg")
|
||||
|
||||
# Parse face into 19 components
|
||||
mask = parser.parse(face_image)
|
||||
|
||||
# Visualize with overlay
|
||||
face_rgb = cv2.cvtColor(face_image, cv2.COLOR_BGR2RGB)
|
||||
vis_result = vis_parsing_maps(face_rgb, mask, save_image=False)
|
||||
|
||||
# Convert back to BGR for saving
|
||||
vis_bgr = cv2.cvtColor(vis_result, cv2.COLOR_RGB2BGR)
|
||||
cv2.imwrite("parsed_face.jpg", vis_bgr)
|
||||
|
||||
print(f"Detected {len(np.unique(mask))} facial components")
|
||||
```
|
||||
|
||||
**Output:**
|
||||
|
||||
```
|
||||
Detected 12 facial components
|
||||
```
|
||||
|
||||
**19 Facial Component Classes:**

- Background, Skin, Eyebrows (L/R), Eyes (L/R), Eye Glasses
- Ears (L/R), Ear Ring, Nose, Mouth, Lips (Upper/Lower)
- Neck, Neck Lace, Cloth, Hair, Hat
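
To isolate a single component, you can threshold the label map returned by `parser.parse()`. The indices below are assumed from the class order listed above with 0 as Background (so Skin is 1 and Hair is 17); double-check them against the parsing module before relying on them.

```python
import cv2
import numpy as np
from uniface.parsing import BiSeNet

# Assumed indices based on the class list above (0 = Background, 1 = Skin, ..., 17 = Hair).
SKIN, HAIR = 1, 17

parser = BiSeNet()
face_image = cv2.imread('face.jpg')  # already-cropped face, placeholder path
mask = parser.parse(face_image)

# Binary masks for individual components.
skin_mask = (mask == SKIN).astype(np.uint8) * 255
hair_mask = (mask == HAIR).astype(np.uint8) * 255
cv2.imwrite('skin_mask.png', skin_mask)
cv2.imwrite('hair_mask.png', hair_mask)
```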
|
||||
---
|
||||
|
||||
## 9. Face Anonymization (2 minutes)
|
||||
|
||||
Automatically blur faces for privacy protection:
|
||||
|
||||
```python
|
||||
from uniface.privacy import anonymize_faces
|
||||
import cv2
|
||||
|
||||
# One-liner: automatic detection and blurring
|
||||
image = cv2.imread("group_photo.jpg")
|
||||
anonymized = anonymize_faces(image, method='pixelate')
|
||||
cv2.imwrite("anonymized.jpg", anonymized)
|
||||
print("Faces anonymized successfully!")
|
||||
```
|
||||
|
||||
**Manual control with custom parameters:**
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace
|
||||
from uniface.privacy import BlurFace
|
||||
|
||||
# Initialize detector and blurrer
|
||||
detector = RetinaFace()
|
||||
blurrer = BlurFace(method='gaussian', blur_strength=5.0)
|
||||
|
||||
# Detect and anonymize
|
||||
faces = detector.detect(image)
|
||||
anonymized = blurrer.anonymize(image, faces)
|
||||
cv2.imwrite("output.jpg", anonymized)
|
||||
```
|
||||
|
||||
**Available blur methods:**
|
||||
|
||||
```python
|
||||
# Pixelation (news media standard)
|
||||
blurrer = BlurFace(method='pixelate', pixel_blocks=8)
|
||||
|
||||
# Gaussian blur (smooth, natural)
|
||||
blurrer = BlurFace(method='gaussian', blur_strength=4.0)
|
||||
|
||||
# Black boxes (maximum privacy)
|
||||
blurrer = BlurFace(method='blackout', color=(0, 0, 0))
|
||||
|
||||
# Elliptical blur (natural face shape)
|
||||
blurrer = BlurFace(method='elliptical', blur_strength=3.0, margin=30)
|
||||
|
||||
# Median blur (edge-preserving)
|
||||
blurrer = BlurFace(method='median', blur_strength=3.0)
|
||||
```
|
||||
|
||||
**Webcam anonymization:**
|
||||
|
||||
```python
|
||||
import cv2
|
||||
from uniface import RetinaFace
|
||||
from uniface.privacy import BlurFace
|
||||
|
||||
detector = RetinaFace()
|
||||
blurrer = BlurFace(method='pixelate')
|
||||
cap = cv2.VideoCapture(0)
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
faces = detector.detect(frame)
|
||||
frame = blurrer.anonymize(frame, faces, inplace=True)
|
||||
|
||||
cv2.imshow('Anonymized', frame)
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
```
|
||||
|
||||
**Command-line tool:**
|
||||
|
||||
```bash
|
||||
# Anonymize image with pixelation
|
||||
python tools/face_anonymize.py --source photo.jpg
|
||||
|
||||
# Real-time webcam anonymization
|
||||
python tools/face_anonymize.py --source 0 --method gaussian
|
||||
|
||||
# Custom blur strength
|
||||
python tools/face_anonymize.py --source photo.jpg --method gaussian --blur-strength 5.0
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 10. Face Anti-Spoofing (2 minutes)
|
||||
|
||||
Detect if a face is real or fake (photo, video replay, mask):
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace
|
||||
from uniface.spoofing import MiniFASNet
|
||||
|
||||
detector = RetinaFace()
|
||||
spoofer = MiniFASNet() # Uses V2 by default
|
||||
|
||||
image = cv2.imread("photo.jpg")
|
||||
faces = detector.detect(image)
|
||||
|
||||
for i, face in enumerate(faces):
|
||||
result = spoofer.predict(image, face.bbox)
|
||||
# result.is_real: True for real, False for fake
|
||||
label = 'Real' if result.is_real else 'Fake'
|
||||
print(f"Face {i+1}: {label} ({result.confidence:.1%})")
|
||||
```
|
||||
|
||||
**Output:**
|
||||
|
||||
```
|
||||
Face 1: Real (98.5%)
|
||||
```
|
||||
|
||||
**Command-line tool:**
|
||||
|
||||
```bash
|
||||
# Image
|
||||
python tools/spoofing.py --source photo.jpg
|
||||
|
||||
# Webcam
|
||||
python tools/spoofing.py --source 0
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 11. Batch Processing (3 minutes)

Process multiple images:
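
A minimal sketch of such a loop, assuming a folder of `.jpg` images and the `RetinaFace` detector and `draw_detections` helper shown in the earlier sections (folder names are placeholders):

```python
from pathlib import Path

import cv2
from uniface import RetinaFace
from uniface.visualization import draw_detections

detector = RetinaFace()
input_dir = Path('images')    # placeholder input folder
output_dir = Path('outputs')  # placeholder output folder
output_dir.mkdir(exist_ok=True)

for path in sorted(input_dir.glob('*.jpg')):
    image = cv2.imread(str(path))
    if image is None:
        continue

    faces = detector.detect(image)
    draw_detections(
        image=image,
        bboxes=[f.bbox for f in faces],
        scores=[f.confidence for f in faces],
        landmarks=[f.landmarks for f in faces],
    )
    cv2.imwrite(str(output_dir / path.name), image)

print("Done!")
```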
|
||||
|
||||
@@ -260,20 +532,20 @@ print("Done!")
|
||||
|
||||
---
|
||||
|
||||
## 8. Model Selection
|
||||
## 12. Model Selection
|
||||
|
||||
Choose the right model for your use case:
|
||||
|
||||
### Detection Models
|
||||
|
||||
```python
|
||||
from uniface.detection import RetinaFace, SCRFD
|
||||
from uniface.constants import RetinaFaceWeights, SCRFDWeights
|
||||
from uniface.detection import RetinaFace, SCRFD, YOLOv5Face
|
||||
from uniface.constants import RetinaFaceWeights, SCRFDWeights, YOLOv5FaceWeights
|
||||
|
||||
# Fast detection (mobile/edge devices)
|
||||
detector = RetinaFace(
|
||||
model_name=RetinaFaceWeights.MNET_025,
|
||||
conf_thresh=0.7
|
||||
confidence_threshold=0.7
|
||||
)
|
||||
|
||||
# Balanced (recommended)
|
||||
@@ -281,10 +553,17 @@ detector = RetinaFace(
|
||||
model_name=RetinaFaceWeights.MNET_V2
|
||||
)
|
||||
|
||||
# Real-time with high accuracy
|
||||
detector = YOLOv5Face(
|
||||
model_name=YOLOv5FaceWeights.YOLOV5S,
|
||||
confidence_threshold=0.6,
|
||||
nms_thresh=0.5
|
||||
)
|
||||
|
||||
# High accuracy (server/GPU)
|
||||
detector = SCRFD(
|
||||
model_name=SCRFDWeights.SCRFD_10G_KPS,
|
||||
conf_thresh=0.5
|
||||
confidence_threshold=0.5
|
||||
)
|
||||
```
|
||||
|
||||
@@ -304,6 +583,35 @@ recognizer = MobileFace(model_name=MobileFaceWeights.MNET_V2) # Fast, small siz
|
||||
recognizer = SphereFace(model_name=SphereFaceWeights.SPHERE20) # Alternative method
|
||||
```
|
||||
|
||||
### Gaze Estimation Models
|
||||
|
||||
```python
|
||||
from uniface import MobileGaze
|
||||
from uniface.constants import GazeWeights
|
||||
|
||||
# Default (recommended)
|
||||
gaze_estimator = MobileGaze() # Uses RESNET34
|
||||
|
||||
# Lightweight (mobile/edge devices)
|
||||
gaze_estimator = MobileGaze(model_name=GazeWeights.MOBILEONE_S0)
|
||||
|
||||
# High accuracy
|
||||
gaze_estimator = MobileGaze(model_name=GazeWeights.RESNET50)
|
||||
```
|
||||
|
||||
### Face Parsing Models
|
||||
|
||||
```python
|
||||
from uniface.parsing import BiSeNet
|
||||
from uniface.constants import ParsingWeights
|
||||
|
||||
# Default (recommended, 50.7 MB)
|
||||
parser = BiSeNet() # Uses RESNET18
|
||||
|
||||
# Higher accuracy (89.2 MB)
|
||||
parser = BiSeNet(model_name=ParsingWeights.RESNET34)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Common Issues
|
||||
@@ -354,7 +662,23 @@ from uniface import retinaface # Module, not class
|
||||
|
||||
## Next Steps
|
||||
|
||||
- **Detailed Examples**: Check the [examples/](examples/) folder for Jupyter notebooks
|
||||
### Jupyter Notebook Examples
|
||||
|
||||
Explore interactive examples for common tasks:
|
||||
|
||||
| Example | Description | Notebook |
|
||||
|---------|-------------|----------|
|
||||
| **Face Detection** | Detect faces and facial landmarks | [01_face_detection.ipynb](examples/01_face_detection.ipynb) |
|
||||
| **Face Alignment** | Align and crop faces for recognition | [02_face_alignment.ipynb](examples/02_face_alignment.ipynb) |
|
||||
| **Face Verification** | Compare two faces to verify identity | [03_face_verification.ipynb](examples/03_face_verification.ipynb) |
|
||||
| **Face Search** | Find a person in a group photo | [04_face_search.ipynb](examples/04_face_search.ipynb) |
|
||||
| **Face Analyzer** | All-in-one detection, recognition & attributes | [05_face_analyzer.ipynb](examples/05_face_analyzer.ipynb) |
|
||||
| **Face Parsing** | Segment face into semantic components | [06_face_parsing.ipynb](examples/06_face_parsing.ipynb) |
|
||||
| **Face Anonymization** | Blur or pixelate faces for privacy protection | [07_face_anonymization.ipynb](examples/07_face_anonymization.ipynb) |
|
||||
| **Gaze Estimation** | Estimate gaze direction | [08_gaze_estimation.ipynb](examples/08_gaze_estimation.ipynb) |
|
||||
|
||||
### Additional Resources
|
||||
|
||||
- **Model Benchmarks**: See [MODELS.md](MODELS.md) for performance comparisons
|
||||
- **Full Documentation**: Read [README.md](README.md) for complete API reference
|
||||
|
||||
@@ -363,10 +687,9 @@ from uniface import retinaface # Module, not class
|
||||
## References
|
||||
|
||||
- **RetinaFace Training**: [yakhyo/retinaface-pytorch](https://github.com/yakhyo/retinaface-pytorch)
|
||||
- **YOLOv5-Face ONNX**: [yakhyo/yolov5-face-onnx-inference](https://github.com/yakhyo/yolov5-face-onnx-inference)
|
||||
- **Face Recognition Training**: [yakhyo/face-recognition](https://github.com/yakhyo/face-recognition)
|
||||
- **Gaze Estimation Training**: [yakhyo/gaze-estimation](https://github.com/yakhyo/gaze-estimation)
|
||||
- **Face Parsing Training**: [yakhyo/face-parsing](https://github.com/yakhyo/face-parsing)
|
||||
- **FairFace**: [yakhyo/fairface-onnx](https://github.com/yakhyo/fairface-onnx) - Race, gender, age prediction
|
||||
- **InsightFace**: [deepinsight/insightface](https://github.com/deepinsight/insightface)
|
||||
|
||||
---
|
||||
|
||||
Happy coding! 🚀
|
||||
|
||||
|
||||
**README.md** (347 lines changed)
@@ -1,25 +1,34 @@
|
||||
# UniFace: All-in-One Face Analysis Library
|
||||
|
||||
<div align="center">
|
||||
|
||||
[](https://opensource.org/licenses/MIT)
|
||||

|
||||
[](https://pypi.org/project/uniface/)
|
||||
[](https://www.python.org/)
|
||||
[](https://pypi.org/project/uniface/)
|
||||
[](https://github.com/yakhyo/uniface/actions)
|
||||
[](https://pepy.tech/project/uniface)
|
||||
[](https://pepy.tech/project/uniface)
|
||||
[](https://deepwiki.com/yakhyo/uniface)
|
||||
|
||||
</div>
|
||||
|
||||
<div align="center">
|
||||
<img src=".github/logos/logo_web.webp" width=75%>
|
||||
</div>
|
||||
|
||||
**UniFace** is a lightweight, production-ready face analysis library built on ONNX Runtime. It provides high-performance face detection, recognition, landmark detection, and attribute analysis with hardware acceleration support across platforms.
|
||||
**UniFace** is a lightweight, production-ready face analysis library built on ONNX Runtime. It provides high-performance face detection, recognition, landmark detection, face parsing, gaze estimation, and attribute analysis with hardware acceleration support across platforms.
|
||||
|
||||
---
|
||||
|
||||
## Features

- **High-Speed Face Detection**: ONNX-optimized RetinaFace and SCRFD models
- **High-Speed Face Detection**: ONNX-optimized RetinaFace, SCRFD, and YOLOv5-Face models
- **Facial Landmark Detection**: Accurate 106-point landmark localization
- **Face Recognition**: ArcFace, MobileFace, and SphereFace embeddings
- **Attribute Analysis**: Age, gender, and emotion detection
- **Face Parsing**: BiSeNet-based semantic segmentation with 19 facial component classes
- **Gaze Estimation**: Real-time gaze direction prediction with MobileGaze
- **Attribute Analysis**: Age, gender, race (FairFace), and emotion detection
- **Anti-Spoofing**: Face liveness detection with MiniFASNet models
- **Face Anonymization**: Privacy-preserving face blurring with 5 methods (pixelate, gaussian, blackout, elliptical, median)
- **Face Alignment**: Precise alignment for downstream tasks
- **Hardware Acceleration**: ARM64 optimizations (Apple Silicon), CUDA (NVIDIA), CPU fallback
- **Simple API**: Intuitive factory functions and clean interfaces

@@ -56,6 +65,7 @@ pip install uniface[gpu]
```

**Requirements:**

- CUDA 11.x or 12.x
- cuDNN 8.x
- See [ONNX Runtime GPU requirements](https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html)
@@ -95,9 +105,9 @@ faces = detector.detect(image)

# Process results
for face in faces:
    bbox = face['bbox']  # [x1, y1, x2, y2]
    confidence = face['confidence']
    landmarks = face['landmarks']  # 5-point landmarks
    bbox = face.bbox  # np.ndarray [x1, y1, x2, y2]
    confidence = face.confidence
    landmarks = face.landmarks  # np.ndarray (5, 2) landmarks
    print(f"Face detected with confidence: {confidence:.2f}")
```
@@ -115,8 +125,8 @@ recognizer = ArcFace()
faces1 = detector.detect(image1)
faces2 = detector.detect(image2)

embedding1 = recognizer.get_normalized_embedding(image1, faces1[0]['landmarks'])
embedding2 = recognizer.get_normalized_embedding(image2, faces2[0]['landmarks'])
embedding1 = recognizer.get_normalized_embedding(image1, faces1[0].landmarks)
embedding2 = recognizer.get_normalized_embedding(image2, faces2[0].landmarks)

# Compare faces
similarity = compute_similarity(embedding1, embedding2)
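In this verification example, `compute_similarity` returns a similarity score; deciding "same person / different person" is then a thresholding step. A minimal sketch follows, assuming a cut-off of 0.35 (the default used by the repository's comparison script further down in this diff); the right threshold depends on the recognizer and should be tuned on your own data.

```python
# Sketch only: turn the similarity score into a match decision.
# The 0.35 cut-off is an assumption (default of the repo's comparison script), not a fixed API value.
THRESHOLD = 0.35
is_same_person = similarity > THRESHOLD
print(f"Similarity: {similarity:.4f} -> {'Same person' if is_same_person else 'Different person'}")
```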
@@ -132,7 +142,7 @@ detector = RetinaFace()
landmarker = Landmark106()

faces = detector.detect(image)
landmarks = landmarker.get_landmarks(image, faces[0]['bbox'])
landmarks = landmarker.get_landmarks(image, faces[0].bbox)
# Returns 106 (x, y) landmark points
```
@@ -145,10 +155,145 @@ detector = RetinaFace()
age_gender = AgeGender()

faces = detector.detect(image)
gender, age = age_gender.predict(image, faces[0]['bbox'])
print(f"{gender}, {age} years old")
result = age_gender.predict(image, faces[0].bbox)
print(f"{result.sex}, {result.age} years old")
# result.gender: 0=Female, 1=Male
# result.sex: "Female" or "Male"
# result.age: age in years
```

### FairFace Attributes (Race, Gender, Age Group)

```python
from uniface import RetinaFace, FairFace

detector = RetinaFace()
fairface = FairFace()

faces = detector.detect(image)
result = fairface.predict(image, faces[0].bbox)
print(f"{result.sex}, {result.age_group}, {result.race}")
# result.gender: 0=Female, 1=Male
# result.sex: "Female" or "Male"
# result.age_group: "20-29", "30-39", etc.
# result.race: "East Asian", "White", etc.
```
### Gaze Estimation
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace, MobileGaze
|
||||
from uniface.visualization import draw_gaze
|
||||
import numpy as np
|
||||
|
||||
detector = RetinaFace()
|
||||
gaze_estimator = MobileGaze()
|
||||
|
||||
faces = detector.detect(image)
|
||||
for face in faces:
|
||||
x1, y1, x2, y2 = map(int, face.bbox[:4])
|
||||
face_crop = image[y1:y2, x1:x2]
|
||||
|
||||
result = gaze_estimator.estimate(face_crop)
|
||||
print(f"Gaze: pitch={np.degrees(result.pitch):.1f}°, yaw={np.degrees(result.yaw):.1f}°")
|
||||
|
||||
# Visualize
|
||||
draw_gaze(image, face.bbox, result.pitch, result.yaw)
|
||||
```
|
||||
|
||||
### Face Parsing

```python
import cv2
import numpy as np

from uniface.parsing import BiSeNet
from uniface.visualization import vis_parsing_maps

# Initialize parser
parser = BiSeNet()  # Uses ResNet18 by default

# Parse face image (already cropped)
mask = parser.parse(face_image)

# Visualize with overlay
face_rgb = cv2.cvtColor(face_image, cv2.COLOR_BGR2RGB)
vis_result = vis_parsing_maps(face_rgb, mask, save_image=False)

# mask contains 19 classes: skin, eyes, nose, mouth, hair, etc.
print(f"Unique classes: {len(np.unique(mask))}")
```
### Face Anti-Spoofing
|
||||
|
||||
Detect if a face is real or fake (photo, video replay, mask):
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace
|
||||
from uniface.spoofing import MiniFASNet
|
||||
|
||||
detector = RetinaFace()
|
||||
spoofer = MiniFASNet() # Uses V2 by default
|
||||
|
||||
faces = detector.detect(image)
|
||||
for face in faces:
|
||||
result = spoofer.predict(image, face.bbox)
|
||||
# result.is_real: True for real, False for fake
|
||||
# result.confidence: confidence score
|
||||
label = 'Real' if result.is_real else 'Fake'
|
||||
print(f"{label}: {result.confidence:.1%}")
|
||||
```
|
||||
|
||||
### Face Anonymization
|
||||
|
||||
Protect privacy by blurring or pixelating faces with 5 different methods:
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace
|
||||
from uniface.privacy import BlurFace, anonymize_faces
|
||||
import cv2
|
||||
|
||||
# Method 1: One-liner with automatic detection
|
||||
image = cv2.imread("photo.jpg")
|
||||
anonymized = anonymize_faces(image, method='pixelate')
|
||||
cv2.imwrite("anonymized.jpg", anonymized)
|
||||
|
||||
# Method 2: Manual control with custom parameters
|
||||
detector = RetinaFace()
|
||||
blurrer = BlurFace(method='gaussian', blur_strength=5.0)
|
||||
|
||||
faces = detector.detect(image)
|
||||
anonymized = blurrer.anonymize(image, faces)
|
||||
|
||||
# Available blur methods:
|
||||
methods = {
|
||||
'pixelate': BlurFace(method='pixelate', pixel_blocks=10), # Blocky effect (news media standard)
|
||||
'gaussian': BlurFace(method='gaussian', blur_strength=3.0), # Smooth, natural blur
|
||||
'blackout': BlurFace(method='blackout', color=(0, 0, 0)), # Solid color boxes (maximum privacy)
|
||||
'elliptical': BlurFace(method='elliptical', margin=20), # Soft oval blur (natural face shape)
|
||||
'median': BlurFace(method='median', blur_strength=3.0) # Edge-preserving blur
|
||||
}
|
||||
|
||||
# Real-time webcam anonymization
|
||||
cap = cv2.VideoCapture(0)
|
||||
detector = RetinaFace()
|
||||
blurrer = BlurFace(method='pixelate')
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
faces = detector.detect(frame)
|
||||
frame = blurrer.anonymize(frame, faces, inplace=True)
|
||||
|
||||
cv2.imshow('Anonymized', frame)
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
```
|
||||
|
||||
|
||||
---
|
||||
|
||||
## Documentation
|
||||
@@ -167,16 +312,20 @@ print(f"{gender}, {age} years old")
|
||||
from uniface.detection import RetinaFace, SCRFD
|
||||
from uniface.recognition import ArcFace
|
||||
from uniface.landmark import Landmark106
|
||||
from uniface.privacy import BlurFace, anonymize_faces
|
||||
|
||||
from uniface.constants import SCRFDWeights
|
||||
|
||||
# Create detector with default settings
|
||||
detector = RetinaFace()
|
||||
|
||||
# Create with custom config
|
||||
detector = SCRFD(
|
||||
model_name='scrfd_10g_kps',
|
||||
conf_thresh=0.8,
|
||||
model_name=SCRFDWeights.SCRFD_10G_KPS, # SCRFDWeights.SCRFD_500M_KPS
|
||||
confidence_threshold=0.4,
|
||||
input_size=(640, 640)
|
||||
)
|
||||
# Or with default settings: detector = SCRFD()
|
||||
|
||||
# Recognition and landmarks
|
||||
recognizer = ArcFace()
|
||||
@@ -186,15 +335,24 @@ landmarker = Landmark106()
|
||||
### Direct Model Instantiation
|
||||
|
||||
```python
|
||||
from uniface import RetinaFace, SCRFD, ArcFace, MobileFace, SphereFace
|
||||
from uniface.constants import RetinaFaceWeights
|
||||
from uniface import RetinaFace, SCRFD, YOLOv5Face, ArcFace, MobileFace, SphereFace
|
||||
from uniface.constants import RetinaFaceWeights, YOLOv5FaceWeights
|
||||
|
||||
# Detection
|
||||
detector = RetinaFace(
|
||||
model_name=RetinaFaceWeights.MNET_V2,
|
||||
conf_thresh=0.5,
|
||||
nms_thresh=0.4
|
||||
confidence_threshold=0.5,
|
||||
nms_threshold=0.4
|
||||
)
|
||||
# Or detector = RetinaFace()
|
||||
|
||||
# YOLOv5-Face detection
|
||||
detector = YOLOv5Face(
|
||||
model_name=YOLOv5FaceWeights.YOLOV5S,
|
||||
confidence_threshold=0.6,
|
||||
nms_threshold=0.5
|
||||
)
|
||||
# Or detector = YOLOv5Face()
|
||||
|
||||
# Recognition
|
||||
recognizer = ArcFace() # Uses default weights
|
||||
@@ -208,28 +366,77 @@ recognizer = SphereFace() # Angular softmax alternative
|
||||
from uniface import detect_faces
|
||||
|
||||
# One-line face detection
|
||||
faces = detect_faces(image, method='retinaface', conf_thresh=0.8)
|
||||
faces = detect_faces(image, method='retinaface', confidence_threshold=0.8) # methods: retinaface, scrfd, yolov5face
|
||||
```
|
||||
|
||||
### Key Parameters (quick reference)

**Detection**

| Class | Key params (defaults) | Notes |
| --- | --- | --- |
| `RetinaFace` | `model_name=RetinaFaceWeights.MNET_V2`, `confidence_threshold=0.5`, `nms_threshold=0.4`, `input_size=(640, 640)`, `dynamic_size=False` | Supports 5-point landmarks |
| `SCRFD` | `model_name=SCRFDWeights.SCRFD_10G_KPS`, `confidence_threshold=0.5`, `nms_threshold=0.4`, `input_size=(640, 640)` | Supports 5-point landmarks |
| `YOLOv5Face` | `model_name=YOLOv5FaceWeights.YOLOV5S`, `confidence_threshold=0.6`, `nms_threshold=0.5`, `input_size=640` (fixed) | Supports 5-point landmarks; models: YOLOV5N/S/M; `input_size` must be 640 |

**Recognition**

| Class | Key params (defaults) | Notes |
| --- | --- | --- |
| `ArcFace` | `model_name=ArcFaceWeights.MNET` | Returns 512-dim normalized embeddings |
| `MobileFace` | `model_name=MobileFaceWeights.MNET_V2` | Lightweight embeddings |
| `SphereFace` | `model_name=SphereFaceWeights.SPHERE20` | Angular softmax variant |

**Landmark & Attributes**

| Class | Key params (defaults) | Notes |
| --- | --- | --- |
| `Landmark106` | No required params | 106-point landmarks |
| `AgeGender` | `model_name=AgeGenderWeights.DEFAULT`; `input_size` auto-detected | Returns `AttributeResult` with gender, age |
| `FairFace` | `model_name=FairFaceWeights.DEFAULT`, `input_size=(224, 224)` | Returns `AttributeResult` with gender, age_group, race |
| `Emotion` | `model_weights=DDAMFNWeights.AFFECNET7`, `input_size=(112, 112)` | Requires 5-point landmarks; TorchScript |

**Gaze Estimation**

| Class | Key params (defaults) | Notes |
| --- | --- | --- |
| `MobileGaze` | `model_name=GazeWeights.RESNET34` | Returns `GazeResult(pitch, yaw)` in radians; trained on Gaze360 |

**Face Parsing**

| Class | Key params (defaults) | Notes |
| --- | --- | --- |
| `BiSeNet` | `model_name=ParsingWeights.RESNET18`, `input_size=(512, 512)` | 19 facial component classes; BiSeNet architecture with ResNet backbone |

**Anti-Spoofing**

| Class | Key params (defaults) | Notes |
| --- | --- | --- |
| `MiniFASNet` | `model_name=MiniFASNetWeights.V2` | Returns `SpoofingResult(is_real, confidence)` |
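The defaults in the tables above map directly onto constructor keyword arguments. The sketch below spells a few of them out explicitly; it assumes the gaze, parsing, and spoofing weight enums (`GazeWeights`, `ParsingWeights`, `MiniFASNetWeights`) are exposed from `uniface.constants` like the detector enums shown earlier, so check the constants module of your installed version.

```python
from uniface import ArcFace, MobileGaze, RetinaFace
from uniface.constants import RetinaFaceWeights
from uniface.parsing import BiSeNet
from uniface.spoofing import MiniFASNet

# Assumption: these enums live in uniface.constants alongside RetinaFaceWeights/SCRFDWeights.
from uniface.constants import GazeWeights, MiniFASNetWeights, ParsingWeights

# Spelling out the table defaults explicitly (equivalent to calling the bare constructors)
detector = RetinaFace(
    model_name=RetinaFaceWeights.MNET_V2,
    confidence_threshold=0.5,
    nms_threshold=0.4,
    input_size=(640, 640),
)
recognizer = ArcFace()                                  # ArcFaceWeights.MNET, 512-dim embeddings
gaze = MobileGaze(model_name=GazeWeights.RESNET34)      # GazeResult(pitch, yaw) in radians
parser = BiSeNet(model_name=ParsingWeights.RESNET18)    # 19-class segmentation mask
spoofer = MiniFASNet(model_name=MiniFASNetWeights.V2)   # SpoofingResult(is_real, confidence)
```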
---

## Model Performance

### Face Detection (WIDER FACE Dataset)

| Model              | Easy   | Medium | Hard   | Use Case                |
|--------------------|--------|--------|--------|-------------------------|
| retinaface_mnet025 | 88.48% | 87.02% | 80.61% | Mobile/Edge devices     |
| retinaface_mnet_v2 | 91.70% | 91.03% | 86.60% | Balanced (recommended)  |
| retinaface_r34     | 94.16% | 93.12% | 88.90% | High accuracy           |
| scrfd_500m         | 90.57% | 88.12% | 68.51% | Real-time applications  |
| scrfd_10g          | 95.16% | 93.87% | 83.05% | Best accuracy/speed     |

| Model              | Easy   | Medium | Hard   | Use Case               |
| ------------------ | ------ | ------ | ------ | ---------------------- |
| retinaface_mnet025 | 88.48% | 87.02% | 80.61% | Mobile/Edge devices    |
| retinaface_mnet_v2 | 91.70% | 91.03% | 86.60% | Balanced (recommended) |
| retinaface_r34     | 94.16% | 93.12% | 88.90% | High accuracy          |
| scrfd_500m         | 90.57% | 88.12% | 68.51% | Real-time applications |
| scrfd_10g          | 95.16% | 93.87% | 83.05% | Best accuracy/speed    |
| yolov5n_face       | 93.61% | 91.52% | 80.53% | Lightweight/Mobile     |
| yolov5s_face       | 94.33% | 92.61% | 83.15% | Real-time + accuracy   |
| yolov5m_face       | 95.30% | 93.76% | 85.28% | High accuracy          |

*Accuracy values from original papers: [RetinaFace](https://arxiv.org/abs/1905.00641), [SCRFD](https://arxiv.org/abs/2105.04714)*
_Accuracy values from original papers: [RetinaFace](https://arxiv.org/abs/1905.00641), [SCRFD](https://arxiv.org/abs/2105.04714), [YOLOv5-Face](https://arxiv.org/abs/2105.12931)_

**Benchmark on your hardware:**

```bash
python scripts/run_detection.py --image assets/test.jpg --iterations 100
python tools/detection.py --source assets/test.jpg --iterations 100
```

See [MODELS.md](MODELS.md) for detailed model information and selection guide.
@@ -242,6 +449,21 @@ See [MODELS.md](MODELS.md) for detailed model information and selection guide.

## Examples

### Jupyter Notebooks

Interactive examples covering common face analysis tasks:

| Example | Description | Notebook |
|---------|-------------|----------|
| **Face Detection** | Detect faces and facial landmarks | [01_face_detection.ipynb](examples/01_face_detection.ipynb) |
| **Face Alignment** | Align and crop faces for recognition | [02_face_alignment.ipynb](examples/02_face_alignment.ipynb) |
| **Face Verification** | Compare two faces to verify identity | [03_face_verification.ipynb](examples/03_face_verification.ipynb) |
| **Face Search** | Find a person in a group photo | [04_face_search.ipynb](examples/04_face_search.ipynb) |
| **Face Analyzer** | All-in-one detection, recognition & attributes | [05_face_analyzer.ipynb](examples/05_face_analyzer.ipynb) |
| **Face Parsing** | Segment face into semantic components | [06_face_parsing.ipynb](examples/06_face_parsing.ipynb) |
| **Face Anonymization** | Blur or pixelate faces for privacy protection | [07_face_anonymization.ipynb](examples/07_face_anonymization.ipynb) |
| **Gaze Estimation** | Estimate gaze direction from face images | [08_gaze_estimation.ipynb](examples/08_gaze_estimation.ipynb) |
### Webcam Face Detection
|
||||
|
||||
```python
|
||||
@@ -260,11 +482,17 @@ while True:
|
||||
faces = detector.detect(frame)
|
||||
|
||||
# Extract data for visualization
|
||||
bboxes = [f['bbox'] for f in faces]
|
||||
scores = [f['confidence'] for f in faces]
|
||||
landmarks = [f['landmarks'] for f in faces]
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
|
||||
draw_detections(frame, bboxes, scores, landmarks, vis_threshold=0.6)
|
||||
draw_detections(
|
||||
image=frame,
|
||||
bboxes=bboxes,
|
||||
scores=scores,
|
||||
landmarks=landmarks,
|
||||
vis_threshold=0.6,
|
||||
)
|
||||
|
||||
cv2.imshow("Face Detection", frame)
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
@@ -290,7 +518,7 @@ for person_id, image_path in person_images.items():
    faces = detector.detect(image)
    if faces:
        embedding = recognizer.get_normalized_embedding(
            image, faces[0]['landmarks']
            image, faces[0].landmarks
        )
        database[person_id] = embedding

@@ -299,7 +527,7 @@ query_image = cv2.imread("query.jpg")
query_faces = detector.detect(query_image)
if query_faces:
    query_embedding = recognizer.get_normalized_embedding(
        query_image, query_faces[0]['landmarks']
        query_image, query_faces[0].landmarks
    )

# Find best match
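The hunk cuts off before the matching step. A minimal sketch of what "find best match" could look like with `compute_similarity` is given below; the `uniface.face_utils` import path comes from the scripts later in this diff, while the loop variables and the 0.35 threshold are illustrative assumptions rather than the notebook's exact code.

```python
from uniface.face_utils import compute_similarity

# Compare the query embedding against every enrolled identity and keep the best score.
best_id, best_score = None, -1.0
for person_id, embedding in database.items():
    score = compute_similarity(query_embedding, embedding)
    if score > best_score:
        best_id, best_score = person_id, score

# Assumed decision threshold; tune it for your recognizer.
if best_score > 0.35:
    print(f"Best match: {best_id} (similarity {best_score:.2f})")
else:
    print("No match found")
```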
@@ -394,12 +622,29 @@ pip install -e ".[dev]"
|
||||
|
||||
# Run tests
|
||||
pytest
|
||||
|
||||
# Format code
|
||||
black uniface/
|
||||
isort uniface/
|
||||
```
|
||||
|
||||
### Code Formatting
|
||||
|
||||
This project uses [Ruff](https://docs.astral.sh/ruff/) for linting and formatting.
|
||||
|
||||
```bash
|
||||
# Format code
|
||||
ruff format .
|
||||
|
||||
# Check for linting errors
|
||||
ruff check .
|
||||
|
||||
# Auto-fix linting errors
|
||||
ruff check . --fix
|
||||
```
|
||||
|
||||
Ruff configuration is in `pyproject.toml`. Key settings:
|
||||
|
||||
- Line length: 120
|
||||
- Python target: 3.10+
|
||||
- Import sorting: `uniface` as first-party
|
||||
|
||||
### Project Structure

```
@@ -408,34 +653,32 @@ uniface/
│   ├── detection/          # Face detection models
│   ├── recognition/        # Face recognition models
│   ├── landmark/           # Landmark detection
│   ├── parsing/            # Face parsing
│   ├── gaze/               # Gaze estimation
│   ├── attribute/          # Age, gender, emotion
│   ├── spoofing/           # Face anti-spoofing
│   ├── privacy/            # Face anonymization & blurring
│   ├── onnx_utils.py       # ONNX Runtime utilities
│   ├── model_store.py      # Model download & caching
│   └── visualization.py    # Drawing utilities
├── tests/                  # Unit tests
├── examples/               # Example notebooks
└── scripts/                # Utility scripts
└── tools/                  # CLI utilities
```

---
## References

### Model Training & Architectures

- **RetinaFace Training**: [yakhyo/retinaface-pytorch](https://github.com/yakhyo/retinaface-pytorch) - PyTorch implementation and training code
- **YOLOv5-Face ONNX**: [yakhyo/yolov5-face-onnx-inference](https://github.com/yakhyo/yolov5-face-onnx-inference) - ONNX inference implementation
- **Face Recognition Training**: [yakhyo/face-recognition](https://github.com/yakhyo/face-recognition) - ArcFace, MobileFace, SphereFace training code
- **Face Parsing Training**: [yakhyo/face-parsing](https://github.com/yakhyo/face-parsing) - BiSeNet face parsing training code and pretrained weights
- **Gaze Estimation Training**: [yakhyo/gaze-estimation](https://github.com/yakhyo/gaze-estimation) - MobileGaze training code and pretrained weights
- **Face Anti-Spoofing**: [yakhyo/face-anti-spoofing](https://github.com/yakhyo/face-anti-spoofing) - MiniFASNet ONNX inference (weights from [minivision-ai/Silent-Face-Anti-Spoofing](https://github.com/minivision-ai/Silent-Face-Anti-Spoofing))
- **FairFace**: [yakhyo/fairface-onnx](https://github.com/yakhyo/fairface-onnx) - FairFace ONNX inference for race, gender, age prediction
- **InsightFace**: [deepinsight/insightface](https://github.com/deepinsight/insightface) - Model architectures and pretrained weights

### Papers

- **RetinaFace**: [Single-Shot Multi-Level Face Localisation in the Wild](https://arxiv.org/abs/1905.00641)
- **SCRFD**: [Sample and Computation Redistribution for Efficient Face Detection](https://arxiv.org/abs/2105.04714)
- **ArcFace**: [Additive Angular Margin Loss for Deep Face Recognition](https://arxiv.org/abs/1801.07698)

---

## Contributing

Contributions are welcome! Please open an issue or submit a pull request on [GitHub](https://github.com/yakhyo/uniface).
BIN assets/einstien.png (new file, 1.3 MiB): Binary file not shown.
BIN assets/scientists.png (new file, 1.9 MiB): Binary file not shown.
312 examples/01_face_detection.ipynb (new file): File diff suppressed because one or more lines are too long
240 examples/02_face_alignment.ipynb (new file): File diff suppressed because one or more lines are too long
273 examples/03_face_verification.ipynb (new file): File diff suppressed because one or more lines are too long
368 examples/04_face_search.ipynb (new file): File diff suppressed because one or more lines are too long
327 examples/05_face_analyzer.ipynb (new file): File diff suppressed because one or more lines are too long
387 examples/06_face_parsing.ipynb (new file): File diff suppressed because one or more lines are too long
325 examples/07_face_anonymization.ipynb (new file): File diff suppressed because one or more lines are too long
270 examples/08_gaze_estimation.ipynb (new file): File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
129 pyproject.toml
@@ -1,12 +1,44 @@
|
||||
[project]
|
||||
name = "uniface"
|
||||
version = "1.1.0"
|
||||
description = "UniFace: A Comprehensive Library for Face Detection, Recognition, Landmark Analysis, Age, and Gender Detection"
|
||||
version = "2.0.0"
|
||||
description = "UniFace: A Comprehensive Library for Face Detection, Recognition, Landmark Analysis, Face Parsing, Gaze Estimation, Age, and Gender Detection"
|
||||
readme = "README.md"
|
||||
license = { text = "MIT" }
|
||||
authors = [
|
||||
{ name = "Yakhyokhuja Valikhujaev", email = "yakhyo9696@gmail.com" }
|
||||
authors = [{ name = "Yakhyokhuja Valikhujaev", email = "yakhyo9696@gmail.com" }]
|
||||
maintainers = [
|
||||
{ name = "Yakhyokhuja Valikhujaev", email = "yakhyo9696@gmail.com" },
|
||||
]
|
||||
|
||||
requires-python = ">=3.11,<3.14"
|
||||
keywords = [
|
||||
"face-detection",
|
||||
"face-recognition",
|
||||
"facial-landmarks",
|
||||
"face-parsing",
|
||||
"face-segmentation",
|
||||
"gaze-estimation",
|
||||
"age-detection",
|
||||
"gender-detection",
|
||||
"computer-vision",
|
||||
"deep-learning",
|
||||
"onnx",
|
||||
"onnxruntime",
|
||||
"face-analysis",
|
||||
"bisenet",
|
||||
]
|
||||
|
||||
classifiers = [
|
||||
"Development Status :: 4 - Beta",
|
||||
"Intended Audience :: Developers",
|
||||
"Intended Audience :: Science/Research",
|
||||
"License :: OSI Approved :: MIT License",
|
||||
"Operating System :: OS Independent",
|
||||
"Programming Language :: Python :: 3",
|
||||
"Programming Language :: Python :: 3.11",
|
||||
"Programming Language :: Python :: 3.12",
|
||||
"Programming Language :: Python :: 3.13",
|
||||
]
|
||||
|
||||
dependencies = [
|
||||
"numpy>=1.21.0",
|
||||
"opencv-python>=4.5.0",
|
||||
@@ -14,24 +46,103 @@ dependencies = [
|
||||
"onnxruntime>=1.16.0",
|
||||
"scikit-image>=0.19.0",
|
||||
"requests>=2.28.0",
|
||||
"tqdm>=4.64.0"
|
||||
"tqdm>=4.64.0",
|
||||
]
|
||||
requires-python = ">=3.10"
|
||||
|
||||
[project.optional-dependencies]
|
||||
dev = ["pytest>=7.0.0"]
|
||||
dev = ["pytest>=7.0.0", "ruff>=0.4.0"]
|
||||
gpu = ["onnxruntime-gpu>=1.16.0"]
|
||||
|
||||
[project.urls]
|
||||
Homepage = "https://github.com/yakhyo/uniface"
|
||||
Repository = "https://github.com/yakhyo/uniface"
|
||||
Documentation = "https://github.com/yakhyo/uniface/blob/main/README.md"
|
||||
"Quick Start" = "https://github.com/yakhyo/uniface/blob/main/QUICKSTART.md"
|
||||
"Model Zoo" = "https://github.com/yakhyo/uniface/blob/main/MODELS.md"
|
||||
|
||||
[build-system]
|
||||
requires = ["setuptools>=64", "wheel"]
|
||||
build-backend = "setuptools.build_meta"
|
||||
|
||||
[tool.setuptools]
|
||||
packages = { find = {} }
|
||||
packages = { find = { where = ["."], include = ["uniface*"] } }
|
||||
|
||||
[tool.setuptools.package-data]
|
||||
"uniface" = ["*.txt", "*.md"]
|
||||
uniface = ["py.typed"]
|
||||
|
||||
[tool.ruff]
|
||||
line-length = 120
|
||||
target-version = "py311"
|
||||
exclude = [
|
||||
".git",
|
||||
".ruff_cache",
|
||||
"__pycache__",
|
||||
"build",
|
||||
"dist",
|
||||
"*.egg-info",
|
||||
".venv",
|
||||
"venv",
|
||||
".pytest_cache",
|
||||
".mypy_cache",
|
||||
"*.ipynb",
|
||||
]
|
||||
|
||||
[tool.ruff.format]
|
||||
quote-style = "single"
|
||||
docstring-code-format = true
|
||||
|
||||
[tool.ruff.lint]
|
||||
select = [
|
||||
"E", # pycodestyle errors
|
||||
"F", # pyflakes
|
||||
"I", # isort
|
||||
"W", # pycodestyle warnings
|
||||
"UP", # pyupgrade (modern Python syntax)
|
||||
"B", # flake8-bugbear
|
||||
"C4", # flake8-comprehensions
|
||||
"SIM", # flake8-simplify
|
||||
"RUF", # Ruff-specific rules
|
||||
]
|
||||
ignore = [
|
||||
"E501", # Line too long (handled by formatter)
|
||||
"B008", # Function call in default argument (common in FastAPI/Click)
|
||||
"SIM108", # Use ternary operator (can reduce readability)
|
||||
"RUF022", # Allow logical grouping in __all__ instead of alphabetical sorting
|
||||
]
|
||||
|
||||
[tool.ruff.lint.flake8-quotes]
|
||||
docstring-quotes = "double"
|
||||
|
||||
[tool.ruff.lint.isort]
|
||||
force-single-line = false
|
||||
force-sort-within-sections = true
|
||||
known-first-party = ["uniface"]
|
||||
section-order = [
|
||||
"future",
|
||||
"standard-library",
|
||||
"third-party",
|
||||
"first-party",
|
||||
"local-folder",
|
||||
]
|
||||
|
||||
[tool.ruff.lint.pydocstyle]
|
||||
convention = "google"
|
||||
|
||||
[tool.mypy]
|
||||
python_version = "3.11"
|
||||
warn_return_any = false
|
||||
warn_unused_ignores = true
|
||||
ignore_missing_imports = true
|
||||
exclude = ["tests/", "scripts/", "examples/"]
|
||||
# Disable strict return type checking for numpy operations
|
||||
disable_error_code = ["no-any-return"]
|
||||
|
||||
[tool.bandit]
|
||||
exclude_dirs = ["tests", "scripts", "examples"]
|
||||
skips = ["B101", "B614"] # B101: assert, B614: torch.jit.load (models are SHA256 verified)
|
||||
|
||||
[tool.pytest.ini_options]
|
||||
testpaths = ["tests"]
|
||||
python_files = ["test_*.py"]
|
||||
python_functions = ["test_*"]
|
||||
addopts = "-v --tb=short"
|
||||
|
||||
@@ -1,97 +0,0 @@
|
||||
# Scripts
|
||||
|
||||
Collection of example scripts demonstrating UniFace functionality.
|
||||
|
||||
## Available Scripts
|
||||
|
||||
- `run_detection.py` - Face detection on images
|
||||
- `run_age_gender.py` - Age and gender prediction
|
||||
- `run_landmarks.py` - Facial landmark detection
|
||||
- `run_recognition.py` - Face recognition and embeddings
|
||||
- `run_face_search.py` - Face search and matching
|
||||
- `run_video_detection.py` - Video processing with face detection
|
||||
- `batch_process.py` - Batch processing of image folders
|
||||
- `download_model.py` - Download and manage models
|
||||
|
||||
## Quick Start
|
||||
|
||||
```bash
|
||||
# Face detection
|
||||
python scripts/run_detection.py --image assets/test.jpg
|
||||
|
||||
# Age and gender detection
|
||||
python scripts/run_age_gender.py --image assets/test.jpg
|
||||
|
||||
# Webcam demo
|
||||
python scripts/run_age_gender.py --webcam
|
||||
|
||||
# Batch processing
|
||||
python scripts/batch_process.py --input images/ --output results/
|
||||
```
|
||||
|
||||
## Import Examples
|
||||
|
||||
The scripts use direct class imports for better developer experience:
|
||||
|
||||
```python
|
||||
# Face Detection
|
||||
from uniface.detection import RetinaFace, SCRFD
|
||||
|
||||
detector = RetinaFace() # or SCRFD()
|
||||
faces = detector.detect(image)
|
||||
|
||||
# Face Recognition
|
||||
from uniface.recognition import ArcFace, MobileFace, SphereFace
|
||||
|
||||
recognizer = ArcFace() # or MobileFace(), SphereFace()
|
||||
embedding = recognizer.get_embedding(image, landmarks)
|
||||
|
||||
# Age & Gender
|
||||
from uniface.attribute import AgeGender
|
||||
|
||||
age_gender = AgeGender()
|
||||
gender, age = age_gender.predict(image, bbox)
|
||||
|
||||
# Landmarks
|
||||
from uniface.landmark import Landmark106
|
||||
|
||||
landmarker = Landmark106()
|
||||
landmarks = landmarker.get_landmarks(image, bbox)
|
||||
```
|
||||
|
||||
## Available Classes
|
||||
|
||||
**Detection:**
|
||||
- `RetinaFace` - High accuracy face detection
|
||||
- `SCRFD` - Fast face detection
|
||||
|
||||
**Recognition:**
|
||||
- `ArcFace` - High accuracy face recognition
|
||||
- `MobileFace` - Lightweight face recognition
|
||||
- `SphereFace` - Alternative face recognition
|
||||
|
||||
**Attributes:**
|
||||
- `AgeGender` - Age and gender prediction
|
||||
|
||||
**Landmarks:**
|
||||
- `Landmark106` - 106-point facial landmarks
|
||||
|
||||
## Common Options
|
||||
|
||||
Most scripts support:
|
||||
- `--help` - Show usage information
|
||||
- `--verbose` - Enable detailed logging
|
||||
- `--detector` - Choose detector (retinaface, scrfd)
|
||||
- `--threshold` - Set confidence threshold
|
||||
|
||||
## Testing
|
||||
|
||||
Run basic functionality test:
|
||||
```bash
|
||||
python scripts/run_detection.py --image assets/test.jpg
|
||||
```
|
||||
|
||||
For comprehensive testing, see the main project tests:
|
||||
```bash
|
||||
pytest tests/
|
||||
```
|
||||
@@ -1,157 +0,0 @@
|
||||
"""Batch Image Processing Script"""
|
||||
|
||||
import os
|
||||
import cv2
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
from tqdm import tqdm
|
||||
|
||||
from uniface import RetinaFace, SCRFD
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
|
||||
def get_image_files(input_dir: Path, extensions: tuple) -> list:
|
||||
image_files = []
|
||||
for ext in extensions:
|
||||
image_files.extend(input_dir.glob(f"*.{ext}"))
|
||||
image_files.extend(input_dir.glob(f"*.{ext.upper()}"))
|
||||
|
||||
return sorted(image_files)
|
||||
|
||||
|
||||
def process_single_image(detector, image_path: Path, output_dir: Path,
|
||||
vis_threshold: float, skip_existing: bool) -> dict:
|
||||
output_path = output_dir / f"{image_path.stem}_detected{image_path.suffix}"
|
||||
|
||||
# Skip if already processed
|
||||
if skip_existing and output_path.exists():
|
||||
return {"status": "skipped", "faces": 0}
|
||||
|
||||
# Load image
|
||||
image = cv2.imread(str(image_path))
|
||||
if image is None:
|
||||
return {"status": "error", "error": "Failed to load image"}
|
||||
|
||||
# Detect faces
|
||||
try:
|
||||
faces = detector.detect(image)
|
||||
except Exception as e:
|
||||
return {"status": "error", "error": str(e)}
|
||||
|
||||
# Draw detections
|
||||
bboxes = [f['bbox'] for f in faces]
|
||||
scores = [f['confidence'] for f in faces]
|
||||
landmarks = [f['landmarks'] for f in faces]
|
||||
draw_detections(image, bboxes, scores, landmarks, vis_threshold=vis_threshold)
|
||||
|
||||
# Add face count
|
||||
cv2.putText(image, f"Faces: {len(faces)}", (10, 30),
|
||||
cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
|
||||
# Save result
|
||||
cv2.imwrite(str(output_path), image)
|
||||
|
||||
return {"status": "success", "faces": len(faces)}
|
||||
|
||||
|
||||
def batch_process(detector, input_dir: str, output_dir: str, extensions: tuple,
|
||||
vis_threshold: float, skip_existing: bool):
|
||||
input_path = Path(input_dir)
|
||||
output_path = Path(output_dir)
|
||||
|
||||
# Create output directory
|
||||
output_path.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Get image files
|
||||
image_files = get_image_files(input_path, extensions)
|
||||
|
||||
if not image_files:
|
||||
print(f"No image files found in '{input_dir}' with extensions {extensions}")
|
||||
return
|
||||
|
||||
print(f"Input: {input_dir}")
|
||||
print(f"Output: {output_dir}")
|
||||
print(f"Found {len(image_files)} images\n")
|
||||
|
||||
# Process images
|
||||
results = {
|
||||
"success": 0,
|
||||
"skipped": 0,
|
||||
"error": 0,
|
||||
"total_faces": 0
|
||||
}
|
||||
|
||||
with tqdm(image_files, desc="Processing images", unit="img") as pbar:
|
||||
for image_path in pbar:
|
||||
result = process_single_image(
|
||||
detector, image_path, output_path,
|
||||
vis_threshold, skip_existing
|
||||
)
|
||||
|
||||
if result["status"] == "success":
|
||||
results["success"] += 1
|
||||
results["total_faces"] += result["faces"]
|
||||
pbar.set_postfix({"faces": result["faces"]})
|
||||
elif result["status"] == "skipped":
|
||||
results["skipped"] += 1
|
||||
else:
|
||||
results["error"] += 1
|
||||
print(f"\nError processing {image_path.name}: {result.get('error', 'Unknown error')}")
|
||||
|
||||
# Print summary
|
||||
print(f"\nBatch processing complete!")
|
||||
print(f" Total images: {len(image_files)}")
|
||||
print(f" Successfully processed: {results['success']}")
|
||||
print(f" Skipped: {results['skipped']}")
|
||||
print(f" Errors: {results['error']}")
|
||||
print(f" Total faces detected: {results['total_faces']}")
|
||||
if results['success'] > 0:
|
||||
print(f" Average faces per image: {results['total_faces']/results['success']:.2f}")
|
||||
print(f"\nResults saved to: {output_dir}")
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="Batch process images with face detection")
|
||||
parser.add_argument("--input", type=str, required=True,
|
||||
help="Input directory containing images")
|
||||
parser.add_argument("--output", type=str, required=True,
|
||||
help="Output directory for processed images")
|
||||
parser.add_argument("--detector", type=str, default="retinaface",
|
||||
choices=['retinaface', 'scrfd'], help="Face detector to use")
|
||||
parser.add_argument("--threshold", type=float, default=0.6,
|
||||
help="Confidence threshold for visualization")
|
||||
parser.add_argument("--extensions", type=str, default="jpg,jpeg,png,bmp",
|
||||
help="Comma-separated list of image extensions")
|
||||
parser.add_argument("--skip_existing", action="store_true",
|
||||
help="Skip files that already exist in output directory")
|
||||
parser.add_argument("--verbose", action="store_true", help="Enable verbose logging")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
# Check input directory exists
|
||||
if not Path(args.input).exists():
|
||||
print(f"Error: Input directory '{args.input}' does not exist")
|
||||
return
|
||||
|
||||
if args.verbose:
|
||||
from uniface import enable_logging
|
||||
enable_logging()
|
||||
|
||||
# Parse extensions
|
||||
extensions = tuple(ext.strip() for ext in args.extensions.split(','))
|
||||
|
||||
# Initialize detector
|
||||
print(f"Initializing detector: {args.detector}")
|
||||
if args.detector == 'retinaface':
|
||||
detector = RetinaFace()
|
||||
else:
|
||||
detector = SCRFD()
|
||||
print("Detector initialized\n")
|
||||
|
||||
# Process batch
|
||||
batch_process(detector, args.input, args.output, extensions,
|
||||
args.threshold, args.skip_existing)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -1,77 +0,0 @@
|
||||
import argparse
|
||||
from uniface.constants import (
|
||||
RetinaFaceWeights, SphereFaceWeights, MobileFaceWeights, ArcFaceWeights,
|
||||
SCRFDWeights, DDAMFNWeights, AgeGenderWeights, LandmarkWeights
|
||||
)
|
||||
from uniface.model_store import verify_model_weights
|
||||
|
||||
|
||||
# All available model types
|
||||
ALL_MODEL_TYPES = {
|
||||
'retinaface': RetinaFaceWeights,
|
||||
'sphereface': SphereFaceWeights,
|
||||
'mobileface': MobileFaceWeights,
|
||||
'arcface': ArcFaceWeights,
|
||||
'scrfd': SCRFDWeights,
|
||||
'ddamfn': DDAMFNWeights,
|
||||
'agegender': AgeGenderWeights,
|
||||
'landmark': LandmarkWeights,
|
||||
}
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="Download and verify model weights.")
|
||||
parser.add_argument(
|
||||
"--model-type",
|
||||
type=str,
|
||||
choices=list(ALL_MODEL_TYPES.keys()),
|
||||
help="Model type to download (e.g. retinaface, arcface). If not specified, all models will be downloaded.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--model",
|
||||
type=str,
|
||||
help="Specific model to download (e.g. MNET_V2). For RetinaFace backward compatibility.",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.model and not args.model_type:
|
||||
# Backward compatibility - assume RetinaFace
|
||||
try:
|
||||
weight = RetinaFaceWeights[args.model]
|
||||
print(f"Downloading RetinaFace model: {weight.value}")
|
||||
verify_model_weights(weight)
|
||||
print("Model downloaded successfully.")
|
||||
except KeyError:
|
||||
print(f"Invalid RetinaFace model: {args.model}")
|
||||
print(f"Available models: {[m.name for m in RetinaFaceWeights]}")
|
||||
return
|
||||
|
||||
if args.model_type:
|
||||
# Download all models from specific type
|
||||
model_enum = ALL_MODEL_TYPES[args.model_type]
|
||||
print(f"Downloading all {args.model_type} models...")
|
||||
for weight in model_enum:
|
||||
print(f"Downloading: {weight.value}")
|
||||
try:
|
||||
verify_model_weights(weight)
|
||||
print(f"Downloaded: {weight.value}")
|
||||
except Exception as e:
|
||||
print(f"Failed to download {weight.value}: {e}")
|
||||
else:
|
||||
# Download all models from all types
|
||||
print("Downloading all models...")
|
||||
for model_type, model_enum in ALL_MODEL_TYPES.items():
|
||||
print(f"\nDownloading {model_type} models...")
|
||||
for weight in model_enum:
|
||||
print(f"Downloading: {weight.value}")
|
||||
try:
|
||||
verify_model_weights(weight)
|
||||
print(f"Downloaded: {weight.value}")
|
||||
except Exception as e:
|
||||
print(f"Failed to download {weight.value}: {e}")
|
||||
|
||||
print("\nDownload process completed.")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -1,163 +0,0 @@
|
||||
"""Age and Gender Detection Demo Script"""
|
||||
|
||||
import os
|
||||
import cv2
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
|
||||
from uniface import RetinaFace, SCRFD, AgeGender
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
|
||||
def process_image(detector, age_gender, image_path: str, save_dir: str = "outputs", vis_threshold: float = 0.6):
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
print(f"Error: Failed to load image from '{image_path}'")
|
||||
return
|
||||
|
||||
print(f"Processing: {image_path}")
|
||||
|
||||
# Detect faces
|
||||
faces = detector.detect(image)
|
||||
print(f" Detected {len(faces)} face(s)")
|
||||
|
||||
if not faces:
|
||||
print(" No faces detected")
|
||||
return
|
||||
|
||||
# Draw detections
|
||||
bboxes = [f['bbox'] for f in faces]
|
||||
scores = [f['confidence'] for f in faces]
|
||||
landmarks = [f['landmarks'] for f in faces]
|
||||
draw_detections(image, bboxes, scores, landmarks, vis_threshold=vis_threshold)
|
||||
|
||||
# Predict and draw age/gender for each face
|
||||
for i, face in enumerate(faces):
|
||||
gender, age = age_gender.predict(image, face['bbox'])
|
||||
print(f" Face {i+1}: {gender}, {age} years old")
|
||||
|
||||
# Draw age and gender text
|
||||
bbox = face['bbox']
|
||||
x1, y1 = int(bbox[0]), int(bbox[1])
|
||||
text = f"{gender}, {age}y"
|
||||
|
||||
# Background rectangle for text
|
||||
(text_width, text_height), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
|
||||
cv2.rectangle(image, (x1, y1 - text_height - 10),
|
||||
(x1 + text_width + 10, y1), (0, 255, 0), -1)
|
||||
cv2.putText(image, text, (x1 + 5, y1 - 5),
|
||||
cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 2)
|
||||
|
||||
# Save result
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f"{Path(image_path).stem}_age_gender.jpg")
|
||||
cv2.imwrite(output_path, image)
|
||||
print(f"Output saved: {output_path}")
|
||||
|
||||
|
||||
def run_webcam(detector, age_gender, vis_threshold: float = 0.6):
|
||||
cap = cv2.VideoCapture(0)
|
||||
|
||||
if not cap.isOpened():
|
||||
print("Cannot open webcam")
|
||||
return
|
||||
|
||||
print("Webcam opened")
|
||||
print("Press 'q' to quit\n")
|
||||
|
||||
frame_count = 0
|
||||
|
||||
try:
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
frame_count += 1
|
||||
|
||||
# Detect faces
|
||||
faces = detector.detect(frame)
|
||||
|
||||
# Draw detections
|
||||
bboxes = [f['bbox'] for f in faces]
|
||||
scores = [f['confidence'] for f in faces]
|
||||
landmarks = [f['landmarks'] for f in faces]
|
||||
draw_detections(frame, bboxes, scores, landmarks, vis_threshold=vis_threshold)
|
||||
|
||||
# Predict and draw age/gender for each face
|
||||
for face in faces:
|
||||
gender, age = age_gender.predict(frame, face['bbox'])
|
||||
|
||||
# Draw age and gender text
|
||||
bbox = face['bbox']
|
||||
x1, y1 = int(bbox[0]), int(bbox[1])
|
||||
text = f"{gender}, {age}y"
|
||||
|
||||
# Background rectangle for text
|
||||
(text_width, text_height), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
|
||||
cv2.rectangle(frame, (x1, y1 - text_height - 10),
|
||||
(x1 + text_width + 10, y1), (0, 255, 0), -1)
|
||||
cv2.putText(frame, text, (x1 + 5, y1 - 5),
|
||||
cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 2)
|
||||
|
||||
# Add info
|
||||
cv2.putText(frame, f"Faces: {len(faces)}", (10, 30),
|
||||
cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
cv2.putText(frame, "Press 'q' to quit", (10, frame.shape[0] - 10),
|
||||
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
|
||||
|
||||
cv2.imshow("Age & Gender Detection", frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
except KeyboardInterrupt:
|
||||
print("\nInterrupted")
|
||||
finally:
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
print(f"\nProcessed {frame_count} frames")
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="Run age and gender detection")
|
||||
parser.add_argument("--image", type=str, help="Path to input image")
|
||||
parser.add_argument("--webcam", action="store_true", help="Use webcam instead of image")
|
||||
parser.add_argument("--detector", type=str, default="retinaface",
|
||||
choices=['retinaface', 'scrfd'], help="Face detector to use")
|
||||
parser.add_argument("--threshold", type=float, default=0.6,
|
||||
help="Confidence threshold for visualization")
|
||||
parser.add_argument("--save_dir", type=str, default="outputs",
|
||||
help="Directory to save output images")
|
||||
parser.add_argument("--verbose", action="store_true", help="Enable verbose logging")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
# Validate input
|
||||
if not args.image and not args.webcam:
|
||||
parser.error("Either --image or --webcam must be specified")
|
||||
|
||||
if args.verbose:
|
||||
from uniface import enable_logging
|
||||
enable_logging()
|
||||
|
||||
# Initialize models
|
||||
print(f"Initializing detector: {args.detector}")
|
||||
if args.detector == 'retinaface':
|
||||
detector = RetinaFace()
|
||||
else:
|
||||
detector = SCRFD()
|
||||
|
||||
print("Initializing age/gender model...")
|
||||
age_gender = AgeGender()
|
||||
print("Models initialized\n")
|
||||
|
||||
# Process
|
||||
if args.webcam:
|
||||
run_webcam(detector, age_gender, args.threshold)
|
||||
else:
|
||||
process_image(detector, age_gender, args.image, args.save_dir, args.threshold)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -1,80 +0,0 @@
|
||||
import os
|
||||
import cv2
|
||||
import time
|
||||
import argparse
|
||||
import numpy as np
|
||||
|
||||
from uniface.detection import RetinaFace, SCRFD
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
|
||||
def run_inference(detector, image_path: str, vis_threshold: float = 0.6, save_dir: str = "outputs"):
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
print(f"Error: Failed to load image from '{image_path}'")
|
||||
return
|
||||
|
||||
# 1. Get the list of face dictionaries from the detector
|
||||
faces = detector.detect(image)
|
||||
|
||||
if faces:
|
||||
# 2. Unpack the data into separate lists
|
||||
bboxes = [face['bbox'] for face in faces]
|
||||
scores = [face['confidence'] for face in faces]
|
||||
landmarks = [face['landmarks'] for face in faces]
|
||||
|
||||
# 3. Pass the unpacked lists to the drawing function
|
||||
draw_detections(image, bboxes, scores, landmarks, vis_threshold=0.6)
|
||||
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f"{os.path.splitext(os.path.basename(image_path))[0]}_out.jpg")
|
||||
cv2.imwrite(output_path, image)
|
||||
print(f"Output saved at: {output_path}")
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="Run face detection on an image.")
|
||||
parser.add_argument("--image", type=str, required=True, help="Path to the input image")
|
||||
parser.add_argument(
|
||||
"--method",
|
||||
type=str,
|
||||
default="retinaface",
|
||||
choices=['retinaface', 'scrfd'],
|
||||
help="Detection method to use."
|
||||
)
|
||||
parser.add_argument("--threshold", type=float, default=0.6, help="Visualization confidence threshold")
|
||||
parser.add_argument("--iterations", type=int, default=1, help="Number of inference runs for benchmarking")
|
||||
parser.add_argument("--save_dir", type=str, default="outputs", help="Directory to save output images")
|
||||
parser.add_argument("--verbose", action="store_true", help="Enable verbose logging")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.verbose:
|
||||
from uniface import enable_logging
|
||||
enable_logging()
|
||||
|
||||
print(f"Initializing detector: {args.method}")
|
||||
if args.method == 'retinaface':
|
||||
detector = RetinaFace()
|
||||
else:
|
||||
detector = SCRFD()
|
||||
|
||||
avg_time = 0
|
||||
for i in range(args.iterations):
|
||||
start = time.time()
|
||||
run_inference(detector, args.image, args.threshold, args.save_dir)
|
||||
elapsed = time.time() - start
|
||||
print(f"[{i + 1}/{args.iterations}] Inference time: {elapsed:.4f} seconds")
|
||||
if i >= 0: # Avoid counting the first run if it includes model loading time
|
||||
avg_time += elapsed
|
||||
|
||||
if args.iterations > 1:
|
||||
# Adjust average calculation to exclude potential first-run overhead
|
||||
effective_iterations = max(1, args.iterations)
|
||||
print(
|
||||
f"\nAverage inference time over {effective_iterations} runs: {avg_time / effective_iterations:.4f} seconds")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -1,110 +0,0 @@
|
||||
import argparse
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from uniface.detection import RetinaFace, SCRFD
|
||||
from uniface.face_utils import compute_similarity
|
||||
from uniface.recognition import ArcFace, MobileFace, SphereFace
|
||||
|
||||
|
||||
def extract_reference_embedding(detector, recognizer, image_path: str) -> np.ndarray:
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
raise RuntimeError(f"Failed to load image: {image_path}")
|
||||
|
||||
faces = detector.detect(image)
|
||||
if not faces:
|
||||
raise RuntimeError("No faces found in reference image.")
|
||||
|
||||
# Get landmarks from the first detected face dictionary
|
||||
landmarks = np.array(faces[0]["landmarks"])
|
||||
|
||||
# Use normalized embedding for more reliable similarity comparison
|
||||
embedding = recognizer.get_normalized_embedding(image, landmarks)
|
||||
return embedding
|
||||
|
||||
|
||||
def run_video(detector, recognizer, ref_embedding: np.ndarray, threshold: float = 0.4):
|
||||
cap = cv2.VideoCapture(0)
|
||||
if not cap.isOpened():
|
||||
raise RuntimeError("Webcam could not be opened.")
|
||||
print("Webcam started. Press 'q' to quit.")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
faces = detector.detect(frame)
|
||||
|
||||
# Loop through each detected face
|
||||
for face in faces:
|
||||
# Extract bbox and landmarks from the dictionary
|
||||
bbox = face["bbox"]
|
||||
landmarks = np.array(face["landmarks"])
|
||||
|
||||
x1, y1, x2, y2 = map(int, bbox)
|
||||
|
||||
# Get the normalized embedding for the current face
|
||||
embedding = recognizer.get_normalized_embedding(frame, landmarks)
|
||||
|
||||
# Compare with the reference embedding
|
||||
sim = compute_similarity(ref_embedding, embedding)
|
||||
|
||||
# Draw results
|
||||
label = f"Match ({sim:.2f})" if sim > threshold else f"Unknown ({sim:.2f})"
|
||||
color = (0, 255, 0) if sim > threshold else (0, 0, 255)
|
||||
cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
|
||||
cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)
|
||||
|
||||
cv2.imshow("Face Recognition", frame)
|
||||
if cv2.waitKey(1) & 0xFF == ord("q"):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="Face recognition using a reference image.")
|
||||
parser.add_argument("--image", type=str, required=True, help="Path to the reference face image.")
|
||||
parser.add_argument(
|
||||
"--detector", type=str, default="scrfd", choices=["retinaface", "scrfd"], help="Face detection method."
|
||||
)
|
||||
parser.add_argument(
|
||||
"--recognizer",
|
||||
type=str,
|
||||
default="arcface",
|
||||
choices=["arcface", "mobileface", "sphereface"],
|
||||
help="Face recognition method.",
|
||||
)
|
||||
parser.add_argument("--verbose", action="store_true", help="Enable verbose logging")
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.verbose:
|
||||
from uniface import enable_logging
|
||||
|
||||
enable_logging()
|
||||
|
||||
print("Initializing models...")
|
||||
if args.detector == 'retinaface':
|
||||
detector = RetinaFace()
|
||||
else:
|
||||
detector = SCRFD()
|
||||
|
||||
if args.recognizer == 'arcface':
|
||||
recognizer = ArcFace()
|
||||
elif args.recognizer == 'mobileface':
|
||||
recognizer = MobileFace()
|
||||
else:
|
||||
recognizer = SphereFace()
|
||||
|
||||
print("Extracting reference embedding...")
|
||||
ref_embedding = extract_reference_embedding(detector, recognizer, args.image)
|
||||
|
||||
run_video(detector, recognizer, ref_embedding)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -1,149 +0,0 @@
|
||||
"""Facial Landmark Detection Demo Script"""
|
||||
|
||||
import os
|
||||
import cv2
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
|
||||
from uniface import RetinaFace, SCRFD, Landmark106
|
||||
|
||||
|
||||
def process_image(detector, landmarker, image_path: str, save_dir: str = "outputs"):
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
print(f"Error: Failed to load image from '{image_path}'")
|
||||
return
|
||||
|
||||
print(f"Processing: {image_path}")
|
||||
|
||||
# Detect faces
|
||||
faces = detector.detect(image)
|
||||
print(f" Detected {len(faces)} face(s)")
|
||||
|
||||
if not faces:
|
||||
print(" No faces detected")
|
||||
return
|
||||
|
||||
# Process each face
|
||||
for i, face in enumerate(faces):
|
||||
# Draw bounding box
|
||||
bbox = face['bbox']
|
||||
x1, y1, x2, y2 = map(int, bbox)
|
||||
cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
|
||||
|
||||
# Get and draw 106 landmarks
|
||||
landmarks = landmarker.get_landmarks(image, bbox)
|
||||
print(f" Face {i+1}: Extracted {len(landmarks)} landmarks")
|
||||
|
||||
for x, y in landmarks.astype(int):
|
||||
cv2.circle(image, (x, y), 1, (0, 255, 0), -1)
|
||||
|
||||
# Add face count
|
||||
cv2.putText(image, f"Face {i+1}", (x1, y1 - 10),
|
||||
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
|
||||
|
||||
# Add total count
|
||||
cv2.putText(image, f"Faces: {len(faces)}", (10, 30),
|
||||
cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
|
||||
# Save result
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f"{Path(image_path).stem}_landmarks.jpg")
|
||||
cv2.imwrite(output_path, image)
|
||||
print(f"Output saved: {output_path}")
|
||||
|
||||
|
||||
def run_webcam(detector, landmarker):
|
||||
cap = cv2.VideoCapture(0)
|
||||
|
||||
if not cap.isOpened():
|
||||
print("Cannot open webcam")
|
||||
return
|
||||
|
||||
print("Webcam opened")
|
||||
print("Press 'q' to quit\n")
|
||||
|
||||
frame_count = 0
|
||||
|
||||
try:
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
frame_count += 1
|
||||
|
||||
# Detect faces
|
||||
faces = detector.detect(frame)
|
||||
|
||||
# Process each face
|
||||
for face in faces:
|
||||
# Draw bounding box
|
||||
bbox = face['bbox']
|
||||
x1, y1, x2, y2 = map(int, bbox)
|
||||
cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
|
||||
|
||||
# Get and draw 106 landmarks
|
||||
landmarks = landmarker.get_landmarks(frame, bbox)
|
||||
for x, y in landmarks.astype(int):
|
||||
cv2.circle(frame, (x, y), 1, (0, 255, 0), -1)
|
||||
|
||||
# Add info
|
||||
cv2.putText(frame, f"Faces: {len(faces)}", (10, 30),
|
||||
cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
cv2.putText(frame, "Press 'q' to quit", (10, frame.shape[0] - 10),
|
||||
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
|
||||
|
||||
cv2.imshow("106-Point Landmarks", frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
except KeyboardInterrupt:
|
||||
print("\nInterrupted")
|
||||
finally:
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
print(f"\nProcessed {frame_count} frames")
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="Run facial landmark detection")
|
||||
parser.add_argument("--image", type=str, help="Path to input image")
|
||||
parser.add_argument("--webcam", action="store_true", help="Use webcam instead of image")
|
||||
parser.add_argument("--detector", type=str, default="retinaface",
|
||||
choices=['retinaface', 'scrfd'], help="Face detector to use")
|
||||
parser.add_argument("--save_dir", type=str, default="outputs",
|
||||
help="Directory to save output images")
|
||||
parser.add_argument("--verbose", action="store_true", help="Enable verbose logging")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
# Validate input
|
||||
if not args.image and not args.webcam:
|
||||
parser.error("Either --image or --webcam must be specified")
|
||||
|
||||
if args.verbose:
|
||||
from uniface import enable_logging
|
||||
enable_logging()
|
||||
|
||||
# Initialize models
|
||||
print(f"Initializing detector: {args.detector}")
|
||||
if args.detector == 'retinaface':
|
||||
detector = RetinaFace()
|
||||
else:
|
||||
detector = SCRFD()
|
||||
|
||||
print("Initializing landmark detector...")
|
||||
landmarker = Landmark106()
|
||||
print("Models initialized\n")
|
||||
|
||||
# Process
|
||||
if args.webcam:
|
||||
run_webcam(detector, landmarker)
|
||||
else:
|
||||
process_image(detector, landmarker, args.image, args.save_dir)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -1,128 +0,0 @@
import cv2
import argparse
import numpy as np

from uniface.detection import RetinaFace, SCRFD
from uniface.recognition import ArcFace, MobileFace, SphereFace
from uniface.face_utils import compute_similarity


def run_inference(detector, recognizer, image_path: str):
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Failed to load image from '{image_path}'")
        return

    faces = detector.detect(image)

    if not faces:
        print("No faces detected.")
        return

    print(f"Detected {len(faces)} face(s). Extracting embeddings for the first face...")

    # Process the first detected face
    first_face = faces[0]
    landmarks = np.array(first_face['landmarks'])  # Convert landmarks to numpy array

    # Extract embedding using the landmarks from the face dictionary
    embedding = recognizer.get_embedding(image, landmarks)
    norm_embedding = recognizer.get_normalized_embedding(image, landmarks)

    # Print some info about the embeddings
    print(f" - Embedding shape: {embedding.shape}")
    print(f" - L2 norm of unnormalized embedding: {np.linalg.norm(embedding):.4f}")
    print(f" - L2 norm of normalized embedding: {np.linalg.norm(norm_embedding):.4f}")


def compare_faces(detector, recognizer, image1_path: str, image2_path: str, threshold: float = 0.35):
    # Load images
    img1 = cv2.imread(image1_path)
    img2 = cv2.imread(image2_path)

    if img1 is None or img2 is None:
        print("Error: Failed to load images")
        return

    # Detect faces
    faces1 = detector.detect(img1)
    faces2 = detector.detect(img2)

    if not faces1 or not faces2:
        print("Error: No faces detected in one or both images")
        return

    # Get landmarks for first face in each image
    landmarks1 = np.array(faces1[0]['landmarks'])
    landmarks2 = np.array(faces2[0]['landmarks'])

    # Get normalized embeddings
    embedding1 = recognizer.get_normalized_embedding(img1, landmarks1)
    embedding2 = recognizer.get_normalized_embedding(img2, landmarks2)

    # Compute similarity
    similarity = compute_similarity(embedding1, embedding2, normalized=True)
    is_match = similarity > threshold

    print(f"Similarity: {similarity:.4f}")
    print(f"Result: {'Same person' if is_match else 'Different person'}")
    print(f"Threshold: {threshold}")


def main():
    parser = argparse.ArgumentParser(description="Face recognition and comparison.")
    parser.add_argument("--image", type=str, help="Path to single image for embedding extraction.")
    parser.add_argument("--image1", type=str, help="Path to first image for comparison.")
    parser.add_argument("--image2", type=str, help="Path to second image for comparison.")
    parser.add_argument("--threshold", type=float, default=0.35, help="Similarity threshold for face matching.")
    parser.add_argument(
        "--detector",
        type=str,
        default="retinaface",
        choices=['retinaface', 'scrfd'],
        help="Face detection method to use."
    )
    parser.add_argument(
        "--recognizer",
        type=str,
        default="arcface",
        choices=['arcface', 'mobileface', 'sphereface'],
        help="Face recognition method to use."
    )
    parser.add_argument("--verbose", action="store_true", help="Enable verbose logging")

    args = parser.parse_args()

    if args.verbose:
        from uniface import enable_logging
        enable_logging()

    print(f"Initializing detector: {args.detector}")
    if args.detector == 'retinaface':
        detector = RetinaFace()
    else:
        detector = SCRFD()

    print(f"Initializing recognizer: {args.recognizer}")
    if args.recognizer == 'arcface':
        recognizer = ArcFace()
    elif args.recognizer == 'mobileface':
        recognizer = MobileFace()
    else:
        recognizer = SphereFace()

    if args.image1 and args.image2:
        # Face comparison mode
        print(f"Comparing faces: {args.image1} vs {args.image2}")
        compare_faces(detector, recognizer, args.image1, args.image2, args.threshold)
    elif args.image:
        # Single image embedding extraction mode
        run_inference(detector, recognizer, args.image)
    else:
        print("Error: Provide either --image for single image processing or --image1 and --image2 for comparison")
        parser.print_help()


if __name__ == "__main__":
    main()
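The decision logic in compare_faces reduces to one cosine-similarity threshold. A standalone sketch with random stand-in embeddings (0.35 matches the script's default --threshold):

import numpy as np
from uniface.face_utils import compute_similarity

# Random stand-ins for two L2-normalized 512-d embeddings.
emb1 = np.random.rand(1, 512).astype(np.float32)
emb1 /= np.linalg.norm(emb1)
emb2 = np.random.rand(1, 512).astype(np.float32)
emb2 /= np.linalg.norm(emb2)

similarity = compute_similarity(emb1, emb2, normalized=True)
print('Same person' if similarity > 0.35 else 'Different person')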
@@ -1,142 +0,0 @@
"""Video Face Detection Script"""

import cv2
import argparse
from pathlib import Path
from tqdm import tqdm

from uniface import RetinaFace, SCRFD
from uniface.visualization import draw_detections


def process_video(detector, input_path: str, output_path: str, vis_threshold: float = 0.6,
                  fps: int = None, show_preview: bool = False):
    # Open input video
    cap = cv2.VideoCapture(input_path)
    if not cap.isOpened():
        print(f"Error: Cannot open video file '{input_path}'")
        return

    # Get video properties
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    source_fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    output_fps = fps if fps is not None else source_fps

    print(f"📹 Input: {input_path}")
    print(f" Resolution: {width}x{height}")
    print(f" FPS: {source_fps:.2f}")
    print(f" Total frames: {total_frames}")
    print(f"\n📹 Output: {output_path}")
    print(f" FPS: {output_fps:.2f}\n")

    # Initialize video writer
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, output_fps, (width, height))

    if not out.isOpened():
        print(f"Error: Cannot create output video '{output_path}'")
        cap.release()
        return

    # Process frames
    frame_count = 0
    total_faces = 0

    try:
        with tqdm(total=total_frames, desc="Processing", unit="frames") as pbar:
            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                frame_count += 1

                # Detect faces
                faces = detector.detect(frame)
                total_faces += len(faces)

                # Draw detections
                bboxes = [f['bbox'] for f in faces]
                scores = [f['confidence'] for f in faces]
                landmarks = [f['landmarks'] for f in faces]
                draw_detections(frame, bboxes, scores, landmarks, vis_threshold=vis_threshold)

                # Add frame info
                cv2.putText(frame, f"Faces: {len(faces)}", (10, 30),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

                # Write frame
                out.write(frame)

                # Show preview if requested
                if show_preview:
                    cv2.imshow("Processing Video - Press 'q' to cancel", frame)
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        print("\nProcessing cancelled by user")
                        break

                pbar.update(1)

    except KeyboardInterrupt:
        print("\nProcessing interrupted")
    finally:
        cap.release()
        out.release()
        if show_preview:
            cv2.destroyAllWindows()

    # Summary
    print("\nProcessing complete!")
    print(f" Processed: {frame_count} frames")
    print(f" Total faces detected: {total_faces}")
    if frame_count > 0:
        print(f" Average faces per frame: {total_faces / frame_count:.2f}")
    print(f" Output saved: {output_path}")


def main():
    parser = argparse.ArgumentParser(description="Process video with face detection")
    parser.add_argument("--input", type=str, required=True, help="Path to input video")
    parser.add_argument("--output", type=str, required=True, help="Path to output video")
    parser.add_argument("--detector", type=str, default="retinaface",
                        choices=['retinaface', 'scrfd'], help="Face detector to use")
    parser.add_argument("--threshold", type=float, default=0.6,
                        help="Confidence threshold for visualization")
    parser.add_argument("--fps", type=int, default=None,
                        help="Output FPS (default: same as input)")
    parser.add_argument("--preview", action="store_true",
                        help="Show live preview during processing")
    parser.add_argument("--verbose", action="store_true", help="Enable verbose logging")

    args = parser.parse_args()

    # Check input exists
    if not Path(args.input).exists():
        print(f"Error: Input file '{args.input}' does not exist")
        return

    # Create output directory if needed
    output_dir = Path(args.output).parent
    if output_dir != Path('.'):
        output_dir.mkdir(parents=True, exist_ok=True)

    if args.verbose:
        from uniface import enable_logging
        enable_logging()

    # Initialize detector
    print(f"Initializing detector: {args.detector}")
    if args.detector == 'retinaface':
        detector = RetinaFace()
    else:
        detector = SCRFD()
    print("Detector initialized\n")

    # Process video
    process_video(detector, args.input, args.output, args.threshold, args.fps, args.preview)


if __name__ == "__main__":
    main()
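The per-frame work above reduces to detect, unpack, draw. A minimal single-frame sketch; the paths are placeholders and the list-of-dicts access mirrors the older detector output this removed script targets:

import cv2
from uniface import RetinaFace
from uniface.visualization import draw_detections

detector = RetinaFace()
frame = cv2.imread("frame.jpg")  # placeholder for one decoded video frame

faces = detector.detect(frame)
bboxes = [f['bbox'] for f in faces]
scores = [f['confidence'] for f in faces]
landmarks = [f['landmarks'] for f in faces]
draw_detections(frame, bboxes, scores, landmarks, vis_threshold=0.6)
cv2.imwrite("frame_annotated.jpg", frame)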
@@ -1,35 +0,0 @@
import argparse
import hashlib
from pathlib import Path


def compute_sha256(file_path: Path, chunk_size: int = 8192) -> str:
    sha256_hash = hashlib.sha256()
    with file_path.open("rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            sha256_hash.update(chunk)
    return sha256_hash.hexdigest()


def main():
    parser = argparse.ArgumentParser(
        description="Compute SHA256 hash of a model weight file."
    )
    parser.add_argument(
        "file",
        type=Path,
        help="Path to the model weight file (.onnx, .pth, etc)."
    )

    args = parser.parse_args()

    if not args.file.exists() or not args.file.is_file():
        print(f"File does not exist: {args.file}")
        return

    sha256 = compute_sha256(args.file)
    print(f"SHA256 hash for '{args.file.name}':\n{sha256}")


if __name__ == "__main__":
    main()
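A typical use of compute_sha256 is verifying a downloaded weight file against a published digest. A sketch reusing the function above; the file name and expected digest are placeholders:

from pathlib import Path

weight_file = Path("retinaface_mnet_v2.onnx")  # placeholder file name
expected = "<published sha256 digest>"         # placeholder digest

if compute_sha256(weight_file) == expected:
    print("Checksum OK")
else:
    print("Checksum mismatch - re-download the weights")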
@@ -1,7 +1,15 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Tests for AgeGender attribute predictor."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from uniface.attribute import AgeGender
|
||||
from uniface.attribute import AgeGender, AttributeResult
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
@@ -20,23 +28,26 @@ def mock_bbox():
|
||||
|
||||
|
||||
def test_model_initialization(age_gender_model):
|
||||
assert age_gender_model is not None, "AgeGender model initialization failed."
|
||||
assert age_gender_model is not None, 'AgeGender model initialization failed.'
|
||||
|
||||
|
||||
def test_prediction_output_format(age_gender_model, mock_image, mock_bbox):
|
||||
gender, age = age_gender_model.predict(mock_image, mock_bbox)
|
||||
assert isinstance(gender, str), f"Gender should be string, got {type(gender)}"
|
||||
assert isinstance(age, int), f"Age should be int, got {type(age)}"
|
||||
result = age_gender_model.predict(mock_image, mock_bbox)
|
||||
assert isinstance(result, AttributeResult), f'Result should be AttributeResult, got {type(result)}'
|
||||
assert isinstance(result.gender, int), f'Gender should be int, got {type(result.gender)}'
|
||||
assert isinstance(result.age, int), f'Age should be int, got {type(result.age)}'
|
||||
assert isinstance(result.sex, str), f'Sex should be str, got {type(result.sex)}'
|
||||
|
||||
|
||||
def test_gender_values(age_gender_model, mock_image, mock_bbox):
|
||||
gender, age = age_gender_model.predict(mock_image, mock_bbox)
|
||||
assert gender in ['Male', 'Female'], f"Gender should be 'Male' or 'Female', got '{gender}'"
|
||||
result = age_gender_model.predict(mock_image, mock_bbox)
|
||||
assert result.gender in [0, 1], f'Gender should be 0 (Female) or 1 (Male), got {result.gender}'
|
||||
assert result.sex in ['Female', 'Male'], f'Sex should be Female or Male, got {result.sex}'
|
||||
|
||||
|
||||
def test_age_range(age_gender_model, mock_image, mock_bbox):
|
||||
gender, age = age_gender_model.predict(mock_image, mock_bbox)
|
||||
assert 0 <= age <= 120, f"Age should be between 0 and 120, got {age}"
|
||||
result = age_gender_model.predict(mock_image, mock_bbox)
|
||||
assert 0 <= result.age <= 120, f'Age should be between 0 and 120, got {result.age}'
|
||||
|
||||
|
||||
def test_different_bbox_sizes(age_gender_model, mock_image):
|
||||
@@ -47,9 +58,9 @@ def test_different_bbox_sizes(age_gender_model, mock_image):
|
||||
]
|
||||
|
||||
for bbox in test_bboxes:
|
||||
gender, age = age_gender_model.predict(mock_image, bbox)
|
||||
assert gender in ['Male', 'Female'], f"Failed for bbox {bbox}"
|
||||
assert 0 <= age <= 120, f"Age out of range for bbox {bbox}"
|
||||
result = age_gender_model.predict(mock_image, bbox)
|
||||
assert result.gender in [0, 1], f'Failed for bbox {bbox}'
|
||||
assert 0 <= result.age <= 120, f'Age out of range for bbox {bbox}'
|
||||
|
||||
|
||||
def test_different_image_sizes(age_gender_model, mock_bbox):
|
||||
@@ -57,31 +68,31 @@ def test_different_image_sizes(age_gender_model, mock_bbox):
|
||||
|
||||
for size in test_sizes:
|
||||
mock_image = np.random.randint(0, 255, size, dtype=np.uint8)
|
||||
gender, age = age_gender_model.predict(mock_image, mock_bbox)
|
||||
assert gender in ['Male', 'Female'], f"Failed for image size {size}"
|
||||
assert 0 <= age <= 120, f"Age out of range for image size {size}"
|
||||
result = age_gender_model.predict(mock_image, mock_bbox)
|
||||
assert result.gender in [0, 1], f'Failed for image size {size}'
|
||||
assert 0 <= result.age <= 120, f'Age out of range for image size {size}'
|
||||
|
||||
|
||||
def test_consistency(age_gender_model, mock_image, mock_bbox):
|
||||
gender1, age1 = age_gender_model.predict(mock_image, mock_bbox)
|
||||
gender2, age2 = age_gender_model.predict(mock_image, mock_bbox)
|
||||
result1 = age_gender_model.predict(mock_image, mock_bbox)
|
||||
result2 = age_gender_model.predict(mock_image, mock_bbox)
|
||||
|
||||
assert gender1 == gender2, "Same input should produce same gender prediction"
|
||||
assert age1 == age2, "Same input should produce same age prediction"
|
||||
assert result1.gender == result2.gender, 'Same input should produce same gender prediction'
|
||||
assert result1.age == result2.age, 'Same input should produce same age prediction'
|
||||
|
||||
|
||||
def test_bbox_list_format(age_gender_model, mock_image):
|
||||
bbox_list = [100, 100, 300, 300]
|
||||
gender, age = age_gender_model.predict(mock_image, bbox_list)
|
||||
assert gender in ['Male', 'Female'], "Should work with bbox as list"
|
||||
assert 0 <= age <= 120, "Age should be in valid range"
|
||||
result = age_gender_model.predict(mock_image, bbox_list)
|
||||
assert result.gender in [0, 1], 'Should work with bbox as list'
|
||||
assert 0 <= result.age <= 120, 'Age should be in valid range'
|
||||
|
||||
|
||||
def test_bbox_array_format(age_gender_model, mock_image):
|
||||
bbox_array = np.array([100, 100, 300, 300])
|
||||
gender, age = age_gender_model.predict(mock_image, bbox_array)
|
||||
assert gender in ['Male', 'Female'], "Should work with bbox as numpy array"
|
||||
assert 0 <= age <= 120, "Age should be in valid range"
|
||||
result = age_gender_model.predict(mock_image, bbox_array)
|
||||
assert result.gender in [0, 1], 'Should work with bbox as numpy array'
|
||||
assert 0 <= result.age <= 120, 'Age should be in valid range'
|
||||
|
||||
|
||||
def test_multiple_predictions(age_gender_model, mock_image):
|
||||
@@ -93,24 +104,37 @@ def test_multiple_predictions(age_gender_model, mock_image):
|
||||
|
||||
results = []
|
||||
for bbox in bboxes:
|
||||
gender, age = age_gender_model.predict(mock_image, bbox)
|
||||
results.append((gender, age))
|
||||
result = age_gender_model.predict(mock_image, bbox)
|
||||
results.append(result)
|
||||
|
||||
assert len(results) == 3, "Should have 3 predictions"
|
||||
for gender, age in results:
|
||||
assert gender in ['Male', 'Female']
|
||||
assert 0 <= age <= 120
|
||||
assert len(results) == 3, 'Should have 3 predictions'
|
||||
for result in results:
|
||||
assert result.gender in [0, 1]
|
||||
assert 0 <= result.age <= 120
|
||||
|
||||
|
||||
def test_age_is_positive(age_gender_model, mock_image, mock_bbox):
|
||||
for _ in range(5):
|
||||
gender, age = age_gender_model.predict(mock_image, mock_bbox)
|
||||
assert age >= 0, f"Age should be non-negative, got {age}"
|
||||
result = age_gender_model.predict(mock_image, mock_bbox)
|
||||
assert result.age >= 0, f'Age should be non-negative, got {result.age}'
|
||||
|
||||
|
||||
def test_output_format_for_visualization(age_gender_model, mock_image, mock_bbox):
|
||||
gender, age = age_gender_model.predict(mock_image, mock_bbox)
|
||||
text = f"{gender}, {age}y"
|
||||
assert isinstance(text, str), "Should be able to format as string"
|
||||
assert "Male" in text or "Female" in text, "Text should contain gender"
|
||||
assert "y" in text, "Text should contain 'y' for years"
|
||||
result = age_gender_model.predict(mock_image, mock_bbox)
|
||||
text = f'{result.sex}, {result.age}y'
|
||||
assert isinstance(text, str), 'Should be able to format as string'
|
||||
assert 'Male' in text or 'Female' in text, 'Text should contain gender'
|
||||
assert 'y' in text, "Text should contain 'y' for years"
|
||||
|
||||
|
||||
def test_attribute_result_fields(age_gender_model, mock_image, mock_bbox):
|
||||
"""Test that AttributeResult has correct fields for AgeGender model."""
|
||||
result = age_gender_model.predict(mock_image, mock_bbox)
|
||||
|
||||
# AgeGender should set gender and age
|
||||
assert result.gender is not None
|
||||
assert result.age is not None
|
||||
|
||||
# AgeGender should NOT set race and age_group (FairFace only)
|
||||
assert result.race is None
|
||||
assert result.age_group is None
|
||||
|
||||
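These updated tests document the new AttributeResult-based API for AgeGender. A minimal usage sketch, assuming the default constructor and using a random stand-in frame:

import numpy as np
from uniface.attribute import AgeGender

model = AgeGender()
frame = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)  # stand-in for a real image

result = model.predict(frame, [100, 100, 300, 300])
print(result.sex, result.age)          # e.g. 'Male 31'
print(result.race, result.age_group)   # None for AgeGender; only FairFace fills these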
@@ -1,3 +1,11 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Tests for factory functions (create_detector, create_recognizer, etc.)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
@@ -17,7 +25,7 @@ def test_create_detector_retinaface():
|
||||
Test creating a RetinaFace detector using factory function.
|
||||
"""
|
||||
detector = create_detector('retinaface')
|
||||
assert detector is not None, "Failed to create RetinaFace detector"
|
||||
assert detector is not None, 'Failed to create RetinaFace detector'
|
||||
|
||||
|
||||
def test_create_detector_scrfd():
|
||||
@@ -25,7 +33,7 @@ def test_create_detector_scrfd():
|
||||
Test creating a SCRFD detector using factory function.
|
||||
"""
|
||||
detector = create_detector('scrfd')
|
||||
assert detector is not None, "Failed to create SCRFD detector"
|
||||
assert detector is not None, 'Failed to create SCRFD detector'
|
||||
|
||||
|
||||
def test_create_detector_with_config():
|
||||
@@ -35,10 +43,10 @@ def test_create_detector_with_config():
|
||||
detector = create_detector(
|
||||
'retinaface',
|
||||
model_name=RetinaFaceWeights.MNET_V2,
|
||||
conf_thresh=0.8,
|
||||
nms_thresh=0.3
|
||||
confidence_threshold=0.8,
|
||||
nms_threshold=0.3,
|
||||
)
|
||||
assert detector is not None, "Failed to create detector with custom config"
|
||||
assert detector is not None, 'Failed to create detector with custom config'
|
||||
|
||||
|
||||
def test_create_detector_invalid_method():
|
||||
@@ -53,12 +61,8 @@ def test_create_detector_scrfd_with_model():
|
||||
"""
|
||||
Test creating SCRFD detector with specific model.
|
||||
"""
|
||||
detector = create_detector(
|
||||
'scrfd',
|
||||
model_name=SCRFDWeights.SCRFD_10G_KPS,
|
||||
conf_thresh=0.5
|
||||
)
|
||||
assert detector is not None, "Failed to create SCRFD with specific model"
|
||||
detector = create_detector('scrfd', model_name=SCRFDWeights.SCRFD_10G_KPS, confidence_threshold=0.5)
|
||||
assert detector is not None, 'Failed to create SCRFD with specific model'
|
||||
|
||||
|
||||
# create_recognizer tests
|
||||
@@ -67,7 +71,7 @@ def test_create_recognizer_arcface():
|
||||
Test creating an ArcFace recognizer using factory function.
|
||||
"""
|
||||
recognizer = create_recognizer('arcface')
|
||||
assert recognizer is not None, "Failed to create ArcFace recognizer"
|
||||
assert recognizer is not None, 'Failed to create ArcFace recognizer'
|
||||
|
||||
|
||||
def test_create_recognizer_mobileface():
|
||||
@@ -75,7 +79,7 @@ def test_create_recognizer_mobileface():
|
||||
Test creating a MobileFace recognizer using factory function.
|
||||
"""
|
||||
recognizer = create_recognizer('mobileface')
|
||||
assert recognizer is not None, "Failed to create MobileFace recognizer"
|
||||
assert recognizer is not None, 'Failed to create MobileFace recognizer'
|
||||
|
||||
|
||||
def test_create_recognizer_sphereface():
|
||||
@@ -83,7 +87,7 @@ def test_create_recognizer_sphereface():
|
||||
Test creating a SphereFace recognizer using factory function.
|
||||
"""
|
||||
recognizer = create_recognizer('sphereface')
|
||||
assert recognizer is not None, "Failed to create SphereFace recognizer"
|
||||
assert recognizer is not None, 'Failed to create SphereFace recognizer'
|
||||
|
||||
|
||||
def test_create_recognizer_invalid_method():
|
||||
@@ -100,7 +104,7 @@ def test_create_landmarker():
|
||||
Test creating a Landmark106 detector using factory function.
|
||||
"""
|
||||
landmarker = create_landmarker('2d106det')
|
||||
assert landmarker is not None, "Failed to create Landmark106 detector"
|
||||
assert landmarker is not None, 'Failed to create Landmark106 detector'
|
||||
|
||||
|
||||
def test_create_landmarker_default():
|
||||
@@ -108,7 +112,7 @@ def test_create_landmarker_default():
|
||||
Test creating landmarker with default parameters.
|
||||
"""
|
||||
landmarker = create_landmarker()
|
||||
assert landmarker is not None, "Failed to create default landmarker"
|
||||
assert landmarker is not None, 'Failed to create default landmarker'
|
||||
|
||||
|
||||
def test_create_landmarker_invalid_method():
|
||||
@@ -127,7 +131,7 @@ def test_detect_faces_retinaface():
|
||||
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
faces = detect_faces(mock_image, method='retinaface')
|
||||
|
||||
assert isinstance(faces, list), "detect_faces should return a list"
|
||||
assert isinstance(faces, list), 'detect_faces should return a list'
|
||||
|
||||
|
||||
def test_detect_faces_scrfd():
|
||||
@@ -137,7 +141,7 @@ def test_detect_faces_scrfd():
|
||||
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
faces = detect_faces(mock_image, method='scrfd')
|
||||
|
||||
assert isinstance(faces, list), "detect_faces should return a list"
|
||||
assert isinstance(faces, list), 'detect_faces should return a list'
|
||||
|
||||
|
||||
def test_detect_faces_with_threshold():
|
||||
@@ -145,13 +149,13 @@ def test_detect_faces_with_threshold():
|
||||
Test detect_faces with custom confidence threshold.
|
||||
"""
|
||||
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
faces = detect_faces(mock_image, method='retinaface', conf_thresh=0.8)
|
||||
faces = detect_faces(mock_image, method='retinaface', confidence_threshold=0.8)
|
||||
|
||||
assert isinstance(faces, list), "detect_faces should return a list"
|
||||
assert isinstance(faces, list), 'detect_faces should return a list'
|
||||
|
||||
# All detections should respect threshold
|
||||
for face in faces:
|
||||
assert face['confidence'] >= 0.8, "All detections should meet confidence threshold"
|
||||
assert face.confidence >= 0.8, 'All detections should meet confidence threshold'
|
||||
|
||||
|
||||
def test_detect_faces_default_method():
|
||||
@@ -161,7 +165,7 @@ def test_detect_faces_default_method():
|
||||
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
faces = detect_faces(mock_image) # No method specified
|
||||
|
||||
assert isinstance(faces, list), "detect_faces should return a list with default method"
|
||||
assert isinstance(faces, list), 'detect_faces should return a list with default method'
|
||||
|
||||
|
||||
def test_detect_faces_empty_image():
|
||||
@@ -171,8 +175,8 @@ def test_detect_faces_empty_image():
|
||||
empty_image = np.zeros((640, 640, 3), dtype=np.uint8)
|
||||
faces = detect_faces(empty_image, method='retinaface')
|
||||
|
||||
assert isinstance(faces, list), "Should return a list even for empty image"
|
||||
assert len(faces) == 0, "Should detect no faces in blank image"
|
||||
assert isinstance(faces, list), 'Should return a list even for empty image'
|
||||
assert len(faces) == 0, 'Should detect no faces in blank image'
|
||||
|
||||
|
||||
# list_available_detectors tests
|
||||
@@ -182,8 +186,8 @@ def test_list_available_detectors():
|
||||
"""
|
||||
detectors = list_available_detectors()
|
||||
|
||||
assert isinstance(detectors, dict), "Should return a dictionary of detectors"
|
||||
assert len(detectors) > 0, "Should have at least one detector available"
|
||||
assert isinstance(detectors, dict), 'Should return a dictionary of detectors'
|
||||
assert len(detectors) > 0, 'Should have at least one detector available'
|
||||
|
||||
|
||||
def test_list_available_detectors_contents():
|
||||
@@ -206,7 +210,7 @@ def test_detector_inference_from_factory():
|
||||
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
|
||||
faces = detector.detect(mock_image)
|
||||
assert isinstance(faces, list), "Detector should return list of faces"
|
||||
assert isinstance(faces, list), 'Detector should return list of faces'
|
||||
|
||||
|
||||
def test_recognizer_inference_from_factory():
|
||||
@@ -217,8 +221,8 @@ def test_recognizer_inference_from_factory():
|
||||
mock_image = np.random.randint(0, 255, (112, 112, 3), dtype=np.uint8)
|
||||
|
||||
embedding = recognizer.get_embedding(mock_image)
|
||||
assert embedding is not None, "Recognizer should return embedding"
|
||||
assert embedding.shape[1] == 512, "Should return 512-dimensional embedding"
|
||||
assert embedding is not None, 'Recognizer should return embedding'
|
||||
assert embedding.shape[1] == 512, 'Should return 512-dimensional embedding'
|
||||
|
||||
|
||||
def test_landmarker_inference_from_factory():
|
||||
@@ -230,8 +234,8 @@ def test_landmarker_inference_from_factory():
|
||||
mock_bbox = [100, 100, 300, 300]
|
||||
|
||||
landmarks = landmarker.get_landmarks(mock_image, mock_bbox)
|
||||
assert landmarks is not None, "Landmarker should return landmarks"
|
||||
assert landmarks.shape == (106, 2), "Should return 106 landmarks"
|
||||
assert landmarks is not None, 'Landmarker should return landmarks'
|
||||
assert landmarks.shape == (106, 2), 'Should return 106 landmarks'
|
||||
|
||||
|
||||
def test_multiple_detector_creation():
|
||||
@@ -243,15 +247,15 @@ def test_multiple_detector_creation():
|
||||
|
||||
assert detector1 is not None
|
||||
assert detector2 is not None
|
||||
assert detector1 is not detector2, "Should create separate instances"
|
||||
assert detector1 is not detector2, 'Should create separate instances'
|
||||
|
||||
|
||||
def test_detector_with_different_configs():
|
||||
"""
|
||||
Test creating multiple detectors with different configurations.
|
||||
"""
|
||||
detector_high_thresh = create_detector('retinaface', conf_thresh=0.9)
|
||||
detector_low_thresh = create_detector('retinaface', conf_thresh=0.3)
|
||||
detector_high_thresh = create_detector('retinaface', confidence_threshold=0.9)
|
||||
detector_low_thresh = create_detector('retinaface', confidence_threshold=0.3)
|
||||
|
||||
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
|
||||
@@ -267,12 +271,12 @@ def test_factory_returns_correct_types():
|
||||
"""
|
||||
Test that factory functions return instances of the correct types.
|
||||
"""
|
||||
from uniface import RetinaFace, ArcFace, Landmark106
|
||||
from uniface import ArcFace, Landmark106, RetinaFace
|
||||
|
||||
detector = create_detector('retinaface')
|
||||
recognizer = create_recognizer('arcface')
|
||||
landmarker = create_landmarker('2d106det')
|
||||
|
||||
assert isinstance(detector, RetinaFace), "Should return RetinaFace instance"
|
||||
assert isinstance(recognizer, ArcFace), "Should return ArcFace instance"
|
||||
assert isinstance(landmarker, Landmark106), "Should return Landmark106 instance"
|
||||
assert isinstance(detector, RetinaFace), 'Should return RetinaFace instance'
|
||||
assert isinstance(recognizer, ArcFace), 'Should return ArcFace instance'
|
||||
assert isinstance(landmarker, Landmark106), 'Should return Landmark106 instance'
|
||||
|
||||
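Taken together, these tests pin down the renamed factory keyword arguments (confidence_threshold / nms_threshold replace conf_thresh / nms_thresh). A short sketch of the updated calls:

import numpy as np
from uniface import create_detector, create_recognizer, create_landmarker

detector = create_detector('retinaface', confidence_threshold=0.8, nms_threshold=0.3)
recognizer = create_recognizer('arcface')
landmarker = create_landmarker('2d106det')

frame = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
faces = detector.detect(frame)  # list of Face dataclasses: face.bbox, face.confidence, face.landmarks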
@@ -1,3 +1,11 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Tests for 106-point facial landmark detector."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
@@ -20,17 +28,17 @@ def mock_bbox():
|
||||
|
||||
|
||||
def test_model_initialization(landmark_model):
|
||||
assert landmark_model is not None, "Landmark106 model initialization failed."
|
||||
assert landmark_model is not None, 'Landmark106 model initialization failed.'
|
||||
|
||||
|
||||
def test_landmark_detection(landmark_model, mock_image, mock_bbox):
|
||||
landmarks = landmark_model.get_landmarks(mock_image, mock_bbox)
|
||||
assert landmarks.shape == (106, 2), f"Expected shape (106, 2), got {landmarks.shape}"
|
||||
assert landmarks.shape == (106, 2), f'Expected shape (106, 2), got {landmarks.shape}'
|
||||
|
||||
|
||||
def test_landmark_dtype(landmark_model, mock_image, mock_bbox):
|
||||
landmarks = landmark_model.get_landmarks(mock_image, mock_bbox)
|
||||
assert landmarks.dtype == np.float32, f"Expected float32, got {landmarks.dtype}"
|
||||
assert landmarks.dtype == np.float32, f'Expected float32, got {landmarks.dtype}'
|
||||
|
||||
|
||||
def test_landmark_coordinates_within_image(landmark_model, mock_image, mock_bbox):
|
||||
@@ -45,8 +53,8 @@ def test_landmark_coordinates_within_image(landmark_model, mock_image, mock_bbox
|
||||
x_in_bounds = np.sum((x_coords >= x1 - margin) & (x_coords <= x2 + margin))
|
||||
y_in_bounds = np.sum((y_coords >= y1 - margin) & (y_coords <= y2 + margin))
|
||||
|
||||
assert x_in_bounds >= 95, f"Only {x_in_bounds}/106 x-coordinates within bounds"
|
||||
assert y_in_bounds >= 95, f"Only {y_in_bounds}/106 y-coordinates within bounds"
|
||||
assert x_in_bounds >= 95, f'Only {x_in_bounds}/106 x-coordinates within bounds'
|
||||
assert y_in_bounds >= 95, f'Only {y_in_bounds}/106 y-coordinates within bounds'
|
||||
|
||||
|
||||
def test_different_bbox_sizes(landmark_model, mock_image):
|
||||
@@ -58,22 +66,22 @@ def test_different_bbox_sizes(landmark_model, mock_image):
|
||||
|
||||
for bbox in test_bboxes:
|
||||
landmarks = landmark_model.get_landmarks(mock_image, bbox)
|
||||
assert landmarks.shape == (106, 2), f"Failed for bbox {bbox}"
|
||||
assert landmarks.shape == (106, 2), f'Failed for bbox {bbox}'
|
||||
|
||||
|
||||
def test_landmark_array_format(landmark_model, mock_image, mock_bbox):
|
||||
landmarks = landmark_model.get_landmarks(mock_image, mock_bbox)
|
||||
landmarks_int = landmarks.astype(int)
|
||||
|
||||
assert landmarks_int.shape == (106, 2), "Integer conversion should preserve shape"
|
||||
assert landmarks_int.dtype in [np.int32, np.int64], "Should convert to integer type"
|
||||
assert landmarks_int.shape == (106, 2), 'Integer conversion should preserve shape'
|
||||
assert landmarks_int.dtype in [np.int32, np.int64], 'Should convert to integer type'
|
||||
|
||||
|
||||
def test_consistency(landmark_model, mock_image, mock_bbox):
|
||||
landmarks1 = landmark_model.get_landmarks(mock_image, mock_bbox)
|
||||
landmarks2 = landmark_model.get_landmarks(mock_image, mock_bbox)
|
||||
|
||||
assert np.allclose(landmarks1, landmarks2), "Same input should produce same landmarks"
|
||||
assert np.allclose(landmarks1, landmarks2), 'Same input should produce same landmarks'
|
||||
|
||||
|
||||
def test_different_image_sizes(landmark_model, mock_bbox):
|
||||
@@ -82,19 +90,19 @@ def test_different_image_sizes(landmark_model, mock_bbox):
|
||||
for size in test_sizes:
|
||||
mock_image = np.random.randint(0, 255, size, dtype=np.uint8)
|
||||
landmarks = landmark_model.get_landmarks(mock_image, mock_bbox)
|
||||
assert landmarks.shape == (106, 2), f"Failed for image size {size}"
|
||||
assert landmarks.shape == (106, 2), f'Failed for image size {size}'
|
||||
|
||||
|
||||
def test_bbox_list_format(landmark_model, mock_image):
|
||||
bbox_list = [100, 100, 300, 300]
|
||||
landmarks = landmark_model.get_landmarks(mock_image, bbox_list)
|
||||
assert landmarks.shape == (106, 2), "Should work with bbox as list"
|
||||
assert landmarks.shape == (106, 2), 'Should work with bbox as list'
|
||||
|
||||
|
||||
def test_bbox_array_format(landmark_model, mock_image):
|
||||
bbox_array = np.array([100, 100, 300, 300])
|
||||
landmarks = landmark_model.get_landmarks(mock_image, bbox_array)
|
||||
assert landmarks.shape == (106, 2), "Should work with bbox as numpy array"
|
||||
assert landmarks.shape == (106, 2), 'Should work with bbox as numpy array'
|
||||
|
||||
|
||||
def test_landmark_distribution(landmark_model, mock_image, mock_bbox):
|
||||
@@ -103,5 +111,5 @@ def test_landmark_distribution(landmark_model, mock_image, mock_bbox):
|
||||
x_variance = np.var(landmarks[:, 0])
|
||||
y_variance = np.var(landmarks[:, 1])
|
||||
|
||||
assert x_variance > 0, "Landmarks should have variation in x-coordinates"
|
||||
assert y_variance > 0, "Landmarks should have variation in y-coordinates"
|
||||
assert x_variance > 0, 'Landmarks should have variation in x-coordinates'
|
||||
assert y_variance > 0, 'Landmarks should have variation in y-coordinates'
|
||||
|
||||
122
tests/test_parsing.py
Normal file
@@ -0,0 +1,122 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Tests for BiSeNet face parsing model."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from uniface.constants import ParsingWeights
|
||||
from uniface.parsing import BiSeNet, create_face_parser
|
||||
|
||||
|
||||
def test_bisenet_initialization():
|
||||
"""Test BiSeNet initialization."""
|
||||
parser = BiSeNet()
|
||||
assert parser is not None
|
||||
assert parser.input_size == (512, 512)
|
||||
|
||||
|
||||
def test_bisenet_with_different_models():
|
||||
"""Test BiSeNet with different model weights."""
|
||||
parser_resnet18 = BiSeNet(model_name=ParsingWeights.RESNET18)
|
||||
parser_resnet34 = BiSeNet(model_name=ParsingWeights.RESNET34)
|
||||
|
||||
assert parser_resnet18 is not None
|
||||
assert parser_resnet34 is not None
|
||||
|
||||
|
||||
def test_bisenet_preprocess():
|
||||
"""Test preprocessing."""
|
||||
parser = BiSeNet()
|
||||
|
||||
# Create a dummy face image
|
||||
face_image = np.random.randint(0, 255, (256, 256, 3), dtype=np.uint8)
|
||||
|
||||
# Preprocess
|
||||
preprocessed = parser.preprocess(face_image)
|
||||
|
||||
assert preprocessed.shape == (1, 3, 512, 512)
|
||||
assert preprocessed.dtype == np.float32
|
||||
|
||||
|
||||
def test_bisenet_postprocess():
|
||||
"""Test postprocessing."""
|
||||
parser = BiSeNet()
|
||||
|
||||
# Create dummy model output (batch_size=1, num_classes=19, H=512, W=512)
|
||||
dummy_output = np.random.randn(1, 19, 512, 512).astype(np.float32)
|
||||
|
||||
# Postprocess
|
||||
mask = parser.postprocess(dummy_output, original_size=(256, 256))
|
||||
|
||||
assert mask.shape == (256, 256)
|
||||
assert mask.dtype == np.uint8
|
||||
assert mask.min() >= 0
|
||||
assert mask.max() < 19 # 19 classes (0-18)
|
||||
|
||||
|
||||
def test_bisenet_parse():
|
||||
"""Test end-to-end parsing."""
|
||||
parser = BiSeNet()
|
||||
|
||||
# Create a dummy face image
|
||||
face_image = np.random.randint(0, 255, (256, 256, 3), dtype=np.uint8)
|
||||
|
||||
# Parse
|
||||
mask = parser.parse(face_image)
|
||||
|
||||
assert mask.shape == (256, 256)
|
||||
assert mask.dtype == np.uint8
|
||||
assert mask.min() >= 0
|
||||
assert mask.max() < 19
|
||||
|
||||
|
||||
def test_bisenet_callable():
|
||||
"""Test that BiSeNet is callable."""
|
||||
parser = BiSeNet()
|
||||
face_image = np.random.randint(0, 255, (256, 256, 3), dtype=np.uint8)
|
||||
|
||||
# Should work as callable
|
||||
mask = parser(face_image)
|
||||
|
||||
assert mask.shape == (256, 256)
|
||||
assert mask.dtype == np.uint8
|
||||
|
||||
|
||||
def test_create_face_parser_with_enum():
|
||||
"""Test factory function with enum."""
|
||||
parser = create_face_parser(ParsingWeights.RESNET18)
|
||||
assert parser is not None
|
||||
assert isinstance(parser, BiSeNet)
|
||||
|
||||
|
||||
def test_create_face_parser_with_string():
|
||||
"""Test factory function with string."""
|
||||
parser = create_face_parser('parsing_resnet18')
|
||||
assert parser is not None
|
||||
assert isinstance(parser, BiSeNet)
|
||||
|
||||
|
||||
def test_create_face_parser_invalid_model():
|
||||
"""Test factory function with invalid model name."""
|
||||
with pytest.raises(ValueError, match='Unknown face parsing model'):
|
||||
create_face_parser('invalid_model')
|
||||
|
||||
|
||||
def test_bisenet_different_input_sizes():
|
||||
"""Test parsing with different input image sizes."""
|
||||
parser = BiSeNet()
|
||||
|
||||
# Test with different sizes
|
||||
sizes = [(128, 128), (256, 256), (512, 512), (640, 480)]
|
||||
|
||||
for h, w in sizes:
|
||||
face_image = np.random.randint(0, 255, (h, w, 3), dtype=np.uint8)
|
||||
mask = parser.parse(face_image)
|
||||
|
||||
assert mask.shape == (h, w), f'Failed for size {h}x{w}'
|
||||
assert mask.dtype == np.uint8
|
||||
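The new parsing tests describe BiSeNet's contract: a uint8 label map the size of the input, with class ids 0-18. A small sketch that colours the mask; the palette and face crop are random stand-ins:

import numpy as np
from uniface.parsing import create_face_parser

parser = create_face_parser('parsing_resnet18')
face = np.random.randint(0, 255, (256, 256, 3), dtype=np.uint8)  # stand-in for a face crop

mask = parser(face)  # uint8, shape (256, 256), values in [0, 18]
palette = np.random.randint(0, 255, (19, 3), dtype=np.uint8)
colored = palette[mask]  # (256, 256, 3) visualisation of the 19 classes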
@@ -1,3 +1,11 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Tests for face recognition models (ArcFace, MobileFace, SphereFace)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
@@ -41,13 +49,16 @@ def mock_landmarks():
|
||||
"""
|
||||
Create mock 5-point facial landmarks.
|
||||
"""
|
||||
return np.array([
|
||||
[38.2946, 51.6963],
|
||||
[73.5318, 51.5014],
|
||||
[56.0252, 71.7366],
|
||||
[41.5493, 92.3655],
|
||||
[70.7299, 92.2041]
|
||||
], dtype=np.float32)
|
||||
return np.array(
|
||||
[
|
||||
[38.2946, 51.6963],
|
||||
[73.5318, 51.5014],
|
||||
[56.0252, 71.7366],
|
||||
[41.5493, 92.3655],
|
||||
[70.7299, 92.2041],
|
||||
],
|
||||
dtype=np.float32,
|
||||
)
|
||||
|
||||
|
||||
# ArcFace Tests
|
||||
@@ -55,7 +66,7 @@ def test_arcface_initialization(arcface_model):
|
||||
"""
|
||||
Test that the ArcFace model initializes correctly.
|
||||
"""
|
||||
assert arcface_model is not None, "ArcFace model initialization failed."
|
||||
assert arcface_model is not None, 'ArcFace model initialization failed.'
|
||||
|
||||
|
||||
def test_arcface_embedding_shape(arcface_model, mock_aligned_face):
|
||||
@@ -65,8 +76,8 @@ def test_arcface_embedding_shape(arcface_model, mock_aligned_face):
|
||||
embedding = arcface_model.get_embedding(mock_aligned_face)
|
||||
|
||||
# ArcFace typically produces 512-dimensional embeddings
|
||||
assert embedding.shape[1] == 512, f"Expected 512-dim embedding, got {embedding.shape[1]}"
|
||||
assert embedding.shape[0] == 1, "Embedding should have batch dimension of 1"
|
||||
assert embedding.shape[1] == 512, f'Expected 512-dim embedding, got {embedding.shape[1]}'
|
||||
assert embedding.shape[0] == 1, 'Embedding should have batch dimension of 1'
|
||||
|
||||
|
||||
def test_arcface_normalized_embedding(arcface_model, mock_landmarks):
|
||||
@@ -80,7 +91,7 @@ def test_arcface_normalized_embedding(arcface_model, mock_landmarks):
|
||||
|
||||
# Check that embedding is normalized (L2 norm ≈ 1.0)
|
||||
norm = np.linalg.norm(embedding)
|
||||
assert np.isclose(norm, 1.0, atol=1e-5), f"Normalized embedding should have norm 1.0, got {norm}"
|
||||
assert np.isclose(norm, 1.0, atol=1e-5), f'Normalized embedding should have norm 1.0, got {norm}'
|
||||
|
||||
|
||||
def test_arcface_embedding_dtype(arcface_model, mock_aligned_face):
|
||||
@@ -88,7 +99,7 @@ def test_arcface_embedding_dtype(arcface_model, mock_aligned_face):
|
||||
Test that embeddings have the correct data type.
|
||||
"""
|
||||
embedding = arcface_model.get_embedding(mock_aligned_face)
|
||||
assert embedding.dtype == np.float32, f"Expected float32, got {embedding.dtype}"
|
||||
assert embedding.dtype == np.float32, f'Expected float32, got {embedding.dtype}'
|
||||
|
||||
|
||||
def test_arcface_consistency(arcface_model, mock_aligned_face):
|
||||
@@ -98,7 +109,7 @@ def test_arcface_consistency(arcface_model, mock_aligned_face):
|
||||
embedding1 = arcface_model.get_embedding(mock_aligned_face)
|
||||
embedding2 = arcface_model.get_embedding(mock_aligned_face)
|
||||
|
||||
assert np.allclose(embedding1, embedding2), "Same input should produce same embedding"
|
||||
assert np.allclose(embedding1, embedding2), 'Same input should produce same embedding'
|
||||
|
||||
|
||||
# MobileFace Tests
|
||||
@@ -106,7 +117,7 @@ def test_mobileface_initialization(mobileface_model):
|
||||
"""
|
||||
Test that the MobileFace model initializes correctly.
|
||||
"""
|
||||
assert mobileface_model is not None, "MobileFace model initialization failed."
|
||||
assert mobileface_model is not None, 'MobileFace model initialization failed.'
|
||||
|
||||
|
||||
def test_mobileface_embedding_shape(mobileface_model, mock_aligned_face):
|
||||
@@ -116,8 +127,8 @@ def test_mobileface_embedding_shape(mobileface_model, mock_aligned_face):
|
||||
embedding = mobileface_model.get_embedding(mock_aligned_face)
|
||||
|
||||
# MobileFace typically produces 512-dimensional embeddings
|
||||
assert embedding.shape[1] == 512, f"Expected 512-dim embedding, got {embedding.shape[1]}"
|
||||
assert embedding.shape[0] == 1, "Embedding should have batch dimension of 1"
|
||||
assert embedding.shape[1] == 512, f'Expected 512-dim embedding, got {embedding.shape[1]}'
|
||||
assert embedding.shape[0] == 1, 'Embedding should have batch dimension of 1'
|
||||
|
||||
|
||||
def test_mobileface_normalized_embedding(mobileface_model, mock_landmarks):
|
||||
@@ -129,7 +140,7 @@ def test_mobileface_normalized_embedding(mobileface_model, mock_landmarks):
|
||||
embedding = mobileface_model.get_normalized_embedding(mock_image, mock_landmarks)
|
||||
|
||||
norm = np.linalg.norm(embedding)
|
||||
assert np.isclose(norm, 1.0, atol=1e-5), f"Normalized embedding should have norm 1.0, got {norm}"
|
||||
assert np.isclose(norm, 1.0, atol=1e-5), f'Normalized embedding should have norm 1.0, got {norm}'
|
||||
|
||||
|
||||
# SphereFace Tests
|
||||
@@ -137,7 +148,7 @@ def test_sphereface_initialization(sphereface_model):
|
||||
"""
|
||||
Test that the SphereFace model initializes correctly.
|
||||
"""
|
||||
assert sphereface_model is not None, "SphereFace model initialization failed."
|
||||
assert sphereface_model is not None, 'SphereFace model initialization failed.'
|
||||
|
||||
|
||||
def test_sphereface_embedding_shape(sphereface_model, mock_aligned_face):
|
||||
@@ -147,8 +158,8 @@ def test_sphereface_embedding_shape(sphereface_model, mock_aligned_face):
|
||||
embedding = sphereface_model.get_embedding(mock_aligned_face)
|
||||
|
||||
# SphereFace typically produces 512-dimensional embeddings
|
||||
assert embedding.shape[1] == 512, f"Expected 512-dim embedding, got {embedding.shape[1]}"
|
||||
assert embedding.shape[0] == 1, "Embedding should have batch dimension of 1"
|
||||
assert embedding.shape[1] == 512, f'Expected 512-dim embedding, got {embedding.shape[1]}'
|
||||
assert embedding.shape[0] == 1, 'Embedding should have batch dimension of 1'
|
||||
|
||||
|
||||
def test_sphereface_normalized_embedding(sphereface_model, mock_landmarks):
|
||||
@@ -160,7 +171,7 @@ def test_sphereface_normalized_embedding(sphereface_model, mock_landmarks):
|
||||
embedding = sphereface_model.get_normalized_embedding(mock_image, mock_landmarks)
|
||||
|
||||
norm = np.linalg.norm(embedding)
|
||||
assert np.isclose(norm, 1.0, atol=1e-5), f"Normalized embedding should have norm 1.0, got {norm}"
|
||||
assert np.isclose(norm, 1.0, atol=1e-5), f'Normalized embedding should have norm 1.0, got {norm}'
|
||||
|
||||
|
||||
# Cross-model comparison tests
|
||||
@@ -173,8 +184,7 @@ def test_different_models_different_embeddings(arcface_model, mobileface_model,
|
||||
|
||||
# Embeddings should be different (with high probability for random input)
|
||||
# We check that they're not identical
|
||||
assert not np.allclose(arcface_emb, mobileface_emb), \
|
||||
"Different models should produce different embeddings"
|
||||
assert not np.allclose(arcface_emb, mobileface_emb), 'Different models should produce different embeddings'
|
||||
|
||||
|
||||
def test_embedding_similarity_computation(arcface_model, mock_aligned_face):
|
||||
@@ -191,10 +201,11 @@ def test_embedding_similarity_computation(arcface_model, mock_aligned_face):
|
||||
|
||||
# Compute cosine similarity
|
||||
from uniface import compute_similarity
|
||||
|
||||
similarity = compute_similarity(emb1, emb2)
|
||||
|
||||
# Similarity should be between -1 and 1
|
||||
assert -1.0 <= similarity <= 1.0, f"Similarity should be in [-1, 1], got {similarity}"
|
||||
assert -1.0 <= similarity <= 1.0, f'Similarity should be in [-1, 1], got {similarity}'
|
||||
|
||||
|
||||
def test_same_face_high_similarity(arcface_model, mock_aligned_face):
|
||||
@@ -205,7 +216,8 @@ def test_same_face_high_similarity(arcface_model, mock_aligned_face):
|
||||
emb2 = arcface_model.get_embedding(mock_aligned_face)
|
||||
|
||||
from uniface import compute_similarity
|
||||
|
||||
similarity = compute_similarity(emb1, emb2)
|
||||
|
||||
# Same image should have similarity close to 1.0
|
||||
assert similarity > 0.99, f"Same face should have similarity > 0.99, got {similarity}"
|
||||
assert similarity > 0.99, f'Same face should have similarity > 0.99, got {similarity}'
|
||||
|
||||
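For orientation, the recognizer API these tests exercise is embedding extraction plus cosine similarity. A minimal sketch on a random 112x112 stand-in crop:

import numpy as np
from uniface import ArcFace, compute_similarity

model = ArcFace()
face = np.random.randint(0, 255, (112, 112, 3), dtype=np.uint8)  # stand-in for an aligned crop

emb = model.get_embedding(face)       # float32, shape (1, 512)
score = compute_similarity(emb, emb)  # ~1.0 for identical inputs
print(emb.shape, float(score))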
@@ -1,3 +1,11 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Tests for RetinaFace detector."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
@@ -9,35 +17,35 @@ from uniface.detection import RetinaFace
|
||||
def retinaface_model():
|
||||
return RetinaFace(
|
||||
model_name=RetinaFaceWeights.MNET_V2,
|
||||
conf_thresh=0.5,
|
||||
confidence_threshold=0.5,
|
||||
pre_nms_topk=5000,
|
||||
nms_thresh=0.4,
|
||||
nms_threshold=0.4,
|
||||
post_nms_topk=750,
|
||||
)
|
||||
|
||||
|
||||
def test_model_initialization(retinaface_model):
|
||||
assert retinaface_model is not None, "Model initialization failed."
|
||||
assert retinaface_model is not None, 'Model initialization failed.'
|
||||
|
||||
|
||||
def test_inference_on_640x640_image(retinaface_model):
|
||||
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
faces = retinaface_model.detect(mock_image)
|
||||
|
||||
assert isinstance(faces, list), "Detections should be a list."
|
||||
assert isinstance(faces, list), 'Detections should be a list.'
|
||||
|
||||
for face in faces:
|
||||
assert isinstance(face, dict), "Each detection should be a dictionary."
|
||||
assert "bbox" in face, "Each detection should have a 'bbox' key."
|
||||
assert "confidence" in face, "Each detection should have a 'confidence' key."
|
||||
assert "landmarks" in face, "Each detection should have a 'landmarks' key."
|
||||
# Face is a dataclass, check attributes exist
|
||||
assert hasattr(face, 'bbox'), "Each detection should have a 'bbox' attribute."
|
||||
assert hasattr(face, 'confidence'), "Each detection should have a 'confidence' attribute."
|
||||
assert hasattr(face, 'landmarks'), "Each detection should have a 'landmarks' attribute."
|
||||
|
||||
bbox = face["bbox"]
|
||||
assert len(bbox) == 4, "BBox should have 4 values (x1, y1, x2, y2)."
|
||||
bbox = face.bbox
|
||||
assert len(bbox) == 4, 'BBox should have 4 values (x1, y1, x2, y2).'
|
||||
|
||||
landmarks = face["landmarks"]
|
||||
assert len(landmarks) == 5, "Should have 5 landmark points."
|
||||
assert all(len(pt) == 2 for pt in landmarks), "Each landmark should be (x, y)."
|
||||
landmarks = face.landmarks
|
||||
assert len(landmarks) == 5, 'Should have 5 landmark points.'
|
||||
assert all(len(pt) == 2 for pt in landmarks), 'Each landmark should be (x, y).'
|
||||
|
||||
|
||||
def test_confidence_threshold(retinaface_model):
|
||||
@@ -45,11 +53,11 @@ def test_confidence_threshold(retinaface_model):
|
||||
faces = retinaface_model.detect(mock_image)
|
||||
|
||||
for face in faces:
|
||||
confidence = face["confidence"]
|
||||
assert confidence >= 0.5, f"Detection has confidence {confidence} below threshold 0.5"
|
||||
confidence = face.confidence
|
||||
assert confidence >= 0.5, f'Detection has confidence {confidence} below threshold 0.5'
|
||||
|
||||
|
||||
def test_no_faces_detected(retinaface_model):
|
||||
empty_image = np.zeros((640, 640, 3), dtype=np.uint8)
|
||||
faces = retinaface_model.detect(empty_image)
|
||||
assert len(faces) == 0, "Should detect no faces in a blank image."
|
||||
assert len(faces) == 0, 'Should detect no faces in a blank image.'
|
||||
|
||||
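The key change in these detector tests is that detections are now Face dataclasses rather than dicts, and the constructor keywords were renamed. A short sketch of the updated access pattern:

import numpy as np
from uniface.detection import RetinaFace

detector = RetinaFace(confidence_threshold=0.5, nms_threshold=0.4)
frame = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)

for face in detector.detect(frame):
    print(face.bbox, face.confidence, len(face.landmarks))  # attribute access, not face['bbox']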
@@ -1,3 +1,11 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Tests for SCRFD detector."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
@@ -9,33 +17,33 @@ from uniface.detection import SCRFD
|
||||
def scrfd_model():
|
||||
return SCRFD(
|
||||
model_name=SCRFDWeights.SCRFD_500M_KPS,
|
||||
conf_thresh=0.5,
|
||||
nms_thresh=0.4,
|
||||
confidence_threshold=0.5,
|
||||
nms_threshold=0.4,
|
||||
)
|
||||
|
||||
|
||||
def test_model_initialization(scrfd_model):
|
||||
assert scrfd_model is not None, "Model initialization failed."
|
||||
assert scrfd_model is not None, 'Model initialization failed.'
|
||||
|
||||
|
||||
def test_inference_on_640x640_image(scrfd_model):
|
||||
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
faces = scrfd_model.detect(mock_image)
|
||||
|
||||
assert isinstance(faces, list), "Detections should be a list."
|
||||
assert isinstance(faces, list), 'Detections should be a list.'
|
||||
|
||||
for face in faces:
|
||||
assert isinstance(face, dict), "Each detection should be a dictionary."
|
||||
assert "bbox" in face, "Each detection should have a 'bbox' key."
|
||||
assert "confidence" in face, "Each detection should have a 'confidence' key."
|
||||
assert "landmarks" in face, "Each detection should have a 'landmarks' key."
|
||||
# Face is a dataclass, check attributes exist
|
||||
assert hasattr(face, 'bbox'), "Each detection should have a 'bbox' attribute."
|
||||
assert hasattr(face, 'confidence'), "Each detection should have a 'confidence' attribute."
|
||||
assert hasattr(face, 'landmarks'), "Each detection should have a 'landmarks' attribute."
|
||||
|
||||
bbox = face["bbox"]
|
||||
assert len(bbox) == 4, "BBox should have 4 values (x1, y1, x2, y2)."
|
||||
bbox = face.bbox
|
||||
assert len(bbox) == 4, 'BBox should have 4 values (x1, y1, x2, y2).'
|
||||
|
||||
landmarks = face["landmarks"]
|
||||
assert len(landmarks) == 5, "Should have 5 landmark points."
|
||||
assert all(len(pt) == 2 for pt in landmarks), "Each landmark should be (x, y)."
|
||||
landmarks = face.landmarks
|
||||
assert len(landmarks) == 5, 'Should have 5 landmark points.'
|
||||
assert all(len(pt) == 2 for pt in landmarks), 'Each landmark should be (x, y).'
|
||||
|
||||
|
||||
def test_confidence_threshold(scrfd_model):
|
||||
@@ -43,14 +51,14 @@ def test_confidence_threshold(scrfd_model):
|
||||
faces = scrfd_model.detect(mock_image)
|
||||
|
||||
for face in faces:
|
||||
confidence = face["confidence"]
|
||||
assert confidence >= 0.5, f"Detection has confidence {confidence} below threshold 0.5"
|
||||
confidence = face.confidence
|
||||
assert confidence >= 0.5, f'Detection has confidence {confidence} below threshold 0.5'
|
||||
|
||||
|
||||
def test_no_faces_detected(scrfd_model):
|
||||
empty_image = np.zeros((640, 640, 3), dtype=np.uint8)
|
||||
faces = scrfd_model.detect(empty_image)
|
||||
assert len(faces) == 0, "Should detect no faces in a blank image."
|
||||
assert len(faces) == 0, 'Should detect no faces in a blank image.'
|
||||
|
||||
|
||||
def test_different_input_sizes(scrfd_model):
|
||||
@@ -59,13 +67,13 @@ def test_different_input_sizes(scrfd_model):
|
||||
for size in test_sizes:
|
||||
mock_image = np.random.randint(0, 255, size, dtype=np.uint8)
|
||||
faces = scrfd_model.detect(mock_image)
|
||||
assert isinstance(faces, list), f"Should return list for size {size}"
|
||||
assert isinstance(faces, list), f'Should return list for size {size}'
|
||||
|
||||
|
||||
def test_scrfd_10g_model():
|
||||
model = SCRFD(model_name=SCRFDWeights.SCRFD_10G_KPS, conf_thresh=0.5)
|
||||
assert model is not None, "SCRFD 10G model initialization failed."
|
||||
model = SCRFD(model_name=SCRFDWeights.SCRFD_10G_KPS, confidence_threshold=0.5)
|
||||
assert model is not None, 'SCRFD 10G model initialization failed.'
|
||||
|
||||
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
faces = model.detect(mock_image)
|
||||
assert isinstance(faces, list), "SCRFD 10G should return list of detections."
|
||||
assert isinstance(faces, list), 'SCRFD 10G should return list of detections.'
|
||||
|
||||
282
tests/test_types.py
Normal file
@@ -0,0 +1,282 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Tests for UniFace type definitions (dataclasses)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from uniface.types import AttributeResult, EmotionResult, Face, GazeResult, SpoofingResult
|
||||
|
||||
|
||||
class TestGazeResult:
|
||||
"""Tests for GazeResult dataclass."""
|
||||
|
||||
def test_creation(self):
|
||||
result = GazeResult(pitch=0.1, yaw=-0.2)
|
||||
assert result.pitch == 0.1
|
||||
assert result.yaw == -0.2
|
||||
|
||||
def test_immutability(self):
|
||||
result = GazeResult(pitch=0.1, yaw=-0.2)
|
||||
with pytest.raises(AttributeError):
|
||||
result.pitch = 0.5 # type: ignore
|
||||
|
||||
def test_repr(self):
|
||||
result = GazeResult(pitch=0.1234, yaw=-0.5678)
|
||||
repr_str = repr(result)
|
||||
assert 'GazeResult' in repr_str
|
||||
assert '0.1234' in repr_str
|
||||
assert '-0.5678' in repr_str
|
||||
|
||||
def test_equality(self):
|
||||
result1 = GazeResult(pitch=0.1, yaw=-0.2)
|
||||
result2 = GazeResult(pitch=0.1, yaw=-0.2)
|
||||
assert result1 == result2
|
||||
|
||||
def test_hashable(self):
|
||||
"""Frozen dataclasses should be hashable."""
|
||||
result = GazeResult(pitch=0.1, yaw=-0.2)
|
||||
# Should not raise
|
||||
hash(result)
|
||||
# Can be used in sets/dicts
|
||||
result_set = {result}
|
||||
assert result in result_set
|
||||
|
||||
|
||||
class TestSpoofingResult:
|
||||
"""Tests for SpoofingResult dataclass."""
|
||||
|
||||
def test_creation_real(self):
|
||||
result = SpoofingResult(is_real=True, confidence=0.95)
|
||||
assert result.is_real is True
|
||||
assert result.confidence == 0.95
|
||||
|
||||
def test_creation_fake(self):
|
||||
result = SpoofingResult(is_real=False, confidence=0.87)
|
||||
assert result.is_real is False
|
||||
assert result.confidence == 0.87
|
||||
|
||||
def test_immutability(self):
|
||||
result = SpoofingResult(is_real=True, confidence=0.95)
|
||||
with pytest.raises(AttributeError):
|
||||
result.is_real = False # type: ignore
|
||||
|
||||
def test_repr_real(self):
|
||||
result = SpoofingResult(is_real=True, confidence=0.9512)
|
||||
repr_str = repr(result)
|
||||
assert 'SpoofingResult' in repr_str
|
||||
assert 'Real' in repr_str
|
||||
assert '0.9512' in repr_str
|
||||
|
||||
def test_repr_fake(self):
|
||||
result = SpoofingResult(is_real=False, confidence=0.8765)
|
||||
repr_str = repr(result)
|
||||
assert 'Fake' in repr_str
|
||||
|
||||
def test_hashable(self):
|
||||
result = SpoofingResult(is_real=True, confidence=0.95)
|
||||
hash(result)
|
||||
|
||||
|
||||
class TestEmotionResult:
|
||||
"""Tests for EmotionResult dataclass."""
|
||||
|
||||
def test_creation(self):
|
||||
result = EmotionResult(emotion='Happy', confidence=0.92)
|
||||
assert result.emotion == 'Happy'
|
||||
assert result.confidence == 0.92
|
||||
|
||||
def test_immutability(self):
|
||||
result = EmotionResult(emotion='Sad', confidence=0.75)
|
||||
with pytest.raises(AttributeError):
|
||||
result.emotion = 'Happy' # type: ignore
|
||||
|
||||
def test_repr(self):
|
||||
result = EmotionResult(emotion='Angry', confidence=0.8123)
|
||||
repr_str = repr(result)
|
||||
assert 'EmotionResult' in repr_str
|
||||
assert 'Angry' in repr_str
|
||||
assert '0.8123' in repr_str
|
||||
|
||||
def test_various_emotions(self):
|
||||
emotions = ['Neutral', 'Happy', 'Sad', 'Surprise', 'Fear', 'Disgust', 'Angry']
|
||||
for emotion in emotions:
|
||||
result = EmotionResult(emotion=emotion, confidence=0.5)
|
||||
assert result.emotion == emotion
|
||||
|
||||
def test_hashable(self):
|
||||
result = EmotionResult(emotion='Happy', confidence=0.92)
|
||||
hash(result)
|
||||
|
||||
|
||||
class TestAttributeResult:
|
||||
"""Tests for AttributeResult dataclass."""
|
||||
|
||||
def test_age_gender_result(self):
|
||||
result = AttributeResult(gender=1, age=25)
|
||||
assert result.gender == 1
|
||||
assert result.age == 25
|
||||
assert result.age_group is None
|
||||
assert result.race is None
|
||||
assert result.sex == 'Male'
|
||||
|
||||
def test_fairface_result(self):
|
||||
result = AttributeResult(gender=0, age_group='20-29', race='East Asian')
|
||||
assert result.gender == 0
|
||||
assert result.age is None
|
||||
assert result.age_group == '20-29'
|
||||
assert result.race == 'East Asian'
|
||||
assert result.sex == 'Female'
|
||||
|
||||
def test_sex_property_female(self):
|
||||
result = AttributeResult(gender=0)
|
||||
assert result.sex == 'Female'
|
||||
|
||||
def test_sex_property_male(self):
|
||||
result = AttributeResult(gender=1)
|
||||
assert result.sex == 'Male'
|
||||
|
||||
def test_immutability(self):
|
||||
result = AttributeResult(gender=1, age=30)
|
||||
with pytest.raises(AttributeError):
|
||||
result.age = 31 # type: ignore
|
||||
|
||||
def test_repr_age_gender(self):
|
||||
result = AttributeResult(gender=1, age=25)
|
||||
repr_str = repr(result)
|
||||
assert 'AttributeResult' in repr_str
|
||||
assert 'Male' in repr_str
|
||||
assert 'age=25' in repr_str
|
||||
|
||||
def test_repr_fairface(self):
|
||||
result = AttributeResult(gender=0, age_group='30-39', race='White')
|
||||
repr_str = repr(result)
|
||||
assert 'Female' in repr_str
|
||||
assert 'age_group=30-39' in repr_str
|
||||
assert 'race=White' in repr_str
|
||||
|
||||
def test_hashable(self):
|
||||
result = AttributeResult(gender=1, age=25)
|
||||
hash(result)
|
||||
|
||||
|
||||
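# Sketch of what the tests above imply for AttributeResult (gender encoded as
# 0 = Female, 1 = Male, with optional age / age_group / race fields); the actual
# implementation in uniface/types.py may differ:
from dataclasses import dataclass


@dataclass(frozen=True)
class _AttributeResultSketch:
    gender: int
    age: int | None = None
    age_group: str | None = None
    race: str | None = None

    @property
    def sex(self) -> str:
        return 'Male' if self.gender == 1 else 'Female'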
class TestFace:
|
||||
"""Tests for Face dataclass."""
|
||||
|
||||
@pytest.fixture
|
||||
def sample_face(self):
|
||||
return Face(
|
||||
bbox=np.array([100, 100, 200, 200]),
|
||||
confidence=0.95,
|
||||
landmarks=np.array([[120, 130], [180, 130], [150, 160], [130, 180], [170, 180]]),
|
||||
)
|
||||
|
||||
def test_creation(self, sample_face):
|
||||
assert sample_face.confidence == 0.95
|
||||
assert sample_face.bbox.shape == (4,)
|
||||
assert sample_face.landmarks.shape == (5, 2)
|
||||
|
||||
def test_optional_attributes_default_none(self, sample_face):
|
||||
assert sample_face.embedding is None
|
||||
assert sample_face.gender is None
|
||||
assert sample_face.age is None
|
||||
assert sample_face.age_group is None
|
||||
assert sample_face.race is None
|
||||
assert sample_face.emotion is None
|
||||
assert sample_face.emotion_confidence is None
|
||||
|
||||
def test_mutability(self, sample_face):
|
||||
"""Face should be mutable for FaceAnalyzer enrichment."""
|
||||
sample_face.gender = 1
|
||||
sample_face.age = 25
|
||||
sample_face.embedding = np.random.randn(512)
|
||||
|
||||
assert sample_face.gender == 1
|
||||
assert sample_face.age == 25
|
||||
assert sample_face.embedding.shape == (512,)
|
||||
|
||||
def test_sex_property_none(self, sample_face):
|
||||
assert sample_face.sex is None
|
||||
|
||||
def test_sex_property_female(self, sample_face):
|
||||
sample_face.gender = 0
|
||||
assert sample_face.sex == 'Female'
|
||||
|
||||
def test_sex_property_male(self, sample_face):
|
||||
sample_face.gender = 1
|
||||
assert sample_face.sex == 'Male'
|
||||
|
||||
def test_bbox_xyxy(self, sample_face):
|
||||
bbox_xyxy = sample_face.bbox_xyxy
|
||||
np.testing.assert_array_equal(bbox_xyxy, [100, 100, 200, 200])
|
||||
|
||||
def test_bbox_xywh(self, sample_face):
|
||||
bbox_xywh = sample_face.bbox_xywh
|
||||
np.testing.assert_array_equal(bbox_xywh, [100, 100, 100, 100])
|
||||
|
||||
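# The two bbox properties checked above differ only in how the last two values are
# interpreted: xyxy stores both corners, xywh stores the top-left corner plus
# width/height. A minimal conversion sketch (not the library implementation):
def _xyxy_to_xywh(bbox):
    x1, y1, x2, y2 = bbox
    return np.array([x1, y1, x2 - x1, y2 - y1])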
def test_to_dict(self, sample_face):
|
||||
result = sample_face.to_dict()
|
||||
assert isinstance(result, dict)
|
||||
assert 'bbox' in result
|
||||
assert 'confidence' in result
|
||||
assert 'landmarks' in result
|
||||
|
||||
def test_repr_minimal(self, sample_face):
|
||||
repr_str = repr(sample_face)
|
||||
assert 'Face' in repr_str
|
||||
assert 'confidence=0.950' in repr_str
|
||||
|
||||
def test_repr_with_attributes(self, sample_face):
|
||||
sample_face.gender = 1
|
||||
sample_face.age = 30
|
||||
sample_face.emotion = 'Happy'
|
||||
|
||||
repr_str = repr(sample_face)
|
||||
assert 'age=30' in repr_str
|
||||
assert 'sex=Male' in repr_str
|
||||
assert 'emotion=Happy' in repr_str
|
||||
|
||||
def test_compute_similarity_no_embeddings(self, sample_face):
|
||||
other_face = Face(
|
||||
bbox=np.array([50, 50, 150, 150]),
|
||||
confidence=0.90,
|
||||
landmarks=np.random.randn(5, 2),
|
||||
)
|
||||
with pytest.raises(ValueError, match='Both faces must have embeddings'):
|
||||
sample_face.compute_similarity(other_face)
|
||||
|
||||
def test_compute_similarity_with_embeddings(self, sample_face):
|
||||
# Create normalized embeddings
|
||||
sample_face.embedding = np.random.randn(512)
|
||||
sample_face.embedding /= np.linalg.norm(sample_face.embedding)
|
||||
|
||||
other_face = Face(
|
||||
bbox=np.array([50, 50, 150, 150]),
|
||||
confidence=0.90,
|
||||
landmarks=np.random.randn(5, 2),
|
||||
)
|
||||
other_face.embedding = np.random.randn(512)
|
||||
other_face.embedding /= np.linalg.norm(other_face.embedding)
|
||||
|
||||
similarity = sample_face.compute_similarity(other_face)
|
||||
assert isinstance(similarity, float)
|
||||
assert -1 <= similarity <= 1
|
||||
|
||||
def test_compute_similarity_same_embedding(self, sample_face):
|
||||
embedding = np.random.randn(512)
|
||||
embedding /= np.linalg.norm(embedding)
|
||||
sample_face.embedding = embedding.copy()
|
||||
|
||||
other_face = Face(
|
||||
bbox=np.array([50, 50, 150, 150]),
|
||||
confidence=0.90,
|
||||
landmarks=np.random.randn(5, 2),
|
||||
embedding=embedding.copy(),
|
||||
)
|
||||
|
||||
similarity = sample_face.compute_similarity(other_face)
|
||||
assert similarity == pytest.approx(1.0, abs=1e-5)
|
||||
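# The error path and the score range asserted above match a plain cosine similarity
# over the two embeddings. A hedged sketch of Face.compute_similarity (the packaged
# method may differ in validation details):
def _face_similarity_sketch(a_embedding: np.ndarray, b_embedding: np.ndarray) -> float:
    if a_embedding is None or b_embedding is None:
        raise ValueError('Both faces must have embeddings')
    return float(
        np.dot(a_embedding, b_embedding)
        / (np.linalg.norm(a_embedding) * np.linalg.norm(b_embedding))
    )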
@@ -1,3 +1,11 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Tests for utility functions (compute_similarity, face_alignment, etc.)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
@@ -18,13 +26,16 @@ def mock_landmarks():
|
||||
Create mock 5-point facial landmarks.
|
||||
Standard positions for a face roughly centered at (112/2, 112/2).
|
||||
"""
|
||||
return np.array([
|
||||
[38.2946, 51.6963], # Left eye
|
||||
[73.5318, 51.5014], # Right eye
|
||||
[56.0252, 71.7366], # Nose
|
||||
[41.5493, 92.3655], # Left mouth corner
|
||||
[70.7299, 92.2041] # Right mouth corner
|
||||
], dtype=np.float32)
|
||||
return np.array(
|
||||
[
|
||||
[38.2946, 51.6963], # Left eye
|
||||
[73.5318, 51.5014], # Right eye
|
||||
[56.0252, 71.7366], # Nose
|
||||
[41.5493, 92.3655], # Left mouth corner
|
||||
[70.7299, 92.2041], # Right mouth corner
|
||||
],
|
||||
dtype=np.float32,
|
||||
)
|
||||
|
||||
|
||||
# compute_similarity tests
|
||||
@@ -36,7 +47,7 @@ def test_compute_similarity_same_embedding():
|
||||
embedding = embedding / np.linalg.norm(embedding) # Normalize
|
||||
|
||||
similarity = compute_similarity(embedding, embedding)
|
||||
assert np.isclose(similarity, 1.0, atol=1e-5), f"Self-similarity should be 1.0, got {similarity}"
|
||||
assert np.isclose(similarity, 1.0, atol=1e-5), f'Self-similarity should be 1.0, got {similarity}'
|
||||
|
||||
|
||||
def test_compute_similarity_range():
|
||||
@@ -53,7 +64,7 @@ def test_compute_similarity_range():
|
||||
emb2 = emb2 / np.linalg.norm(emb2)
|
||||
|
||||
similarity = compute_similarity(emb1, emb2)
|
||||
assert -1.0 <= similarity <= 1.0, f"Similarity should be in [-1, 1], got {similarity}"
|
||||
assert -1.0 <= similarity <= 1.0, f'Similarity should be in [-1, 1], got {similarity}'
|
||||
|
||||
|
||||
def test_compute_similarity_orthogonal():
|
||||
@@ -68,7 +79,7 @@ def test_compute_similarity_orthogonal():
|
||||
emb2[0, 1] = 1.0 # [0, 1, 0, ..., 0]
|
||||
|
||||
similarity = compute_similarity(emb1, emb2)
|
||||
assert np.isclose(similarity, 0.0, atol=1e-5), f"Orthogonal embeddings should have similarity 0.0, got {similarity}"
|
||||
assert np.isclose(similarity, 0.0, atol=1e-5), f'Orthogonal embeddings should have similarity 0.0, got {similarity}'
|
||||
|
||||
|
||||
def test_compute_similarity_opposite():
|
||||
@@ -81,7 +92,7 @@ def test_compute_similarity_opposite():
|
||||
emb2 = -emb1 # Opposite direction
|
||||
|
||||
similarity = compute_similarity(emb1, emb2)
|
||||
assert np.isclose(similarity, -1.0, atol=1e-5), f"Opposite embeddings should have similarity -1.0, got {similarity}"
|
||||
assert np.isclose(similarity, -1.0, atol=1e-5), f'Opposite embeddings should have similarity -1.0, got {similarity}'
|
||||
|
||||
|
||||
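# The properties verified above (self-similarity 1.0, orthogonal 0.0, opposite -1.0,
# symmetry) are exactly those of cosine similarity. A minimal reference version —
# a sketch, not necessarily the packaged compute_similarity:
def _cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
    a = a.ravel()
    b = b.ravel()
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))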
def test_compute_similarity_symmetry():
|
||||
@@ -98,7 +109,7 @@ def test_compute_similarity_symmetry():
|
||||
sim_12 = compute_similarity(emb1, emb2)
|
||||
sim_21 = compute_similarity(emb2, emb1)
|
||||
|
||||
assert np.isclose(sim_12, sim_21), "Similarity should be symmetric"
|
||||
assert np.isclose(sim_12, sim_21), 'Similarity should be symmetric'
|
||||
|
||||
|
||||
def test_compute_similarity_dtype():
|
||||
@@ -113,7 +124,7 @@ def test_compute_similarity_dtype():
|
||||
emb2 = emb2 / np.linalg.norm(emb2)
|
||||
|
||||
similarity = compute_similarity(emb1, emb2)
|
||||
assert isinstance(similarity, (float, np.floating)), f"Similarity should be float, got {type(similarity)}"
|
||||
assert isinstance(similarity, float | np.floating), f'Similarity should be float, got {type(similarity)}'
|
||||
|
||||
|
||||
# face_alignment tests
|
||||
@@ -123,7 +134,7 @@ def test_face_alignment_output_shape(mock_image, mock_landmarks):
|
||||
"""
|
||||
aligned, _ = face_alignment(mock_image, mock_landmarks, image_size=(112, 112))
|
||||
|
||||
assert aligned.shape == (112, 112, 3), f"Expected shape (112, 112, 3), got {aligned.shape}"
|
||||
assert aligned.shape == (112, 112, 3), f'Expected shape (112, 112, 3), got {aligned.shape}'
|
||||
|
||||
|
||||
def test_face_alignment_dtype(mock_image, mock_landmarks):
|
||||
@@ -132,7 +143,7 @@ def test_face_alignment_dtype(mock_image, mock_landmarks):
|
||||
"""
|
||||
aligned, _ = face_alignment(mock_image, mock_landmarks, image_size=(112, 112))
|
||||
|
||||
assert aligned.dtype == np.uint8, f"Expected uint8, got {aligned.dtype}"
|
||||
assert aligned.dtype == np.uint8, f'Expected uint8, got {aligned.dtype}'
|
||||
|
||||
|
||||
def test_face_alignment_different_sizes(mock_image, mock_landmarks):
|
||||
@@ -144,7 +155,7 @@ def test_face_alignment_different_sizes(mock_image, mock_landmarks):
|
||||
|
||||
for size in test_sizes:
|
||||
aligned, _ = face_alignment(mock_image, mock_landmarks, image_size=size)
|
||||
assert aligned.shape == (*size, 3), f"Failed for size {size}"
|
||||
assert aligned.shape == (*size, 3), f'Failed for size {size}'
|
||||
|
||||
|
||||
def test_face_alignment_consistency(mock_image, mock_landmarks):
|
||||
@@ -154,7 +165,7 @@ def test_face_alignment_consistency(mock_image, mock_landmarks):
|
||||
aligned1, _ = face_alignment(mock_image, mock_landmarks, image_size=(112, 112))
|
||||
aligned2, _ = face_alignment(mock_image, mock_landmarks, image_size=(112, 112))
|
||||
|
||||
assert np.allclose(aligned1, aligned2), "Same input should produce same aligned face"
|
||||
assert np.allclose(aligned1, aligned2), 'Same input should produce same aligned face'
|
||||
|
||||
|
||||
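# For context, 5-point alignment of this kind is commonly implemented by estimating
# a similarity transform from the detected landmarks to a fixed reference template
# (the same template as the fixture above) and warping the crop. A hedged OpenCV
# sketch — uniface's face_alignment may use a different estimator or template:
def _align_sketch(image: np.ndarray, landmarks: np.ndarray, size=(112, 112)):
    import cv2

    reference = np.array(
        [
            [38.2946, 51.6963],
            [73.5318, 51.5014],
            [56.0252, 71.7366],
            [41.5493, 92.3655],
            [70.7299, 92.2041],
        ],
        dtype=np.float32,
    )
    matrix, _ = cv2.estimateAffinePartial2D(landmarks.astype(np.float32), reference)
    aligned = cv2.warpAffine(image, matrix, size)
    return aligned, matrix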
def test_face_alignment_landmarks_as_list(mock_image):
|
||||
@@ -166,13 +177,13 @@ def test_face_alignment_landmarks_as_list(mock_image):
|
||||
[73.5318, 51.5014],
|
||||
[56.0252, 71.7366],
|
||||
[41.5493, 92.3655],
|
||||
[70.7299, 92.2041]
|
||||
[70.7299, 92.2041],
|
||||
]
|
||||
|
||||
# Convert list to numpy array before passing to face_alignment
|
||||
landmarks_array = np.array(landmarks_list, dtype=np.float32)
|
||||
aligned, _ = face_alignment(mock_image, landmarks_array, image_size=(112, 112))
|
||||
assert aligned.shape == (112, 112, 3), "Should work with landmarks as array"
|
||||
assert aligned.shape == (112, 112, 3), 'Should work with landmarks as array'
|
||||
|
||||
|
||||
def test_face_alignment_value_range(mock_image, mock_landmarks):
|
||||
@@ -181,8 +192,8 @@ def test_face_alignment_value_range(mock_image, mock_landmarks):
|
||||
"""
|
||||
aligned, _ = face_alignment(mock_image, mock_landmarks, image_size=(112, 112))
|
||||
|
||||
assert np.all(aligned >= 0), "Pixel values should be >= 0"
|
||||
assert np.all(aligned <= 255), "Pixel values should be <= 255"
|
||||
assert np.all(aligned >= 0), 'Pixel values should be >= 0'
|
||||
assert np.all(aligned <= 255), 'Pixel values should be <= 255'
|
||||
|
||||
|
||||
def test_face_alignment_not_all_zeros(mock_image, mock_landmarks):
|
||||
@@ -192,7 +203,7 @@ def test_face_alignment_not_all_zeros(mock_image, mock_landmarks):
|
||||
aligned, _ = face_alignment(mock_image, mock_landmarks, image_size=(112, 112))
|
||||
|
||||
# At least some pixels should be non-zero
|
||||
assert np.any(aligned > 0), "Aligned face should have some non-zero pixels"
|
||||
assert np.any(aligned > 0), 'Aligned face should have some non-zero pixels'
|
||||
|
||||
|
||||
def test_face_alignment_from_different_positions(mock_image):
|
||||
@@ -201,14 +212,23 @@ def test_face_alignment_from_different_positions(mock_image):
|
||||
"""
|
||||
# Landmarks at different positions
|
||||
positions = [
|
||||
np.array([[100, 100], [150, 100], [125, 130], [110, 150], [140, 150]], dtype=np.float32),
|
||||
np.array([[300, 200], [350, 200], [325, 230], [310, 250], [340, 250]], dtype=np.float32),
|
||||
np.array([[500, 400], [550, 400], [525, 430], [510, 450], [540, 450]], dtype=np.float32),
|
||||
np.array(
|
||||
[[100, 100], [150, 100], [125, 130], [110, 150], [140, 150]],
|
||||
dtype=np.float32,
|
||||
),
|
||||
np.array(
|
||||
[[300, 200], [350, 200], [325, 230], [310, 250], [340, 250]],
|
||||
dtype=np.float32,
|
||||
),
|
||||
np.array(
|
||||
[[500, 400], [550, 400], [525, 430], [510, 450], [540, 450]],
|
||||
dtype=np.float32,
|
||||
),
|
||||
]
|
||||
|
||||
for landmarks in positions:
|
||||
aligned, _ = face_alignment(mock_image, landmarks, image_size=(112, 112))
|
||||
assert aligned.shape == (112, 112, 3), f"Failed for landmarks at {landmarks[0]}"
|
||||
assert aligned.shape == (112, 112, 3), f'Failed for landmarks at {landmarks[0]}'
|
||||
|
||||
|
||||
def test_face_alignment_landmark_count(mock_image):
|
||||
@@ -216,16 +236,19 @@ def test_face_alignment_landmark_count(mock_image):
|
||||
Test that face_alignment works specifically with 5-point landmarks.
|
||||
"""
|
||||
# Standard 5-point landmarks
|
||||
landmarks_5pt = np.array([
|
||||
[38.2946, 51.6963],
|
||||
[73.5318, 51.5014],
|
||||
[56.0252, 71.7366],
|
||||
[41.5493, 92.3655],
|
||||
[70.7299, 92.2041]
|
||||
], dtype=np.float32)
|
||||
landmarks_5pt = np.array(
|
||||
[
|
||||
[38.2946, 51.6963],
|
||||
[73.5318, 51.5014],
|
||||
[56.0252, 71.7366],
|
||||
[41.5493, 92.3655],
|
||||
[70.7299, 92.2041],
|
||||
],
|
||||
dtype=np.float32,
|
||||
)
|
||||
|
||||
aligned, _ = face_alignment(mock_image, landmarks_5pt, image_size=(112, 112))
|
||||
assert aligned.shape == (112, 112, 3), "Should work with 5-point landmarks"
|
||||
assert aligned.shape == (112, 112, 3), 'Should work with 5-point landmarks'
|
||||
|
||||
|
||||
def test_compute_similarity_with_recognition_embeddings():
|
||||
@@ -244,4 +267,4 @@ def test_compute_similarity_with_recognition_embeddings():
|
||||
|
||||
# Should be a valid similarity score
|
||||
assert -1.0 <= similarity <= 1.0
|
||||
assert isinstance(similarity, (float, np.floating))
|
||||
assert isinstance(similarity, float | np.floating)
|
||||
|
||||
121
tools/README.md
Normal file
@@ -0,0 +1,121 @@
|
||||
# Tools
|
||||
|
||||
CLI utilities for testing and running UniFace features.
|
||||
|
||||
## Available Tools
|
||||
|
||||
| Tool | Description |
|
||||
|------|-------------|
|
||||
| `detection.py` | Face detection on image, video, or webcam |
|
||||
| `face_anonymize.py` | Face anonymization/blurring for privacy |
|
||||
| `age_gender.py` | Age and gender prediction |
|
||||
| `face_emotion.py` | Emotion detection (7 or 8 emotions) |
|
||||
| `gaze_estimation.py` | Gaze direction estimation |
|
||||
| `landmarks.py` | 106-point facial landmark detection |
|
||||
| `recognition.py` | Face embedding extraction and comparison |
|
||||
| `face_analyzer.py` | Complete face analysis (detection + recognition + attributes) |
|
||||
| `face_search.py` | Real-time face matching against reference |
|
||||
| `fairface.py` | FairFace attribute prediction (race, gender, age) |
|
||||
| `spoofing.py` | Face anti-spoofing detection |
|
||||
| `face_parsing.py` | Face semantic segmentation |
|
||||
| `video_detection.py` | Face detection on video files with progress bar |
|
||||
| `batch_process.py` | Batch process folder of images |
|
||||
| `download_model.py` | Download model weights |
|
||||
| `sha256_generate.py` | Generate SHA256 hash for model files |
|
||||
|
||||
## Unified `--source` Pattern
|
||||
|
||||
All tools use a unified `--source` argument that accepts:
|
||||
- **Image path**: `--source photo.jpg`
|
||||
- **Video path**: `--source video.mp4`
|
||||
- **Camera ID**: `--source 0` (default webcam), `--source 1` (external camera)
|
||||
|
||||
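Internally, each tool classifies `--source` with a small digit/suffix check. A minimal sketch of that dispatch, mirroring the `get_source_type` helper the tools share:

```python
from pathlib import Path

IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}


def get_source_type(source: str) -> str:
    """Classify --source as 'camera', 'image', 'video', or 'unknown'."""
    if source.isdigit():
        return 'camera'
    suffix = Path(source).suffix.lower()
    if suffix in IMAGE_EXTENSIONS:
        return 'image'
    if suffix in VIDEO_EXTENSIONS:
        return 'video'
    return 'unknown'
```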
## Usage Examples
|
||||
|
||||
```bash
|
||||
# Face detection
|
||||
python tools/detection.py --source assets/test.jpg # image
|
||||
python tools/detection.py --source video.mp4 # video
|
||||
python tools/detection.py --source 0 # webcam
|
||||
|
||||
# Face anonymization
|
||||
python tools/face_anonymize.py --source assets/test.jpg --method pixelate
|
||||
python tools/face_anonymize.py --source video.mp4 --method gaussian
|
||||
python tools/face_anonymize.py --source 0 --method pixelate
|
||||
|
||||
# Age and gender
|
||||
python tools/age_gender.py --source assets/test.jpg
|
||||
python tools/age_gender.py --source 0
|
||||
|
||||
# Emotion detection
|
||||
python tools/face_emotion.py --source assets/test.jpg
|
||||
python tools/face_emotion.py --source 0
|
||||
|
||||
# Gaze estimation
|
||||
python tools/gaze_estimation.py --source assets/test.jpg
|
||||
python tools/gaze_estimation.py --source 0
|
||||
|
||||
# Landmarks
|
||||
python tools/landmarks.py --source assets/test.jpg
|
||||
python tools/landmarks.py --source 0
|
||||
|
||||
# FairFace attributes
|
||||
python tools/fairface.py --source assets/test.jpg
|
||||
python tools/fairface.py --source 0
|
||||
|
||||
# Face parsing
|
||||
python tools/face_parsing.py --source assets/test.jpg
|
||||
python tools/face_parsing.py --source 0
|
||||
|
||||
# Face anti-spoofing
|
||||
python tools/spoofing.py --source assets/test.jpg
|
||||
python tools/spoofing.py --source 0
|
||||
|
||||
# Face analyzer
|
||||
python tools/face_analyzer.py --source assets/test.jpg
|
||||
python tools/face_analyzer.py --source 0
|
||||
|
||||
# Face recognition (extract embedding)
|
||||
python tools/recognition.py --image assets/test.jpg
|
||||
|
||||
# Face comparison
|
||||
python tools/recognition.py --image1 face1.jpg --image2 face2.jpg
|
||||
|
||||
# Face search (match against reference)
|
||||
python tools/face_search.py --reference person.jpg --source 0
|
||||
python tools/face_search.py --reference person.jpg --source video.mp4
|
||||
|
||||
# Video processing with progress bar
|
||||
python tools/video_detection.py --source video.mp4
|
||||
python tools/video_detection.py --source video.mp4 --output output.mp4
|
||||
|
||||
# Batch processing
|
||||
python tools/batch_process.py --input images/ --output results/
|
||||
|
||||
# Download models
|
||||
python tools/download_model.py --model-type retinaface
|
||||
python tools/download_model.py # downloads all
|
||||
```
|
||||
|
||||
## Common Options
|
||||
|
||||
| Option | Description |
|
||||
|--------|-------------|
|
||||
| `--source` | Input source: image/video path or camera ID (0, 1, ...) |
|
||||
| `--detector` | Choose detector: `retinaface`, `scrfd` (`detection.py` uses `--method` and also accepts `yolov5face`) |
|
||||
| `--threshold` | Visualization confidence threshold (default: varies) |
|
||||
| `--save-dir` | Output directory (default: `outputs`) |
|
||||
|
||||
## Supported Formats
|
||||
|
||||
**Images:** `.jpg`, `.jpeg`, `.png`, `.bmp`, `.webp`, `.tiff`
|
||||
|
||||
**Videos:** `.mp4`, `.avi`, `.mov`, `.mkv`, `.webm`, `.flv`
|
||||
|
||||
**Camera:** Use integer IDs (`0`, `1`, `2`, ...)
|
||||
|
||||
## Quick Test
|
||||
|
||||
```bash
|
||||
python tools/detection.py --source assets/test.jpg
|
||||
```
|
||||
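The same quick test can be run from Python. A minimal sketch using the imports the tools themselves use (the output path and threshold are illustrative):

```python
import cv2

from uniface import RetinaFace
from uniface.visualization import draw_detections

detector = RetinaFace()
image = cv2.imread('assets/test.jpg')
faces = detector.detect(image)

draw_detections(
    image=image,
    bboxes=[f.bbox for f in faces],
    scores=[f.confidence for f in faces],
    landmarks=[f.landmarks for f in faces],
    vis_threshold=0.6,
)
cv2.imwrite('outputs/test_out.jpg', image)
print(f'Detected {len(faces)} face(s)')
```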
213
tools/age_gender.py
Normal file
@@ -0,0 +1,213 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Age and gender prediction on detected faces.
|
||||
|
||||
Usage:
|
||||
python tools/age_gender.py --source path/to/image.jpg
|
||||
python tools/age_gender.py --source path/to/video.mp4
|
||||
python tools/age_gender.py --source 0 # webcam
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
|
||||
from uniface import SCRFD, AgeGender, RetinaFace
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
|
||||
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}
|
||||
|
||||
|
||||
def get_source_type(source: str) -> str:
|
||||
"""Determine if source is image, video, or camera."""
|
||||
if source.isdigit():
|
||||
return 'camera'
|
||||
path = Path(source)
|
||||
suffix = path.suffix.lower()
|
||||
if suffix in IMAGE_EXTENSIONS:
|
||||
return 'image'
|
||||
elif suffix in VIDEO_EXTENSIONS:
|
||||
return 'video'
|
||||
else:
|
||||
return 'unknown'
|
||||
|
||||
|
||||
def draw_age_gender_label(image, bbox, sex: str, age: int):
|
||||
"""Draw age/gender label above the bounding box."""
|
||||
x1, y1 = int(bbox[0]), int(bbox[1])
|
||||
text = f'{sex}, {age}y'
|
||||
(tw, th), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
|
||||
cv2.rectangle(image, (x1, y1 - th - 10), (x1 + tw + 10, y1), (0, 255, 0), -1)
|
||||
cv2.putText(image, text, (x1 + 5, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 2)
|
||||
|
||||
|
||||
def process_image(
|
||||
detector,
|
||||
age_gender,
|
||||
image_path: str,
|
||||
save_dir: str = 'outputs',
|
||||
threshold: float = 0.6,
|
||||
):
|
||||
"""Process a single image."""
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
print(f"Error: Failed to load image from '{image_path}'")
|
||||
return
|
||||
|
||||
faces = detector.detect(image)
|
||||
print(f'Detected {len(faces)} face(s)')
|
||||
|
||||
if not faces:
|
||||
return
|
||||
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
draw_detections(
|
||||
image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
|
||||
)
|
||||
|
||||
for i, face in enumerate(faces):
|
||||
result = age_gender.predict(image, face.bbox)
|
||||
print(f' Face {i + 1}: {result.sex}, {result.age} years old')
|
||||
draw_age_gender_label(image, face.bbox, result.sex, result.age)
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{Path(image_path).stem}_age_gender.jpg')
|
||||
cv2.imwrite(output_path, image)
|
||||
print(f'Output saved: {output_path}')
|
||||
|
||||
|
||||
def process_video(
|
||||
detector,
|
||||
age_gender,
|
||||
video_path: str,
|
||||
save_dir: str = 'outputs',
|
||||
threshold: float = 0.6,
|
||||
):
|
||||
"""Process a video file."""
|
||||
cap = cv2.VideoCapture(video_path)
|
||||
if not cap.isOpened():
|
||||
print(f"Error: Cannot open video file '{video_path}'")
|
||||
return
|
||||
|
||||
fps = cap.get(cv2.CAP_PROP_FPS)
|
||||
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
||||
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
||||
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{Path(video_path).stem}_age_gender.mp4')
|
||||
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
|
||||
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
|
||||
|
||||
print(f'Processing video: {video_path} ({total_frames} frames)')
|
||||
frame_count = 0
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
frame_count += 1
|
||||
faces = detector.detect(frame)
|
||||
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
draw_detections(
|
||||
image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
|
||||
)
|
||||
|
||||
for face in faces:
|
||||
result = age_gender.predict(frame, face.bbox)
|
||||
draw_age_gender_label(frame, face.bbox, result.sex, result.age)
|
||||
|
||||
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
out.write(frame)
|
||||
|
||||
if frame_count % 100 == 0:
|
||||
print(f' Processed {frame_count}/{total_frames} frames...')
|
||||
|
||||
cap.release()
|
||||
out.release()
|
||||
print(f'Done! Output saved: {output_path}')
|
||||
|
||||
|
||||
def run_camera(detector, age_gender, camera_id: int = 0, threshold: float = 0.6):
|
||||
"""Run real-time detection on webcam."""
|
||||
cap = cv2.VideoCapture(camera_id)
|
||||
if not cap.isOpened():
|
||||
print(f'Cannot open camera {camera_id}')
|
||||
return
|
||||
|
||||
print("Press 'q' to quit")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
if not ret:
break
frame = cv2.flip(frame, 1)
|
||||
|
||||
faces = detector.detect(frame)
|
||||
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
draw_detections(
|
||||
image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
|
||||
)
|
||||
|
||||
for face in faces:
|
||||
result = age_gender.predict(frame, face.bbox)
|
||||
draw_age_gender_label(frame, face.bbox, result.sex, result.age)
|
||||
|
||||
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
cv2.imshow('Age & Gender Detection', frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Run age and gender detection')
|
||||
parser.add_argument('--source', type=str, required=True, help='Image/video path or camera ID (0, 1, ...)')
|
||||
parser.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
|
||||
parser.add_argument('--threshold', type=float, default=0.6, help='Visualization threshold')
|
||||
parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
|
||||
args = parser.parse_args()
|
||||
|
||||
detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
|
||||
age_gender = AgeGender()
|
||||
|
||||
source_type = get_source_type(args.source)
|
||||
|
||||
if source_type == 'camera':
|
||||
run_camera(detector, age_gender, int(args.source), args.threshold)
|
||||
elif source_type == 'image':
|
||||
if not os.path.exists(args.source):
|
||||
print(f'Error: Image not found: {args.source}')
|
||||
return
|
||||
process_image(detector, age_gender, args.source, args.save_dir, args.threshold)
|
||||
elif source_type == 'video':
|
||||
if not os.path.exists(args.source):
|
||||
print(f'Error: Video not found: {args.source}')
|
||||
return
|
||||
process_video(detector, age_gender, args.source, args.save_dir, args.threshold)
|
||||
else:
|
||||
print(f"Error: Unknown source type for '{args.source}'")
|
||||
print('Supported formats: images (.jpg, .png, ...), videos (.mp4, .avi, ...), or camera ID (0, 1, ...)')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
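# Minimal programmatic counterpart to the tool above (a sketch using the same
# imports and calls; 'assets/test.jpg' is an illustrative path):
def _age_gender_sketch() -> None:
    detector = RetinaFace()
    age_gender = AgeGender()
    image = cv2.imread('assets/test.jpg')
    for face in detector.detect(image):
        result = age_gender.predict(image, face.bbox)
        print(f'{result.sex}, {result.age}y')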
105
tools/batch_process.py
Normal file
@@ -0,0 +1,105 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Batch face detection on a folder of images.
|
||||
|
||||
Usage:
|
||||
python tools/batch_process.py --input images/ --output results/
|
||||
"""
|
||||
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
from tqdm import tqdm
|
||||
|
||||
from uniface import SCRFD, RetinaFace
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
|
||||
def get_image_files(input_dir: Path, extensions: tuple) -> list:
|
||||
files = []
|
||||
for ext in extensions:
|
||||
files.extend(input_dir.glob(f'*.{ext}'))
|
||||
files.extend(input_dir.glob(f'*.{ext.upper()}'))
|
||||
return sorted(files)
|
||||
|
||||
|
||||
def process_image(detector, image_path: Path, output_path: Path, threshold: float) -> int:
|
||||
"""Process single image. Returns face count or -1 on error."""
|
||||
image = cv2.imread(str(image_path))
|
||||
if image is None:
|
||||
return -1
|
||||
|
||||
faces = detector.detect(image)
|
||||
|
||||
# unpack face data for visualization
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
draw_detections(
|
||||
image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
|
||||
)
|
||||
|
||||
cv2.putText(
|
||||
image,
|
||||
f'Faces: {len(faces)}',
|
||||
(10, 30),
|
||||
cv2.FONT_HERSHEY_SIMPLEX,
|
||||
1,
|
||||
(0, 255, 0),
|
||||
2,
|
||||
)
|
||||
cv2.imwrite(str(output_path), image)
|
||||
|
||||
return len(faces)
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Batch process images with face detection')
|
||||
parser.add_argument('--input', type=str, required=True, help='Input directory')
|
||||
parser.add_argument('--output', type=str, required=True, help='Output directory')
|
||||
parser.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
|
||||
parser.add_argument('--threshold', type=float, default=0.6, help='Visualization threshold')
|
||||
parser.add_argument('--extensions', type=str, default='jpg,jpeg,png,bmp', help='Image extensions')
|
||||
args = parser.parse_args()
|
||||
|
||||
input_path = Path(args.input)
|
||||
output_path = Path(args.output)
|
||||
|
||||
if not input_path.exists():
|
||||
print(f"Error: Input directory '{args.input}' does not exist")
|
||||
return
|
||||
|
||||
output_path.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
extensions = tuple(ext.strip() for ext in args.extensions.split(','))
|
||||
image_files = get_image_files(input_path, extensions)
|
||||
|
||||
if not image_files:
|
||||
print(f'No images found with extensions {extensions}')
|
||||
return
|
||||
|
||||
print(f'Found {len(image_files)} images')
|
||||
|
||||
detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
|
||||
|
||||
success, errors, total_faces = 0, 0, 0
|
||||
|
||||
for img_path in tqdm(image_files, desc='Processing', unit='img'):
|
||||
out_path = output_path / f'{img_path.stem}_detected{img_path.suffix}'
|
||||
result = process_image(detector, img_path, out_path, args.threshold)
|
||||
|
||||
if result >= 0:
|
||||
success += 1
|
||||
total_faces += result
|
||||
else:
|
||||
errors += 1
|
||||
print(f'\nFailed: {img_path.name}')
|
||||
|
||||
print(f'\nDone! {success} processed, {errors} errors, {total_faces} faces total')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
196
tools/detection.py
Normal file
@@ -0,0 +1,196 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Face detection on image, video, or webcam.
|
||||
|
||||
Usage:
|
||||
python tools/detection.py --source path/to/image.jpg
|
||||
python tools/detection.py --source path/to/video.mp4
|
||||
python tools/detection.py --source 0 # webcam
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
|
||||
from uniface.detection import SCRFD, RetinaFace, YOLOv5Face
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
|
||||
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}
|
||||
|
||||
|
||||
def get_source_type(source: str) -> str:
|
||||
"""Determine if source is image, video, or camera."""
|
||||
if source.isdigit():
|
||||
return 'camera'
|
||||
path = Path(source)
|
||||
suffix = path.suffix.lower()
|
||||
if suffix in IMAGE_EXTENSIONS:
|
||||
return 'image'
|
||||
elif suffix in VIDEO_EXTENSIONS:
|
||||
return 'video'
|
||||
else:
|
||||
return 'unknown'
|
||||
|
||||
|
||||
def process_image(detector, image_path: str, threshold: float = 0.6, save_dir: str = 'outputs'):
|
||||
"""Process a single image."""
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
print(f"Error: Failed to load image from '{image_path}'")
|
||||
return
|
||||
|
||||
faces = detector.detect(image)
|
||||
|
||||
if faces:
|
||||
bboxes = [face.bbox for face in faces]
|
||||
scores = [face.confidence for face in faces]
|
||||
landmarks = [face.landmarks for face in faces]
|
||||
draw_detections(image, bboxes, scores, landmarks, vis_threshold=threshold)
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{os.path.splitext(os.path.basename(image_path))[0]}_out.jpg')
|
||||
cv2.imwrite(output_path, image)
|
||||
print(f'Detected {len(faces)} face(s). Output saved: {output_path}')
|
||||
|
||||
|
||||
def process_video(detector, video_path: str, threshold: float = 0.6, save_dir: str = 'outputs'):
|
||||
"""Process a video file."""
|
||||
cap = cv2.VideoCapture(video_path)
|
||||
if not cap.isOpened():
|
||||
print(f"Error: Cannot open video file '{video_path}'")
|
||||
return
|
||||
|
||||
# Get video properties
|
||||
fps = cap.get(cv2.CAP_PROP_FPS)
|
||||
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
||||
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
||||
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{Path(video_path).stem}_out.mp4')
|
||||
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
|
||||
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
|
||||
|
||||
print(f'Processing video: {video_path} ({total_frames} frames)')
|
||||
frame_count = 0
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
frame_count += 1
|
||||
faces = detector.detect(frame)
|
||||
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
draw_detections(
|
||||
image=frame,
|
||||
bboxes=bboxes,
|
||||
scores=scores,
|
||||
landmarks=landmarks,
|
||||
vis_threshold=threshold,
|
||||
draw_score=True,
|
||||
fancy_bbox=True,
|
||||
)
|
||||
|
||||
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
out.write(frame)
|
||||
|
||||
# Show progress
|
||||
if frame_count % 100 == 0:
|
||||
print(f' Processed {frame_count}/{total_frames} frames...')
|
||||
|
||||
cap.release()
|
||||
out.release()
|
||||
print(f'Done! Output saved: {output_path}')
|
||||
|
||||
|
||||
def run_camera(detector, camera_id: int = 0, threshold: float = 0.6):
|
||||
"""Run real-time detection on webcam."""
|
||||
cap = cv2.VideoCapture(camera_id)
|
||||
if not cap.isOpened():
|
||||
print(f'Cannot open camera {camera_id}')
|
||||
return
|
||||
|
||||
print("Press 'q' to quit")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
if not ret:
break
frame = cv2.flip(frame, 1) # mirror for natural interaction
|
||||
|
||||
faces = detector.detect(frame)
|
||||
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
draw_detections(
|
||||
image=frame,
|
||||
bboxes=bboxes,
|
||||
scores=scores,
|
||||
landmarks=landmarks,
|
||||
vis_threshold=threshold,
|
||||
draw_score=True,
|
||||
fancy_bbox=True,
|
||||
)
|
||||
|
||||
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
cv2.imshow('Face Detection', frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Run face detection')
|
||||
parser.add_argument('--source', type=str, required=True, help='Image/video path or camera ID (0, 1, ...)')
|
||||
parser.add_argument('--method', type=str, default='retinaface', choices=['retinaface', 'scrfd', 'yolov5face'])
|
||||
parser.add_argument('--threshold', type=float, default=0.25, help='Visualization threshold')
|
||||
parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
|
||||
args = parser.parse_args()
|
||||
|
||||
# Initialize detector
|
||||
if args.method == 'retinaface':
|
||||
detector = RetinaFace()
|
||||
elif args.method == 'scrfd':
|
||||
detector = SCRFD()
|
||||
else:
|
||||
from uniface.constants import YOLOv5FaceWeights
|
||||
|
||||
detector = YOLOv5Face(model_name=YOLOv5FaceWeights.YOLOV5M)
|
||||
|
||||
# Determine source type and process
|
||||
source_type = get_source_type(args.source)
|
||||
|
||||
if source_type == 'camera':
|
||||
run_camera(detector, int(args.source), args.threshold)
|
||||
elif source_type == 'image':
|
||||
if not os.path.exists(args.source):
|
||||
print(f'Error: Image not found: {args.source}')
|
||||
return
|
||||
process_image(detector, args.source, args.threshold, args.save_dir)
|
||||
elif source_type == 'video':
|
||||
if not os.path.exists(args.source):
|
||||
print(f'Error: Video not found: {args.source}')
|
||||
return
|
||||
process_video(detector, args.source, args.threshold, args.save_dir)
|
||||
else:
|
||||
print(f"Error: Unknown source type for '{args.source}'")
|
||||
print('Supported formats: images (.jpg, .png, ...), videos (.mp4, .avi, ...), or camera ID (0, 1, ...)')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
60
tools/download_model.py
Normal file
@@ -0,0 +1,60 @@
|
||||
import argparse
|
||||
|
||||
from uniface.constants import (
|
||||
AgeGenderWeights,
|
||||
ArcFaceWeights,
|
||||
DDAMFNWeights,
|
||||
LandmarkWeights,
|
||||
MobileFaceWeights,
|
||||
RetinaFaceWeights,
|
||||
SCRFDWeights,
|
||||
SphereFaceWeights,
|
||||
)
|
||||
from uniface.model_store import verify_model_weights
|
||||
|
||||
MODEL_TYPES = {
|
||||
'retinaface': RetinaFaceWeights,
|
||||
'sphereface': SphereFaceWeights,
|
||||
'mobileface': MobileFaceWeights,
|
||||
'arcface': ArcFaceWeights,
|
||||
'scrfd': SCRFDWeights,
|
||||
'ddamfn': DDAMFNWeights,
|
||||
'agegender': AgeGenderWeights,
|
||||
'landmark': LandmarkWeights,
|
||||
}
|
||||
|
||||
|
||||
def download_models(model_enum):
|
||||
for weight in model_enum:
|
||||
print(f'Downloading: {weight.value}')
|
||||
try:
|
||||
verify_model_weights(weight)
|
||||
print(f' Done: {weight.value}')
|
||||
except Exception as e:
|
||||
print(f' Failed: {e}')
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Download model weights')
|
||||
parser.add_argument(
|
||||
'--model-type',
|
||||
type=str,
|
||||
choices=list(MODEL_TYPES.keys()),
|
||||
help='Model type to download. If not specified, downloads all.',
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.model_type:
|
||||
print(f'Downloading {args.model_type} models...')
|
||||
download_models(MODEL_TYPES[args.model_type])
|
||||
else:
|
||||
print('Downloading all models...')
|
||||
for name, model_enum in MODEL_TYPES.items():
|
||||
print(f'\n{name}:')
|
||||
download_models(model_enum)
|
||||
|
||||
print('\nDone!')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
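# Programmatic equivalent of '--model-type retinaface' (a sketch; it loops over the
# same verify_model_weights call this script uses):
def _download_retinaface_sketch() -> None:
    for weight in RetinaFaceWeights:
        verify_model_weights(weight)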
239
tools/face_analyzer.py
Normal file
@@ -0,0 +1,239 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Face analysis using FaceAnalyzer.
|
||||
|
||||
Usage:
|
||||
python tools/face_analyzer.py --source path/to/image.jpg
|
||||
python tools/face_analyzer.py --source path/to/video.mp4
|
||||
python tools/face_analyzer.py --source 0 # webcam
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from uniface import AgeGender, ArcFace, FaceAnalyzer, RetinaFace
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
|
||||
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}
|
||||
|
||||
|
||||
def get_source_type(source: str) -> str:
|
||||
"""Determine if source is image, video, or camera."""
|
||||
if source.isdigit():
|
||||
return 'camera'
|
||||
path = Path(source)
|
||||
suffix = path.suffix.lower()
|
||||
if suffix in IMAGE_EXTENSIONS:
|
||||
return 'image'
|
||||
elif suffix in VIDEO_EXTENSIONS:
|
||||
return 'video'
|
||||
else:
|
||||
return 'unknown'
|
||||
|
||||
|
||||
def draw_face_info(image, face, face_id):
|
||||
"""Draw face ID and attributes above bounding box."""
|
||||
x1, y1, _x2, y2 = map(int, face.bbox)
|
||||
lines = [f'ID: {face_id}', f'Conf: {face.confidence:.2f}']
|
||||
if face.age and face.sex:
|
||||
lines.append(f'{face.sex}, {face.age}y')
|
||||
|
||||
for i, line in enumerate(lines):
|
||||
y_pos = y1 - 10 - (len(lines) - 1 - i) * 25
|
||||
if y_pos < 20:
|
||||
y_pos = y2 + 20 + i * 25
|
||||
(tw, th), _ = cv2.getTextSize(line, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
|
||||
cv2.rectangle(image, (x1, y_pos - th - 5), (x1 + tw + 10, y_pos + 5), (0, 255, 0), -1)
|
||||
cv2.putText(image, line, (x1 + 5, y_pos), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 2)
|
||||
|
||||
|
||||
def process_image(analyzer, image_path: str, save_dir: str = 'outputs', show_similarity: bool = True):
|
||||
"""Process a single image."""
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
print(f"Error: Failed to load image from '{image_path}'")
|
||||
return
|
||||
|
||||
faces = analyzer.analyze(image)
|
||||
print(f'Detected {len(faces)} face(s)')
|
||||
|
||||
if not faces:
|
||||
return
|
||||
|
||||
for i, face in enumerate(faces, 1):
|
||||
info = f' Face {i}: {face.sex}, {face.age}y' if face.age and face.sex else f' Face {i}'
|
||||
if face.embedding is not None:
|
||||
info += f' (embedding: {face.embedding.shape})'
|
||||
print(info)
|
||||
|
||||
if show_similarity and len(faces) >= 2:
|
||||
print('\nSimilarity Matrix:')
|
||||
n = len(faces)
|
||||
sim_matrix = np.zeros((n, n))
|
||||
|
||||
for i in range(n):
|
||||
for j in range(i, n):
|
||||
if i == j:
|
||||
sim_matrix[i][j] = 1.0
|
||||
else:
|
||||
sim = faces[i].compute_similarity(faces[j])
|
||||
sim_matrix[i][j] = sim
|
||||
sim_matrix[j][i] = sim
|
||||
|
||||
print(' ', end='')
|
||||
for i in range(n):
|
||||
print(f' F{i + 1:2d} ', end='')
|
||||
print('\n ' + '-' * (7 * n))
|
||||
|
||||
for i in range(n):
|
||||
print(f'F{i + 1:2d} | ', end='')
|
||||
for j in range(n):
|
||||
print(f'{sim_matrix[i][j]:6.3f} ', end='')
|
||||
print()
|
||||
|
||||
pairs = [(i, j, sim_matrix[i][j]) for i in range(n) for j in range(i + 1, n)]
|
||||
pairs.sort(key=lambda x: x[2], reverse=True)
|
||||
|
||||
print('\nTop matches (>0.4 = same person):')
|
||||
for i, j, sim in pairs[:3]:
|
||||
status = 'Same' if sim > 0.4 else 'Different'
|
||||
print(f' Face {i + 1} ↔ Face {j + 1}: {sim:.3f} ({status})')
|
||||
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
draw_detections(image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, fancy_bbox=True)
|
||||
|
||||
for i, face in enumerate(faces, 1):
|
||||
draw_face_info(image, face, i)
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{Path(image_path).stem}_analysis.jpg')
|
||||
cv2.imwrite(output_path, image)
|
||||
print(f'Output saved: {output_path}')
|
||||
|
||||
|
||||
def process_video(analyzer, video_path: str, save_dir: str = 'outputs'):
|
||||
"""Process a video file."""
|
||||
cap = cv2.VideoCapture(video_path)
|
||||
if not cap.isOpened():
|
||||
print(f"Error: Cannot open video file '{video_path}'")
|
||||
return
|
||||
|
||||
fps = cap.get(cv2.CAP_PROP_FPS)
|
||||
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
||||
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
||||
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{Path(video_path).stem}_analysis.mp4')
|
||||
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
|
||||
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
|
||||
|
||||
print(f'Processing video: {video_path} ({total_frames} frames)')
|
||||
frame_count = 0
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
frame_count += 1
|
||||
faces = analyzer.analyze(frame)
|
||||
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
draw_detections(image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, fancy_bbox=True)
|
||||
|
||||
for i, face in enumerate(faces, 1):
|
||||
draw_face_info(frame, face, i)
|
||||
|
||||
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
out.write(frame)
|
||||
|
||||
if frame_count % 100 == 0:
|
||||
print(f' Processed {frame_count}/{total_frames} frames...')
|
||||
|
||||
cap.release()
|
||||
out.release()
|
||||
print(f'Done! Output saved: {output_path}')
|
||||
|
||||
|
||||
def run_camera(analyzer, camera_id: int = 0):
|
||||
"""Run real-time analysis on webcam."""
|
||||
cap = cv2.VideoCapture(camera_id)
|
||||
if not cap.isOpened():
|
||||
print(f'Cannot open camera {camera_id}')
|
||||
return
|
||||
|
||||
print("Press 'q' to quit")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
if not ret:
break
frame = cv2.flip(frame, 1)
|
||||
|
||||
faces = analyzer.analyze(frame)
|
||||
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
draw_detections(image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, fancy_bbox=True)
|
||||
|
||||
for i, face in enumerate(faces, 1):
|
||||
draw_face_info(frame, face, i)
|
||||
|
||||
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
cv2.imshow('Face Analyzer', frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Face analysis with detection, recognition, and attributes')
|
||||
parser.add_argument('--source', type=str, required=True, help='Image/video path or camera ID (0, 1, ...)')
|
||||
parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
|
||||
parser.add_argument('--no-similarity', action='store_true', help='Skip similarity matrix computation')
|
||||
args = parser.parse_args()
|
||||
|
||||
detector = RetinaFace()
|
||||
recognizer = ArcFace()
|
||||
age_gender = AgeGender()
|
||||
analyzer = FaceAnalyzer(detector, recognizer, age_gender)
|
||||
|
||||
source_type = get_source_type(args.source)
|
||||
|
||||
if source_type == 'camera':
|
||||
run_camera(analyzer, int(args.source))
|
||||
elif source_type == 'image':
|
||||
if not os.path.exists(args.source):
|
||||
print(f'Error: Image not found: {args.source}')
|
||||
return
|
||||
process_image(analyzer, args.source, args.save_dir, show_similarity=not args.no_similarity)
|
||||
elif source_type == 'video':
|
||||
if not os.path.exists(args.source):
|
||||
print(f'Error: Video not found: {args.source}')
|
||||
return
|
||||
process_video(analyzer, args.source, args.save_dir)
|
||||
else:
|
||||
print(f"Error: Unknown source type for '{args.source}'")
|
||||
print('Supported formats: images (.jpg, .png, ...), videos (.mp4, .avi, ...), or camera ID (0, 1, ...)')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
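# Condensed sketch of the verification flow this tool implements: analyze an image,
# then compare the first two embeddings (0.4 is the same-person threshold used above):
def _compare_first_two(image_path: str) -> None:
    analyzer = FaceAnalyzer(RetinaFace(), ArcFace(), AgeGender())
    faces = analyzer.analyze(cv2.imread(image_path))
    if len(faces) >= 2:
        sim = faces[0].compute_similarity(faces[1])
        print(f'similarity={sim:.3f}', 'Same' if sim > 0.4 else 'Different')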
281
tools/face_anonymize.py
Normal file
@@ -0,0 +1,281 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Face anonymization/blurring for privacy.
|
||||
|
||||
Usage:
|
||||
python tools/face_anonymize.py --source path/to/image.jpg --method pixelate
|
||||
python tools/face_anonymize.py --source path/to/video.mp4 --method gaussian
|
||||
python tools/face_anonymize.py --source 0 --method pixelate # webcam
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
|
||||
from uniface import RetinaFace
|
||||
from uniface.privacy import BlurFace
|
||||
|
||||
IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
|
||||
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}
|
||||
|
||||
|
||||
def get_source_type(source: str) -> str:
|
||||
"""Determine if source is image, video, or camera."""
|
||||
if source.isdigit():
|
||||
return 'camera'
|
||||
path = Path(source)
|
||||
suffix = path.suffix.lower()
|
||||
if suffix in IMAGE_EXTENSIONS:
|
||||
return 'image'
|
||||
elif suffix in VIDEO_EXTENSIONS:
|
||||
return 'video'
|
||||
else:
|
||||
return 'unknown'
|
||||
|
||||
|
||||
def process_image(
|
||||
detector,
|
||||
blurrer: BlurFace,
|
||||
image_path: str,
|
||||
save_dir: str = 'outputs',
|
||||
show_detections: bool = False,
|
||||
):
|
||||
"""Process a single image."""
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
print(f"Error: Failed to load image from '{image_path}'")
|
||||
return
|
||||
|
||||
faces = detector.detect(image)
|
||||
print(f'Detected {len(faces)} face(s)')
|
||||
|
||||
if show_detections and faces:
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
preview = image.copy()
|
||||
bboxes = [face.bbox for face in faces]
|
||||
scores = [face.confidence for face in faces]
|
||||
landmarks = [face.landmarks for face in faces]
|
||||
draw_detections(preview, bboxes, scores, landmarks)
|
||||
|
||||
cv2.imshow('Detections (Press any key to continue)', preview)
|
||||
cv2.waitKey(0)
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
if faces:
|
||||
anonymized = blurrer.anonymize(image, faces)
|
||||
else:
|
||||
anonymized = image
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
basename = os.path.splitext(os.path.basename(image_path))[0]
|
||||
output_path = os.path.join(save_dir, f'{basename}_anonymized.jpg')
|
||||
cv2.imwrite(output_path, anonymized)
|
||||
print(f'Output saved: {output_path}')
|
||||
|
||||
|
||||
def process_video(
|
||||
detector,
|
||||
blurrer: BlurFace,
|
||||
video_path: str,
|
||||
save_dir: str = 'outputs',
|
||||
):
|
||||
"""Process a video file."""
|
||||
cap = cv2.VideoCapture(video_path)
|
||||
if not cap.isOpened():
|
||||
print(f"Error: Cannot open video file '{video_path}'")
|
||||
return
|
||||
|
||||
fps = cap.get(cv2.CAP_PROP_FPS)
|
||||
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
||||
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
||||
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{Path(video_path).stem}_anonymized.mp4')
|
||||
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
|
||||
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
|
||||
|
||||
print(f'Processing video: {video_path} ({total_frames} frames)')
|
||||
frame_count = 0
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
frame_count += 1
|
||||
faces = detector.detect(frame)
|
||||
|
||||
if faces:
|
||||
frame = blurrer.anonymize(frame, faces, inplace=True)
|
||||
|
||||
out.write(frame)
|
||||
|
||||
if frame_count % 100 == 0:
|
||||
print(f' Processed {frame_count}/{total_frames} frames...')
|
||||
|
||||
cap.release()
|
||||
out.release()
|
||||
print(f'Done! Output saved: {output_path}')
|
||||
|
||||
|
||||
def run_camera(detector, blurrer: BlurFace, camera_id: int = 0):
|
||||
"""Run real-time anonymization on webcam."""
|
||||
cap = cv2.VideoCapture(camera_id)
|
||||
if not cap.isOpened():
|
||||
print(f'Cannot open camera {camera_id}')
|
||||
return
|
||||
|
||||
print("Press 'q' to quit")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
if not ret:
break
frame = cv2.flip(frame, 1)
|
||||
|
||||
faces = detector.detect(frame)
|
||||
if faces:
|
||||
frame = blurrer.anonymize(frame, faces, inplace=True)
|
||||
|
||||
cv2.putText(
|
||||
frame,
|
||||
f'Faces blurred: {len(faces)} | Method: {blurrer.method}',
|
||||
(10, 30),
|
||||
cv2.FONT_HERSHEY_SIMPLEX,
|
||||
0.7,
|
||||
(0, 255, 0),
|
||||
2,
|
||||
)
|
||||
|
||||
cv2.imshow('Face Anonymization (Press q to quit)', frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(
|
||||
description='Face anonymization using various blur methods',
|
||||
formatter_class=argparse.RawDescriptionHelpFormatter,
|
||||
epilog="""
|
||||
Examples:
|
||||
# Anonymize image with pixelation (default)
|
||||
python tools/face_anonymize.py --source photo.jpg
|
||||
|
||||
# Use Gaussian blur with custom strength
|
||||
python tools/face_anonymize.py --source photo.jpg --method gaussian --blur-strength 5.0
|
||||
|
||||
# Real-time webcam anonymization
|
||||
python tools/face_anonymize.py --source 0 --method pixelate
|
||||
|
||||
# Black boxes for maximum privacy
|
||||
python tools/face_anonymize.py --source photo.jpg --method blackout
|
||||
|
||||
# Custom pixelation intensity
|
||||
python tools/face_anonymize.py --source photo.jpg --method pixelate --pixel-blocks 5
|
||||
""",
|
||||
)
|
||||
|
||||
# Input/output
|
||||
parser.add_argument('--source', type=str, required=True, help='Image/video path or camera ID (0, 1, ...)')
|
||||
parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
|
||||
|
||||
# Blur method
|
||||
parser.add_argument(
|
||||
'--method',
|
||||
type=str,
|
||||
default='pixelate',
|
||||
choices=['gaussian', 'pixelate', 'blackout', 'elliptical', 'median'],
|
||||
help='Blur method (default: pixelate)',
|
||||
)
|
||||
|
||||
# Method-specific parameters
|
||||
parser.add_argument(
|
||||
'--blur-strength',
|
||||
type=float,
|
||||
default=3.0,
|
||||
help='Blur strength for gaussian/elliptical/median (default: 3.0)',
|
||||
)
|
||||
parser.add_argument(
|
||||
'--pixel-blocks',
|
||||
type=int,
|
||||
default=20,
|
||||
help='Number of pixel blocks for pixelate (default: 20, lower=more pixelated)',
|
||||
)
|
||||
parser.add_argument(
|
||||
'--color',
|
||||
type=str,
|
||||
default='0,0,0',
|
||||
help='Fill color for blackout as R,G,B (default: 0,0,0 for black)',
|
||||
)
|
||||
parser.add_argument('--margin', type=int, default=20, help='Margin for elliptical blur (default: 20)')
|
||||
|
||||
# Detection
|
||||
parser.add_argument(
|
||||
'--confidence-threshold',
|
||||
type=float,
|
||||
default=0.5,
|
||||
help='Detection confidence threshold (default: 0.5)',
|
||||
)
|
||||
|
||||
# Visualization
|
||||
parser.add_argument(
|
||||
'--show-detections',
|
||||
action='store_true',
|
||||
help='Show detection boxes before blurring (image mode only)',
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
# Parse color
|
||||
color_values = [int(x) for x in args.color.split(',')]
|
||||
if len(color_values) != 3:
|
||||
parser.error('--color must be in format R,G,B (e.g., 0,0,0)')
|
||||
color = tuple(color_values)
|
||||
|
||||
# Initialize detector
|
||||
print(f'Initializing face detector (confidence_threshold={args.confidence_threshold})...')
|
||||
detector = RetinaFace(confidence_threshold=args.confidence_threshold)
|
||||
|
||||
# Initialize blurrer
|
||||
print(f'Initializing blur method: {args.method}')
|
||||
blurrer = BlurFace(
|
||||
method=args.method,
|
||||
blur_strength=args.blur_strength,
|
||||
pixel_blocks=args.pixel_blocks,
|
||||
color=color,
|
||||
margin=args.margin,
|
||||
)
|
||||
|
||||
source_type = get_source_type(args.source)
|
||||
|
||||
if source_type == 'camera':
|
||||
run_camera(detector, blurrer, int(args.source))
|
||||
elif source_type == 'image':
|
||||
if not os.path.exists(args.source):
|
||||
print(f'Error: Image not found: {args.source}')
|
||||
return
|
||||
process_image(detector, blurrer, args.source, args.save_dir, args.show_detections)
|
||||
elif source_type == 'video':
|
||||
if not os.path.exists(args.source):
|
||||
print(f'Error: Video not found: {args.source}')
|
||||
return
|
||||
process_video(detector, blurrer, args.source, args.save_dir)
|
||||
else:
|
||||
print(f"Error: Unknown source type for '{args.source}'")
|
||||
print('Supported formats: images (.jpg, .png, ...), videos (.mp4, .avi, ...), or camera ID (0, 1, ...)')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
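The CLI above is a thin wrapper: RetinaFace finds faces and BlurFace.anonymize rewrites the pixels. Embedding the same step in another script takes only a few lines; a minimal sketch, assuming the BlurFace import path (its import statement is outside this excerpt) and a hypothetical input file:

    import cv2

    from uniface import RetinaFace
    from uniface.privacy import BlurFace  # assumed import path; mirror the import used by the tool above

    detector = RetinaFace(confidence_threshold=0.5)
    blurrer = BlurFace(method='pixelate', pixel_blocks=20)  # same defaults as the CLI

    image = cv2.imread('photo.jpg')  # hypothetical input
    faces = detector.detect(image)
    if faces:
        image = blurrer.anonymize(image, faces, inplace=True)
    cv2.imwrite('photo_anonymized.jpg', image)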
213
tools/face_emotion.py
Normal file
@@ -0,0 +1,213 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Emotion detection on detected faces.
|
||||
|
||||
Usage:
|
||||
python tools/face_emotion.py --source path/to/image.jpg
|
||||
python tools/face_emotion.py --source path/to/video.mp4
|
||||
python tools/face_emotion.py --source 0 # webcam
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
|
||||
from uniface import SCRFD, Emotion, RetinaFace
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
|
||||
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}
|
||||
|
||||
|
||||
def get_source_type(source: str) -> str:
|
||||
"""Determine if source is image, video, or camera."""
|
||||
if source.isdigit():
|
||||
return 'camera'
|
||||
path = Path(source)
|
||||
suffix = path.suffix.lower()
|
||||
if suffix in IMAGE_EXTENSIONS:
|
||||
return 'image'
|
||||
elif suffix in VIDEO_EXTENSIONS:
|
||||
return 'video'
|
||||
else:
|
||||
return 'unknown'
|
||||
|
||||
|
||||
def draw_emotion_label(image, bbox, emotion: str, confidence: float):
|
||||
"""Draw emotion label above the bounding box."""
|
||||
x1, y1 = int(bbox[0]), int(bbox[1])
|
||||
text = f'{emotion} ({confidence:.2f})'
|
||||
(tw, th), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
|
||||
cv2.rectangle(image, (x1, y1 - th - 10), (x1 + tw + 10, y1), (255, 0, 0), -1)
|
||||
cv2.putText(image, text, (x1 + 5, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
|
||||
|
||||
|
||||
def process_image(
|
||||
detector,
|
||||
emotion_predictor,
|
||||
image_path: str,
|
||||
save_dir: str = 'outputs',
|
||||
threshold: float = 0.6,
|
||||
):
|
||||
"""Process a single image."""
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
print(f"Error: Failed to load image from '{image_path}'")
|
||||
return
|
||||
|
||||
faces = detector.detect(image)
|
||||
print(f'Detected {len(faces)} face(s)')
|
||||
|
||||
if not faces:
|
||||
return
|
||||
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
draw_detections(
|
||||
image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
|
||||
)
|
||||
|
||||
for i, face in enumerate(faces):
|
||||
result = emotion_predictor.predict(image, face.landmarks)
|
||||
print(f' Face {i + 1}: {result.emotion} (confidence: {result.confidence:.3f})')
|
||||
draw_emotion_label(image, face.bbox, result.emotion, result.confidence)
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{Path(image_path).stem}_emotion.jpg')
|
||||
cv2.imwrite(output_path, image)
|
||||
print(f'Output saved: {output_path}')
|
||||
|
||||
|
||||
def process_video(
|
||||
detector,
|
||||
emotion_predictor,
|
||||
video_path: str,
|
||||
save_dir: str = 'outputs',
|
||||
threshold: float = 0.6,
|
||||
):
|
||||
"""Process a video file."""
|
||||
cap = cv2.VideoCapture(video_path)
|
||||
if not cap.isOpened():
|
||||
print(f"Error: Cannot open video file '{video_path}'")
|
||||
return
|
||||
|
||||
fps = cap.get(cv2.CAP_PROP_FPS)
|
||||
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
||||
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
||||
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{Path(video_path).stem}_emotion.mp4')
|
||||
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
|
||||
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
|
||||
|
||||
print(f'Processing video: {video_path} ({total_frames} frames)')
|
||||
frame_count = 0
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
frame_count += 1
|
||||
faces = detector.detect(frame)
|
||||
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
draw_detections(
|
||||
image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
|
||||
)
|
||||
|
||||
for face in faces:
|
||||
result = emotion_predictor.predict(frame, face.landmarks)
|
||||
draw_emotion_label(frame, face.bbox, result.emotion, result.confidence)
|
||||
|
||||
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
out.write(frame)
|
||||
|
||||
if frame_count % 100 == 0:
|
||||
print(f' Processed {frame_count}/{total_frames} frames...')
|
||||
|
||||
cap.release()
|
||||
out.release()
|
||||
print(f'Done! Output saved: {output_path}')
|
||||
|
||||
|
||||
def run_camera(detector, emotion_predictor, camera_id: int = 0, threshold: float = 0.6):
|
||||
"""Run real-time detection on webcam."""
|
||||
cap = cv2.VideoCapture(camera_id)
|
||||
if not cap.isOpened():
|
||||
print(f'Cannot open camera {camera_id}')
|
||||
return
|
||||
|
||||
print("Press 'q' to quit")
|
||||
|
||||
while True:
|
||||
        ret, frame = cap.read()
        if not ret:
            break
        frame = cv2.flip(frame, 1)
|
||||
|
||||
faces = detector.detect(frame)
|
||||
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
draw_detections(
|
||||
image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
|
||||
)
|
||||
|
||||
for face in faces:
|
||||
result = emotion_predictor.predict(frame, face.landmarks)
|
||||
draw_emotion_label(frame, face.bbox, result.emotion, result.confidence)
|
||||
|
||||
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
cv2.imshow('Emotion Detection', frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Run emotion detection')
|
||||
parser.add_argument('--source', type=str, required=True, help='Image/video path or camera ID (0, 1, ...)')
|
||||
parser.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
|
||||
parser.add_argument('--threshold', type=float, default=0.6, help='Visualization threshold')
|
||||
parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
|
||||
args = parser.parse_args()
|
||||
|
||||
detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
|
||||
emotion_predictor = Emotion()
|
||||
|
||||
source_type = get_source_type(args.source)
|
||||
|
||||
if source_type == 'camera':
|
||||
run_camera(detector, emotion_predictor, int(args.source), args.threshold)
|
||||
elif source_type == 'image':
|
||||
if not os.path.exists(args.source):
|
||||
print(f'Error: Image not found: {args.source}')
|
||||
return
|
||||
process_image(detector, emotion_predictor, args.source, args.save_dir, args.threshold)
|
||||
elif source_type == 'video':
|
||||
if not os.path.exists(args.source):
|
||||
print(f'Error: Video not found: {args.source}')
|
||||
return
|
||||
process_video(detector, emotion_predictor, args.source, args.save_dir, args.threshold)
|
||||
else:
|
||||
print(f"Error: Unknown source type for '{args.source}'")
|
||||
print('Supported formats: images (.jpg, .png, ...), videos (.mp4, .avi, ...), or camera ID (0, 1, ...)')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
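For batch use outside this CLI, the same two calls carry the whole pipeline. A minimal sketch reusing the API shown above (the image path is hypothetical):

    import cv2

    from uniface import Emotion, RetinaFace

    detector, emotion = RetinaFace(), Emotion()
    image = cv2.imread('group_photo.jpg')  # hypothetical input
    for face in detector.detect(image):
        result = emotion.predict(image, face.landmarks)
        print(face.bbox, result.emotion, round(result.confidence, 3))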
250
tools/face_parsing.py
Normal file
@@ -0,0 +1,250 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Face parsing on detected faces.
|
||||
|
||||
Usage:
|
||||
python tools/face_parsing.py --source path/to/image.jpg
|
||||
python tools/face_parsing.py --source path/to/video.mp4
|
||||
python tools/face_parsing.py --source 0 # webcam
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from uniface import RetinaFace
|
||||
from uniface.constants import ParsingWeights
|
||||
from uniface.parsing import BiSeNet
|
||||
from uniface.visualization import vis_parsing_maps
|
||||
|
||||
IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
|
||||
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}
|
||||
|
||||
|
||||
def get_source_type(source: str) -> str:
|
||||
"""Determine if source is image, video, or camera."""
|
||||
if source.isdigit():
|
||||
return 'camera'
|
||||
path = Path(source)
|
||||
suffix = path.suffix.lower()
|
||||
if suffix in IMAGE_EXTENSIONS:
|
||||
return 'image'
|
||||
elif suffix in VIDEO_EXTENSIONS:
|
||||
return 'video'
|
||||
else:
|
||||
return 'unknown'
|
||||
|
||||
|
||||
def expand_bbox(
|
||||
bbox: np.ndarray,
|
||||
image_shape: tuple[int, int],
|
||||
expand_ratio: float = 0.2,
|
||||
expand_top_ratio: float = 0.4,
|
||||
) -> tuple[int, int, int, int]:
|
||||
"""
|
||||
Expand bounding box to include full head region for face parsing.
|
||||
|
||||
Face detection typically returns tight face boxes, but face parsing
|
||||
requires the full head including hair, ears, and neck.
|
||||
|
||||
Args:
|
||||
bbox: Original bounding box [x1, y1, x2, y2].
|
||||
image_shape: Image dimensions as (height, width).
|
||||
expand_ratio: Expansion ratio for left, right, and bottom (default: 0.2 = 20%).
|
||||
expand_top_ratio: Expansion ratio for top to capture hair/forehead (default: 0.4 = 40%).
|
||||
|
||||
Returns:
|
||||
Tuple[int, int, int, int]: Expanded bbox (x1, y1, x2, y2) clamped to image bounds.
|
||||
"""
|
||||
x1, y1, x2, y2 = map(int, bbox[:4])
|
||||
height, width = image_shape[:2]
|
||||
|
||||
face_width = x2 - x1
|
||||
face_height = y2 - y1
|
||||
|
||||
expand_x = int(face_width * expand_ratio)
|
||||
expand_y_bottom = int(face_height * expand_ratio)
|
||||
expand_y_top = int(face_height * expand_top_ratio)
|
||||
|
||||
new_x1 = max(0, x1 - expand_x)
|
||||
new_y1 = max(0, y1 - expand_y_top)
|
||||
new_x2 = min(width, x2 + expand_x)
|
||||
new_y2 = min(height, y2 + expand_y_bottom)
|
||||
|
||||
return new_x1, new_y1, new_x2, new_y2
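# Worked example of the ratios above (assumes the defaults and a frame large
# enough that nothing clamps): a 100x100 face box grows by 20 px on the left,
# right, and bottom and by 40 px on top.
#
#     expand_bbox(np.array([200, 150, 300, 250]), image_shape=(1080, 1920))
#     # -> (180, 110, 320, 270)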
|
||||
|
||||
|
||||
def process_image(detector, parser, image_path: str, save_dir: str = 'outputs', expand_ratio: float = 0.2):
|
||||
"""Process a single image."""
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
print(f"Error: Failed to load image from '{image_path}'")
|
||||
return
|
||||
|
||||
faces = detector.detect(image)
|
||||
print(f'Detected {len(faces)} face(s)')
|
||||
|
||||
result_image = image.copy()
|
||||
|
||||
for i, face in enumerate(faces):
|
||||
x1, y1, x2, y2 = expand_bbox(face.bbox, image.shape, expand_ratio=expand_ratio)
|
||||
face_crop = image[y1:y2, x1:x2]
|
||||
|
||||
if face_crop.size == 0:
|
||||
continue
|
||||
|
||||
mask = parser.parse(face_crop)
|
||||
print(f' Face {i + 1}: parsed with {len(set(mask.flatten()))} unique classes')
|
||||
|
||||
face_crop_rgb = cv2.cvtColor(face_crop, cv2.COLOR_BGR2RGB)
|
||||
vis_result = vis_parsing_maps(face_crop_rgb, mask, save_image=False)
|
||||
|
||||
result_image[y1:y2, x1:x2] = vis_result
|
||||
cv2.rectangle(result_image, (x1, y1), (x2, y2), (0, 255, 0), 2)
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{Path(image_path).stem}_parsing.jpg')
|
||||
cv2.imwrite(output_path, result_image)
|
||||
print(f'Output saved: {output_path}')
|
||||
|
||||
|
||||
def process_video(detector, parser, video_path: str, save_dir: str = 'outputs', expand_ratio: float = 0.2):
|
||||
"""Process a video file."""
|
||||
cap = cv2.VideoCapture(video_path)
|
||||
if not cap.isOpened():
|
||||
print(f"Error: Cannot open video file '{video_path}'")
|
||||
return
|
||||
|
||||
fps = cap.get(cv2.CAP_PROP_FPS)
|
||||
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
||||
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
||||
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{Path(video_path).stem}_parsing.mp4')
|
||||
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
|
||||
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
|
||||
|
||||
print(f'Processing video: {video_path} ({total_frames} frames)')
|
||||
frame_count = 0
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
frame_count += 1
|
||||
faces = detector.detect(frame)
|
||||
|
||||
for face in faces:
|
||||
x1, y1, x2, y2 = expand_bbox(face.bbox, frame.shape, expand_ratio=expand_ratio)
|
||||
face_crop = frame[y1:y2, x1:x2]
|
||||
|
||||
if face_crop.size == 0:
|
||||
continue
|
||||
|
||||
mask = parser.parse(face_crop)
|
||||
face_crop_rgb = cv2.cvtColor(face_crop, cv2.COLOR_BGR2RGB)
|
||||
vis_result = vis_parsing_maps(face_crop_rgb, mask, save_image=False)
|
||||
|
||||
frame[y1:y2, x1:x2] = vis_result
|
||||
cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
|
||||
|
||||
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
out.write(frame)
|
||||
|
||||
if frame_count % 100 == 0:
|
||||
print(f' Processed {frame_count}/{total_frames} frames...')
|
||||
|
||||
cap.release()
|
||||
out.release()
|
||||
print(f'Done! Output saved: {output_path}')
|
||||
|
||||
|
||||
def run_camera(detector, parser, camera_id: int = 0, expand_ratio: float = 0.2):
|
||||
"""Run real-time detection on webcam."""
|
||||
cap = cv2.VideoCapture(camera_id)
|
||||
if not cap.isOpened():
|
||||
print(f'Cannot open camera {camera_id}')
|
||||
return
|
||||
|
||||
print("Press 'q' to quit")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
frame = cv2.flip(frame, 1)
|
||||
faces = detector.detect(frame)
|
||||
|
||||
for face in faces:
|
||||
x1, y1, x2, y2 = expand_bbox(face.bbox, frame.shape, expand_ratio=expand_ratio)
|
||||
face_crop = frame[y1:y2, x1:x2]
|
||||
|
||||
if face_crop.size == 0:
|
||||
continue
|
||||
|
||||
mask = parser.parse(face_crop)
|
||||
face_crop_rgb = cv2.cvtColor(face_crop, cv2.COLOR_BGR2RGB)
|
||||
vis_result = vis_parsing_maps(face_crop_rgb, mask, save_image=False)
|
||||
|
||||
frame[y1:y2, x1:x2] = vis_result
|
||||
cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
|
||||
|
||||
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
cv2.imshow('Face Parsing', frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
|
||||
parser_arg = argparse.ArgumentParser(description='Run face parsing')
|
||||
parser_arg.add_argument('--source', type=str, required=True, help='Image/video path or camera ID (0, 1, ...)')
|
||||
parser_arg.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
|
||||
parser_arg.add_argument(
|
||||
'--model', type=str, default=ParsingWeights.RESNET18, choices=[ParsingWeights.RESNET18, ParsingWeights.RESNET34]
|
||||
)
|
||||
parser_arg.add_argument(
|
||||
'--expand-ratio',
|
||||
type=float,
|
||||
default=0.2,
|
||||
help='Bbox expansion ratio for full head coverage (default: 0.2 = 20%%)',
|
||||
)
|
||||
args = parser_arg.parse_args()
|
||||
|
||||
detector = RetinaFace()
|
||||
    parser = BiSeNet(model_name=args.model)
|
||||
|
||||
source_type = get_source_type(args.source)
|
||||
|
||||
if source_type == 'camera':
|
||||
run_camera(detector, parser, int(args.source), expand_ratio=args.expand_ratio)
|
||||
elif source_type == 'image':
|
||||
if not os.path.exists(args.source):
|
||||
print(f'Error: Image not found: {args.source}')
|
||||
return
|
||||
process_image(detector, parser, args.source, args.save_dir, expand_ratio=args.expand_ratio)
|
||||
elif source_type == 'video':
|
||||
if not os.path.exists(args.source):
|
||||
print(f'Error: Video not found: {args.source}')
|
||||
return
|
||||
process_video(detector, parser, args.source, args.save_dir, expand_ratio=args.expand_ratio)
|
||||
else:
|
||||
print(f"Error: Unknown source type for '{args.source}'")
|
||||
print('Supported formats: images (.jpg, .png, ...), videos (.mp4, .avi, ...), or camera ID (0, 1, ...)')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
190
tools/face_search.py
Normal file
@@ -0,0 +1,190 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Real-time face search: match faces against a reference image.
|
||||
|
||||
Usage:
|
||||
python tools/face_search.py --reference person.jpg --source 0 # webcam
|
||||
python tools/face_search.py --reference person.jpg --source video.mp4
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from uniface.detection import SCRFD, RetinaFace
|
||||
from uniface.face_utils import compute_similarity
|
||||
from uniface.recognition import ArcFace, MobileFace, SphereFace
|
||||
|
||||
IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
|
||||
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}
|
||||
|
||||
|
||||
def get_source_type(source: str) -> str:
|
||||
"""Determine if source is image, video, or camera."""
|
||||
if source.isdigit():
|
||||
return 'camera'
|
||||
path = Path(source)
|
||||
suffix = path.suffix.lower()
|
||||
if suffix in IMAGE_EXTENSIONS:
|
||||
return 'image'
|
||||
elif suffix in VIDEO_EXTENSIONS:
|
||||
return 'video'
|
||||
else:
|
||||
return 'unknown'
|
||||
|
||||
|
||||
def get_recognizer(name: str):
|
||||
"""Get recognizer by name."""
|
||||
if name == 'arcface':
|
||||
return ArcFace()
|
||||
elif name == 'mobileface':
|
||||
return MobileFace()
|
||||
else:
|
||||
return SphereFace()
|
||||
|
||||
|
||||
def extract_reference_embedding(detector, recognizer, image_path: str) -> np.ndarray:
|
||||
"""Extract embedding from reference image."""
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
raise RuntimeError(f'Failed to load image: {image_path}')
|
||||
|
||||
faces = detector.detect(image)
|
||||
if not faces:
|
||||
raise RuntimeError('No faces found in reference image.')
|
||||
|
||||
landmarks = faces[0].landmarks
|
||||
return recognizer.get_normalized_embedding(image, landmarks)
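# One reference photo can be brittle. A sketch of an optional extension (not
# part of this tool): average normalized embeddings over several reference
# images and re-normalize, reusing the helper above.
#
#     embs = [extract_reference_embedding(detector, recognizer, p) for p in paths]
#     mean = np.mean(np.stack(embs, axis=0), axis=0)
#     ref_embedding = mean / np.linalg.norm(mean)  # keep unit length for cosine matching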
|
||||
|
||||
|
||||
def process_frame(frame, detector, recognizer, ref_embedding: np.ndarray, threshold: float = 0.4):
|
||||
"""Process a single frame and return annotated frame."""
|
||||
faces = detector.detect(frame)
|
||||
|
||||
for face in faces:
|
||||
bbox = face.bbox
|
||||
landmarks = face.landmarks
|
||||
x1, y1, x2, y2 = map(int, bbox)
|
||||
|
||||
embedding = recognizer.get_normalized_embedding(frame, landmarks)
|
||||
sim = compute_similarity(ref_embedding, embedding)
|
||||
|
||||
label = f'Match ({sim:.2f})' if sim > threshold else f'Unknown ({sim:.2f})'
|
||||
color = (0, 255, 0) if sim > threshold else (0, 0, 255)
|
||||
|
||||
cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
|
||||
cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)
|
||||
|
||||
return frame
|
||||
|
||||
|
||||
def process_video(detector, recognizer, ref_embedding: np.ndarray, video_path: str, save_dir: str, threshold: float):
|
||||
"""Process a video file."""
|
||||
cap = cv2.VideoCapture(video_path)
|
||||
if not cap.isOpened():
|
||||
print(f"Error: Cannot open video file '{video_path}'")
|
||||
return
|
||||
|
||||
fps = cap.get(cv2.CAP_PROP_FPS)
|
||||
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
||||
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
||||
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{Path(video_path).stem}_search.mp4')
|
||||
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
|
||||
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
|
||||
|
||||
print(f'Processing video: {video_path} ({total_frames} frames)')
|
||||
frame_count = 0
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
frame_count += 1
|
||||
frame = process_frame(frame, detector, recognizer, ref_embedding, threshold)
|
||||
out.write(frame)
|
||||
|
||||
if frame_count % 100 == 0:
|
||||
print(f' Processed {frame_count}/{total_frames} frames...')
|
||||
|
||||
cap.release()
|
||||
out.release()
|
||||
print(f'Done! Output saved: {output_path}')
|
||||
|
||||
|
||||
def run_camera(detector, recognizer, ref_embedding: np.ndarray, camera_id: int = 0, threshold: float = 0.4):
|
||||
"""Run real-time face search on webcam."""
|
||||
cap = cv2.VideoCapture(camera_id)
|
||||
if not cap.isOpened():
|
||||
print(f'Cannot open camera {camera_id}')
|
||||
return
|
||||
|
||||
print("Press 'q' to quit")
|
||||
|
||||
while True:
|
||||
        ret, frame = cap.read()
        if not ret:
            break
        frame = cv2.flip(frame, 1)
|
||||
|
||||
frame = process_frame(frame, detector, recognizer, ref_embedding, threshold)
|
||||
|
||||
cv2.imshow('Face Recognition', frame)
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Face search using a reference image')
|
||||
parser.add_argument('--reference', type=str, required=True, help='Reference face image')
|
||||
parser.add_argument('--source', type=str, required=True, help='Video path or camera ID (0, 1, ...)')
|
||||
parser.add_argument('--threshold', type=float, default=0.4, help='Match threshold')
|
||||
parser.add_argument('--detector', type=str, default='scrfd', choices=['retinaface', 'scrfd'])
|
||||
parser.add_argument(
|
||||
'--recognizer',
|
||||
type=str,
|
||||
default='arcface',
|
||||
choices=['arcface', 'mobileface', 'sphereface'],
|
||||
)
|
||||
parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
|
||||
args = parser.parse_args()
|
||||
|
||||
if not os.path.exists(args.reference):
|
||||
print(f'Error: Reference image not found: {args.reference}')
|
||||
return
|
||||
|
||||
detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
|
||||
recognizer = get_recognizer(args.recognizer)
|
||||
|
||||
print(f'Loading reference: {args.reference}')
|
||||
ref_embedding = extract_reference_embedding(detector, recognizer, args.reference)
|
||||
|
||||
source_type = get_source_type(args.source)
|
||||
|
||||
if source_type == 'camera':
|
||||
run_camera(detector, recognizer, ref_embedding, int(args.source), args.threshold)
|
||||
elif source_type == 'video':
|
||||
if not os.path.exists(args.source):
|
||||
print(f'Error: Video not found: {args.source}')
|
||||
return
|
||||
process_video(detector, recognizer, ref_embedding, args.source, args.save_dir, args.threshold)
|
||||
else:
|
||||
print(f"Error: Source must be a video file or camera ID, not '{args.source}'")
|
||||
print('Supported formats: videos (.mp4, .avi, ...) or camera ID (0, 1, ...)')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
214
tools/fairface.py
Normal file
@@ -0,0 +1,214 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""FairFace attribute prediction (race, gender, age) on detected faces.
|
||||
|
||||
Usage:
|
||||
python tools/fairface.py --source path/to/image.jpg
|
||||
python tools/fairface.py --source path/to/video.mp4
|
||||
python tools/fairface.py --source 0 # webcam
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
|
||||
from uniface import SCRFD, RetinaFace
|
||||
from uniface.attribute import FairFace
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
|
||||
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}
|
||||
|
||||
|
||||
def get_source_type(source: str) -> str:
|
||||
"""Determine if source is image, video, or camera."""
|
||||
if source.isdigit():
|
||||
return 'camera'
|
||||
path = Path(source)
|
||||
suffix = path.suffix.lower()
|
||||
if suffix in IMAGE_EXTENSIONS:
|
||||
return 'image'
|
||||
elif suffix in VIDEO_EXTENSIONS:
|
||||
return 'video'
|
||||
else:
|
||||
return 'unknown'
|
||||
|
||||
|
||||
def draw_fairface_label(image, bbox, sex: str, age_group: str, race: str):
|
||||
"""Draw FairFace attributes above the bounding box."""
|
||||
x1, y1 = int(bbox[0]), int(bbox[1])
|
||||
text = f'{sex}, {age_group}, {race}'
|
||||
(tw, th), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)
|
||||
cv2.rectangle(image, (x1, y1 - th - 10), (x1 + tw + 10, y1), (0, 255, 0), -1)
|
||||
cv2.putText(image, text, (x1 + 5, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 2)
|
||||
|
||||
|
||||
def process_image(
|
||||
detector,
|
||||
fairface,
|
||||
image_path: str,
|
||||
save_dir: str = 'outputs',
|
||||
threshold: float = 0.6,
|
||||
):
|
||||
"""Process a single image."""
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
print(f"Error: Failed to load image from '{image_path}'")
|
||||
return
|
||||
|
||||
faces = detector.detect(image)
|
||||
print(f'Detected {len(faces)} face(s)')
|
||||
|
||||
if not faces:
|
||||
return
|
||||
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
draw_detections(
|
||||
image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
|
||||
)
|
||||
|
||||
for i, face in enumerate(faces):
|
||||
result = fairface.predict(image, face.bbox)
|
||||
print(f' Face {i + 1}: {result.sex}, {result.age_group}, {result.race}')
|
||||
draw_fairface_label(image, face.bbox, result.sex, result.age_group, result.race)
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{Path(image_path).stem}_fairface.jpg')
|
||||
cv2.imwrite(output_path, image)
|
||||
print(f'Output saved: {output_path}')
|
||||
|
||||
|
||||
def process_video(
|
||||
detector,
|
||||
fairface,
|
||||
video_path: str,
|
||||
save_dir: str = 'outputs',
|
||||
threshold: float = 0.6,
|
||||
):
|
||||
"""Process a video file."""
|
||||
cap = cv2.VideoCapture(video_path)
|
||||
if not cap.isOpened():
|
||||
print(f"Error: Cannot open video file '{video_path}'")
|
||||
return
|
||||
|
||||
fps = cap.get(cv2.CAP_PROP_FPS)
|
||||
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
||||
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
||||
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{Path(video_path).stem}_fairface.mp4')
|
||||
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
|
||||
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
|
||||
|
||||
print(f'Processing video: {video_path} ({total_frames} frames)')
|
||||
frame_count = 0
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
frame_count += 1
|
||||
faces = detector.detect(frame)
|
||||
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
draw_detections(
|
||||
image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
|
||||
)
|
||||
|
||||
for face in faces:
|
||||
result = fairface.predict(frame, face.bbox)
|
||||
draw_fairface_label(frame, face.bbox, result.sex, result.age_group, result.race)
|
||||
|
||||
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
out.write(frame)
|
||||
|
||||
if frame_count % 100 == 0:
|
||||
print(f' Processed {frame_count}/{total_frames} frames...')
|
||||
|
||||
cap.release()
|
||||
out.release()
|
||||
print(f'Done! Output saved: {output_path}')
|
||||
|
||||
|
||||
def run_camera(detector, fairface, camera_id: int = 0, threshold: float = 0.6):
|
||||
"""Run real-time detection on webcam."""
|
||||
cap = cv2.VideoCapture(camera_id)
|
||||
if not cap.isOpened():
|
||||
print(f'Cannot open camera {camera_id}')
|
||||
return
|
||||
|
||||
print("Press 'q' to quit")
|
||||
|
||||
while True:
|
||||
        ret, frame = cap.read()
        if not ret:
            break
        frame = cv2.flip(frame, 1)
|
||||
|
||||
faces = detector.detect(frame)
|
||||
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
draw_detections(
|
||||
image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
|
||||
)
|
||||
|
||||
for face in faces:
|
||||
result = fairface.predict(frame, face.bbox)
|
||||
draw_fairface_label(frame, face.bbox, result.sex, result.age_group, result.race)
|
||||
|
||||
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
cv2.imshow('FairFace Detection', frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Run FairFace attribute prediction (race, gender, age)')
|
||||
parser.add_argument('--source', type=str, required=True, help='Image/video path or camera ID (0, 1, ...)')
|
||||
parser.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
|
||||
parser.add_argument('--threshold', type=float, default=0.6, help='Visualization threshold')
|
||||
parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
|
||||
args = parser.parse_args()
|
||||
|
||||
detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
|
||||
fairface = FairFace()
|
||||
|
||||
source_type = get_source_type(args.source)
|
||||
|
||||
if source_type == 'camera':
|
||||
run_camera(detector, fairface, int(args.source), args.threshold)
|
||||
elif source_type == 'image':
|
||||
if not os.path.exists(args.source):
|
||||
print(f'Error: Image not found: {args.source}')
|
||||
return
|
||||
process_image(detector, fairface, args.source, args.save_dir, args.threshold)
|
||||
elif source_type == 'video':
|
||||
if not os.path.exists(args.source):
|
||||
print(f'Error: Video not found: {args.source}')
|
||||
return
|
||||
process_video(detector, fairface, args.source, args.save_dir, args.threshold)
|
||||
else:
|
||||
print(f"Error: Unknown source type for '{args.source}'")
|
||||
print('Supported formats: images (.jpg, .png, ...), videos (.mp4, .avi, ...), or camera ID (0, 1, ...)')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
190
tools/gaze_estimation.py
Normal file
@@ -0,0 +1,190 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Gaze estimation on detected faces.
|
||||
|
||||
Usage:
|
||||
python tools/gaze_estimation.py --source path/to/image.jpg
|
||||
python tools/gaze_estimation.py --source path/to/video.mp4
|
||||
python tools/gaze_estimation.py --source 0 # webcam
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from uniface import RetinaFace
|
||||
from uniface.gaze import MobileGaze
|
||||
from uniface.visualization import draw_gaze
|
||||
|
||||
IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
|
||||
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}
|
||||
|
||||
|
||||
def get_source_type(source: str) -> str:
|
||||
"""Determine if source is image, video, or camera."""
|
||||
if source.isdigit():
|
||||
return 'camera'
|
||||
path = Path(source)
|
||||
suffix = path.suffix.lower()
|
||||
if suffix in IMAGE_EXTENSIONS:
|
||||
return 'image'
|
||||
elif suffix in VIDEO_EXTENSIONS:
|
||||
return 'video'
|
||||
else:
|
||||
return 'unknown'
|
||||
|
||||
|
||||
def process_image(detector, gaze_estimator, image_path: str, save_dir: str = 'outputs'):
|
||||
"""Process a single image."""
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
print(f"Error: Failed to load image from '{image_path}'")
|
||||
return
|
||||
|
||||
faces = detector.detect(image)
|
||||
print(f'Detected {len(faces)} face(s)')
|
||||
|
||||
for i, face in enumerate(faces):
|
||||
bbox = face.bbox
|
||||
x1, y1, x2, y2 = map(int, bbox[:4])
|
||||
face_crop = image[y1:y2, x1:x2]
|
||||
|
||||
if face_crop.size == 0:
|
||||
continue
|
||||
|
||||
result = gaze_estimator.estimate(face_crop)
|
||||
print(f' Face {i + 1}: pitch={np.degrees(result.pitch):.1f}°, yaw={np.degrees(result.yaw):.1f}°')
|
||||
|
||||
draw_gaze(image, bbox, result.pitch, result.yaw, draw_angles=True)
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{Path(image_path).stem}_gaze.jpg')
|
||||
cv2.imwrite(output_path, image)
|
||||
print(f'Output saved: {output_path}')
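# Note: MobileGaze returns pitch/yaw in radians (hence np.degrees above). If an
# arrow endpoint is needed without draw_gaze, one common projection convention
# is sketched below; the axes used by uniface.visualization.draw_gaze are not
# shown in this listing and may differ.
#
#     length = 100.0  # arrow length in pixels
#     dx = -length * np.sin(yaw) * np.cos(pitch)
#     dy = -length * np.sin(pitch)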
|
||||
|
||||
|
||||
def process_video(detector, gaze_estimator, video_path: str, save_dir: str = 'outputs'):
|
||||
"""Process a video file."""
|
||||
cap = cv2.VideoCapture(video_path)
|
||||
if not cap.isOpened():
|
||||
print(f"Error: Cannot open video file '{video_path}'")
|
||||
return
|
||||
|
||||
fps = cap.get(cv2.CAP_PROP_FPS)
|
||||
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
||||
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
||||
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{Path(video_path).stem}_gaze.mp4')
|
||||
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
|
||||
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
|
||||
|
||||
print(f'Processing video: {video_path} ({total_frames} frames)')
|
||||
frame_count = 0
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
frame_count += 1
|
||||
faces = detector.detect(frame)
|
||||
|
||||
for face in faces:
|
||||
bbox = face.bbox
|
||||
x1, y1, x2, y2 = map(int, bbox[:4])
|
||||
face_crop = frame[y1:y2, x1:x2]
|
||||
|
||||
if face_crop.size == 0:
|
||||
continue
|
||||
|
||||
result = gaze_estimator.estimate(face_crop)
|
||||
draw_gaze(frame, bbox, result.pitch, result.yaw)
|
||||
|
||||
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
out.write(frame)
|
||||
|
||||
if frame_count % 100 == 0:
|
||||
print(f' Processed {frame_count}/{total_frames} frames...')
|
||||
|
||||
cap.release()
|
||||
out.release()
|
||||
print(f'Done! Output saved: {output_path}')
|
||||
|
||||
|
||||
def run_camera(detector, gaze_estimator, camera_id: int = 0):
|
||||
"""Run real-time detection on webcam."""
|
||||
cap = cv2.VideoCapture(camera_id)
|
||||
if not cap.isOpened():
|
||||
print(f'Cannot open camera {camera_id}')
|
||||
return
|
||||
|
||||
print("Press 'q' to quit")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
frame = cv2.flip(frame, 1)
|
||||
faces = detector.detect(frame)
|
||||
|
||||
for face in faces:
|
||||
bbox = face.bbox
|
||||
x1, y1, x2, y2 = map(int, bbox[:4])
|
||||
face_crop = frame[y1:y2, x1:x2]
|
||||
|
||||
if face_crop.size == 0:
|
||||
continue
|
||||
|
||||
result = gaze_estimator.estimate(face_crop)
|
||||
draw_gaze(frame, bbox, result.pitch, result.yaw)
|
||||
|
||||
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
cv2.imshow('Gaze Estimation', frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Run gaze estimation')
|
||||
parser.add_argument('--source', type=str, required=True, help='Image/video path or camera ID (0, 1, ...)')
|
||||
parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
|
||||
args = parser.parse_args()
|
||||
|
||||
detector = RetinaFace()
|
||||
gaze_estimator = MobileGaze()
|
||||
|
||||
source_type = get_source_type(args.source)
|
||||
|
||||
if source_type == 'camera':
|
||||
run_camera(detector, gaze_estimator, int(args.source))
|
||||
elif source_type == 'image':
|
||||
if not os.path.exists(args.source):
|
||||
print(f'Error: Image not found: {args.source}')
|
||||
return
|
||||
process_image(detector, gaze_estimator, args.source, args.save_dir)
|
||||
elif source_type == 'video':
|
||||
if not os.path.exists(args.source):
|
||||
print(f'Error: Video not found: {args.source}')
|
||||
return
|
||||
process_video(detector, gaze_estimator, args.source, args.save_dir)
|
||||
else:
|
||||
print(f"Error: Unknown source type for '{args.source}'")
|
||||
print('Supported formats: images (.jpg, .png, ...), videos (.mp4, .avi, ...), or camera ID (0, 1, ...)')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
187
tools/landmarks.py
Normal file
@@ -0,0 +1,187 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""106-point facial landmark detection.
|
||||
|
||||
Usage:
|
||||
python tools/landmarks.py --source path/to/image.jpg
|
||||
python tools/landmarks.py --source path/to/video.mp4
|
||||
python tools/landmarks.py --source 0 # webcam
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
|
||||
from uniface import SCRFD, Landmark106, RetinaFace
|
||||
|
||||
IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
|
||||
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}
|
||||
|
||||
|
||||
def get_source_type(source: str) -> str:
|
||||
"""Determine if source is image, video, or camera."""
|
||||
if source.isdigit():
|
||||
return 'camera'
|
||||
path = Path(source)
|
||||
suffix = path.suffix.lower()
|
||||
if suffix in IMAGE_EXTENSIONS:
|
||||
return 'image'
|
||||
elif suffix in VIDEO_EXTENSIONS:
|
||||
return 'video'
|
||||
else:
|
||||
return 'unknown'
|
||||
|
||||
|
||||
def process_image(detector, landmarker, image_path: str, save_dir: str = 'outputs'):
|
||||
"""Process a single image."""
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
print(f"Error: Failed to load image from '{image_path}'")
|
||||
return
|
||||
|
||||
faces = detector.detect(image)
|
||||
print(f'Detected {len(faces)} face(s)')
|
||||
|
||||
if not faces:
|
||||
return
|
||||
|
||||
for i, face in enumerate(faces):
|
||||
bbox = face.bbox
|
||||
x1, y1, x2, y2 = map(int, bbox)
|
||||
cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
|
||||
|
||||
landmarks = landmarker.get_landmarks(image, bbox)
|
||||
print(f' Face {i + 1}: {len(landmarks)} landmarks')
|
||||
|
||||
for x, y in landmarks.astype(int):
|
||||
cv2.circle(image, (x, y), 1, (0, 255, 0), -1)
|
||||
|
||||
cv2.putText(image, f'Face {i + 1}', (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{Path(image_path).stem}_landmarks.jpg')
|
||||
cv2.imwrite(output_path, image)
|
||||
print(f'Output saved: {output_path}')
|
||||
|
||||
|
||||
def process_video(detector, landmarker, video_path: str, save_dir: str = 'outputs'):
|
||||
"""Process a video file."""
|
||||
cap = cv2.VideoCapture(video_path)
|
||||
if not cap.isOpened():
|
||||
print(f"Error: Cannot open video file '{video_path}'")
|
||||
return
|
||||
|
||||
fps = cap.get(cv2.CAP_PROP_FPS)
|
||||
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
||||
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
||||
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{Path(video_path).stem}_landmarks.mp4')
|
||||
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
|
||||
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
|
||||
|
||||
print(f'Processing video: {video_path} ({total_frames} frames)')
|
||||
frame_count = 0
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
frame_count += 1
|
||||
faces = detector.detect(frame)
|
||||
|
||||
for face in faces:
|
||||
bbox = face.bbox
|
||||
x1, y1, x2, y2 = map(int, bbox)
|
||||
cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
|
||||
|
||||
landmarks = landmarker.get_landmarks(frame, bbox)
|
||||
for x, y in landmarks.astype(int):
|
||||
cv2.circle(frame, (x, y), 1, (0, 255, 0), -1)
|
||||
|
||||
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
out.write(frame)
|
||||
|
||||
if frame_count % 100 == 0:
|
||||
print(f' Processed {frame_count}/{total_frames} frames...')
|
||||
|
||||
cap.release()
|
||||
out.release()
|
||||
print(f'Done! Output saved: {output_path}')
|
||||
|
||||
|
||||
def run_camera(detector, landmarker, camera_id: int = 0):
|
||||
"""Run real-time detection on webcam."""
|
||||
cap = cv2.VideoCapture(camera_id)
|
||||
if not cap.isOpened():
|
||||
print(f'Cannot open camera {camera_id}')
|
||||
return
|
||||
|
||||
print("Press 'q' to quit")
|
||||
|
||||
while True:
|
||||
        ret, frame = cap.read()
        if not ret:
            break
        frame = cv2.flip(frame, 1)
|
||||
|
||||
faces = detector.detect(frame)
|
||||
|
||||
for face in faces:
|
||||
bbox = face.bbox
|
||||
x1, y1, x2, y2 = map(int, bbox)
|
||||
cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
|
||||
|
||||
landmarks = landmarker.get_landmarks(frame, bbox)
|
||||
for x, y in landmarks.astype(int):
|
||||
cv2.circle(frame, (x, y), 1, (0, 255, 0), -1)
|
||||
|
||||
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
cv2.imshow('106-Point Landmarks', frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Run facial landmark detection')
|
||||
parser.add_argument('--source', type=str, required=True, help='Image/video path or camera ID (0, 1, ...)')
|
||||
parser.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
|
||||
parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
|
||||
args = parser.parse_args()
|
||||
|
||||
detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
|
||||
landmarker = Landmark106()
|
||||
|
||||
source_type = get_source_type(args.source)
|
||||
|
||||
if source_type == 'camera':
|
||||
run_camera(detector, landmarker, int(args.source))
|
||||
elif source_type == 'image':
|
||||
if not os.path.exists(args.source):
|
||||
print(f'Error: Image not found: {args.source}')
|
||||
return
|
||||
process_image(detector, landmarker, args.source, args.save_dir)
|
||||
elif source_type == 'video':
|
||||
if not os.path.exists(args.source):
|
||||
print(f'Error: Video not found: {args.source}')
|
||||
return
|
||||
process_video(detector, landmarker, args.source, args.save_dir)
|
||||
else:
|
||||
print(f"Error: Unknown source type for '{args.source}'")
|
||||
print('Supported formats: images (.jpg, .png, ...), videos (.mp4, .avi, ...), or camera ID (0, 1, ...)')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
110
tools/recognition.py
Normal file
@@ -0,0 +1,110 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Face recognition: extract embeddings or compare two faces.
|
||||
|
||||
Usage:
|
||||
python tools/recognition.py --image path/to/image.jpg
|
||||
python tools/recognition.py --image1 face1.jpg --image2 face2.jpg
|
||||
"""
|
||||
|
||||
import argparse
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from uniface.detection import SCRFD, RetinaFace
|
||||
from uniface.face_utils import compute_similarity
|
||||
from uniface.recognition import ArcFace, MobileFace, SphereFace
|
||||
|
||||
|
||||
def get_recognizer(name: str):
|
||||
if name == 'arcface':
|
||||
return ArcFace()
|
||||
elif name == 'mobileface':
|
||||
return MobileFace()
|
||||
else:
|
||||
return SphereFace()
|
||||
|
||||
|
||||
def run_inference(detector, recognizer, image_path: str):
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
print(f"Error: Failed to load image from '{image_path}'")
|
||||
return
|
||||
|
||||
faces = detector.detect(image)
|
||||
if not faces:
|
||||
print('No faces detected.')
|
||||
return
|
||||
|
||||
print(f'Detected {len(faces)} face(s). Extracting embedding for the first face...')
|
||||
|
||||
    landmarks = faces[0].landmarks  # 5-point landmarks for alignment (already np.ndarray)
|
||||
embedding = recognizer.get_embedding(image, landmarks)
|
||||
norm_embedding = recognizer.get_normalized_embedding(image, landmarks) # L2 normalized
|
||||
|
||||
print(f' Embedding shape: {embedding.shape}')
|
||||
print(f' L2 norm (raw): {np.linalg.norm(embedding):.4f}')
|
||||
print(f' L2 norm (normalized): {np.linalg.norm(norm_embedding):.4f}')
|
||||
|
||||
|
||||
def compare_faces(detector, recognizer, image1_path: str, image2_path: str, threshold: float = 0.35):
|
||||
img1 = cv2.imread(image1_path)
|
||||
img2 = cv2.imread(image2_path)
|
||||
|
||||
if img1 is None or img2 is None:
|
||||
print('Error: Failed to load one or both images')
|
||||
return
|
||||
|
||||
faces1 = detector.detect(img1)
|
||||
faces2 = detector.detect(img2)
|
||||
|
||||
if not faces1 or not faces2:
|
||||
print('Error: No faces detected in one or both images')
|
||||
return
|
||||
|
||||
    landmarks1 = faces1[0].landmarks
    landmarks2 = faces2[0].landmarks
|
||||
|
||||
embedding1 = recognizer.get_normalized_embedding(img1, landmarks1)
|
||||
embedding2 = recognizer.get_normalized_embedding(img2, landmarks2)
|
||||
|
||||
# cosine similarity for normalized embeddings
|
||||
similarity = compute_similarity(embedding1, embedding2, normalized=True)
|
||||
is_match = similarity > threshold
|
||||
|
||||
print(f'Similarity: {similarity:.4f}')
|
||||
print(f'Result: {"Same person" if is_match else "Different person"} (threshold: {threshold})')
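# For L2-normalized embeddings cosine similarity reduces to a dot product, so
# compute_similarity(..., normalized=True) can be sanity-checked against NumPy
# (assuming the embeddings are 1-D or flattenable):
#
#     a, b = embedding1.ravel(), embedding2.ravel()
#     cosine = float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))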
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Face recognition and comparison')
|
||||
parser.add_argument('--image', type=str, help='Single image for embedding extraction')
|
||||
parser.add_argument('--image1', type=str, help='First image for comparison')
|
||||
parser.add_argument('--image2', type=str, help='Second image for comparison')
|
||||
parser.add_argument('--threshold', type=float, default=0.35, help='Similarity threshold')
|
||||
parser.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
|
||||
parser.add_argument(
|
||||
'--recognizer',
|
||||
type=str,
|
||||
default='arcface',
|
||||
choices=['arcface', 'mobileface', 'sphereface'],
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
|
||||
recognizer = get_recognizer(args.recognizer)
|
||||
|
||||
if args.image1 and args.image2:
|
||||
compare_faces(detector, recognizer, args.image1, args.image2, args.threshold)
|
||||
elif args.image:
|
||||
run_inference(detector, recognizer, args.image)
|
||||
else:
|
||||
print('Error: Provide --image or both --image1 and --image2')
|
||||
parser.print_help()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
28
tools/sha256_generate.py
Normal file
@@ -0,0 +1,28 @@
import argparse
import hashlib
from pathlib import Path


def compute_sha256(file_path: Path, chunk_size: int = 8192) -> str:
    sha256_hash = hashlib.sha256()
    with file_path.open('rb') as f:
        for chunk in iter(lambda: f.read(chunk_size), b''):
            sha256_hash.update(chunk)
    return sha256_hash.hexdigest()


def main():
    parser = argparse.ArgumentParser(description='Compute SHA256 hash of a file')
    parser.add_argument('file', type=Path, help='Path to file')
    args = parser.parse_args()

    if not args.file.exists() or not args.file.is_file():
        print(f'File does not exist: {args.file}')
        return

    sha256 = compute_sha256(args.file)
    print(f"SHA256 hash for '{args.file.name}':\n{sha256}")


if __name__ == '__main__':
    main()
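compute_sha256 reads in 8 KiB chunks, so memory stays constant even for large ONNX weights, and the digest matches hashing the whole file at once. A quick equivalence check, reusing the function above with a placeholder path:

    import hashlib
    from pathlib import Path

    p = Path('path/to/model.onnx')  # placeholder
    assert compute_sha256(p) == hashlib.sha256(p.read_bytes()).hexdigest()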
214
tools/spoofing.py
Normal file
@@ -0,0 +1,214 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Face Anti-Spoofing Detection.
|
||||
|
||||
Usage:
|
||||
python tools/spoofing.py --source path/to/image.jpg
|
||||
python tools/spoofing.py --source path/to/video.mp4
|
||||
python tools/spoofing.py --source 0 # webcam
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from uniface import RetinaFace
|
||||
from uniface.constants import MiniFASNetWeights
|
||||
from uniface.spoofing import create_spoofer
|
||||
|
||||
IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
|
||||
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}
|
||||
|
||||
|
||||
def get_source_type(source: str) -> str:
|
||||
"""Determine if source is image, video, or camera."""
|
||||
if source.isdigit():
|
||||
return 'camera'
|
||||
path = Path(source)
|
||||
suffix = path.suffix.lower()
|
||||
if suffix in IMAGE_EXTENSIONS:
|
||||
return 'image'
|
||||
elif suffix in VIDEO_EXTENSIONS:
|
||||
return 'video'
|
||||
else:
|
||||
return 'unknown'
|
||||
|
||||
|
||||
def draw_spoofing_result(
|
||||
image: np.ndarray,
|
||||
bbox: list,
|
||||
is_real: bool,
|
||||
confidence: float,
|
||||
thickness: int = 2,
|
||||
) -> None:
|
||||
"""Draw bounding box with anti-spoofing result.
|
||||
|
||||
Args:
|
||||
image: Input image to draw on.
|
||||
bbox: Bounding box in [x1, y1, x2, y2] format.
|
||||
is_real: True if real face, False if fake.
|
||||
confidence: Confidence score (0.0 to 1.0).
|
||||
thickness: Line thickness for bounding box.
|
||||
"""
|
||||
x1, y1, x2, y2 = map(int, bbox[:4])
|
||||
|
||||
color = (0, 255, 0) if is_real else (0, 0, 255)
|
||||
|
||||
cv2.rectangle(image, (x1, y1), (x2, y2), color, thickness)
|
||||
|
||||
label = 'Real' if is_real else 'Fake'
|
||||
text = f'{label}: {confidence:.1%}'
|
||||
|
||||
(tw, th), _baseline = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2)
|
||||
cv2.rectangle(image, (x1, y1 - th - 10), (x1 + tw + 10, y1), color, -1)
|
||||
cv2.putText(image, text, (x1 + 5, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
|
||||
|
||||
|
||||
def process_image(detector, spoofer, image_path: str, save_dir: str = 'outputs') -> None:
|
||||
"""Process a single image for face anti-spoofing detection."""
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
print(f"Error: Failed to load image from '{image_path}'")
|
||||
return
|
||||
|
||||
faces = detector.detect(image)
|
||||
print(f'Detected {len(faces)} face(s)')
|
||||
|
||||
if not faces:
|
||||
print('No faces detected in the image.')
|
||||
return
|
||||
|
||||
for i, face in enumerate(faces, 1):
|
||||
result = spoofer.predict(image, face.bbox)
|
||||
label = 'Real' if result.is_real else 'Fake'
|
||||
print(f' Face {i}: {label} ({result.confidence:.1%})')
|
||||
|
||||
draw_spoofing_result(image, face.bbox, result.is_real, result.confidence)
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{Path(image_path).stem}_spoofing.jpg')
|
||||
cv2.imwrite(output_path, image)
|
||||
print(f'Output saved: {output_path}')
|
||||
|
||||
|
||||
def process_video(detector, spoofer, video_path: str, save_dir: str = 'outputs') -> None:
|
||||
"""Process a video file for face anti-spoofing detection."""
|
||||
cap = cv2.VideoCapture(video_path)
|
||||
if not cap.isOpened():
|
||||
print(f"Error: Cannot open video file '{video_path}'")
|
||||
return
|
||||
|
||||
fps = cap.get(cv2.CAP_PROP_FPS)
|
||||
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
||||
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
||||
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{Path(video_path).stem}_spoofing.mp4')
|
||||
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
|
||||
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
|
||||
|
||||
print(f'Processing video: {video_path} ({total_frames} frames)')
|
||||
frame_count = 0
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
frame_count += 1
|
||||
faces = detector.detect(frame)
|
||||
|
||||
for face in faces:
|
||||
result = spoofer.predict(frame, face.bbox)
|
||||
draw_spoofing_result(frame, face.bbox, result.is_real, result.confidence)
|
||||
|
||||
out.write(frame)
|
||||
|
||||
if frame_count % 100 == 0:
|
||||
print(f' Processed {frame_count}/{total_frames} frames...')
|
||||
|
||||
cap.release()
|
||||
out.release()
|
||||
print(f'Done! Output saved: {output_path}')
|
||||
|
||||
|
||||
def run_camera(detector, spoofer, camera_id: int = 0) -> None:
|
||||
"""Run real-time anti-spoofing detection on webcam."""
|
||||
cap = cv2.VideoCapture(camera_id)
|
||||
if not cap.isOpened():
|
||||
print(f'Cannot open camera {camera_id}')
|
||||
return
|
||||
|
||||
print("Press 'q' to quit")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
frame = cv2.flip(frame, 1)
|
||||
faces = detector.detect(frame)
|
||||
|
||||
for face in faces:
|
||||
result = spoofer.predict(frame, face.bbox)
|
||||
draw_spoofing_result(frame, face.bbox, result.is_real, result.confidence)
|
||||
|
||||
cv2.imshow('Face Anti-Spoofing', frame)
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Face Anti-Spoofing Detection')
|
||||
parser.add_argument('--source', type=str, required=True, help='Image/video path or camera ID (0, 1, ...)')
|
||||
parser.add_argument(
|
||||
'--model',
|
||||
type=str,
|
||||
default='v2',
|
||||
choices=['v1se', 'v2'],
|
||||
help='Model variant: v1se or v2 (default: v2)',
|
||||
)
|
||||
parser.add_argument('--scale', type=float, default=None, help='Custom crop scale (default: auto)')
|
||||
parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
|
||||
args = parser.parse_args()
|
||||
|
||||
# Select model variant
|
||||
model_name = MiniFASNetWeights.V1SE if args.model == 'v1se' else MiniFASNetWeights.V2
|
||||
|
||||
# Initialize models
|
||||
print(f'Initializing models (MiniFASNet {args.model.upper()})...')
|
||||
detector = RetinaFace()
|
||||
spoofer = create_spoofer(model_name=model_name, scale=args.scale)
|
||||
|
||||
source_type = get_source_type(args.source)
|
||||
|
||||
if source_type == 'camera':
|
||||
run_camera(detector, spoofer, int(args.source))
|
||||
elif source_type == 'image':
|
||||
if not os.path.exists(args.source):
|
||||
print(f'Error: Image not found: {args.source}')
|
||||
return
|
||||
process_image(detector, spoofer, args.source, args.save_dir)
|
||||
elif source_type == 'video':
|
||||
if not os.path.exists(args.source):
|
||||
print(f'Error: Video not found: {args.source}')
|
||||
return
|
||||
process_video(detector, spoofer, args.source, args.save_dir)
|
||||
else:
|
||||
print(f"Error: Unknown source type for '{args.source}'")
|
||||
print('Supported formats: images (.jpg, .png, ...), videos (.mp4, .avi, ...), or camera ID (0, 1, ...)')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
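The tool above wires detection and spoof classification together; distilled to its core, the same calls can be used directly from application code. A minimal sketch (the image path is a placeholder):

import cv2

from uniface import RetinaFace
from uniface.constants import MiniFASNetWeights
from uniface.spoofing import create_spoofer

detector = RetinaFace()
spoofer = create_spoofer(model_name=MiniFASNetWeights.V2, scale=None)  # same defaults as the tool

image = cv2.imread('selfie.jpg')  # placeholder path
for face in detector.detect(image):
    result = spoofer.predict(image, face.bbox)
    label = 'Real' if result.is_real else 'Fake'
    print(f'{label} ({result.confidence:.1%}) at {face.bbox}')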
180  tools/video_detection.py  (new file)
@@ -0,0 +1,180 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Face detection on video files with progress tracking.
|
||||
|
||||
Usage:
|
||||
python tools/video_detection.py --source video.mp4
|
||||
python tools/video_detection.py --source video.mp4 --output output.mp4
|
||||
python tools/video_detection.py --source 0 # webcam
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
from tqdm import tqdm
|
||||
|
||||
from uniface import SCRFD, RetinaFace
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
|
||||
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}
|
||||
|
||||
|
||||
def get_source_type(source: str) -> str:
|
||||
"""Determine if source is image, video, or camera."""
|
||||
if source.isdigit():
|
||||
return 'camera'
|
||||
path = Path(source)
|
||||
suffix = path.suffix.lower()
|
||||
if suffix in IMAGE_EXTENSIONS:
|
||||
return 'image'
|
||||
elif suffix in VIDEO_EXTENSIONS:
|
||||
return 'video'
|
||||
else:
|
||||
return 'unknown'
|
||||
|
||||
|
||||
def process_video(
|
||||
detector,
|
||||
input_path: str,
|
||||
output_path: str,
|
||||
threshold: float = 0.6,
|
||||
show_preview: bool = False,
|
||||
):
|
||||
"""Process a video file with progress bar."""
|
||||
cap = cv2.VideoCapture(input_path)
|
||||
if not cap.isOpened():
|
||||
print(f"Error: Cannot open video file '{input_path}'")
|
||||
return
|
||||
|
||||
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
||||
fps = cap.get(cv2.CAP_PROP_FPS)
|
||||
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
||||
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
||||
|
||||
print(f'Input: {input_path} ({width}x{height}, {fps:.1f} fps, {total_frames} frames)')
|
||||
print(f'Output: {output_path}')
|
||||
|
||||
Path(output_path).parent.mkdir(parents=True, exist_ok=True)
|
||||
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
|
||||
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
|
||||
|
||||
if not out.isOpened():
|
||||
print(f"Error: Cannot create output video '{output_path}'")
|
||||
cap.release()
|
||||
return
|
||||
|
||||
frame_count = 0
|
||||
total_faces = 0
|
||||
|
||||
for _ in tqdm(range(total_frames), desc='Processing', unit='frames'):
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
frame_count += 1
|
||||
faces = detector.detect(frame)
|
||||
total_faces += len(faces)
|
||||
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
draw_detections(
|
||||
image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
|
||||
)
|
||||
|
||||
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
out.write(frame)
|
||||
|
||||
if show_preview:
|
||||
cv2.imshow("Processing - Press 'q' to cancel", frame)
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
print('\nCancelled by user')
|
||||
break
|
||||
|
||||
cap.release()
|
||||
out.release()
|
||||
if show_preview:
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
avg_faces = total_faces / frame_count if frame_count > 0 else 0
|
||||
print(f'\nDone! {frame_count} frames, {total_faces} faces ({avg_faces:.1f} avg/frame)')
|
||||
print(f'Saved: {output_path}')
|
||||
|
||||
|
||||
def run_camera(detector, camera_id: int = 0, threshold: float = 0.6):
|
||||
"""Run real-time detection on webcam."""
|
||||
cap = cv2.VideoCapture(camera_id)
|
||||
if not cap.isOpened():
|
||||
print(f'Cannot open camera {camera_id}')
|
||||
return
|
||||
|
||||
print("Press 'q' to quit")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
frame = cv2.flip(frame, 1)
|
||||
if not ret:
|
||||
break
|
||||
|
||||
faces = detector.detect(frame)
|
||||
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
draw_detections(
|
||||
image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
|
||||
)
|
||||
|
||||
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
cv2.imshow('Face Detection', frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Process video with face detection')
|
||||
parser.add_argument('--source', type=str, required=True, help='Video path or camera ID (0, 1, ...)')
|
||||
parser.add_argument('--output', type=str, default=None, help='Output video path (auto-generated if not specified)')
|
||||
parser.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
|
||||
parser.add_argument('--threshold', type=float, default=0.6, help='Visualization threshold')
|
||||
parser.add_argument('--preview', action='store_true', help='Show live preview')
|
||||
parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory (if --output not specified)')
|
||||
args = parser.parse_args()
|
||||
|
||||
detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
|
||||
|
||||
source_type = get_source_type(args.source)
|
||||
|
||||
if source_type == 'camera':
|
||||
run_camera(detector, int(args.source), args.threshold)
|
||||
elif source_type == 'video':
|
||||
if not os.path.exists(args.source):
|
||||
print(f'Error: Video not found: {args.source}')
|
||||
return
|
||||
|
||||
# Determine output path
|
||||
if args.output:
|
||||
output_path = args.output
|
||||
else:
|
||||
os.makedirs(args.save_dir, exist_ok=True)
|
||||
output_path = os.path.join(args.save_dir, f'{Path(args.source).stem}_detected.mp4')
|
||||
|
||||
process_video(detector, args.source, output_path, args.threshold, args.preview)
|
||||
else:
|
||||
print(f"Error: Unknown source type for '{args.source}'")
|
||||
print('Supported formats: videos (.mp4, .avi, ...) or camera ID (0, 1, ...)')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
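For reference, the per-frame detection and drawing calls used by this tool work identically on a single image; a minimal sketch with a placeholder path:

import cv2

from uniface import RetinaFace
from uniface.visualization import draw_detections

detector = RetinaFace()
image = cv2.imread('group_photo.jpg')  # placeholder path

faces = detector.detect(image)
draw_detections(
    image=image,
    bboxes=[f.bbox for f in faces],
    scores=[f.confidence for f in faces],
    landmarks=[f.landmarks for f in faces],
    vis_threshold=0.6,
    fancy_bbox=True,
)
cv2.imwrite('group_photo_detected.jpg', image)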
@@ -11,52 +11,105 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
__license__ = "MIT"
|
||||
__author__ = "Yakhyokhuja Valikhujaev"
|
||||
__version__ = "1.1.0"
|
||||
"""UniFace: A comprehensive library for face analysis.
|
||||
|
||||
This library provides unified APIs for:
|
||||
- Face detection (RetinaFace, SCRFD, YOLOv5Face)
|
||||
- Face recognition (ArcFace, MobileFace, SphereFace)
|
||||
- Facial landmarks (106-point detection)
|
||||
- Face parsing (semantic segmentation)
|
||||
- Gaze estimation
|
||||
- Age, gender, and emotion prediction
|
||||
- Face anti-spoofing
|
||||
- Privacy/anonymization
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
__license__ = 'MIT'
|
||||
__author__ = 'Yakhyokhuja Valikhujaev'
|
||||
__version__ = '2.0.0'
|
||||
|
||||
from uniface.face_utils import compute_similarity, face_alignment
|
||||
from uniface.log import Logger, enable_logging
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.visualization import draw_detections
|
||||
from uniface.visualization import draw_detections, vis_parsing_maps
|
||||
|
||||
from .attribute import AgeGender
|
||||
from .analyzer import FaceAnalyzer
|
||||
from .attribute import AgeGender, FairFace
|
||||
from .detection import (
|
||||
SCRFD,
|
||||
RetinaFace,
|
||||
YOLOv5Face,
|
||||
create_detector,
|
||||
detect_faces,
|
||||
list_available_detectors,
|
||||
)
|
||||
from .gaze import MobileGaze, create_gaze_estimator
|
||||
from .landmark import Landmark106, create_landmarker
|
||||
from .parsing import BiSeNet, create_face_parser
|
||||
from .privacy import BlurFace, anonymize_faces
|
||||
from .recognition import ArcFace, MobileFace, SphereFace, create_recognizer
|
||||
from .spoofing import MiniFASNet, create_spoofer
|
||||
from .types import AttributeResult, EmotionResult, Face, GazeResult, SpoofingResult
|
||||
|
||||
# Optional: Emotion requires PyTorch
|
||||
Emotion: type | None
|
||||
try:
|
||||
from .attribute import Emotion
|
||||
except ImportError:
|
||||
Emotion = None # PyTorch not installed
|
||||
from .detection import SCRFD, RetinaFace, create_detector, detect_faces, list_available_detectors
|
||||
from .landmark import Landmark106, create_landmarker
|
||||
from .recognition import ArcFace, MobileFace, SphereFace, create_recognizer
|
||||
Emotion = None
|
||||
|
||||
__all__ = [
|
||||
"__author__",
|
||||
"__license__",
|
||||
"__version__",
|
||||
# Metadata
|
||||
'__author__',
|
||||
'__license__',
|
||||
'__version__',
|
||||
# Core classes
|
||||
'Face',
|
||||
'FaceAnalyzer',
|
||||
# Factory functions
|
||||
"create_detector",
|
||||
"create_landmarker",
|
||||
"create_recognizer",
|
||||
"detect_faces",
|
||||
"list_available_detectors",
|
||||
'create_detector',
|
||||
'create_face_parser',
|
||||
'create_gaze_estimator',
|
||||
'create_landmarker',
|
||||
'create_recognizer',
|
||||
'create_spoofer',
|
||||
'detect_faces',
|
||||
'list_available_detectors',
|
||||
# Detection models
|
||||
"RetinaFace",
|
||||
"SCRFD",
|
||||
'RetinaFace',
|
||||
'SCRFD',
|
||||
'YOLOv5Face',
|
||||
# Recognition models
|
||||
"ArcFace",
|
||||
"MobileFace",
|
||||
"SphereFace",
|
||||
'ArcFace',
|
||||
'MobileFace',
|
||||
'SphereFace',
|
||||
# Landmark models
|
||||
"Landmark106",
|
||||
'Landmark106',
|
||||
# Gaze models
|
||||
'GazeResult',
|
||||
'MobileGaze',
|
||||
# Parsing models
|
||||
'BiSeNet',
|
||||
# Attribute models
|
||||
"AgeGender",
|
||||
"Emotion",
|
||||
'AgeGender',
|
||||
'AttributeResult',
|
||||
'Emotion',
|
||||
'EmotionResult',
|
||||
'FairFace',
|
||||
# Spoofing models
|
||||
'MiniFASNet',
|
||||
'SpoofingResult',
|
||||
# Privacy
|
||||
'BlurFace',
|
||||
'anonymize_faces',
|
||||
# Utilities
|
||||
"compute_similarity",
|
||||
"draw_detections",
|
||||
"face_alignment",
|
||||
"verify_model_weights",
|
||||
"Logger",
|
||||
"enable_logging",
|
||||
'Logger',
|
||||
'compute_similarity',
|
||||
'draw_detections',
|
||||
'enable_logging',
|
||||
'face_alignment',
|
||||
'verify_model_weights',
|
||||
'vis_parsing_maps',
|
||||
]
|
||||
|
||||
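Taken together, the 2.0 exports compose like this; a short sketch assuming compute_similarity accepts two normalized embeddings (image paths are placeholders):

import cv2

from uniface import AgeGender, ArcFace, FaceAnalyzer, RetinaFace, compute_similarity

analyzer = FaceAnalyzer(RetinaFace(), recognizer=ArcFace(), age_gender=AgeGender())

img_a = cv2.imread('person_a.jpg')  # placeholder paths
img_b = cv2.imread('person_b.jpg')

faces_a = analyzer.analyze(img_a)
faces_b = analyzer.analyze(img_b)

if faces_a and faces_b:
    score = compute_similarity(faces_a[0].embedding, faces_b[0].embedding)  # signature assumed
    print(f'Similarity: {score:.3f}, age={faces_a[0].age}, gender={faces_a[0].gender}')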
113  uniface/analyzer.py  (new file)
@@ -0,0 +1,113 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import numpy as np
|
||||
|
||||
from uniface.attribute.age_gender import AgeGender
|
||||
from uniface.attribute.fairface import FairFace
|
||||
from uniface.detection.base import BaseDetector
|
||||
from uniface.log import Logger
|
||||
from uniface.recognition.base import BaseRecognizer
|
||||
from uniface.types import Face
|
||||
|
||||
__all__ = ['FaceAnalyzer']
|
||||
|
||||
|
||||
class FaceAnalyzer:
|
||||
"""Unified face analyzer combining detection, recognition, and attributes.
|
||||
|
||||
This class provides a high-level interface for face analysis by combining
|
||||
multiple components: face detection, recognition (embedding extraction),
|
||||
and attribute prediction (age, gender, race).
|
||||
|
||||
Args:
|
||||
detector: Face detector instance for detecting faces in images.
|
||||
recognizer: Optional face recognizer for extracting embeddings.
|
||||
age_gender: Optional age/gender predictor.
|
||||
fairface: Optional FairFace predictor for demographics.
|
||||
|
||||
Example:
|
||||
>>> from uniface import RetinaFace, ArcFace, FaceAnalyzer
|
||||
>>> detector = RetinaFace()
|
||||
>>> recognizer = ArcFace()
|
||||
>>> analyzer = FaceAnalyzer(detector, recognizer=recognizer)
|
||||
>>> faces = analyzer.analyze(image)
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
detector: BaseDetector,
|
||||
recognizer: BaseRecognizer | None = None,
|
||||
age_gender: AgeGender | None = None,
|
||||
fairface: FairFace | None = None,
|
||||
) -> None:
|
||||
self.detector = detector
|
||||
self.recognizer = recognizer
|
||||
self.age_gender = age_gender
|
||||
self.fairface = fairface
|
||||
|
||||
Logger.info(f'Initialized FaceAnalyzer with detector={detector.__class__.__name__}')
|
||||
if recognizer:
|
||||
Logger.info(f' - Recognition enabled: {recognizer.__class__.__name__}')
|
||||
if age_gender:
|
||||
Logger.info(f' - Age/Gender enabled: {age_gender.__class__.__name__}')
|
||||
if fairface:
|
||||
Logger.info(f' - FairFace enabled: {fairface.__class__.__name__}')
|
||||
|
||||
def analyze(self, image: np.ndarray) -> list[Face]:
|
||||
"""Analyze faces in an image.
|
||||
|
||||
Performs face detection and optionally extracts embeddings and
|
||||
predicts attributes for each detected face.
|
||||
|
||||
Args:
|
||||
image: Input image as numpy array with shape (H, W, C) in BGR format.
|
||||
|
||||
Returns:
|
||||
List of Face objects with detection results and any predicted attributes.
|
||||
"""
|
||||
faces = self.detector.detect(image)
|
||||
Logger.debug(f'Detected {len(faces)} face(s)')
|
||||
|
||||
for idx, face in enumerate(faces):
|
||||
if self.recognizer is not None:
|
||||
try:
|
||||
face.embedding = self.recognizer.get_normalized_embedding(image, face.landmarks)
|
||||
Logger.debug(f' Face {idx + 1}: Extracted embedding with shape {face.embedding.shape}')
|
||||
except Exception as e:
|
||||
Logger.warning(f' Face {idx + 1}: Failed to extract embedding: {e}')
|
||||
|
||||
if self.age_gender is not None:
|
||||
try:
|
||||
result = self.age_gender.predict(image, face.bbox)
|
||||
face.gender = result.gender
|
||||
face.age = result.age
|
||||
Logger.debug(f' Face {idx + 1}: Age={face.age}, Gender={face.sex}')
|
||||
except Exception as e:
|
||||
Logger.warning(f' Face {idx + 1}: Failed to predict age/gender: {e}')
|
||||
|
||||
if self.fairface is not None:
|
||||
try:
|
||||
result = self.fairface.predict(image, face.bbox)
|
||||
face.gender = result.gender
|
||||
face.age_group = result.age_group
|
||||
face.race = result.race
|
||||
Logger.debug(f' Face {idx + 1}: AgeGroup={face.age_group}, Gender={face.sex}, Race={face.race}')
|
||||
except Exception as e:
|
||||
Logger.warning(f' Face {idx + 1}: Failed to predict FairFace attributes: {e}')
|
||||
|
||||
Logger.info(f'Analysis complete: {len(faces)} face(s) processed')
|
||||
return faces
|
||||
|
||||
def __repr__(self) -> str:
|
||||
parts = [f'FaceAnalyzer(detector={self.detector.__class__.__name__}']
|
||||
if self.recognizer:
|
||||
parts.append(f'recognizer={self.recognizer.__class__.__name__}')
|
||||
if self.age_gender:
|
||||
parts.append(f'age_gender={self.age_gender.__class__.__name__}')
|
||||
if self.fairface:
|
||||
parts.append(f'fairface={self.fairface.__class__.__name__}')
|
||||
return ', '.join(parts) + ')'
|
||||
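Only the components passed to the constructor are run, so lighter configurations are possible; a sketch with detection plus FairFace demographics only (the image path is a placeholder):

import cv2

from uniface import FaceAnalyzer, FairFace, RetinaFace

analyzer = FaceAnalyzer(RetinaFace(), fairface=FairFace())
frame = cv2.imread('crowd.jpg')  # placeholder path

for face in analyzer.analyze(frame):
    # gender is 0=Female / 1=Male; age_group and race are FairFace label strings
    print(face.bbox, face.gender, face.age_group, face.race)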
@@ -2,16 +2,22 @@
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from typing import Dict, Any, List, Union
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
import numpy as np
|
||||
|
||||
from uniface.attribute.age_gender import AgeGender
|
||||
from uniface.attribute.base import Attribute
|
||||
from uniface.constants import AgeGenderWeights, DDAMFNWeights
|
||||
from uniface.attribute.fairface import FairFace
|
||||
from uniface.constants import AgeGenderWeights, DDAMFNWeights, FairFaceWeights
|
||||
from uniface.types import AttributeResult, EmotionResult, Face
|
||||
|
||||
# Emotion requires PyTorch - make it optional
|
||||
try:
|
||||
from uniface.attribute.emotion import Emotion
|
||||
|
||||
_EMOTION_AVAILABLE = True
|
||||
except ImportError:
|
||||
Emotion = None
|
||||
@@ -19,25 +25,28 @@ except ImportError:
|
||||
|
||||
# Public API for the attribute module
|
||||
__all__ = [
|
||||
"AgeGender",
|
||||
"Emotion",
|
||||
"create_attribute_predictor",
|
||||
"predict_attributes"
|
||||
'AgeGender',
|
||||
'AttributeResult',
|
||||
'Emotion',
|
||||
'EmotionResult',
|
||||
'FairFace',
|
||||
'create_attribute_predictor',
|
||||
'predict_attributes',
|
||||
]
|
||||
|
||||
# A mapping from model enums to their corresponding attribute classes
|
||||
_ATTRIBUTE_MODELS = {
|
||||
**{model: AgeGender for model in AgeGenderWeights},
|
||||
**dict.fromkeys(AgeGenderWeights, AgeGender),
|
||||
**dict.fromkeys(FairFaceWeights, FairFace),
|
||||
}
|
||||
|
||||
# Add Emotion models only if PyTorch is available
|
||||
if _EMOTION_AVAILABLE:
|
||||
_ATTRIBUTE_MODELS.update({model: Emotion for model in DDAMFNWeights})
|
||||
_ATTRIBUTE_MODELS.update(dict.fromkeys(DDAMFNWeights, Emotion))
|
||||
|
||||
|
||||
def create_attribute_predictor(
|
||||
model_name: Union[AgeGenderWeights, DDAMFNWeights],
|
||||
**kwargs: Any
|
||||
model_name: AgeGenderWeights | DDAMFNWeights | FairFaceWeights, **kwargs: Any
|
||||
) -> Attribute:
|
||||
"""
|
||||
Factory function to create an attribute predictor instance.
|
||||
@@ -47,11 +56,13 @@ def create_attribute_predictor(
|
||||
|
||||
Args:
|
||||
model_name: The enum corresponding to the desired attribute model
|
||||
(e.g., AgeGenderWeights.DEFAULT or DDAMFNWeights.AFFECNET7).
|
||||
(e.g., AgeGenderWeights.DEFAULT, DDAMFNWeights.AFFECNET7,
|
||||
or FairFaceWeights.DEFAULT).
|
||||
**kwargs: Additional keyword arguments to pass to the model's constructor.
|
||||
|
||||
Returns:
|
||||
An initialized instance of an Attribute predictor class (e.g., AgeGender).
|
||||
An initialized instance of an Attribute predictor class
|
||||
(e.g., AgeGender, FairFace, or Emotion).
|
||||
|
||||
Raises:
|
||||
ValueError: If the provided model_name is not a supported enum.
|
||||
@@ -59,48 +70,45 @@ def create_attribute_predictor(
|
||||
model_class = _ATTRIBUTE_MODELS.get(model_name)
|
||||
|
||||
if model_class is None:
|
||||
raise ValueError(f"Unsupported attribute model: {model_name}. "
|
||||
f"Please choose from AgeGenderWeights or DDAMFNWeights.")
|
||||
raise ValueError(
|
||||
f'Unsupported attribute model: {model_name}. '
|
||||
f'Please choose from AgeGenderWeights, FairFaceWeights, or DDAMFNWeights.'
|
||||
)
|
||||
|
||||
# Pass model_name to the constructor, as some classes might need it
|
||||
return model_class(model_name=model_name, **kwargs)
|
||||
|
||||
|
||||
def predict_attributes(
|
||||
image: np.ndarray,
|
||||
detections: List[Dict[str, np.ndarray]],
|
||||
predictor: Attribute
|
||||
) -> List[Dict[str, Any]]:
|
||||
def predict_attributes(image: np.ndarray, faces: list[Face], predictor: Attribute) -> list[Face]:
|
||||
"""
|
||||
High-level API to predict attributes for multiple detected faces.
|
||||
|
||||
This function iterates through a list of face detections, runs the
|
||||
specified attribute predictor on each one, and appends the results back
|
||||
into the detection dictionary.
|
||||
This function iterates through a list of Face objects, runs the
|
||||
specified attribute predictor on each one, and updates the Face
|
||||
objects with the predicted attributes.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): The full input image in BGR format.
|
||||
detections (List[Dict]): A list of detection results, where each dict
|
||||
must contain a 'bbox' and optionally 'landmark'.
|
||||
faces (List[Face]): A list of Face objects from face detection.
|
||||
predictor (Attribute): An initialized attribute predictor instance,
|
||||
created by `create_attribute_predictor`.
|
||||
|
||||
Returns:
|
||||
The list of detections, where each dictionary is updated with a new
|
||||
'attributes' key containing the prediction result.
|
||||
List[Face]: The list of Face objects with updated attribute fields.
|
||||
"""
|
||||
for face in detections:
|
||||
# Initialize attributes dict if it doesn't exist
|
||||
if 'attributes' not in face:
|
||||
face['attributes'] = {}
|
||||
|
||||
for face in faces:
|
||||
if isinstance(predictor, AgeGender):
|
||||
gender, age = predictor(image, face['bbox'])
|
||||
face['attributes']['gender'] = gender
|
||||
face['attributes']['age'] = age
|
||||
result = predictor(image, face.bbox)
|
||||
face.gender = result.gender
|
||||
face.age = result.age
|
||||
elif isinstance(predictor, FairFace):
|
||||
result = predictor(image, face.bbox)
|
||||
face.gender = result.gender
|
||||
face.age_group = result.age_group
|
||||
face.race = result.race
|
||||
elif isinstance(predictor, Emotion):
|
||||
emotion, confidence = predictor(image, face['landmark'])
|
||||
face['attributes']['emotion'] = emotion
|
||||
face['attributes']['confidence'] = confidence
|
||||
result = predictor(image, face.landmarks)
|
||||
face.emotion = result.emotion
|
||||
face.emotion_confidence = result.confidence
|
||||
|
||||
return detections
|
||||
return faces
|
||||
|
||||
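A short end-to-end sketch of this functional API, using the default AgeGender weights (the image path is a placeholder):

import cv2

from uniface import RetinaFace
from uniface.attribute import create_attribute_predictor, predict_attributes
from uniface.constants import AgeGenderWeights

detector = RetinaFace()
predictor = create_attribute_predictor(AgeGenderWeights.DEFAULT)

image = cv2.imread('portrait.jpg')  # placeholder path
faces = predict_attributes(image, detector.detect(image), predictor)
for face in faces:
    print(f'age={face.age}, gender={"Male" if face.gender == 1 else "Female"}')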
@@ -2,7 +2,6 @@
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from typing import List, Tuple, Union
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
@@ -13,8 +12,9 @@ from uniface.face_utils import bbox_center_alignment
|
||||
from uniface.log import Logger
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.onnx_utils import create_onnx_session
|
||||
from uniface.types import AttributeResult
|
||||
|
||||
__all__ = ["AgeGender"]
|
||||
__all__ = ['AgeGender']
|
||||
|
||||
|
||||
class AgeGender(Attribute):
|
||||
@@ -22,20 +22,32 @@ class AgeGender(Attribute):
|
||||
Age and gender prediction model using ONNX Runtime.
|
||||
|
||||
This class inherits from the base `Attribute` class and implements the
|
||||
functionality for predicting age (in years) and gender (0 for female,
|
||||
1 for male) from a face image. It requires a bounding box to locate the face.
|
||||
functionality for predicting age (in years) and gender ID (0 for Female,
|
||||
1 for Male) from a face image. It requires a bounding box to locate the face.
|
||||
|
||||
Args:
|
||||
model_name (AgeGenderWeights): The enum specifying the model weights to load.
|
||||
Defaults to `AgeGenderWeights.DEFAULT`.
|
||||
input_size (Optional[Tuple[int, int]]): Input size (height, width).
|
||||
If None, automatically detected from model metadata. Defaults to None.
|
||||
"""
|
||||
|
||||
def __init__(self, model_name: AgeGenderWeights = AgeGenderWeights.DEFAULT) -> None:
|
||||
def __init__(
|
||||
self,
|
||||
model_name: AgeGenderWeights = AgeGenderWeights.DEFAULT,
|
||||
input_size: tuple[int, int] | None = None,
|
||||
) -> None:
|
||||
"""
|
||||
Initializes the AgeGender prediction model.
|
||||
|
||||
Args:
|
||||
model_name (AgeGenderWeights): The enum specifying the model weights
|
||||
to load.
|
||||
model_name (AgeGenderWeights): The enum specifying the model weights to load.
|
||||
input_size (Optional[Tuple[int, int]]): Input size (height, width).
|
||||
If None, automatically detected from model metadata. Defaults to None.
|
||||
"""
|
||||
Logger.info(f"Initializing AgeGender with model={model_name.name}")
|
||||
Logger.info(f'Initializing AgeGender with model={model_name.name}')
|
||||
self.model_path = verify_model_weights(model_name)
|
||||
self._user_input_size = input_size # Store user preference
|
||||
self._initialize_model()
|
||||
|
||||
def _initialize_model(self) -> None:
|
||||
@@ -47,14 +59,29 @@ class AgeGender(Attribute):
|
||||
# Get model input details from the loaded model
|
||||
input_meta = self.session.get_inputs()[0]
|
||||
self.input_name = input_meta.name
|
||||
self.input_size = tuple(input_meta.shape[2:4]) # (height, width)
|
||||
self.output_names = [output.name for output in self.session.get_outputs()]
|
||||
Logger.info(f"Successfully initialized AgeGender model with input size {self.input_size}")
|
||||
except Exception as e:
|
||||
Logger.error(f"Failed to load AgeGender model from '{self.model_path}'", exc_info=True)
|
||||
raise RuntimeError(f"Failed to initialize AgeGender model: {e}")
|
||||
|
||||
def preprocess(self, image: np.ndarray, bbox: Union[List, np.ndarray]) -> np.ndarray:
|
||||
# Use user-provided size if given, otherwise auto-detect from model
|
||||
model_input_size = tuple(input_meta.shape[2:4]) # (height, width)
|
||||
if self._user_input_size is not None:
|
||||
self.input_size = self._user_input_size
|
||||
if self._user_input_size != model_input_size:
|
||||
Logger.warning(
|
||||
f'Using custom input_size {self.input_size}, '
|
||||
f'but model expects {model_input_size}. This may affect accuracy.'
|
||||
)
|
||||
else:
|
||||
self.input_size = model_input_size
|
||||
|
||||
self.output_names = [output.name for output in self.session.get_outputs()]
|
||||
Logger.info(f'Successfully initialized AgeGender model with input size {self.input_size}')
|
||||
except Exception as e:
|
||||
Logger.error(
|
||||
f"Failed to load AgeGender model from '{self.model_path}'",
|
||||
exc_info=True,
|
||||
)
|
||||
raise RuntimeError(f'Failed to initialize AgeGender model: {e}') from e
|
||||
|
||||
def preprocess(self, image: np.ndarray, bbox: list | np.ndarray) -> np.ndarray:
|
||||
"""
|
||||
Aligns the face based on the bounding box and preprocesses it for inference.
|
||||
|
||||
@@ -76,11 +103,15 @@ class AgeGender(Attribute):
|
||||
aligned_face, _ = bbox_center_alignment(image, center, self.input_size[1], scale, rotation)
|
||||
|
||||
blob = cv2.dnn.blobFromImage(
|
||||
aligned_face, scalefactor=1.0, size=self.input_size[::-1], mean=(0.0, 0.0, 0.0), swapRB=True
|
||||
aligned_face,
|
||||
scalefactor=1.0,
|
||||
size=self.input_size[::-1],
|
||||
mean=(0.0, 0.0, 0.0),
|
||||
swapRB=True,
|
||||
)
|
||||
return blob
|
||||
|
||||
def postprocess(self, prediction: np.ndarray) -> Tuple[str, int]:
|
||||
def postprocess(self, prediction: np.ndarray) -> AttributeResult:
|
||||
"""
|
||||
Processes the raw model output to extract gender and age.
|
||||
|
||||
@@ -88,17 +119,15 @@ class AgeGender(Attribute):
|
||||
prediction (np.ndarray): The raw output from the model inference.
|
||||
|
||||
Returns:
|
||||
Tuple[str, int]: A tuple containing the predicted gender label ("Female" or "Male")
|
||||
and age (in years).
|
||||
AttributeResult: Result containing gender (0=Female, 1=Male) and age (in years).
|
||||
"""
|
||||
# First two values are gender logits
|
||||
gender_id = int(np.argmax(prediction[:2]))
|
||||
gender = "Female" if gender_id == 0 else "Male"
|
||||
gender = int(np.argmax(prediction[:2]))
|
||||
# Third value is normalized age, scaled by 100
|
||||
age = int(np.round(prediction[2] * 100))
|
||||
return gender, age
|
||||
return AttributeResult(gender=gender, age=age)
|
||||
|
||||
def predict(self, image: np.ndarray, bbox: Union[List, np.ndarray]) -> Tuple[str, int]:
|
||||
def predict(self, image: np.ndarray, bbox: list | np.ndarray) -> AttributeResult:
|
||||
"""
|
||||
Predicts age and gender for a single face specified by a bounding box.
|
||||
|
||||
@@ -107,66 +136,8 @@ class AgeGender(Attribute):
|
||||
bbox (Union[List, np.ndarray]): The face bounding box coordinates [x1, y1, x2, y2].
|
||||
|
||||
Returns:
|
||||
Tuple[str, int]: A tuple containing the predicted gender label and age.
|
||||
AttributeResult: Result containing gender (0=Female, 1=Male) and age (in years).
|
||||
"""
|
||||
face_blob = self.preprocess(image, bbox)
|
||||
prediction = self.session.run(self.output_names, {self.input_name: face_blob})[0][0]
|
||||
gender, age = self.postprocess(prediction)
|
||||
return gender, age
|
||||
|
||||
|
||||
# TODO: below is only for testing, remove it later
|
||||
if __name__ == "__main__":
|
||||
# To run this script, you need to have uniface.detection installed
|
||||
# or available in your path.
|
||||
from uniface.constants import RetinaFaceWeights
|
||||
from uniface.detection import create_detector
|
||||
|
||||
print("Initializing models for live inference...")
|
||||
# 1. Initialize the face detector
|
||||
# Using a smaller model for faster real-time performance
|
||||
detector = create_detector(model_name=RetinaFaceWeights.MNET_V2)
|
||||
|
||||
# 2. Initialize the attribute predictor
|
||||
age_gender_predictor = AgeGender()
|
||||
|
||||
# 3. Start webcam capture
|
||||
cap = cv2.VideoCapture(0)
|
||||
if not cap.isOpened():
|
||||
print("Error: Could not open webcam.")
|
||||
exit()
|
||||
|
||||
print("Starting webcam feed. Press 'q' to quit.")
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
print("Error: Failed to capture frame.")
|
||||
break
|
||||
|
||||
# Detect faces in the current frame
|
||||
detections = detector.detect(frame)
|
||||
|
||||
# For each detected face, predict age and gender
|
||||
for detection in detections:
|
||||
box = detection["bbox"]
|
||||
x1, y1, x2, y2 = map(int, box)
|
||||
|
||||
# Predict attributes
|
||||
gender, age = age_gender_predictor.predict(frame, box)
|
||||
|
||||
# Prepare text and draw on the frame
|
||||
label = f"{gender}, {age}"
|
||||
cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
|
||||
cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
|
||||
|
||||
# Display the resulting frame
|
||||
cv2.imshow("Age and Gender Inference (Press 'q' to quit)", frame)
|
||||
|
||||
# Break the loop if 'q' is pressed
|
||||
if cv2.waitKey(1) & 0xFF == ord("q"):
|
||||
break
|
||||
|
||||
# Release resources
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
print("Inference stopped.")
|
||||
return self.postprocess(prediction)
|
||||
|
||||
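A minimal standalone sketch of the class, including the new optional input_size override (left at None here so the size is auto-detected from the ONNX metadata; the image path is a placeholder):

import cv2

from uniface import AgeGender, RetinaFace

detector = RetinaFace()
age_gender = AgeGender(input_size=None)

image = cv2.imread('face.jpg')  # placeholder path
for face in detector.detect(image):
    result = age_gender.predict(image, face.bbox)
    label = 'Male' if result.gender == 1 else 'Female'
    print(f'{label}, {result.age} years')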
@@ -4,8 +4,13 @@
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Any
|
||||
|
||||
import numpy as np
|
||||
|
||||
from uniface.types import AttributeResult, EmotionResult
|
||||
|
||||
__all__ = ['Attribute', 'AttributeResult', 'EmotionResult']
|
||||
|
||||
|
||||
class Attribute(ABC):
|
||||
"""
|
||||
@@ -26,7 +31,7 @@ class Attribute(ABC):
|
||||
inference session (e.g., ONNX Runtime, PyTorch), and any necessary
|
||||
warm-up procedures to prepare the model for prediction.
|
||||
"""
|
||||
raise NotImplementedError("Subclasses must implement the _initialize_model method.")
|
||||
raise NotImplementedError('Subclasses must implement the _initialize_model method.')
|
||||
|
||||
@abstractmethod
|
||||
def preprocess(self, image: np.ndarray, *args: Any) -> Any:
|
||||
@@ -46,7 +51,7 @@ class Attribute(ABC):
|
||||
Returns:
|
||||
The preprocessed data ready for model inference.
|
||||
"""
|
||||
raise NotImplementedError("Subclasses must implement the preprocess method.")
|
||||
raise NotImplementedError('Subclasses must implement the preprocess method.')
|
||||
|
||||
@abstractmethod
|
||||
def postprocess(self, prediction: Any) -> Any:
|
||||
@@ -63,7 +68,7 @@ class Attribute(ABC):
|
||||
Returns:
|
||||
The final, processed attributes.
|
||||
"""
|
||||
raise NotImplementedError("Subclasses must implement the postprocess method.")
|
||||
raise NotImplementedError('Subclasses must implement the postprocess method.')
|
||||
|
||||
@abstractmethod
|
||||
def predict(self, image: np.ndarray, *args: Any) -> Any:
|
||||
@@ -82,7 +87,7 @@ class Attribute(ABC):
|
||||
Returns:
|
||||
The final predicted attributes.
|
||||
"""
|
||||
raise NotImplementedError("Subclasses must implement the predict method.")
|
||||
raise NotImplementedError('Subclasses must implement the predict method.')
|
||||
|
||||
def __call__(self, *args, **kwargs) -> Any:
|
||||
"""
|
||||
|
||||
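To illustrate the contract, a toy subclass that fills in the four required methods; the blur-score logic below is purely illustrative and not part of the library:

import cv2
import numpy as np

from uniface.attribute.base import Attribute


class BlurScore(Attribute):
    """Toy attribute: variance of the Laplacian over the face crop (higher = sharper)."""

    def __init__(self) -> None:
        self._initialize_model()

    def _initialize_model(self) -> None:
        pass  # nothing to load for this toy example

    def preprocess(self, image: np.ndarray, bbox) -> np.ndarray:
        x1, y1, x2, y2 = map(int, bbox[:4])
        return cv2.cvtColor(image[y1:y2, x1:x2], cv2.COLOR_BGR2GRAY)

    def postprocess(self, prediction: float) -> float:
        return float(prediction)

    def predict(self, image: np.ndarray, bbox) -> float:
        crop = self.preprocess(image, bbox)
        return self.postprocess(cv2.Laplacian(crop, cv2.CV_64F).var())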
@@ -2,18 +2,19 @@
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
|
||||
import cv2
|
||||
import torch
|
||||
import numpy as np
|
||||
from typing import Tuple, Union, List
|
||||
import torch
|
||||
|
||||
from uniface.attribute.base import Attribute
|
||||
from uniface.log import Logger
|
||||
from uniface.constants import DDAMFNWeights
|
||||
from uniface.face_utils import face_alignment
|
||||
from uniface.log import Logger
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.types import EmotionResult
|
||||
|
||||
__all__ = ["Emotion"]
|
||||
__all__ = ['Emotion']
|
||||
|
||||
|
||||
class Emotion(Attribute):
|
||||
@@ -28,7 +29,7 @@ class Emotion(Attribute):
|
||||
def __init__(
|
||||
self,
|
||||
model_weights: DDAMFNWeights = DDAMFNWeights.AFFECNET7,
|
||||
input_size: Tuple[int, int] = (112, 112),
|
||||
input_size: tuple[int, int] = (112, 112),
|
||||
) -> None:
|
||||
"""
|
||||
Initializes the emotion recognition model.
|
||||
@@ -37,15 +38,30 @@ class Emotion(Attribute):
|
||||
model_weights (DDAMFNWeights): The enum for the model weights to load.
|
||||
input_size (Tuple[int, int]): The expected input size for the model.
|
||||
"""
|
||||
Logger.info(f"Initializing Emotion with model={model_weights.name}")
|
||||
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
||||
Logger.info(f'Initializing Emotion with model={model_weights.name}')
|
||||
|
||||
if torch.backends.mps.is_available():
|
||||
self.device = torch.device('mps')
|
||||
elif torch.cuda.is_available():
|
||||
self.device = torch.device('cuda')
|
||||
else:
|
||||
self.device = torch.device('cpu')
|
||||
|
||||
self.input_size = input_size
|
||||
self.model_path = verify_model_weights(model_weights)
|
||||
|
||||
# Define emotion labels based on the selected model
|
||||
self.emotion_labels = ["Neutral", "Happy", "Sad", "Surprise", "Fear", "Disgust", "Angry"]
|
||||
self.emotion_labels = [
|
||||
'Neutral',
|
||||
'Happy',
|
||||
'Sad',
|
||||
'Surprise',
|
||||
'Fear',
|
||||
'Disgust',
|
||||
'Angry',
|
||||
]
|
||||
if model_weights == DDAMFNWeights.AFFECNET8:
|
||||
self.emotion_labels.append("Contempt")
|
||||
self.emotion_labels.append('Contempt')
|
||||
|
||||
self._initialize_model()
|
||||
|
||||
@@ -60,12 +76,12 @@ class Emotion(Attribute):
|
||||
dummy_input = torch.randn(1, 3, *self.input_size).to(self.device)
|
||||
with torch.no_grad():
|
||||
self.model(dummy_input)
|
||||
Logger.info(f"Successfully initialized Emotion model on {self.device}")
|
||||
Logger.info(f'Successfully initialized Emotion model on {self.device}')
|
||||
except Exception as e:
|
||||
Logger.error(f"Failed to load Emotion model from '{self.model_path}'", exc_info=True)
|
||||
raise RuntimeError(f"Failed to initialize Emotion model: {e}")
|
||||
raise RuntimeError(f'Failed to initialize Emotion model: {e}') from e
|
||||
|
||||
def preprocess(self, image: np.ndarray, landmark: Union[List, np.ndarray]) -> torch.Tensor:
|
||||
def preprocess(self, image: np.ndarray, landmark: list | np.ndarray) -> torch.Tensor:
|
||||
"""
|
||||
Aligns the face using landmarks and preprocesses it into a tensor.
|
||||
|
||||
@@ -77,7 +93,7 @@ class Emotion(Attribute):
|
||||
torch.Tensor: The preprocessed image tensor ready for inference.
|
||||
"""
|
||||
landmark = np.asarray(landmark)
|
||||
|
||||
|
||||
aligned_image, _ = face_alignment(image, landmark)
|
||||
|
||||
# Convert BGR to RGB, resize, normalize, and convert to a CHW tensor
|
||||
@@ -90,7 +106,7 @@ class Emotion(Attribute):
|
||||
|
||||
return torch.from_numpy(transposed_image).unsqueeze(0).to(self.device)
|
||||
|
||||
def postprocess(self, prediction: torch.Tensor) -> Tuple[str, float]:
|
||||
def postprocess(self, prediction: torch.Tensor) -> EmotionResult:
|
||||
"""
|
||||
Processes the raw model output to get the emotion label and confidence score.
|
||||
"""
|
||||
@@ -98,9 +114,9 @@ class Emotion(Attribute):
|
||||
pred_index = np.argmax(probabilities)
|
||||
emotion_label = self.emotion_labels[pred_index]
|
||||
confidence = float(probabilities[pred_index])
|
||||
return emotion_label, confidence
|
||||
return EmotionResult(emotion=emotion_label, confidence=confidence)
|
||||
|
||||
def predict(self, image: np.ndarray, landmark: Union[List, np.ndarray]) -> Tuple[str, float]:
|
||||
def predict(self, image: np.ndarray, landmark: list | np.ndarray) -> EmotionResult:
|
||||
"""
|
||||
Predicts the emotion from a single face specified by its landmarks.
|
||||
"""
|
||||
@@ -111,60 +127,3 @@ class Emotion(Attribute):
|
||||
output = output[0]
|
||||
|
||||
return self.postprocess(output)
|
||||
|
||||
|
||||
# TODO: below is only for testing, remove it later
|
||||
if __name__ == "__main__":
|
||||
from uniface.detection import create_detector
|
||||
from uniface.constants import RetinaFaceWeights
|
||||
|
||||
print("Initializing models for live inference...")
|
||||
# 1. Initialize the face detector
|
||||
# Using a smaller model for faster real-time performance
|
||||
detector = create_detector(model_name=RetinaFaceWeights.MNET_V2)
|
||||
|
||||
# 2. Initialize the attribute predictor
|
||||
emotion_predictor = Emotion()
|
||||
|
||||
# 3. Start webcam capture
|
||||
cap = cv2.VideoCapture(0)
|
||||
if not cap.isOpened():
|
||||
print("Error: Could not open webcam.")
|
||||
exit()
|
||||
|
||||
print("Starting webcam feed. Press 'q' to quit.")
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
print("Error: Failed to capture frame.")
|
||||
break
|
||||
|
||||
# Detect faces in the current frame.
|
||||
# This method returns a list of dictionaries for each detected face.
|
||||
detections = detector.detect(frame)
|
||||
|
||||
# For each detected face, predict the emotion
|
||||
for detection in detections:
|
||||
box = detection['bbox']
|
||||
landmark = detection['landmarks']
|
||||
x1, y1, x2, y2 = map(int, box)
|
||||
|
||||
# Predict attributes using the landmark
|
||||
emotion, confidence = emotion_predictor.predict(frame, landmark)
|
||||
|
||||
# Prepare text and draw on the frame
|
||||
label = f"{emotion} ({confidence:.2f})"
|
||||
cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)
|
||||
cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 0, 0), 2)
|
||||
|
||||
# Display the resulting frame
|
||||
cv2.imshow("Emotion Inference (Press 'q' to quit)", frame)
|
||||
|
||||
# Break the loop if 'q' is pressed
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
# Release resources
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
print("Inference stopped.")
|
||||
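A minimal sketch of driving the predictor directly (requires the optional PyTorch dependency); note that it consumes the 5-point landmarks from detection rather than the bounding box (the image path is a placeholder):

import cv2

from uniface import RetinaFace
from uniface.attribute import Emotion

detector = RetinaFace()
emotion = Emotion()  # defaults to the AffectNet-7 DDAMFN weights

image = cv2.imread('face.jpg')  # placeholder path
for face in detector.detect(image):
    result = emotion.predict(image, face.landmarks)
    print(f'{result.emotion} ({result.confidence:.2f})')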
193  uniface/attribute/fairface.py  (new file)
@@ -0,0 +1,193 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from uniface.attribute.base import Attribute
|
||||
from uniface.constants import FairFaceWeights
|
||||
from uniface.log import Logger
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.onnx_utils import create_onnx_session
|
||||
from uniface.types import AttributeResult
|
||||
|
||||
__all__ = ['AGE_LABELS', 'RACE_LABELS', 'FairFace']
|
||||
|
||||
# Label definitions
|
||||
RACE_LABELS = [
|
||||
'White',
|
||||
'Black',
|
||||
'Latino Hispanic',
|
||||
'East Asian',
|
||||
'Southeast Asian',
|
||||
'Indian',
|
||||
'Middle Eastern',
|
||||
]
|
||||
AGE_LABELS = ['0-2', '3-9', '10-19', '20-29', '30-39', '40-49', '50-59', '60-69', '70+']
|
||||
|
||||
|
||||
class FairFace(Attribute):
|
||||
"""
|
||||
FairFace attribute prediction model using ONNX Runtime.
|
||||
|
||||
This class inherits from the base `Attribute` class and implements the
|
||||
functionality for predicting race (7 categories), gender (2 categories),
|
||||
and age (9 groups) from a face image. It requires a bounding box to locate the face.
|
||||
|
||||
The model is trained on the FairFace dataset which provides balanced demographics
|
||||
for more equitable predictions across different racial and gender groups.
|
||||
|
||||
Args:
|
||||
model_name (FairFaceWeights): The enum specifying the model weights to load.
|
||||
Defaults to `FairFaceWeights.DEFAULT`.
|
||||
input_size (Optional[Tuple[int, int]]): Input size (height, width).
|
||||
If None, defaults to (224, 224). Defaults to None.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
model_name: FairFaceWeights = FairFaceWeights.DEFAULT,
|
||||
input_size: tuple[int, int] | None = None,
|
||||
) -> None:
|
||||
"""
|
||||
Initializes the FairFace prediction model.
|
||||
|
||||
Args:
|
||||
model_name (FairFaceWeights): The enum specifying the model weights to load.
|
||||
input_size (Optional[Tuple[int, int]]): Input size (height, width).
|
||||
If None, defaults to (224, 224).
|
||||
"""
|
||||
Logger.info(f'Initializing FairFace with model={model_name.name}')
|
||||
self.model_path = verify_model_weights(model_name)
|
||||
self.input_size = input_size if input_size is not None else (224, 224)
|
||||
self._initialize_model()
|
||||
|
||||
def _initialize_model(self) -> None:
|
||||
"""
|
||||
Initializes the ONNX model and creates an inference session.
|
||||
"""
|
||||
try:
|
||||
self.session = create_onnx_session(self.model_path)
|
||||
# Get model input details from the loaded model
|
||||
input_meta = self.session.get_inputs()[0]
|
||||
self.input_name = input_meta.name
|
||||
self.output_names = [output.name for output in self.session.get_outputs()]
|
||||
Logger.info(f'Successfully initialized FairFace model with input size {self.input_size}')
|
||||
except Exception as e:
|
||||
Logger.error(
|
||||
f"Failed to load FairFace model from '{self.model_path}'",
|
||||
exc_info=True,
|
||||
)
|
||||
raise RuntimeError(f'Failed to initialize FairFace model: {e}') from e
|
||||
|
||||
def preprocess(self, image: np.ndarray, bbox: list | np.ndarray | None = None) -> np.ndarray:
|
||||
"""
|
||||
Preprocesses the face image for inference.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): The input image in BGR format.
|
||||
bbox (Optional[Union[List, np.ndarray]]): Face bounding box [x1, y1, x2, y2].
|
||||
If None, uses the entire image.
|
||||
|
||||
Returns:
|
||||
np.ndarray: The preprocessed image blob ready for inference.
|
||||
"""
|
||||
# Crop face if bbox provided
|
||||
if bbox is not None:
|
||||
bbox = np.asarray(bbox, dtype=int)
|
||||
x1, y1, x2, y2 = bbox[:4]
|
||||
|
||||
# Add padding (25% of face size)
|
||||
w, h = x2 - x1, y2 - y1
|
||||
padding = 0.25
|
||||
x_pad = int(w * padding)
|
||||
y_pad = int(h * padding)
|
||||
|
||||
x1 = max(0, x1 - x_pad)
|
||||
y1 = max(0, y1 - y_pad)
|
||||
x2 = min(image.shape[1], x2 + x_pad)
|
||||
y2 = min(image.shape[0], y2 + y_pad)
|
||||
|
||||
image = image[y1:y2, x1:x2]
|
||||
|
||||
# Resize to input size (width, height for cv2.resize)
|
||||
image = cv2.resize(image, self.input_size[::-1])
|
||||
|
||||
# Convert BGR to RGB
|
||||
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
|
||||
|
||||
# Normalize with ImageNet mean and std
|
||||
image = image.astype(np.float32) / 255.0
|
||||
mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
|
||||
std = np.array([0.229, 0.224, 0.225], dtype=np.float32)
|
||||
image = (image - mean) / std
|
||||
|
||||
# Transpose to CHW format and add batch dimension
|
||||
image = np.transpose(image, (2, 0, 1))
|
||||
image = np.expand_dims(image, axis=0)
|
||||
|
||||
return image
|
||||
|
||||
def postprocess(self, prediction: tuple[np.ndarray, np.ndarray, np.ndarray]) -> AttributeResult:
|
||||
"""
|
||||
Processes the raw model output to extract race, gender, and age.
|
||||
|
||||
Args:
|
||||
prediction (Tuple[np.ndarray, np.ndarray, np.ndarray]): Raw outputs from model
|
||||
(race_logits, gender_logits, age_logits).
|
||||
|
||||
Returns:
|
||||
AttributeResult: Result containing gender (0=Female, 1=Male), age_group, and race.
|
||||
"""
|
||||
race_logits, gender_logits, age_logits = prediction
|
||||
|
||||
# Apply softmax
|
||||
race_probs = self._softmax(race_logits[0])
|
||||
gender_probs = self._softmax(gender_logits[0])
|
||||
age_probs = self._softmax(age_logits[0])
|
||||
|
||||
# Get predictions
|
||||
race_idx = int(np.argmax(race_probs))
|
||||
raw_gender_idx = int(np.argmax(gender_probs))
|
||||
age_idx = int(np.argmax(age_probs))
|
||||
|
||||
# Normalize gender: model outputs 0=Male, 1=Female → standard 0=Female, 1=Male
|
||||
gender = 1 - raw_gender_idx
|
||||
|
||||
return AttributeResult(
|
||||
gender=gender,
|
||||
age_group=AGE_LABELS[age_idx],
|
||||
race=RACE_LABELS[race_idx],
|
||||
)
|
||||
|
||||
def predict(self, image: np.ndarray, bbox: list | np.ndarray | None = None) -> AttributeResult:
|
||||
"""
|
||||
Predicts race, gender, and age for a face.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): The input image in BGR format.
|
||||
bbox (Optional[Union[List, np.ndarray]]): Face bounding box [x1, y1, x2, y2].
|
||||
If None, uses the entire image.
|
||||
|
||||
Returns:
|
||||
AttributeResult: Result containing:
|
||||
- gender: 0=Female, 1=Male
|
||||
- age_group: Age range string like "20-29"
|
||||
- race: Race/ethnicity label
|
||||
"""
|
||||
# Preprocess
|
||||
input_blob = self.preprocess(image, bbox)
|
||||
|
||||
# Inference
|
||||
outputs = self.session.run(self.output_names, {self.input_name: input_blob})
|
||||
|
||||
# Postprocess
|
||||
return self.postprocess(outputs)
|
||||
|
||||
@staticmethod
|
||||
def _softmax(x: np.ndarray) -> np.ndarray:
|
||||
"""Compute softmax values for numerical stability."""
|
||||
exp_x = np.exp(x - np.max(x))
|
||||
return exp_x / np.sum(exp_x)
|
||||
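A minimal standalone sketch; bbox is optional, so the predictor can also run on a pre-cropped face (the image path is a placeholder):

import cv2

from uniface import FairFace, RetinaFace

detector = RetinaFace()
fairface = FairFace()

image = cv2.imread('face.jpg')  # placeholder path
for face in detector.detect(image):
    result = fairface.predict(image, face.bbox)
    label = 'Male' if result.gender == 1 else 'Female'
    print(f'{label}, {result.age_group}, {result.race}')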
@@ -2,24 +2,42 @@
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
import cv2
|
||||
import math
|
||||
from __future__ import annotations
|
||||
|
||||
import itertools
|
||||
import math
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from typing import Tuple, List
|
||||
__all__ = [
|
||||
'decode_boxes',
|
||||
'decode_landmarks',
|
||||
'distance2bbox',
|
||||
'distance2kps',
|
||||
'generate_anchors',
|
||||
'non_max_suppression',
|
||||
'resize_image',
|
||||
]
|
||||
|
||||
|
||||
def resize_image(frame, target_shape: Tuple[int, int] = (640, 640)) -> Tuple[np.ndarray, float]:
|
||||
"""
|
||||
Resize an image to fit within a target shape while keeping its aspect ratio.
|
||||
def resize_image(
|
||||
frame: np.ndarray,
|
||||
target_shape: tuple[int, int] = (640, 640),
|
||||
) -> tuple[np.ndarray, float]:
|
||||
"""Resize an image to fit within a target shape while keeping its aspect ratio.
|
||||
|
||||
The image is resized to fit within the target dimensions and placed on a
|
||||
blank canvas (zero-padded to target size).
|
||||
|
||||
Args:
|
||||
frame (np.ndarray): Input image.
|
||||
target_shape (Tuple[int, int]): Target size (width, height). Defaults to (640, 640).
|
||||
frame: Input image with shape (H, W, C).
|
||||
target_shape: Target size as (width, height). Defaults to (640, 640).
|
||||
|
||||
Returns:
|
||||
Tuple[np.ndarray, float]: Resized image on a blank canvas and the resize factor.
|
||||
A tuple containing:
|
||||
- Resized image on a blank canvas with shape (height, width, 3).
|
||||
- The resize factor as a float.
|
||||
"""
|
||||
width, height = target_shape
|
||||
|
||||
@@ -43,28 +61,21 @@ def resize_image(frame, target_shape: Tuple[int, int] = (640, 640)) -> Tuple[np.
|
||||
return image, resize_factor
|
||||
|
||||
|
||||
def generate_anchors(image_size: Tuple[int, int] = (640, 640)) -> np.ndarray:
|
||||
"""
|
||||
Generate anchor boxes for a given image size.
|
||||
def generate_anchors(image_size: tuple[int, int] = (640, 640)) -> np.ndarray:
|
||||
"""Generate anchor boxes for a given image size (RetinaFace specific).
|
||||
|
||||
Args:
|
||||
image_size (Tuple[int, int]): Input image size (width, height). Defaults to (640, 640).
|
||||
image_size: Input image size as (width, height). Defaults to (640, 640).
|
||||
|
||||
Returns:
|
||||
np.ndarray: Anchor box coordinates as a NumPy array.
|
||||
Anchor box coordinates as a numpy array with shape (num_anchors, 4).
|
||||
"""
|
||||
image_size = image_size
|
||||
|
||||
# RetinaFace FPN strides and corresponding anchor sizes per level
|
||||
steps = [8, 16, 32]
|
||||
min_sizes = [[16, 32], [64, 128], [256, 512]]
|
||||
|
||||
anchors = []
|
||||
feature_maps = [
|
||||
[
|
||||
math.ceil(image_size[0] / step),
|
||||
math.ceil(image_size[1] / step)
|
||||
] for step in steps
|
||||
]
|
||||
feature_maps = [[math.ceil(image_size[0] / step), math.ceil(image_size[1] / step)] for step in steps]
|
||||
|
||||
for k, (map_height, map_width) in enumerate(feature_maps):
|
||||
step = steps[k]
|
||||
@@ -82,16 +93,15 @@ def generate_anchors(image_size: Tuple[int, int] = (640, 640)) -> np.ndarray:
|
||||
return output
|
||||
|
||||
|
||||
def non_max_supression(dets: List[np.ndarray], threshold: float):
|
||||
"""
|
||||
Apply Non-Maximum Suppression (NMS) to reduce overlapping bounding boxes based on a threshold.
|
||||
def non_max_suppression(dets: np.ndarray, threshold: float) -> list[int]:
|
||||
"""Apply Non-Maximum Suppression (NMS) to reduce overlapping bounding boxes.
|
||||
|
||||
Args:
|
||||
dets (numpy.ndarray): Array of detections with each row as [x1, y1, x2, y2, score].
|
||||
threshold (float): IoU threshold for suppression.
|
||||
dets: Array of detections with each row as [x1, y1, x2, y2, score].
|
||||
threshold: IoU threshold for suppression.
|
||||
|
||||
Returns:
|
||||
list: Indices of bounding boxes retained after suppression.
|
||||
Indices of bounding boxes retained after suppression.
|
||||
"""
|
||||
x1 = dets[:, 0]
|
||||
y1 = dets[:, 1]
|
||||
@@ -122,19 +132,25 @@ def non_max_supression(dets: List[np.ndarray], threshold: float):
|
||||
return keep
|
||||
|
||||
|
||||
def decode_boxes(loc, priors, variances=[0.1, 0.2]) -> np.ndarray:
|
||||
"""
|
||||
Decode locations from predictions using priors to undo
|
||||
the encoding done for offset regression at train time.
|
||||
def decode_boxes(
|
||||
loc: np.ndarray,
|
||||
priors: np.ndarray,
|
||||
variances: list[float] | None = None,
|
||||
) -> np.ndarray:
|
||||
"""Decode locations from predictions using priors (RetinaFace specific).
|
||||
|
||||
Undoes the encoding done for offset regression at train time.
|
||||
|
||||
Args:
|
||||
loc (np.ndarray): Location predictions for loc layers, shape: [num_priors, 4]
|
||||
priors (np.ndarray): Prior boxes in center-offset form, shape: [num_priors, 4]
|
||||
variances (list[float]): Variances of prior boxes
|
||||
loc: Location predictions for loc layers, shape: [num_priors, 4].
|
||||
priors: Prior boxes in center-offset form, shape: [num_priors, 4].
|
||||
variances: Variances of prior boxes. Defaults to [0.1, 0.2].
|
||||
|
||||
Returns:
|
||||
np.ndarray: Decoded bounding box predictions
|
||||
Decoded bounding box predictions with shape [num_priors, 4].
|
||||
"""
|
||||
if variances is None:
|
||||
variances = [0.1, 0.2]
|
||||
# Compute centers of predicted boxes
|
||||
cxcy = priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:]
|
||||
|
||||
@@ -149,18 +165,23 @@ def decode_boxes(loc, priors, variances=[0.1, 0.2]) -> np.ndarray:
|
||||
return boxes
|
||||
|
||||
|
||||
def decode_landmarks(predictions, priors, variances=[0.1, 0.2]) -> np.ndarray:
|
||||
"""
|
||||
Decode landmark predictions using prior boxes.
|
||||
def decode_landmarks(
|
||||
predictions: np.ndarray,
|
||||
priors: np.ndarray,
|
||||
variances: list[float] | None = None,
|
||||
) -> np.ndarray:
|
||||
"""Decode landmark predictions using prior boxes (RetinaFace specific).
|
||||
|
||||
Args:
|
||||
predictions (np.ndarray): Landmark predictions, shape: [num_priors, 10]
|
||||
priors (np.ndarray): Prior boxes, shape: [num_priors, 4]
|
||||
variances (list): Scaling factors for landmark offsets.
|
||||
predictions: Landmark predictions, shape: [num_priors, 10].
|
||||
priors: Prior boxes, shape: [num_priors, 4].
|
||||
variances: Scaling factors for landmark offsets. Defaults to [0.1, 0.2].
|
||||
|
||||
Returns:
|
||||
np.ndarray: Decoded landmarks, shape: [num_priors, 10]
|
||||
Decoded landmarks, shape: [num_priors, 10].
|
||||
"""
|
||||
if variances is None:
|
||||
variances = [0.1, 0.2]
|
||||
|
||||
# Reshape predictions to [num_priors, 5, 2] to process landmark points
|
||||
predictions = predictions.reshape(predictions.shape[0], 5, 2)
|
||||
@@ -176,3 +197,65 @@ def decode_landmarks(predictions, priors, variances=[0.1, 0.2]) -> np.ndarray:
|
||||
landmarks = landmarks.reshape(landmarks.shape[0], -1)
|
||||
|
||||
return landmarks
|
||||
def distance2bbox(
points: np.ndarray,
distance: np.ndarray,
max_shape: tuple[int, int] | None = None,
) -> np.ndarray:
"""Decode distance prediction to bounding box (SCRFD specific).

Args:
points: Anchor points with shape (n, 2), [x, y].
distance: Distance from the given point to 4 boundaries
(left, top, right, bottom) with shape (n, 4).
max_shape: Shape of the image (height, width) for clipping.

Returns:
Decoded bounding boxes with shape (n, 4) as [x1, y1, x2, y2].
"""
x1 = points[:, 0] - distance[:, 0]
y1 = points[:, 1] - distance[:, 1]
x2 = points[:, 0] + distance[:, 2]
y2 = points[:, 1] + distance[:, 3]

if max_shape is not None:
x1 = np.clip(x1, 0, max_shape[1])
y1 = np.clip(y1, 0, max_shape[0])
x2 = np.clip(x2, 0, max_shape[1])
y2 = np.clip(y2, 0, max_shape[0])
else:
x1 = np.maximum(x1, 0)
y1 = np.maximum(y1, 0)
x2 = np.maximum(x2, 0)
y2 = np.maximum(y2, 0)

return np.stack([x1, y1, x2, y2], axis=-1)


def distance2kps(
points: np.ndarray,
distance: np.ndarray,
max_shape: tuple[int, int] | None = None,
) -> np.ndarray:
"""Decode distance prediction to keypoints (SCRFD specific).

Args:
points: Anchor points with shape (n, 2), [x, y].
distance: Distance from the given point to keypoints with shape (n, 2k).
max_shape: Shape of the image (height, width) for clipping.

Returns:
Decoded keypoints with shape (n, 2k).
"""
preds = []
for i in range(0, distance.shape[1], 2):
px = points[:, i % 2] + distance[:, i]
py = points[:, i % 2 + 1] + distance[:, i + 1]
if max_shape is not None:
px = np.clip(px, 0, max_shape[1])
py = np.clip(py, 0, max_shape[0])
preds.append(px)
preds.append(py)
return np.stack(preds, axis=-1)
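Since distance2bbox and distance2kps are shown in full above, here is a small usage sketch on synthetic anchor points; the uniface.common import path follows the new imports introduced later in this diff and is otherwise an assumption:

# --- illustrative sketch, not part of the diff ---
import numpy as np
from uniface.common import distance2bbox, distance2kps  # import path per this diff

points = np.array([[100.0, 120.0], [200.0, 80.0]])            # anchor centers (n, 2)
box_dist = np.array([[10, 12, 14, 16], [5, 6, 7, 8]], float)  # left, top, right, bottom
kps_dist = np.array([[1, 2, 3, 4], [0, 1, 2, 3]], float)      # (n, 2k) with k = 2 here

boxes = distance2bbox(points, box_dist, max_shape=(480, 640))  # (n, 4) [x1, y1, x2, y2]
kps = distance2kps(points, kps_dist, max_shape=(480, 640))     # (n, 2k)
print(boxes[0])  # [ 90. 108. 114. 136.]
print(kps[0])    # [101. 122. 103. 124.]
# --- end sketch ---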
@@ -3,7 +3,7 @@
# GitHub: https://github.com/yakhyo

from enum import Enum
from typing import Dict


# fmt: off
class SphereFaceWeights(str, Enum):
@@ -54,6 +54,22 @@ class SCRFDWeights(str, Enum):
SCRFD_500M_KPS = "scrfd_500m"


class YOLOv5FaceWeights(str, Enum):
"""
Trained on WIDER FACE dataset.
Original implementation: https://github.com/deepcam-cn/yolov5-face
Exported to ONNX from: https://github.com/yakhyo/yolov5-face-onnx-inference

Model Performance (WIDER FACE):
- YOLOV5N: 11MB, 93.61% Easy / 91.52% Medium / 80.53% Hard
- YOLOV5S: 28MB, 94.33% Easy / 92.61% Medium / 83.15% Hard
- YOLOV5M: 82MB, 95.30% Easy / 93.76% Medium / 85.28% Hard
"""
YOLOV5N = "yolov5n"
YOLOV5S = "yolov5s"
YOLOV5M = "yolov5m"


class DDAMFNWeights(str, Enum):
"""
Trained on AffectNet dataset.
@@ -71,6 +87,15 @@ class AgeGenderWeights(str, Enum):
DEFAULT = "age_gender"


class FairFaceWeights(str, Enum):
"""
FairFace attribute prediction (race, gender, age).
Trained on FairFace dataset with balanced demographics.
https://github.com/yakhyo/fairface-onnx
"""
DEFAULT = "fairface"


class LandmarkWeights(str, Enum):
"""
MobileNet 0.5 from Insightface
@@ -78,87 +103,140 @@ class LandmarkWeights(str, Enum):
"""
DEFAULT = "2d_106"

# fmt: on

class GazeWeights(str, Enum):
"""
MobileGaze: Real-Time Gaze Estimation models.
Trained on Gaze360 dataset.
https://github.com/yakhyo/gaze-estimation
"""
RESNET18 = "gaze_resnet18"
RESNET34 = "gaze_resnet34"
RESNET50 = "gaze_resnet50"
MOBILENET_V2 = "gaze_mobilenetv2"
MOBILEONE_S0 = "gaze_mobileone_s0"


MODEL_URLS: Dict[Enum, str] = {
class ParsingWeights(str, Enum):
"""
Face Parsing: Semantic Segmentation of Facial Components.
Trained on CelebAMask-HQ dataset.
https://github.com/yakhyo/face-parsing
"""
RESNET18 = "parsing_resnet18"
RESNET34 = "parsing_resnet34"


class MiniFASNetWeights(str, Enum):
"""
MiniFASNet: Lightweight Face Anti-Spoofing models.
Trained on face anti-spoofing datasets.
https://github.com/yakhyo/face-anti-spoofing

Model Variants:
- V1SE: Uses scale=4.0 for face crop (squeeze-and-excitation version)
- V2: Uses scale=2.7 for face crop (improved version)
"""
V1SE = "minifasnet_v1se"
V2 = "minifasnet_v2"


MODEL_URLS: dict[Enum, str] = {
# RetinaFace
RetinaFaceWeights.MNET_025: 'https://github.com/yakhyo/uniface/releases/download/weights/retinaface_mv1_0.25.onnx',
RetinaFaceWeights.MNET_050: 'https://github.com/yakhyo/uniface/releases/download/weights/retinaface_mv1_0.50.onnx',
RetinaFaceWeights.MNET_V1: 'https://github.com/yakhyo/uniface/releases/download/weights/retinaface_mv1.onnx',
RetinaFaceWeights.MNET_V2: 'https://github.com/yakhyo/uniface/releases/download/weights/retinaface_mv2.onnx',
RetinaFaceWeights.RESNET18: 'https://github.com/yakhyo/uniface/releases/download/weights/retinaface_r18.onnx',
RetinaFaceWeights.RESNET34: 'https://github.com/yakhyo/uniface/releases/download/weights/retinaface_r34.onnx',
# MobileFace
MobileFaceWeights.MNET_025: 'https://github.com/yakhyo/uniface/releases/download/weights/mobilenetv1_0.25.onnx',
MobileFaceWeights.MNET_V2: 'https://github.com/yakhyo/uniface/releases/download/weights/mobilenetv2.onnx',
MobileFaceWeights.MNET_V3_SMALL: 'https://github.com/yakhyo/uniface/releases/download/weights/mobilenetv3_small.onnx',
MobileFaceWeights.MNET_V3_LARGE: 'https://github.com/yakhyo/uniface/releases/download/weights/mobilenetv3_large.onnx',
# SphereFace
SphereFaceWeights.SPHERE20: 'https://github.com/yakhyo/uniface/releases/download/weights/sphere20.onnx',
SphereFaceWeights.SPHERE36: 'https://github.com/yakhyo/uniface/releases/download/weights/sphere36.onnx',
# ArcFace
ArcFaceWeights.MNET: 'https://github.com/yakhyo/uniface/releases/download/weights/w600k_mbf.onnx',
ArcFaceWeights.RESNET: 'https://github.com/yakhyo/uniface/releases/download/weights/w600k_r50.onnx',
# SCRFD
SCRFDWeights.SCRFD_10G_KPS: 'https://github.com/yakhyo/uniface/releases/download/weights/scrfd_10g_kps.onnx',
SCRFDWeights.SCRFD_500M_KPS: 'https://github.com/yakhyo/uniface/releases/download/weights/scrfd_500m_kps.onnx',
# YOLOv5-Face
YOLOv5FaceWeights.YOLOV5N: 'https://github.com/yakhyo/yolov5-face-onnx-inference/releases/download/weights/yolov5n_face.onnx',
YOLOv5FaceWeights.YOLOV5S: 'https://github.com/yakhyo/yolov5-face-onnx-inference/releases/download/weights/yolov5s_face.onnx',
YOLOv5FaceWeights.YOLOV5M: 'https://github.com/yakhyo/yolov5-face-onnx-inference/releases/download/weights/yolov5m_face.onnx',
# DDAMFN
DDAMFNWeights.AFFECNET7: 'https://github.com/yakhyo/uniface/releases/download/weights/affecnet7.script',
DDAMFNWeights.AFFECNET8: 'https://github.com/yakhyo/uniface/releases/download/weights/affecnet8.script',
# AgeGender
AgeGenderWeights.DEFAULT: 'https://github.com/yakhyo/uniface/releases/download/weights/genderage.onnx',
# FairFace
FairFaceWeights.DEFAULT: 'https://github.com/yakhyo/fairface-onnx/releases/download/weights/fairface.onnx',
# Landmarks
LandmarkWeights.DEFAULT: 'https://github.com/yakhyo/uniface/releases/download/weights/2d106det.onnx',
# Gaze (MobileGaze)
GazeWeights.RESNET18: 'https://github.com/yakhyo/gaze-estimation/releases/download/weights/resnet18_gaze.onnx',
GazeWeights.RESNET34: 'https://github.com/yakhyo/gaze-estimation/releases/download/weights/resnet34_gaze.onnx',
GazeWeights.RESNET50: 'https://github.com/yakhyo/gaze-estimation/releases/download/weights/resnet50_gaze.onnx',
GazeWeights.MOBILENET_V2: 'https://github.com/yakhyo/gaze-estimation/releases/download/weights/mobilenetv2_gaze.onnx',
GazeWeights.MOBILEONE_S0: 'https://github.com/yakhyo/gaze-estimation/releases/download/weights/mobileone_s0_gaze.onnx',
# Parsing
ParsingWeights.RESNET18: 'https://github.com/yakhyo/face-parsing/releases/download/weights/resnet18.onnx',
ParsingWeights.RESNET34: 'https://github.com/yakhyo/face-parsing/releases/download/weights/resnet34.onnx',
# Anti-Spoofing (MiniFASNet)
MiniFASNetWeights.V1SE: 'https://github.com/yakhyo/face-anti-spoofing/releases/download/weights/MiniFASNetV1SE.onnx',
MiniFASNetWeights.V2: 'https://github.com/yakhyo/face-anti-spoofing/releases/download/weights/MiniFASNetV2.onnx',
}

MODEL_SHA256: Dict[Enum, str] = {
MODEL_SHA256: dict[Enum, str] = {
# RetinaFace
RetinaFaceWeights.MNET_025: 'b7a7acab55e104dce6f32cdfff929bd83946da5cd869b9e2e9bdffafd1b7e4a5',
RetinaFaceWeights.MNET_050: 'd8977186f6037999af5b4113d42ba77a84a6ab0c996b17c713cc3d53b88bfc37',
RetinaFaceWeights.MNET_V1: '75c961aaf0aff03d13c074e9ec656e5510e174454dd4964a161aab4fe5f04153',
RetinaFaceWeights.MNET_V2: '3ca44c045651cabeed1193a1fae8946ad1f3a55da8fa74b341feab5a8319f757',
RetinaFaceWeights.RESNET18: 'e8b5ddd7d2c3c8f7c942f9f10cec09d8e319f78f09725d3f709631de34fb649d',
RetinaFaceWeights.RESNET34: 'bd0263dc2a465d32859555cb1741f2d98991eb0053696e8ee33fec583d30e630',
# MobileFace
MobileFaceWeights.MNET_025: 'eeda7d23d9c2b40cf77fa8da8e895b5697465192648852216074679657f8ee8b',
MobileFaceWeights.MNET_V2: '38b148284dd48cc898d5d4453104252fbdcbacc105fe3f0b80e78954d9d20d89',
MobileFaceWeights.MNET_V3_SMALL: 'd4acafa1039a82957aa8a9a1dac278a401c353a749c39df43de0e29cc1c127c3',
MobileFaceWeights.MNET_V3_LARGE: '0e48f8e11f070211716d03e5c65a3db35a5e917cfb5bc30552358629775a142a',
# SphereFace
SphereFaceWeights.SPHERE20: 'c02878cf658eb1861f580b7e7144b0d27cc29c440bcaa6a99d466d2854f14c9d',
SphereFaceWeights.SPHERE36: '13b3890cd5d7dec2b63f7c36fd7ce07403e5a0bbb701d9647c0289e6cbe7bb20',
# ArcFace
ArcFaceWeights.MNET: '9cc6e4a75f0e2bf0b1aed94578f144d15175f357bdc05e815e5c4a02b319eb4f',
ArcFaceWeights.RESNET: '4c06341c33c2ca1f86781dab0e829f88ad5b64be9fba56e56bc9ebdefc619e43',
# SCRFD
SCRFDWeights.SCRFD_10G_KPS: '5838f7fe053675b1c7a08b633df49e7af5495cee0493c7dcf6697200b85b5b91',
SCRFDWeights.SCRFD_500M_KPS: '5e4447f50245bbd7966bd6c0fa52938c61474a04ec7def48753668a9d8b4ea3a',
# YOLOv5-Face
YOLOv5FaceWeights.YOLOV5N: 'eb244a06e36999db732b317c2b30fa113cd6cfc1a397eaf738f2d6f33c01f640',
YOLOv5FaceWeights.YOLOV5S: 'fc682801cd5880e1e296184a14aea0035486b5146ec1a1389d2e7149cb134bb2',
YOLOv5FaceWeights.YOLOV5M: '04302ce27a15bde3e20945691b688e2dd018a10e92dd8932146bede6a49207b2',
# DDAMFN
DDAMFNWeights.AFFECNET7: '10535bf8b6afe8e9d6ae26cea6c3add9a93036e9addb6adebfd4a972171d015d',
DDAMFNWeights.AFFECNET8: '8c66963bc71db42796a14dfcbfcd181b268b65a3fc16e87147d6a3a3d7e0f487',
# AgeGender
AgeGenderWeights.DEFAULT: '4fde69b1c810857b88c64a335084f1c3fe8f01246c9a191b48c7bb756d6652fb',
# FairFace
FairFaceWeights.DEFAULT: '9c8c47d437cd310538d233f2465f9ed0524cb7fb51882a37f74e8bc22437fdbf',
# Landmark
LandmarkWeights.DEFAULT: 'f001b856447c413801ef5c42091ed0cd516fcd21f2d6b79635b1e733a7109dbf',
# MobileGaze (trained on Gaze360)
GazeWeights.RESNET18: '23d5d7e4f6f40dce8c35274ce9d08b45b9e22cbaaf5af73182f473229d713d31',
GazeWeights.RESNET34: '4457ee5f7acd1a5ab02da4b61f02fc3a0b17adbf3844dd0ba3cd4288f2b5e1de',
GazeWeights.RESNET50: 'e1eaf98f5ec7c89c6abe7cfe39f7be83e747163f98d1ff945c0603b3c521be22',
GazeWeights.MOBILENET_V2: 'fdcdb84e3e6421b5a79e8f95139f249fc258d7f387eed5ddac2b80a9a15ce076',
GazeWeights.MOBILEONE_S0: 'c0b5a4f4a0ffd24f76ab3c1452354bb2f60110899fd9a88b464c75bafec0fde8',
# Face Parsing
ParsingWeights.RESNET18: '0d9bd318e46987c3bdbfacae9e2c0f461cae1c6ac6ea6d43bbe541a91727e33f',
ParsingWeights.RESNET34: '5b805bba7b5660ab7070b5a381dcf75e5b3e04199f1e9387232a77a00095102e',
# Anti-Spoofing (MiniFASNet)
MiniFASNetWeights.V1SE: 'ebab7f90c7833fbccd46d3a555410e78d969db5438e169b6524be444862b3676',
MiniFASNetWeights.V2: 'b32929adc2d9c34b9486f8c4c7bc97c1b69bc0ea9befefc380e4faae4e463907',
}

CHUNK_SIZE = 8192
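For context, this is roughly how MODEL_URLS, MODEL_SHA256 and CHUNK_SIZE fit together when fetching weights. It is an illustrative sketch only; the real logic lives in uniface.model_store (verify_model_weights) and may differ:

# --- illustrative sketch, not part of the diff; download_and_verify is a hypothetical helper ---
import hashlib
import urllib.request
from pathlib import Path

def download_and_verify(weight, dest_dir: str = '~/.uniface/weights') -> Path:
    url, expected = MODEL_URLS[weight], MODEL_SHA256[weight]
    path = Path(dest_dir).expanduser() / Path(url).name
    path.parent.mkdir(parents=True, exist_ok=True)
    if not path.exists():
        with urllib.request.urlopen(url) as resp, open(path, 'wb') as f:
            while chunk := resp.read(CHUNK_SIZE):  # stream in CHUNK_SIZE blocks
                f.write(chunk)
    digest = hashlib.sha256(path.read_bytes()).hexdigest()
    if digest != expected:
        raise RuntimeError(f'Checksum mismatch for {weight}: {digest}')
    return path
# --- end sketch ---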
@@ -2,45 +2,53 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

from __future__ import annotations

from typing import Any

import numpy as np
from typing import Tuple, Dict, Any, List

from .scrfd import SCRFD
from uniface.types import Face

from .base import BaseDetector
from .retinaface import RetinaFace
from .scrfd import SCRFD
from .yolov5 import YOLOv5Face

# Global cache for detector instances
_detector_cache: Dict[str, BaseDetector] = {}
# Global cache for detector instances (keyed by method name + config hash)
_detector_cache: dict[str, BaseDetector] = {}


def detect_faces(image: np.ndarray, method: str = 'retinaface', **kwargs) -> List[Dict[str, Any]]:
"""
High-level face detection function.
def detect_faces(image: np.ndarray, method: str = 'retinaface', **kwargs: Any) -> list[Face]:
"""High-level face detection function.

Detects faces in an image using the specified detection method.
Results are cached for repeated calls with the same configuration.

Args:
image (np.ndarray): Input image as numpy array.
method (str): Detection method to use. Options: 'retinaface', 'scrfd'.
image: Input image as numpy array with shape (H, W, C) in BGR format.
method: Detection method to use. Options: 'retinaface', 'scrfd', 'yolov5face'.
**kwargs: Additional arguments passed to the detector.

Returns:
List[Dict[str, Any]]: A list of dictionaries, where each dictionary represents a detected face and contains:
- 'bbox' (List[float]): [x1, y1, x2, y2] bounding box coordinates.
- 'confidence' (float): The confidence score of the detection.
- 'landmarks' (List[List[float]]): 5-point facial landmarks.
A list of Face objects, each containing:
- bbox: [x1, y1, x2, y2] bounding box coordinates.
- confidence: The confidence score of the detection.
- landmarks: 5-point facial landmarks with shape (5, 2).

Example:
>>> from uniface import detect_faces
>>> image = cv2.imread("your_image.jpg")
>>> faces = detect_faces(image, method='retinaface', conf_thresh=0.8)
>>> import cv2
>>> image = cv2.imread('your_image.jpg')
>>> faces = detect_faces(image, method='retinaface', confidence_threshold=0.8)
>>> for face in faces:
... print(f"Found face with confidence: {face['confidence']}")
... print(f"BBox: {face['bbox']}")
... print(f'Found face with confidence: {face.confidence}')
... print(f'BBox: {face.bbox}')
"""
method_name = method.lower()

sorted_kwargs = sorted(kwargs.items())
cache_key = f"{method_name}_{str(sorted_kwargs)}"
cache_key = f'{method_name}_{sorted_kwargs!s}'

if cache_key not in _detector_cache:
# Pass kwargs to create the correctly configured detector
@@ -50,40 +58,36 @@ def detect_faces(image: np.ndarray, method: str = 'retinaface', **kwargs) -> Lis
return detector.detect(image)


def create_detector(method: str = 'retinaface', **kwargs) -> BaseDetector:
"""
Factory function to create face detectors.
def create_detector(method: str = 'retinaface', **kwargs: Any) -> BaseDetector:
"""Factory function to create face detectors.

Args:
method (str): Detection method. Options:
method: Detection method. Options:
- 'retinaface': RetinaFace detector (default)
- 'scrfd': SCRFD detector (fast and accurate)
**kwargs: Detector-specific parameters
- 'yolov5face': YOLOv5-Face detector (accurate with landmarks)
**kwargs: Detector-specific parameters.

Returns:
BaseDetector: Initialized detector instance
Initialized detector instance.

Raises:
ValueError: If method is not supported
ValueError: If method is not supported.

Examples:
Example:
>>> # Basic usage
>>> detector = create_detector('retinaface')

>>> # SCRFD detector with custom parameters
>>> from uniface.constants import SCRFDWeights
>>> detector = create_detector(
... 'scrfd',
... model_name=SCRFDWeights.SCRFD_10G_KPS,
... conf_thresh=0.8,
... input_size=(640, 640)
... 'scrfd', model_name=SCRFDWeights.SCRFD_10G_KPS, confidence_threshold=0.8, input_size=(640, 640)
... )

>>> # RetinaFace detector
>>> from uniface.constants import RetinaFaceWeights
>>> detector = create_detector(
... 'retinaface',
... model_name=RetinaFaceWeights.MNET_V2,
... conf_thresh=0.8,
... nms_thresh=0.4
... 'retinaface', model_name=RetinaFaceWeights.MNET_V2, confidence_threshold=0.8, nms_threshold=0.4
... )
"""
method = method.lower()
@@ -94,20 +98,20 @@ def create_detector(method: str = 'retinaface', **kwargs) -> BaseDetector:
elif method == 'scrfd':
return SCRFD(**kwargs)

elif method == 'yolov5face':
return YOLOv5Face(**kwargs)

else:
available_methods = ['retinaface', 'scrfd']
raise ValueError(
f"Unsupported detection method: '{method}'. "
f"Available methods: {available_methods}"
)
available_methods = ['retinaface', 'scrfd', 'yolov5face']
raise ValueError(f"Unsupported detection method: '{method}'. Available methods: {available_methods}")


def list_available_detectors() -> Dict[str, Dict[str, Any]]:
"""
List all available detection methods with their descriptions and parameters.
def list_available_detectors() -> dict[str, dict[str, Any]]:
"""List all available detection methods with their descriptions and parameters.

Returns:
Dict[str, Dict[str, Any]]: Dictionary of detector information
Dictionary mapping detector names to their information including
description, landmark support, paper reference, and default parameters.
"""
return {
'retinaface': {
@@ -116,10 +120,10 @@ def list_available_detectors() -> Dict[str, Dict[str, Any]]:
'paper': 'https://arxiv.org/abs/1905.00641',
'default_params': {
'model_name': 'mnet_v2',
'conf_thresh': 0.5,
'nms_thresh': 0.4,
'input_size': (640, 640)
}
'confidence_threshold': 0.5,
'nms_threshold': 0.4,
'input_size': (640, 640),
},
},
'scrfd': {
'description': 'SCRFD detector - fast and accurate with efficient architecture',
@@ -127,19 +131,31 @@ def list_available_detectors() -> Dict[str, Dict[str, Any]]:
'paper': 'https://arxiv.org/abs/2105.04714',
'default_params': {
'model_name': 'scrfd_10g_kps',
'conf_thresh': 0.5,
'nms_thresh': 0.4,
'input_size': (640, 640)
}
}
'confidence_threshold': 0.5,
'nms_threshold': 0.4,
'input_size': (640, 640),
},
},
'yolov5face': {
'description': 'YOLOv5-Face detector - accurate face detection with landmarks',
'supports_landmarks': True,
'paper': 'https://arxiv.org/abs/2105.12931',
'default_params': {
'model_name': 'yolov5s_face',
'confidence_threshold': 0.25,
'nms_threshold': 0.45,
'input_size': 640,
},
},
}


__all__ = [
'detect_faces',
'create_detector',
'list_available_detectors',
'SCRFD',
'RetinaFace',
'BaseDetector',
'RetinaFace',
'SCRFD',
'YOLOv5Face',
'create_detector',
'detect_faces',
'list_available_detectors',
]

@@ -2,75 +2,86 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

"""
Base classes for face detection.
"""
from __future__ import annotations

from abc import ABC, abstractmethod
from typing import Any

import numpy as np
from abc import ABC, abstractmethod
from typing import Tuple, Dict, Any

from uniface.types import Face

__all__ = ['BaseDetector']


class BaseDetector(ABC):
"""
Abstract base class for all face detectors.
"""Abstract base class for all face detectors.

This class defines the interface that all face detectors must implement,
ensuring consistency across different detection methods.

Attributes:
config: Dictionary containing detector configuration parameters.
_supports_landmarks: Flag indicating if detector supports landmark detection.
"""

def __init__(self, **kwargs):
"""Initialize the detector with configuration parameters."""
self.config = kwargs

@abstractmethod
def detect(self, image: np.ndarray, **kwargs) -> Tuple[np.ndarray, np.ndarray]:
"""
Detect faces in an image.
def __init__(self, **kwargs: Any) -> None:
"""Initialize the detector with configuration parameters.

Args:
image (np.ndarray): Input image as numpy array with shape (H, W, C)
**kwargs: Additional detection parameters
**kwargs: Detector-specific configuration parameters.
"""
self.config: dict[str, Any] = kwargs
self._supports_landmarks: bool = False

@abstractmethod
def detect(self, image: np.ndarray, **kwargs: Any) -> list[Face]:
"""Detect faces in an image.

Args:
image: Input image as numpy array with shape (H, W, C) in BGR format.
**kwargs: Additional detection parameters.

Returns:
Tuple[np.ndarray, np.ndarray]: (detections, landmarks)
- detections: Bounding boxes with confidence scores, shape (N, 5)
Format: [x_min, y_min, x_max, y_max, confidence]
- landmarks: Facial landmark points, shape (N, 5, 2) for 5-point landmarks
or (N, 68, 2) for 68-point landmarks. Empty array if not supported.
List of detected Face objects, each containing:
- bbox: Bounding box coordinates with shape (4,) as [x1, y1, x2, y2].
- confidence: Detection confidence score (0.0 to 1.0).
- landmarks: Facial landmarks with shape (5, 2) for 5-point landmarks.

Example:
>>> faces = detector.detect(image)
>>> for face in faces:
... bbox = face.bbox # np.ndarray with shape (4,)
... confidence = face.confidence # float
... landmarks = face.landmarks # np.ndarray with shape (5, 2)
"""
pass

@abstractmethod
def preprocess(self, image: np.ndarray) -> np.ndarray:
"""
Preprocess input image for detection.
"""Preprocess input image for detection.

Args:
image (np.ndarray): Input image
image: Input image with shape (H, W, C).

Returns:
np.ndarray: Preprocessed image tensor
Preprocessed image tensor ready for inference.
"""
pass

@abstractmethod
def postprocess(self, outputs, **kwargs) -> Tuple[np.ndarray, np.ndarray]:
"""
Postprocess model outputs to get final detections.
def postprocess(self, outputs: Any, **kwargs: Any) -> Any:
"""Postprocess model outputs to get final detections.

Args:
outputs: Raw model outputs
**kwargs: Additional postprocessing parameters
outputs: Raw model outputs.
**kwargs: Additional postprocessing parameters.

Returns:
Tuple[np.ndarray, np.ndarray]: (detections, landmarks)
Processed outputs (implementation-specific format, typically tuple of arrays).
"""
pass

def __str__(self) -> str:
"""String representation of the detector."""
return f"{self.__class__.__name__}({self.config})"
return f'{self.__class__.__name__}({self.config})'

def __repr__(self) -> str:
"""Detailed string representation."""
@@ -78,23 +89,33 @@ class BaseDetector(ABC):

@property
def supports_landmarks(self) -> bool:
"""
Whether this detector supports landmark detection.
"""Whether this detector supports landmark detection.

Returns:
bool: True if landmarks are supported, False otherwise
True if landmarks are supported, False otherwise.
"""
return hasattr(self, '_supports_landmarks') and self._supports_landmarks

def get_info(self) -> Dict[str, Any]:
"""
Get detector information and configuration.
def get_info(self) -> dict[str, Any]:
"""Get detector information and configuration.

Returns:
Dict[str, Any]: Detector information
Dictionary containing detector name, landmark support, and config.
"""
return {
'name': self.__class__.__name__,
'supports_landmarks': self._supports_landmarks,
'config': self.config
'config': self.config,
}

def __call__(self, image: np.ndarray, **kwargs: Any) -> list[Face]:
"""Callable shortcut for the `detect` method.

Args:
image: Input image as numpy array with shape (H, W, C) in BGR format.
**kwargs: Additional detection parameters.

Returns:
List of detected Face objects.
"""
return self.detect(image, **kwargs)
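To make the abstract interface above concrete, here is a minimal dummy subclass. It is a sketch only (the BaseDetector import path is assumed); the real implementations are RetinaFace, SCRFD and YOLOv5Face:

# --- illustrative sketch, not part of the diff ---
import numpy as np
from uniface.types import Face
from uniface.detection.base import BaseDetector  # import path assumed

class DummyDetector(BaseDetector):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self._supports_landmarks = False

    def preprocess(self, image: np.ndarray) -> np.ndarray:
        return image[None].astype(np.float32)  # add batch dimension

    def postprocess(self, outputs, **kwargs):
        return outputs

    def detect(self, image: np.ndarray, **kwargs) -> list[Face]:
        h, w = image.shape[:2]
        # Pretend the whole frame is one face, just to exercise the interface.
        return [Face(bbox=np.array([0, 0, w, h], dtype=np.float32),
                     confidence=1.0,
                     landmarks=np.zeros((5, 2), dtype=np.float32))]

print(DummyDetector(note='demo').get_info())  # name, supports_landmarks, config
# --- end sketch ---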
@@ -2,23 +2,26 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

from __future__ import annotations

from typing import Any, Literal

import numpy as np

from typing import Tuple, List, Literal, Dict, Any

from uniface.common import (
decode_boxes,
decode_landmarks,
generate_anchors,
non_max_suppression,
resize_image,
)
from uniface.constants import RetinaFaceWeights
from uniface.log import Logger
from uniface.model_store import verify_model_weights
from uniface.constants import RetinaFaceWeights
from uniface.onnx_utils import create_onnx_session
from uniface.types import Face

from .base import BaseDetector
from .utils import (
non_max_supression,
resize_image,
decode_boxes,
generate_anchors,
decode_landmarks
)


class RetinaFace(BaseDetector):
@@ -27,21 +30,24 @@ class RetinaFace(BaseDetector):

Title: "RetinaFace: Single-stage Dense Face Localisation in the Wild"
Paper: https://arxiv.org/abs/1905.00641
Code: https://github.com/yakhyo/retinaface-pytorch

Args:
**kwargs: Keyword arguments passed to BaseDetector and RetinaFace. Supported keys include:
model_name (RetinaFaceWeights, optional): Model weights to use. Defaults to `RetinaFaceWeights.MNET_V2`.
conf_thresh (float, optional): Confidence threshold for filtering detections. Defaults to 0.5.
nms_thresh (float, optional): Non-maximum suppression (NMS) IoU threshold. Defaults to 0.4.
pre_nms_topk (int, optional): Number of top-scoring boxes considered before NMS. Defaults to 5000.
post_nms_topk (int, optional): Max number of detections kept after NMS. Defaults to 750.
dynamic_size (bool, optional): If True, generate anchors dynamically per input image. Defaults to False.
input_size (Tuple[int, int], optional): Fixed input size (width, height) if `dynamic_size=False`. Defaults to (640, 640).
model_name (RetinaFaceWeights): Model weights to use. Defaults to `RetinaFaceWeights.MNET_V2`.
confidence_threshold (float): Confidence threshold for filtering detections. Defaults to 0.5.
nms_threshold (float): Non-maximum suppression (NMS) IoU threshold. Defaults to 0.4.
input_size (Tuple[int, int]): Fixed input size (width, height) if `dynamic_size=False`.
Defaults to (640, 640).
Note: Non-default sizes may cause slower inference and CoreML compatibility issues.
**kwargs: Advanced options:
pre_nms_topk (int): Number of top-scoring boxes considered before NMS. Defaults to 5000.
post_nms_topk (int): Max number of detections kept after NMS. Defaults to 750.
dynamic_size (bool): If True, generate anchors dynamically per input image. Defaults to False.

Attributes:
model_name (RetinaFaceWeights): Selected model variant.
conf_thresh (float): Threshold for confidence-based filtering.
nms_thresh (float): IoU threshold used for NMS.
confidence_threshold (float): Threshold for confidence-based filtering.
nms_threshold (float): IoU threshold used for NMS.
pre_nms_topk (int): Limit on proposals before applying NMS.
post_nms_topk (int): Limit on retained detections after NMS.
dynamic_size (bool): Flag indicating dynamic or static input sizing.
@@ -55,50 +61,65 @@ class RetinaFace(BaseDetector):
RuntimeError: If the ONNX model fails to load or initialize.
"""

def __init__(self, **kwargs) -> None:
super().__init__(**kwargs)
def __init__(
self,
*,
model_name: RetinaFaceWeights = RetinaFaceWeights.MNET_V2,
confidence_threshold: float = 0.5,
nms_threshold: float = 0.4,
input_size: tuple[int, int] = (640, 640),
**kwargs: Any,
) -> None:
super().__init__(
model_name=model_name,
confidence_threshold=confidence_threshold,
nms_threshold=nms_threshold,
input_size=input_size,
**kwargs,
)
self._supports_landmarks = True  # RetinaFace supports landmarks

self.model_name = kwargs.get('model_name', RetinaFaceWeights.MNET_V2)
self.conf_thresh = kwargs.get('conf_thresh', 0.5)
self.nms_thresh = kwargs.get('nms_thresh', 0.4)
self.model_name = model_name
self.confidence_threshold = confidence_threshold
self.nms_threshold = nms_threshold
self.input_size = input_size

# Advanced options from kwargs
self.pre_nms_topk = kwargs.get('pre_nms_topk', 5000)
self.post_nms_topk = kwargs.get('post_nms_topk', 750)
self.dynamic_size = kwargs.get('dynamic_size', False)
self.input_size = kwargs.get('input_size', (640, 640))

Logger.info(
f"Initializing RetinaFace with model={self.model_name}, conf_thresh={self.conf_thresh}, nms_thresh={self.nms_thresh}, "
f"input_size={self.input_size}"
f'Initializing RetinaFace with model={self.model_name}, confidence_threshold={self.confidence_threshold}, '
f'nms_threshold={self.nms_threshold}, input_size={self.input_size}'
)

# Get path to model weights
self._model_path = verify_model_weights(self.model_name)
Logger.info(f"Verified model weights located at: {self._model_path}")
Logger.info(f'Verified model weights located at: {self._model_path}')

# Precompute anchors if using static size
if not self.dynamic_size and self.input_size is not None:
self._priors = generate_anchors(image_size=self.input_size)
Logger.debug("Generated anchors for static input size.")
Logger.debug('Generated anchors for static input size.')

# Initialize model
self._initialize_model(self._model_path)

def _initialize_model(self, model_path: str) -> None:
"""
Initializes an ONNX model session from the given path.
"""Initialize an ONNX model session from the given path.

Args:
model_path (str): The file path to the ONNX model.
model_path: The file path to the ONNX model.

Raises:
RuntimeError: If the model fails to load, logs an error and raises an exception.
RuntimeError: If the model fails to load.
"""
try:
self.session = create_onnx_session(model_path)
self.input_names = self.session.get_inputs()[0].name
self.output_names = [x.name for x in self.session.get_outputs()]
Logger.info(f"Successfully initialized the model from {model_path}")
Logger.info(f'Successfully initialized the model from {model_path}')
except Exception as e:
Logger.error(f"Failed to load model from '{model_path}': {e}", exc_info=True)
raise RuntimeError(f"Failed to initialize model session for '{model_path}'") from e
@@ -117,24 +138,25 @@ class RetinaFace(BaseDetector):
image = np.expand_dims(image, axis=0)  # Add batch dimension (1, C, H, W)
return image

def inference(self, input_tensor: np.ndarray) -> List[np.ndarray]:
def inference(self, input_tensor: np.ndarray) -> list[np.ndarray]:
"""Perform model inference on the preprocessed image tensor.

Args:
input_tensor (np.ndarray): Preprocessed input tensor.
input_tensor: Preprocessed input tensor with shape (1, C, H, W).

Returns:
Tuple[np.ndarray, np.ndarray]: Raw model outputs.
List of raw model outputs (location, confidence, landmarks).
"""
return self.session.run(self.output_names, {self.input_names: input_tensor})

def detect(
self,
image: np.ndarray,
*,
max_num: int = 0,
metric: Literal["default", "max"] = "max",
center_weight: float = 2.0
) -> List[Dict[str, Any]]:
metric: Literal['default', 'max'] = 'max',
center_weight: float = 2.0,
) -> list[Face]:
"""
Perform face detection on an input image and return bounding boxes and facial landmarks.

@@ -148,10 +170,19 @@ class RetinaFace(BaseDetector):
when using the "default" metric. Defaults to 2.0.

Returns:
List[Dict[str, Any]]: List of face detection dictionaries, each containing:
- 'bbox': [x1, y1, x2, y2] - Bounding box coordinates
- 'confidence': float - Detection confidence score
- 'landmarks': [[x1, y1], [x2, y2], [x3, y3], [x4, y4], [x5, y5]] - 5-point facial landmarks
List[Face]: List of Face objects, each containing:
- bbox (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
- confidence (float): Detection confidence score (0.0 to 1.0)
- landmarks (np.ndarray): 5-point facial landmarks with shape (5, 2)

Example:
>>> faces = detector.detect(image)
>>> for face in faces:
... bbox = face.bbox # np.ndarray with shape (4,)
... confidence = face.confidence # float
... landmarks = face.landmarks # np.ndarray with shape (5, 2)
... # Can pass landmarks directly to recognition
... embedding = recognizer.get_normalized_embedding(image, face.landmarks)
"""

original_height, original_width = image.shape[:2]
@@ -178,10 +209,12 @@ class RetinaFace(BaseDetector):

# Calculate offsets from image center
center = (original_height // 2, original_width // 2)
offsets = np.vstack([
(detections[:, 0] + detections[:, 2]) / 2 - center[1],
(detections[:, 1] + detections[:, 3]) / 2 - center[0]
])
offsets = np.vstack(
[
(detections[:, 0] + detections[:, 2]) / 2 - center[1],
(detections[:, 1] + detections[:, 3]) / 2 - center[0],
]
)
offset_dist_squared = np.sum(np.power(offsets, 2.0), axis=0)

# Calculate scores based on the chosen metric
@@ -198,67 +231,84 @@ class RetinaFace(BaseDetector):

faces = []
for i in range(detections.shape[0]):
face_dict = {
'bbox': detections[i, :4].astype(float).tolist(),
'confidence': detections[i, 4].item(),
'landmarks': landmarks[i].astype(float).tolist()
}
faces.append(face_dict)
face = Face(
bbox=detections[i, :4],
confidence=float(detections[i, 4]),
landmarks=landmarks[i],
)
faces.append(face)

return faces

def postprocess(self, outputs: List[np.ndarray], resize_factor: float, shape: Tuple[int, int]) -> Tuple[np.ndarray, np.ndarray]:
"""
Process the model outputs into final detection results.
def postprocess(
self,
outputs: list[np.ndarray],
resize_factor: float,
shape: tuple[int, int],
) -> tuple[np.ndarray, np.ndarray]:
"""Process the model outputs into final detection results.

Args:
outputs (List[np.ndarray]): Raw outputs from the detection model.
outputs: Raw outputs from the detection model containing:
- outputs[0]: Location predictions (bounding box coordinates).
- outputs[1]: Class confidence scores.
- outputs[2]: Landmark predictions.
resize_factor (float): Factor used to resize the input image during preprocessing.
shape (Tuple[int, int]): Original shape of the image as (height, width).
resize_factor: Factor used to resize the input image during preprocessing.
shape: Original shape of the image as (width, height).

Returns:
Tuple[np.ndarray, np.ndarray]: Processed results containing:
- detections (np.ndarray): Array of detected bounding boxes with confidence scores.
Shape: (num_detections, 5), where each row is [x_min, y_min, x_max, y_max, score].
- landmarks (np.ndarray): Array of detected facial landmarks.
Shape: (num_detections, 5, 2), where each row contains 5 landmark points (x, y).
A tuple containing:
- detections: Array of detected bounding boxes with confidence scores,
shape (num_detections, 5), each row is [x1, y1, x2, y2, score].
- landmarks: Array of detected facial landmarks,
shape (num_detections, 5, 2), each row contains 5 landmark points (x, y).
"""
loc, conf, landmarks = outputs[0].squeeze(0), outputs[1].squeeze(0), outputs[2].squeeze(0)
location_predictions, confidence_scores, landmark_predictions = (
outputs[0].squeeze(0),
outputs[1].squeeze(0),
outputs[2].squeeze(0),
)

# Decode boxes and landmarks
boxes = decode_boxes(loc, self._priors)
landmarks = decode_landmarks(landmarks, self._priors)
boxes = decode_boxes(location_predictions, self._priors)
landmarks = decode_landmarks(landmark_predictions, self._priors)

boxes, landmarks = self._scale_detections(boxes, landmarks, resize_factor, shape=(shape[0], shape[1]))

# Extract confidence scores for the face class
scores = conf[:, 1]
mask = scores > self.conf_thresh
scores = confidence_scores[:, 1]
mask = scores > self.confidence_threshold

# Filter by confidence threshold
boxes, landmarks, scores = boxes[mask], landmarks[mask], scores[mask]

# Sort by scores
order = scores.argsort()[::-1][:self.pre_nms_topk]
order = scores.argsort()[::-1][: self.pre_nms_topk]
boxes, landmarks, scores = boxes[order], landmarks[order], scores[order]

# Apply NMS
detections = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
keep = non_max_supression(detections, self.nms_thresh)
keep = non_max_suppression(detections, self.nms_threshold)
detections, landmarks = detections[keep], landmarks[keep]

# Keep top-k detections
detections, landmarks = detections[:self.post_nms_topk], landmarks[:self.post_nms_topk]
detections, landmarks = (
detections[: self.post_nms_topk],
landmarks[: self.post_nms_topk],
)

landmarks = landmarks.reshape(-1, 5, 2).astype(np.int32)
landmarks = landmarks.reshape(-1, 5, 2).astype(np.float32)

return detections, landmarks
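The postprocess method above implements a filter -> sort -> NMS -> top-k pipeline. The same flow on toy data, using the renamed non_max_suppression helper from uniface.common:

# --- illustrative sketch, not part of the diff ---
import numpy as np
from uniface.common import non_max_suppression

boxes = np.array([[10, 10, 100, 100], [12, 12, 98, 102], [200, 200, 260, 280]], np.float32)
scores = np.array([0.92, 0.60, 0.30], np.float32)
confidence_threshold, nms_threshold, post_nms_topk = 0.5, 0.4, 750

mask = scores > confidence_threshold                     # 1) drop low-confidence boxes
boxes, scores = boxes[mask], scores[mask]
order = scores.argsort()[::-1]                           # 2) sort by score, best first
detections = np.hstack([boxes[order], scores[order, None]]).astype(np.float32)
keep = non_max_suppression(detections, nms_threshold)    # 3) suppress overlapping boxes
detections = detections[keep][:post_nms_topk]            # 4) cap at post_nms_topk
print(detections)  # the 0.92 box survives; the heavily overlapping 0.60 box is suppressed
# --- end sketch ---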
def _scale_detections(self, boxes: np.ndarray, landmarks: np.ndarray, resize_factor: float, shape: Tuple[int, int]) -> Tuple[np.ndarray, np.ndarray]:
# Scale bounding boxes and landmarks to the original image size.
def _scale_detections(
self,
boxes: np.ndarray,
landmarks: np.ndarray,
resize_factor: float,
shape: tuple[int, int],
) -> tuple[np.ndarray, np.ndarray]:
"""Scale bounding boxes and landmarks to the original image size."""
bbox_scale = np.array([shape[0], shape[1]] * 2)
boxes = boxes * bbox_scale / resize_factor

@@ -266,64 +316,3 @@ class RetinaFace(BaseDetector):
landmarks = landmarks * landmark_scale / resize_factor

return boxes, landmarks


# TODO: below is only for testing, remove it later
def draw_bbox(frame, bbox, score, color=(0, 255, 0), thickness=2):
x1, y1, x2, y2 = map(int, bbox)  # Unpack 4 bbox values
cv2.rectangle(frame, (x1, y1), (x2, y2), color, thickness)
cv2.putText(frame, f"{score:.2f}", (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)


def draw_keypoints(frame, points, color=(0, 0, 255), radius=2):
for (x, y) in points.astype(np.int32):
cv2.circle(frame, (int(x), int(y)), radius, color, -1)


if __name__ == "__main__":
import cv2
detector = RetinaFace(model_name=RetinaFaceWeights.MNET_050)
print(detector.get_info())
cap = cv2.VideoCapture(0)

if not cap.isOpened():
print("Failed to open webcam.")
exit()

print("Webcam started. Press 'q' to exit.")

while True:
ret, frame = cap.read()
if not ret:
print("Failed to read frame.")
break

# Get face detections as list of dictionaries
faces = detector.detect(frame)

# Process each detected face
for face in faces:
# Extract bbox and landmarks from dictionary
bbox = face['bbox']  # [x1, y1, x2, y2]
landmarks = face['landmarks']  # [[x1, y1], [x2, y2], ...]
confidence = face['confidence']

# Pass bbox and confidence separately
draw_bbox(frame, bbox, confidence)

# Convert landmarks to numpy array format if needed
if landmarks is not None and len(landmarks) > 0:
# Convert list of [x, y] pairs to numpy array
points = np.array(landmarks, dtype=np.float32)  # Shape: (5, 2)
draw_keypoints(frame, points)

# Display face count
cv2.putText(frame, f"Faces: {len(faces)}", (10, 30),
cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)

cv2.imshow("FaceDetection", frame)
if cv2.waitKey(1) & 0xFF == ord("q"):
break

cap.release()
cv2.destroyAllWindows()
@@ -2,20 +2,22 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

from typing import Any, Dict, List, Literal, Tuple
from __future__ import annotations

from typing import Any, Literal

import cv2
import numpy as np

from uniface.common import distance2bbox, distance2kps, non_max_suppression, resize_image
from uniface.constants import SCRFDWeights
from uniface.log import Logger
from uniface.model_store import verify_model_weights
from uniface.onnx_utils import create_onnx_session
from uniface.types import Face

from .base import BaseDetector
from .utils import distance2bbox, distance2kps, non_max_supression, resize_image

__all__ = ["SCRFD"]
__all__ = ['SCRFD']


class SCRFD(BaseDetector):
@@ -24,20 +26,24 @@ class SCRFD(BaseDetector):

Title: "Sample and Computation Redistribution for Efficient Face Detection"
Paper: https://arxiv.org/abs/2105.04714
Code: https://github.com/insightface/insightface

Args:
**kwargs: Keyword arguments passed to BaseDetector and SCRFD. Supported keys include:
model_name (SCRFDWeights, optional): Predefined model enum (e.g., `SCRFD_10G_KPS`).
Specifies the SCRFD variant to load. Defaults to SCRFD_10G_KPS.
conf_thresh (float, optional): Confidence threshold for filtering detections. Defaults to 0.5.
nms_thresh (float, optional): Non-Maximum Suppression threshold. Defaults to 0.4.
input_size (Tuple[int, int], optional): Input image size (width, height). Defaults to (640, 640).
model_name (SCRFDWeights): Predefined model enum (e.g., `SCRFD_10G_KPS`).
Specifies the SCRFD variant to load. Defaults to SCRFD_10G_KPS.
confidence_threshold (float): Confidence threshold for filtering detections. Defaults to 0.5.
nms_threshold (float): Non-Maximum Suppression threshold. Defaults to 0.4.
input_size (Tuple[int, int]): Input image size (width, height).
Defaults to (640, 640).
Note: Non-default sizes may cause slower inference and CoreML compatibility issues.
**kwargs: Reserved for future advanced options.

Attributes:
conf_thresh (float): Threshold used to filter low-confidence detections.
nms_thresh (float): Threshold used during NMS to suppress overlapping boxes.
model_name (SCRFDWeights): Selected model variant.
confidence_threshold (float): Threshold used to filter low-confidence detections.
nms_threshold (float): Threshold used during NMS to suppress overlapping boxes.
input_size (Tuple[int, int]): Image size to which inputs are resized before inference.
_fmc (int): Number of feature map levels used in the model.
_num_feature_maps (int): Number of feature map levels used in the model.
_feat_stride_fpn (List[int]): Feature map strides corresponding to each detection level.
_num_anchors (int): Number of anchors per feature location.
_center_cache (Dict): Cached anchor centers for efficient forward passes.
@@ -48,65 +54,74 @@ class SCRFD(BaseDetector):
RuntimeError: If the ONNX model fails to load or initialize.
"""

def __init__(self, **kwargs) -> None:
super().__init__(**kwargs)
def __init__(
self,
*,
model_name: SCRFDWeights = SCRFDWeights.SCRFD_10G_KPS,
confidence_threshold: float = 0.5,
nms_threshold: float = 0.4,
input_size: tuple[int, int] = (640, 640),
**kwargs: Any,
) -> None:
super().__init__(
model_name=model_name,
confidence_threshold=confidence_threshold,
nms_threshold=nms_threshold,
input_size=input_size,
**kwargs,
)
self._supports_landmarks = True  # SCRFD supports landmarks

model_name = kwargs.get("model_name", SCRFDWeights.SCRFD_10G_KPS)
conf_thresh = kwargs.get("conf_thresh", 0.5)
nms_thresh = kwargs.get("nms_thresh", 0.4)
input_size = kwargs.get("input_size", (640, 640))

self.conf_thresh = conf_thresh
self.nms_thresh = nms_thresh
self.model_name = model_name
self.confidence_threshold = confidence_threshold
self.nms_threshold = nms_threshold
self.input_size = input_size

# ------- SCRFD model params ------
self._fmc = 3
self._num_feature_maps = 3
self._feat_stride_fpn = [8, 16, 32]
self._num_anchors = 2
self._center_cache = {}
# ---------------------------------

Logger.info(
f"Initializing SCRFD with model={model_name}, conf_thresh={conf_thresh}, nms_thresh={nms_thresh}, "
f"input_size={input_size}"
f'Initializing SCRFD with model={self.model_name}, confidence_threshold={self.confidence_threshold}, '
f'nms_threshold={self.nms_threshold}, input_size={self.input_size}'
)

# Get path to model weights
self._model_path = verify_model_weights(model_name)
Logger.info(f"Verified model weights located at: {self._model_path}")
self._model_path = verify_model_weights(self.model_name)
Logger.info(f'Verified model weights located at: {self._model_path}')

# Initialize model
self._initialize_model(self._model_path)

def _initialize_model(self, model_path: str) -> None:
"""
Initializes an ONNX model session from the given path.
"""Initialize an ONNX model session from the given path.

Args:
model_path (str): The file path to the ONNX model.
model_path: The file path to the ONNX model.

Raises:
RuntimeError: If the model fails to load, logs an error and raises an exception.
RuntimeError: If the model fails to load.
"""
try:
self.session = create_onnx_session(model_path)
self.input_names = self.session.get_inputs()[0].name
self.output_names = [x.name for x in self.session.get_outputs()]
Logger.info(f"Successfully initialized the model from {model_path}")
Logger.info(f'Successfully initialized the model from {model_path}')
except Exception as e:
Logger.error(f"Failed to load model from '{model_path}': {e}", exc_info=True)
raise RuntimeError(f"Failed to initialize model session for '{model_path}'") from e

def preprocess(self, image: np.ndarray) -> Tuple[np.ndarray, Tuple[int, int]]:
def preprocess(self, image: np.ndarray) -> np.ndarray:
"""Preprocess image for inference.

Args:
image (np.ndarray): Input image
image: Input image with shape (H, W, C).

Returns:
Tuple[np.ndarray, Tuple[int, int]]: Preprocessed blob and input size
Preprocessed image tensor with shape (1, C, H, W).
"""
image = image.astype(np.float32)
image = (image - 127.5) / 127.5
@@ -115,29 +130,42 @@ class SCRFD(BaseDetector):

return image

def inference(self, input_tensor: np.ndarray) -> List[np.ndarray]:
def inference(self, input_tensor: np.ndarray) -> list[np.ndarray]:
"""Perform model inference on the preprocessed image tensor.

Args:
input_tensor (np.ndarray): Preprocessed input tensor.
input_tensor: Preprocessed input tensor with shape (1, C, H, W).

Returns:
Tuple[np.ndarray, np.ndarray]: Raw model outputs.
List of raw model outputs.
"""
return self.session.run(self.output_names, {self.input_names: input_tensor})

def postprocess(self, outputs: List[np.ndarray], image_size: Tuple[int, int]):
scores_list = []
def postprocess(
self,
outputs: list[np.ndarray],
image_size: tuple[int, int],
) -> tuple[list[np.ndarray], list[np.ndarray], list[np.ndarray]]:
"""Process model outputs into detection results.

Args:
outputs: Raw outputs from the detection model.
image_size: Size of the input image as (height, width).

Returns:
Tuple of (scores_list, bboxes_list, landmarks_list).
"""
scores_list: list[np.ndarray] = []
bboxes_list = []
kpss_list = []

image_size = image_size

fmc = self._fmc
num_feature_maps = self._num_feature_maps
for idx, stride in enumerate(self._feat_stride_fpn):
scores = outputs[idx]
bbox_preds = outputs[fmc + idx] * stride
kps_preds = outputs[2 * fmc + idx] * stride
bbox_preds = outputs[num_feature_maps + idx] * stride
kps_preds = outputs[2 * num_feature_maps + idx] * stride

# Generate anchors
fm_height = image_size[0] // stride
@@ -157,7 +185,7 @@ class SCRFD(BaseDetector):
if len(self._center_cache) < 100:
self._center_cache[cache_key] = anchor_centers

pos_indices = np.where(scores >= self.conf_thresh)[0]
pos_indices = np.where(scores >= self.confidence_threshold)[0]
if len(pos_indices) == 0:
continue

@@ -173,8 +201,13 @@ class SCRFD(BaseDetector):
return scores_list, bboxes_list, kpss_list
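The anchor-center construction itself is elided by the hunk above. A sketch of the standard SCRFD grid it describes (fm_height = image_size[0] // stride, two anchors per cell) follows; this is an assumption for illustration, not the file's exact code:

# --- illustrative sketch, not part of the diff ---
import numpy as np

input_size = (640, 640)   # (height, width)
stride, num_anchors = 8, 2

fm_h, fm_w = input_size[0] // stride, input_size[1] // stride         # 80 x 80 cells
xv, yv = np.meshgrid(np.arange(fm_w), np.arange(fm_h))
anchor_centers = np.stack([xv, yv], axis=-1).reshape(-1, 2) * stride  # pixel coordinates
anchor_centers = np.repeat(anchor_centers, num_anchors, axis=0)       # duplicate per anchor

print(anchor_centers.shape)  # (12800, 2) -> 80 * 80 * 2 anchors at stride 8
# These centers are the `points` argument consumed by distance2bbox()/distance2kps().
# --- end sketch ---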
def detect(
self, image: np.ndarray, max_num: int = 0, metric: Literal["default", "max"] = "max", center_weight: float = 2
) -> List[Dict[str, Any]]:
self,
image: np.ndarray,
*,
max_num: int = 0,
metric: Literal['default', 'max'] = 'max',
center_weight: float = 2.0,
) -> list[Face]:
"""
Perform face detection on an input image and return bounding boxes and facial landmarks.

@@ -188,10 +221,19 @@ class SCRFD(BaseDetector):
when using the "default" metric. Defaults to 2.0.

Returns:
List[Dict[str, Any]]: List of face detection dictionaries, each containing:
- 'bbox': [x1, y1, x2, y2] - Bounding box coordinates
- 'confidence': float - Detection confidence score
- 'landmarks': [[x1, y1], [x2, y2], [x3, y3], [x4, y4], [x5, y5]] - 5-point facial landmarks
List[Face]: List of Face objects, each containing:
- bbox (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
- confidence (float): Detection confidence score (0.0 to 1.0)
- landmarks (np.ndarray): 5-point facial landmarks with shape (5, 2)

Example:
>>> faces = detector.detect(image)
>>> for face in faces:
... bbox = face.bbox # np.ndarray with shape (4,)
... confidence = face.confidence # float
... landmarks = face.landmarks # np.ndarray with shape (5, 2)
... # Can pass landmarks directly to recognition
... embedding = recognizer.get_normalized_embedding(image, face.landmarks)
"""

original_height, original_width = image.shape[:2]
@@ -219,11 +261,11 @@ class SCRFD(BaseDetector):
pre_det = np.hstack((bboxes, scores)).astype(np.float32, copy=False)
pre_det = pre_det[order, :]

keep = non_max_supression(pre_det, threshold=self.nms_thresh)
keep = non_max_suppression(pre_det, threshold=self.nms_threshold)

detections = pre_det[keep, :]
landmarks = landmarks[order, :, :]
landmarks = landmarks[keep, :, :].astype(np.int32)
landmarks = landmarks[keep, :, :].astype(np.float32)

if 0 < max_num < detections.shape[0]:
# Calculate area of detections
@@ -240,7 +282,7 @@ class SCRFD(BaseDetector):

# Calculate scores based on the chosen metric
offset_dist_squared = np.sum(np.power(offsets, 2.0), axis=0)
if metric == "max":
if metric == 'max':
values = area
else:
values = area - offset_dist_squared * center_weight
@@ -252,70 +294,11 @@ class SCRFD(BaseDetector):

faces = []
for i in range(detections.shape[0]):
face_dict = {
"bbox": detections[i, :4].astype(float).tolist(),
"confidence": detections[i, 4].item(),
"landmarks": landmarks[i].astype(float).tolist(),
}
faces.append(face_dict)
face = Face(
bbox=detections[i, :4],
confidence=float(detections[i, 4]),
landmarks=landmarks[i],
)
faces.append(face)

return faces


# TODO: below is only for testing, remove it later
def draw_bbox(frame, bbox, score, color=(0, 255, 0), thickness=2):
x1, y1, x2, y2 = map(int, bbox)  # Unpack 4 bbox values
cv2.rectangle(frame, (x1, y1), (x2, y2), color, thickness)
cv2.putText(frame, f"{score:.2f}", (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)


def draw_keypoints(frame, points, color=(0, 0, 255), radius=2):
for x, y in points.astype(np.int32):
cv2.circle(frame, (int(x), int(y)), radius, color, -1)


if __name__ == "__main__":
detector = SCRFD(model_name=SCRFDWeights.SCRFD_500M_KPS)
|
||||
print(detector.get_info())
|
||||
cap = cv2.VideoCapture(0)
|
||||
|
||||
if not cap.isOpened():
|
||||
print("Failed to open webcam.")
|
||||
exit()
|
||||
|
||||
print("Webcam started. Press 'q' to exit.")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
print("Failed to read frame.")
|
||||
break
|
||||
|
||||
# Get face detections as list of dictionaries
|
||||
faces = detector.detect(frame)
|
||||
|
||||
# Process each detected face
|
||||
for face in faces:
|
||||
# Extract bbox and landmarks from dictionary
|
||||
bbox = face["bbox"] # [x1, y1, x2, y2]
|
||||
landmarks = face["landmarks"] # [[x1, y1], [x2, y2], ...]
|
||||
confidence = face["confidence"]
|
||||
|
||||
# Pass bbox and confidence separately
|
||||
draw_bbox(frame, bbox, confidence)
|
||||
|
||||
# Convert landmarks to numpy array format if needed
|
||||
if landmarks is not None and len(landmarks) > 0:
|
||||
# Convert list of [x, y] pairs to numpy array
|
||||
points = np.array(landmarks, dtype=np.float32) # Shape: (5, 2)
|
||||
draw_keypoints(frame, points)
|
||||
|
||||
# Display face count
|
||||
cv2.putText(frame, f"Faces: {len(faces)}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
|
||||
|
||||
cv2.imshow("FaceDetection", frame)
|
||||
if cv2.waitKey(1) & 0xFF == ord("q"):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
@@ -1,232 +0,0 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
import cv2
|
||||
import math
|
||||
import itertools
|
||||
import numpy as np
|
||||
|
||||
from typing import Tuple, List
|
||||
|
||||
|
||||
def resize_image(frame, target_shape: Tuple[int, int] = (640, 640)) -> Tuple[np.ndarray, float]:
|
||||
"""
|
||||
Resize an image to fit within a target shape while keeping its aspect ratio.
|
||||
|
||||
Args:
|
||||
frame (np.ndarray): Input image.
|
||||
target_shape (Tuple[int, int]): Target size (width, height). Defaults to (640, 640).
|
||||
|
||||
Returns:
|
||||
Tuple[np.ndarray, float]: Resized image on a blank canvas and the resize factor.
|
||||
"""
|
||||
width, height = target_shape
|
||||
|
||||
# Aspect-ratio preserving resize
|
||||
im_ratio = float(frame.shape[0]) / frame.shape[1]
|
||||
model_ratio = height / width
|
||||
if im_ratio > model_ratio:
|
||||
new_height = height
|
||||
new_width = int(new_height / im_ratio)
|
||||
else:
|
||||
new_width = width
|
||||
new_height = int(new_width * im_ratio)
|
||||
|
||||
resize_factor = float(new_height) / frame.shape[0]
|
||||
resized_frame = cv2.resize(frame, (new_width, new_height))
|
||||
|
||||
# Create blank image and place resized image on it
|
||||
image = np.zeros((height, width, 3), dtype=np.uint8)
|
||||
image[:new_height, :new_width, :] = resized_frame
|
||||
|
||||
return image, resize_factor
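
A minimal sketch (not repository code) of how the returned resize_factor is typically used: detections predicted on the 640x640 canvas are divided by it to land back in original-image coordinates. The helper name and the `raw_boxes` array are illustrative assumptions.

import numpy as np

def boxes_to_original(raw_boxes: np.ndarray, resize_factor: float) -> np.ndarray:
    # resize_image places the resized frame at the top-left of the canvas,
    # so undoing the scaling is a plain division; no offset is needed.
    return raw_boxes / resize_factor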
|
||||
|
||||
|
||||
def generate_anchors(image_size: Tuple[int, int] = (640, 640)) -> np.ndarray:
|
||||
"""
|
||||
Generate anchor boxes for a given image size.
|
||||
|
||||
Args:
|
||||
image_size (Tuple[int, int]): Input image size (width, height). Defaults to (640, 640).
|
||||
|
||||
Returns:
|
||||
np.ndarray: Anchor box coordinates as a NumPy array.
|
||||
"""
|
||||
image_size = image_size
|
||||
|
||||
steps = [8, 16, 32]
|
||||
min_sizes = [[16, 32], [64, 128], [256, 512]]
|
||||
|
||||
anchors = []
|
||||
feature_maps = [
|
||||
[
|
||||
math.ceil(image_size[0] / step),
|
||||
math.ceil(image_size[1] / step)
|
||||
] for step in steps
|
||||
]
|
||||
|
||||
for k, (map_height, map_width) in enumerate(feature_maps):
|
||||
step = steps[k]
|
||||
for i, j in itertools.product(range(map_height), range(map_width)):
|
||||
for min_size in min_sizes[k]:
|
||||
s_kx = min_size / image_size[1]
|
||||
s_ky = min_size / image_size[0]
|
||||
|
||||
dense_cx = [x * step / image_size[1] for x in [j + 0.5]]
|
||||
dense_cy = [y * step / image_size[0] for y in [i + 0.5]]
|
||||
for cy, cx in itertools.product(dense_cy, dense_cx):
|
||||
anchors += [cx, cy, s_kx, s_ky]
|
||||
|
||||
output = np.array(anchors, dtype=np.float32).reshape(-1, 4)
|
||||
return output
|
||||
|
||||
|
||||
def non_max_supression(dets: List[np.ndarray], threshold: float):
|
||||
"""
|
||||
Apply Non-Maximum Suppression (NMS) to reduce overlapping bounding boxes based on a threshold.
|
||||
|
||||
Args:
|
||||
dets (numpy.ndarray): Array of detections with each row as [x1, y1, x2, y2, score].
|
||||
threshold (float): IoU threshold for suppression.
|
||||
|
||||
Returns:
|
||||
list: Indices of bounding boxes retained after suppression.
|
||||
"""
|
||||
x1 = dets[:, 0]
|
||||
y1 = dets[:, 1]
|
||||
x2 = dets[:, 2]
|
||||
y2 = dets[:, 3]
|
||||
scores = dets[:, 4]
|
||||
|
||||
areas = (x2 - x1 + 1) * (y2 - y1 + 1)
|
||||
order = scores.argsort()[::-1]
|
||||
|
||||
keep = []
|
||||
while order.size > 0:
|
||||
i = order[0]
|
||||
keep.append(i)
|
||||
xx1 = np.maximum(x1[i], x1[order[1:]])
|
||||
yy1 = np.maximum(y1[i], y1[order[1:]])
|
||||
xx2 = np.minimum(x2[i], x2[order[1:]])
|
||||
yy2 = np.minimum(y2[i], y2[order[1:]])
|
||||
|
||||
w = np.maximum(0.0, xx2 - xx1 + 1)
|
||||
h = np.maximum(0.0, yy2 - yy1 + 1)
|
||||
inter = w * h
|
||||
ovr = inter / (areas[i] + areas[order[1:]] - inter)
|
||||
|
||||
inds = np.where(ovr <= threshold)[0]
|
||||
order = order[inds + 1]
|
||||
|
||||
return keep
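
A short usage sketch for the helper above (illustrative only): the `boxes` and `scores` arrays are made-up candidates and 0.4 is an assumed IoU threshold.

import numpy as np

boxes = np.array([[10, 10, 50, 50], [12, 12, 52, 52], [100, 100, 140, 140]], dtype=np.float32)
scores = np.array([0.9, 0.8, 0.7], dtype=np.float32)

dets = np.hstack([boxes, scores[:, None]])      # each row is [x1, y1, x2, y2, score]
keep = non_max_supression(dets, threshold=0.4)  # indices of the boxes that survive
final_dets = dets[keep]                         # the two overlapping boxes collapse to one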
|
||||
|
||||
|
||||
def decode_boxes(loc, priors, variances=[0.1, 0.2]) -> np.ndarray:
|
||||
"""
|
||||
Decode locations from predictions using priors to undo
|
||||
the encoding done for offset regression at train time.
|
||||
|
||||
Args:
|
||||
loc (np.ndarray): Location predictions for loc layers, shape: [num_priors, 4]
|
||||
priors (np.ndarray): Prior boxes in center-offset form, shape: [num_priors, 4]
|
||||
variances (list[float]): Variances of prior boxes
|
||||
|
||||
Returns:
|
||||
np.ndarray: Decoded bounding box predictions
|
||||
"""
|
||||
# Compute centers of predicted boxes
|
||||
cxcy = priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:]
|
||||
|
||||
# Compute widths and heights of predicted boxes
|
||||
wh = priors[:, 2:] * np.exp(loc[:, 2:] * variances[1])
|
||||
|
||||
# Convert center, size to corner coordinates
|
||||
boxes = np.zeros_like(loc)
|
||||
boxes[:, :2] = cxcy - wh / 2 # xmin, ymin
|
||||
boxes[:, 2:] = cxcy + wh / 2 # xmax, ymax
|
||||
|
||||
return boxes
|
||||
|
||||
|
||||
def decode_landmarks(predictions, priors, variances=[0.1, 0.2]) -> np.ndarray:
|
||||
"""
|
||||
Decode landmark predictions using prior boxes.
|
||||
|
||||
Args:
|
||||
predictions (np.ndarray): Landmark predictions, shape: [num_priors, 10]
|
||||
priors (np.ndarray): Prior boxes, shape: [num_priors, 4]
|
||||
variances (list): Scaling factors for landmark offsets.
|
||||
|
||||
Returns:
|
||||
np.ndarray: Decoded landmarks, shape: [num_priors, 10]
|
||||
"""
|
||||
|
||||
# Reshape predictions to [num_priors, 5, 2] to process landmark points
|
||||
predictions = predictions.reshape(predictions.shape[0], 5, 2)
|
||||
|
||||
# Expand priors to match (num_priors, 5, 2)
|
||||
priors_xy = np.repeat(priors[:, :2][:, np.newaxis, :], 5, axis=1) # (num_priors, 5, 2)
|
||||
priors_wh = np.repeat(priors[:, 2:][:, np.newaxis, :], 5, axis=1) # (num_priors, 5, 2)
|
||||
|
||||
# Compute absolute landmark positions
|
||||
landmarks = priors_xy + predictions * variances[0] * priors_wh
|
||||
|
||||
# Flatten back to [num_priors, 10]
|
||||
landmarks = landmarks.reshape(landmarks.shape[0], -1)
|
||||
|
||||
return landmarks
|
||||
|
||||
|
||||
def distance2bbox(points, distance, max_shape=None):
|
||||
"""Decode distance prediction to bounding box.
|
||||
|
||||
Args:
|
||||
points (Tensor): Shape (n, 2), [x, y].
|
||||
distance (Tensor): Distance from the given point to 4
|
||||
boundaries (left, top, right, bottom).
|
||||
max_shape (tuple): Shape of the image.
|
||||
|
||||
Returns:
|
||||
Tensor: Decoded bounding boxes with shape (n, 4).
|
||||
"""
|
||||
x1 = points[:, 0] - distance[:, 0]
|
||||
y1 = points[:, 1] - distance[:, 1]
|
||||
x2 = points[:, 0] + distance[:, 2]
|
||||
y2 = points[:, 1] + distance[:, 3]
|
||||
if max_shape is not None:
|
||||
x1 = x1.clamp(min=0, max=max_shape[1])
|
||||
y1 = y1.clamp(min=0, max=max_shape[0])
|
||||
x2 = x2.clamp(min=0, max=max_shape[1])
|
||||
y2 = y2.clamp(min=0, max=max_shape[0])
|
||||
else:
|
||||
x1 = np.maximum(x1, 0)
|
||||
y1 = np.maximum(y1, 0)
|
||||
x2 = np.maximum(x2, 0)
|
||||
y2 = np.maximum(y2, 0)
|
||||
|
||||
return np.stack([x1, y1, x2, y2], axis=-1)
|
||||
|
||||
|
||||
def distance2kps(points, distance, max_shape=None):
|
||||
"""Decode distance prediction to keypoints.
|
||||
|
||||
Args:
|
||||
points (Tensor): Shape (n, 2), [x, y].
|
||||
distance (Tensor): Distance from the given point to 4
|
||||
boundaries (left, top, right, bottom).
|
||||
max_shape (tuple): Shape of the image.
|
||||
|
||||
Returns:
|
||||
Tensor: Decoded keypoints with shape (n, 2k).
|
||||
"""
|
||||
preds = []
|
||||
for i in range(0, distance.shape[1], 2):
|
||||
px = points[:, i % 2] + distance[:, i]
|
||||
py = points[:, i % 2 + 1] + distance[:, i + 1]
|
||||
if max_shape is not None:
|
||||
px = px.clamp(min=0, max=max_shape[1])
|
||||
py = py.clamp(min=0, max=max_shape[0])
|
||||
preds.append(px)
|
||||
preds.append(py)
|
||||
return np.stack(preds, axis=-1)
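
A hedged sketch of how these two decoders are combined with a grid of anchor centers in an SCRFD-style head; the stride, feature-map size, single-anchor layout, and random predictions are assumptions made purely for illustration.

import numpy as np

stride = 8
fm_h, fm_w = 80, 80  # 640 / 8

# One anchor centre per feature-map cell, laid out on the stride grid.
xs, ys = np.meshgrid(np.arange(fm_w), np.arange(fm_h))
centers = np.stack([xs.ravel(), ys.ravel()], axis=-1).astype(np.float32) * stride

bbox_preds = np.random.rand(centers.shape[0], 4).astype(np.float32) * stride   # distances to box edges
kps_preds = np.random.rand(centers.shape[0], 10).astype(np.float32) * stride   # five (x, y) offsets

boxes = distance2bbox(centers, bbox_preds)  # (N, 4) as [x1, y1, x2, y2]
kps = distance2kps(centers, kps_preds)      # (N, 10), i.e. five (x, y) landmarks per anchor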
|
||||
341
uniface/detection/yolov5.py
Normal file
@@ -0,0 +1,341 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from typing import Any, Literal
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from uniface.common import non_max_suppression
|
||||
from uniface.constants import YOLOv5FaceWeights
|
||||
from uniface.log import Logger
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.onnx_utils import create_onnx_session
|
||||
from uniface.types import Face
|
||||
|
||||
from .base import BaseDetector
|
||||
|
||||
__all__ = ['YOLOv5Face']
|
||||
|
||||
|
||||
class YOLOv5Face(BaseDetector):
|
||||
"""
|
||||
Face detector based on the YOLOv5-Face architecture.
|
||||
|
||||
Title: "YOLO5Face: Why Reinventing a Face Detector"
|
||||
Paper: https://arxiv.org/abs/2105.12931
|
||||
Code: https://github.com/yakhyo/yolov5-face-onnx-inference (ONNX inference implementation)
|
||||
|
||||
Args:
|
||||
model_name (YOLOv5FaceWeights): Predefined model enum (e.g., `YOLOV5S`).
|
||||
Specifies the YOLOv5-Face variant to load. Defaults to YOLOV5S.
|
||||
confidence_threshold (float): Confidence threshold for filtering detections. Defaults to 0.6.
|
||||
nms_threshold (float): Non-Maximum Suppression threshold. Defaults to 0.5.
|
||||
input_size (int): Input image size. Defaults to 640.
|
||||
Note: The ONNX model's input resolution is fixed at 640; any other value will cause inference errors.
|
||||
**kwargs: Advanced options:
|
||||
max_det (int): Maximum number of detections to return. Defaults to 750.
|
||||
|
||||
Attributes:
|
||||
model_name (YOLOv5FaceWeights): Selected model variant.
|
||||
confidence_threshold (float): Threshold used to filter low-confidence detections.
|
||||
nms_threshold (float): Threshold used during NMS to suppress overlapping boxes.
|
||||
input_size (int): Image size to which inputs are resized before inference.
|
||||
max_det (int): Maximum number of detections to return.
|
||||
_model_path (str): Absolute path to the downloaded/verified model weights.
|
||||
|
||||
Raises:
|
||||
ValueError: If the model weights are invalid or not found.
|
||||
RuntimeError: If the ONNX model fails to load or initialize.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
*,
|
||||
model_name: YOLOv5FaceWeights = YOLOv5FaceWeights.YOLOV5S,
|
||||
confidence_threshold: float = 0.6,
|
||||
nms_threshold: float = 0.5,
|
||||
input_size: int = 640,
|
||||
**kwargs: Any,
|
||||
) -> None:
|
||||
super().__init__(
|
||||
model_name=model_name,
|
||||
confidence_threshold=confidence_threshold,
|
||||
nms_threshold=nms_threshold,
|
||||
input_size=input_size,
|
||||
**kwargs,
|
||||
)
|
||||
self._supports_landmarks = True # YOLOv5-Face supports landmarks
|
||||
|
||||
# Validate input size
|
||||
if input_size != 640:
|
||||
raise ValueError(
|
||||
f'YOLOv5Face only supports input_size=640 (got {input_size}). The ONNX model has a fixed input shape.'
|
||||
)
|
||||
|
||||
self.model_name = model_name
|
||||
self.confidence_threshold = confidence_threshold
|
||||
self.nms_threshold = nms_threshold
|
||||
self.input_size = input_size
|
||||
|
||||
# Advanced options from kwargs
|
||||
self.max_det = kwargs.get('max_det', 750)
|
||||
|
||||
Logger.info(
|
||||
f'Initializing YOLOv5Face with model={self.model_name}, confidence_threshold={self.confidence_threshold}, '
|
||||
f'nms_threshold={self.nms_threshold}, input_size={self.input_size}'
|
||||
)
|
||||
|
||||
# Get path to model weights
|
||||
self._model_path = verify_model_weights(self.model_name)
|
||||
Logger.info(f'Verified model weights located at: {self._model_path}')
|
||||
|
||||
# Initialize model
|
||||
self._initialize_model(self._model_path)
|
||||
|
||||
def _initialize_model(self, model_path: str) -> None:
|
||||
"""
|
||||
Initializes an ONNX model session from the given path.
|
||||
|
||||
Args:
|
||||
model_path (str): The file path to the ONNX model.
|
||||
|
||||
Raises:
|
||||
RuntimeError: If the model fails to load, logs an error and raises an exception.
|
||||
"""
|
||||
try:
|
||||
self.session = create_onnx_session(model_path)
|
||||
self.input_names = self.session.get_inputs()[0].name
|
||||
self.output_names = [x.name for x in self.session.get_outputs()]
|
||||
Logger.info(f'Successfully initialized the model from {model_path}')
|
||||
except Exception as e:
|
||||
Logger.error(f"Failed to load model from '{model_path}': {e}", exc_info=True)
|
||||
raise RuntimeError(f"Failed to initialize model session for '{model_path}'") from e
|
||||
|
||||
def preprocess(self, image: np.ndarray) -> tuple[np.ndarray, float, tuple[int, int]]:
|
||||
"""
|
||||
Preprocess image for inference.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): Input image (BGR format)
|
||||
|
||||
Returns:
|
||||
Tuple[np.ndarray, float, Tuple[int, int]]: Preprocessed image, scale ratio, and padding
|
||||
"""
|
||||
# Get original image shape
|
||||
img_h, img_w = image.shape[:2]
|
||||
|
||||
# Calculate scale ratio
|
||||
scale = min(self.input_size / img_h, self.input_size / img_w)
|
||||
new_h, new_w = int(img_h * scale), int(img_w * scale)
|
||||
|
||||
# Resize image
|
||||
img_resized = cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_LINEAR)
|
||||
|
||||
# Create padded image
|
||||
img_padded = np.full((self.input_size, self.input_size, 3), 114, dtype=np.uint8)
|
||||
|
||||
# Calculate padding
|
||||
pad_h = (self.input_size - new_h) // 2
|
||||
pad_w = (self.input_size - new_w) // 2
|
||||
|
||||
# Place resized image in center
|
||||
img_padded[pad_h : pad_h + new_h, pad_w : pad_w + new_w] = img_resized
|
||||
|
||||
# Convert to RGB and normalize
|
||||
img_rgb = cv2.cvtColor(img_padded, cv2.COLOR_BGR2RGB)
|
||||
img_normalized = img_rgb.astype(np.float32) / 255.0
|
||||
|
||||
# Transpose to CHW format (HWC -> CHW) and add batch dimension
|
||||
img_transposed = np.transpose(img_normalized, (2, 0, 1))
|
||||
img_batch = np.expand_dims(img_transposed, axis=0)
|
||||
img_batch = np.ascontiguousarray(img_batch)
|
||||
|
||||
return img_batch, scale, (pad_w, pad_h)
|
||||
|
||||
def inference(self, input_tensor: np.ndarray) -> list[np.ndarray]:
|
||||
"""Perform model inference on the preprocessed image tensor.
|
||||
|
||||
Args:
|
||||
input_tensor (np.ndarray): Preprocessed input tensor.
|
||||
|
||||
Returns:
|
||||
List[np.ndarray]: Raw model outputs.
|
||||
"""
|
||||
return self.session.run(self.output_names, {self.input_names: input_tensor})
|
||||
|
||||
def postprocess(
|
||||
self,
|
||||
predictions: np.ndarray,
|
||||
scale: float,
|
||||
padding: tuple[int, int],
|
||||
) -> tuple[np.ndarray, np.ndarray]:
|
||||
"""
|
||||
Postprocess model predictions.
|
||||
|
||||
Args:
|
||||
predictions (np.ndarray): Raw model output
|
||||
scale (float): Scale ratio used in preprocessing
|
||||
padding (Tuple[int, int]): Padding used in preprocessing
|
||||
|
||||
Returns:
|
||||
Tuple[np.ndarray, np.ndarray]: Filtered detections and landmarks
|
||||
- detections: [x1, y1, x2, y2, conf]
|
||||
- landmarks: array of shape (N, 5, 2), five (x, y) keypoints per detection
|
||||
"""
|
||||
# predictions shape: (1, 25200, 16)
|
||||
# 16 = [x, y, w, h, obj_conf, cls_conf, 10 landmarks (5 points * 2 coords)]
|
||||
|
||||
predictions = predictions[0] # Remove batch dimension
|
||||
|
||||
# Filter by confidence
|
||||
mask = predictions[:, 4] >= self.confidence_threshold
|
||||
predictions = predictions[mask]
|
||||
|
||||
if len(predictions) == 0:
|
||||
return np.array([]), np.array([])
|
||||
|
||||
# Convert from xywh to xyxy
|
||||
boxes = self._xywh2xyxy(predictions[:, :4])
|
||||
|
||||
# Get confidence scores
|
||||
scores = predictions[:, 4]
|
||||
|
||||
# Get landmarks (5 points, 10 coordinates)
|
||||
landmarks = predictions[:, 5:15].copy()
|
||||
|
||||
# Apply NMS
|
||||
detections_for_nms = np.hstack((boxes, scores[:, None])).astype(np.float32, copy=False)
|
||||
keep = non_max_suppression(detections_for_nms, self.nms_threshold)
|
||||
|
||||
if len(keep) == 0:
|
||||
return np.array([]), np.array([])
|
||||
|
||||
# Filter detections and limit to max_det
|
||||
keep = keep[: self.max_det]
|
||||
boxes = boxes[keep]
|
||||
scores = scores[keep]
|
||||
landmarks = landmarks[keep]
|
||||
|
||||
# Scale back to original image coordinates
|
||||
pad_w, pad_h = padding
|
||||
boxes[:, [0, 2]] = (boxes[:, [0, 2]] - pad_w) / scale
|
||||
boxes[:, [1, 3]] = (boxes[:, [1, 3]] - pad_h) / scale
|
||||
|
||||
# Scale landmarks
|
||||
for i in range(5):
|
||||
landmarks[:, i * 2] = (landmarks[:, i * 2] - pad_w) / scale
|
||||
landmarks[:, i * 2 + 1] = (landmarks[:, i * 2 + 1] - pad_h) / scale
|
||||
|
||||
# Reshape landmarks to (N, 5, 2)
|
||||
landmarks = landmarks.reshape(-1, 5, 2)
|
||||
|
||||
# Combine results
|
||||
detections = np.concatenate([boxes, scores[:, None]], axis=1)
|
||||
|
||||
return detections, landmarks
|
||||
|
||||
def _xywh2xyxy(self, x: np.ndarray) -> np.ndarray:
|
||||
"""
|
||||
Convert bounding box format from xywh to xyxy.
|
||||
|
||||
Args:
|
||||
x (np.ndarray): Boxes in [x, y, w, h] format
|
||||
|
||||
Returns:
|
||||
np.ndarray: Boxes in [x1, y1, x2, y2] format
|
||||
"""
|
||||
y = np.copy(x)
|
||||
y[..., 0] = x[..., 0] - x[..., 2] / 2 # x1
|
||||
y[..., 1] = x[..., 1] - x[..., 3] / 2 # y1
|
||||
y[..., 2] = x[..., 0] + x[..., 2] / 2 # x2
|
||||
y[..., 3] = x[..., 1] + x[..., 3] / 2 # y2
|
||||
return y
|
||||
|
||||
def detect(
|
||||
self,
|
||||
image: np.ndarray,
|
||||
*,
|
||||
max_num: int = 0,
|
||||
metric: Literal['default', 'max'] = 'max',
|
||||
center_weight: float = 2.0,
|
||||
) -> list[Face]:
|
||||
"""
|
||||
Perform face detection on an input image and return bounding boxes and facial landmarks.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): Input image as a NumPy array of shape (H, W, C).
|
||||
max_num (int): Maximum number of detections to return. Use 0 to return all detections. Defaults to 0.
|
||||
metric (Literal["default", "max"]): Metric for ranking detections when `max_num` is limited.
|
||||
- "default": Prioritize detections closer to the image center.
|
||||
- "max": Prioritize detections with larger bounding box areas.
|
||||
center_weight (float): Weight for penalizing detections farther from the image center
|
||||
when using the "default" metric. Defaults to 2.0.
|
||||
|
||||
Returns:
|
||||
List[Face]: List of Face objects, each containing:
|
||||
- bbox (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
|
||||
- confidence (float): Detection confidence score (0.0 to 1.0)
|
||||
- landmarks (np.ndarray): 5-point facial landmarks with shape (5, 2)
|
||||
|
||||
Example:
|
||||
>>> faces = detector.detect(image)
|
||||
>>> for face in faces:
|
||||
... bbox = face.bbox # np.ndarray with shape (4,)
|
||||
... confidence = face.confidence # float
|
||||
... landmarks = face.landmarks # np.ndarray with shape (5, 2)
|
||||
... # Can pass landmarks directly to recognition
|
||||
... embedding = recognizer.get_normalized_embedding(image, face.landmarks)
|
||||
"""
|
||||
|
||||
original_height, original_width = image.shape[:2]
|
||||
|
||||
# Preprocess
|
||||
image_tensor, scale, padding = self.preprocess(image)
|
||||
|
||||
# ONNXRuntime inference
|
||||
outputs = self.inference(image_tensor)
|
||||
|
||||
# Postprocess
|
||||
detections, landmarks = self.postprocess(outputs[0], scale, padding)
|
||||
|
||||
# Handle case when no faces are detected
|
||||
if len(detections) == 0:
|
||||
return []
|
||||
|
||||
if 0 < max_num < detections.shape[0]:
|
||||
# Calculate area of detections
|
||||
area = (detections[:, 2] - detections[:, 0]) * (detections[:, 3] - detections[:, 1])
|
||||
|
||||
# Calculate offsets from image center
|
||||
center = (original_height // 2, original_width // 2)
|
||||
offsets = np.vstack(
|
||||
[
|
||||
(detections[:, 0] + detections[:, 2]) / 2 - center[1],
|
||||
(detections[:, 1] + detections[:, 3]) / 2 - center[0],
|
||||
]
|
||||
)
|
||||
|
||||
# Calculate scores based on the chosen metric
|
||||
offset_dist_squared = np.sum(np.power(offsets, 2.0), axis=0)
|
||||
if metric == 'max':
|
||||
values = area
|
||||
else:
|
||||
values = area - offset_dist_squared * center_weight
|
||||
|
||||
# Sort by scores and select top `max_num`
|
||||
sorted_indices = np.argsort(values)[::-1][:max_num]
|
||||
detections = detections[sorted_indices]
|
||||
landmarks = landmarks[sorted_indices]
|
||||
|
||||
faces = []
|
||||
for i in range(detections.shape[0]):
|
||||
face = Face(
|
||||
bbox=detections[i, :4],
|
||||
confidence=float(detections[i, 4]),
|
||||
landmarks=landmarks[i],
|
||||
)
|
||||
faces.append(face)
|
||||
|
||||
return faces
|
||||
@@ -2,46 +2,54 @@
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
from skimage.transform import SimilarityTransform
|
||||
from typing import Tuple, Union
|
||||
|
||||
__all__ = [
|
||||
'bbox_center_alignment',
|
||||
'compute_similarity',
|
||||
'face_alignment',
|
||||
'transform_points_2d',
|
||||
]
|
||||
|
||||
|
||||
__all__ = ["face_alignment", "compute_similarity", "bbox_center_alignment", "transform_points_2d"]
|
||||
|
||||
|
||||
# Reference alignment for facial landmarks (ArcFace)
|
||||
# Standard 5-point facial landmark reference for ArcFace alignment (112x112)
|
||||
reference_alignment: np.ndarray = np.array(
|
||||
[
|
||||
[38.2946, 51.6963],
|
||||
[73.5318, 51.5014],
|
||||
[56.0252, 71.7366],
|
||||
[41.5493, 92.3655],
|
||||
[70.7299, 92.2041]
|
||||
[70.7299, 92.2041],
|
||||
],
|
||||
dtype=np.float32
|
||||
dtype=np.float32,
|
||||
)
|
||||
|
||||
|
||||
def estimate_norm(landmark: np.ndarray, image_size: Union[int, Tuple[int, int]] = 112) -> Tuple[np.ndarray, np.ndarray]:
|
||||
"""
|
||||
Estimate the normalization transformation matrix for facial landmarks.
|
||||
def estimate_norm(
|
||||
landmark: np.ndarray,
|
||||
image_size: int | tuple[int, int] = 112,
|
||||
) -> tuple[np.ndarray, np.ndarray]:
|
||||
"""Estimate the normalization transformation matrix for facial landmarks.
|
||||
|
||||
Args:
|
||||
landmark (np.ndarray): Array of shape (5, 2) representing the coordinates of the facial landmarks.
|
||||
image_size (Union[int, Tuple[int, int]], optional): The size of the output image.
|
||||
Can be an integer (for square images) or a tuple (width, height). Default is 112.
|
||||
landmark: Array of shape (5, 2) representing the coordinates of the facial landmarks.
|
||||
image_size: The size of the output image. Can be an integer (for square images)
|
||||
or a tuple (width, height). Default is 112.
|
||||
|
||||
Returns:
|
||||
np.ndarray: The 2x3 transformation matrix for aligning the landmarks.
|
||||
np.ndarray: The 2x3 inverse transformation matrix for aligning the landmarks.
|
||||
A tuple containing:
|
||||
- The 2x3 transformation matrix for aligning the landmarks.
|
||||
- The 2x3 inverse transformation matrix.
|
||||
|
||||
Raises:
|
||||
AssertionError: If the input landmark array does not have the shape (5, 2)
|
||||
or if image_size is not a multiple of 112 or 128.
|
||||
or if image_size is not a multiple of 112 or 128.
|
||||
"""
|
||||
assert landmark.shape == (5, 2), "Landmark array must have shape (5, 2)."
|
||||
assert landmark.shape == (5, 2), 'Landmark array must have shape (5, 2).'
|
||||
|
||||
# Handle both int and tuple inputs
|
||||
if isinstance(image_size, tuple):
|
||||
@@ -49,7 +57,7 @@ def estimate_norm(landmark: np.ndarray, image_size: Union[int, Tuple[int, int]]
|
||||
else:
|
||||
size = image_size
|
||||
|
||||
assert size % 112 == 0 or size % 128 == 0, "Image size must be a multiple of 112 or 128."
|
||||
assert size % 112 == 0 or size % 128 == 0, 'Image size must be a multiple of 112 or 128.'
|
||||
|
||||
if size % 112 == 0:
|
||||
ratio = float(size) / 112.0
|
||||
@@ -72,22 +80,26 @@ def estimate_norm(landmark: np.ndarray, image_size: Union[int, Tuple[int, int]]
|
||||
return matrix, inverse_matrix
|
||||
|
||||
|
||||
def face_alignment(image: np.ndarray, landmark: np.ndarray, image_size: Union[int, Tuple[int, int]] = 112) -> Tuple[np.ndarray, np.ndarray]:
|
||||
"""
|
||||
Align the face in the input image based on the given facial landmarks.
|
||||
def face_alignment(
|
||||
image: np.ndarray,
|
||||
landmark: np.ndarray,
|
||||
image_size: int | tuple[int, int] = 112,
|
||||
) -> tuple[np.ndarray, np.ndarray]:
|
||||
"""Align the face in the input image based on the given facial landmarks.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): Input image as a NumPy array.
|
||||
landmark (np.ndarray): Array of shape (5, 2) representing the coordinates of the facial landmarks.
|
||||
image_size (Union[int, Tuple[int, int]], optional): The size of the aligned output image.
|
||||
Can be an integer (for square images) or a tuple (width, height). Default is 112.
|
||||
image: Input image as a NumPy array with shape (H, W, C).
|
||||
landmark: Array of shape (5, 2) representing the facial landmark coordinates.
|
||||
image_size: The size of the aligned output image. Can be an integer
|
||||
(for square images) or a tuple (width, height). Default is 112.
|
||||
|
||||
Returns:
|
||||
np.ndarray: The aligned face as a NumPy array.
|
||||
np.ndarray: The 2x3 transformation matrix used for alignment.
|
||||
A tuple containing:
|
||||
- The aligned face as a NumPy array.
|
||||
- The 2x3 inverse transformation matrix used for alignment.
|
||||
"""
|
||||
# Get the transformation matrix
|
||||
M, M_inv = estimate_norm(landmark, image_size)
|
||||
transform_matrix, inverse_transform = estimate_norm(landmark, image_size)
|
||||
|
||||
# Handle both int and tuple for warpAffine output size
|
||||
if isinstance(image_size, int):
|
||||
@@ -96,44 +108,50 @@ def face_alignment(image: np.ndarray, landmark: np.ndarray, image_size: Union[in
|
||||
output_size = image_size
|
||||
|
||||
# Warp the input image to align the face
|
||||
warped = cv2.warpAffine(image, M, output_size, borderValue=0.0)
|
||||
warped = cv2.warpAffine(image, transform_matrix, output_size, borderValue=0.0)
|
||||
|
||||
return warped, M_inv
|
||||
return warped, inverse_transform
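
A minimal sketch (assumptions: a placeholder frame and hand-picked landmark coordinates) showing how face_alignment is typically driven by five detector landmarks before recognition.

import numpy as np

image = np.zeros((480, 640, 3), dtype=np.uint8)  # placeholder frame; use a real BGR image in practice
landmarks = np.array(
    [[188.0, 204.0], [252.0, 201.0], [220.0, 240.0], [194.0, 270.0], [250.0, 268.0]],
    dtype=np.float32,
)  # five (x, y) points: eyes, nose tip, mouth corners

aligned, inverse = face_alignment(image, landmarks, image_size=112)
# `aligned` is a 112x112 crop ready for an embedding model;
# `inverse` maps points on the crop back to the source image.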
|
||||
|
||||
|
||||
def compute_similarity(feat1: np.ndarray, feat2: np.ndarray, normalized: bool = False) -> np.float32:
|
||||
"""Computing Similarity between two faces.
|
||||
"""Compute cosine similarity between two face embeddings.
|
||||
|
||||
Args:
|
||||
feat1 (np.ndarray): First embedding.
|
||||
feat2 (np.ndarray): Second embedding.
|
||||
normalized (bool): Set True if the embeddings are already L2 normalized.
|
||||
feat1: First embedding vector.
|
||||
feat2: Second embedding vector.
|
||||
normalized: Set True if the embeddings are already L2 normalized.
|
||||
|
||||
Returns:
|
||||
np.float32: Cosine similarity.
|
||||
Cosine similarity score in range [-1, 1].
|
||||
"""
|
||||
feat1 = feat1.ravel()
|
||||
feat2 = feat2.ravel()
|
||||
if normalized:
|
||||
return np.dot(feat1, feat2)
|
||||
else:
|
||||
return np.dot(feat1, feat2) / (np.linalg.norm(feat1) * np.linalg.norm(feat2) + 1e-5)
|
||||
# Add small epsilon to prevent division by zero
|
||||
return np.dot(feat1, feat2) / (np.linalg.norm(feat1) * np.linalg.norm(feat2) + 1e-5)
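
A small sketch built on compute_similarity (illustrative only); the 0.35 decision threshold is an assumption for the example, not a value prescribed by the library.

import numpy as np

def is_same_person(emb1: np.ndarray, emb2: np.ndarray, threshold: float = 0.35) -> bool:
    # Cosine similarity near 1.0 means the two embeddings point the same way.
    return float(compute_similarity(emb1, emb2)) >= threshold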
|
||||
|
||||
|
||||
def bbox_center_alignment(image, center, output_size, scale, rotation):
|
||||
"""
|
||||
Apply center-based alignment, scaling, and rotation to an image.
|
||||
def bbox_center_alignment(
|
||||
image: np.ndarray,
|
||||
center: tuple[float, float],
|
||||
output_size: int,
|
||||
scale: float,
|
||||
rotation: float,
|
||||
) -> tuple[np.ndarray, np.ndarray]:
|
||||
"""Apply center-based alignment, scaling, and rotation to an image.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): Input image.
|
||||
center (Tuple[float, float]): Center point (e.g., face center from bbox).
|
||||
output_size (int): Desired output image size (square).
|
||||
scale (float): Scaling factor to zoom in/out.
|
||||
rotation (float): Rotation angle in degrees (clockwise).
|
||||
image: Input image with shape (H, W, C).
|
||||
center: Center point (x, y), e.g., face center from bbox.
|
||||
output_size: Desired output image size (square).
|
||||
scale: Scaling factor to zoom in/out.
|
||||
rotation: Rotation angle in degrees (clockwise).
|
||||
|
||||
Returns:
|
||||
cropped (np.ndarray): Aligned and cropped image.
|
||||
M (np.ndarray): 2x3 affine transform matrix used.
|
||||
A tuple containing:
|
||||
- Aligned and cropped image with shape (output_size, output_size, C).
|
||||
- 2x3 affine transform matrix used.
|
||||
"""
|
||||
|
||||
# Convert rotation from degrees to radians
|
||||
@@ -166,15 +184,14 @@ def bbox_center_alignment(image, center, output_size, scale, rotation):
|
||||
|
||||
|
||||
def transform_points_2d(points: np.ndarray, transform: np.ndarray) -> np.ndarray:
|
||||
"""
|
||||
Apply a 2D affine transformation to an array of 2D points.
|
||||
"""Apply a 2D affine transformation to an array of 2D points.
|
||||
|
||||
Args:
|
||||
points (np.ndarray): An (N, 2) array of 2D points.
|
||||
transform (np.ndarray): A (2, 3) affine transformation matrix.
|
||||
points: An (N, 2) array of 2D points.
|
||||
transform: A (2, 3) affine transformation matrix.
|
||||
|
||||
Returns:
|
||||
np.ndarray: Transformed (N, 2) array of points.
|
||||
Transformed (N, 2) array of points.
|
||||
"""
|
||||
transformed = np.zeros_like(points, dtype=np.float32)
|
||||
for i in range(points.shape[0]):
|
||||
|
||||
54
uniface/gaze/__init__.py
Normal file
@@ -0,0 +1,54 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from uniface.types import GazeResult
|
||||
|
||||
from .base import BaseGazeEstimator
|
||||
from .models import MobileGaze
|
||||
|
||||
|
||||
def create_gaze_estimator(method: str = 'mobilegaze', **kwargs) -> BaseGazeEstimator:
|
||||
"""
|
||||
Factory function to create gaze estimators.
|
||||
|
||||
This function initializes and returns a gaze estimator instance based on the
|
||||
specified method. It acts as a high-level interface to the underlying
|
||||
model classes.
|
||||
|
||||
Args:
|
||||
method (str): The gaze estimation method to use.
|
||||
Options: 'mobilegaze' (default).
|
||||
**kwargs: Model-specific parameters passed to the estimator's constructor.
|
||||
For example, `model_name` can be used to select a specific
|
||||
backbone from `GazeWeights` enum (RESNET18, RESNET34, RESNET50,
|
||||
MOBILENET_V2, MOBILEONE_S0).
|
||||
|
||||
Returns:
|
||||
BaseGazeEstimator: An initialized gaze estimator instance ready for use.
|
||||
|
||||
Raises:
|
||||
ValueError: If the specified `method` is not supported.
|
||||
|
||||
Examples:
|
||||
>>> # Create the default MobileGaze estimator (ResNet18 backbone)
|
||||
>>> estimator = create_gaze_estimator()
|
||||
|
||||
>>> # Create with MobileNetV2 backbone
|
||||
>>> from uniface.constants import GazeWeights
|
||||
>>> estimator = create_gaze_estimator('mobilegaze', model_name=GazeWeights.MOBILENET_V2)
|
||||
|
||||
>>> # Use the estimator
|
||||
>>> result = estimator.estimate(face_crop)
|
||||
>>> print(f'Pitch: {result.pitch}, Yaw: {result.yaw}')
|
||||
"""
|
||||
method = method.lower()
|
||||
|
||||
if method in ('mobilegaze', 'mobile_gaze', 'gaze'):
|
||||
return MobileGaze(**kwargs)
|
||||
else:
|
||||
available = ['mobilegaze']
|
||||
raise ValueError(f"Unsupported gaze estimation method: '{method}'. Available: {available}")
|
||||
|
||||
|
||||
__all__ = ['BaseGazeEstimator', 'GazeResult', 'MobileGaze', 'create_gaze_estimator']
|
||||
113
uniface/gaze/base.py
Normal file
@@ -0,0 +1,113 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
import numpy as np
|
||||
|
||||
from uniface.types import GazeResult
|
||||
|
||||
__all__ = ['BaseGazeEstimator', 'GazeResult']
|
||||
|
||||
|
||||
class BaseGazeEstimator(ABC):
|
||||
"""
|
||||
Abstract base class for all gaze estimation models.
|
||||
|
||||
This class defines the common interface that all gaze estimators must implement,
|
||||
ensuring consistency across different gaze estimation methods. Gaze estimation
|
||||
predicts the direction a person is looking based on their face image.
|
||||
|
||||
The gaze direction is represented as pitch and yaw angles in radians:
|
||||
- Pitch: Vertical angle (positive = looking up, negative = looking down)
|
||||
- Yaw: Horizontal angle (positive = looking right, negative = looking left)
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def _initialize_model(self) -> None:
|
||||
"""
|
||||
Initialize the underlying model for inference.
|
||||
|
||||
This method should handle loading model weights, creating the
|
||||
inference session (e.g., ONNX Runtime), and any necessary
|
||||
setup procedures to prepare the model for prediction.
|
||||
|
||||
Raises:
|
||||
RuntimeError: If the model fails to load or initialize.
|
||||
"""
|
||||
raise NotImplementedError('Subclasses must implement the _initialize_model method.')
|
||||
|
||||
@abstractmethod
|
||||
def preprocess(self, face_image: np.ndarray) -> np.ndarray:
|
||||
"""
|
||||
Preprocess the input face image for model inference.
|
||||
|
||||
This method should take a raw face crop and convert it into the format
|
||||
expected by the model's inference engine (e.g., normalized tensor).
|
||||
|
||||
Args:
|
||||
face_image (np.ndarray): A cropped face image in BGR format with
|
||||
shape (H, W, C).
|
||||
|
||||
Returns:
|
||||
np.ndarray: The preprocessed image tensor ready for inference,
|
||||
typically with shape (1, C, H, W).
|
||||
"""
|
||||
raise NotImplementedError('Subclasses must implement the preprocess method.')
|
||||
|
||||
@abstractmethod
|
||||
def postprocess(self, outputs: tuple[np.ndarray, np.ndarray]) -> GazeResult:
|
||||
"""
|
||||
Postprocess raw model outputs into gaze angles.
|
||||
|
||||
This method takes the raw output from the model's inference and
|
||||
converts it into pitch and yaw angles in radians.
|
||||
|
||||
Args:
|
||||
outputs: Raw outputs from the model inference. The format depends
|
||||
on the specific model architecture.
|
||||
|
||||
Returns:
|
||||
GazeResult: Result containing pitch and yaw angles in radians.
|
||||
"""
|
||||
raise NotImplementedError('Subclasses must implement the postprocess method.')
|
||||
|
||||
@abstractmethod
|
||||
def estimate(self, face_image: np.ndarray) -> GazeResult:
|
||||
"""
|
||||
Perform end-to-end gaze estimation on a face image.
|
||||
|
||||
This method orchestrates the full pipeline: preprocessing the input,
|
||||
running inference, and postprocessing to return the gaze direction.
|
||||
|
||||
Args:
|
||||
face_image (np.ndarray): A cropped face image in BGR format.
|
||||
The face should be roughly centered and
|
||||
well-framed within the image.
|
||||
|
||||
Returns:
|
||||
GazeResult: Result containing pitch and yaw angles in radians:
|
||||
- pitch: Vertical gaze angle (positive = up, negative = down)
|
||||
- yaw: Horizontal gaze angle (positive = right, negative = left)
|
||||
|
||||
Example:
|
||||
>>> estimator = create_gaze_estimator()
|
||||
>>> result = estimator.estimate(face_crop)
|
||||
>>> print(f'Looking: pitch={np.degrees(result.pitch):.1f}°, yaw={np.degrees(result.yaw):.1f}°')
|
||||
"""
|
||||
raise NotImplementedError('Subclasses must implement the estimate method.')
|
||||
|
||||
def __call__(self, face_image: np.ndarray) -> GazeResult:
|
||||
"""
|
||||
Provides a convenient, callable shortcut for the `estimate` method.
|
||||
|
||||
Args:
|
||||
face_image (np.ndarray): A cropped face image in BGR format.
|
||||
|
||||
Returns:
|
||||
GazeResult: Result containing pitch and yaw angles in radians.
|
||||
"""
|
||||
return self.estimate(face_image)
|
||||
186
uniface/gaze/models.py
Normal file
@@ -0,0 +1,186 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from uniface.constants import GazeWeights
|
||||
from uniface.log import Logger
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.onnx_utils import create_onnx_session
|
||||
from uniface.types import GazeResult
|
||||
|
||||
from .base import BaseGazeEstimator
|
||||
|
||||
__all__ = ['MobileGaze']
|
||||
|
||||
|
||||
class MobileGaze(BaseGazeEstimator):
|
||||
"""
|
||||
MobileGaze: Real-Time Gaze Estimation with ONNX Runtime.
|
||||
|
||||
MobileGaze is a gaze estimation model that predicts gaze direction from a single
|
||||
face image. It supports multiple backbone architectures including ResNet 18/34/50,
|
||||
MobileNetV2, and MobileOne S0. The model uses a classification approach with binned
|
||||
angles, which are then decoded to continuous pitch and yaw values.
|
||||
|
||||
The model outputs gaze direction as pitch (vertical) and yaw (horizontal) angles
|
||||
in radians.
|
||||
|
||||
Reference:
|
||||
https://github.com/yakhyo/gaze-estimation
|
||||
|
||||
Args:
|
||||
model_name (GazeWeights): The enum specifying the gaze model backbone to load.
|
||||
Options: RESNET18, RESNET34, RESNET50, MOBILENET_V2, MOBILEONE_S0.
|
||||
Defaults to `GazeWeights.RESNET34`.
|
||||
input_size (Tuple[int, int]): The resolution (width, height) for the model's
|
||||
input. Defaults to (448, 448).
|
||||
|
||||
Attributes:
|
||||
input_size (Tuple[int, int]): Model input dimensions.
|
||||
input_mean (list): Per-channel mean values for normalization (ImageNet).
|
||||
input_std (list): Per-channel std values for normalization (ImageNet).
|
||||
|
||||
Example:
|
||||
>>> from uniface.gaze import MobileGaze
|
||||
>>> from uniface import RetinaFace
|
||||
>>>
|
||||
>>> detector = RetinaFace()
|
||||
>>> gaze_estimator = MobileGaze()
|
||||
>>>
|
||||
>>> # Detect faces and estimate gaze for each
|
||||
>>> faces = detector.detect(image)
|
||||
>>> for face in faces:
|
||||
... bbox = face.bbox
|
||||
... x1, y1, x2, y2 = map(int, bbox[:4])
|
||||
... face_crop = image[y1:y2, x1:x2]
|
||||
... result = gaze_estimator.estimate(face_crop)
|
||||
... print(f'Gaze: pitch={np.degrees(result.pitch):.1f}°, yaw={np.degrees(result.yaw):.1f}°')
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
model_name: GazeWeights = GazeWeights.RESNET34,
|
||||
input_size: tuple[int, int] = (448, 448),
|
||||
) -> None:
|
||||
Logger.info(f'Initializing MobileGaze with model={model_name}, input_size={input_size}')
|
||||
|
||||
self.input_size = input_size
|
||||
self.input_mean = [0.485, 0.456, 0.406]
|
||||
self.input_std = [0.229, 0.224, 0.225]
|
||||
|
||||
# Model specific parameters for bin-based classification (Gaze360 config)
|
||||
self._bins = 90
|
||||
self._binwidth = 4
|
||||
self._angle_offset = 180
|
||||
self._idx_tensor = np.arange(self._bins, dtype=np.float32)
|
||||
|
||||
self.model_path = verify_model_weights(model_name)
|
||||
self._initialize_model()
|
||||
|
||||
def _initialize_model(self) -> None:
|
||||
"""
|
||||
Initialize the ONNX model from the stored model path.
|
||||
|
||||
Raises:
|
||||
RuntimeError: If the model fails to load or initialize.
|
||||
"""
|
||||
try:
|
||||
self.session = create_onnx_session(self.model_path)
|
||||
|
||||
# Get input configuration
|
||||
input_cfg = self.session.get_inputs()[0]
|
||||
input_shape = input_cfg.shape
|
||||
self.input_name = input_cfg.name
|
||||
self.input_size = tuple(input_shape[2:4][::-1]) # Update from model
|
||||
|
||||
# Get output configuration
|
||||
outputs = self.session.get_outputs()
|
||||
self.output_names = [output.name for output in outputs]
|
||||
|
||||
if len(self.output_names) != 2:
|
||||
raise ValueError(f'Expected 2 output nodes (pitch, yaw), got {len(self.output_names)}')
|
||||
|
||||
Logger.info(f'MobileGaze initialized with input size {self.input_size}')
|
||||
|
||||
except Exception as e:
|
||||
Logger.error(f"Failed to load gaze model from '{self.model_path}'", exc_info=True)
|
||||
raise RuntimeError(f'Failed to initialize gaze model: {e}') from e
|
||||
|
||||
def preprocess(self, face_image: np.ndarray) -> np.ndarray:
|
||||
"""
|
||||
Preprocess a face crop for gaze estimation.
|
||||
|
||||
Args:
|
||||
face_image (np.ndarray): A cropped face image in BGR format.
|
||||
|
||||
Returns:
|
||||
np.ndarray: Preprocessed image tensor with shape (1, 3, H, W).
|
||||
"""
|
||||
# Convert BGR to RGB
|
||||
image = cv2.cvtColor(face_image, cv2.COLOR_BGR2RGB)
|
||||
|
||||
# Resize to model input size
|
||||
image = cv2.resize(image, self.input_size)
|
||||
|
||||
# Normalize to [0, 1] and apply normalization
|
||||
image = image.astype(np.float32) / 255.0
|
||||
mean = np.array(self.input_mean, dtype=np.float32)
|
||||
std = np.array(self.input_std, dtype=np.float32)
|
||||
image = (image - mean) / std
|
||||
|
||||
# HWC -> CHW -> NCHW
|
||||
image = np.transpose(image, (2, 0, 1))
|
||||
image = np.expand_dims(image, axis=0).astype(np.float32)
|
||||
|
||||
return image
|
||||
|
||||
def _softmax(self, x: np.ndarray) -> np.ndarray:
|
||||
"""Apply softmax along axis 1."""
|
||||
e_x = np.exp(x - np.max(x, axis=1, keepdims=True))
|
||||
return e_x / e_x.sum(axis=1, keepdims=True)
|
||||
|
||||
def postprocess(self, outputs: tuple[np.ndarray, np.ndarray]) -> GazeResult:
|
||||
"""
|
||||
Postprocess raw model outputs into gaze angles.
|
||||
|
||||
This method takes the raw output from the model's inference and
|
||||
converts it into pitch and yaw angles in radians.
|
||||
|
||||
Args:
|
||||
outputs: Raw outputs from the model inference. The format depends
|
||||
on the specific model architecture.
|
||||
|
||||
Returns:
|
||||
GazeResult: Result containing pitch and yaw angles in radians.
|
||||
"""
|
||||
pitch_logits, yaw_logits = outputs
|
||||
|
||||
# Convert logits to probabilities
|
||||
pitch_probs = self._softmax(pitch_logits)
|
||||
yaw_probs = self._softmax(yaw_logits)
|
||||
|
||||
# Compute expected bin index (soft-argmax)
|
||||
pitch_deg = np.sum(pitch_probs * self._idx_tensor, axis=1) * self._binwidth - self._angle_offset
|
||||
yaw_deg = np.sum(yaw_probs * self._idx_tensor, axis=1) * self._binwidth - self._angle_offset
|
||||
|
||||
# Convert degrees to radians
|
||||
pitch = float(np.radians(pitch_deg[0]))
|
||||
yaw = float(np.radians(yaw_deg[0]))
|
||||
|
||||
return GazeResult(pitch=pitch, yaw=yaw)
|
||||
|
||||
def estimate(self, face_image: np.ndarray) -> GazeResult:
|
||||
"""
|
||||
Perform end-to-end gaze estimation on a face image.
|
||||
|
||||
This method orchestrates the full pipeline: preprocessing the input,
|
||||
running inference, and postprocessing to return the gaze direction.
|
||||
"""
|
||||
input_tensor = self.preprocess(face_image)
|
||||
outputs = self.session.run(self.output_names, {self.input_name: input_tensor})
|
||||
|
||||
return self.postprocess((outputs[0], outputs[1]))
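
A hedged visualization sketch: projecting the returned pitch/yaw onto the image plane and drawing an arrow on the face crop. The projection below is a common convention and an assumption here, not part of the library.

import cv2
import numpy as np

def draw_gaze(face_crop: np.ndarray, pitch: float, yaw: float, length: int = 60) -> np.ndarray:
    h, w = face_crop.shape[:2]
    cx, cy = w // 2, h // 2
    dx = int(-length * np.sin(yaw) * np.cos(pitch))  # horizontal component
    dy = int(-length * np.sin(pitch))                # vertical component
    cv2.arrowedLine(face_crop, (cx, cy), (cx + dx, cy + dy), (0, 0, 255), 2)
    return face_crop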
|
||||
@@ -2,8 +2,8 @@
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from .models import Landmark106
|
||||
from .base import BaseLandmarker
|
||||
from .models import Landmark106
|
||||
|
||||
|
||||
def create_landmarker(method: str = '2d106det', **kwargs) -> BaseLandmarker:
|
||||
@@ -25,8 +25,4 @@ def create_landmarker(method: str = '2d106det', **kwargs) -> BaseLandmarker:
|
||||
raise ValueError(f"Unsupported method: '{method}'. Available: {available}")
|
||||
|
||||
|
||||
__all__ = [
|
||||
"create_landmarker",
|
||||
"Landmark106",
|
||||
"BaseLandmarker"
|
||||
]
|
||||
__all__ = ['BaseLandmarker', 'Landmark106', 'create_landmarker']
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
import numpy as np
|
||||
|
||||
|
||||
@@ -10,6 +11,7 @@ class BaseLandmarker(ABC):
|
||||
"""
|
||||
Abstract Base Class for all facial landmark models.
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def get_landmarks(self, image: np.ndarray, bbox: np.ndarray) -> np.ndarray:
|
||||
"""
|
||||
@@ -28,3 +30,15 @@ class BaseLandmarker(ABC):
|
||||
where N is the number of landmarks.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
def __call__(self, image: np.ndarray, bbox: np.ndarray) -> np.ndarray:
|
||||
"""Callable shortcut for the `get_landmarks` method.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): The full source image in BGR format.
|
||||
bbox (np.ndarray): A bounding box of a face [x1, y1, x2, y2].
|
||||
|
||||
Returns:
|
||||
np.ndarray: An array of predicted landmark points with shape (N, 2).
|
||||
"""
|
||||
return self.get_landmarks(image, bbox)
|
||||
|
||||
@@ -2,18 +2,19 @@
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
from typing import Tuple
|
||||
|
||||
from uniface.log import Logger
|
||||
from uniface.constants import LandmarkWeights
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.face_utils import bbox_center_alignment, transform_points_2d
|
||||
from uniface.log import Logger
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.onnx_utils import create_onnx_session
|
||||
|
||||
from .base import BaseLandmarker
|
||||
|
||||
__all__ = ['Landmark']
|
||||
__all__ = ['Landmark106']
|
||||
|
||||
|
||||
class Landmark106(BaseLandmarker):
|
||||
@@ -40,15 +41,13 @@ class Landmark106(BaseLandmarker):
|
||||
>>> print(landmarks.shape)
|
||||
(106, 2)
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
model_name: LandmarkWeights = LandmarkWeights.DEFAULT,
|
||||
input_size: Tuple[int, int] = (192, 192)
|
||||
input_size: tuple[int, int] = (192, 192),
|
||||
) -> None:
|
||||
Logger.info(
|
||||
f"Initializing Facial Landmark with model={model_name}, "
|
||||
f"input_size={input_size}"
|
||||
)
|
||||
Logger.info(f'Initializing Facial Landmark with model={model_name}, input_size={input_size}')
|
||||
self.input_size = input_size
|
||||
self.input_std = 1.0
|
||||
self.input_mean = 0.0
|
||||
@@ -79,13 +78,13 @@ class Landmark106(BaseLandmarker):
|
||||
self.lmk_dim = 2 # x,y coordinates
|
||||
self.lmk_num = output_shape[1] // self.lmk_dim # Number of landmarks
|
||||
|
||||
Logger.info(f"Model initialized with {self.lmk_num} landmarks")
|
||||
Logger.info(f'Model initialized with {self.lmk_num} landmarks')
|
||||
|
||||
except Exception as e:
|
||||
Logger.error(f"Failed to load landmark model from '{self.model_path}'", exc_info=True)
|
||||
raise RuntimeError(f"Failed to initialize landmark model: {e}")
|
||||
raise RuntimeError(f'Failed to initialize landmark model: {e}') from e
|
||||
|
||||
def preprocess(self, image: np.ndarray, bbox: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
|
||||
def preprocess(self, image: np.ndarray, bbox: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
|
||||
"""Prepares a face crop for inference.
|
||||
|
||||
This method takes a face bounding box, performs a center alignment to
|
||||
@@ -108,8 +107,11 @@ class Landmark106(BaseLandmarker):
|
||||
aligned_face, transform_matrix = bbox_center_alignment(image, center, self.input_size[0], scale, 0.0)
|
||||
|
||||
face_blob = cv2.dnn.blobFromImage(
|
||||
aligned_face, 1.0 / self.input_std, self.input_size,
|
||||
(self.input_mean, self.input_mean, self.input_mean), swapRB=True
|
||||
aligned_face,
|
||||
1.0 / self.input_std,
|
||||
self.input_size,
|
||||
(self.input_mean, self.input_mean, self.input_mean),
|
||||
swapRB=True,
|
||||
)
|
||||
return face_blob, transform_matrix
|
||||
|
||||
@@ -129,7 +131,7 @@ class Landmark106(BaseLandmarker):
|
||||
"""
|
||||
landmarks = predictions.reshape((-1, 2))
|
||||
landmarks[:, 0:2] += 1
|
||||
landmarks[:, 0:2] *= (self.input_size[0] // 2)
|
||||
landmarks[:, 0:2] *= self.input_size[0] // 2
|
||||
|
||||
inverse_matrix = cv2.invertAffineTransform(transform_matrix)
|
||||
landmarks = transform_points_2d(landmarks, inverse_matrix)
|
||||
@@ -149,64 +151,6 @@ class Landmark106(BaseLandmarker):
|
||||
np.ndarray: An array of predicted landmark points with shape (106, 2).
|
||||
"""
|
||||
face_blob, transform_matrix = self.preprocess(image, bbox)
|
||||
raw_predictions = self.session.run(
|
||||
self.output_names, {self.input_names[0]: face_blob}
|
||||
)[0][0]
|
||||
raw_predictions = self.session.run(self.output_names, {self.input_names[0]: face_blob})[0][0]
|
||||
landmarks = self.postprocess(raw_predictions, transform_matrix)
|
||||
return landmarks
|
||||
|
||||
|
||||
|
||||
# Testing code
|
||||
if __name__ == "__main__":
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.landmark import Landmark106
|
||||
|
||||
face_detector = RetinaFace()
|
||||
landmarker = Landmark106()
|
||||
|
||||
cap = cv2.VideoCapture(0)
|
||||
if not cap.isOpened():
|
||||
print("Webcam not available.")
|
||||
exit()
|
||||
|
||||
print("Press 'q' to quit.")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
print("Frame capture failed.")
|
||||
break
|
||||
|
||||
# 2. The detect method returns a list of dictionaries
|
||||
faces = face_detector.detect(frame)
|
||||
|
||||
if not faces:
|
||||
cv2.imshow("Facial Landmark Detection", frame)
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
continue
|
||||
|
||||
# 3. Loop through the list of face dictionaries
|
||||
for face in faces:
|
||||
# Extract the bounding box
|
||||
bbox = face['bbox']
|
||||
|
||||
# 4. Get landmarks for the current face using its bounding box
|
||||
landmarks = landmarker.get_landmarks(frame, bbox)
|
||||
|
||||
# --- Drawing Logic ---
|
||||
# Draw the landmarks
|
||||
for (x, y) in landmarks.astype(int):
|
||||
cv2.circle(frame, (x, y), 2, (0, 255, 0), -1)
|
||||
|
||||
# Draw the bounding box
|
||||
x1, y1, x2, y2 = map(int, bbox)
|
||||
cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)
|
||||
|
||||
cv2.imshow("Facial Landmark Detection", frame)
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
@@ -1,28 +1,45 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Logging utilities for UniFace.
|
||||
|
||||
This module provides a centralized logger for the UniFace library,
|
||||
allowing users to enable verbose logging when debugging or developing.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
|
||||
__all__ = ['Logger', 'enable_logging']
|
||||
|
||||
# Create logger for uniface
|
||||
Logger = logging.getLogger("uniface")
|
||||
Logger = logging.getLogger('uniface')
|
||||
Logger.setLevel(logging.WARNING) # Only show warnings/errors by default
|
||||
Logger.addHandler(logging.NullHandler())
|
||||
|
||||
|
||||
def enable_logging(level=logging.INFO):
|
||||
"""
|
||||
Enable verbose logging for uniface.
|
||||
def enable_logging(level: int = logging.INFO) -> None:
|
||||
"""Enable verbose logging for uniface.
|
||||
|
||||
Configures the logger to output messages to stdout with timestamps.
|
||||
Call this function to see informational messages during model loading
|
||||
and inference.
|
||||
|
||||
Args:
|
||||
level: Logging level (logging.DEBUG, logging.INFO, etc.)
|
||||
level: Logging level. Defaults to logging.INFO.
|
||||
Common values: logging.DEBUG, logging.INFO, logging.WARNING.
|
||||
|
||||
Example:
|
||||
>>> from uniface import enable_logging
|
||||
>>> import logging
|
||||
>>> enable_logging() # Show INFO logs
|
||||
>>> enable_logging(level=logging.DEBUG) # Show DEBUG logs
|
||||
"""
|
||||
Logger.handlers.clear()
|
||||
handler = logging.StreamHandler()
|
||||
handler.setFormatter(logging.Formatter(
|
||||
"%(asctime)s - %(levelname)s - %(message)s",
|
||||
datefmt="%Y-%m-%d %H:%M:%S"
|
||||
))
|
||||
handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s', datefmt='%Y-%m-%d %H:%M:%S'))
|
||||
Logger.addHandler(handler)
|
||||
Logger.setLevel(level)
|
||||
Logger.propagate = False
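
As an aside, because the library logger is simply the standard logging.getLogger('uniface'), an application can attach its own handlers instead of calling enable_logging; a minimal sketch (the log file name is a placeholder):

import logging

# Route uniface messages into an application log file.
handler = logging.FileHandler('app.log')
handler.setFormatter(logging.Formatter('%(asctime)s %(name)s %(levelname)s %(message)s'))
logging.getLogger('uniface').addHandler(handler)
logging.getLogger('uniface').setLevel(logging.INFO)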
|
||||
|
||||
@@ -2,45 +2,53 @@
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
import os
|
||||
"""Model weight management for UniFace.
|
||||
|
||||
This module handles downloading, caching, and verifying model weights
|
||||
using SHA-256 checksums for integrity validation.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from enum import Enum
|
||||
import hashlib
|
||||
import os
|
||||
|
||||
import requests
|
||||
from tqdm import tqdm
|
||||
|
||||
from uniface.log import Logger
|
||||
import uniface.constants as const
|
||||
|
||||
from uniface.log import Logger
|
||||
|
||||
__all__ = ['verify_model_weights']
|
||||
|
||||
|
||||
def verify_model_weights(model_name: str, root: str = '~/.uniface/models') -> str:
|
||||
"""
|
||||
Ensure model weights are present, downloading and verifying them using SHA-256 if necessary.
|
||||
def verify_model_weights(model_name: Enum, root: str = '~/.uniface/models') -> str:
|
||||
"""Ensure model weights are present, downloading and verifying them if necessary.
|
||||
|
||||
Given a model identifier from an Enum class (e.g., `RetinaFaceWeights.MNET_V2`), this function checks if
|
||||
the corresponding `.onnx` weight file exists locally. If not, it downloads the file from a predefined URL.
|
||||
After download, the file’s integrity is verified using a SHA-256 hash. If verification fails, the file is deleted
|
||||
and an error is raised.
|
||||
Given a model identifier from an Enum class (e.g., `RetinaFaceWeights.MNET_V2`),
|
||||
this function checks if the corresponding weight file exists locally. If not,
|
||||
it downloads the file from a predefined URL and verifies its integrity using
|
||||
a SHA-256 hash.
|
||||
|
||||
Args:
|
||||
model_name (Enum): Model weight identifier (e.g., `RetinaFaceWeights.MNET_V2`, `ArcFaceWeights.RESNET`, etc.).
|
||||
root (str, optional): Directory to store or locate the model weights. Defaults to '~/.uniface/models'.
|
||||
model_name: Model weight identifier enum (e.g., `RetinaFaceWeights.MNET_V2`).
|
||||
root: Directory to store or locate the model weights.
|
||||
Defaults to '~/.uniface/models'.
|
||||
|
||||
Returns:
|
||||
str: Absolute path to the verified model weights file.
|
||||
Absolute path to the verified model weights file.
|
||||
|
||||
Raises:
|
||||
ValueError: If the model is unknown or SHA-256 verification fails.
|
||||
ConnectionError: If downloading the file fails.
|
||||
|
||||
Examples:
|
||||
>>> from uniface.models import RetinaFaceWeights, verify_model_weights
|
||||
>>> verify_model_weights(RetinaFaceWeights.MNET_V2)
|
||||
Example:
|
||||
>>> from uniface.constants import RetinaFaceWeights
|
||||
>>> from uniface.model_store import verify_model_weights
|
||||
>>> path = verify_model_weights(RetinaFaceWeights.MNET_V2)
|
||||
>>> print(path)
|
||||
'/home/user/.uniface/models/retinaface_mnet_v2.onnx'
|
||||
|
||||
>>> verify_model_weights(RetinaFaceWeights.RESNET34, root='/custom/dir')
|
||||
'/custom/dir/retinaface_r34.onnx'
|
||||
"""
|
||||
|
||||
root = os.path.expanduser(root)
|
||||
@@ -62,49 +70,58 @@ def verify_model_weights(model_name: str, root: str = '~/.uniface/models') -> st
|
||||
Logger.info(f"Successfully downloaded '{model_name}' to {model_path}")
|
||||
except Exception as e:
|
||||
Logger.error(f"Failed to download model '{model_name}': {e}")
|
||||
raise ConnectionError(f"Download failed for '{model_name}'")
|
||||
raise ConnectionError(f"Download failed for '{model_name}'") from e
|
||||
|
||||
expected_hash = const.MODEL_SHA256.get(model_name)
|
||||
if expected_hash and not verify_file_hash(model_path, expected_hash):
|
||||
os.remove(model_path) # Remove corrupted file
|
||||
Logger.warning("Corrupted weight detected. Removing...")
|
||||
Logger.warning('Corrupted weight detected. Removing...')
|
||||
raise ValueError(f"Hash mismatch for '{model_name}'. The file may be corrupted; please try downloading again.")
|
||||
|
||||
return model_path
|
||||
|
||||
|
||||
def download_file(url: str, dest_path: str) -> None:
|
||||
"""Download a file from a URL in chunks and save it to the destination path."""
|
||||
def download_file(url: str, dest_path: str, timeout: int = 30) -> None:
|
||||
"""Download a file from a URL in chunks and save it to the destination path.
|
||||
|
||||
Args:
|
||||
url: URL to download from.
|
||||
dest_path: Local file path to save to.
|
||||
timeout: Connection timeout in seconds. Defaults to 30.
|
||||
"""
|
||||
try:
|
||||
response = requests.get(url, stream=True)
|
||||
response = requests.get(url, stream=True, timeout=timeout)
|
||||
response.raise_for_status()
|
||||
with open(dest_path, "wb") as file, tqdm(
|
||||
desc=f"Downloading {dest_path}",
|
||||
unit='B',
|
||||
unit_scale=True,
|
||||
unit_divisor=1024
|
||||
) as progress:
|
||||
with (
|
||||
open(dest_path, 'wb') as file,
|
||||
tqdm(
|
||||
desc=f'Downloading {dest_path}',
|
||||
unit='B',
|
||||
unit_scale=True,
|
||||
unit_divisor=1024,
|
||||
) as progress,
|
||||
):
|
||||
for chunk in response.iter_content(chunk_size=const.CHUNK_SIZE):
|
||||
if chunk:
|
||||
file.write(chunk)
|
||||
progress.update(len(chunk))
|
||||
except requests.RequestException as e:
|
||||
raise ConnectionError(f"Failed to download file from {url}. Error: {e}")
|
||||
raise ConnectionError(f'Failed to download file from {url}. Error: {e}') from e
|
||||
|
||||
|
||||
def verify_file_hash(file_path: str, expected_hash: str) -> bool:
|
||||
"""Compute the SHA-256 hash of the file and compare it with the expected hash."""
|
||||
file_hash = hashlib.sha256()
|
||||
with open(file_path, "rb") as f:
|
||||
for chunk in iter(lambda: f.read(const.CHUNK_SIZE), b""):
|
||||
with open(file_path, 'rb') as f:
|
||||
for chunk in iter(lambda: f.read(const.CHUNK_SIZE), b''):
|
||||
file_hash.update(chunk)
|
||||
actual_hash = file_hash.hexdigest()
|
||||
if actual_hash != expected_hash:
|
||||
Logger.warning(f"Expected hash: {expected_hash}, but got: {actual_hash}")
|
||||
Logger.warning(f'Expected hash: {expected_hash}, but got: {actual_hash}')
|
||||
return actual_hash == expected_hash
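
Although only verify_model_weights is exported, the two helpers above compose for arbitrary files as well; a sketch with a hypothetical URL and checksum:

from uniface.model_store import download_file, verify_file_hash

url = 'https://example.com/custom_model.onnx'      # hypothetical URL, for illustration only
expected = '0' * 64                                # hypothetical SHA-256 hex digest
download_file(url, '/tmp/custom_model.onnx', timeout=30)
assert verify_file_hash('/tmp/custom_model.onnx', expected), 'checksum mismatch'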
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
if __name__ == '__main__':
|
||||
model_names = [model.value for model in const.RetinaFaceWeights]
|
||||
|
||||
# Download each model in the list
|
||||
|
||||
@@ -2,20 +2,23 @@
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""
|
||||
Utilities for ONNX Runtime configuration and provider selection.
|
||||
"""ONNX Runtime utilities for UniFace.
|
||||
|
||||
This module provides helper functions for creating and managing ONNX Runtime
|
||||
inference sessions with automatic hardware acceleration detection.
|
||||
"""
|
||||
|
||||
from typing import List
|
||||
from __future__ import annotations
|
||||
|
||||
import onnxruntime as ort
|
||||
|
||||
from uniface.log import Logger
|
||||
|
||||
__all__ = ['create_onnx_session', 'get_available_providers']
|
||||
|
||||
def get_available_providers() -> List[str]:
|
||||
"""
|
||||
Get list of available ONNX Runtime execution providers for the current platform.
|
||||
|
||||
def get_available_providers() -> list[str]:
|
||||
"""Get list of available ONNX Runtime execution providers.
|
||||
|
||||
Automatically detects and prioritizes hardware acceleration:
|
||||
- CoreML on Apple Silicon (M1/M2/M3/M4)
|
||||
@@ -23,65 +26,81 @@ def get_available_providers() -> List[str]:
|
||||
- CPU as fallback (always available)
|
||||
|
||||
Returns:
|
||||
List[str]: Ordered list of execution providers to use
|
||||
Ordered list of execution providers to use.
|
||||
|
||||
Examples:
|
||||
Example:
|
||||
>>> providers = get_available_providers()
|
||||
>>> # On M4 Mac: ['CoreMLExecutionProvider', 'CPUExecutionProvider']
|
||||
>>> # On Linux with CUDA: ['CUDAExecutionProvider', 'CPUExecutionProvider']
|
||||
>>> # On CPU-only: ['CPUExecutionProvider']
|
||||
"""
|
||||
available = ort.get_available_providers()
|
||||
providers = []
|
||||
|
||||
# Priority order: CoreML > CUDA > CPU
|
||||
if "CoreMLExecutionProvider" in available:
|
||||
providers.append("CoreMLExecutionProvider")
|
||||
Logger.info("CoreML acceleration enabled (Apple Silicon)")
|
||||
if 'CoreMLExecutionProvider' in available:
|
||||
providers.append('CoreMLExecutionProvider')
|
||||
Logger.info('CoreML acceleration enabled (Apple Silicon)')
|
||||
|
||||
if "CUDAExecutionProvider" in available:
|
||||
providers.append("CUDAExecutionProvider")
|
||||
Logger.info("CUDA acceleration enabled (NVIDIA GPU)")
|
||||
if 'CUDAExecutionProvider' in available:
|
||||
providers.append('CUDAExecutionProvider')
|
||||
Logger.info('CUDA acceleration enabled (NVIDIA GPU)')
|
||||
|
||||
# CPU is always available as fallback
|
||||
providers.append("CPUExecutionProvider")
|
||||
providers.append('CPUExecutionProvider')
|
||||
|
||||
if len(providers) == 1:
|
||||
Logger.info("Using CPU execution (no hardware acceleration detected)")
|
||||
Logger.info('Using CPU execution (no hardware acceleration detected)')
|
||||
|
||||
return providers
|
||||
|
||||
|
||||
def create_onnx_session(model_path: str, providers: List[str] = None) -> ort.InferenceSession:
|
||||
"""
|
||||
Create an ONNX Runtime inference session with optimal provider selection.
|
||||
def create_onnx_session(
|
||||
model_path: str,
|
||||
providers: list[str] | None = None,
|
||||
) -> ort.InferenceSession:
|
||||
"""Create an ONNX Runtime inference session with optimal provider selection.
|
||||
|
||||
Args:
|
||||
model_path (str): Path to the ONNX model file
|
||||
providers (List[str], optional): List of providers to use.
|
||||
If None, automatically detects best available providers.
|
||||
model_path: Path to the ONNX model file.
|
||||
providers: List of execution providers to use. If None, automatically
|
||||
detects best available providers.
|
||||
|
||||
Returns:
|
||||
ort.InferenceSession: Configured ONNX Runtime session
|
||||
Configured ONNX Runtime session.
|
||||
|
||||
Raises:
|
||||
RuntimeError: If session creation fails
|
||||
RuntimeError: If session creation fails.
|
||||
|
||||
Examples:
|
||||
>>> session = create_onnx_session("model.onnx")
|
||||
Example:
|
||||
>>> session = create_onnx_session('model.onnx')
|
||||
>>> # Automatically uses best available providers
|
||||
|
||||
>>> session = create_onnx_session("model.onnx", providers=["CPUExecutionProvider"])
|
||||
>>> session = create_onnx_session('model.onnx', providers=['CPUExecutionProvider'])
|
||||
>>> # Force CPU-only execution
|
||||
"""
|
||||
if providers is None:
|
||||
providers = get_available_providers()
|
||||
|
||||
# Suppress ONNX Runtime warnings (e.g., CoreML partition warnings)
|
||||
# Log levels: 0=VERBOSE, 1=INFO, 2=WARNING, 3=ERROR, 4=FATAL
|
||||
sess_options = ort.SessionOptions()
|
||||
sess_options.log_severity_level = 3 # Only show ERROR and FATAL
|
||||
|
||||
try:
|
||||
session = ort.InferenceSession(model_path, providers=providers)
|
||||
session = ort.InferenceSession(model_path, sess_options=sess_options, providers=providers)
|
||||
active_provider = session.get_providers()[0]
|
||||
Logger.debug(f"Session created with provider: {active_provider}")
|
||||
Logger.debug(f'Session created with provider: {active_provider}')
|
||||
|
||||
# Show user-friendly message about which provider is being used
|
||||
provider_names = {
|
||||
'CoreMLExecutionProvider': 'CoreML (Apple Silicon)',
|
||||
'CUDAExecutionProvider': 'CUDA (NVIDIA GPU)',
|
||||
'CPUExecutionProvider': 'CPU',
|
||||
}
|
||||
provider_display = provider_names.get(active_provider, active_provider)
|
||||
Logger.info(f'✓ Model loaded ({provider_display})')
|
||||
|
||||
return session
|
||||
except Exception as e:
|
||||
Logger.error(f"Failed to create ONNX session: {e}", exc_info=True)
|
||||
raise RuntimeError(f"Failed to initialize ONNX Runtime session: {e}") from e
|
||||
Logger.error(f'Failed to create ONNX session: {e}', exc_info=True)
|
||||
raise RuntimeError(f'Failed to initialize ONNX Runtime session: {e}') from e
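
A small sketch of the two helpers together ('model.onnx' is a placeholder path):

from uniface.onnx_utils import create_onnx_session, get_available_providers

print(get_available_providers())                                     # e.g. ['CPUExecutionProvider']
session = create_onnx_session('model.onnx', providers=['CPUExecutionProvider'])
print(session.get_providers()[0])                                     # provider actually selected
print([inp.name for inp in session.get_inputs()])                     # model input names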
|
||||
|
||||
52 uniface/parsing/__init__.py Normal file
@@ -0,0 +1,52 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from uniface.constants import ParsingWeights
|
||||
|
||||
from .base import BaseFaceParser
|
||||
from .bisenet import BiSeNet
|
||||
|
||||
__all__ = ['BaseFaceParser', 'BiSeNet', 'create_face_parser']
|
||||
|
||||
|
||||
def create_face_parser(
|
||||
model_name: str | ParsingWeights = ParsingWeights.RESNET18,
|
||||
) -> BaseFaceParser:
|
||||
"""Factory function to create a face parsing model instance.
|
||||
|
||||
This function provides a convenient way to instantiate face parsing models
|
||||
without directly importing the specific model classes.
|
||||
|
||||
Args:
|
||||
model_name: The face parsing model to create. Can be either a string
|
||||
or a ParsingWeights enum value. Available options:
|
||||
- 'parsing_resnet18' or ParsingWeights.RESNET18 (default)
|
||||
- 'parsing_resnet34' or ParsingWeights.RESNET34
|
||||
|
||||
Returns:
|
||||
An instance of the requested face parsing model.
|
||||
|
||||
Raises:
|
||||
ValueError: If the model_name is not recognized.
|
||||
|
||||
Example:
|
||||
>>> from uniface.parsing import create_face_parser
|
||||
>>> from uniface.constants import ParsingWeights
|
||||
>>> parser = create_face_parser(ParsingWeights.RESNET18)
|
||||
>>> mask = parser.parse(face_crop)
|
||||
"""
|
||||
# Convert string to enum if necessary
|
||||
if isinstance(model_name, str):
|
||||
try:
|
||||
model_name = ParsingWeights(model_name)
|
||||
except ValueError as e:
|
||||
valid_models = [e.value for e in ParsingWeights]
|
||||
raise ValueError(
|
||||
f"Unknown face parsing model: '{model_name}'. Valid options are: {', '.join(valid_models)}"
|
||||
) from e
|
||||
|
||||
# All parsing models use the same BiSeNet class
|
||||
return BiSeNet(model_name=model_name)
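
A minimal end-to-end sketch of the factory in use (the image path is a placeholder, and face.bbox follows the detection examples elsewhere in this diff):

import cv2
from uniface import RetinaFace
from uniface.parsing import create_face_parser

image = cv2.imread('portrait.jpg')                  # placeholder path
parser = create_face_parser('parsing_resnet18')     # string or ParsingWeights enum
for face in RetinaFace().detect(image):
    x1, y1, x2, y2 = map(int, face.bbox[:4])
    mask = parser(image[y1:y2, x1:x2])              # __call__ -> parse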
|
||||
105 uniface/parsing/base.py Normal file
@@ -0,0 +1,105 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
import numpy as np
|
||||
|
||||
|
||||
class BaseFaceParser(ABC):
|
||||
"""
|
||||
Abstract base class for all face parsing models.
|
||||
|
||||
This class defines the common interface that all face parsing models must implement,
|
||||
ensuring consistency across different parsing methods. Face parsing segments a face
|
||||
image into semantic regions such as skin, eyes, nose, mouth, hair, etc.
|
||||
|
||||
The output is a segmentation mask where each pixel is assigned a class label
|
||||
representing a facial component.
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def _initialize_model(self) -> None:
|
||||
"""
|
||||
Initialize the underlying model for inference.
|
||||
|
||||
This method should handle loading model weights, creating the
|
||||
inference session (e.g., ONNX Runtime), and any necessary
|
||||
setup procedures to prepare the model for prediction.
|
||||
|
||||
Raises:
|
||||
RuntimeError: If the model fails to load or initialize.
|
||||
"""
|
||||
raise NotImplementedError('Subclasses must implement the _initialize_model method.')
|
||||
|
||||
@abstractmethod
|
||||
def preprocess(self, face_image: np.ndarray) -> np.ndarray:
|
||||
"""
|
||||
Preprocess the input face image for model inference.
|
||||
|
||||
This method should take a raw face crop and convert it into the format
|
||||
expected by the model's inference engine (e.g., normalized tensor).
|
||||
|
||||
Args:
|
||||
face_image (np.ndarray): A face image in BGR format with
|
||||
shape (H, W, C).
|
||||
|
||||
Returns:
|
||||
np.ndarray: The preprocessed image tensor ready for inference,
|
||||
typically with shape (1, C, H, W).
|
||||
"""
|
||||
raise NotImplementedError('Subclasses must implement the preprocess method.')
|
||||
|
||||
@abstractmethod
|
||||
def postprocess(self, outputs: np.ndarray, original_size: tuple[int, int]) -> np.ndarray:
|
||||
"""
|
||||
Postprocess raw model outputs into a segmentation mask.
|
||||
|
||||
This method takes the raw output from the model's inference and
|
||||
converts it into a segmentation mask at the original image size.
|
||||
|
||||
Args:
|
||||
outputs (np.ndarray): Raw outputs from the model inference.
|
||||
original_size (Tuple[int, int]): Original image size (width, height).
|
||||
|
||||
Returns:
|
||||
np.ndarray: Segmentation mask with the same size as the original image.
|
||||
"""
|
||||
raise NotImplementedError('Subclasses must implement the postprocess method.')
|
||||
|
||||
@abstractmethod
|
||||
def parse(self, face_image: np.ndarray) -> np.ndarray:
|
||||
"""
|
||||
Perform end-to-end face parsing on a face image.
|
||||
|
||||
This method orchestrates the full pipeline: preprocessing the input,
|
||||
running inference, and postprocessing to return the segmentation mask.
|
||||
|
||||
Args:
|
||||
face_image (np.ndarray): A face image in BGR format.
|
||||
The face should be roughly centered and
|
||||
well-framed within the image.
|
||||
|
||||
Returns:
|
||||
np.ndarray: Segmentation mask with the same size as input image,
|
||||
where each pixel value represents a facial component class.
|
||||
|
||||
Example:
|
||||
>>> parser = create_face_parser()
|
||||
>>> mask = parser.parse(face_crop)
|
||||
>>> print(f'Mask shape: {mask.shape}, unique classes: {np.unique(mask)}')
|
||||
"""
|
||||
raise NotImplementedError('Subclasses must implement the parse method.')
|
||||
|
||||
def __call__(self, face_image: np.ndarray) -> np.ndarray:
|
||||
"""
|
||||
Provides a convenient, callable shortcut for the `parse` method.
|
||||
|
||||
Args:
|
||||
face_image (np.ndarray): A face image in BGR format.
|
||||
|
||||
Returns:
|
||||
np.ndarray: Segmentation mask with the same size as input image.
|
||||
"""
|
||||
return self.parse(face_image)
|
||||
165 uniface/parsing/bisenet.py Normal file
@@ -0,0 +1,165 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from uniface.constants import ParsingWeights
|
||||
from uniface.log import Logger
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.onnx_utils import create_onnx_session
|
||||
|
||||
from .base import BaseFaceParser
|
||||
|
||||
__all__ = ['BiSeNet']
|
||||
|
||||
|
||||
class BiSeNet(BaseFaceParser):
|
||||
"""
|
||||
BiSeNet: Bilateral Segmentation Network for Face Parsing with ONNX Runtime.
|
||||
|
||||
BiSeNet is a semantic segmentation model that segments a face image into
|
||||
different facial components such as skin, eyes, nose, mouth, hair, etc. The model
|
||||
uses a BiSeNet architecture with ResNet backbone and outputs a segmentation mask
|
||||
where each pixel is assigned a class label.
|
||||
|
||||
The model supports 19 facial component classes including:
|
||||
- Background, skin, eyebrows, eyes, nose, mouth, lips, ears, hair, etc.
|
||||
|
||||
Reference:
|
||||
https://github.com/yakhyo/face-parsing
|
||||
|
||||
Args:
|
||||
model_name (ParsingWeights): The enum specifying the parsing model to load.
|
||||
Options: RESNET18, RESNET34.
|
||||
Defaults to `ParsingWeights.RESNET18`.
|
||||
input_size (Tuple[int, int]): The resolution (width, height) for the model's
|
||||
input. Defaults to (512, 512).
|
||||
|
||||
Attributes:
|
||||
input_size (Tuple[int, int]): Model input dimensions.
|
||||
input_mean (np.ndarray): Per-channel mean values for normalization (ImageNet).
|
||||
input_std (np.ndarray): Per-channel std values for normalization (ImageNet).
|
||||
|
||||
Example:
|
||||
>>> from uniface.parsing import BiSeNet
|
||||
>>> from uniface import RetinaFace
|
||||
>>>
|
||||
>>> detector = RetinaFace()
|
||||
>>> parser = BiSeNet()
|
||||
>>>
|
||||
>>> # Detect faces and parse each face
|
||||
>>> faces = detector.detect(image)
|
||||
>>> for face in faces:
|
||||
... bbox = face.bbox
|
||||
... x1, y1, x2, y2 = map(int, bbox[:4])
|
||||
... face_crop = image[y1:y2, x1:x2]
|
||||
... mask = parser.parse(face_crop)
|
||||
... print(f'Mask shape: {mask.shape}, unique classes: {np.unique(mask)}')
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
model_name: ParsingWeights = ParsingWeights.RESNET18,
|
||||
input_size: tuple[int, int] = (512, 512),
|
||||
) -> None:
|
||||
Logger.info(f'Initializing BiSeNet with model={model_name}, input_size={input_size}')
|
||||
|
||||
self.input_size = input_size
|
||||
self.input_mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
|
||||
self.input_std = np.array([0.229, 0.224, 0.225], dtype=np.float32)
|
||||
|
||||
self.model_path = verify_model_weights(model_name)
|
||||
self._initialize_model()
|
||||
|
||||
def _initialize_model(self) -> None:
|
||||
"""
|
||||
Initialize the ONNX model from the stored model path.
|
||||
|
||||
Raises:
|
||||
RuntimeError: If the model fails to load or initialize.
|
||||
"""
|
||||
try:
|
||||
self.session = create_onnx_session(self.model_path)
|
||||
|
||||
# Get input configuration
|
||||
input_cfg = self.session.get_inputs()[0]
|
||||
input_shape = input_cfg.shape
|
||||
self.input_name = input_cfg.name
|
||||
self.input_size = tuple(input_shape[2:4][::-1]) # Update from model
|
||||
|
||||
# Get output configuration
|
||||
outputs = self.session.get_outputs()
|
||||
self.output_names = [output.name for output in outputs]
|
||||
|
||||
Logger.info(f'BiSeNet initialized with input size {self.input_size}')
|
||||
|
||||
except Exception as e:
|
||||
Logger.error(f"Failed to load parsing model from '{self.model_path}'", exc_info=True)
|
||||
raise RuntimeError(f'Failed to initialize parsing model: {e}') from e
|
||||
|
||||
def preprocess(self, face_image: np.ndarray) -> np.ndarray:
|
||||
"""
|
||||
Preprocess a face image for parsing.
|
||||
|
||||
Args:
|
||||
face_image (np.ndarray): A face image in BGR format.
|
||||
|
||||
Returns:
|
||||
np.ndarray: Preprocessed image tensor with shape (1, 3, H, W).
|
||||
"""
|
||||
# Convert BGR to RGB
|
||||
image = cv2.cvtColor(face_image, cv2.COLOR_BGR2RGB)
|
||||
|
||||
# Resize to model input size
|
||||
image = cv2.resize(image, self.input_size, interpolation=cv2.INTER_LINEAR)
|
||||
|
||||
# Normalize to [0, 1] and apply normalization
|
||||
image = image.astype(np.float32) / 255.0
|
||||
image = (image - self.input_mean) / self.input_std
|
||||
|
||||
# HWC -> CHW -> NCHW
|
||||
image = np.transpose(image, (2, 0, 1))
|
||||
image = np.expand_dims(image, axis=0).astype(np.float32)
|
||||
|
||||
return image
|
||||
|
||||
def postprocess(self, outputs: np.ndarray, original_size: tuple[int, int]) -> np.ndarray:
|
||||
"""
|
||||
Postprocess model output to segmentation mask.
|
||||
|
||||
Args:
|
||||
outputs (np.ndarray): Raw model output.
|
||||
original_size (Tuple[int, int]): Original image size (width, height).
|
||||
|
||||
Returns:
|
||||
np.ndarray: Segmentation mask resized to original dimensions.
|
||||
"""
|
||||
# Get the class with highest probability for each pixel
|
||||
predicted_mask = outputs.squeeze(0).argmax(0).astype(np.uint8)
|
||||
|
||||
# Resize back to original size
|
||||
restored_mask = cv2.resize(predicted_mask, original_size, interpolation=cv2.INTER_NEAREST)
|
||||
|
||||
return restored_mask
|
||||
|
||||
def parse(self, face_image: np.ndarray) -> np.ndarray:
|
||||
"""
|
||||
Perform end-to-end face parsing on a face image.
|
||||
|
||||
This method orchestrates the full pipeline: preprocessing the input,
|
||||
running inference, and postprocessing to return the segmentation mask.
|
||||
|
||||
Args:
|
||||
face_image (np.ndarray): A face image in BGR format.
|
||||
|
||||
Returns:
|
||||
np.ndarray: Segmentation mask with the same size as input image.
|
||||
"""
|
||||
original_size = (face_image.shape[1], face_image.shape[0]) # (width, height)
|
||||
input_tensor = self.preprocess(face_image)
|
||||
outputs = self.session.run(self.output_names, {self.input_name: input_tensor})
|
||||
|
||||
return self.postprocess(outputs[0], original_size)
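
For a quick way to inspect the result, one class in the mask can be recolored; a sketch assuming class id 1 corresponds to skin (the id mapping is not spelled out in this diff, and the path is a placeholder):

import cv2
from uniface.parsing import BiSeNet

face_crop = cv2.imread('face_crop.jpg')        # placeholder path
mask = BiSeNet()(face_crop)                    # __call__ -> parse
overlay = face_crop.copy()
overlay[mask == 1] = (0, 255, 0)               # assumed skin class painted green
preview = cv2.addWeighted(face_crop, 0.6, overlay, 0.4, 0)
cv2.imwrite('parsing_preview.jpg', preview)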
|
||||
52 uniface/privacy/__init__.py Normal file
@@ -0,0 +1,52 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import numpy as np
|
||||
|
||||
from .blur import BlurFace
|
||||
|
||||
|
||||
def anonymize_faces(
|
||||
image: np.ndarray,
|
||||
detector: object | None = None,
|
||||
method: str = 'pixelate',
|
||||
blur_strength: float = 3.0,
|
||||
pixel_blocks: int = 10,
|
||||
confidence_threshold: float = 0.5,
|
||||
**kwargs,
|
||||
) -> np.ndarray:
|
||||
"""One-line face anonymization with automatic detection.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): Input image (BGR format).
|
||||
detector: Face detector instance. Creates RetinaFace if None.
|
||||
method (str): Blur method name. Defaults to 'pixelate'.
|
||||
blur_strength (float): Blur intensity. Defaults to 3.0.
|
||||
pixel_blocks (int): Block count for pixelate. Defaults to 10.
|
||||
confidence_threshold (float): Detection confidence threshold. Defaults to 0.5.
|
||||
**kwargs: Additional detector arguments.
|
||||
|
||||
Returns:
|
||||
np.ndarray: Anonymized image.
|
||||
|
||||
Example:
|
||||
>>> from uniface.privacy import anonymize_faces
|
||||
>>> anonymized = anonymize_faces(image, method='pixelate')
|
||||
"""
|
||||
if detector is None:
|
||||
try:
|
||||
from uniface import RetinaFace
|
||||
|
||||
detector = RetinaFace(confidence_threshold=confidence_threshold, **kwargs)
|
||||
except ImportError as err:
|
||||
raise ImportError('Could not import RetinaFace. Please ensure UniFace is properly installed.') from err
|
||||
|
||||
faces = detector.detect(image)
|
||||
blurrer = BlurFace(method=method, blur_strength=blur_strength, pixel_blocks=pixel_blocks)
|
||||
return blurrer.anonymize(image, faces)
|
||||
|
||||
|
||||
__all__ = ['BlurFace', 'anonymize_faces']
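
A one-line usage sketch of the helper above (input and output paths are placeholders):

import cv2
from uniface.privacy import anonymize_faces

image = cv2.imread('group_photo.jpg')
blurred = anonymize_faces(image, method='elliptical', blur_strength=3.0)
cv2.imwrite('group_photo_anonymized.jpg', blurred)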
|
||||
200 uniface/privacy/blur.py Normal file
@@ -0,0 +1,200 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, ClassVar
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
if TYPE_CHECKING:
|
||||
pass
|
||||
|
||||
__all__ = ['BlurFace', 'EllipticalBlur']
|
||||
|
||||
|
||||
def _gaussian_blur(region: np.ndarray, strength: float = 3.0) -> np.ndarray:
|
||||
"""Apply Gaussian blur to a region."""
|
||||
h, w = region.shape[:2]
|
||||
kernel_size = max(3, int((min(h, w) / 7) * strength)) | 1
|
||||
return cv2.GaussianBlur(region, (kernel_size, kernel_size), 0)
|
||||
|
||||
|
||||
def _median_blur(region: np.ndarray, strength: float = 3.0) -> np.ndarray:
|
||||
"""Apply median blur to a region."""
|
||||
h, w = region.shape[:2]
|
||||
kernel_size = max(3, int((min(h, w) / 7) * strength)) | 1
|
||||
return cv2.medianBlur(region, kernel_size)
|
||||
|
||||
|
||||
def _pixelate_blur(region: np.ndarray, blocks: int = 10) -> np.ndarray:
|
||||
"""Apply pixelation to a region."""
|
||||
h, w = region.shape[:2]
|
||||
temp_h, temp_w = max(1, h // blocks), max(1, w // blocks)
|
||||
temp = cv2.resize(region, (temp_w, temp_h), interpolation=cv2.INTER_LINEAR)
|
||||
return cv2.resize(temp, (w, h), interpolation=cv2.INTER_NEAREST)
|
||||
|
||||
|
||||
def _blackout_blur(region: np.ndarray, color: tuple[int, int, int] = (0, 0, 0)) -> np.ndarray:
|
||||
"""Replace region with solid color."""
|
||||
return np.full_like(region, color)
|
||||
|
||||
|
||||
class EllipticalBlur:
|
||||
"""Elliptical blur with soft, feathered edges.
|
||||
|
||||
This blur applies Gaussian blur within an elliptical mask that follows
|
||||
the natural oval shape of faces, requiring full image context for proper blending.
|
||||
|
||||
Args:
|
||||
blur_strength (float): Blur intensity multiplier. Defaults to 3.0.
|
||||
margin (int): Extra pixels to extend ellipse beyond bbox. Defaults to 20.
|
||||
"""
|
||||
|
||||
def __init__(self, blur_strength: float = 3.0, margin: int = 20):
|
||||
self.blur_strength = blur_strength
|
||||
self.margin = margin
|
||||
|
||||
def __call__(
|
||||
self,
|
||||
image: np.ndarray,
|
||||
bboxes: list[tuple | list],
|
||||
inplace: bool = False,
|
||||
) -> np.ndarray:
|
||||
if not inplace:
|
||||
image = image.copy()
|
||||
|
||||
h, w = image.shape[:2]
|
||||
|
||||
for bbox in bboxes:
|
||||
x1, y1, x2, y2 = map(int, bbox)
|
||||
center_x, center_y = (x1 + x2) // 2, (y1 + y2) // 2
|
||||
axes_x = (x2 - x1) // 2 + self.margin
|
||||
axes_y = (y2 - y1) // 2 + self.margin
|
||||
|
||||
# Create soft elliptical mask
|
||||
mask = np.zeros((h, w), dtype=np.float32)
|
||||
cv2.ellipse(mask, (center_x, center_y), (axes_x, axes_y), 0, 0, 360, 255, -1)
|
||||
mask = cv2.GaussianBlur(mask, (51, 51), 0) / 255.0
|
||||
mask = mask[:, :, np.newaxis]
|
||||
|
||||
kernel_size = max(3, int((min(axes_y, axes_x) * 2 / 7) * self.blur_strength)) | 1
|
||||
blurred = cv2.GaussianBlur(image, (kernel_size, kernel_size), 0)
|
||||
image = (blurred * mask + image * (1 - mask)).astype(np.uint8)
|
||||
|
||||
return image
|
||||
|
||||
|
||||
class BlurFace:
|
||||
"""Face blurring with multiple anonymization methods.
|
||||
|
||||
Args:
|
||||
method (str): Blur method - 'gaussian', 'pixelate', 'blackout', 'elliptical', or 'median'.
|
||||
Defaults to 'pixelate'.
|
||||
blur_strength (float): Intensity for gaussian/elliptical/median. Defaults to 3.0.
|
||||
pixel_blocks (int): Block count for pixelate. Defaults to 10.
|
||||
color (Tuple[int, int, int]): Fill color (BGR) for blackout. Defaults to (0, 0, 0).
|
||||
margin (int): Edge margin for elliptical. Defaults to 20.
|
||||
|
||||
Example:
|
||||
>>> blurrer = BlurFace(method='pixelate')
|
||||
>>> anonymized = blurrer.anonymize(image, faces)
|
||||
"""
|
||||
|
||||
VALID_METHODS: ClassVar[set[str]] = {'gaussian', 'pixelate', 'blackout', 'elliptical', 'median'}
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
method: str = 'pixelate',
|
||||
blur_strength: float = 3.0,
|
||||
pixel_blocks: int = 15,
|
||||
color: tuple[int, int, int] = (0, 0, 0),
|
||||
margin: int = 20,
|
||||
):
|
||||
self.method = method.lower()
|
||||
self._blur_strength = blur_strength
|
||||
self._pixel_blocks = pixel_blocks
|
||||
self._color = color
|
||||
self._margin = margin
|
||||
|
||||
if self.method not in self.VALID_METHODS:
|
||||
raise ValueError(f"Invalid blur method: '{method}'. Choose from: {sorted(self.VALID_METHODS)}")
|
||||
|
||||
if self.method == 'elliptical':
|
||||
self._elliptical = EllipticalBlur(blur_strength, margin)
|
||||
|
||||
def _blur_region(self, region: np.ndarray) -> np.ndarray:
|
||||
"""Apply blur to a single region based on the configured method."""
|
||||
if self.method == 'gaussian':
|
||||
return _gaussian_blur(region, self._blur_strength)
|
||||
elif self.method == 'median':
|
||||
return _median_blur(region, self._blur_strength)
|
||||
elif self.method == 'pixelate':
|
||||
return _pixelate_blur(region, self._pixel_blocks)
|
||||
elif self.method == 'blackout':
|
||||
return _blackout_blur(region, self._color)
|
||||
return region # Fallback (should not reach here)
|
||||
|
||||
def anonymize(
|
||||
self,
|
||||
image: np.ndarray,
|
||||
faces: list,
|
||||
inplace: bool = False,
|
||||
) -> np.ndarray:
|
||||
"""Anonymize faces in an image.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): Input image (BGR format).
|
||||
faces (List[Dict]): Face detections with 'bbox' key containing [x1, y1, x2, y2].
|
||||
inplace (bool): Modify image in-place if True. Defaults to False.
|
||||
|
||||
Returns:
|
||||
np.ndarray: Image with anonymized faces.
|
||||
"""
|
||||
if not faces:
|
||||
return image if inplace else image.copy()
|
||||
|
||||
bboxes = [face.bbox for face in faces]
|
||||
return self.blur_regions(image, bboxes, inplace)
|
||||
|
||||
def blur_regions(
|
||||
self,
|
||||
image: np.ndarray,
|
||||
bboxes: list[tuple | list],
|
||||
inplace: bool = False,
|
||||
) -> np.ndarray:
|
||||
"""Blur specific rectangular regions in an image.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): Input image (BGR format).
|
||||
bboxes (List): Bounding boxes as [x1, y1, x2, y2].
|
||||
inplace (bool): Modify image in-place if True. Defaults to False.
|
||||
|
||||
Returns:
|
||||
np.ndarray: Image with blurred regions.
|
||||
"""
|
||||
if not bboxes:
|
||||
return image if inplace else image.copy()
|
||||
|
||||
if self.method == 'elliptical':
|
||||
return self._elliptical(image, bboxes, inplace)
|
||||
|
||||
if not inplace:
|
||||
image = image.copy()
|
||||
|
||||
h, w = image.shape[:2]
|
||||
|
||||
for bbox in bboxes:
|
||||
x1, y1, x2, y2 = map(int, bbox)
|
||||
x1, y1 = max(0, x1), max(0, y1)
|
||||
x2, y2 = min(w, x2), min(h, y2)
|
||||
|
||||
if x2 > x1 and y2 > y1:
|
||||
image[y1:y2, x1:x2] = self._blur_region(image[y1:y2, x1:x2])
|
||||
|
||||
return image
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return f"BlurFace(method='{self.method}')"
|
||||
@@ -2,10 +2,10 @@
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from typing import Dict
|
||||
from .models import ArcFace, MobileFace, SphereFace
|
||||
|
||||
from .base import BaseRecognizer
|
||||
from uniface.constants import ArcFaceWeights, MobileFaceWeights, SphereFaceWeights
|
||||
from .models import ArcFace, MobileFace, SphereFace
|
||||
|
||||
|
||||
def create_recognizer(method: str = 'arcface', **kwargs) -> BaseRecognizer:
|
||||
"""
|
||||
@@ -34,10 +34,7 @@ def create_recognizer(method: str = 'arcface', **kwargs) -> BaseRecognizer:
|
||||
|
||||
>>> # Create a specific MobileFace recognizer
|
||||
>>> from uniface.constants import MobileFaceWeights
|
||||
>>> recognizer = create_recognizer(
|
||||
... 'mobileface',
|
||||
... model_name=MobileFaceWeights.MNET_V2
|
||||
... )
|
||||
>>> recognizer = create_recognizer('mobileface', model_name=MobileFaceWeights.MNET_V2)
|
||||
|
||||
>>> # Create a SphereFace recognizer
|
||||
>>> recognizer = create_recognizer('sphereface')
|
||||
@@ -54,10 +51,5 @@ def create_recognizer(method: str = 'arcface', **kwargs) -> BaseRecognizer:
|
||||
available = ['arcface', 'mobileface', 'sphereface']
|
||||
raise ValueError(f"Unsupported method: '{method}'. Available: {available}")
|
||||
|
||||
__all__ = [
|
||||
"create_recognizer",
|
||||
"ArcFace",
|
||||
"MobileFace",
|
||||
"SphereFace",
|
||||
"BaseRecognizer",
|
||||
]
|
||||
|
||||
__all__ = ['ArcFace', 'BaseRecognizer', 'MobileFace', 'SphereFace', 'create_recognizer']
|
||||
|
||||
@@ -2,25 +2,34 @@
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
from dataclasses import dataclass
|
||||
from typing import Tuple, Union, List
|
||||
|
||||
from uniface.log import Logger
|
||||
from uniface.face_utils import face_alignment
|
||||
from uniface.log import Logger
|
||||
from uniface.onnx_utils import create_onnx_session
|
||||
|
||||
__all__ = ['BaseRecognizer', 'PreprocessConfig']
|
||||
|
||||
|
||||
@dataclass
|
||||
class PreprocessConfig:
|
||||
"""Configuration for preprocessing images before feeding them into the model.
|
||||
|
||||
Attributes:
|
||||
input_mean: Mean value(s) for normalization.
|
||||
input_std: Standard deviation value(s) for normalization.
|
||||
input_size: Target image size as (height, width).
|
||||
"""
|
||||
Configuration for preprocessing images before feeding them into the model.
|
||||
"""
|
||||
input_mean: Union[float, List[float]] = 127.5
|
||||
input_std: Union[float, List[float]] = 127.5
|
||||
input_size: Tuple[int, int] = (112, 112)
|
||||
|
||||
input_mean: float | list[float] = 127.5
|
||||
input_std: float | list[float] = 127.5
|
||||
input_size: tuple[int, int] = (112, 112)
|
||||
|
||||
|
||||
class BaseRecognizer(ABC):
|
||||
@@ -28,6 +37,7 @@ class BaseRecognizer(ABC):
|
||||
Abstract Base Class for all face recognition models.
|
||||
It provides the core functionality for preprocessing, inference, and embedding extraction.
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def __init__(self, model_path: str, preprocessing: PreprocessConfig) -> None:
|
||||
"""
|
||||
@@ -63,17 +73,20 @@ class BaseRecognizer(ABC):
|
||||
input_shape = input_cfg.shape
|
||||
model_input_size = tuple(input_shape[2:4][::-1]) # (width, height)
|
||||
if model_input_size != self.input_size:
|
||||
Logger.warning(f"Model input size {model_input_size} differs from configured size {self.input_size}")
|
||||
Logger.warning(f'Model input size {model_input_size} differs from configured size {self.input_size}')
|
||||
|
||||
# Extract output configuration
|
||||
self.output_names = [output.name for output in self.session.get_outputs()]
|
||||
self.output_shape = self.session.get_outputs()[0].shape
|
||||
|
||||
assert len(self.output_names) == 1, "Expected only one output node."
|
||||
Logger.info(f"Successfully initialized face encoder from {self.model_path}")
|
||||
assert len(self.output_names) == 1, 'Expected only one output node.'
|
||||
Logger.info(f'Successfully initialized face encoder from {self.model_path}')
|
||||
|
||||
except Exception as e:
|
||||
Logger.error(f"Failed to load face encoder model from '{self.model_path}'", exc_info=True)
|
||||
Logger.error(
|
||||
f"Failed to load face encoder model from '{self.model_path}'",
|
||||
exc_info=True,
|
||||
)
|
||||
raise RuntimeError(f"Failed to initialize model session for '{self.model_path}'") from e
|
||||
|
||||
def preprocess(self, face_img: np.ndarray) -> np.ndarray:
|
||||
@@ -88,11 +101,12 @@ class BaseRecognizer(ABC):
|
||||
"""
|
||||
resized_img = cv2.resize(face_img, self.input_size)
|
||||
|
||||
if isinstance(self.input_std, (list, tuple)):
|
||||
if isinstance(self.input_std, list | tuple):
|
||||
# Per-channel normalization
|
||||
rgb_img = cv2.cvtColor(resized_img, cv2.COLOR_BGR2RGB).astype(np.float32)
|
||||
normalized_img = (rgb_img - np.array(self.input_mean, dtype=np.float32)) / \
|
||||
np.array(self.input_std, dtype=np.float32)
|
||||
normalized_img = (rgb_img - np.array(self.input_mean, dtype=np.float32)) / np.array(
|
||||
self.input_std, dtype=np.float32
|
||||
)
|
||||
|
||||
# Change to NCHW (batch, channels, height, width)
|
||||
blob = np.transpose(normalized_img, (2, 0, 1)) # CHW
|
||||
@@ -104,18 +118,19 @@ class BaseRecognizer(ABC):
|
||||
scalefactor=1.0 / self.input_std,
|
||||
size=self.input_size,
|
||||
mean=(self.input_mean, self.input_mean, self.input_mean),
|
||||
swapRB=True # Convert BGR to RGB
|
||||
swapRB=True, # Convert BGR to RGB
|
||||
)
|
||||
|
||||
return blob
|
||||
|
||||
def get_embedding(self, image: np.ndarray, landmarks: np.ndarray = None) -> np.ndarray:
|
||||
"""
|
||||
Extracts face embedding from an image.
|
||||
def get_embedding(self, image: np.ndarray, landmarks: np.ndarray | None = None) -> np.ndarray:
|
||||
"""Extract face embedding from an image.
|
||||
|
||||
Args:
|
||||
image: Input face image (BGR format). If already aligned (112x112), landmarks can be None.
|
||||
landmarks: Facial landmarks (5 points for alignment). Optional if image is already aligned.
|
||||
image: Input face image in BGR format. If already aligned (112x112),
|
||||
landmarks can be None.
|
||||
landmarks: Facial landmarks (5 points for alignment). Optional if
|
||||
image is already aligned.
|
||||
|
||||
Returns:
|
||||
Face embedding vector (typically 512-dimensional).
|
||||
@@ -134,16 +149,27 @@ class BaseRecognizer(ABC):
|
||||
return embedding
|
||||
|
||||
def get_normalized_embedding(self, image: np.ndarray, landmarks: np.ndarray) -> np.ndarray:
|
||||
"""
|
||||
Extracts a l2 normalized face embedding vector from an image.
|
||||
"""Extract an L2-normalized face embedding vector from an image.
|
||||
|
||||
Args:
|
||||
image: Input face image (BGR format).
|
||||
image: Input face image in BGR format.
|
||||
landmarks: Facial landmarks (5 points for alignment).
|
||||
|
||||
Returns:
|
||||
Normalized face embedding vector (typically 512-dimensional).
|
||||
L2-normalized face embedding vector (typically 512-dimensional).
|
||||
"""
|
||||
embedding = self.get_embedding(image, landmarks)
|
||||
norm = np.linalg.norm(embedding)
|
||||
return embedding / norm if norm > 0 else embedding
|
||||
|
||||
def __call__(self, image: np.ndarray, landmarks: np.ndarray) -> np.ndarray:
|
||||
"""Callable shortcut for the `get_normalized_embedding` method.
|
||||
|
||||
Args:
|
||||
image: Input face image in BGR format.
|
||||
landmarks: Facial landmarks (5 points for alignment).
|
||||
|
||||
Returns:
|
||||
L2-normalized face embedding vector (typically 512-dimensional).
|
||||
"""
|
||||
return self.get_normalized_embedding(image, landmarks)
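
To make the embedding workflow concrete, a short sketch comparing two faces with cosine similarity; the image paths are placeholders and the detector output attribute names (bbox, landmarks) are assumptions based on the docstring examples elsewhere in this diff:

import cv2
import numpy as np
from uniface import RetinaFace
from uniface.recognition import ArcFace

detector, recognizer = RetinaFace(), ArcFace()
img_a, img_b = cv2.imread('person_a.jpg'), cv2.imread('person_b.jpg')  # placeholder paths
face_a, face_b = detector.detect(img_a)[0], detector.detect(img_b)[0]
emb_a = recognizer(img_a, face_a.landmarks)   # .landmarks assumed to hold the 5-point keypoints
emb_b = recognizer(img_b, face_b.landmarks)
print(float(np.dot(emb_a, emb_b)))            # cosine similarity, since both vectors are unit length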
|
||||
|
||||
@@ -2,13 +2,14 @@
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from typing import Optional
|
||||
from __future__ import annotations
|
||||
|
||||
from uniface.constants import ArcFaceWeights, MobileFaceWeights, SphereFaceWeights
|
||||
from uniface.model_store import verify_model_weights
|
||||
|
||||
from .base import BaseRecognizer, PreprocessConfig
|
||||
|
||||
__all__ = ["ArcFace", "MobileFace", "SphereFace"]
|
||||
__all__ = ['ArcFace', 'MobileFace', 'SphereFace']
|
||||
|
||||
|
||||
class ArcFace(BaseRecognizer):
|
||||
@@ -33,14 +34,10 @@ class ArcFace(BaseRecognizer):
|
||||
def __init__(
|
||||
self,
|
||||
model_name: ArcFaceWeights = ArcFaceWeights.MNET,
|
||||
preprocessing: Optional[PreprocessConfig] = None
|
||||
preprocessing: PreprocessConfig | None = None,
|
||||
) -> None:
|
||||
if preprocessing is None:
|
||||
preprocessing = PreprocessConfig(
|
||||
input_mean=127.5,
|
||||
input_std=127.5,
|
||||
input_size=(112, 112)
|
||||
)
|
||||
preprocessing = PreprocessConfig(input_mean=127.5, input_std=127.5, input_size=(112, 112))
|
||||
model_path = verify_model_weights(model_name)
|
||||
super().__init__(model_path=model_path, preprocessing=preprocessing)
|
||||
|
||||
@@ -67,14 +64,10 @@ class MobileFace(BaseRecognizer):
|
||||
def __init__(
|
||||
self,
|
||||
model_name: MobileFaceWeights = MobileFaceWeights.MNET_V2,
|
||||
preprocessing: Optional[PreprocessConfig] = None
|
||||
preprocessing: PreprocessConfig | None = None,
|
||||
) -> None:
|
||||
if preprocessing is None:
|
||||
preprocessing = PreprocessConfig(
|
||||
input_mean=127.5,
|
||||
input_std=127.5,
|
||||
input_size=(112, 112)
|
||||
)
|
||||
preprocessing = PreprocessConfig(input_mean=127.5, input_std=127.5, input_size=(112, 112))
|
||||
model_path = verify_model_weights(model_name)
|
||||
super().__init__(model_path=model_path, preprocessing=preprocessing)
|
||||
|
||||
@@ -101,14 +94,10 @@ class SphereFace(BaseRecognizer):
|
||||
def __init__(
|
||||
self,
|
||||
model_name: SphereFaceWeights = SphereFaceWeights.SPHERE20,
|
||||
preprocessing: Optional[PreprocessConfig] = None
|
||||
preprocessing: PreprocessConfig | None = None,
|
||||
) -> None:
|
||||
if preprocessing is None:
|
||||
preprocessing = PreprocessConfig(
|
||||
input_mean=127.5,
|
||||
input_std=127.5,
|
||||
input_size=(112, 112)
|
||||
)
|
||||
preprocessing = PreprocessConfig(input_mean=127.5, input_std=127.5, input_size=(112, 112))
|
||||
|
||||
model_path = verify_model_weights(model_name)
|
||||
super().__init__(model_path=model_path, preprocessing=preprocessing)
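
Since each recognizer only builds the default PreprocessConfig when none is supplied, a custom configuration can be passed explicitly; a small sketch (the per-channel values below are illustrative, not required constants):

from uniface.recognition import ArcFace
from uniface.recognition.base import PreprocessConfig

config = PreprocessConfig(input_mean=[127.5, 127.5, 127.5], input_std=[128.0, 128.0, 128.0])
recognizer = ArcFace(preprocessing=config)   # list-valued std triggers per-channel normalization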
|
||||
|
||||
47 uniface/spoofing/__init__.py Normal file
@@ -0,0 +1,47 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from uniface.constants import MiniFASNetWeights
|
||||
from uniface.types import SpoofingResult
|
||||
|
||||
from .base import BaseSpoofer
|
||||
from .minifasnet import MiniFASNet
|
||||
|
||||
__all__ = [
|
||||
'BaseSpoofer',
|
||||
'MiniFASNet',
|
||||
'MiniFASNetWeights',
|
||||
'SpoofingResult',
|
||||
'create_spoofer',
|
||||
]
|
||||
|
||||
|
||||
def create_spoofer(
|
||||
model_name: MiniFASNetWeights = MiniFASNetWeights.V2,
|
||||
scale: float | None = None,
|
||||
) -> MiniFASNet:
|
||||
"""Factory function to create a face anti-spoofing model.
|
||||
|
||||
This is a convenience function that creates a MiniFASNet instance
|
||||
with the specified model variant and optional custom scale.
|
||||
|
||||
Args:
|
||||
model_name: The model variant to use. Options:
|
||||
- MiniFASNetWeights.V2: Improved version (default), uses scale=2.7
|
||||
- MiniFASNetWeights.V1SE: Squeeze-and-excitation version, uses scale=4.0
|
||||
scale: Custom crop scale factor for face region. If None, uses the
|
||||
default scale for the selected model variant.
|
||||
|
||||
Returns:
|
||||
An initialized face anti-spoofing model.
|
||||
|
||||
Example:
|
||||
>>> from uniface.spoofing import create_spoofer, MiniFASNetWeights
|
||||
>>> spoofer = create_spoofer()
|
||||
>>> result = spoofer.predict(image, face.bbox)
|
||||
>>> print(f'Is real: {result.is_real}, Confidence: {result.confidence:.2%}')
|
||||
"""
|
||||
return MiniFASNet(model_name=model_name, scale=scale)
|
||||
112 uniface/spoofing/base.py Normal file
@@ -0,0 +1,112 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
import numpy as np
|
||||
|
||||
from uniface.types import SpoofingResult
|
||||
|
||||
__all__ = ['BaseSpoofer', 'SpoofingResult']
|
||||
|
||||
|
||||
class BaseSpoofer(ABC):
|
||||
"""
|
||||
Abstract base class for all face anti-spoofing models.
|
||||
|
||||
This class defines the common interface that all anti-spoofing models must implement,
|
||||
ensuring consistency across different spoofing detection methods. Anti-spoofing models
|
||||
detect whether a face is real (live person) or fake (photo, video, mask, etc.).
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def _initialize_model(self) -> None:
|
||||
"""
|
||||
Initialize the underlying model for inference.
|
||||
|
||||
This method should handle loading model weights, creating the
|
||||
inference session (e.g., ONNX Runtime), and any necessary
|
||||
setup procedures to prepare the model for prediction.
|
||||
|
||||
Raises:
|
||||
RuntimeError: If the model fails to load or initialize.
|
||||
"""
|
||||
raise NotImplementedError('Subclasses must implement the _initialize_model method.')
|
||||
|
||||
@abstractmethod
|
||||
def preprocess(self, image: np.ndarray, bbox: list | np.ndarray) -> np.ndarray:
|
||||
"""
|
||||
Preprocess the input image for model inference.
|
||||
|
||||
This method should crop the face region using the bounding box,
|
||||
resize it to the model's expected input size, and normalize
|
||||
the pixel values as required by the model.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): Input image in BGR format with shape (H, W, C).
|
||||
bbox (Union[List, np.ndarray]): Face bounding box in [x1, y1, x2, y2] format.
|
||||
|
||||
Returns:
|
||||
np.ndarray: The preprocessed image tensor ready for inference,
|
||||
typically with shape (1, C, H, W).
|
||||
"""
|
||||
raise NotImplementedError('Subclasses must implement the preprocess method.')
|
||||
|
||||
@abstractmethod
|
||||
def postprocess(self, outputs: np.ndarray) -> SpoofingResult:
|
||||
"""
|
||||
Postprocess raw model outputs into prediction result.
|
||||
|
||||
This method takes the raw output from the model's inference and
|
||||
converts it into a SpoofingResult.
|
||||
|
||||
Args:
|
||||
outputs (np.ndarray): Raw outputs from the model inference (logits).
|
||||
|
||||
Returns:
|
||||
SpoofingResult: Result containing is_real flag and confidence score.
|
||||
"""
|
||||
raise NotImplementedError('Subclasses must implement the postprocess method.')
|
||||
|
||||
@abstractmethod
|
||||
def predict(self, image: np.ndarray, bbox: list | np.ndarray) -> SpoofingResult:
|
||||
"""
|
||||
Perform end-to-end anti-spoofing prediction on a face.
|
||||
|
||||
This method orchestrates the full pipeline: preprocessing the input,
|
||||
running inference, and postprocessing to return the prediction.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): Input image in BGR format containing the face.
|
||||
bbox (Union[List, np.ndarray]): Face bounding box in [x1, y1, x2, y2] format.
|
||||
This is typically obtained from a face detector.
|
||||
|
||||
Returns:
|
||||
SpoofingResult: Result containing is_real flag and confidence score.
|
||||
|
||||
Example:
|
||||
>>> spoofer = MiniFASNet()
|
||||
>>> detector = RetinaFace()
|
||||
>>> faces = detector.detect(image)
|
||||
>>> for face in faces:
|
||||
... result = spoofer.predict(image, face.bbox)
|
||||
... label = 'Real' if result.is_real else 'Fake'
|
||||
... print(f'{label}: {result.confidence:.2%}')
|
||||
"""
|
||||
raise NotImplementedError('Subclasses must implement the predict method.')
|
||||
|
||||
def __call__(self, image: np.ndarray, bbox: list | np.ndarray) -> SpoofingResult:
|
||||
"""
|
||||
Provides a convenient, callable shortcut for the `predict` method.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): Input image in BGR format.
|
||||
bbox (Union[List, np.ndarray]): Face bounding box in [x1, y1, x2, y2] format.
|
||||
|
||||
Returns:
|
||||
SpoofingResult: Result containing is_real flag and confidence score.
|
||||
"""
|
||||
return self.predict(image, bbox)
|
||||
220 uniface/spoofing/minifasnet.py Normal file
@@ -0,0 +1,220 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from uniface.constants import MiniFASNetWeights
|
||||
from uniface.log import Logger
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.onnx_utils import create_onnx_session
|
||||
from uniface.types import SpoofingResult
|
||||
|
||||
from .base import BaseSpoofer
|
||||
|
||||
__all__ = ['MiniFASNet']
|
||||
|
||||
# Default crop scales for each model variant
|
||||
DEFAULT_SCALES = {
|
||||
MiniFASNetWeights.V1SE: 4.0,
|
||||
MiniFASNetWeights.V2: 2.7,
|
||||
}
|
||||
|
||||
|
||||
class MiniFASNet(BaseSpoofer):
|
||||
"""
|
||||
MiniFASNet: Lightweight Face Anti-Spoofing with ONNX Runtime.
|
||||
|
||||
MiniFASNet is a face anti-spoofing model that detects whether a face is real
|
||||
(live person) or fake (photo, video replay, mask, etc.). It supports two model
|
||||
variants: V1SE (with squeeze-and-excitation) and V2 (improved version).
|
||||
|
||||
The model takes a face region cropped from the image using a bounding box
|
||||
and predicts whether it's a real or spoofed face.
|
||||
|
||||
Reference:
|
||||
https://github.com/yakhyo/face-anti-spoofing
|
||||
|
||||
Args:
|
||||
model_name (MiniFASNetWeights): The enum specifying the model variant to load.
|
||||
Options: V1SE (scale=4.0), V2 (scale=2.7).
|
||||
Defaults to `MiniFASNetWeights.V2`.
|
||||
scale (Optional[float]): Custom crop scale factor for face region.
|
||||
If None, uses the default scale for the selected model variant.
|
||||
V1SE uses 4.0, V2 uses 2.7.
|
||||
|
||||
Attributes:
|
||||
scale (float): Crop scale factor for face region extraction.
|
||||
input_size (Tuple[int, int]): Model input dimensions (width, height).
|
||||
|
||||
Example:
|
||||
>>> from uniface.spoofing import MiniFASNet
|
||||
>>> from uniface import RetinaFace
|
||||
>>>
|
||||
>>> detector = RetinaFace()
|
||||
>>> spoofer = MiniFASNet()
|
||||
>>>
|
||||
>>> # Detect faces and check if they are real
|
||||
>>> faces = detector.detect(image)
|
||||
>>> for face in faces:
|
||||
... result = spoofer.predict(image, face.bbox)
|
||||
... label = 'Real' if result.is_real else 'Fake'
|
||||
... print(f'{label}: {result.confidence:.2%}')
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
model_name: MiniFASNetWeights = MiniFASNetWeights.V2,
|
||||
scale: float | None = None,
|
||||
) -> None:
|
||||
Logger.info(f'Initializing MiniFASNet with model={model_name.name}')
|
||||
|
||||
# Use default scale for the model variant if not specified
|
||||
self.scale = scale if scale is not None else DEFAULT_SCALES.get(model_name, 2.7)
|
||||
|
||||
self.model_path = verify_model_weights(model_name)
|
||||
self._initialize_model()
|
||||
|
||||
def _initialize_model(self) -> None:
|
||||
"""
|
||||
Initialize the ONNX model from the stored model path.
|
||||
|
||||
Raises:
|
||||
RuntimeError: If the model fails to load or initialize.
|
||||
"""
|
||||
try:
|
||||
self.session = create_onnx_session(self.model_path)
|
||||
|
||||
# Get input configuration
|
||||
input_cfg = self.session.get_inputs()[0]
|
||||
self.input_name = input_cfg.name
|
||||
# Input shape is (batch, channels, height, width) - we need (width, height)
|
||||
self.input_size = tuple(input_cfg.shape[2:4][::-1]) # (width, height)
|
||||
|
||||
# Get output configuration
|
||||
output_cfg = self.session.get_outputs()[0]
|
||||
self.output_name = output_cfg.name
|
||||
|
||||
Logger.info(f'MiniFASNet initialized with input size {self.input_size}, scale={self.scale}')
|
||||
|
||||
except Exception as e:
|
||||
Logger.error(f"Failed to load MiniFASNet model from '{self.model_path}'", exc_info=True)
|
||||
raise RuntimeError(f'Failed to initialize MiniFASNet model: {e}') from e
|
||||
|
||||
def _xyxy_to_xywh(self, bbox: list | np.ndarray) -> list[int]:
|
||||
"""Convert bounding box from [x1, y1, x2, y2] to [x, y, w, h] format."""
|
||||
x1, y1, x2, y2 = bbox[:4]
|
||||
return [int(x1), int(y1), int(x2 - x1), int(y2 - y1)]
|
||||
|
||||
def _crop_face(self, image: np.ndarray, bbox_xywh: list[int]) -> np.ndarray:
|
||||
"""
|
||||
Crop and resize face region from image using scale factor.
|
||||
|
||||
The crop is centered on the face bounding box and scaled to capture
|
||||
more context around the face, which is important for anti-spoofing.
|
||||
|
||||
Args:
|
||||
image: Input image in BGR format.
|
||||
bbox_xywh: Face bounding box in [x, y, w, h] format.
|
||||
|
||||
Returns:
|
||||
Cropped and resized face region.
|
||||
"""
|
||||
src_h, src_w = image.shape[:2]
|
||||
x, y, box_w, box_h = bbox_xywh
|
||||
|
||||
# Calculate the scale to apply based on image and face size
|
||||
scale = min((src_h - 1) / box_h, (src_w - 1) / box_w, self.scale)
|
||||
new_w = box_w * scale
|
||||
new_h = box_h * scale
|
||||
|
||||
# Calculate center of the bounding box
|
||||
center_x = x + box_w / 2
|
||||
center_y = y + box_h / 2
|
||||
|
||||
# Calculate new bounding box coordinates
|
||||
x1 = max(0, int(center_x - new_w / 2))
|
||||
y1 = max(0, int(center_y - new_h / 2))
|
||||
x2 = min(src_w - 1, int(center_x + new_w / 2))
|
||||
y2 = min(src_h - 1, int(center_y + new_h / 2))
|
||||
|
||||
# Crop and resize
|
||||
cropped = image[y1 : y2 + 1, x1 : x2 + 1]
|
||||
resized = cv2.resize(cropped, self.input_size)
|
||||
|
||||
return resized
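
For intuition, a worked example of the scaled crop: with a 1280x720 frame and a 100x120 face box, scale = min(719/120, 1279/100, 2.7) = 2.7, so roughly a 270x324 region centred on the face is cropped (clipped to the image borders) before being resized to the model input size.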
|
||||
|
||||
    def preprocess(self, image: np.ndarray, bbox: list | np.ndarray) -> np.ndarray:
        """
        Preprocess the input image for model inference.

        Crops the face region, converts to float32, and arranges
        dimensions for the model (NCHW format).

        Args:
            image: Input image in BGR format with shape (H, W, C).
            bbox: Face bounding box in [x1, y1, x2, y2] format.

        Returns:
            Preprocessed image tensor with shape (1, C, H, W).
        """
        # Convert bbox format
        bbox_xywh = self._xyxy_to_xywh(bbox)

        # Crop and resize face region
        face = self._crop_face(image, bbox_xywh)

        # Convert to float32 (no normalization needed for this model)
        face = face.astype(np.float32)

        # HWC -> CHW -> NCHW
        face = np.transpose(face, (2, 0, 1))
        face = np.expand_dims(face, axis=0)

        return face
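
    # Illustrative shape flow through preprocess, assuming an 80x80 model input
    # (an assumption; the actual size is read from the ONNX model metadata):
    #   crop      -> (80, 80, 3) uint8 BGR
    #   float32   -> (80, 80, 3)
    #   transpose -> (3, 80, 80)
    #   expand    -> (1, 3, 80, 80), which is what session.run() receives
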
    def _softmax(self, x: np.ndarray) -> np.ndarray:
        """Apply softmax to logits along axis 1."""
        e_x = np.exp(x - np.max(x, axis=1, keepdims=True))
        return e_x / e_x.sum(axis=1, keepdims=True)

    def postprocess(self, outputs: np.ndarray) -> SpoofingResult:
        """
        Postprocess raw model outputs into prediction result.

        Applies softmax to convert logits to probabilities and
        returns the SpoofingResult with is_real flag and confidence score.

        Args:
            outputs: Raw outputs from the model inference (logits).

        Returns:
            SpoofingResult: Result containing is_real flag and confidence score.
        """
        probs = self._softmax(outputs)
        label_idx = int(np.argmax(probs))
        confidence = float(probs[0, label_idx])

        return SpoofingResult(is_real=(label_idx == 1), confidence=confidence)

    def predict(self, image: np.ndarray, bbox: list | np.ndarray) -> SpoofingResult:
        """
        Perform end-to-end anti-spoofing prediction on a face.

        Args:
            image: Input image in BGR format containing the face.
            bbox: Face bounding box in [x1, y1, x2, y2] format.

        Returns:
            SpoofingResult: Result containing is_real flag and confidence score.
        """
        # Preprocess
        input_tensor = self.preprocess(image, bbox)

        # Run inference
        outputs = self.session.run([self.output_name], {self.input_name: input_tensor})[0]

        # Postprocess and return
        return self.postprocess(outputs)
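
A minimal sketch of gating downstream processing on the spoof check, reusing the detector and spoofer objects from the class docstring example; the 0.80 liveness threshold is an illustrative value, not one defined by the library.

import cv2

# `detector` is a RetinaFace instance and `spoofer` a MiniFASNet instance,
# constructed as in the class docstring example above.
image = cv2.imread('people.jpg')
faces = detector.detect(image)

LIVENESS_THRESHOLD = 0.80  # illustrative value, tune per deployment

for face in faces:
    result = spoofer.predict(image, face.bbox)
    if result.is_real and result.confidence >= LIVENESS_THRESHOLD:
        pass  # safe to run recognition / attribute models on this face
    else:
        print(f'Rejected possible spoof: {result}')
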
216
uniface/types.py
Normal file
@@ -0,0 +1,216 @@
# Copyright 2025 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

"""Unified type definitions for UniFace.

This module centralizes all result dataclasses used across the library,
providing consistent and immutable return types for model predictions.

Note on mutability:
    - Result dataclasses (GazeResult, SpoofingResult, EmotionResult, AttributeResult)
      are frozen (immutable) since they represent computation outputs that shouldn't change.
    - Face dataclass is mutable because FaceAnalyzer enriches it with additional
      attributes (embedding, age, gender, etc.) after initial detection.
"""

from __future__ import annotations

from dataclasses import dataclass, fields

import numpy as np

from uniface.face_utils import compute_similarity

__all__ = [
    'AttributeResult',
    'EmotionResult',
    'Face',
    'GazeResult',
    'SpoofingResult',
]


@dataclass(slots=True, frozen=True)
class GazeResult:
    """Result of gaze estimation.

    Attributes:
        pitch: Vertical gaze angle in radians (positive = up, negative = down).
        yaw: Horizontal gaze angle in radians (positive = right, negative = left).
    """

    pitch: float
    yaw: float

    def __repr__(self) -> str:
        return f'GazeResult(pitch={self.pitch:.4f}, yaw={self.yaw:.4f})'


@dataclass(slots=True, frozen=True)
class SpoofingResult:
    """Result of face anti-spoofing detection.

    Attributes:
        is_real: True if the face is real/live, False if fake/spoof.
        confidence: Confidence score for the prediction (0.0 to 1.0).
    """

    is_real: bool
    confidence: float

    def __repr__(self) -> str:
        label = 'Real' if self.is_real else 'Fake'
        return f'SpoofingResult({label}, confidence={self.confidence:.4f})'


@dataclass(slots=True, frozen=True)
class EmotionResult:
    """Result of emotion recognition.

    Attributes:
        emotion: Predicted emotion label (e.g., 'Happy', 'Sad', 'Angry').
        confidence: Confidence score for the prediction (0.0 to 1.0).
    """

    emotion: str
    confidence: float

    def __repr__(self) -> str:
        return f"EmotionResult('{self.emotion}', confidence={self.confidence:.4f})"


@dataclass(slots=True, frozen=True)
class AttributeResult:
    """Unified result structure for face attribute prediction.

    This dataclass provides a consistent return type across different attribute
    prediction models (e.g., AgeGender, FairFace), enabling interoperability
    and unified handling of results.

    Attributes:
        gender: Predicted gender (0=Female, 1=Male).
        age: Exact age in years. Provided by AgeGender model, None for FairFace.
        age_group: Age range string like "20-29". Provided by FairFace, None for AgeGender.
        race: Race/ethnicity label. Provided by FairFace only.

    Properties:
        sex: Gender as a human-readable string ("Female" or "Male").

    Examples:
        >>> # AgeGender result
        >>> result = AttributeResult(gender=1, age=25)
        >>> result.sex
        'Male'

        >>> # FairFace result
        >>> result = AttributeResult(gender=0, age_group='20-29', race='East Asian')
        >>> result.sex
        'Female'
    """

    gender: int
    age: int | None = None
    age_group: str | None = None
    race: str | None = None

    @property
    def sex(self) -> str:
        """Get gender as a string label (Female or Male)."""
        return 'Female' if self.gender == 0 else 'Male'

    def __repr__(self) -> str:
        parts = [f'gender={self.sex}']
        if self.age is not None:
            parts.append(f'age={self.age}')
        if self.age_group is not None:
            parts.append(f'age_group={self.age_group}')
        if self.race is not None:
            parts.append(f'race={self.race}')
        return f'AttributeResult({", ".join(parts)})'


@dataclass(slots=True)
class Face:
    """Detected face with analysis results.

    This dataclass represents a single detected face along with optional
    analysis results such as embeddings, age, gender, and race predictions.

    Note: This dataclass is mutable (not frozen) because FaceAnalyzer enriches
    Face objects with additional attributes after initial detection.

    Attributes:
        bbox: Bounding box coordinates [x1, y1, x2, y2].
        confidence: Detection confidence score.
        landmarks: Facial landmark coordinates (typically 5 points).
        embedding: Face embedding vector for recognition (optional).
        gender: Predicted gender, 0=Female, 1=Male (optional).
        age: Predicted exact age in years (optional, from AgeGender model).
        age_group: Predicted age range like "20-29" (optional, from FairFace).
        race: Predicted race/ethnicity (optional, from FairFace).
        emotion: Predicted emotion label (optional, from Emotion model).
        emotion_confidence: Confidence score for emotion prediction (optional).

    Properties:
        sex: Gender as a human-readable string ("Female" or "Male").
        bbox_xyxy: Bounding box in (x1, y1, x2, y2) format.
        bbox_xywh: Bounding box in (x1, y1, width, height) format.
    """

    # Required attributes (from detection)
    bbox: np.ndarray
    confidence: float
    landmarks: np.ndarray

    # Optional attributes (enriched by analyzers)
    embedding: np.ndarray | None = None
    gender: int | None = None
    age: int | None = None
    age_group: str | None = None
    race: str | None = None
    emotion: str | None = None
    emotion_confidence: float | None = None

    def compute_similarity(self, other: Face) -> float:
        """Compute cosine similarity with another face."""
        if self.embedding is None or other.embedding is None:
            raise ValueError('Both faces must have embeddings for similarity computation')
        return float(compute_similarity(self.embedding, other.embedding))

    def to_dict(self) -> dict:
        """Convert to dictionary."""
        return {f.name: getattr(self, f.name) for f in fields(self)}

    @property
    def sex(self) -> str | None:
        """Get gender as a string label (Female or Male)."""
        if self.gender is None:
            return None
        return 'Female' if self.gender == 0 else 'Male'

    @property
    def bbox_xyxy(self) -> np.ndarray:
        """Get bounding box coordinates in (x1, y1, x2, y2) format."""
        return self.bbox.copy()

    @property
    def bbox_xywh(self) -> np.ndarray:
        """Get bounding box coordinates in (x1, y1, w, h) format."""
        return np.array([self.bbox[0], self.bbox[1], self.bbox[2] - self.bbox[0], self.bbox[3] - self.bbox[1]])

    def __repr__(self) -> str:
        parts = [f'Face(confidence={self.confidence:.3f}']
        if self.age is not None:
            parts.append(f'age={self.age}')
        if self.age_group is not None:
            parts.append(f'age_group={self.age_group}')
        if self.gender is not None:
            parts.append(f'sex={self.sex}')
        if self.race is not None:
            parts.append(f'race={self.race}')
        if self.emotion is not None:
            parts.append(f'emotion={self.emotion}')
        if self.embedding is not None:
            parts.append(f'embedding_dim={self.embedding.shape[0]}')
        return ', '.join(parts) + ')'
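
A small usage sketch for Face.compute_similarity with synthetic, L2-normalized embeddings; real embeddings would come from a recognition model such as ArcFace, and the 0.35 match threshold is an illustrative value rather than one defined by the library.

import numpy as np

from uniface.types import Face


def make_face(vec: np.ndarray) -> Face:
    # Required detection fields plus a unit-norm embedding
    return Face(
        bbox=np.array([0.0, 0.0, 100.0, 100.0]),
        confidence=0.99,
        landmarks=np.zeros((5, 2)),
        embedding=vec / np.linalg.norm(vec),
    )


rng = np.random.default_rng(0)
face_a = make_face(rng.normal(size=512))
face_b = make_face(rng.normal(size=512))

similarity = face_a.compute_similarity(face_b)
print(f'cosine similarity: {similarity:.3f}')
print('same person' if similarity > 0.35 else 'different people')  # illustrative threshold
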
@@ -2,49 +2,340 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

"""Visualization utilities for UniFace.

This module provides functions for drawing detection results, gaze directions,
and face parsing segmentation maps on images.
"""

from __future__ import annotations

import cv2
import numpy as np

__all__ = [
    'FACE_PARSING_COLORS',
    'FACE_PARSING_LABELS',
    'draw_detections',
    'draw_fancy_bbox',
    'draw_gaze',
    'vis_parsing_maps',
]

# Face parsing component names (19 classes)
FACE_PARSING_LABELS = [
    'background',
    'skin',
    'l_brow',
    'r_brow',
    'l_eye',
    'r_eye',
    'eye_g',
    'l_ear',
    'r_ear',
    'ear_r',
    'nose',
    'mouth',
    'u_lip',
    'l_lip',
    'neck',
    'neck_l',
    'cloth',
    'hair',
    'hat',
]

# Color palette for face parsing visualization
FACE_PARSING_COLORS = [
    [0, 0, 0],
    [255, 85, 0],
    [255, 170, 0],
    [255, 0, 85],
    [255, 0, 170],
    [0, 255, 0],
    [85, 255, 0],
    [170, 255, 0],
    [0, 255, 85],
    [0, 255, 170],
    [0, 0, 255],
    [85, 0, 255],
    [170, 0, 255],
    [0, 85, 255],
    [0, 170, 255],
    [255, 255, 0],
    [255, 255, 85],
    [255, 255, 170],
    [255, 0, 255],
]

def draw_detections(
    *,
    image: np.ndarray,
    bboxes: list[np.ndarray] | list[list[float]],
    scores: np.ndarray | list[float],
    landmarks: list[np.ndarray] | list[list[list[float]]],
    vis_threshold: float = 0.6,
    draw_score: bool = False,
    fancy_bbox: bool = True,
) -> None:
    """Draw bounding boxes, landmarks, and optional scores on an image.

    Modifies the image in-place.

    Args:
        image: Input image to draw on (modified in-place).
        bboxes: List of bounding boxes as [x1, y1, x2, y2].
        scores: List of confidence scores.
        landmarks: List of landmark sets with shape (5, 2).
        vis_threshold: Confidence threshold for filtering. Defaults to 0.6.
        draw_score: Whether to draw confidence scores. Defaults to False.
        fancy_bbox: Use corner-style bounding boxes. Defaults to True.
    """
    colors = [(0, 0, 255), (0, 255, 255), (255, 0, 255), (0, 255, 0), (255, 0, 0)]

    # Calculate line thickness based on image size
    line_thickness = max(round(sum(image.shape[:2]) / 2 * 0.003), 2)

    # Filter detections by confidence threshold
    keep_indices = [i for i, score in enumerate(scores) if score >= vis_threshold]

    # Draw the filtered detections
    for i in keep_indices:
        bbox = np.array(bboxes[i], dtype=np.int32)
        score = scores[i]
        landmark_set = np.array(landmarks[i], dtype=np.int32)

        # Calculate dynamic font scale based on bbox height
        bbox_h = bbox[3] - bbox[1]
        font_scale = max(0.4, min(0.7, bbox_h / 200))
        font_thickness = 2

        # Draw bounding box
        if fancy_bbox:
            draw_fancy_bbox(image, bbox, color=(0, 255, 0), thickness=line_thickness, proportion=0.2)
        else:
            cv2.rectangle(image, tuple(bbox[:2]), tuple(bbox[2:]), (0, 255, 0), line_thickness)

        # Draw confidence score with background
        if draw_score:
            text = f'{score:.2f}'
            (text_width, text_height), baseline = cv2.getTextSize(
                text, cv2.FONT_HERSHEY_SIMPLEX, font_scale, font_thickness
            )

            # Draw background rectangle
            cv2.rectangle(
                image,
                (bbox[0], bbox[1] - text_height - baseline - 10),
                (bbox[0] + text_width + 10, bbox[1]),
                (0, 255, 0),
                -1,
            )

            # Draw text
            cv2.putText(
                image,
                text,
                (bbox[0] + 5, bbox[1] - 5),
                cv2.FONT_HERSHEY_SIMPLEX,
                font_scale,
                (0, 0, 0),
                font_thickness,
            )

        # Draw landmarks
        for j, point in enumerate(landmark_set):
            cv2.circle(image, tuple(point), line_thickness + 1, colors[j], -1)

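
A usage sketch for the updated draw_detections; it assumes a RetinaFace detector whose detect() returns Face objects carrying bbox, confidence, and landmarks as defined in uniface/types.py, so check the detector's actual return type against the library before copying this.

import cv2

from uniface import RetinaFace
from uniface.visualization import draw_detections

detector = RetinaFace()
image = cv2.imread('group_photo.jpg')
faces = detector.detect(image)  # assumed to yield Face objects

draw_detections(
    image=image,
    bboxes=[face.bbox for face in faces],
    scores=[face.confidence for face in faces],
    landmarks=[face.landmarks for face in faces],
    vis_threshold=0.6,
    draw_score=True,
    fancy_bbox=True,
)
cv2.imwrite('group_photo_annotated.jpg', image)  # drawing happens in-place
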
def draw_fancy_bbox(
    image: np.ndarray,
    bbox: np.ndarray,
    color: tuple[int, int, int] = (0, 255, 0),
    thickness: int = 3,
    proportion: float = 0.2,
) -> None:
    """Draw a bounding box with fancy corners on an image.

    Args:
        image: Input image to draw on (modified in-place).
        bbox: Bounding box coordinates [x1, y1, x2, y2].
        color: Color of the bounding box in BGR. Defaults to green.
        thickness: Thickness of the corner lines. Defaults to 3.
        proportion: Proportion of corner length to box dimensions. Defaults to 0.2.
    """
    x1, y1, x2, y2 = map(int, bbox)
    width = x2 - x1
    height = y2 - y1

    corner_length = int(proportion * min(width, height))

    # Draw the rectangle
    cv2.rectangle(image, (x1, y1), (x2, y2), color, 1)

    # Top-left corner
    cv2.line(image, (x1, y1), (x1 + corner_length, y1), color, thickness)
    cv2.line(image, (x1, y1), (x1, y1 + corner_length), color, thickness)

    # Top-right corner
    cv2.line(image, (x2, y1), (x2 - corner_length, y1), color, thickness)
    cv2.line(image, (x2, y1), (x2, y1 + corner_length), color, thickness)

    # Bottom-left corner
    cv2.line(image, (x1, y2), (x1, y2 - corner_length), color, thickness)
    cv2.line(image, (x1, y2), (x1 + corner_length, y2), color, thickness)

    # Bottom-right corner
    cv2.line(image, (x2, y2), (x2, y2 - corner_length), color, thickness)
    cv2.line(image, (x2, y2), (x2 - corner_length, y2), color, thickness)


def draw_gaze(
    image: np.ndarray,
    bbox: np.ndarray,
    pitch: np.ndarray | float,
    yaw: np.ndarray | float,
    *,
    draw_bbox: bool = True,
    fancy_bbox: bool = True,
    draw_angles: bool = True,
) -> None:
    """Draw gaze direction with optional bounding box on an image.

    Args:
        image: Input image to draw on (modified in-place).
        bbox: Face bounding box [x1, y1, x2, y2].
        pitch: Vertical gaze angle in radians.
        yaw: Horizontal gaze angle in radians.
        draw_bbox: Whether to draw the bounding box. Defaults to True.
        fancy_bbox: Use fancy corner-style bbox. Defaults to True.
        draw_angles: Whether to display pitch/yaw values as text. Defaults to True.
    """
    x_min, y_min, x_max, y_max = map(int, bbox[:4])

    # Calculate dynamic line thickness based on image size (same as draw_detections)
    line_thickness = max(round(sum(image.shape[:2]) / 2 * 0.003), 2)

    # Calculate dynamic font scale based on bbox height (same as draw_detections)
    bbox_h = y_max - y_min
    font_scale = max(0.4, min(0.7, bbox_h / 200))
    font_thickness = 2

    # Draw bounding box if requested
    if draw_bbox:
        if fancy_bbox:
            draw_fancy_bbox(image, bbox, color=(0, 255, 0), thickness=line_thickness)
        else:
            cv2.rectangle(image, (x_min, y_min), (x_max, y_max), (0, 255, 0), line_thickness)

    # Calculate center of the bounding box
    x_center = (x_min + x_max) // 2
    y_center = (y_min + y_max) // 2

    # Calculate the direction of the gaze
    length = x_max - x_min
    dx = int(-length * np.sin(pitch) * np.cos(yaw))
    dy = int(-length * np.sin(yaw))

    point1 = (x_center, y_center)
    point2 = (x_center + dx, y_center + dy)

    # Calculate dynamic center point radius based on line thickness
    center_radius = max(line_thickness + 1, 4)

    # Draw gaze direction
    cv2.circle(image, (x_center, y_center), radius=center_radius, color=(0, 0, 255), thickness=-1)
    cv2.arrowedLine(
        image,
        point1,
        point2,
        color=(0, 0, 255),
        thickness=line_thickness,
        line_type=cv2.LINE_AA,
        tipLength=0.25,
    )

    # Draw angle values
    if draw_angles:
        text = f'P:{np.degrees(pitch):.0f}deg Y:{np.degrees(yaw):.0f}deg'
        (text_width, text_height), baseline = cv2.getTextSize(
            text, cv2.FONT_HERSHEY_SIMPLEX, font_scale, font_thickness
        )

        # Draw background rectangle for text
        cv2.rectangle(
            image,
            (x_min, y_min - text_height - baseline - 10),
            (x_min + text_width + 10, y_min),
            (0, 0, 255),
            -1,
        )

        # Draw text
        cv2.putText(
            image,
            text,
            (x_min + 5, y_min - 5),
            cv2.FONT_HERSHEY_SIMPLEX,
            font_scale,
            (255, 255, 255),
            font_thickness,
        )

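
A minimal sketch wiring a GazeResult into draw_gaze; the bounding box and the pitch/yaw values are hard-coded for illustration, whereas in practice they would come from a face detector and a gaze-estimation model.

import cv2
import numpy as np

from uniface.types import GazeResult
from uniface.visualization import draw_gaze

image = cv2.imread('face.jpg')
bbox = np.array([120, 80, 360, 340])  # illustrative face box [x1, y1, x2, y2]
gaze = GazeResult(pitch=0.15, yaw=-0.30)  # illustrative angles in radians

draw_gaze(image, bbox, gaze.pitch, gaze.yaw, draw_bbox=True, draw_angles=True)
cv2.imwrite('face_gaze.jpg', image)
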
def vis_parsing_maps(
    image: np.ndarray,
    segmentation_mask: np.ndarray,
    *,
    save_image: bool = False,
    save_path: str = 'result.png',
) -> np.ndarray:
    """Visualize face parsing segmentation mask by overlaying colored regions.

    Args:
        image: Input face image in RGB format with shape (H, W, 3).
        segmentation_mask: Segmentation mask with shape (H, W) where each pixel
            value represents a facial component class (0-18).
        save_image: Whether to save the visualization to disk. Defaults to False.
        save_path: Path to save the visualization if save_image is True.

    Returns:
        Blended image with segmentation overlay in BGR format.

    Example:
        >>> import cv2
        >>> from uniface.parsing import BiSeNet
        >>> from uniface.visualization import vis_parsing_maps
        >>> parser = BiSeNet()
        >>> face_image = cv2.imread('face.jpg')
        >>> mask = parser.parse(face_image)
        >>> face_rgb = cv2.cvtColor(face_image, cv2.COLOR_BGR2RGB)
        >>> result = vis_parsing_maps(face_rgb, mask)
        >>> cv2.imwrite('parsed_face.jpg', result)
    """
    # Create numpy arrays for image and segmentation mask
    image = np.array(image).copy().astype(np.uint8)
    segmentation_mask = segmentation_mask.copy().astype(np.uint8)

    # Create a color mask
    segmentation_mask_color = np.zeros((segmentation_mask.shape[0], segmentation_mask.shape[1], 3))

    num_classes = np.max(segmentation_mask)

    for class_index in range(1, num_classes + 1):
        class_pixels = np.where(segmentation_mask == class_index)
        segmentation_mask_color[class_pixels[0], class_pixels[1], :] = FACE_PARSING_COLORS[class_index]

    segmentation_mask_color = segmentation_mask_color.astype(np.uint8)

    # Convert image to BGR format for blending
    bgr_image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

    # Blend the image with the segmentation mask
    blended_image = cv2.addWeighted(bgr_image, 0.6, segmentation_mask_color, 0.4, 0)

    # Save the result if required
    if save_image:
        cv2.imwrite(save_path, blended_image, [int(cv2.IMWRITE_JPEG_QUALITY), 100])

    return blended_image