Mirror of https://github.com/yakhyo/uniface.git, synced 2025-12-30 09:02:25 +00:00

Compare commits (13 commits):

cbcd89b167
50226041c9
64ad0d2f53
7c98a60d26
d97a3b2cb2
2200ba063c
9bcbfa65c2
96306a0910
3389aa3e4c
b282e6ccc1
d085c6a822
13b518e96d
1b877bc9fc
BIN .github/logos/gaze_crop.png (vendored, new file, 716 KiB) - binary file not shown

BIN .github/logos/gaze_org.png (vendored, new file, 673 KiB) - binary file not shown
.github/workflows/ci.yml (vendored, 38 changes)

@@ -10,14 +10,31 @@ on:
      - main
      - develop

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  test:
  lint:
    runs-on: ubuntu-latest
    timeout-minutes: 5
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: '3.11'
      - uses: pre-commit/action@v3.0.1

  test:
    runs-on: ${{ matrix.os }}
    timeout-minutes: 15
    needs: lint

    strategy:
      fail-fast: false
      matrix:
        python-version: ["3.10", "3.11", "3.12", "3.13"]
        os: [ubuntu-latest, macos-latest, windows-latest]
        python-version: ["3.11", "3.13"]

    steps:
      - name: Checkout code

@@ -27,7 +44,7 @@ jobs:
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
          cache: 'pip'
          cache: "pip"

      - name: Install dependencies
        run: |

@@ -38,21 +55,15 @@ jobs:
        run: |
          python -c "import onnxruntime as ort; print('Available providers:', ort.get_available_providers())"

      - name: Lint with ruff (if available)
        run: |
          pip install ruff || true
          ruff check . --exit-zero || true
        continue-on-error: true

      - name: Run tests
        run: pytest -v --tb=short

      - name: Test package imports
        run: |
          python -c "from uniface import RetinaFace, ArcFace, Landmark106, AgeGender; print('All imports successful')"
        run: python -c "import uniface; print(f'uniface {uniface.__version__} loaded with {len(uniface.__all__)} exports')"

  build:
    runs-on: ubuntu-latest
    timeout-minutes: 10
    needs: test

    steps:

@@ -62,8 +73,8 @@ jobs:
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"
          cache: 'pip'
          python-version: "3.11"
          cache: "pip"

      - name: Install build tools
        run: |

@@ -84,4 +95,3 @@ jobs:
          name: dist-python-${{ github.sha }}
          path: dist/
          retention-days: 7
.github/workflows/publish.yml (vendored, 17 changes)

@@ -5,9 +5,14 @@ on:
    tags:
      - "v*.*.*" # Trigger only on version tags like v0.1.9

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  validate:
    runs-on: ubuntu-latest
    timeout-minutes: 5
    outputs:
      version: ${{ steps.get_version.outputs.version }}
      tag_version: ${{ steps.get_version.outputs.tag_version }}

@@ -16,13 +21,18 @@ jobs:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"

      - name: Get version from tag and pyproject.toml
        id: get_version
        run: |
          TAG_VERSION=${GITHUB_REF#refs/tags/v}
          echo "tag_version=$TAG_VERSION" >> $GITHUB_OUTPUT

          PYPROJECT_VERSION=$(grep -Po '(?<=^version = ")[^"]*' pyproject.toml)
          PYPROJECT_VERSION=$(python -c "import tomllib; print(tomllib.load(open('pyproject.toml','rb'))['project']['version'])")
          echo "version=$PYPROJECT_VERSION" >> $GITHUB_OUTPUT

          echo "Tag version: v$TAG_VERSION"

@@ -38,12 +48,13 @@ jobs:

  test:
    runs-on: ubuntu-latest
    timeout-minutes: 15
    needs: validate

    strategy:
      fail-fast: false
      matrix:
        python-version: ["3.10", "3.11", "3.12", "3.13"]
        python-version: ["3.11", "3.13"]

    steps:
      - name: Checkout code

@@ -65,6 +76,7 @@ jobs:

  publish:
    runs-on: ubuntu-latest
    timeout-minutes: 10
    needs: [validate, test]
    permissions:
      contents: write

@@ -105,4 +117,3 @@ jobs:
        with:
          files: dist/*
          generate_release_notes: true
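For reference, the tag-vs-`pyproject.toml` comparison that the validate job performs can be reproduced locally. A minimal sketch, assuming Python 3.11+ for `tomllib`; the hard-coded tag and the exact failure handling are illustrative, not taken from the workflow:

```python
# Local sketch of the tag-vs-pyproject.toml version check (assumes Python 3.11+ for tomllib).
# The workflow reads the tag from GITHUB_REF; here it is a placeholder string.
import sys
import tomllib

tag_version = 'v2.0.0'.removeprefix('v')  # placeholder tag

with open('pyproject.toml', 'rb') as f:
    pyproject_version = tomllib.load(f)['project']['version']

if tag_version != pyproject_version:
    sys.exit(f'Tag v{tag_version} does not match pyproject.toml version {pyproject_version}')
print(f'Version OK: {pyproject_version}')
```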
.pre-commit-config.yaml (new file, 40 lines)

@@ -0,0 +1,40 @@
# Pre-commit configuration for UniFace
# See https://pre-commit.com for more information
# See https://pre-commit.com/hooks.html for more hooks

repos:
  # General file checks
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v6.0.0
    hooks:
      - id: trailing-whitespace
      - id: end-of-file-fixer
      - id: check-yaml
      - id: check-toml
      - id: check-added-large-files
        args: ['--maxkb=1000']
      - id: check-merge-conflict
      - id: debug-statements
      - id: check-ast

  # Ruff - Fast Python linter and formatter
  - repo: https://github.com/astral-sh/ruff-pre-commit
    rev: v0.14.10
    hooks:
      - id: ruff
        args: [--fix, --unsafe-fixes, --exit-non-zero-on-fix]
      - id: ruff-format

  # Security checks
  - repo: https://github.com/PyCQA/bandit
    rev: 1.9.2
    hooks:
      - id: bandit
        args: [-c, pyproject.toml]
        additional_dependencies: ['bandit[toml]']
        exclude: ^tests/

# Configuration
ci:
  autofix_commit_msg: 'style: auto-fix by pre-commit hooks'
  autoupdate_commit_msg: 'chore: update pre-commit hooks'
CONTRIBUTING.md (165 changes)

@@ -16,16 +16,9 @@ Thank you for considering contributing to UniFace! We welcome contributions of a
2. Create a new branch for your feature
3. Write clear, documented code with type hints
4. Add tests for new functionality
5. Ensure all tests pass
5. Ensure all tests pass and pre-commit hooks are satisfied
6. Submit a pull request with a clear description

### Code Style

- Follow PEP8 guidelines
- Use type hints (Python 3.10+)
- Write docstrings for public APIs
- Keep code simple and readable

## Development Setup

```bash

@@ -34,30 +27,164 @@ cd uniface
pip install -e ".[dev]"
```

### Setting Up Pre-commit Hooks

We use [pre-commit](https://pre-commit.com/) to ensure code quality and consistency. Install and configure it:

```bash
# Install pre-commit
pip install pre-commit

# Install the git hooks
pre-commit install

# (Optional) Run against all files
pre-commit run --all-files
```

Once installed, pre-commit will automatically run on every commit to check:

- Code formatting and linting (Ruff)
- Security issues (Bandit)
- General file hygiene (trailing whitespace, YAML/TOML validity, etc.)

**Note:** All PRs are automatically checked by CI. The merge button will only be available after all checks pass.

## Code Style

This project uses [Ruff](https://docs.astral.sh/ruff/) for linting and formatting, following modern Python best practices. Pre-commit handles all formatting automatically.

### Style Guidelines

#### General Rules

- **Line length:** 120 characters maximum
- **Python version:** 3.11+ (use modern syntax)
- **Quote style:** Single quotes for strings, double quotes for docstrings

#### Type Hints

Use modern Python 3.11+ type hints (PEP 585 and PEP 604):

```python
# Preferred (modern)
def process(items: list[str], config: dict[str, int] | None = None) -> tuple[int, str]:
    ...

# Avoid (legacy)
from typing import List, Dict, Optional, Tuple
def process(items: List[str], config: Optional[Dict[str, int]] = None) -> Tuple[int, str]:
    ...
```

#### Docstrings

Use [Google-style docstrings](https://google.github.io/styleguide/pyguide.html#38-comments-and-docstrings) for all public APIs:

```python
def detect_faces(image: np.ndarray, threshold: float = 0.5) -> list[Face]:
    """Detect faces in an image.

    Args:
        image: Input image as a numpy array with shape (H, W, C) in BGR format.
        threshold: Confidence threshold for filtering detections. Defaults to 0.5.

    Returns:
        List of Face objects containing bounding boxes, confidence scores,
        and facial landmarks.

    Raises:
        ValueError: If the input image has invalid dimensions.

    Example:
        >>> from uniface import detect_faces
        >>> faces = detect_faces(image, threshold=0.8)
        >>> print(f"Found {len(faces)} faces")
    """
```

#### Import Order

Imports are automatically sorted by Ruff with the following order:

1. **Future** imports (`from __future__ import annotations`)
2. **Standard library** (`os`, `sys`, `typing`, etc.)
3. **Third-party** (`numpy`, `cv2`, `onnxruntime`, etc.)
4. **First-party** (`uniface.*`)
5. **Local** (relative imports like `.base`, `.models`)

```python
from __future__ import annotations

import os
from typing import Any

import cv2
import numpy as np

from uniface.constants import RetinaFaceWeights
from uniface.log import Logger

from .base import BaseDetector
```

#### Code Comments

- Add comments for complex logic, magic numbers, and non-obvious behavior
- Avoid comments that merely restate the code
- Use `# TODO:` with issue links for planned improvements

```python
# RetinaFace FPN strides and corresponding anchor sizes per level
steps = [8, 16, 32]
min_sizes = [[16, 32], [64, 128], [256, 512]]

# Add small epsilon to prevent division by zero
similarity = np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b) + 1e-5)
```

## Running Tests

```bash
# Run all tests
pytest tests/

# Run with verbose output
pytest tests/ -v

# Run specific test file
pytest tests/test_factory.py

# Run with coverage
pytest tests/ --cov=uniface --cov-report=html
```

## Adding New Features

When adding a new model or feature:

1. **Create the model class** in the appropriate submodule (e.g., `uniface/detection/`)
2. **Add weight constants** to `uniface/constants.py` with URLs and SHA256 hashes (see the sketch after this list)
3. **Export in `__init__.py`** files at both module and package levels
4. **Write tests** in `tests/` directory
5. **Add example usage** in `tools/` or update existing notebooks
6. **Update documentation** if needed
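A minimal sketch of step 2 from the list above. The names, URL, and hash below are placeholders, and the actual layout of `uniface/constants.py` may differ:

```python
# Hypothetical sketch only: the real structure of uniface/constants.py may differ.
# Step 2 above: register a new weight entry with its download URL and SHA256 hash.
from enum import Enum


class MyDetectorWeights(str, Enum):
    """Placeholder weight registry for a hypothetical new detector."""

    DEFAULT = 'mydetector_default'


# Placeholder download URL and SHA256 hash used for caching and integrity checks.
MODEL_URLS = {MyDetectorWeights.DEFAULT: 'https://example.com/models/mydetector_default.onnx'}
MODEL_SHA256 = {MyDetectorWeights.DEFAULT: '0' * 64}
```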
## Examples

Example notebooks demonstrating library usage:

| Example | Notebook |
|---------|----------|
| Face Detection | [face_detection.ipynb](examples/face_detection.ipynb) |
| Face Alignment | [face_alignment.ipynb](examples/face_alignment.ipynb) |
| Face Recognition | [face_analyzer.ipynb](examples/face_analyzer.ipynb) |
| Face Verification | [face_verification.ipynb](examples/face_verification.ipynb) |
| Face Search | [face_search.ipynb](examples/face_search.ipynb) |
| Face Detection | [01_face_detection.ipynb](examples/01_face_detection.ipynb) |
| Face Alignment | [02_face_alignment.ipynb](examples/02_face_alignment.ipynb) |
| Face Verification | [03_face_verification.ipynb](examples/03_face_verification.ipynb) |
| Face Search | [04_face_search.ipynb](examples/04_face_search.ipynb) |
| Face Analyzer | [05_face_analyzer.ipynb](examples/05_face_analyzer.ipynb) |
| Face Parsing | [06_face_parsing.ipynb](examples/06_face_parsing.ipynb) |
| Face Anonymization | [07_face_anonymization.ipynb](examples/07_face_anonymization.ipynb) |
| Gaze Estimation | [08_gaze_estimation.ipynb](examples/08_gaze_estimation.ipynb) |

## Questions?

Open an issue or start a discussion on GitHub.
MODELS.md (116 changes)

@@ -20,7 +20,7 @@ RetinaFace models are trained on the WIDER FACE dataset and provide excellent ac
| `RESNET34` | 24.8M | 56MB | 94.16% | 93.12% | 88.90% | Maximum accuracy |

**Accuracy**: WIDER FACE validation set (Easy/Medium/Hard subsets) - from [RetinaFace paper](https://arxiv.org/abs/1905.00641)
**Speed**: Benchmark on your own hardware using `scripts/run_detection.py --iterations 100`
**Speed**: Benchmark on your own hardware using `tools/detection.py --source <image> --iterations 100`

#### Usage

@@ -34,7 +34,7 @@ detector = RetinaFace()  # Uses MNET_V2

# Specific model
detector = RetinaFace(
    model_name=RetinaFaceWeights.MNET_025,  # Fastest
    conf_thresh=0.5,
    confidence_threshold=0.5,
    nms_thresh=0.4,
    input_size=(640, 640)
)

@@ -52,7 +52,7 @@ SCRFD (Sample and Computation Redistribution for Efficient Face Detection) model
| `SCRFD_10G` ⭐ | 4.2M | 17MB | 95.16% | 93.87% | 83.05% | **High accuracy + speed** |

**Accuracy**: WIDER FACE validation set - from [SCRFD paper](https://arxiv.org/abs/2105.04714)
**Speed**: Benchmark on your own hardware using `scripts/run_detection.py --iterations 100`
**Speed**: Benchmark on your own hardware using `tools/detection.py --source <image> --iterations 100`

#### Usage

@@ -63,14 +63,14 @@ from uniface.constants import SCRFDWeights

# Fast real-time detection
detector = SCRFD(
    model_name=SCRFDWeights.SCRFD_500M_KPS,
    conf_thresh=0.5,
    confidence_threshold=0.5,
    input_size=(640, 640)
)

# High accuracy
detector = SCRFD(
    model_name=SCRFDWeights.SCRFD_10G_KPS,
    conf_thresh=0.5
    confidence_threshold=0.5
)
```

@@ -87,7 +87,7 @@ YOLOv5-Face models provide excellent detection accuracy with 5-point facial land
| `YOLOV5M` | 82MB | 95.30% | 93.76% | 85.28% | High accuracy |

**Accuracy**: WIDER FACE validation set - from [YOLOv5-Face paper](https://arxiv.org/abs/2105.12931)
**Speed**: Benchmark on your own hardware using `scripts/run_detection.py --iterations 100`
**Speed**: Benchmark on your own hardware using `tools/detection.py --source <image> --iterations 100`
**Note**: Fixed input size of 640×640. Models exported to ONNX from [deepcam-cn/yolov5-face](https://github.com/deepcam-cn/yolov5-face)

#### Usage

@@ -99,29 +99,29 @@ from uniface.constants import YOLOv5FaceWeights

# Lightweight/Mobile
detector = YOLOv5Face(
    model_name=YOLOv5FaceWeights.YOLOV5N,
    conf_thresh=0.6,
    confidence_threshold=0.6,
    nms_thresh=0.5
)

# Real-time detection (recommended)
detector = YOLOv5Face(
    model_name=YOLOv5FaceWeights.YOLOV5S,
    conf_thresh=0.6,
    confidence_threshold=0.6,
    nms_thresh=0.5
)

# High accuracy
detector = YOLOv5Face(
    model_name=YOLOv5FaceWeights.YOLOV5M,
    conf_thresh=0.6
    confidence_threshold=0.6
)

# Detect faces with landmarks
faces = detector.detect(image)
for face in faces:
    bbox = face['bbox']  # [x1, y1, x2, y2]
    confidence = face['confidence']
    landmarks = face['landmarks']  # 5-point landmarks (5, 2)
    bbox = face.bbox  # [x1, y1, x2, y2]
    confidence = face.confidence
    landmarks = face.landmarks  # 5-point landmarks (5, 2)
```

---

@@ -259,9 +259,40 @@ landmarks = landmarker.get_landmarks(image, bbox)
from uniface import AgeGender

predictor = AgeGender()
gender, age = predictor.predict(image, bbox)
# Returns: (gender, age_in_years)
# gender: 0 for Female, 1 for Male
result = predictor.predict(image, bbox)
# Returns: AttributeResult with gender, age, sex property
# result.gender: 0 for Female, 1 for Male
# result.sex: "Female" or "Male"
# result.age: age in years
```

---

### FairFace Attributes

| Model Name | Attributes | Params | Size | Use Case |
| ----------- | --------------------- | ------ | ----- | --------------------------- |
| `DEFAULT` | Race, Gender, Age Group | - | 44MB | Balanced demographic prediction |

**Dataset**: Trained on FairFace dataset with balanced demographics
**Note**: FairFace provides more equitable predictions across different racial and gender groups

**Race Categories (7):** White, Black, Latino Hispanic, East Asian, Southeast Asian, Indian, Middle Eastern

**Age Groups (9):** 0-2, 3-9, 10-19, 20-29, 30-39, 40-49, 50-59, 60-69, 70+

#### Usage

```python
from uniface import FairFace

predictor = FairFace()
result = predictor.predict(image, bbox)
# Returns: AttributeResult with gender, age_group, race, sex property
# result.gender: 0 for Female, 1 for Male
# result.sex: "Female" or "Male"
# result.age_group: "20-29", "30-39", etc.
# result.race: "East Asian", "White", etc.
```

---

@@ -286,7 +317,9 @@ from uniface import Emotion
from uniface.constants import DDAMFNWeights

predictor = Emotion(model_name=DDAMFNWeights.AFFECNET7)
emotion, confidence = predictor.predict(image, landmarks)
result = predictor.predict(image, landmarks)
# result.emotion: predicted emotion label
# result.confidence: confidence score
```

---

@@ -324,8 +357,8 @@ gaze_estimator = MobileGaze()  # Uses RESNET34
gaze_estimator = MobileGaze(model_name=GazeWeights.MOBILEONE_S0)

# Estimate gaze from face crop
pitch, yaw = gaze_estimator.estimate(face_crop)
print(f"Pitch: {np.degrees(pitch):.1f}°, Yaw: {np.degrees(yaw):.1f}°")
result = gaze_estimator.estimate(face_crop)
print(f"Pitch: {np.degrees(result.pitch):.1f}°, Yaw: {np.degrees(result.yaw):.1f}°")
```

**Note**: Requires face crop as input. Use face detection first to obtain bounding boxes.

@@ -404,6 +437,47 @@ print(f"Detected {len(np.unique(mask))} facial components")

---

## Anti-Spoofing Models

### MiniFASNet Family

Lightweight face anti-spoofing models for liveness detection. Detect if a face is real (live) or fake (photo, video replay, mask).

| Model Name | Size | Scale | Use Case |
| ---------- | ------ | ----- | ----------------------------- |
| `V1SE` | 1.2 MB | 4.0 | Squeeze-and-excitation variant |
| `V2` ⭐ | 1.2 MB | 2.7 | **Recommended default** |

**Dataset**: Trained on face anti-spoofing datasets
**Output**: Returns `SpoofingResult(is_real, confidence)` where is_real: True=Real, False=Fake

#### Usage

```python
from uniface import RetinaFace
from uniface.spoofing import MiniFASNet
from uniface.constants import MiniFASNetWeights

# Default (V2, recommended)
detector = RetinaFace()
spoofer = MiniFASNet()

# V1SE variant
spoofer = MiniFASNet(model_name=MiniFASNetWeights.V1SE)

# Detect and check liveness
faces = detector.detect(image)
for face in faces:
    result = spoofer.predict(image, face.bbox)
    # result.is_real: True for real, False for fake
    label = 'Real' if result.is_real else 'Fake'
    print(f"{label}: {result.confidence:.1%}")
```

**Note**: Requires face bounding box from a detector. Use with RetinaFace, SCRFD, or YOLOv5Face.

---

## Model Updates

Models are automatically downloaded and cached on first use. Cache location: `~/.uniface/models/`
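A minimal sketch for inspecting that cache directory with the standard library only; the file names depend on which models have already been downloaded:

```python
# Minimal sketch: list whatever is in the cache directory described above (~/.uniface/models/).
from pathlib import Path

cache_dir = Path.home() / '.uniface' / 'models'
if cache_dir.exists():
    for model_file in sorted(cache_dir.iterdir()):
        size_mb = model_file.stat().st_size / (1024 * 1024)
        print(f'{model_file.name}: {size_mb:.1f} MB')
else:
    print('No models cached yet; they are downloaded on first use.')
```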
@@ -427,10 +501,10 @@ model_path = verify_model_weights(

```bash
# Using the provided script
python scripts/download_model.py
python tools/download_model.py

# Download specific model
python scripts/download_model.py --model MNET_V2
python tools/download_model.py --model MNET_V2
```

---

@@ -445,6 +519,8 @@ python scripts/download_model.py --model MNET_V2

- **Face Recognition Training**: [yakhyo/face-recognition](https://github.com/yakhyo/face-recognition) - ArcFace, MobileFace, SphereFace training code
- **Gaze Estimation Training**: [yakhyo/gaze-estimation](https://github.com/yakhyo/gaze-estimation) - MobileGaze training code and pretrained weights
- **Face Parsing Training**: [yakhyo/face-parsing](https://github.com/yakhyo/face-parsing) - BiSeNet training code and pretrained weights
- **Face Anti-Spoofing**: [yakhyo/face-anti-spoofing](https://github.com/yakhyo/face-anti-spoofing) - MiniFASNet ONNX inference (weights from [minivision-ai/Silent-Face-Anti-Spoofing](https://github.com/minivision-ai/Silent-Face-Anti-Spoofing))
- **FairFace**: [yakhyo/fairface-onnx](https://github.com/yakhyo/fairface-onnx) - FairFace ONNX inference for race, gender, age prediction
- **InsightFace**: [deepinsight/insightface](https://github.com/deepinsight/insightface) - Model architectures and pretrained weights

### Papers
QUICKSTART.md (237 changes)

@@ -39,9 +39,9 @@ faces = detector.detect(image)

# Print results
for i, face in enumerate(faces):
    print(f"Face {i+1}:")
    print(f" Confidence: {face['confidence']:.2f}")
    print(f" BBox: {face['bbox']}")
    print(f" Landmarks: {len(face['landmarks'])} points")
    print(f" Confidence: {face.confidence:.2f}")
    print(f" BBox: {face.bbox}")
    print(f" Landmarks: {len(face.landmarks)} points")
```

**Output:**

@@ -70,9 +70,9 @@ image = cv2.imread("photo.jpg")
faces = detector.detect(image)

# Extract visualization data
bboxes = [f['bbox'] for f in faces]
scores = [f['confidence'] for f in faces]
landmarks = [f['landmarks'] for f in faces]
bboxes = [f.bbox for f in faces]
scores = [f.confidence for f in faces]
landmarks = [f.landmarks for f in faces]

# Draw on image
draw_detections(

@@ -113,8 +113,8 @@ faces2 = detector.detect(image2)

if faces1 and faces2:
    # Extract embeddings
    emb1 = recognizer.get_normalized_embedding(image1, faces1[0]['landmarks'])
    emb2 = recognizer.get_normalized_embedding(image2, faces2[0]['landmarks'])
    emb1 = recognizer.get_normalized_embedding(image1, faces1[0].landmarks)
    emb2 = recognizer.get_normalized_embedding(image2, faces2[0].landmarks)

    # Compute similarity (cosine similarity)
    similarity = np.dot(emb1, emb2.T)[0][0]

@@ -159,9 +159,9 @@ while True:
    faces = detector.detect(frame)

    # Draw results
    bboxes = [f['bbox'] for f in faces]
    scores = [f['confidence'] for f in faces]
    landmarks = [f['landmarks'] for f in faces]
    bboxes = [f.bbox for f in faces]
    scores = [f.confidence for f in faces]
    landmarks = [f.landmarks for f in faces]
    draw_detections(
        image=frame,
        bboxes=bboxes,

@@ -199,9 +199,11 @@ faces = detector.detect(image)

# Predict attributes
for i, face in enumerate(faces):
    gender, age = age_gender.predict(image, face['bbox'])
    gender_str = 'Female' if gender == 0 else 'Male'
    print(f"Face {i+1}: {gender_str}, {age} years old")
    result = age_gender.predict(image, face.bbox)
    print(f"Face {i+1}: {result.sex}, {result.age} years old")
    # result.gender: 0=Female, 1=Male
    # result.sex: "Female" or "Male"
    # result.age: age in years
```

**Output:**

@@ -213,6 +215,45 @@ Face 2: Female, 28 years old

---

## 5b. FairFace Attributes (2 minutes)

Detect race, gender, and age group with balanced demographics:

```python
import cv2
from uniface import RetinaFace, FairFace

# Initialize models
detector = RetinaFace()
fairface = FairFace()

# Load image
image = cv2.imread("photo.jpg")
faces = detector.detect(image)

# Predict attributes
for i, face in enumerate(faces):
    result = fairface.predict(image, face.bbox)
    print(f"Face {i+1}: {result.sex}, {result.age_group}, {result.race}")
    # result.gender: 0=Female, 1=Male
    # result.sex: "Female" or "Male"
    # result.age_group: "20-29", "30-39", etc.
    # result.race: "East Asian", "White", etc.
```

**Output:**

```
Face 1: Male, 30-39, East Asian
Face 2: Female, 20-29, White
```

**Race Categories:** White, Black, Latino Hispanic, East Asian, Southeast Asian, Indian, Middle Eastern

**Age Groups:** 0-2, 3-9, 10-19, 20-29, 30-39, 40-49, 50-59, 60-69, 70+

---

## 6. Facial Landmarks (2 minutes)

Detect 106 facial landmarks:

@@ -230,7 +271,7 @@ image = cv2.imread("photo.jpg")
faces = detector.detect(image)

if faces:
    landmarks = landmarker.get_landmarks(image, faces[0]['bbox'])
    landmarks = landmarker.get_landmarks(image, faces[0].bbox)
    print(f"Detected {len(landmarks)} landmarks")

    # Draw landmarks

@@ -262,16 +303,15 @@ faces = detector.detect(image)

# Estimate gaze for each face
for i, face in enumerate(faces):
    bbox = face['bbox']
    x1, y1, x2, y2 = map(int, bbox[:4])
    x1, y1, x2, y2 = map(int, face.bbox[:4])
    face_crop = image[y1:y2, x1:x2]

    if face_crop.size > 0:
        pitch, yaw = gaze_estimator.estimate(face_crop)
        print(f"Face {i+1}: pitch={np.degrees(pitch):.1f}°, yaw={np.degrees(yaw):.1f}°")
        result = gaze_estimator.estimate(face_crop)
        print(f"Face {i+1}: pitch={np.degrees(result.pitch):.1f}°, yaw={np.degrees(result.yaw):.1f}°")

        # Draw gaze direction
        draw_gaze(image, bbox, pitch, yaw)
        draw_gaze(image, face.bbox, result.pitch, result.yaw)

cv2.imwrite("gaze_output.jpg", image)
```

@@ -328,7 +368,138 @@ Detected 12 facial components

---

## 9. Batch Processing (3 minutes)
## 9. Face Anonymization (2 minutes)

Automatically blur faces for privacy protection:

```python
from uniface.privacy import anonymize_faces
import cv2

# One-liner: automatic detection and blurring
image = cv2.imread("group_photo.jpg")
anonymized = anonymize_faces(image, method='pixelate')
cv2.imwrite("anonymized.jpg", anonymized)
print("Faces anonymized successfully!")
```

**Manual control with custom parameters:**

```python
from uniface import RetinaFace
from uniface.privacy import BlurFace

# Initialize detector and blurrer
detector = RetinaFace()
blurrer = BlurFace(method='gaussian', blur_strength=5.0)

# Detect and anonymize
faces = detector.detect(image)
anonymized = blurrer.anonymize(image, faces)
cv2.imwrite("output.jpg", anonymized)
```

**Available blur methods:**

```python
# Pixelation (news media standard)
blurrer = BlurFace(method='pixelate', pixel_blocks=8)

# Gaussian blur (smooth, natural)
blurrer = BlurFace(method='gaussian', blur_strength=4.0)

# Black boxes (maximum privacy)
blurrer = BlurFace(method='blackout', color=(0, 0, 0))

# Elliptical blur (natural face shape)
blurrer = BlurFace(method='elliptical', blur_strength=3.0, margin=30)

# Median blur (edge-preserving)
blurrer = BlurFace(method='median', blur_strength=3.0)
```

**Webcam anonymization:**

```python
import cv2
from uniface import RetinaFace
from uniface.privacy import BlurFace

detector = RetinaFace()
blurrer = BlurFace(method='pixelate')
cap = cv2.VideoCapture(0)

while True:
    ret, frame = cap.read()
    if not ret:
        break

    faces = detector.detect(frame)
    frame = blurrer.anonymize(frame, faces, inplace=True)

    cv2.imshow('Anonymized', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
```

**Command-line tool:**

```bash
# Anonymize image with pixelation
python tools/face_anonymize.py --source photo.jpg

# Real-time webcam anonymization
python tools/face_anonymize.py --source 0 --method gaussian

# Custom blur strength
python tools/face_anonymize.py --source photo.jpg --method gaussian --blur-strength 5.0
```

---

## 10. Face Anti-Spoofing (2 minutes)

Detect if a face is real or fake (photo, video replay, mask):

```python
from uniface import RetinaFace
from uniface.spoofing import MiniFASNet

detector = RetinaFace()
spoofer = MiniFASNet()  # Uses V2 by default

image = cv2.imread("photo.jpg")
faces = detector.detect(image)

for i, face in enumerate(faces):
    result = spoofer.predict(image, face.bbox)
    # result.is_real: True for real, False for fake
    label = 'Real' if result.is_real else 'Fake'
    print(f"Face {i+1}: {label} ({result.confidence:.1%})")
```

**Output:**

```
Face 1: Real (98.5%)
```

**Command-line tool:**

```bash
# Image
python tools/spoofing.py --source photo.jpg

# Webcam
python tools/spoofing.py --source 0
```

---

## 11. Batch Processing (3 minutes)

Process multiple images:
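The diff does not include the batch-processing snippet itself; a minimal sketch reusing the detection API shown above (the input folder and naming are illustrative only):

```python
# Minimal sketch of batch processing, assuming the RetinaFace API shown above.
from pathlib import Path

import cv2
from uniface import RetinaFace

detector = RetinaFace()

for image_path in sorted(Path('images').glob('*.jpg')):
    image = cv2.imread(str(image_path))
    if image is None:
        continue  # skip unreadable files
    faces = detector.detect(image)
    print(f'{image_path.name}: {len(faces)} face(s)')

print("Done!")
```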
@@ -361,7 +532,7 @@ print("Done!")

---

## 10. Model Selection
## 12. Model Selection

Choose the right model for your use case:

@@ -374,7 +545,7 @@ from uniface.constants import RetinaFaceWeights, SCRFDWeights, YOLOv5FaceWeights
# Fast detection (mobile/edge devices)
detector = RetinaFace(
    model_name=RetinaFaceWeights.MNET_025,
    conf_thresh=0.7
    confidence_threshold=0.7
)

# Balanced (recommended)

@@ -385,14 +556,14 @@ detector = RetinaFace(
# Real-time with high accuracy
detector = YOLOv5Face(
    model_name=YOLOv5FaceWeights.YOLOV5S,
    conf_thresh=0.6,
    confidence_threshold=0.6,
    nms_thresh=0.5
)

# High accuracy (server/GPU)
detector = SCRFD(
    model_name=SCRFDWeights.SCRFD_10G_KPS,
    conf_thresh=0.5
    confidence_threshold=0.5
)
```

@@ -497,13 +668,14 @@ Explore interactive examples for common tasks:

| Example | Description | Notebook |
|---------|-------------|----------|
| **Face Detection** | Detect faces and facial landmarks | [face_detection.ipynb](examples/face_detection.ipynb) |
| **Face Alignment** | Align and crop faces for recognition | [face_alignment.ipynb](examples/face_alignment.ipynb) |
| **Face Recognition** | Extract face embeddings and compare faces | [face_analyzer.ipynb](examples/face_analyzer.ipynb) |
| **Face Verification** | Compare two faces to verify identity | [face_verification.ipynb](examples/face_verification.ipynb) |
| **Face Search** | Find a person in a group photo | [face_search.ipynb](examples/face_search.ipynb) |
| **Face Parsing** | Segment face into semantic components | [face_parsing.ipynb](examples/face_parsing.ipynb) |
| **Gaze Estimation** | Estimate gaze direction | [gaze_estimation.ipynb](examples/gaze_estimation.ipynb) |
| **Face Detection** | Detect faces and facial landmarks | [01_face_detection.ipynb](examples/01_face_detection.ipynb) |
| **Face Alignment** | Align and crop faces for recognition | [02_face_alignment.ipynb](examples/02_face_alignment.ipynb) |
| **Face Verification** | Compare two faces to verify identity | [03_face_verification.ipynb](examples/03_face_verification.ipynb) |
| **Face Search** | Find a person in a group photo | [04_face_search.ipynb](examples/04_face_search.ipynb) |
| **Face Analyzer** | All-in-one detection, recognition & attributes | [05_face_analyzer.ipynb](examples/05_face_analyzer.ipynb) |
| **Face Parsing** | Segment face into semantic components | [06_face_parsing.ipynb](examples/06_face_parsing.ipynb) |
| **Face Anonymization** | Blur or pixelate faces for privacy protection | [07_face_anonymization.ipynb](examples/07_face_anonymization.ipynb) |
| **Gaze Estimation** | Estimate gaze direction | [08_gaze_estimation.ipynb](examples/08_gaze_estimation.ipynb) |

### Additional Resources

@@ -519,4 +691,5 @@ Explore interactive examples for common tasks:
- **Face Recognition Training**: [yakhyo/face-recognition](https://github.com/yakhyo/face-recognition)
- **Gaze Estimation Training**: [yakhyo/gaze-estimation](https://github.com/yakhyo/gaze-estimation)
- **Face Parsing Training**: [yakhyo/face-parsing](https://github.com/yakhyo/face-parsing)
- **FairFace**: [yakhyo/fairface-onnx](https://github.com/yakhyo/fairface-onnx) - Race, gender, age prediction
- **InsightFace**: [deepinsight/insightface](https://github.com/deepinsight/insightface)
README.md (195 changes)

@@ -1,11 +1,15 @@
# UniFace: All-in-One Face Analysis Library

<div align="center">

[License: MIT](https://opensource.org/licenses/MIT)
[Python](https://www.python.org/)
[Python](https://www.python.org/)
[PyPI](https://pypi.org/project/uniface/)
[Build](https://github.com/yakhyo/uniface/actions)
[Downloads](https://pepy.tech/project/uniface)
[DeepWiki](https://deepwiki.com/yakhyo/uniface)
[Downloads](https://pepy.tech/project/uniface)
[DeepWiki](https://deepwiki.com/yakhyo/uniface)

</div>

<div align="center">
<img src=".github/logos/logo_web.webp" width=75%>

@@ -22,7 +26,9 @@
- **Face Recognition**: ArcFace, MobileFace, and SphereFace embeddings
- **Face Parsing**: BiSeNet-based semantic segmentation with 19 facial component classes
- **Gaze Estimation**: Real-time gaze direction prediction with MobileGaze
- **Attribute Analysis**: Age, gender, and emotion detection
- **Attribute Analysis**: Age, gender, race (FairFace), and emotion detection
- **Anti-Spoofing**: Face liveness detection with MiniFASNet models
- **Face Anonymization**: Privacy-preserving face blurring with 5 methods (pixelate, gaussian, blackout, elliptical, median)
- **Face Alignment**: Precise alignment for downstream tasks
- **Hardware Acceleration**: ARM64 optimizations (Apple Silicon), CUDA (NVIDIA), CPU fallback
- **Simple API**: Intuitive factory functions and clean interfaces

@@ -99,9 +105,9 @@ faces = detector.detect(image)

# Process results
for face in faces:
    bbox = face['bbox']  # [x1, y1, x2, y2]
    confidence = face['confidence']
    landmarks = face['landmarks']  # 5-point landmarks
    bbox = face.bbox  # np.ndarray [x1, y1, x2, y2]
    confidence = face.confidence
    landmarks = face.landmarks  # np.ndarray (5, 2) landmarks
    print(f"Face detected with confidence: {confidence:.2f}")
```

@@ -119,8 +125,8 @@ recognizer = ArcFace()
faces1 = detector.detect(image1)
faces2 = detector.detect(image2)

embedding1 = recognizer.get_normalized_embedding(image1, faces1[0]['landmarks'])
embedding2 = recognizer.get_normalized_embedding(image2, faces2[0]['landmarks'])
embedding1 = recognizer.get_normalized_embedding(image1, faces1[0].landmarks)
embedding2 = recognizer.get_normalized_embedding(image2, faces2[0].landmarks)

# Compare faces
similarity = compute_similarity(embedding1, embedding2)

@@ -136,7 +142,7 @@ detector = RetinaFace()
landmarker = Landmark106()

faces = detector.detect(image)
landmarks = landmarker.get_landmarks(image, faces[0]['bbox'])
landmarks = landmarker.get_landmarks(image, faces[0].bbox)
# Returns 106 (x, y) landmark points
```

@@ -149,9 +155,28 @@ detector = RetinaFace()
age_gender = AgeGender()

faces = detector.detect(image)
gender, age = age_gender.predict(image, faces[0]['bbox'])
gender_str = 'Female' if gender == 0 else 'Male'
print(f"{gender_str}, {age} years old")
result = age_gender.predict(image, faces[0].bbox)
print(f"{result.sex}, {result.age} years old")
# result.gender: 0=Female, 1=Male
# result.sex: "Female" or "Male"
# result.age: age in years
```

### FairFace Attributes (Race, Gender, Age Group)

```python
from uniface import RetinaFace, FairFace

detector = RetinaFace()
fairface = FairFace()

faces = detector.detect(image)
result = fairface.predict(image, faces[0].bbox)
print(f"{result.sex}, {result.age_group}, {result.race}")
# result.gender: 0=Female, 1=Male
# result.sex: "Female" or "Male"
# result.age_group: "20-29", "30-39", etc.
# result.race: "East Asian", "White", etc.
```

### Gaze Estimation

@@ -166,15 +191,14 @@ gaze_estimator = MobileGaze()

faces = detector.detect(image)
for face in faces:
    bbox = face['bbox']
    x1, y1, x2, y2 = map(int, bbox[:4])
    x1, y1, x2, y2 = map(int, face.bbox[:4])
    face_crop = image[y1:y2, x1:x2]

    pitch, yaw = gaze_estimator.estimate(face_crop)
    print(f"Gaze: pitch={np.degrees(pitch):.1f}°, yaw={np.degrees(yaw):.1f}°")
    result = gaze_estimator.estimate(face_crop)
    print(f"Gaze: pitch={np.degrees(result.pitch):.1f}°, yaw={np.degrees(result.yaw):.1f}°")

    # Visualize
    draw_gaze(image, bbox, pitch, yaw)
    draw_gaze(image, face.bbox, result.pitch, result.yaw)
```

### Face Parsing

@@ -198,6 +222,78 @@ vis_result = vis_parsing_maps(face_rgb, mask, save_image=False)
print(f"Unique classes: {len(np.unique(mask))}")
```

### Face Anti-Spoofing

Detect if a face is real or fake (photo, video replay, mask):

```python
from uniface import RetinaFace
from uniface.spoofing import MiniFASNet

detector = RetinaFace()
spoofer = MiniFASNet()  # Uses V2 by default

faces = detector.detect(image)
for face in faces:
    result = spoofer.predict(image, face.bbox)
    # result.is_real: True for real, False for fake
    # result.confidence: confidence score
    label = 'Real' if result.is_real else 'Fake'
    print(f"{label}: {result.confidence:.1%}")
```

### Face Anonymization

Protect privacy by blurring or pixelating faces with 5 different methods:

```python
from uniface import RetinaFace
from uniface.privacy import BlurFace, anonymize_faces
import cv2

# Method 1: One-liner with automatic detection
image = cv2.imread("photo.jpg")
anonymized = anonymize_faces(image, method='pixelate')
cv2.imwrite("anonymized.jpg", anonymized)

# Method 2: Manual control with custom parameters
detector = RetinaFace()
blurrer = BlurFace(method='gaussian', blur_strength=5.0)

faces = detector.detect(image)
anonymized = blurrer.anonymize(image, faces)

# Available blur methods:
methods = {
    'pixelate': BlurFace(method='pixelate', pixel_blocks=10),   # Blocky effect (news media standard)
    'gaussian': BlurFace(method='gaussian', blur_strength=3.0), # Smooth, natural blur
    'blackout': BlurFace(method='blackout', color=(0, 0, 0)),   # Solid color boxes (maximum privacy)
    'elliptical': BlurFace(method='elliptical', margin=20),     # Soft oval blur (natural face shape)
    'median': BlurFace(method='median', blur_strength=3.0)      # Edge-preserving blur
}

# Real-time webcam anonymization
cap = cv2.VideoCapture(0)
detector = RetinaFace()
blurrer = BlurFace(method='pixelate')

while True:
    ret, frame = cap.read()
    if not ret:
        break

    faces = detector.detect(frame)
    frame = blurrer.anonymize(frame, faces, inplace=True)

    cv2.imshow('Anonymized', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
```

---

## Documentation

@@ -216,6 +312,7 @@ print(f"Unique classes: {len(np.unique(mask))}")
from uniface.detection import RetinaFace, SCRFD
from uniface.recognition import ArcFace
from uniface.landmark import Landmark106
from uniface.privacy import BlurFace, anonymize_faces

from uniface.constants import SCRFDWeights

@@ -225,7 +322,7 @@ detector = RetinaFace()
# Create with custom config
detector = SCRFD(
    model_name=SCRFDWeights.SCRFD_10G_KPS,  # SCRFDWeights.SCRFD_500M_KPS
    conf_thresh=0.4,
    confidence_threshold=0.4,
    input_size=(640, 640)
)
# Or with defaults settings: detector = SCRFD()

@@ -244,16 +341,16 @@ from uniface.constants import RetinaFaceWeights, YOLOv5FaceWeights
# Detection
detector = RetinaFace(
    model_name=RetinaFaceWeights.MNET_V2,
    conf_thresh=0.5,
    nms_thresh=0.4
    confidence_threshold=0.5,
    nms_threshold=0.4
)
# Or detector = RetinaFace()

# YOLOv5-Face detection
detector = YOLOv5Face(
    model_name=YOLOv5FaceWeights.YOLOV5S,
    conf_thresh=0.6,
    nms_thresh=0.5
    confidence_threshold=0.6,
    nms_threshold=0.5
)
# Or detector = YOLOv5Face

@@ -269,7 +366,7 @@ recognizer = SphereFace()  # Angular softmax alternative
from uniface import detect_faces

# One-line face detection
faces = detect_faces(image, method='retinaface', conf_thresh=0.8)  # methods: retinaface, scrfd, yolov5face
faces = detect_faces(image, method='retinaface', confidence_threshold=0.8)  # methods: retinaface, scrfd, yolov5face
```

### Key Parameters (quick reference)

@@ -278,9 +375,9 @@ faces = detect_faces(image, method='retinaface', conf_thresh=0.8) # methods: re

| Class | Key params (defaults) | Notes |
| -------------- | ------------------------------------------------------------------------------------------------------------------------------------ | ---------------------------------------------- |
| `RetinaFace` | `model_name=RetinaFaceWeights.MNET_V2`, `conf_thresh=0.5`, `nms_thresh=0.4`, `input_size=(640, 640)`, `dynamic_size=False` | Supports 5-point landmarks |
| `SCRFD` | `model_name=SCRFDWeights.SCRFD_10G_KPS`, `conf_thresh=0.5`, `nms_thresh=0.4`, `input_size=(640, 640)` | Supports 5-point landmarks |
| `YOLOv5Face` | `model_name=YOLOv5FaceWeights.YOLOV5S`, `conf_thresh=0.6`, `nms_thresh=0.5`, `input_size=640` (fixed) | Supports 5-point landmarks; models: YOLOV5N/S/M; `input_size` must be 640 |
| `RetinaFace` | `model_name=RetinaFaceWeights.MNET_V2`, `confidence_threshold=0.5`, `nms_threshold=0.4`, `input_size=(640, 640)`, `dynamic_size=False` | Supports 5-point landmarks |
| `SCRFD` | `model_name=SCRFDWeights.SCRFD_10G_KPS`, `confidence_threshold=0.5`, `nms_threshold=0.4`, `input_size=(640, 640)` | Supports 5-point landmarks |
| `YOLOv5Face` | `model_name=YOLOv5FaceWeights.YOLOV5S`, `confidence_threshold=0.6`, `nms_threshold=0.5`, `input_size=640` (fixed) | Supports 5-point landmarks; models: YOLOV5N/S/M; `input_size` must be 640 |

**Recognition**

@@ -295,14 +392,15 @@ faces = detect_faces(image, method='retinaface', conf_thresh=0.8) # methods: re
| Class | Key params (defaults) | Notes |
| --------------- | --------------------------------------------------------------------- | --------------------------------------- |
| `Landmark106` | No required params | 106-point landmarks |
| `AgeGender` | `model_name=AgeGenderWeights.DEFAULT`; `input_size` auto-detected | Requires bbox; ONNXRuntime |
| `AgeGender` | `model_name=AgeGenderWeights.DEFAULT`; `input_size` auto-detected | Returns `AttributeResult` with gender, age |
| `FairFace` | `model_name=FairFaceWeights.DEFAULT`, `input_size=(224, 224)` | Returns `AttributeResult` with gender, age_group, race |
| `Emotion` | `model_weights=DDAMFNWeights.AFFECNET7`, `input_size=(112, 112)` | Requires 5-point landmarks; TorchScript |

**Gaze Estimation**

| Class | Key params (defaults) | Notes |
| ------------- | ------------------------------------------ | ------------------------------------ |
| `MobileGaze` | `model_name=GazeWeights.RESNET34` | Returns (pitch, yaw) angles in radians; trained on Gaze360 |
| `MobileGaze` | `model_name=GazeWeights.RESNET34` | Returns `GazeResult(pitch, yaw)` in radians; trained on Gaze360 |

**Face Parsing**

@@ -310,6 +408,12 @@ faces = detect_faces(image, method='retinaface', conf_thresh=0.8) # methods: re
| ---------- | ---------------------------------------- | ------------------------------------ |
| `BiSeNet` | `model_name=ParsingWeights.RESNET18`, `input_size=(512, 512)` | 19 facial component classes; BiSeNet architecture with ResNet backbone |

**Anti-Spoofing**

| Class | Key params (defaults) | Notes |
| ------------- | ----------------------------------------- | ------------------------------------ |
| `MiniFASNet` | `model_name=MiniFASNetWeights.V2` | Returns `SpoofingResult(is_real, confidence)` |

---

## Model Performance

@@ -332,7 +436,7 @@ _Accuracy values from original papers: [RetinaFace](https://arxiv.org/abs/1905.0
**Benchmark on your hardware:**

```bash
python scripts/run_detection.py --image assets/test.jpg --iterations 100
python tools/detection.py --source assets/test.jpg --iterations 100
```

See [MODELS.md](MODELS.md) for detailed model information and selection guide.

@@ -351,13 +455,14 @@ Interactive examples covering common face analysis tasks:

| Example | Description | Notebook |
|---------|-------------|----------|
| **Face Detection** | Detect faces and facial landmarks | [face_detection.ipynb](examples/face_detection.ipynb) |
| **Face Alignment** | Align and crop faces for recognition | [face_alignment.ipynb](examples/face_alignment.ipynb) |
| **Face Recognition** | Extract face embeddings and compare faces | [face_analyzer.ipynb](examples/face_analyzer.ipynb) |
| **Face Verification** | Compare two faces to verify identity | [face_verification.ipynb](examples/face_verification.ipynb) |
| **Face Search** | Find a person in a group photo | [face_search.ipynb](examples/face_search.ipynb) |
| **Face Parsing** | Segment face into semantic components | [face_parsing.ipynb](examples/face_parsing.ipynb) |
| **Gaze Estimation** | Estimate gaze direction from face images | [gaze_estimation.ipynb](examples/gaze_estimation.ipynb) |
| **Face Detection** | Detect faces and facial landmarks | [01_face_detection.ipynb](examples/01_face_detection.ipynb) |
| **Face Alignment** | Align and crop faces for recognition | [02_face_alignment.ipynb](examples/02_face_alignment.ipynb) |
| **Face Verification** | Compare two faces to verify identity | [03_face_verification.ipynb](examples/03_face_verification.ipynb) |
| **Face Search** | Find a person in a group photo | [04_face_search.ipynb](examples/04_face_search.ipynb) |
| **Face Analyzer** | All-in-one detection, recognition & attributes | [05_face_analyzer.ipynb](examples/05_face_analyzer.ipynb) |
| **Face Parsing** | Segment face into semantic components | [06_face_parsing.ipynb](examples/06_face_parsing.ipynb) |
| **Face Anonymization** | Blur or pixelate faces for privacy protection | [07_face_anonymization.ipynb](examples/07_face_anonymization.ipynb) |
| **Gaze Estimation** | Estimate gaze direction from face images | [08_gaze_estimation.ipynb](examples/08_gaze_estimation.ipynb) |

### Webcam Face Detection

@@ -377,9 +482,9 @@ while True:
    faces = detector.detect(frame)

    # Extract data for visualization
    bboxes = [f['bbox'] for f in faces]
    scores = [f['confidence'] for f in faces]
    landmarks = [f['landmarks'] for f in faces]
    bboxes = [f.bbox for f in faces]
    scores = [f.confidence for f in faces]
    landmarks = [f.landmarks for f in faces]

    draw_detections(
        image=frame,

@@ -413,7 +518,7 @@ for person_id, image_path in person_images.items():
    faces = detector.detect(image)
    if faces:
        embedding = recognizer.get_normalized_embedding(
            image, faces[0]['landmarks']
            image, faces[0].landmarks
        )
        database[person_id] = embedding

@@ -422,7 +527,7 @@ query_image = cv2.imread("query.jpg")
query_faces = detector.detect(query_image)
if query_faces:
    query_embedding = recognizer.get_normalized_embedding(
        query_image, query_faces[0]['landmarks']
        query_image, query_faces[0].landmarks
    )

    # Find best match

@@ -551,12 +656,14 @@ uniface/
│ ├── parsing/ # Face parsing
│ ├── gaze/ # Gaze estimation
│ ├── attribute/ # Age, gender, emotion
│ ├── spoofing/ # Face anti-spoofing
│ ├── privacy/ # Face anonymization & blurring
│ ├── onnx_utils.py # ONNX Runtime utilities
│ ├── model_store.py # Model download & caching
│ └── visualization.py # Drawing utilities
├── tests/ # Unit tests
├── examples/ # Example notebooks
└── scripts/ # Utility scripts
└── tools/ # CLI utilities
```

---

@@ -568,6 +675,8 @@ uniface/

- **Face Recognition Training**: [yakhyo/face-recognition](https://github.com/yakhyo/face-recognition) - ArcFace, MobileFace, SphereFace training code
- **Face Parsing Training**: [yakhyo/face-parsing](https://github.com/yakhyo/face-parsing) - BiSeNet face parsing training code and pretrained weights
- **Gaze Estimation Training**: [yakhyo/gaze-estimation](https://github.com/yakhyo/gaze-estimation) - MobileGaze training code and pretrained weights
- **Face Anti-Spoofing**: [yakhyo/face-anti-spoofing](https://github.com/yakhyo/face-anti-spoofing) - MiniFASNet ONNX inference (weights from [minivision-ai/Silent-Face-Anti-Spoofing](https://github.com/minivision-ai/Silent-Face-Anti-Spoofing))
- **FairFace**: [yakhyo/fairface-onnx](https://github.com/yakhyo/fairface-onnx) - FairFace ONNX inference for race, gender, age prediction
- **InsightFace**: [deepinsight/insightface](https://github.com/deepinsight/insightface) - Model architectures and pretrained weights

## Contributing
@@ -44,7 +44,7 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"1.3.1\n"
|
||||
"2.0.0\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -82,8 +82,8 @@
|
||||
],
|
||||
"source": [
|
||||
"detector = RetinaFace(\n",
|
||||
" conf_thresh=0.5,\n",
|
||||
" nms_thresh=0.4,\n",
|
||||
" confidence_threshold=0.5,\n",
|
||||
" nms_threshold=0.4,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
@@ -153,14 +153,14 @@
|
||||
"# Load image\n",
|
||||
"image = cv2.imread(image_path)\n",
|
||||
"\n",
|
||||
"# Detect faces - returns list of face dictionaries\n",
|
||||
"# Detect faces - returns list of Face objects\n",
|
||||
"faces = detector.detect(image)\n",
|
||||
"print(f'Detected {len(faces)} face(s)')\n",
|
||||
"\n",
|
||||
"# Unpack face data for visualization\n",
|
||||
"bboxes = [f['bbox'] for f in faces]\n",
|
||||
"scores = [f['confidence'] for f in faces]\n",
|
||||
"landmarks = [f['landmarks'] for f in faces]\n",
|
||||
"bboxes = [f.bbox for f in faces]\n",
|
||||
"scores = [f.confidence for f in faces]\n",
|
||||
"landmarks = [f.landmarks for f in faces]\n",
|
||||
"\n",
|
||||
"# Draw detections\n",
|
||||
"draw_detections(image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=0.6, fancy_bbox=True)\n",
|
||||
@@ -211,9 +211,9 @@
|
||||
"faces = detector.detect(image, max_num=2)\n",
|
||||
"print(f'Detected {len(faces)} face(s)')\n",
|
||||
"\n",
|
||||
"bboxes = [f['bbox'] for f in faces]\n",
|
||||
"scores = [f['confidence'] for f in faces]\n",
|
||||
"landmarks = [f['landmarks'] for f in faces]\n",
|
||||
"bboxes = [f.bbox for f in faces]\n",
|
||||
"scores = [f.confidence for f in faces]\n",
|
||||
"landmarks = [f.landmarks for f in faces]\n",
|
||||
"\n",
|
||||
"draw_detections(image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=0.6, fancy_bbox=True)\n",
|
||||
"\n",
|
||||
@@ -258,9 +258,9 @@
|
||||
"faces = detector.detect(image, max_num=5)\n",
|
||||
"print(f'Detected {len(faces)} face(s)')\n",
|
||||
"\n",
|
||||
"bboxes = [f['bbox'] for f in faces]\n",
|
||||
"scores = [f['confidence'] for f in faces]\n",
|
||||
"landmarks = [f['landmarks'] for f in faces]\n",
|
||||
"bboxes = [f.bbox for f in faces]\n",
|
||||
"scores = [f.confidence for f in faces]\n",
|
||||
"landmarks = [f.landmarks for f in faces]\n",
|
||||
"\n",
|
||||
"draw_detections(image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=0.6, fancy_bbox=True)\n",
|
||||
"\n",
|
||||
@@ -274,7 +274,8 @@
|
||||
"source": [
|
||||
"## Notes\n",
|
||||
"\n",
|
||||
"- `detect()` returns a list of dictionaries with keys: `bbox`, `confidence`, `landmarks`\n",
|
||||
"- `detect()` returns a list of `Face` objects with attributes: `bbox`, `confidence`, `landmarks`\n",
|
||||
"- Access attributes using dot notation: `face.bbox`, `face.confidence`, `face.landmarks`\n",
|
||||
"- Adjust `conf_thresh` and `nms_thresh` for your use case\n",
|
||||
"- Use `max_num` to limit detected faces"
|
||||
]
|
||||
@@ -48,7 +48,7 @@
 "name": "stdout",
 "output_type": "stream",
 "text": [
-"1.3.1\n"
+"2.0.0\n"
 ]
 }
 ],
@@ -87,8 +87,8 @@
 ],
 "source": [
 "detector = RetinaFace(\n",
-" conf_thresh=0.5,\n",
-" nms_thresh=0.4,\n",
+" confidence_threshold=0.5,\n",
+" nms_threshold=0.4,\n",
 ")"
 ]
 },
@@ -140,13 +140,13 @@
 "\n",
 " # Draw detections\n",
 " bbox_image = image.copy()\n",
-" bboxes = [f['bbox'] for f in faces]\n",
-" scores = [f['confidence'] for f in faces]\n",
-" landmarks = [f['landmarks'] for f in faces]\n",
+" bboxes = [f.bbox for f in faces]\n",
+" scores = [f.confidence for f in faces]\n",
+" landmarks = [f.landmarks for f in faces]\n",
 " draw_detections(image=bbox_image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=0.6, fancy_bbox=True)\n",
 "\n",
 " # Align first detected face (returns aligned image and inverse transform matrix)\n",
-" first_landmarks = faces[0]['landmarks']\n",
+" first_landmarks = faces[0].landmarks\n",
 " aligned_image, _ = face_alignment(image, first_landmarks, image_size=112)\n",
 "\n",
 " # Convert BGR to RGB for visualization\n",
@@ -202,7 +202,8 @@
 "source": [
 "## Notes\n",
 "\n",
-"- `detect()` returns a list of face dictionaries with `bbox`, `confidence`, `landmarks`\n",
+"- `detect()` returns a list of `Face` objects with `bbox`, `confidence`, `landmarks` attributes\n",
+"- Access attributes using dot notation: `face.bbox`, `face.landmarks`\n",
 "- `face_alignment()` uses 5-point landmarks to align and crop the face\n",
 "- Default output size is 112x112 (standard for face recognition models)\n"
 ]
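A short, hedged sketch of the alignment flow this notebook describes (the `face_alignment` call and its return signature are taken from the hunk above; the import location is an assumption):

```python
import cv2

from uniface import RetinaFace, face_alignment  # face_alignment import path assumed

detector = RetinaFace(confidence_threshold=0.5, nms_threshold=0.4)
image = cv2.imread('assets/test.jpg')  # example path

faces = detector.detect(image)
if faces:
    # Align the first face with its 5-point landmarks; 112x112 is the standard
    # crop size for the recognition models in this library.
    aligned_image, inverse_matrix = face_alignment(image, faces[0].landmarks, image_size=112)
    print(aligned_image.shape)
```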
@@ -37,7 +37,7 @@
 "name": "stdout",
 "output_type": "stream",
 "text": [
-"1.3.1\n"
+"2.0.0\n"
 ]
 }
 ],
@@ -78,7 +78,7 @@
 ],
 "source": [
 "analyzer = FaceAnalyzer(\n",
-" detector=RetinaFace(conf_thresh=0.5),\n",
+" detector=RetinaFace(confidence_threshold=0.5),\n",
 " recognizer=ArcFace()\n",
 ")"
 ]
@@ -42,7 +42,7 @@
 "name": "stdout",
 "output_type": "stream",
 "text": [
-"1.3.1\n"
+"2.0.0\n"
 ]
 }
 ],
@@ -74,7 +74,7 @@
 ],
 "source": [
 "analyzer = FaceAnalyzer(\n",
-" detector=RetinaFace(conf_thresh=0.5),\n",
+" detector=RetinaFace(confidence_threshold=0.5),\n",
 " recognizer=ArcFace()\n",
 ")"
 ]
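Since these notebooks pair the detector with ArcFace embeddings, here is a hedged sketch of comparing two analyzed faces (the `analyze()` call, `compute_similarity()` method, and the 0.4 match threshold are taken from the scripts removed later in this changeset):

```python
import cv2

from uniface import ArcFace, FaceAnalyzer, RetinaFace

analyzer = FaceAnalyzer(
    detector=RetinaFace(confidence_threshold=0.5),
    recognizer=ArcFace(),
)

faces = analyzer.analyze(cv2.imread('assets/test.jpg'))  # example path
if len(faces) >= 2:
    # Cosine similarity between embeddings; > 0.4 is treated as the same person
    similarity = faces[0].compute_similarity(faces[1])
    print(f'similarity: {similarity:.3f}')
```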
@@ -44,7 +44,7 @@
 "name": "stdout",
 "output_type": "stream",
 "text": [
-"1.3.1\n"
+"2.0.0\n"
 ]
 }
 ],
@@ -88,7 +88,7 @@
 ],
 "source": [
 "analyzer = FaceAnalyzer(\n",
-" detector=RetinaFace(conf_thresh=0.5),\n",
+" detector=RetinaFace(confidence_threshold=0.5),\n",
 " recognizer=ArcFace(),\n",
 " age_gender=AgeGender()\n",
 ")"
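For reference, a hedged sketch of the full FaceAnalyzer pipeline this notebook configures (constructor keywords mirror the hunk above; attribute names such as `face.sex`, `face.age`, and `face.embedding` appear in the deleted `run_face_analyzer.py` script later in this changeset):

```python
import cv2

from uniface import AgeGender, ArcFace, FaceAnalyzer, RetinaFace

analyzer = FaceAnalyzer(
    detector=RetinaFace(confidence_threshold=0.5),
    recognizer=ArcFace(),
    age_gender=AgeGender(),
)

image = cv2.imread('assets/test.jpg')  # example path
for face in analyzer.analyze(image):
    # Embeddings and attributes are attached to each Face object
    print(face.bbox, face.sex, face.age, face.embedding.shape)
```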
@@ -46,7 +46,7 @@
 "name": "stdout",
 "output_type": "stream",
 "text": [
-"UniFace version: 1.5.0\n"
+"UniFace version: 2.0.0\n"
 ]
 }
 ],
@@ -365,7 +365,7 @@
 ],
 "metadata": {
 "kernelspec": {
-"display_name": "Python 3",
+"display_name": "base",
 "language": "python",
 "name": "python3"
 },
@@ -379,7 +379,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.10.0"
+"version": "3.13.5"
 }
 },
 "nbformat": 4,
325
examples/07_face_anonymization.ipynb
Normal file
File diff suppressed because one or more lines are too long
@@ -44,7 +44,7 @@
 "name": "stdout",
 "output_type": "stream",
 "text": [
-"UniFace version: 1.4.0\n"
+"UniFace version: 2.0.0\n"
 ]
 }
 ],
@@ -86,7 +86,7 @@
 ],
 "source": [
 "# Initialize face detector\n",
-"detector = RetinaFace(conf_thresh=0.5)\n",
+"detector = RetinaFace(confidence_threshold=0.5)\n",
 "\n",
 "# Initialize gaze estimator (uses ResNet34 by default)\n",
 "gaze_estimator = MobileGaze()"
@@ -103,7 +103,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 4,
+"execution_count": null,
 "metadata": {},
 "outputs": [
 {
@@ -152,8 +152,7 @@
 "\n",
 " # Estimate gaze for each face\n",
 " for i, face in enumerate(faces):\n",
-" bbox = face['bbox']\n",
-" x1, y1, x2, y2 = map(int, bbox[:4])\n",
+" x1, y1, x2, y2 = map(int, face.bbox[:4])\n",
 " face_crop = image[y1:y2, x1:x2]\n",
 "\n",
 " if face_crop.size > 0:\n",
@@ -164,7 +163,7 @@
 " print(f' Face {i+1}: pitch={pitch_deg:.1f}°, yaw={yaw_deg:.1f}°')\n",
 "\n",
 " # Draw gaze without angle text\n",
-" draw_gaze(image, bbox, pitch, yaw, draw_angles=False)\n",
+" draw_gaze(image, face.bbox, pitch, yaw, draw_angles=False)\n",
 "\n",
 " # Convert BGR to RGB for display\n",
 " original_rgb = cv2.cvtColor(original, cv2.COLOR_BGR2RGB)\n",
@@ -249,7 +248,7 @@
 ],
 "metadata": {
 "kernelspec": {
-"display_name": "Python 3",
+"display_name": "base",
 "language": "python",
 "name": "python3"
 },
@@ -263,7 +262,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.10.0"
+"version": "3.13.5"
 }
 },
 "nbformat": 4,
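A compact, hedged sketch of the gaze flow this notebook diff exercises (crop the detected face, estimate pitch/yaw in radians, draw the arrow); the `MobileGaze.estimate` and `draw_gaze` signatures are taken from the deleted `run_gaze_estimation.py` script later in this changeset:

```python
import cv2
import numpy as np

from uniface import RetinaFace
from uniface.gaze import MobileGaze
from uniface.visualization import draw_gaze

detector = RetinaFace(confidence_threshold=0.5)
gaze_estimator = MobileGaze()

image = cv2.imread('assets/test.jpg')  # example path
for face in detector.detect(image):
    x1, y1, x2, y2 = map(int, face.bbox[:4])
    face_crop = image[y1:y2, x1:x2]
    if face_crop.size == 0:
        continue
    pitch, yaw = gaze_estimator.estimate(face_crop)  # angles in radians
    print(f'pitch={np.degrees(pitch):.1f}°, yaw={np.degrees(yaw):.1f}°')
    draw_gaze(image, face.bbox, pitch, yaw, draw_angles=True)
```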
@@ -1,6 +1,6 @@
 [project]
 name = "uniface"
-version = "1.5.0"
+version = "2.0.0"
 description = "UniFace: A Comprehensive Library for Face Detection, Recognition, Landmark Analysis, Face Parsing, Gaze Estimation, Age, and Gender Detection"
 readme = "README.md"
 license = { text = "MIT" }
@@ -9,7 +9,7 @@ maintainers = [
 { name = "Yakhyokhuja Valikhujaev", email = "yakhyo9696@gmail.com" },
 ]

-requires-python = ">=3.10,<3.14"
+requires-python = ">=3.11,<3.14"
 keywords = [
 "face-detection",
 "face-recognition",
@@ -34,7 +34,6 @@ classifiers = [
 "License :: OSI Approved :: MIT License",
 "Operating System :: OS Independent",
 "Programming Language :: Python :: 3",
-"Programming Language :: Python :: 3.10",
 "Programming Language :: Python :: 3.11",
 "Programming Language :: Python :: 3.12",
 "Programming Language :: Python :: 3.13",
@@ -73,7 +72,7 @@ uniface = ["py.typed"]

 [tool.ruff]
 line-length = 120
-target-version = "py310"
+target-version = "py311"
 exclude = [
 ".git",
 ".ruff_cache",
@@ -90,13 +89,60 @@ exclude = [

[tool.ruff.format]
quote-style = "single"

docstring-code-format = true

[tool.ruff.lint]
select = ["E", "F", "I", "W"]
select = [
    "E",   # pycodestyle errors
    "F",   # pyflakes
    "I",   # isort
    "W",   # pycodestyle warnings
    "UP",  # pyupgrade (modern Python syntax)
    "B",   # flake8-bugbear
    "C4",  # flake8-comprehensions
    "SIM", # flake8-simplify
    "RUF", # Ruff-specific rules
]
ignore = [
    "E501",   # Line too long (handled by formatter)
    "B008",   # Function call in default argument (common in FastAPI/Click)
    "SIM108", # Use ternary operator (can reduce readability)
    "RUF022", # Allow logical grouping in __all__ instead of alphabetical sorting
]

[tool.ruff.lint.flake8-quotes]
docstring-quotes = "double"

[tool.ruff.lint.isort]
force-single-line = false
force-sort-within-sections = true
known-first-party = ["uniface"]
section-order = [
    "future",
    "standard-library",
    "third-party",
    "first-party",
    "local-folder",
]

[tool.ruff.lint.pydocstyle]
convention = "google"

[tool.mypy]
python_version = "3.11"
warn_return_any = false
warn_unused_ignores = true
ignore_missing_imports = true
exclude = ["tests/", "scripts/", "examples/"]
# Disable strict return type checking for numpy operations
disable_error_code = ["no-any-return"]

[tool.bandit]
exclude_dirs = ["tests", "scripts", "examples"]
skips = ["B101", "B614"]  # B101: assert, B614: torch.jit.load (models are SHA256 verified)

[tool.pytest.ini_options]
testpaths = ["tests"]
python_files = ["test_*.py"]
python_functions = ["test_*"]
addopts = "-v --tb=short"
@@ -1,79 +0,0 @@
# Scripts

Scripts for testing UniFace features.

## Available Scripts

| Script | Description |
|--------|-------------|
| `run_detection.py` | Face detection on image or webcam |
| `run_age_gender.py` | Age and gender prediction |
| `run_emotion.py` | Emotion detection (7 or 8 emotions) |
| `run_gaze_estimation.py` | Gaze direction estimation |
| `run_landmarks.py` | 106-point facial landmark detection |
| `run_recognition.py` | Face embedding extraction and comparison |
| `run_face_analyzer.py` | Complete face analysis (detection + recognition + attributes) |
| `run_face_search.py` | Real-time face matching against reference |
| `run_video_detection.py` | Face detection on video files |
| `batch_process.py` | Batch process folder of images |
| `download_model.py` | Download model weights |
| `sha256_generate.py` | Generate SHA256 hash for model files |

## Usage Examples

```bash
# Face detection
python scripts/run_detection.py --image assets/test.jpg
python scripts/run_detection.py --webcam

# Age and gender
python scripts/run_age_gender.py --image assets/test.jpg
python scripts/run_age_gender.py --webcam

# Emotion detection
python scripts/run_emotion.py --image assets/test.jpg
python scripts/run_emotion.py --webcam

# Gaze estimation
python scripts/run_gaze_estimation.py --image assets/test.jpg
python scripts/run_gaze_estimation.py --webcam

# Landmarks
python scripts/run_landmarks.py --image assets/test.jpg
python scripts/run_landmarks.py --webcam

# Face recognition (extract embedding)
python scripts/run_recognition.py --image assets/test.jpg

# Face comparison
python scripts/run_recognition.py --image1 face1.jpg --image2 face2.jpg

# Face search (match webcam against reference)
python scripts/run_face_search.py --image reference.jpg

# Video processing
python scripts/run_video_detection.py --input video.mp4 --output output.mp4

# Batch processing
python scripts/batch_process.py --input images/ --output results/

# Download models
python scripts/download_model.py --model-type retinaface
python scripts/download_model.py  # downloads all
```

## Common Options

| Option | Description |
|--------|-------------|
| `--image` | Path to input image |
| `--webcam` | Use webcam instead of image |
| `--method` | Choose detector: `retinaface`, `scrfd`, `yolov5face` |
| `--threshold` | Visualization confidence threshold (default: 0.25) |
| `--save_dir` | Output directory (default: `outputs`) |

## Quick Test

```bash
python scripts/run_detection.py --image assets/test.jpg
```
@@ -1,130 +0,0 @@
|
||||
# Age and gender prediction on detected faces
|
||||
# Usage: python run_age_gender.py --image path/to/image.jpg
|
||||
# python run_age_gender.py --webcam
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
|
||||
from uniface import SCRFD, AgeGender, RetinaFace
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
|
||||
def draw_age_gender_label(image, bbox, gender_id: int, age: int):
|
||||
"""Draw age/gender label above the bounding box."""
|
||||
x1, y1 = int(bbox[0]), int(bbox[1])
|
||||
gender_str = 'Female' if gender_id == 0 else 'Male'
|
||||
text = f'{gender_str}, {age}y'
|
||||
(tw, th), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
|
||||
cv2.rectangle(image, (x1, y1 - th - 10), (x1 + tw + 10, y1), (0, 255, 0), -1)
|
||||
cv2.putText(image, text, (x1 + 5, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 2)
|
||||
|
||||
|
||||
def process_image(
|
||||
detector,
|
||||
age_gender,
|
||||
image_path: str,
|
||||
save_dir: str = 'outputs',
|
||||
threshold: float = 0.6,
|
||||
):
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
print(f"Error: Failed to load image from '{image_path}'")
|
||||
return
|
||||
|
||||
faces = detector.detect(image)
|
||||
print(f'Detected {len(faces)} face(s)')
|
||||
|
||||
if not faces:
|
||||
return
|
||||
|
||||
bboxes = [f['bbox'] for f in faces]
|
||||
scores = [f['confidence'] for f in faces]
|
||||
landmarks = [f['landmarks'] for f in faces]
|
||||
draw_detections(
|
||||
image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
|
||||
)
|
||||
|
||||
for i, face in enumerate(faces):
|
||||
gender_id, age = age_gender.predict(image, face['bbox'])
|
||||
gender_str = 'Female' if gender_id == 0 else 'Male'
|
||||
print(f' Face {i + 1}: {gender_str}, {age} years old')
|
||||
draw_age_gender_label(image, face['bbox'], gender_id, age)
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{Path(image_path).stem}_age_gender.jpg')
|
||||
cv2.imwrite(output_path, image)
|
||||
print(f'Output saved: {output_path}')
|
||||
|
||||
|
||||
def run_webcam(detector, age_gender, threshold: float = 0.6):
|
||||
cap = cv2.VideoCapture(0) # 0 = default webcam
|
||||
if not cap.isOpened():
|
||||
print('Cannot open webcam')
|
||||
return
|
||||
|
||||
print("Press 'q' to quit")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
frame = cv2.flip(frame, 1) # mirror for natural interaction
|
||||
if not ret:
|
||||
break
|
||||
|
||||
faces = detector.detect(frame)
|
||||
|
||||
# unpack face data for visualization
|
||||
bboxes = [f['bbox'] for f in faces]
|
||||
scores = [f['confidence'] for f in faces]
|
||||
landmarks = [f['landmarks'] for f in faces]
|
||||
draw_detections(
|
||||
image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
|
||||
)
|
||||
|
||||
for face in faces:
|
||||
gender_id, age = age_gender.predict(frame, face['bbox']) # predict per face
|
||||
draw_age_gender_label(frame, face['bbox'], gender_id, age)
|
||||
|
||||
cv2.putText(
|
||||
frame,
|
||||
f'Faces: {len(faces)}',
|
||||
(10, 30),
|
||||
cv2.FONT_HERSHEY_SIMPLEX,
|
||||
1,
|
||||
(0, 255, 0),
|
||||
2,
|
||||
)
|
||||
cv2.imshow('Age & Gender Detection', frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Run age and gender detection')
|
||||
parser.add_argument('--image', type=str, help='Path to input image')
|
||||
parser.add_argument('--webcam', action='store_true', help='Use webcam')
|
||||
parser.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
|
||||
parser.add_argument('--threshold', type=float, default=0.6, help='Visualization threshold')
|
||||
parser.add_argument('--save_dir', type=str, default='outputs')
|
||||
args = parser.parse_args()
|
||||
|
||||
if not args.image and not args.webcam:
|
||||
parser.error('Either --image or --webcam must be specified')
|
||||
|
||||
detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
|
||||
age_gender = AgeGender()
|
||||
|
||||
if args.webcam:
|
||||
run_webcam(detector, age_gender, args.threshold)
|
||||
else:
|
||||
process_image(detector, age_gender, args.image, args.save_dir, args.threshold)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
@@ -1,110 +0,0 @@
|
||||
# Face detection on image or webcam
|
||||
# Usage: python run_detection.py --image path/to/image.jpg
|
||||
# python run_detection.py --webcam
|
||||
|
||||
import argparse
|
||||
import os
|
||||
|
||||
import cv2
|
||||
|
||||
from uniface.detection import SCRFD, RetinaFace, YOLOv5Face
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
|
||||
def process_image(detector, image_path: str, threshold: float = 0.6, save_dir: str = 'outputs'):
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
print(f"Error: Failed to load image from '{image_path}'")
|
||||
return
|
||||
|
||||
faces = detector.detect(image)
|
||||
|
||||
if faces:
|
||||
bboxes = [face['bbox'] for face in faces]
|
||||
scores = [face['confidence'] for face in faces]
|
||||
landmarks = [face['landmarks'] for face in faces]
|
||||
draw_detections(image, bboxes, scores, landmarks, vis_threshold=threshold)
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{os.path.splitext(os.path.basename(image_path))[0]}_out.jpg')
|
||||
cv2.imwrite(output_path, image)
|
||||
print(f'Output saved: {output_path}')
|
||||
|
||||
|
||||
def run_webcam(detector, threshold: float = 0.6):
|
||||
cap = cv2.VideoCapture(0) # 0 = default webcam
|
||||
if not cap.isOpened():
|
||||
print('Cannot open webcam')
|
||||
return
|
||||
|
||||
print("Press 'q' to quit")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
frame = cv2.flip(frame, 1) # mirror for natural interaction
|
||||
if not ret:
|
||||
break
|
||||
|
||||
faces = detector.detect(frame)
|
||||
|
||||
# unpack face data for visualization
|
||||
bboxes = [f['bbox'] for f in faces]
|
||||
scores = [f['confidence'] for f in faces]
|
||||
landmarks = [f['landmarks'] for f in faces]
|
||||
draw_detections(
|
||||
image=frame,
|
||||
bboxes=bboxes,
|
||||
scores=scores,
|
||||
landmarks=landmarks,
|
||||
vis_threshold=threshold,
|
||||
draw_score=True,
|
||||
fancy_bbox=True,
|
||||
)
|
||||
|
||||
cv2.putText(
|
||||
frame,
|
||||
f'Faces: {len(faces)}',
|
||||
(10, 30),
|
||||
cv2.FONT_HERSHEY_SIMPLEX,
|
||||
1,
|
||||
(0, 255, 0),
|
||||
2,
|
||||
)
|
||||
cv2.imshow('Face Detection', frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Run face detection')
|
||||
parser.add_argument('--image', type=str, help='Path to input image')
|
||||
parser.add_argument('--webcam', action='store_true', help='Use webcam')
|
||||
parser.add_argument('--method', type=str, default='retinaface', choices=['retinaface', 'scrfd', 'yolov5face'])
|
||||
parser.add_argument('--threshold', type=float, default=0.25, help='Visualization threshold')
|
||||
parser.add_argument('--save_dir', type=str, default='outputs')
|
||||
args = parser.parse_args()
|
||||
|
||||
if not args.image and not args.webcam:
|
||||
parser.error('Either --image or --webcam must be specified')
|
||||
|
||||
if args.method == 'retinaface':
|
||||
detector = RetinaFace()
|
||||
elif args.method == 'scrfd':
|
||||
detector = SCRFD()
|
||||
else:
|
||||
from uniface.constants import YOLOv5FaceWeights
|
||||
|
||||
detector = YOLOv5Face(model_name=YOLOv5FaceWeights.YOLOV5M)
|
||||
|
||||
if args.webcam:
|
||||
run_webcam(detector, args.threshold)
|
||||
else:
|
||||
process_image(detector, args.image, args.threshold, args.save_dir)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
@@ -1,126 +0,0 @@
|
||||
# Emotion detection on detected faces
|
||||
# Usage: python run_emotion.py --image path/to/image.jpg
|
||||
# python run_emotion.py --webcam
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
|
||||
from uniface import SCRFD, Emotion, RetinaFace
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
|
||||
def draw_emotion_label(image, bbox, emotion: str, confidence: float):
|
||||
"""Draw emotion label above the bounding box."""
|
||||
x1, y1 = int(bbox[0]), int(bbox[1])
|
||||
text = f'{emotion} ({confidence:.2f})'
|
||||
(tw, th), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
|
||||
cv2.rectangle(image, (x1, y1 - th - 10), (x1 + tw + 10, y1), (255, 0, 0), -1)
|
||||
cv2.putText(image, text, (x1 + 5, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
|
||||
|
||||
|
||||
def process_image(
|
||||
detector,
|
||||
emotion_predictor,
|
||||
image_path: str,
|
||||
save_dir: str = 'outputs',
|
||||
threshold: float = 0.6,
|
||||
):
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
print(f"Error: Failed to load image from '{image_path}'")
|
||||
return
|
||||
|
||||
faces = detector.detect(image)
|
||||
print(f'Detected {len(faces)} face(s)')
|
||||
|
||||
if not faces:
|
||||
return
|
||||
|
||||
bboxes = [f['bbox'] for f in faces]
|
||||
scores = [f['confidence'] for f in faces]
|
||||
landmarks = [f['landmarks'] for f in faces]
|
||||
draw_detections(
|
||||
image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
|
||||
)
|
||||
|
||||
for i, face in enumerate(faces):
|
||||
emotion, confidence = emotion_predictor.predict(image, face['landmarks'])
|
||||
print(f' Face {i + 1}: {emotion} (confidence: {confidence:.3f})')
|
||||
draw_emotion_label(image, face['bbox'], emotion, confidence)
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{Path(image_path).stem}_emotion.jpg')
|
||||
cv2.imwrite(output_path, image)
|
||||
print(f'Output saved: {output_path}')
|
||||
|
||||
|
||||
def run_webcam(detector, emotion_predictor, threshold: float = 0.6):
|
||||
cap = cv2.VideoCapture(0) # 0 = default webcam
|
||||
if not cap.isOpened():
|
||||
print('Cannot open webcam')
|
||||
return
|
||||
|
||||
print("Press 'q' to quit")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
frame = cv2.flip(frame, 1) # mirror for natural interaction
|
||||
if not ret:
|
||||
break
|
||||
|
||||
faces = detector.detect(frame)
|
||||
|
||||
# unpack face data for visualization
|
||||
bboxes = [f['bbox'] for f in faces]
|
||||
scores = [f['confidence'] for f in faces]
|
||||
landmarks = [f['landmarks'] for f in faces]
|
||||
draw_detections(frame, bboxes, scores, landmarks, vis_threshold=threshold)
|
||||
|
||||
for face in faces:
|
||||
emotion, confidence = emotion_predictor.predict(frame, face['landmarks'])
|
||||
draw_emotion_label(frame, face['bbox'], emotion, confidence)
|
||||
|
||||
cv2.putText(
|
||||
frame,
|
||||
f'Faces: {len(faces)}',
|
||||
(10, 30),
|
||||
cv2.FONT_HERSHEY_SIMPLEX,
|
||||
1,
|
||||
(0, 255, 0),
|
||||
2,
|
||||
)
|
||||
cv2.imshow('Emotion Detection', frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Run emotion detection')
|
||||
parser.add_argument('--image', type=str, help='Path to input image')
|
||||
parser.add_argument('--webcam', action='store_true', help='Use webcam')
|
||||
parser.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
|
||||
parser.add_argument('--threshold', type=float, default=0.6, help='Visualization threshold')
|
||||
parser.add_argument('--save_dir', type=str, default='outputs')
|
||||
args = parser.parse_args()
|
||||
|
||||
if not args.image and not args.webcam:
|
||||
parser.error('Either --image or --webcam must be specified')
|
||||
|
||||
detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
|
||||
emotion_predictor = Emotion()
|
||||
|
||||
if args.webcam:
|
||||
run_webcam(detector, emotion_predictor, args.threshold)
|
||||
else:
|
||||
process_image(detector, emotion_predictor, args.image, args.save_dir, args.threshold)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
@@ -1,116 +0,0 @@
|
||||
# Face analysis using FaceAnalyzer
|
||||
# Usage: python run_face_analyzer.py --image path/to/image.jpg
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from uniface import AgeGender, ArcFace, FaceAnalyzer, RetinaFace
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
|
||||
def draw_face_info(image, face, face_id):
|
||||
"""Draw face ID and attributes above bounding box."""
|
||||
x1, y1, x2, y2 = map(int, face.bbox)
|
||||
lines = [f'ID: {face_id}', f'Conf: {face.confidence:.2f}']
|
||||
if face.age and face.sex:
|
||||
lines.append(f'{face.sex}, {face.age}y')
|
||||
|
||||
for i, line in enumerate(lines):
|
||||
y_pos = y1 - 10 - (len(lines) - 1 - i) * 25
|
||||
if y_pos < 20:
|
||||
y_pos = y2 + 20 + i * 25
|
||||
(tw, th), _ = cv2.getTextSize(line, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
|
||||
cv2.rectangle(image, (x1, y_pos - th - 5), (x1 + tw + 10, y_pos + 5), (0, 255, 0), -1)
|
||||
cv2.putText(image, line, (x1 + 5, y_pos), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 2)
|
||||
|
||||
|
||||
def process_image(analyzer, image_path: str, save_dir: str = 'outputs', show_similarity: bool = True):
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
print(f"Error: Failed to load image from '{image_path}'")
|
||||
return
|
||||
|
||||
faces = analyzer.analyze(image)
|
||||
print(f'Detected {len(faces)} face(s)')
|
||||
|
||||
if not faces:
|
||||
return
|
||||
|
||||
for i, face in enumerate(faces, 1):
|
||||
info = f' Face {i}: {face.sex}, {face.age}y' if face.age and face.sex else f' Face {i}'
|
||||
if face.embedding is not None:
|
||||
info += f' (embedding: {face.embedding.shape})'
|
||||
print(info)
|
||||
|
||||
if show_similarity and len(faces) >= 2:
|
||||
print('\nSimilarity Matrix:')
|
||||
n = len(faces)
|
||||
sim_matrix = np.zeros((n, n))
|
||||
|
||||
for i in range(n):
|
||||
for j in range(i, n):
|
||||
if i == j:
|
||||
sim_matrix[i][j] = 1.0
|
||||
else:
|
||||
sim = faces[i].compute_similarity(faces[j])
|
||||
sim_matrix[i][j] = sim
|
||||
sim_matrix[j][i] = sim
|
||||
|
||||
print(' ', end='')
|
||||
for i in range(n):
|
||||
print(f' F{i + 1:2d} ', end='')
|
||||
print('\n ' + '-' * (7 * n))
|
||||
|
||||
for i in range(n):
|
||||
print(f'F{i + 1:2d} | ', end='')
|
||||
for j in range(n):
|
||||
print(f'{sim_matrix[i][j]:6.3f} ', end='')
|
||||
print()
|
||||
|
||||
pairs = [(i, j, sim_matrix[i][j]) for i in range(n) for j in range(i + 1, n)]
|
||||
pairs.sort(key=lambda x: x[2], reverse=True)
|
||||
|
||||
print('\nTop matches (>0.4 = same person):')
|
||||
for i, j, sim in pairs[:3]:
|
||||
status = 'Same' if sim > 0.4 else 'Different'
|
||||
print(f' Face {i + 1} ↔ Face {j + 1}: {sim:.3f} ({status})')
|
||||
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
draw_detections(image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, fancy_bbox=True)
|
||||
|
||||
for i, face in enumerate(faces, 1):
|
||||
draw_face_info(image, face, i)
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{Path(image_path).stem}_analysis.jpg')
|
||||
cv2.imwrite(output_path, image)
|
||||
print(f'Output saved: {output_path}')
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Face analysis with detection, recognition, and attributes')
|
||||
parser.add_argument('--image', type=str, required=True, help='Path to input image')
|
||||
parser.add_argument('--save_dir', type=str, default='outputs', help='Output directory')
|
||||
parser.add_argument('--no-similarity', action='store_true', help='Skip similarity matrix computation')
|
||||
args = parser.parse_args()
|
||||
|
||||
if not os.path.exists(args.image):
|
||||
print(f'Error: Image not found: {args.image}')
|
||||
return
|
||||
|
||||
detector = RetinaFace()
|
||||
recognizer = ArcFace()
|
||||
age_gender = AgeGender()
|
||||
analyzer = FaceAnalyzer(detector, recognizer, age_gender)
|
||||
|
||||
process_image(analyzer, args.image, args.save_dir, show_similarity=not args.no_similarity)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
@@ -1,126 +0,0 @@
|
||||
# Face parsing on detected faces
|
||||
# Usage: python run_face_parsing.py --image path/to/image.jpg
|
||||
# python run_face_parsing.py --webcam
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
|
||||
from uniface import RetinaFace
|
||||
from uniface.constants import ParsingWeights
|
||||
from uniface.parsing import BiSeNet
|
||||
from uniface.visualization import vis_parsing_maps
|
||||
|
||||
|
||||
def process_image(detector, parser, image_path: str, save_dir: str = 'outputs'):
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
print(f"Error: Failed to load image from '{image_path}'")
|
||||
return
|
||||
|
||||
faces = detector.detect(image)
|
||||
print(f'Detected {len(faces)} face(s)')
|
||||
|
||||
result_image = image.copy()
|
||||
|
||||
for i, face in enumerate(faces):
|
||||
bbox = face['bbox']
|
||||
x1, y1, x2, y2 = map(int, bbox[:4])
|
||||
face_crop = image[y1:y2, x1:x2]
|
||||
|
||||
if face_crop.size == 0:
|
||||
continue
|
||||
|
||||
# Parse the face
|
||||
mask = parser.parse(face_crop)
|
||||
print(f' Face {i + 1}: parsed with {len(set(mask.flatten()))} unique classes')
|
||||
|
||||
# Visualize the parsing result
|
||||
face_crop_rgb = cv2.cvtColor(face_crop, cv2.COLOR_BGR2RGB)
|
||||
vis_result = vis_parsing_maps(face_crop_rgb, mask, save_image=False)
|
||||
|
||||
# Place the visualization back on the original image
|
||||
result_image[y1:y2, x1:x2] = vis_result
|
||||
|
||||
# Draw bounding box
|
||||
cv2.rectangle(result_image, (x1, y1), (x2, y2), (0, 255, 0), 2)
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{Path(image_path).stem}_parsing.jpg')
|
||||
cv2.imwrite(output_path, result_image)
|
||||
print(f'Output saved: {output_path}')
|
||||
|
||||
|
||||
def run_webcam(detector, parser):
|
||||
cap = cv2.VideoCapture(0)
|
||||
if not cap.isOpened():
|
||||
print('Cannot open webcam')
|
||||
return
|
||||
|
||||
print("Press 'q' to quit")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
frame = cv2.flip(frame, 1)
|
||||
faces = detector.detect(frame)
|
||||
|
||||
for face in faces:
|
||||
bbox = face['bbox']
|
||||
x1, y1, x2, y2 = map(int, bbox[:4])
|
||||
face_crop = frame[y1:y2, x1:x2]
|
||||
|
||||
if face_crop.size == 0:
|
||||
continue
|
||||
|
||||
# Parse the face
|
||||
mask = parser.parse(face_crop)
|
||||
|
||||
# Visualize the parsing result
|
||||
face_crop_rgb = cv2.cvtColor(face_crop, cv2.COLOR_BGR2RGB)
|
||||
vis_result = vis_parsing_maps(face_crop_rgb, mask, save_image=False)
|
||||
|
||||
# Place the visualization back on the frame
|
||||
frame[y1:y2, x1:x2] = vis_result
|
||||
|
||||
# Draw bounding box
|
||||
cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
|
||||
|
||||
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
cv2.imshow('Face Parsing', frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
|
||||
parser_arg = argparse.ArgumentParser(description='Run face parsing')
|
||||
parser_arg.add_argument('--image', type=str, help='Path to input image')
|
||||
parser_arg.add_argument('--webcam', action='store_true', help='Use webcam')
|
||||
parser_arg.add_argument('--save_dir', type=str, default='outputs')
|
||||
parser_arg.add_argument(
|
||||
'--model', type=str, default=ParsingWeights.RESNET18, choices=[ParsingWeights.RESNET18, ParsingWeights.RESNET34]
|
||||
)
|
||||
args = parser_arg.parse_args()
|
||||
|
||||
if not args.image and not args.webcam:
|
||||
parser_arg.error('Either --image or --webcam must be specified')
|
||||
|
||||
detector = RetinaFace()
|
||||
parser = BiSeNet(model_name=ParsingWeights.RESNET34)
|
||||
|
||||
if args.webcam:
|
||||
run_webcam(detector, parser)
|
||||
else:
|
||||
process_image(detector, parser, args.image, args.save_dir)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
@@ -1,97 +0,0 @@
|
||||
# Real-time face search: match webcam faces against a reference image
|
||||
# Usage: python run_face_search.py --image reference.jpg
|
||||
|
||||
import argparse
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from uniface.detection import SCRFD, RetinaFace
|
||||
from uniface.face_utils import compute_similarity
|
||||
from uniface.recognition import ArcFace, MobileFace, SphereFace
|
||||
|
||||
|
||||
def get_recognizer(name: str):
|
||||
if name == 'arcface':
|
||||
return ArcFace()
|
||||
elif name == 'mobileface':
|
||||
return MobileFace()
|
||||
else:
|
||||
return SphereFace()
|
||||
|
||||
|
||||
def extract_reference_embedding(detector, recognizer, image_path: str) -> np.ndarray:
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
raise RuntimeError(f'Failed to load image: {image_path}')
|
||||
|
||||
faces = detector.detect(image)
|
||||
if not faces:
|
||||
raise RuntimeError('No faces found in reference image.')
|
||||
|
||||
landmarks = faces[0]['landmarks']
|
||||
return recognizer.get_normalized_embedding(image, landmarks)
|
||||
|
||||
|
||||
def run_webcam(detector, recognizer, ref_embedding: np.ndarray, threshold: float = 0.4):
|
||||
cap = cv2.VideoCapture(0) # 0 = default webcam
|
||||
if not cap.isOpened():
|
||||
raise RuntimeError('Webcam could not be opened.')
|
||||
|
||||
print("Press 'q' to quit")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
frame = cv2.flip(frame, 1) # mirror for natural interaction
|
||||
if not ret:
|
||||
break
|
||||
|
||||
faces = detector.detect(frame)
|
||||
|
||||
for face in faces:
|
||||
bbox = face['bbox']
|
||||
landmarks = face['landmarks']
|
||||
x1, y1, x2, y2 = map(int, bbox)
|
||||
|
||||
embedding = recognizer.get_normalized_embedding(frame, landmarks)
|
||||
sim = compute_similarity(ref_embedding, embedding) # compare with reference
|
||||
|
||||
# green = match, red = unknown
|
||||
label = f'Match ({sim:.2f})' if sim > threshold else f'Unknown ({sim:.2f})'
|
||||
color = (0, 255, 0) if sim > threshold else (0, 0, 255)
|
||||
|
||||
cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
|
||||
cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)
|
||||
|
||||
cv2.imshow('Face Recognition', frame)
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Face search using a reference image')
|
||||
parser.add_argument('--image', type=str, required=True, help='Reference face image')
|
||||
parser.add_argument('--threshold', type=float, default=0.4, help='Match threshold')
|
||||
parser.add_argument('--detector', type=str, default='scrfd', choices=['retinaface', 'scrfd'])
|
||||
parser.add_argument(
|
||||
'--recognizer',
|
||||
type=str,
|
||||
default='arcface',
|
||||
choices=['arcface', 'mobileface', 'sphereface'],
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
|
||||
recognizer = get_recognizer(args.recognizer)
|
||||
|
||||
print(f'Loading reference: {args.image}')
|
||||
ref_embedding = extract_reference_embedding(detector, recognizer, args.image)
|
||||
|
||||
run_webcam(detector, recognizer, ref_embedding, args.threshold)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
@@ -1,104 +0,0 @@
|
||||
# Gaze estimation on detected faces
|
||||
# Usage: python run_gaze_estimation.py --image path/to/image.jpg
|
||||
# python run_gaze_estimation.py --webcam
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from uniface import RetinaFace
|
||||
from uniface.gaze import MobileGaze
|
||||
from uniface.visualization import draw_gaze
|
||||
|
||||
|
||||
def process_image(detector, gaze_estimator, image_path: str, save_dir: str = 'outputs'):
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
print(f"Error: Failed to load image from '{image_path}'")
|
||||
return
|
||||
|
||||
faces = detector.detect(image)
|
||||
print(f'Detected {len(faces)} face(s)')
|
||||
|
||||
for i, face in enumerate(faces):
|
||||
bbox = face['bbox']
|
||||
x1, y1, x2, y2 = map(int, bbox[:4])
|
||||
face_crop = image[y1:y2, x1:x2]
|
||||
|
||||
if face_crop.size == 0:
|
||||
continue
|
||||
|
||||
pitch, yaw = gaze_estimator.estimate(face_crop)
|
||||
print(f' Face {i + 1}: pitch={np.degrees(pitch):.1f}°, yaw={np.degrees(yaw):.1f}°')
|
||||
|
||||
# Draw both bbox and gaze arrow with angle text
|
||||
draw_gaze(image, bbox, pitch, yaw, draw_angles=True)
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{Path(image_path).stem}_gaze.jpg')
|
||||
cv2.imwrite(output_path, image)
|
||||
print(f'Output saved: {output_path}')
|
||||
|
||||
|
||||
def run_webcam(detector, gaze_estimator):
|
||||
cap = cv2.VideoCapture(0)
|
||||
if not cap.isOpened():
|
||||
print('Cannot open webcam')
|
||||
return
|
||||
|
||||
print("Press 'q' to quit")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
frame = cv2.flip(frame, 1)
|
||||
faces = detector.detect(frame)
|
||||
|
||||
for face in faces:
|
||||
bbox = face['bbox']
|
||||
x1, y1, x2, y2 = map(int, bbox[:4])
|
||||
face_crop = frame[y1:y2, x1:x2]
|
||||
|
||||
if face_crop.size == 0:
|
||||
continue
|
||||
|
||||
pitch, yaw = gaze_estimator.estimate(face_crop)
|
||||
# Draw both bbox and gaze arrow
|
||||
draw_gaze(frame, bbox, pitch, yaw)
|
||||
|
||||
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
cv2.imshow('Gaze Estimation', frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Run gaze estimation')
|
||||
parser.add_argument('--image', type=str, help='Path to input image')
|
||||
parser.add_argument('--webcam', action='store_true', help='Use webcam')
|
||||
parser.add_argument('--save_dir', type=str, default='outputs')
|
||||
args = parser.parse_args()
|
||||
|
||||
if not args.image and not args.webcam:
|
||||
parser.error('Either --image or --webcam must be specified')
|
||||
|
||||
detector = RetinaFace()
|
||||
gaze_estimator = MobileGaze()
|
||||
|
||||
if args.webcam:
|
||||
run_webcam(detector, gaze_estimator)
|
||||
else:
|
||||
process_image(detector, gaze_estimator, args.image, args.save_dir)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
@@ -1,117 +0,0 @@
|
||||
# 106-point facial landmark detection
|
||||
# Usage: python run_landmarks.py --image path/to/image.jpg
|
||||
# python run_landmarks.py --webcam
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
|
||||
from uniface import SCRFD, Landmark106, RetinaFace
|
||||
|
||||
|
||||
def process_image(detector, landmarker, image_path: str, save_dir: str = 'outputs'):
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
print(f"Error: Failed to load image from '{image_path}'")
|
||||
return
|
||||
|
||||
faces = detector.detect(image)
|
||||
print(f'Detected {len(faces)} face(s)')
|
||||
|
||||
if not faces:
|
||||
return
|
||||
|
||||
for i, face in enumerate(faces):
|
||||
bbox = face['bbox']
|
||||
x1, y1, x2, y2 = map(int, bbox)
|
||||
cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
|
||||
|
||||
landmarks = landmarker.get_landmarks(image, bbox)
|
||||
print(f' Face {i + 1}: {len(landmarks)} landmarks')
|
||||
|
||||
for x, y in landmarks.astype(int):
|
||||
cv2.circle(image, (x, y), 1, (0, 255, 0), -1)
|
||||
|
||||
cv2.putText(
|
||||
image,
|
||||
f'Face {i + 1}',
|
||||
(x1, y1 - 10),
|
||||
cv2.FONT_HERSHEY_SIMPLEX,
|
||||
0.5,
|
||||
(0, 255, 0),
|
||||
2,
|
||||
)
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{Path(image_path).stem}_landmarks.jpg')
|
||||
cv2.imwrite(output_path, image)
|
||||
print(f'Output saved: {output_path}')
|
||||
|
||||
|
||||
def run_webcam(detector, landmarker):
|
||||
cap = cv2.VideoCapture(0) # 0 = default webcam
|
||||
if not cap.isOpened():
|
||||
print('Cannot open webcam')
|
||||
return
|
||||
|
||||
print("Press 'q' to quit")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
frame = cv2.flip(frame, 1) # mirror for natural interaction
|
||||
if not ret:
|
||||
break
|
||||
|
||||
faces = detector.detect(frame)
|
||||
|
||||
for face in faces:
|
||||
bbox = face['bbox']
|
||||
x1, y1, x2, y2 = map(int, bbox)
|
||||
cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
|
||||
|
||||
landmarks = landmarker.get_landmarks(frame, bbox) # 106 points
|
||||
for x, y in landmarks.astype(int):
|
||||
cv2.circle(frame, (x, y), 1, (0, 255, 0), -1)
|
||||
|
||||
cv2.putText(
|
||||
frame,
|
||||
f'Faces: {len(faces)}',
|
||||
(10, 30),
|
||||
cv2.FONT_HERSHEY_SIMPLEX,
|
||||
1,
|
||||
(0, 255, 0),
|
||||
2,
|
||||
)
|
||||
cv2.imshow('106-Point Landmarks', frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Run facial landmark detection')
|
||||
parser.add_argument('--image', type=str, help='Path to input image')
|
||||
parser.add_argument('--webcam', action='store_true', help='Use webcam')
|
||||
parser.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
|
||||
parser.add_argument('--save_dir', type=str, default='outputs')
|
||||
args = parser.parse_args()
|
||||
|
||||
if not args.image and not args.webcam:
|
||||
parser.error('Either --image or --webcam must be specified')
|
||||
|
||||
detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
|
||||
landmarker = Landmark106()
|
||||
|
||||
if args.webcam:
|
||||
run_webcam(detector, landmarker)
|
||||
else:
|
||||
process_image(detector, landmarker, args.image, args.save_dir)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
@@ -1,109 +0,0 @@
|
||||
# Face detection on video files
|
||||
# Usage: python run_video_detection.py --input video.mp4 --output output.mp4
|
||||
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
from tqdm import tqdm
|
||||
|
||||
from uniface import SCRFD, RetinaFace
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
|
||||
def process_video(
|
||||
detector,
|
||||
input_path: str,
|
||||
output_path: str,
|
||||
threshold: float = 0.6,
|
||||
show_preview: bool = False,
|
||||
):
|
||||
cap = cv2.VideoCapture(input_path)
|
||||
if not cap.isOpened():
|
||||
print(f"Error: Cannot open video file '{input_path}'")
|
||||
return
|
||||
|
||||
# get video properties
|
||||
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
||||
fps = cap.get(cv2.CAP_PROP_FPS)
|
||||
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
||||
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
||||
|
||||
print(f'Input: {input_path} ({width}x{height}, {fps:.1f} fps, {total_frames} frames)')
|
||||
print(f'Output: {output_path}')
|
||||
|
||||
fourcc = cv2.VideoWriter_fourcc(*'mp4v') # codec for .mp4
|
||||
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
|
||||
|
||||
if not out.isOpened():
|
||||
print(f"Error: Cannot create output video '{output_path}'")
|
||||
cap.release()
|
||||
return
|
||||
|
||||
frame_count = 0
|
||||
total_faces = 0
|
||||
|
||||
for _ in tqdm(range(total_frames), desc='Processing', unit='frames'):
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
frame_count += 1
|
||||
faces = detector.detect(frame)
|
||||
total_faces += len(faces)
|
||||
|
||||
bboxes = [f['bbox'] for f in faces]
|
||||
scores = [f['confidence'] for f in faces]
|
||||
landmarks = [f['landmarks'] for f in faces]
|
||||
draw_detections(
|
||||
image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
|
||||
)
|
||||
|
||||
cv2.putText(
|
||||
frame,
|
||||
f'Faces: {len(faces)}',
|
||||
(10, 30),
|
||||
cv2.FONT_HERSHEY_SIMPLEX,
|
||||
1,
|
||||
(0, 255, 0),
|
||||
2,
|
||||
)
|
||||
out.write(frame)
|
||||
|
||||
if show_preview:
|
||||
cv2.imshow("Processing - Press 'q' to cancel", frame)
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
print('\nCancelled by user')
|
||||
break
|
||||
|
||||
cap.release()
|
||||
out.release()
|
||||
if show_preview:
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
avg_faces = total_faces / frame_count if frame_count > 0 else 0
|
||||
print(f'\nDone! {frame_count} frames, {total_faces} faces ({avg_faces:.1f} avg/frame)')
|
||||
print(f'Saved: {output_path}')
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Process video with face detection')
|
||||
parser.add_argument('--input', type=str, required=True, help='Input video path')
|
||||
parser.add_argument('--output', type=str, required=True, help='Output video path')
|
||||
parser.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
|
||||
parser.add_argument('--threshold', type=float, default=0.6, help='Visualization threshold')
|
||||
parser.add_argument('--preview', action='store_true', help='Show live preview')
|
||||
args = parser.parse_args()
|
||||
|
||||
if not Path(args.input).exists():
|
||||
print(f"Error: Input file '{args.input}' does not exist")
|
||||
return
|
||||
|
||||
Path(args.output).parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
|
||||
process_video(detector, args.input, args.output, args.threshold, args.preview)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
@@ -1,7 +1,15 @@
 # Copyright 2025 Yakhyokhuja Valikhujaev
 # Author: Yakhyokhuja Valikhujaev
 # GitHub: https://github.com/yakhyo

 """Tests for AgeGender attribute predictor."""

 from __future__ import annotations

 import numpy as np
 import pytest

-from uniface.attribute import AgeGender
+from uniface.attribute import AgeGender, AttributeResult


 @pytest.fixture
@@ -24,19 +32,22 @@ def test_model_initialization(age_gender_model):


 def test_prediction_output_format(age_gender_model, mock_image, mock_bbox):
-    gender_id, age = age_gender_model.predict(mock_image, mock_bbox)
-    assert isinstance(gender_id, int), f'Gender ID should be int, got {type(gender_id)}'
-    assert isinstance(age, int), f'Age should be int, got {type(age)}'
+    result = age_gender_model.predict(mock_image, mock_bbox)
+    assert isinstance(result, AttributeResult), f'Result should be AttributeResult, got {type(result)}'
+    assert isinstance(result.gender, int), f'Gender should be int, got {type(result.gender)}'
+    assert isinstance(result.age, int), f'Age should be int, got {type(result.age)}'
+    assert isinstance(result.sex, str), f'Sex should be str, got {type(result.sex)}'


 def test_gender_values(age_gender_model, mock_image, mock_bbox):
-    gender_id, age = age_gender_model.predict(mock_image, mock_bbox)
-    assert gender_id in [0, 1], f'Gender ID should be 0 (Female) or 1 (Male), got {gender_id}'
+    result = age_gender_model.predict(mock_image, mock_bbox)
+    assert result.gender in [0, 1], f'Gender should be 0 (Female) or 1 (Male), got {result.gender}'
+    assert result.sex in ['Female', 'Male'], f'Sex should be Female or Male, got {result.sex}'


 def test_age_range(age_gender_model, mock_image, mock_bbox):
-    gender_id, age = age_gender_model.predict(mock_image, mock_bbox)
-    assert 0 <= age <= 120, f'Age should be between 0 and 120, got {age}'
+    result = age_gender_model.predict(mock_image, mock_bbox)
+    assert 0 <= result.age <= 120, f'Age should be between 0 and 120, got {result.age}'


 def test_different_bbox_sizes(age_gender_model, mock_image):
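Taken together, these test changes describe the new attribute API; a minimal, hedged sketch of how calling code migrates (names come from the diff above; the random array simply stands in for a real image as in the tests):

```python
import numpy as np

from uniface.attribute import AgeGender, AttributeResult

age_gender = AgeGender()
image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)  # stand-in image
bbox = [100, 100, 300, 300]

# v2.0: predict() returns an AttributeResult instead of a (gender_id, age) tuple
result: AttributeResult = age_gender.predict(image, bbox)
print(result.gender)  # 0 = Female, 1 = Male
print(result.sex)     # 'Female' or 'Male'
print(result.age)     # integer age estimate
```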
@@ -47,9 +58,9 @@ def test_different_bbox_sizes(age_gender_model, mock_image):
|
||||
]
|
||||
|
||||
for bbox in test_bboxes:
|
||||
gender_id, age = age_gender_model.predict(mock_image, bbox)
|
||||
assert gender_id in [0, 1], f'Failed for bbox {bbox}'
|
||||
assert 0 <= age <= 120, f'Age out of range for bbox {bbox}'
|
||||
result = age_gender_model.predict(mock_image, bbox)
|
||||
assert result.gender in [0, 1], f'Failed for bbox {bbox}'
|
||||
assert 0 <= result.age <= 120, f'Age out of range for bbox {bbox}'
|
||||
|
||||
|
||||
def test_different_image_sizes(age_gender_model, mock_bbox):
|
||||
@@ -57,31 +68,31 @@ def test_different_image_sizes(age_gender_model, mock_bbox):
|
||||
|
||||
for size in test_sizes:
|
||||
mock_image = np.random.randint(0, 255, size, dtype=np.uint8)
|
||||
gender_id, age = age_gender_model.predict(mock_image, mock_bbox)
|
||||
assert gender_id in [0, 1], f'Failed for image size {size}'
|
||||
assert 0 <= age <= 120, f'Age out of range for image size {size}'
|
||||
result = age_gender_model.predict(mock_image, mock_bbox)
|
||||
assert result.gender in [0, 1], f'Failed for image size {size}'
|
||||
assert 0 <= result.age <= 120, f'Age out of range for image size {size}'
|
||||
|
||||
|
||||
def test_consistency(age_gender_model, mock_image, mock_bbox):
|
||||
gender_id1, age1 = age_gender_model.predict(mock_image, mock_bbox)
|
||||
gender_id2, age2 = age_gender_model.predict(mock_image, mock_bbox)
|
||||
result1 = age_gender_model.predict(mock_image, mock_bbox)
|
||||
result2 = age_gender_model.predict(mock_image, mock_bbox)
|
||||
|
||||
assert gender_id1 == gender_id2, 'Same input should produce same gender prediction'
|
||||
assert age1 == age2, 'Same input should produce same age prediction'
|
||||
assert result1.gender == result2.gender, 'Same input should produce same gender prediction'
|
||||
assert result1.age == result2.age, 'Same input should produce same age prediction'
|
||||
|
||||
|
||||
def test_bbox_list_format(age_gender_model, mock_image):
|
||||
bbox_list = [100, 100, 300, 300]
|
||||
gender_id, age = age_gender_model.predict(mock_image, bbox_list)
|
||||
assert gender_id in [0, 1], 'Should work with bbox as list'
|
||||
assert 0 <= age <= 120, 'Age should be in valid range'
|
||||
result = age_gender_model.predict(mock_image, bbox_list)
|
||||
assert result.gender in [0, 1], 'Should work with bbox as list'
|
||||
assert 0 <= result.age <= 120, 'Age should be in valid range'
|
||||
|
||||
|
||||
def test_bbox_array_format(age_gender_model, mock_image):
|
||||
bbox_array = np.array([100, 100, 300, 300])
|
||||
gender_id, age = age_gender_model.predict(mock_image, bbox_array)
|
||||
assert gender_id in [0, 1], 'Should work with bbox as numpy array'
|
||||
assert 0 <= age <= 120, 'Age should be in valid range'
|
||||
result = age_gender_model.predict(mock_image, bbox_array)
|
||||
assert result.gender in [0, 1], 'Should work with bbox as numpy array'
|
||||
assert 0 <= result.age <= 120, 'Age should be in valid range'
|
||||
|
||||
|
||||
def test_multiple_predictions(age_gender_model, mock_image):
|
||||
@@ -93,25 +104,37 @@ def test_multiple_predictions(age_gender_model, mock_image):
|
||||
|
||||
results = []
|
||||
for bbox in bboxes:
|
||||
gender_id, age = age_gender_model.predict(mock_image, bbox)
|
||||
results.append((gender_id, age))
|
||||
result = age_gender_model.predict(mock_image, bbox)
|
||||
results.append(result)
|
||||
|
||||
assert len(results) == 3, 'Should have 3 predictions'
|
||||
for gender_id, age in results:
|
||||
assert gender_id in [0, 1]
|
||||
assert 0 <= age <= 120
|
||||
for result in results:
|
||||
assert result.gender in [0, 1]
|
||||
assert 0 <= result.age <= 120
|
||||
|
||||
|
||||
def test_age_is_positive(age_gender_model, mock_image, mock_bbox):
|
||||
for _ in range(5):
|
||||
gender_id, age = age_gender_model.predict(mock_image, mock_bbox)
|
||||
assert age >= 0, f'Age should be non-negative, got {age}'
|
||||
result = age_gender_model.predict(mock_image, mock_bbox)
|
||||
assert result.age >= 0, f'Age should be non-negative, got {result.age}'
|
||||
|
||||
|
||||
def test_output_format_for_visualization(age_gender_model, mock_image, mock_bbox):
|
||||
gender_id, age = age_gender_model.predict(mock_image, mock_bbox)
|
||||
gender_str = 'Female' if gender_id == 0 else 'Male'
|
||||
text = f'{gender_str}, {age}y'
|
||||
result = age_gender_model.predict(mock_image, mock_bbox)
|
||||
text = f'{result.sex}, {result.age}y'
|
||||
assert isinstance(text, str), 'Should be able to format as string'
|
||||
assert 'Male' in text or 'Female' in text, 'Text should contain gender'
|
||||
assert 'y' in text, "Text should contain 'y' for years"
|
||||
|
||||
|
||||
def test_attribute_result_fields(age_gender_model, mock_image, mock_bbox):
|
||||
"""Test that AttributeResult has correct fields for AgeGender model."""
|
||||
result = age_gender_model.predict(mock_image, mock_bbox)
|
||||
|
||||
# AgeGender should set gender and age
|
||||
assert result.gender is not None
|
||||
assert result.age is not None
|
||||
|
||||
# AgeGender should NOT set race and age_group (FairFace only)
|
||||
assert result.race is None
|
||||
assert result.age_group is None
|
||||
|
||||
@@ -1,3 +1,11 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Tests for factory functions (create_detector, create_recognizer, etc.)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
@@ -35,8 +43,8 @@ def test_create_detector_with_config():
|
||||
detector = create_detector(
|
||||
'retinaface',
|
||||
model_name=RetinaFaceWeights.MNET_V2,
|
||||
conf_thresh=0.8,
|
||||
nms_thresh=0.3,
|
||||
confidence_threshold=0.8,
|
||||
nms_threshold=0.3,
|
||||
)
|
||||
assert detector is not None, 'Failed to create detector with custom config'
|
||||
|
||||
@@ -53,7 +61,7 @@ def test_create_detector_scrfd_with_model():
|
||||
"""
|
||||
Test creating SCRFD detector with specific model.
|
||||
"""
|
||||
detector = create_detector('scrfd', model_name=SCRFDWeights.SCRFD_10G_KPS, conf_thresh=0.5)
|
||||
detector = create_detector('scrfd', model_name=SCRFDWeights.SCRFD_10G_KPS, confidence_threshold=0.5)
|
||||
assert detector is not None, 'Failed to create SCRFD with specific model'
|
||||
|
||||
|
||||
@@ -141,13 +149,13 @@ def test_detect_faces_with_threshold():
|
||||
Test detect_faces with custom confidence threshold.
|
||||
"""
|
||||
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
faces = detect_faces(mock_image, method='retinaface', conf_thresh=0.8)
|
||||
faces = detect_faces(mock_image, method='retinaface', confidence_threshold=0.8)
|
||||
|
||||
assert isinstance(faces, list), 'detect_faces should return a list'
|
||||
|
||||
# All detections should respect threshold
|
||||
for face in faces:
|
||||
assert face['confidence'] >= 0.8, 'All detections should meet confidence threshold'
|
||||
assert face.confidence >= 0.8, 'All detections should meet confidence threshold'
|
||||
|
||||
|
||||
def test_detect_faces_default_method():
|
||||
@@ -246,8 +254,8 @@ def test_detector_with_different_configs():
|
||||
"""
|
||||
Test creating multiple detectors with different configurations.
|
||||
"""
|
||||
detector_high_thresh = create_detector('retinaface', conf_thresh=0.9)
|
||||
detector_low_thresh = create_detector('retinaface', conf_thresh=0.3)
|
||||
detector_high_thresh = create_detector('retinaface', confidence_threshold=0.9)
|
||||
detector_low_thresh = create_detector('retinaface', confidence_threshold=0.3)
|
||||
|
||||
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
|
||||
|
||||
@@ -1,3 +1,11 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Tests for 106-point facial landmark detector."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
|
||||
@@ -2,6 +2,10 @@
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Tests for BiSeNet face parsing model."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
|
||||
@@ -1,3 +1,11 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Tests for face recognition models (ArcFace, MobileFace, SphereFace)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
|
||||
@@ -1,3 +1,11 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Tests for RetinaFace detector."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
@@ -9,9 +17,9 @@ from uniface.detection import RetinaFace
|
||||
def retinaface_model():
|
||||
return RetinaFace(
|
||||
model_name=RetinaFaceWeights.MNET_V2,
|
||||
conf_thresh=0.5,
|
||||
confidence_threshold=0.5,
|
||||
pre_nms_topk=5000,
|
||||
nms_thresh=0.4,
|
||||
nms_threshold=0.4,
|
||||
post_nms_topk=750,
|
||||
)
|
||||
|
||||
@@ -27,15 +35,15 @@ def test_inference_on_640x640_image(retinaface_model):
|
||||
assert isinstance(faces, list), 'Detections should be a list.'
|
||||
|
||||
for face in faces:
|
||||
assert isinstance(face, dict), 'Each detection should be a dictionary.'
|
||||
assert 'bbox' in face, "Each detection should have a 'bbox' key."
|
||||
assert 'confidence' in face, "Each detection should have a 'confidence' key."
|
||||
assert 'landmarks' in face, "Each detection should have a 'landmarks' key."
|
||||
# Face is a dataclass, check attributes exist
|
||||
assert hasattr(face, 'bbox'), "Each detection should have a 'bbox' attribute."
|
||||
assert hasattr(face, 'confidence'), "Each detection should have a 'confidence' attribute."
|
||||
assert hasattr(face, 'landmarks'), "Each detection should have a 'landmarks' attribute."
|
||||
|
||||
bbox = face['bbox']
|
||||
bbox = face.bbox
|
||||
assert len(bbox) == 4, 'BBox should have 4 values (x1, y1, x2, y2).'
|
||||
|
||||
landmarks = face['landmarks']
|
||||
landmarks = face.landmarks
|
||||
assert len(landmarks) == 5, 'Should have 5 landmark points.'
|
||||
assert all(len(pt) == 2 for pt in landmarks), 'Each landmark should be (x, y).'
|
||||
|
||||
@@ -45,7 +53,7 @@ def test_confidence_threshold(retinaface_model):
|
||||
faces = retinaface_model.detect(mock_image)
|
||||
|
||||
for face in faces:
|
||||
confidence = face['confidence']
|
||||
confidence = face.confidence
|
||||
assert confidence >= 0.5, f'Detection has confidence {confidence} below threshold 0.5'
|
||||
|
||||
|
||||
|
||||
@@ -1,3 +1,11 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Tests for SCRFD detector."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
@@ -9,8 +17,8 @@ from uniface.detection import SCRFD
|
||||
def scrfd_model():
|
||||
return SCRFD(
|
||||
model_name=SCRFDWeights.SCRFD_500M_KPS,
|
||||
conf_thresh=0.5,
|
||||
nms_thresh=0.4,
|
||||
confidence_threshold=0.5,
|
||||
nms_threshold=0.4,
|
||||
)
|
||||
|
||||
|
||||
@@ -25,15 +33,15 @@ def test_inference_on_640x640_image(scrfd_model):
|
||||
assert isinstance(faces, list), 'Detections should be a list.'
|
||||
|
||||
for face in faces:
|
||||
assert isinstance(face, dict), 'Each detection should be a dictionary.'
|
||||
assert 'bbox' in face, "Each detection should have a 'bbox' key."
|
||||
assert 'confidence' in face, "Each detection should have a 'confidence' key."
|
||||
assert 'landmarks' in face, "Each detection should have a 'landmarks' key."
|
||||
# Face is a dataclass, check attributes exist
|
||||
assert hasattr(face, 'bbox'), "Each detection should have a 'bbox' attribute."
|
||||
assert hasattr(face, 'confidence'), "Each detection should have a 'confidence' attribute."
|
||||
assert hasattr(face, 'landmarks'), "Each detection should have a 'landmarks' attribute."
|
||||
|
||||
bbox = face['bbox']
|
||||
bbox = face.bbox
|
||||
assert len(bbox) == 4, 'BBox should have 4 values (x1, y1, x2, y2).'
|
||||
|
||||
landmarks = face['landmarks']
|
||||
landmarks = face.landmarks
|
||||
assert len(landmarks) == 5, 'Should have 5 landmark points.'
|
||||
assert all(len(pt) == 2 for pt in landmarks), 'Each landmark should be (x, y).'
|
||||
|
||||
@@ -43,7 +51,7 @@ def test_confidence_threshold(scrfd_model):
|
||||
faces = scrfd_model.detect(mock_image)
|
||||
|
||||
for face in faces:
|
||||
confidence = face['confidence']
|
||||
confidence = face.confidence
|
||||
assert confidence >= 0.5, f'Detection has confidence {confidence} below threshold 0.5'
|
||||
|
||||
|
||||
@@ -63,7 +71,7 @@ def test_different_input_sizes(scrfd_model):
|
||||
|
||||
|
||||
def test_scrfd_10g_model():
|
||||
model = SCRFD(model_name=SCRFDWeights.SCRFD_10G_KPS, conf_thresh=0.5)
|
||||
model = SCRFD(model_name=SCRFDWeights.SCRFD_10G_KPS, confidence_threshold=0.5)
|
||||
assert model is not None, 'SCRFD 10G model initialization failed.'
|
||||
|
||||
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
|
||||
tests/test_types.py (new file, 282 lines)
@@ -0,0 +1,282 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Tests for UniFace type definitions (dataclasses)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from uniface.types import AttributeResult, EmotionResult, Face, GazeResult, SpoofingResult
|
||||
|
||||
|
||||
class TestGazeResult:
|
||||
"""Tests for GazeResult dataclass."""
|
||||
|
||||
def test_creation(self):
|
||||
result = GazeResult(pitch=0.1, yaw=-0.2)
|
||||
assert result.pitch == 0.1
|
||||
assert result.yaw == -0.2
|
||||
|
||||
def test_immutability(self):
|
||||
result = GazeResult(pitch=0.1, yaw=-0.2)
|
||||
with pytest.raises(AttributeError):
|
||||
result.pitch = 0.5 # type: ignore
|
||||
|
||||
def test_repr(self):
|
||||
result = GazeResult(pitch=0.1234, yaw=-0.5678)
|
||||
repr_str = repr(result)
|
||||
assert 'GazeResult' in repr_str
|
||||
assert '0.1234' in repr_str
|
||||
assert '-0.5678' in repr_str
|
||||
|
||||
def test_equality(self):
|
||||
result1 = GazeResult(pitch=0.1, yaw=-0.2)
|
||||
result2 = GazeResult(pitch=0.1, yaw=-0.2)
|
||||
assert result1 == result2
|
||||
|
||||
def test_hashable(self):
|
||||
"""Frozen dataclasses should be hashable."""
|
||||
result = GazeResult(pitch=0.1, yaw=-0.2)
|
||||
# Should not raise
|
||||
hash(result)
|
||||
# Can be used in sets/dicts
|
||||
result_set = {result}
|
||||
assert result in result_set
|
||||
|
||||
|
||||
class TestSpoofingResult:
|
||||
"""Tests for SpoofingResult dataclass."""
|
||||
|
||||
def test_creation_real(self):
|
||||
result = SpoofingResult(is_real=True, confidence=0.95)
|
||||
assert result.is_real is True
|
||||
assert result.confidence == 0.95
|
||||
|
||||
def test_creation_fake(self):
|
||||
result = SpoofingResult(is_real=False, confidence=0.87)
|
||||
assert result.is_real is False
|
||||
assert result.confidence == 0.87
|
||||
|
||||
def test_immutability(self):
|
||||
result = SpoofingResult(is_real=True, confidence=0.95)
|
||||
with pytest.raises(AttributeError):
|
||||
result.is_real = False # type: ignore
|
||||
|
||||
def test_repr_real(self):
|
||||
result = SpoofingResult(is_real=True, confidence=0.9512)
|
||||
repr_str = repr(result)
|
||||
assert 'SpoofingResult' in repr_str
|
||||
assert 'Real' in repr_str
|
||||
assert '0.9512' in repr_str
|
||||
|
||||
def test_repr_fake(self):
|
||||
result = SpoofingResult(is_real=False, confidence=0.8765)
|
||||
repr_str = repr(result)
|
||||
assert 'Fake' in repr_str
|
||||
|
||||
def test_hashable(self):
|
||||
result = SpoofingResult(is_real=True, confidence=0.95)
|
||||
hash(result)
|
||||
|
||||
|
||||
class TestEmotionResult:
|
||||
"""Tests for EmotionResult dataclass."""
|
||||
|
||||
def test_creation(self):
|
||||
result = EmotionResult(emotion='Happy', confidence=0.92)
|
||||
assert result.emotion == 'Happy'
|
||||
assert result.confidence == 0.92
|
||||
|
||||
def test_immutability(self):
|
||||
result = EmotionResult(emotion='Sad', confidence=0.75)
|
||||
with pytest.raises(AttributeError):
|
||||
result.emotion = 'Happy' # type: ignore
|
||||
|
||||
def test_repr(self):
|
||||
result = EmotionResult(emotion='Angry', confidence=0.8123)
|
||||
repr_str = repr(result)
|
||||
assert 'EmotionResult' in repr_str
|
||||
assert 'Angry' in repr_str
|
||||
assert '0.8123' in repr_str
|
||||
|
||||
def test_various_emotions(self):
|
||||
emotions = ['Neutral', 'Happy', 'Sad', 'Surprise', 'Fear', 'Disgust', 'Angry']
|
||||
for emotion in emotions:
|
||||
result = EmotionResult(emotion=emotion, confidence=0.5)
|
||||
assert result.emotion == emotion
|
||||
|
||||
def test_hashable(self):
|
||||
result = EmotionResult(emotion='Happy', confidence=0.92)
|
||||
hash(result)
|
||||
|
||||
|
||||
class TestAttributeResult:
|
||||
"""Tests for AttributeResult dataclass."""
|
||||
|
||||
def test_age_gender_result(self):
|
||||
result = AttributeResult(gender=1, age=25)
|
||||
assert result.gender == 1
|
||||
assert result.age == 25
|
||||
assert result.age_group is None
|
||||
assert result.race is None
|
||||
assert result.sex == 'Male'
|
||||
|
||||
def test_fairface_result(self):
|
||||
result = AttributeResult(gender=0, age_group='20-29', race='East Asian')
|
||||
assert result.gender == 0
|
||||
assert result.age is None
|
||||
assert result.age_group == '20-29'
|
||||
assert result.race == 'East Asian'
|
||||
assert result.sex == 'Female'
|
||||
|
||||
def test_sex_property_female(self):
|
||||
result = AttributeResult(gender=0)
|
||||
assert result.sex == 'Female'
|
||||
|
||||
def test_sex_property_male(self):
|
||||
result = AttributeResult(gender=1)
|
||||
assert result.sex == 'Male'
|
||||
|
||||
def test_immutability(self):
|
||||
result = AttributeResult(gender=1, age=30)
|
||||
with pytest.raises(AttributeError):
|
||||
result.age = 31 # type: ignore
|
||||
|
||||
def test_repr_age_gender(self):
|
||||
result = AttributeResult(gender=1, age=25)
|
||||
repr_str = repr(result)
|
||||
assert 'AttributeResult' in repr_str
|
||||
assert 'Male' in repr_str
|
||||
assert 'age=25' in repr_str
|
||||
|
||||
def test_repr_fairface(self):
|
||||
result = AttributeResult(gender=0, age_group='30-39', race='White')
|
||||
repr_str = repr(result)
|
||||
assert 'Female' in repr_str
|
||||
assert 'age_group=30-39' in repr_str
|
||||
assert 'race=White' in repr_str
|
||||
|
||||
def test_hashable(self):
|
||||
result = AttributeResult(gender=1, age=25)
|
||||
hash(result)
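# Not part of the diff: a minimal sketch (an assumption, not the real uniface code) of a frozen
# dataclass that would satisfy the assertions in TestAttributeResult above; the actual definition
# lives in uniface/types.py and may differ in detail.
from dataclasses import dataclass


@dataclass(frozen=True)
class AttributeResultSketch:
    gender: int | None = None
    age: int | None = None
    age_group: str | None = None
    race: str | None = None

    @property
    def sex(self) -> str | None:
        # the tests use the convention 0 = 'Female', 1 = 'Male', and None when gender is unset
        if self.gender is None:
            return None
        return 'Female' if self.gender == 0 else 'Male'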
|
||||
|
||||
|
||||
class TestFace:
|
||||
"""Tests for Face dataclass."""
|
||||
|
||||
@pytest.fixture
|
||||
def sample_face(self):
|
||||
return Face(
|
||||
bbox=np.array([100, 100, 200, 200]),
|
||||
confidence=0.95,
|
||||
landmarks=np.array([[120, 130], [180, 130], [150, 160], [130, 180], [170, 180]]),
|
||||
)
|
||||
|
||||
def test_creation(self, sample_face):
|
||||
assert sample_face.confidence == 0.95
|
||||
assert sample_face.bbox.shape == (4,)
|
||||
assert sample_face.landmarks.shape == (5, 2)
|
||||
|
||||
def test_optional_attributes_default_none(self, sample_face):
|
||||
assert sample_face.embedding is None
|
||||
assert sample_face.gender is None
|
||||
assert sample_face.age is None
|
||||
assert sample_face.age_group is None
|
||||
assert sample_face.race is None
|
||||
assert sample_face.emotion is None
|
||||
assert sample_face.emotion_confidence is None
|
||||
|
||||
def test_mutability(self, sample_face):
|
||||
"""Face should be mutable for FaceAnalyzer enrichment."""
|
||||
sample_face.gender = 1
|
||||
sample_face.age = 25
|
||||
sample_face.embedding = np.random.randn(512)
|
||||
|
||||
assert sample_face.gender == 1
|
||||
assert sample_face.age == 25
|
||||
assert sample_face.embedding.shape == (512,)
|
||||
|
||||
def test_sex_property_none(self, sample_face):
|
||||
assert sample_face.sex is None
|
||||
|
||||
def test_sex_property_female(self, sample_face):
|
||||
sample_face.gender = 0
|
||||
assert sample_face.sex == 'Female'
|
||||
|
||||
def test_sex_property_male(self, sample_face):
|
||||
sample_face.gender = 1
|
||||
assert sample_face.sex == 'Male'
|
||||
|
||||
def test_bbox_xyxy(self, sample_face):
|
||||
bbox_xyxy = sample_face.bbox_xyxy
|
||||
np.testing.assert_array_equal(bbox_xyxy, [100, 100, 200, 200])
|
||||
|
||||
def test_bbox_xywh(self, sample_face):
|
||||
bbox_xywh = sample_face.bbox_xywh
|
||||
np.testing.assert_array_equal(bbox_xywh, [100, 100, 100, 100])
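# Not part of the diff: bbox_xywh is the same box re-expressed as (x1, y1, width, height),
# i.e. (x1, y1, x2 - x1, y2 - y1), which is why [100, 100, 200, 200] becomes [100, 100, 100, 100].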
|
||||
|
||||
def test_to_dict(self, sample_face):
|
||||
result = sample_face.to_dict()
|
||||
assert isinstance(result, dict)
|
||||
assert 'bbox' in result
|
||||
assert 'confidence' in result
|
||||
assert 'landmarks' in result
|
||||
|
||||
def test_repr_minimal(self, sample_face):
|
||||
repr_str = repr(sample_face)
|
||||
assert 'Face' in repr_str
|
||||
assert 'confidence=0.950' in repr_str
|
||||
|
||||
def test_repr_with_attributes(self, sample_face):
|
||||
sample_face.gender = 1
|
||||
sample_face.age = 30
|
||||
sample_face.emotion = 'Happy'
|
||||
|
||||
repr_str = repr(sample_face)
|
||||
assert 'age=30' in repr_str
|
||||
assert 'sex=Male' in repr_str
|
||||
assert 'emotion=Happy' in repr_str
|
||||
|
||||
def test_compute_similarity_no_embeddings(self, sample_face):
|
||||
other_face = Face(
|
||||
bbox=np.array([50, 50, 150, 150]),
|
||||
confidence=0.90,
|
||||
landmarks=np.random.randn(5, 2),
|
||||
)
|
||||
with pytest.raises(ValueError, match='Both faces must have embeddings'):
|
||||
sample_face.compute_similarity(other_face)
|
||||
|
||||
def test_compute_similarity_with_embeddings(self, sample_face):
|
||||
# Create normalized embeddings
|
||||
sample_face.embedding = np.random.randn(512)
|
||||
sample_face.embedding /= np.linalg.norm(sample_face.embedding)
|
||||
|
||||
other_face = Face(
|
||||
bbox=np.array([50, 50, 150, 150]),
|
||||
confidence=0.90,
|
||||
landmarks=np.random.randn(5, 2),
|
||||
)
|
||||
other_face.embedding = np.random.randn(512)
|
||||
other_face.embedding /= np.linalg.norm(other_face.embedding)
|
||||
|
||||
similarity = sample_face.compute_similarity(other_face)
|
||||
assert isinstance(similarity, float)
|
||||
assert -1 <= similarity <= 1
|
||||
|
||||
def test_compute_similarity_same_embedding(self, sample_face):
|
||||
embedding = np.random.randn(512)
|
||||
embedding /= np.linalg.norm(embedding)
|
||||
sample_face.embedding = embedding.copy()
|
||||
|
||||
other_face = Face(
|
||||
bbox=np.array([50, 50, 150, 150]),
|
||||
confidence=0.90,
|
||||
landmarks=np.random.randn(5, 2),
|
||||
embedding=embedding.copy(),
|
||||
)
|
||||
|
||||
similarity = sample_face.compute_similarity(other_face)
|
||||
assert similarity == pytest.approx(1.0, abs=1e-5)
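# Not part of the diff: a minimal sketch of the cosine-similarity behaviour the assertions above
# expect; for L2-normalized embeddings it reduces to a plain dot product.
import numpy as np


def cosine_similarity_sketch(a: np.ndarray, b: np.ndarray) -> float:
    """Cosine similarity of two embeddings: 1.0 for identical vectors, always within [-1, 1]."""
    a = a / np.linalg.norm(a)
    b = b / np.linalg.norm(b)
    return float(np.dot(a, b))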
|
||||
@@ -1,3 +1,11 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Tests for utility functions (compute_similarity, face_alignment, etc.)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
@@ -116,7 +124,7 @@ def test_compute_similarity_dtype():
|
||||
emb2 = emb2 / np.linalg.norm(emb2)
|
||||
|
||||
similarity = compute_similarity(emb1, emb2)
|
||||
assert isinstance(similarity, (float, np.floating)), f'Similarity should be float, got {type(similarity)}'
|
||||
assert isinstance(similarity, float | np.floating), f'Similarity should be float, got {type(similarity)}'
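# Not part of the diff: the X | Y union form is accepted by isinstance() on Python 3.10 and
# newer, so the tuple form above can be dropped once older interpreters are no longer supported.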
|
||||
|
||||
|
||||
# face_alignment tests
|
||||
@@ -259,4 +267,4 @@ def test_compute_similarity_with_recognition_embeddings():
|
||||
|
||||
# Should be a valid similarity score
|
||||
assert -1.0 <= similarity <= 1.0
|
||||
assert isinstance(similarity, (float, np.floating))
|
||||
assert isinstance(similarity, float | np.floating)
|
||||
|
||||
tools/README.md (new file, 121 lines)
@@ -0,0 +1,121 @@
|
||||
# Tools
|
||||
|
||||
CLI utilities for testing and running UniFace features.
|
||||
|
||||
## Available Tools
|
||||
|
||||
| Tool | Description |
|
||||
|------|-------------|
|
||||
| `detection.py` | Face detection on image, video, or webcam |
|
||||
| `face_anonymize.py` | Face anonymization/blurring for privacy |
|
||||
| `age_gender.py` | Age and gender prediction |
|
||||
| `face_emotion.py` | Emotion detection (7 or 8 emotions) |
|
||||
| `gaze_estimation.py` | Gaze direction estimation |
|
||||
| `landmarks.py` | 106-point facial landmark detection |
|
||||
| `recognition.py` | Face embedding extraction and comparison |
|
||||
| `face_analyzer.py` | Complete face analysis (detection + recognition + attributes) |
|
||||
| `face_search.py` | Real-time face matching against reference |
|
||||
| `fairface.py` | FairFace attribute prediction (race, gender, age) |
|
||||
| `spoofing.py` | Face anti-spoofing detection |
|
||||
| `face_parsing.py` | Face semantic segmentation |
|
||||
| `video_detection.py` | Face detection on video files with progress bar |
|
||||
| `batch_process.py` | Batch process folder of images |
|
||||
| `download_model.py` | Download model weights |
|
||||
| `sha256_generate.py` | Generate SHA256 hash for model files |
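
Every tool is a small argparse CLI, so `--help` prints the full flag list for that script (a quick way to see options not repeated in this table):

```bash
python tools/detection.py --help
```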
|
||||
|
||||
## Unified `--source` Pattern
|
||||
|
||||
All tools use a unified `--source` argument that accepts (see the sketch after this list):
|
||||
- **Image path**: `--source photo.jpg`
|
||||
- **Video path**: `--source video.mp4`
|
||||
- **Camera ID**: `--source 0` (default webcam), `--source 1` (external camera)
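
A minimal sketch of how the scripts in this folder classify `--source` (it mirrors the `get_source_type` helper each tool defines):

```python
from pathlib import Path

IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}


def get_source_type(source: str) -> str:
    """Classify --source as 'camera', 'image', 'video', or 'unknown'."""
    if source.isdigit():  # bare integers are treated as camera IDs
        return 'camera'
    suffix = Path(source).suffix.lower()
    if suffix in IMAGE_EXTENSIONS:
        return 'image'
    if suffix in VIDEO_EXTENSIONS:
        return 'video'
    return 'unknown'
```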
|
||||
|
||||
## Usage Examples
|
||||
|
||||
```bash
|
||||
# Face detection
|
||||
python tools/detection.py --source assets/test.jpg # image
|
||||
python tools/detection.py --source video.mp4 # video
|
||||
python tools/detection.py --source 0 # webcam
|
||||
|
||||
# Face anonymization
|
||||
python tools/face_anonymize.py --source assets/test.jpg --method pixelate
|
||||
python tools/face_anonymize.py --source video.mp4 --method gaussian
|
||||
python tools/face_anonymize.py --source 0 --method pixelate
|
||||
|
||||
# Age and gender
|
||||
python tools/age_gender.py --source assets/test.jpg
|
||||
python tools/age_gender.py --source 0
|
||||
|
||||
# Emotion detection
|
||||
python tools/face_emotion.py --source assets/test.jpg
|
||||
python tools/face_emotion.py --source 0
|
||||
|
||||
# Gaze estimation
|
||||
python tools/gaze_estimation.py --source assets/test.jpg
|
||||
python tools/gaze_estimation.py --source 0
|
||||
|
||||
# Landmarks
|
||||
python tools/landmarks.py --source assets/test.jpg
|
||||
python tools/landmarks.py --source 0
|
||||
|
||||
# FairFace attributes
|
||||
python tools/fairface.py --source assets/test.jpg
|
||||
python tools/fairface.py --source 0
|
||||
|
||||
# Face parsing
|
||||
python tools/face_parsing.py --source assets/test.jpg
|
||||
python tools/face_parsing.py --source 0
|
||||
|
||||
# Face anti-spoofing
|
||||
python tools/spoofing.py --source assets/test.jpg
|
||||
python tools/spoofing.py --source 0
|
||||
|
||||
# Face analyzer
|
||||
python tools/face_analyzer.py --source assets/test.jpg
|
||||
python tools/face_analyzer.py --source 0
|
||||
|
||||
# Face recognition (extract embedding)
|
||||
python tools/recognition.py --image assets/test.jpg
|
||||
|
||||
# Face comparison
|
||||
python tools/recognition.py --image1 face1.jpg --image2 face2.jpg
|
||||
|
||||
# Face search (match against reference)
|
||||
python tools/face_search.py --reference person.jpg --source 0
|
||||
python tools/face_search.py --reference person.jpg --source video.mp4
|
||||
|
||||
# Video processing with progress bar
|
||||
python tools/video_detection.py --source video.mp4
|
||||
python tools/video_detection.py --source video.mp4 --output output.mp4
|
||||
|
||||
# Batch processing
|
||||
python tools/batch_process.py --input images/ --output results/
|
||||
|
||||
# Download models
|
||||
python tools/download_model.py --model-type retinaface
|
||||
python tools/download_model.py # downloads all
|
||||
```
|
||||
|
||||
## Common Options
|
||||
|
||||
| Option | Description |
|
||||
|--------|-------------|
|
||||
| `--source` | Input source: image/video path or camera ID (0, 1, ...) |
|
||||
| `--detector` | Choose detector: `retinaface`, `scrfd`, `yolov5face` |
|
||||
| `--threshold` | Visualization confidence threshold (default: varies) |
|
||||
| `--save-dir` | Output directory (default: `outputs`) |
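
These options can be combined; for example, `tools/age_gender.py` accepts all four (other tools differ slightly, e.g. `detection.py` uses `--method` instead of `--detector`):

```bash
python tools/age_gender.py --source video.mp4 --detector scrfd --threshold 0.5 --save-dir results/
```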
|
||||
|
||||
## Supported Formats
|
||||
|
||||
**Images:** `.jpg`, `.jpeg`, `.png`, `.bmp`, `.webp`, `.tiff`
|
||||
|
||||
**Videos:** `.mp4`, `.avi`, `.mov`, `.mkv`, `.webm`, `.flv`
|
||||
|
||||
**Camera:** Use integer IDs (`0`, `1`, `2`, ...)
|
||||
|
||||
## Quick Test
|
||||
|
||||
```bash
|
||||
python tools/detection.py --source assets/test.jpg
|
||||
```
|
||||
tools/age_gender.py (new file, 213 lines)
@@ -0,0 +1,213 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Age and gender prediction on detected faces.
|
||||
|
||||
Usage:
|
||||
python tools/age_gender.py --source path/to/image.jpg
|
||||
python tools/age_gender.py --source path/to/video.mp4
|
||||
python tools/age_gender.py --source 0 # webcam
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
|
||||
from uniface import SCRFD, AgeGender, RetinaFace
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
|
||||
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}
|
||||
|
||||
|
||||
def get_source_type(source: str) -> str:
|
||||
"""Determine if source is image, video, or camera."""
|
||||
if source.isdigit():
|
||||
return 'camera'
|
||||
path = Path(source)
|
||||
suffix = path.suffix.lower()
|
||||
if suffix in IMAGE_EXTENSIONS:
|
||||
return 'image'
|
||||
elif suffix in VIDEO_EXTENSIONS:
|
||||
return 'video'
|
||||
else:
|
||||
return 'unknown'
|
||||
|
||||
|
||||
def draw_age_gender_label(image, bbox, sex: str, age: int):
|
||||
"""Draw age/gender label above the bounding box."""
|
||||
x1, y1 = int(bbox[0]), int(bbox[1])
|
||||
text = f'{sex}, {age}y'
|
||||
(tw, th), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
|
||||
cv2.rectangle(image, (x1, y1 - th - 10), (x1 + tw + 10, y1), (0, 255, 0), -1)
|
||||
cv2.putText(image, text, (x1 + 5, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 2)
|
||||
|
||||
|
||||
def process_image(
|
||||
detector,
|
||||
age_gender,
|
||||
image_path: str,
|
||||
save_dir: str = 'outputs',
|
||||
threshold: float = 0.6,
|
||||
):
|
||||
"""Process a single image."""
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
print(f"Error: Failed to load image from '{image_path}'")
|
||||
return
|
||||
|
||||
faces = detector.detect(image)
|
||||
print(f'Detected {len(faces)} face(s)')
|
||||
|
||||
if not faces:
|
||||
return
|
||||
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
draw_detections(
|
||||
image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
|
||||
)
|
||||
|
||||
for i, face in enumerate(faces):
|
||||
result = age_gender.predict(image, face.bbox)
|
||||
print(f' Face {i + 1}: {result.sex}, {result.age} years old')
|
||||
draw_age_gender_label(image, face.bbox, result.sex, result.age)
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{Path(image_path).stem}_age_gender.jpg')
|
||||
cv2.imwrite(output_path, image)
|
||||
print(f'Output saved: {output_path}')
|
||||
|
||||
|
||||
def process_video(
|
||||
detector,
|
||||
age_gender,
|
||||
video_path: str,
|
||||
save_dir: str = 'outputs',
|
||||
threshold: float = 0.6,
|
||||
):
|
||||
"""Process a video file."""
|
||||
cap = cv2.VideoCapture(video_path)
|
||||
if not cap.isOpened():
|
||||
print(f"Error: Cannot open video file '{video_path}'")
|
||||
return
|
||||
|
||||
fps = cap.get(cv2.CAP_PROP_FPS)
|
||||
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
||||
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
||||
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{Path(video_path).stem}_age_gender.mp4')
|
||||
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
|
||||
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
|
||||
|
||||
print(f'Processing video: {video_path} ({total_frames} frames)')
|
||||
frame_count = 0
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
frame_count += 1
|
||||
faces = detector.detect(frame)
|
||||
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
draw_detections(
|
||||
image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
|
||||
)
|
||||
|
||||
for face in faces:
|
||||
result = age_gender.predict(frame, face.bbox)
|
||||
draw_age_gender_label(frame, face.bbox, result.sex, result.age)
|
||||
|
||||
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
out.write(frame)
|
||||
|
||||
if frame_count % 100 == 0:
|
||||
print(f' Processed {frame_count}/{total_frames} frames...')
|
||||
|
||||
cap.release()
|
||||
out.release()
|
||||
print(f'Done! Output saved: {output_path}')
|
||||
|
||||
|
||||
def run_camera(detector, age_gender, camera_id: int = 0, threshold: float = 0.6):
|
||||
"""Run real-time detection on webcam."""
|
||||
cap = cv2.VideoCapture(camera_id)
|
||||
if not cap.isOpened():
|
||||
print(f'Cannot open camera {camera_id}')
|
||||
return
|
||||
|
||||
print("Press 'q' to quit")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
if not ret:
break
frame = cv2.flip(frame, 1)  # mirror only after confirming a frame was read
|
||||
|
||||
faces = detector.detect(frame)
|
||||
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
draw_detections(
|
||||
image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
|
||||
)
|
||||
|
||||
for face in faces:
|
||||
result = age_gender.predict(frame, face.bbox)
|
||||
draw_age_gender_label(frame, face.bbox, result.sex, result.age)
|
||||
|
||||
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
cv2.imshow('Age & Gender Detection', frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Run age and gender detection')
|
||||
parser.add_argument('--source', type=str, required=True, help='Image/video path or camera ID (0, 1, ...)')
|
||||
parser.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
|
||||
parser.add_argument('--threshold', type=float, default=0.6, help='Visualization threshold')
|
||||
parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
|
||||
args = parser.parse_args()
|
||||
|
||||
detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
|
||||
age_gender = AgeGender()
|
||||
|
||||
source_type = get_source_type(args.source)
|
||||
|
||||
if source_type == 'camera':
|
||||
run_camera(detector, age_gender, int(args.source), args.threshold)
|
||||
elif source_type == 'image':
|
||||
if not os.path.exists(args.source):
|
||||
print(f'Error: Image not found: {args.source}')
|
||||
return
|
||||
process_image(detector, age_gender, args.source, args.save_dir, args.threshold)
|
||||
elif source_type == 'video':
|
||||
if not os.path.exists(args.source):
|
||||
print(f'Error: Video not found: {args.source}')
|
||||
return
|
||||
process_video(detector, age_gender, args.source, args.save_dir, args.threshold)
|
||||
else:
|
||||
print(f"Error: Unknown source type for '{args.source}'")
|
||||
print('Supported formats: images (.jpg, .png, ...), videos (.mp4, .avi, ...), or camera ID (0, 1, ...)')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
@@ -1,5 +1,12 @@
|
||||
# Batch face detection on a folder of images
|
||||
# Usage: python batch_process.py --input images/ --output results/
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Batch face detection on a folder of images.
|
||||
|
||||
Usage:
|
||||
python tools/batch_process.py --input images/ --output results/
|
||||
"""
|
||||
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
@@ -28,9 +35,9 @@ def process_image(detector, image_path: Path, output_path: Path, threshold: floa
|
||||
faces = detector.detect(image)
|
||||
|
||||
# unpack face data for visualization
|
||||
bboxes = [f['bbox'] for f in faces]
|
||||
scores = [f['confidence'] for f in faces]
|
||||
landmarks = [f['landmarks'] for f in faces]
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
draw_detections(
|
||||
image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
|
||||
)
|
||||
tools/detection.py (new file, 196 lines)
@@ -0,0 +1,196 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Face detection on image, video, or webcam.
|
||||
|
||||
Usage:
|
||||
python tools/detection.py --source path/to/image.jpg
|
||||
python tools/detection.py --source path/to/video.mp4
|
||||
python tools/detection.py --source 0 # webcam
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
|
||||
from uniface.detection import SCRFD, RetinaFace, YOLOv5Face
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
|
||||
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}
|
||||
|
||||
|
||||
def get_source_type(source: str) -> str:
|
||||
"""Determine if source is image, video, or camera."""
|
||||
if source.isdigit():
|
||||
return 'camera'
|
||||
path = Path(source)
|
||||
suffix = path.suffix.lower()
|
||||
if suffix in IMAGE_EXTENSIONS:
|
||||
return 'image'
|
||||
elif suffix in VIDEO_EXTENSIONS:
|
||||
return 'video'
|
||||
else:
|
||||
return 'unknown'
|
||||
|
||||
|
||||
def process_image(detector, image_path: str, threshold: float = 0.6, save_dir: str = 'outputs'):
|
||||
"""Process a single image."""
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
print(f"Error: Failed to load image from '{image_path}'")
|
||||
return
|
||||
|
||||
faces = detector.detect(image)
|
||||
|
||||
if faces:
|
||||
bboxes = [face.bbox for face in faces]
|
||||
scores = [face.confidence for face in faces]
|
||||
landmarks = [face.landmarks for face in faces]
|
||||
draw_detections(image, bboxes, scores, landmarks, vis_threshold=threshold)
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{os.path.splitext(os.path.basename(image_path))[0]}_out.jpg')
|
||||
cv2.imwrite(output_path, image)
|
||||
print(f'Detected {len(faces)} face(s). Output saved: {output_path}')
|
||||
|
||||
|
||||
def process_video(detector, video_path: str, threshold: float = 0.6, save_dir: str = 'outputs'):
|
||||
"""Process a video file."""
|
||||
cap = cv2.VideoCapture(video_path)
|
||||
if not cap.isOpened():
|
||||
print(f"Error: Cannot open video file '{video_path}'")
|
||||
return
|
||||
|
||||
# Get video properties
|
||||
fps = cap.get(cv2.CAP_PROP_FPS)
|
||||
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
||||
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
||||
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{Path(video_path).stem}_out.mp4')
|
||||
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
|
||||
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
|
||||
|
||||
print(f'Processing video: {video_path} ({total_frames} frames)')
|
||||
frame_count = 0
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
frame_count += 1
|
||||
faces = detector.detect(frame)
|
||||
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
draw_detections(
|
||||
image=frame,
|
||||
bboxes=bboxes,
|
||||
scores=scores,
|
||||
landmarks=landmarks,
|
||||
vis_threshold=threshold,
|
||||
draw_score=True,
|
||||
fancy_bbox=True,
|
||||
)
|
||||
|
||||
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
out.write(frame)
|
||||
|
||||
# Show progress
|
||||
if frame_count % 100 == 0:
|
||||
print(f' Processed {frame_count}/{total_frames} frames...')
|
||||
|
||||
cap.release()
|
||||
out.release()
|
||||
print(f'Done! Output saved: {output_path}')
|
||||
|
||||
|
||||
def run_camera(detector, camera_id: int = 0, threshold: float = 0.6):
|
||||
"""Run real-time detection on webcam."""
|
||||
cap = cv2.VideoCapture(camera_id)
|
||||
if not cap.isOpened():
|
||||
print(f'Cannot open camera {camera_id}')
|
||||
return
|
||||
|
||||
print("Press 'q' to quit")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
if not ret:
break
frame = cv2.flip(frame, 1)  # mirror for natural interaction
|
||||
|
||||
faces = detector.detect(frame)
|
||||
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
draw_detections(
|
||||
image=frame,
|
||||
bboxes=bboxes,
|
||||
scores=scores,
|
||||
landmarks=landmarks,
|
||||
vis_threshold=threshold,
|
||||
draw_score=True,
|
||||
fancy_bbox=True,
|
||||
)
|
||||
|
||||
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
cv2.imshow('Face Detection', frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Run face detection')
|
||||
parser.add_argument('--source', type=str, required=True, help='Image/video path or camera ID (0, 1, ...)')
|
||||
parser.add_argument('--method', type=str, default='retinaface', choices=['retinaface', 'scrfd', 'yolov5face'])
|
||||
parser.add_argument('--threshold', type=float, default=0.25, help='Visualization threshold')
|
||||
parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
|
||||
args = parser.parse_args()
|
||||
|
||||
# Initialize detector
|
||||
if args.method == 'retinaface':
|
||||
detector = RetinaFace()
|
||||
elif args.method == 'scrfd':
|
||||
detector = SCRFD()
|
||||
else:
|
||||
from uniface.constants import YOLOv5FaceWeights
|
||||
|
||||
detector = YOLOv5Face(model_name=YOLOv5FaceWeights.YOLOV5M)
|
||||
|
||||
# Determine source type and process
|
||||
source_type = get_source_type(args.source)
|
||||
|
||||
if source_type == 'camera':
|
||||
run_camera(detector, int(args.source), args.threshold)
|
||||
elif source_type == 'image':
|
||||
if not os.path.exists(args.source):
|
||||
print(f'Error: Image not found: {args.source}')
|
||||
return
|
||||
process_image(detector, args.source, args.threshold, args.save_dir)
|
||||
elif source_type == 'video':
|
||||
if not os.path.exists(args.source):
|
||||
print(f'Error: Video not found: {args.source}')
|
||||
return
|
||||
process_video(detector, args.source, args.threshold, args.save_dir)
|
||||
else:
|
||||
print(f"Error: Unknown source type for '{args.source}'")
|
||||
print('Supported formats: images (.jpg, .png, ...), videos (.mp4, .avi, ...), or camera ID (0, 1, ...)')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
tools/face_analyzer.py (new file, 239 lines)
@@ -0,0 +1,239 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Face analysis using FaceAnalyzer.
|
||||
|
||||
Usage:
|
||||
python tools/face_analyzer.py --source path/to/image.jpg
|
||||
python tools/face_analyzer.py --source path/to/video.mp4
|
||||
python tools/face_analyzer.py --source 0 # webcam
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from uniface import AgeGender, ArcFace, FaceAnalyzer, RetinaFace
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
|
||||
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}
|
||||
|
||||
|
||||
def get_source_type(source: str) -> str:
|
||||
"""Determine if source is image, video, or camera."""
|
||||
if source.isdigit():
|
||||
return 'camera'
|
||||
path = Path(source)
|
||||
suffix = path.suffix.lower()
|
||||
if suffix in IMAGE_EXTENSIONS:
|
||||
return 'image'
|
||||
elif suffix in VIDEO_EXTENSIONS:
|
||||
return 'video'
|
||||
else:
|
||||
return 'unknown'
|
||||
|
||||
|
||||
def draw_face_info(image, face, face_id):
|
||||
"""Draw face ID and attributes above bounding box."""
|
||||
x1, y1, _x2, y2 = map(int, face.bbox)
|
||||
lines = [f'ID: {face_id}', f'Conf: {face.confidence:.2f}']
|
||||
if face.age is not None and face.sex:
|
||||
lines.append(f'{face.sex}, {face.age}y')
|
||||
|
||||
for i, line in enumerate(lines):
|
||||
y_pos = y1 - 10 - (len(lines) - 1 - i) * 25
|
||||
if y_pos < 20:
|
||||
y_pos = y2 + 20 + i * 25
|
||||
(tw, th), _ = cv2.getTextSize(line, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
|
||||
cv2.rectangle(image, (x1, y_pos - th - 5), (x1 + tw + 10, y_pos + 5), (0, 255, 0), -1)
|
||||
cv2.putText(image, line, (x1 + 5, y_pos), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 2)
|
||||
|
||||
|
||||
def process_image(analyzer, image_path: str, save_dir: str = 'outputs', show_similarity: bool = True):
|
||||
"""Process a single image."""
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
print(f"Error: Failed to load image from '{image_path}'")
|
||||
return
|
||||
|
||||
faces = analyzer.analyze(image)
|
||||
print(f'Detected {len(faces)} face(s)')
|
||||
|
||||
if not faces:
|
||||
return
|
||||
|
||||
for i, face in enumerate(faces, 1):
|
||||
info = f' Face {i}: {face.sex}, {face.age}y' if face.age and face.sex else f' Face {i}'
|
||||
if face.embedding is not None:
|
||||
info += f' (embedding: {face.embedding.shape})'
|
||||
print(info)
|
||||
|
||||
if show_similarity and len(faces) >= 2:
|
||||
print('\nSimilarity Matrix:')
|
||||
n = len(faces)
|
||||
sim_matrix = np.zeros((n, n))
|
||||
|
||||
for i in range(n):
|
||||
for j in range(i, n):
|
||||
if i == j:
|
||||
sim_matrix[i][j] = 1.0
|
||||
else:
|
||||
sim = faces[i].compute_similarity(faces[j])
|
||||
sim_matrix[i][j] = sim
|
||||
sim_matrix[j][i] = sim
|
||||
|
||||
print(' ', end='')
|
||||
for i in range(n):
|
||||
print(f' F{i + 1:2d} ', end='')
|
||||
print('\n ' + '-' * (7 * n))
|
||||
|
||||
for i in range(n):
|
||||
print(f'F{i + 1:2d} | ', end='')
|
||||
for j in range(n):
|
||||
print(f'{sim_matrix[i][j]:6.3f} ', end='')
|
||||
print()
|
||||
|
||||
pairs = [(i, j, sim_matrix[i][j]) for i in range(n) for j in range(i + 1, n)]
|
||||
pairs.sort(key=lambda x: x[2], reverse=True)
|
||||
|
||||
print('\nTop matches (>0.4 = same person):')
|
||||
for i, j, sim in pairs[:3]:
|
||||
status = 'Same' if sim > 0.4 else 'Different'
|
||||
print(f' Face {i + 1} ↔ Face {j + 1}: {sim:.3f} ({status})')
|
||||
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
draw_detections(image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, fancy_bbox=True)
|
||||
|
||||
for i, face in enumerate(faces, 1):
|
||||
draw_face_info(image, face, i)
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{Path(image_path).stem}_analysis.jpg')
|
||||
cv2.imwrite(output_path, image)
|
||||
print(f'Output saved: {output_path}')
|
||||
|
||||
|
||||
def process_video(analyzer, video_path: str, save_dir: str = 'outputs'):
|
||||
"""Process a video file."""
|
||||
cap = cv2.VideoCapture(video_path)
|
||||
if not cap.isOpened():
|
||||
print(f"Error: Cannot open video file '{video_path}'")
|
||||
return
|
||||
|
||||
fps = cap.get(cv2.CAP_PROP_FPS)
|
||||
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
||||
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
||||
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{Path(video_path).stem}_analysis.mp4')
|
||||
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
|
||||
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
|
||||
|
||||
print(f'Processing video: {video_path} ({total_frames} frames)')
|
||||
frame_count = 0
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
frame_count += 1
|
||||
faces = analyzer.analyze(frame)
|
||||
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
draw_detections(image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, fancy_bbox=True)
|
||||
|
||||
for i, face in enumerate(faces, 1):
|
||||
draw_face_info(frame, face, i)
|
||||
|
||||
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
out.write(frame)
|
||||
|
||||
if frame_count % 100 == 0:
|
||||
print(f' Processed {frame_count}/{total_frames} frames...')
|
||||
|
||||
cap.release()
|
||||
out.release()
|
||||
print(f'Done! Output saved: {output_path}')
|
||||
|
||||
|
||||
def run_camera(analyzer, camera_id: int = 0):
|
||||
"""Run real-time analysis on webcam."""
|
||||
cap = cv2.VideoCapture(camera_id)
|
||||
if not cap.isOpened():
|
||||
print(f'Cannot open camera {camera_id}')
|
||||
return
|
||||
|
||||
print("Press 'q' to quit")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
if not ret:
break
frame = cv2.flip(frame, 1)  # mirror only after confirming a frame was read
|
||||
|
||||
faces = analyzer.analyze(frame)
|
||||
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
draw_detections(image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, fancy_bbox=True)
|
||||
|
||||
for i, face in enumerate(faces, 1):
|
||||
draw_face_info(frame, face, i)
|
||||
|
||||
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
cv2.imshow('Face Analyzer', frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Face analysis with detection, recognition, and attributes')
|
||||
parser.add_argument('--source', type=str, required=True, help='Image/video path or camera ID (0, 1, ...)')
|
||||
parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
|
||||
parser.add_argument('--no-similarity', action='store_true', help='Skip similarity matrix computation')
|
||||
args = parser.parse_args()
|
||||
|
||||
detector = RetinaFace()
|
||||
recognizer = ArcFace()
|
||||
age_gender = AgeGender()
|
||||
analyzer = FaceAnalyzer(detector, recognizer, age_gender)
|
||||
|
||||
source_type = get_source_type(args.source)
|
||||
|
||||
if source_type == 'camera':
|
||||
run_camera(analyzer, int(args.source))
|
||||
elif source_type == 'image':
|
||||
if not os.path.exists(args.source):
|
||||
print(f'Error: Image not found: {args.source}')
|
||||
return
|
||||
process_image(analyzer, args.source, args.save_dir, show_similarity=not args.no_similarity)
|
||||
elif source_type == 'video':
|
||||
if not os.path.exists(args.source):
|
||||
print(f'Error: Video not found: {args.source}')
|
||||
return
|
||||
process_video(analyzer, args.source, args.save_dir)
|
||||
else:
|
||||
print(f"Error: Unknown source type for '{args.source}'")
|
||||
print('Supported formats: images (.jpg, .png, ...), videos (.mp4, .avi, ...), or camera ID (0, 1, ...)')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
tools/face_anonymize.py (new file, 281 lines)
@@ -0,0 +1,281 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Face anonymization/blurring for privacy.
|
||||
|
||||
Usage:
|
||||
python tools/face_anonymize.py --source path/to/image.jpg --method pixelate
|
||||
python tools/face_anonymize.py --source path/to/video.mp4 --method gaussian
|
||||
python tools/face_anonymize.py --source 0 --method pixelate # webcam
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
|
||||
from uniface import RetinaFace
|
||||
from uniface.privacy import BlurFace
|
||||
|
||||
IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
|
||||
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}
|
||||
|
||||
|
||||
def get_source_type(source: str) -> str:
|
||||
"""Determine if source is image, video, or camera."""
|
||||
if source.isdigit():
|
||||
return 'camera'
|
||||
path = Path(source)
|
||||
suffix = path.suffix.lower()
|
||||
if suffix in IMAGE_EXTENSIONS:
|
||||
return 'image'
|
||||
elif suffix in VIDEO_EXTENSIONS:
|
||||
return 'video'
|
||||
else:
|
||||
return 'unknown'
|
||||
|
||||
|
||||
def process_image(
|
||||
detector,
|
||||
blurrer: BlurFace,
|
||||
image_path: str,
|
||||
save_dir: str = 'outputs',
|
||||
show_detections: bool = False,
|
||||
):
|
||||
"""Process a single image."""
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
print(f"Error: Failed to load image from '{image_path}'")
|
||||
return
|
||||
|
||||
faces = detector.detect(image)
|
||||
print(f'Detected {len(faces)} face(s)')
|
||||
|
||||
if show_detections and faces:
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
preview = image.copy()
|
||||
bboxes = [face.bbox for face in faces]
|
||||
scores = [face.confidence for face in faces]
|
||||
landmarks = [face.landmarks for face in faces]
|
||||
draw_detections(preview, bboxes, scores, landmarks)
|
||||
|
||||
cv2.imshow('Detections (Press any key to continue)', preview)
|
||||
cv2.waitKey(0)
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
if faces:
|
||||
anonymized = blurrer.anonymize(image, faces)
|
||||
else:
|
||||
anonymized = image
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
basename = os.path.splitext(os.path.basename(image_path))[0]
|
||||
output_path = os.path.join(save_dir, f'{basename}_anonymized.jpg')
|
||||
cv2.imwrite(output_path, anonymized)
|
||||
print(f'Output saved: {output_path}')
|
||||
|
||||
|
||||
def process_video(
|
||||
detector,
|
||||
blurrer: BlurFace,
|
||||
video_path: str,
|
||||
save_dir: str = 'outputs',
|
||||
):
|
||||
"""Process a video file."""
|
||||
cap = cv2.VideoCapture(video_path)
|
||||
if not cap.isOpened():
|
||||
print(f"Error: Cannot open video file '{video_path}'")
|
||||
return
|
||||
|
||||
fps = cap.get(cv2.CAP_PROP_FPS)
|
||||
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
||||
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
||||
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f'{Path(video_path).stem}_anonymized.mp4')
|
||||
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
|
||||
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
|
||||
|
||||
print(f'Processing video: {video_path} ({total_frames} frames)')
|
||||
frame_count = 0
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
frame_count += 1
|
||||
faces = detector.detect(frame)
|
||||
|
||||
if faces:
|
||||
frame = blurrer.anonymize(frame, faces, inplace=True)
|
||||
|
||||
out.write(frame)
|
||||
|
||||
if frame_count % 100 == 0:
|
||||
print(f' Processed {frame_count}/{total_frames} frames...')
|
||||
|
||||
cap.release()
|
||||
out.release()
|
||||
print(f'Done! Output saved: {output_path}')
|
||||
|
||||
|
||||
def run_camera(detector, blurrer: BlurFace, camera_id: int = 0):
|
||||
"""Run real-time anonymization on webcam."""
|
||||
cap = cv2.VideoCapture(camera_id)
|
||||
if not cap.isOpened():
|
||||
print(f'Cannot open camera {camera_id}')
|
||||
return
|
||||
|
||||
print("Press 'q' to quit")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
if not ret:
break
frame = cv2.flip(frame, 1)  # mirror the preview only after a frame was actually read
|
||||
|
||||
faces = detector.detect(frame)
|
||||
if faces:
|
||||
frame = blurrer.anonymize(frame, faces, inplace=True)
|
||||
|
||||
cv2.putText(
|
||||
frame,
|
||||
f'Faces blurred: {len(faces)} | Method: {blurrer.method}',
|
||||
(10, 30),
|
||||
cv2.FONT_HERSHEY_SIMPLEX,
|
||||
0.7,
|
||||
(0, 255, 0),
|
||||
2,
|
||||
)
|
||||
|
||||
cv2.imshow('Face Anonymization (Press q to quit)', frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(
|
||||
description='Face anonymization using various blur methods',
|
||||
formatter_class=argparse.RawDescriptionHelpFormatter,
|
||||
epilog="""
|
||||
Examples:
|
||||
# Anonymize image with pixelation (default)
|
||||
python tools/face_anonymize.py --source photo.jpg

# Use Gaussian blur with custom strength
python tools/face_anonymize.py --source photo.jpg --method gaussian --blur-strength 5.0

# Real-time webcam anonymization
python tools/face_anonymize.py --source 0 --method pixelate

# Black boxes for maximum privacy
python tools/face_anonymize.py --source photo.jpg --method blackout

# Custom pixelation intensity
python tools/face_anonymize.py --source photo.jpg --method pixelate --pixel-blocks 5
|
||||
""",
|
||||
)
|
||||
|
||||
# Input/output
|
||||
parser.add_argument('--source', type=str, required=True, help='Image/video path or camera ID (0, 1, ...)')
|
||||
parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
|
||||
|
||||
# Blur method
|
||||
parser.add_argument(
|
||||
'--method',
|
||||
type=str,
|
||||
default='pixelate',
|
||||
choices=['gaussian', 'pixelate', 'blackout', 'elliptical', 'median'],
|
||||
help='Blur method (default: pixelate)',
|
||||
)
|
||||
|
||||
# Method-specific parameters
|
||||
parser.add_argument(
|
||||
'--blur-strength',
|
||||
type=float,
|
||||
default=3.0,
|
||||
help='Blur strength for gaussian/elliptical/median (default: 3.0)',
|
||||
)
|
||||
parser.add_argument(
|
||||
'--pixel-blocks',
|
||||
type=int,
|
||||
default=20,
|
||||
help='Number of pixel blocks for pixelate (default: 20, lower=more pixelated)',
|
||||
)
|
||||
parser.add_argument(
|
||||
'--color',
|
||||
type=str,
|
||||
default='0,0,0',
|
||||
help='Fill color for blackout as R,G,B (default: 0,0,0 for black)',
|
||||
)
|
||||
parser.add_argument('--margin', type=int, default=20, help='Margin for elliptical blur (default: 20)')
|
||||
|
||||
# Detection
|
||||
parser.add_argument(
|
||||
'--confidence-threshold',
|
||||
type=float,
|
||||
default=0.5,
|
||||
help='Detection confidence threshold (default: 0.5)',
|
||||
)
|
||||
|
||||
# Visualization
|
||||
parser.add_argument(
|
||||
'--show-detections',
|
||||
action='store_true',
|
||||
help='Show detection boxes before blurring (image mode only)',
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
# Parse color
|
||||
color_values = [int(x) for x in args.color.split(',')]
|
||||
if len(color_values) != 3:
|
||||
parser.error('--color must be in format R,G,B (e.g., 0,0,0)')
|
||||
color = tuple(color_values)
|
||||
|
||||
# Initialize detector
|
||||
print(f'Initializing face detector (confidence_threshold={args.confidence_threshold})...')
|
||||
detector = RetinaFace(confidence_threshold=args.confidence_threshold)
|
||||
|
||||
# Initialize blurrer
|
||||
print(f'Initializing blur method: {args.method}')
|
||||
blurrer = BlurFace(
|
||||
method=args.method,
|
||||
blur_strength=args.blur_strength,
|
||||
pixel_blocks=args.pixel_blocks,
|
||||
color=color,
|
||||
margin=args.margin,
|
||||
)
|
||||
|
||||
source_type = get_source_type(args.source)
|
||||
|
||||
if source_type == 'camera':
|
||||
run_camera(detector, blurrer, int(args.source))
|
||||
elif source_type == 'image':
|
||||
if not os.path.exists(args.source):
|
||||
print(f'Error: Image not found: {args.source}')
|
||||
return
|
||||
process_image(detector, blurrer, args.source, args.save_dir, args.show_detections)
|
||||
elif source_type == 'video':
|
||||
if not os.path.exists(args.source):
|
||||
print(f'Error: Video not found: {args.source}')
|
||||
return
|
||||
process_video(detector, blurrer, args.source, args.save_dir)
|
||||
else:
|
||||
print(f"Error: Unknown source type for '{args.source}'")
|
||||
print('Supported formats: images (.jpg, .png, ...), videos (.mp4, .avi, ...), or camera ID (0, 1, ...)')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
tools/face_emotion.py (new file, 213 lines)
@@ -0,0 +1,213 @@
# Copyright 2025 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

"""Emotion detection on detected faces.

Usage:
    python tools/face_emotion.py --source path/to/image.jpg
    python tools/face_emotion.py --source path/to/video.mp4
    python tools/face_emotion.py --source 0  # webcam
"""

from __future__ import annotations

import argparse
import os
from pathlib import Path

import cv2

from uniface import SCRFD, Emotion, RetinaFace
from uniface.visualization import draw_detections

IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}


def get_source_type(source: str) -> str:
    """Determine if source is image, video, or camera."""
    if source.isdigit():
        return 'camera'
    path = Path(source)
    suffix = path.suffix.lower()
    if suffix in IMAGE_EXTENSIONS:
        return 'image'
    elif suffix in VIDEO_EXTENSIONS:
        return 'video'
    else:
        return 'unknown'


def draw_emotion_label(image, bbox, emotion: str, confidence: float):
    """Draw emotion label above the bounding box."""
    x1, y1 = int(bbox[0]), int(bbox[1])
    text = f'{emotion} ({confidence:.2f})'
    (tw, th), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
    cv2.rectangle(image, (x1, y1 - th - 10), (x1 + tw + 10, y1), (255, 0, 0), -1)
    cv2.putText(image, text, (x1 + 5, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)


def process_image(
    detector,
    emotion_predictor,
    image_path: str,
    save_dir: str = 'outputs',
    threshold: float = 0.6,
):
    """Process a single image."""
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Failed to load image from '{image_path}'")
        return

    faces = detector.detect(image)
    print(f'Detected {len(faces)} face(s)')

    if not faces:
        return

    bboxes = [f.bbox for f in faces]
    scores = [f.confidence for f in faces]
    landmarks = [f.landmarks for f in faces]
    draw_detections(
        image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
    )

    for i, face in enumerate(faces):
        result = emotion_predictor.predict(image, face.landmarks)
        print(f'  Face {i + 1}: {result.emotion} (confidence: {result.confidence:.3f})')
        draw_emotion_label(image, face.bbox, result.emotion, result.confidence)

    os.makedirs(save_dir, exist_ok=True)
    output_path = os.path.join(save_dir, f'{Path(image_path).stem}_emotion.jpg')
    cv2.imwrite(output_path, image)
    print(f'Output saved: {output_path}')


def process_video(
    detector,
    emotion_predictor,
    video_path: str,
    save_dir: str = 'outputs',
    threshold: float = 0.6,
):
    """Process a video file."""
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Error: Cannot open video file '{video_path}'")
        return

    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    os.makedirs(save_dir, exist_ok=True)
    output_path = os.path.join(save_dir, f'{Path(video_path).stem}_emotion.mp4')
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    print(f'Processing video: {video_path} ({total_frames} frames)')
    frame_count = 0

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        frame_count += 1
        faces = detector.detect(frame)

        bboxes = [f.bbox for f in faces]
        scores = [f.confidence for f in faces]
        landmarks = [f.landmarks for f in faces]
        draw_detections(
            image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
        )

        for face in faces:
            result = emotion_predictor.predict(frame, face.landmarks)
            draw_emotion_label(frame, face.bbox, result.emotion, result.confidence)

        cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        out.write(frame)

        if frame_count % 100 == 0:
            print(f'  Processed {frame_count}/{total_frames} frames...')

    cap.release()
    out.release()
    print(f'Done! Output saved: {output_path}')


def run_camera(detector, emotion_predictor, camera_id: int = 0, threshold: float = 0.6):
    """Run real-time detection on webcam."""
    cap = cv2.VideoCapture(camera_id)
    if not cap.isOpened():
        print(f'Cannot open camera {camera_id}')
        return

    print("Press 'q' to quit")

    while True:
        ret, frame = cap.read()
        if not ret:
            break
        frame = cv2.flip(frame, 1)  # mirror the preview only after a successful read

        faces = detector.detect(frame)

        bboxes = [f.bbox for f in faces]
        scores = [f.confidence for f in faces]
        landmarks = [f.landmarks for f in faces]
        draw_detections(
            image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
        )

        for face in faces:
            result = emotion_predictor.predict(frame, face.landmarks)
            draw_emotion_label(frame, face.bbox, result.emotion, result.confidence)

        cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        cv2.imshow('Emotion Detection', frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()


def main():
    parser = argparse.ArgumentParser(description='Run emotion detection')
    parser.add_argument('--source', type=str, required=True, help='Image/video path or camera ID (0, 1, ...)')
    parser.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
    parser.add_argument('--threshold', type=float, default=0.6, help='Visualization threshold')
    parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
    args = parser.parse_args()

    detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
    emotion_predictor = Emotion()

    source_type = get_source_type(args.source)

    if source_type == 'camera':
        run_camera(detector, emotion_predictor, int(args.source), args.threshold)
    elif source_type == 'image':
        if not os.path.exists(args.source):
            print(f'Error: Image not found: {args.source}')
            return
        process_image(detector, emotion_predictor, args.source, args.save_dir, args.threshold)
    elif source_type == 'video':
        if not os.path.exists(args.source):
            print(f'Error: Video not found: {args.source}')
            return
        process_video(detector, emotion_predictor, args.source, args.save_dir, args.threshold)
    else:
        print(f"Error: Unknown source type for '{args.source}'")
        print('Supported formats: images (.jpg, .png, ...), videos (.mp4, .avi, ...), or camera ID (0, 1, ...)')


if __name__ == '__main__':
    main()
250 tools/face_parsing.py Normal file
@@ -0,0 +1,250 @@
# Copyright 2025 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

"""Face parsing on detected faces.

Usage:
    python tools/face_parsing.py --source path/to/image.jpg
    python tools/face_parsing.py --source path/to/video.mp4
    python tools/face_parsing.py --source 0  # webcam
"""

from __future__ import annotations

import argparse
import os
from pathlib import Path

import cv2
import numpy as np

from uniface import RetinaFace
from uniface.constants import ParsingWeights
from uniface.parsing import BiSeNet
from uniface.visualization import vis_parsing_maps

IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}


def get_source_type(source: str) -> str:
    """Determine if source is image, video, or camera."""
    if source.isdigit():
        return 'camera'
    path = Path(source)
    suffix = path.suffix.lower()
    if suffix in IMAGE_EXTENSIONS:
        return 'image'
    elif suffix in VIDEO_EXTENSIONS:
        return 'video'
    else:
        return 'unknown'


def expand_bbox(
    bbox: np.ndarray,
    image_shape: tuple[int, int],
    expand_ratio: float = 0.2,
    expand_top_ratio: float = 0.4,
) -> tuple[int, int, int, int]:
    """
    Expand bounding box to include full head region for face parsing.

    Face detection typically returns tight face boxes, but face parsing
    requires the full head including hair, ears, and neck.

    Args:
        bbox: Original bounding box [x1, y1, x2, y2].
        image_shape: Image dimensions as (height, width).
        expand_ratio: Expansion ratio for left, right, and bottom (default: 0.2 = 20%).
        expand_top_ratio: Expansion ratio for top to capture hair/forehead (default: 0.4 = 40%).

    Returns:
        Tuple[int, int, int, int]: Expanded bbox (x1, y1, x2, y2) clamped to image bounds.
    """
    x1, y1, x2, y2 = map(int, bbox[:4])
    height, width = image_shape[:2]

    face_width = x2 - x1
    face_height = y2 - y1

    expand_x = int(face_width * expand_ratio)
    expand_y_bottom = int(face_height * expand_ratio)
    expand_y_top = int(face_height * expand_top_ratio)

    new_x1 = max(0, x1 - expand_x)
    new_y1 = max(0, y1 - expand_y_top)
    new_x2 = min(width, x2 + expand_x)
    new_y2 = min(height, y2 + expand_y_bottom)

    return new_x1, new_y1, new_x2, new_y2


def process_image(detector, parser, image_path: str, save_dir: str = 'outputs', expand_ratio: float = 0.2):
    """Process a single image."""
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Failed to load image from '{image_path}'")
        return

    faces = detector.detect(image)
    print(f'Detected {len(faces)} face(s)')

    result_image = image.copy()

    for i, face in enumerate(faces):
        x1, y1, x2, y2 = expand_bbox(face.bbox, image.shape, expand_ratio=expand_ratio)
        face_crop = image[y1:y2, x1:x2]

        if face_crop.size == 0:
            continue

        mask = parser.parse(face_crop)
        print(f'  Face {i + 1}: parsed with {len(set(mask.flatten()))} unique classes')

        face_crop_rgb = cv2.cvtColor(face_crop, cv2.COLOR_BGR2RGB)
        vis_result = vis_parsing_maps(face_crop_rgb, mask, save_image=False)

        result_image[y1:y2, x1:x2] = vis_result
        cv2.rectangle(result_image, (x1, y1), (x2, y2), (0, 255, 0), 2)

    os.makedirs(save_dir, exist_ok=True)
    output_path = os.path.join(save_dir, f'{Path(image_path).stem}_parsing.jpg')
    cv2.imwrite(output_path, result_image)
    print(f'Output saved: {output_path}')


def process_video(detector, parser, video_path: str, save_dir: str = 'outputs', expand_ratio: float = 0.2):
    """Process a video file."""
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Error: Cannot open video file '{video_path}'")
        return

    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    os.makedirs(save_dir, exist_ok=True)
    output_path = os.path.join(save_dir, f'{Path(video_path).stem}_parsing.mp4')
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    print(f'Processing video: {video_path} ({total_frames} frames)')
    frame_count = 0

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        frame_count += 1
        faces = detector.detect(frame)

        for face in faces:
            x1, y1, x2, y2 = expand_bbox(face.bbox, frame.shape, expand_ratio=expand_ratio)
            face_crop = frame[y1:y2, x1:x2]

            if face_crop.size == 0:
                continue

            mask = parser.parse(face_crop)
            face_crop_rgb = cv2.cvtColor(face_crop, cv2.COLOR_BGR2RGB)
            vis_result = vis_parsing_maps(face_crop_rgb, mask, save_image=False)

            frame[y1:y2, x1:x2] = vis_result
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

        cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        out.write(frame)

        if frame_count % 100 == 0:
            print(f'  Processed {frame_count}/{total_frames} frames...')

    cap.release()
    out.release()
    print(f'Done! Output saved: {output_path}')


def run_camera(detector, parser, camera_id: int = 0, expand_ratio: float = 0.2):
    """Run real-time detection on webcam."""
    cap = cv2.VideoCapture(camera_id)
    if not cap.isOpened():
        print(f'Cannot open camera {camera_id}')
        return

    print("Press 'q' to quit")

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        frame = cv2.flip(frame, 1)
        faces = detector.detect(frame)

        for face in faces:
            x1, y1, x2, y2 = expand_bbox(face.bbox, frame.shape, expand_ratio=expand_ratio)
            face_crop = frame[y1:y2, x1:x2]

            if face_crop.size == 0:
                continue

            mask = parser.parse(face_crop)
            face_crop_rgb = cv2.cvtColor(face_crop, cv2.COLOR_BGR2RGB)
            vis_result = vis_parsing_maps(face_crop_rgb, mask, save_image=False)

            frame[y1:y2, x1:x2] = vis_result
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

        cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        cv2.imshow('Face Parsing', frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()


def main():
    parser_arg = argparse.ArgumentParser(description='Run face parsing')
    parser_arg.add_argument('--source', type=str, required=True, help='Image/video path or camera ID (0, 1, ...)')
    parser_arg.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
    parser_arg.add_argument(
        '--model', type=str, default=ParsingWeights.RESNET18, choices=[ParsingWeights.RESNET18, ParsingWeights.RESNET34]
    )
    parser_arg.add_argument(
        '--expand-ratio',
        type=float,
        default=0.2,
        help='Bbox expansion ratio for full head coverage (default: 0.2 = 20%%)',
    )
    args = parser_arg.parse_args()

    detector = RetinaFace()
    parser = BiSeNet(model_name=args.model)  # use the parsing backbone selected via --model

    source_type = get_source_type(args.source)

    if source_type == 'camera':
        run_camera(detector, parser, int(args.source), expand_ratio=args.expand_ratio)
    elif source_type == 'image':
        if not os.path.exists(args.source):
            print(f'Error: Image not found: {args.source}')
            return
        process_image(detector, parser, args.source, args.save_dir, expand_ratio=args.expand_ratio)
    elif source_type == 'video':
        if not os.path.exists(args.source):
            print(f'Error: Video not found: {args.source}')
            return
        process_video(detector, parser, args.source, args.save_dir, expand_ratio=args.expand_ratio)
    else:
        print(f"Error: Unknown source type for '{args.source}'")
        print('Supported formats: images (.jpg, .png, ...), videos (.mp4, .avi, ...), or camera ID (0, 1, ...)')


if __name__ == '__main__':
    main()
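The expand_bbox helper in tools/face_parsing.py above is easiest to sanity-check with concrete numbers. A standalone sketch of the same arithmetic with made-up values (not part of the tool itself):

# Hypothetical 100x100 px face box at (50, 80) inside a 640x480 image.
x1, y1, x2, y2 = 50, 80, 150, 180
face_w, face_h = x2 - x1, y2 - y1          # 100, 100

expand_x = int(face_w * 0.2)               # 20 px added on the left and right
expand_bottom = int(face_h * 0.2)          # 20 px added below the chin
expand_top = int(face_h * 0.4)             # 40 px added above for hair/forehead

expanded = (
    max(0, x1 - expand_x),                 # 30
    max(0, y1 - expand_top),               # 40
    min(640, x2 + expand_x),               # 170
    min(480, y2 + expand_bottom),          # 200
)
print(expanded)                            # (30, 40, 170, 200)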
190 tools/face_search.py Normal file
@@ -0,0 +1,190 @@
# Copyright 2025 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

"""Real-time face search: match faces against a reference image.

Usage:
    python tools/face_search.py --reference person.jpg --source 0  # webcam
    python tools/face_search.py --reference person.jpg --source video.mp4
"""

from __future__ import annotations

import argparse
import os
from pathlib import Path

import cv2
import numpy as np

from uniface.detection import SCRFD, RetinaFace
from uniface.face_utils import compute_similarity
from uniface.recognition import ArcFace, MobileFace, SphereFace

IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}


def get_source_type(source: str) -> str:
    """Determine if source is image, video, or camera."""
    if source.isdigit():
        return 'camera'
    path = Path(source)
    suffix = path.suffix.lower()
    if suffix in IMAGE_EXTENSIONS:
        return 'image'
    elif suffix in VIDEO_EXTENSIONS:
        return 'video'
    else:
        return 'unknown'


def get_recognizer(name: str):
    """Get recognizer by name."""
    if name == 'arcface':
        return ArcFace()
    elif name == 'mobileface':
        return MobileFace()
    else:
        return SphereFace()


def extract_reference_embedding(detector, recognizer, image_path: str) -> np.ndarray:
    """Extract embedding from reference image."""
    image = cv2.imread(image_path)
    if image is None:
        raise RuntimeError(f'Failed to load image: {image_path}')

    faces = detector.detect(image)
    if not faces:
        raise RuntimeError('No faces found in reference image.')

    landmarks = faces[0].landmarks
    return recognizer.get_normalized_embedding(image, landmarks)


def process_frame(frame, detector, recognizer, ref_embedding: np.ndarray, threshold: float = 0.4):
    """Process a single frame and return annotated frame."""
    faces = detector.detect(frame)

    for face in faces:
        bbox = face.bbox
        landmarks = face.landmarks
        x1, y1, x2, y2 = map(int, bbox)

        embedding = recognizer.get_normalized_embedding(frame, landmarks)
        sim = compute_similarity(ref_embedding, embedding)

        label = f'Match ({sim:.2f})' if sim > threshold else f'Unknown ({sim:.2f})'
        color = (0, 255, 0) if sim > threshold else (0, 0, 255)

        cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
        cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)

    return frame


def process_video(detector, recognizer, ref_embedding: np.ndarray, video_path: str, save_dir: str, threshold: float):
    """Process a video file."""
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Error: Cannot open video file '{video_path}'")
        return

    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    os.makedirs(save_dir, exist_ok=True)
    output_path = os.path.join(save_dir, f'{Path(video_path).stem}_search.mp4')
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    print(f'Processing video: {video_path} ({total_frames} frames)')
    frame_count = 0

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        frame_count += 1
        frame = process_frame(frame, detector, recognizer, ref_embedding, threshold)
        out.write(frame)

        if frame_count % 100 == 0:
            print(f'  Processed {frame_count}/{total_frames} frames...')

    cap.release()
    out.release()
    print(f'Done! Output saved: {output_path}')


def run_camera(detector, recognizer, ref_embedding: np.ndarray, camera_id: int = 0, threshold: float = 0.4):
    """Run real-time face search on webcam."""
    cap = cv2.VideoCapture(camera_id)
    if not cap.isOpened():
        print(f'Cannot open camera {camera_id}')
        return

    print("Press 'q' to quit")

    while True:
        ret, frame = cap.read()
        if not ret:
            break
        frame = cv2.flip(frame, 1)

        frame = process_frame(frame, detector, recognizer, ref_embedding, threshold)

        cv2.imshow('Face Recognition', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()


def main():
    parser = argparse.ArgumentParser(description='Face search using a reference image')
    parser.add_argument('--reference', type=str, required=True, help='Reference face image')
    parser.add_argument('--source', type=str, required=True, help='Video path or camera ID (0, 1, ...)')
    parser.add_argument('--threshold', type=float, default=0.4, help='Match threshold')
    parser.add_argument('--detector', type=str, default='scrfd', choices=['retinaface', 'scrfd'])
    parser.add_argument(
        '--recognizer',
        type=str,
        default='arcface',
        choices=['arcface', 'mobileface', 'sphereface'],
    )
    parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
    args = parser.parse_args()

    if not os.path.exists(args.reference):
        print(f'Error: Reference image not found: {args.reference}')
        return

    detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
    recognizer = get_recognizer(args.recognizer)

    print(f'Loading reference: {args.reference}')
    ref_embedding = extract_reference_embedding(detector, recognizer, args.reference)

    source_type = get_source_type(args.source)

    if source_type == 'camera':
        run_camera(detector, recognizer, ref_embedding, int(args.source), args.threshold)
    elif source_type == 'video':
        if not os.path.exists(args.source):
            print(f'Error: Video not found: {args.source}')
            return
        process_video(detector, recognizer, ref_embedding, args.source, args.save_dir, args.threshold)
    else:
        print(f"Error: Source must be a video file or camera ID, not '{args.source}'")
        print('Supported formats: videos (.mp4, .avi, ...) or camera ID (0, 1, ...)')


if __name__ == '__main__':
    main()
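The same pieces used by tools/face_search.py can also do a one-off comparison of two photos rather than a stream. A minimal sketch assuming the classes and helpers shown above (the file names and the 0.4 threshold are illustrative):

import cv2

from uniface.detection import RetinaFace
from uniface.face_utils import compute_similarity
from uniface.recognition import ArcFace

detector = RetinaFace()
recognizer = ArcFace()


def embed(path: str):
    """Return the normalized embedding of the first detected face."""
    image = cv2.imread(path)
    faces = detector.detect(image)
    if not faces:
        raise RuntimeError(f'No face found in {path}')
    return recognizer.get_normalized_embedding(image, faces[0].landmarks)


similarity = compute_similarity(embed('person_a.jpg'), embed('person_b.jpg'))
print('Match' if similarity > 0.4 else 'No match', f'(similarity={similarity:.2f})')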
214 tools/fairface.py Normal file
@@ -0,0 +1,214 @@
# Copyright 2025 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

"""FairFace attribute prediction (race, gender, age) on detected faces.

Usage:
    python tools/fairface.py --source path/to/image.jpg
    python tools/fairface.py --source path/to/video.mp4
    python tools/fairface.py --source 0  # webcam
"""

from __future__ import annotations

import argparse
import os
from pathlib import Path

import cv2

from uniface import SCRFD, RetinaFace
from uniface.attribute import FairFace
from uniface.visualization import draw_detections

IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}


def get_source_type(source: str) -> str:
    """Determine if source is image, video, or camera."""
    if source.isdigit():
        return 'camera'
    path = Path(source)
    suffix = path.suffix.lower()
    if suffix in IMAGE_EXTENSIONS:
        return 'image'
    elif suffix in VIDEO_EXTENSIONS:
        return 'video'
    else:
        return 'unknown'


def draw_fairface_label(image, bbox, sex: str, age_group: str, race: str):
    """Draw FairFace attributes above the bounding box."""
    x1, y1 = int(bbox[0]), int(bbox[1])
    text = f'{sex}, {age_group}, {race}'
    (tw, th), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)
    cv2.rectangle(image, (x1, y1 - th - 10), (x1 + tw + 10, y1), (0, 255, 0), -1)
    cv2.putText(image, text, (x1 + 5, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 2)


def process_image(
    detector,
    fairface,
    image_path: str,
    save_dir: str = 'outputs',
    threshold: float = 0.6,
):
    """Process a single image."""
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Failed to load image from '{image_path}'")
        return

    faces = detector.detect(image)
    print(f'Detected {len(faces)} face(s)')

    if not faces:
        return

    bboxes = [f.bbox for f in faces]
    scores = [f.confidence for f in faces]
    landmarks = [f.landmarks for f in faces]
    draw_detections(
        image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
    )

    for i, face in enumerate(faces):
        result = fairface.predict(image, face.bbox)
        print(f'  Face {i + 1}: {result.sex}, {result.age_group}, {result.race}')
        draw_fairface_label(image, face.bbox, result.sex, result.age_group, result.race)

    os.makedirs(save_dir, exist_ok=True)
    output_path = os.path.join(save_dir, f'{Path(image_path).stem}_fairface.jpg')
    cv2.imwrite(output_path, image)
    print(f'Output saved: {output_path}')


def process_video(
    detector,
    fairface,
    video_path: str,
    save_dir: str = 'outputs',
    threshold: float = 0.6,
):
    """Process a video file."""
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Error: Cannot open video file '{video_path}'")
        return

    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    os.makedirs(save_dir, exist_ok=True)
    output_path = os.path.join(save_dir, f'{Path(video_path).stem}_fairface.mp4')
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    print(f'Processing video: {video_path} ({total_frames} frames)')
    frame_count = 0

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        frame_count += 1
        faces = detector.detect(frame)

        bboxes = [f.bbox for f in faces]
        scores = [f.confidence for f in faces]
        landmarks = [f.landmarks for f in faces]
        draw_detections(
            image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
        )

        for face in faces:
            result = fairface.predict(frame, face.bbox)
            draw_fairface_label(frame, face.bbox, result.sex, result.age_group, result.race)

        cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        out.write(frame)

        if frame_count % 100 == 0:
            print(f'  Processed {frame_count}/{total_frames} frames...')

    cap.release()
    out.release()
    print(f'Done! Output saved: {output_path}')


def run_camera(detector, fairface, camera_id: int = 0, threshold: float = 0.6):
    """Run real-time detection on webcam."""
    cap = cv2.VideoCapture(camera_id)
    if not cap.isOpened():
        print(f'Cannot open camera {camera_id}')
        return

    print("Press 'q' to quit")

    while True:
        ret, frame = cap.read()
        if not ret:
            break
        frame = cv2.flip(frame, 1)

        faces = detector.detect(frame)

        bboxes = [f.bbox for f in faces]
        scores = [f.confidence for f in faces]
        landmarks = [f.landmarks for f in faces]
        draw_detections(
            image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
        )

        for face in faces:
            result = fairface.predict(frame, face.bbox)
            draw_fairface_label(frame, face.bbox, result.sex, result.age_group, result.race)

        cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        cv2.imshow('FairFace Detection', frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()


def main():
    parser = argparse.ArgumentParser(description='Run FairFace attribute prediction (race, gender, age)')
    parser.add_argument('--source', type=str, required=True, help='Image/video path or camera ID (0, 1, ...)')
    parser.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
    parser.add_argument('--threshold', type=float, default=0.6, help='Visualization threshold')
    parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
    args = parser.parse_args()

    detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
    fairface = FairFace()

    source_type = get_source_type(args.source)

    if source_type == 'camera':
        run_camera(detector, fairface, int(args.source), args.threshold)
    elif source_type == 'image':
        if not os.path.exists(args.source):
            print(f'Error: Image not found: {args.source}')
            return
        process_image(detector, fairface, args.source, args.save_dir, args.threshold)
    elif source_type == 'video':
        if not os.path.exists(args.source):
            print(f'Error: Video not found: {args.source}')
            return
        process_video(detector, fairface, args.source, args.save_dir, args.threshold)
    else:
        print(f"Error: Unknown source type for '{args.source}'")
        print('Supported formats: images (.jpg, .png, ...), videos (.mp4, .avi, ...), or camera ID (0, 1, ...)')


if __name__ == '__main__':
    main()
190 tools/gaze_estimation.py Normal file
@@ -0,0 +1,190 @@
# Copyright 2025 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

"""Gaze estimation on detected faces.

Usage:
    python tools/gaze_estimation.py --source path/to/image.jpg
    python tools/gaze_estimation.py --source path/to/video.mp4
    python tools/gaze_estimation.py --source 0  # webcam
"""

from __future__ import annotations

import argparse
import os
from pathlib import Path

import cv2
import numpy as np

from uniface import RetinaFace
from uniface.gaze import MobileGaze
from uniface.visualization import draw_gaze

IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}


def get_source_type(source: str) -> str:
    """Determine if source is image, video, or camera."""
    if source.isdigit():
        return 'camera'
    path = Path(source)
    suffix = path.suffix.lower()
    if suffix in IMAGE_EXTENSIONS:
        return 'image'
    elif suffix in VIDEO_EXTENSIONS:
        return 'video'
    else:
        return 'unknown'


def process_image(detector, gaze_estimator, image_path: str, save_dir: str = 'outputs'):
    """Process a single image."""
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Failed to load image from '{image_path}'")
        return

    faces = detector.detect(image)
    print(f'Detected {len(faces)} face(s)')

    for i, face in enumerate(faces):
        bbox = face.bbox
        x1, y1, x2, y2 = map(int, bbox[:4])
        face_crop = image[y1:y2, x1:x2]

        if face_crop.size == 0:
            continue

        result = gaze_estimator.estimate(face_crop)
        print(f'  Face {i + 1}: pitch={np.degrees(result.pitch):.1f}°, yaw={np.degrees(result.yaw):.1f}°')

        draw_gaze(image, bbox, result.pitch, result.yaw, draw_angles=True)

    os.makedirs(save_dir, exist_ok=True)
    output_path = os.path.join(save_dir, f'{Path(image_path).stem}_gaze.jpg')
    cv2.imwrite(output_path, image)
    print(f'Output saved: {output_path}')


def process_video(detector, gaze_estimator, video_path: str, save_dir: str = 'outputs'):
    """Process a video file."""
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Error: Cannot open video file '{video_path}'")
        return

    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    os.makedirs(save_dir, exist_ok=True)
    output_path = os.path.join(save_dir, f'{Path(video_path).stem}_gaze.mp4')
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    print(f'Processing video: {video_path} ({total_frames} frames)')
    frame_count = 0

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        frame_count += 1
        faces = detector.detect(frame)

        for face in faces:
            bbox = face.bbox
            x1, y1, x2, y2 = map(int, bbox[:4])
            face_crop = frame[y1:y2, x1:x2]

            if face_crop.size == 0:
                continue

            result = gaze_estimator.estimate(face_crop)
            draw_gaze(frame, bbox, result.pitch, result.yaw)

        cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        out.write(frame)

        if frame_count % 100 == 0:
            print(f'  Processed {frame_count}/{total_frames} frames...')

    cap.release()
    out.release()
    print(f'Done! Output saved: {output_path}')


def run_camera(detector, gaze_estimator, camera_id: int = 0):
    """Run real-time detection on webcam."""
    cap = cv2.VideoCapture(camera_id)
    if not cap.isOpened():
        print(f'Cannot open camera {camera_id}')
        return

    print("Press 'q' to quit")

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        frame = cv2.flip(frame, 1)
        faces = detector.detect(frame)

        for face in faces:
            bbox = face.bbox
            x1, y1, x2, y2 = map(int, bbox[:4])
            face_crop = frame[y1:y2, x1:x2]

            if face_crop.size == 0:
                continue

            result = gaze_estimator.estimate(face_crop)
            draw_gaze(frame, bbox, result.pitch, result.yaw)

        cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        cv2.imshow('Gaze Estimation', frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()


def main():
    parser = argparse.ArgumentParser(description='Run gaze estimation')
    parser.add_argument('--source', type=str, required=True, help='Image/video path or camera ID (0, 1, ...)')
    parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
    args = parser.parse_args()

    detector = RetinaFace()
    gaze_estimator = MobileGaze()

    source_type = get_source_type(args.source)

    if source_type == 'camera':
        run_camera(detector, gaze_estimator, int(args.source))
    elif source_type == 'image':
        if not os.path.exists(args.source):
            print(f'Error: Image not found: {args.source}')
            return
        process_image(detector, gaze_estimator, args.source, args.save_dir)
    elif source_type == 'video':
        if not os.path.exists(args.source):
            print(f'Error: Video not found: {args.source}')
            return
        process_video(detector, gaze_estimator, args.source, args.save_dir)
    else:
        print(f"Error: Unknown source type for '{args.source}'")
        print('Supported formats: images (.jpg, .png, ...), videos (.mp4, .avi, ...), or camera ID (0, 1, ...)')


if __name__ == '__main__':
    main()
187 tools/landmarks.py Normal file
@@ -0,0 +1,187 @@
# Copyright 2025 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

"""106-point facial landmark detection.

Usage:
    python tools/landmarks.py --source path/to/image.jpg
    python tools/landmarks.py --source path/to/video.mp4
    python tools/landmarks.py --source 0  # webcam
"""

from __future__ import annotations

import argparse
import os
from pathlib import Path

import cv2

from uniface import SCRFD, Landmark106, RetinaFace

IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}


def get_source_type(source: str) -> str:
    """Determine if source is image, video, or camera."""
    if source.isdigit():
        return 'camera'
    path = Path(source)
    suffix = path.suffix.lower()
    if suffix in IMAGE_EXTENSIONS:
        return 'image'
    elif suffix in VIDEO_EXTENSIONS:
        return 'video'
    else:
        return 'unknown'


def process_image(detector, landmarker, image_path: str, save_dir: str = 'outputs'):
    """Process a single image."""
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Failed to load image from '{image_path}'")
        return

    faces = detector.detect(image)
    print(f'Detected {len(faces)} face(s)')

    if not faces:
        return

    for i, face in enumerate(faces):
        bbox = face.bbox
        x1, y1, x2, y2 = map(int, bbox)
        cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)

        landmarks = landmarker.get_landmarks(image, bbox)
        print(f'  Face {i + 1}: {len(landmarks)} landmarks')

        for x, y in landmarks.astype(int):
            cv2.circle(image, (x, y), 1, (0, 255, 0), -1)

        cv2.putText(image, f'Face {i + 1}', (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    os.makedirs(save_dir, exist_ok=True)
    output_path = os.path.join(save_dir, f'{Path(image_path).stem}_landmarks.jpg')
    cv2.imwrite(output_path, image)
    print(f'Output saved: {output_path}')


def process_video(detector, landmarker, video_path: str, save_dir: str = 'outputs'):
    """Process a video file."""
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Error: Cannot open video file '{video_path}'")
        return

    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    os.makedirs(save_dir, exist_ok=True)
    output_path = os.path.join(save_dir, f'{Path(video_path).stem}_landmarks.mp4')
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    print(f'Processing video: {video_path} ({total_frames} frames)')
    frame_count = 0

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        frame_count += 1
        faces = detector.detect(frame)

        for face in faces:
            bbox = face.bbox
            x1, y1, x2, y2 = map(int, bbox)
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

            landmarks = landmarker.get_landmarks(frame, bbox)
            for x, y in landmarks.astype(int):
                cv2.circle(frame, (x, y), 1, (0, 255, 0), -1)

        cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        out.write(frame)

        if frame_count % 100 == 0:
            print(f'  Processed {frame_count}/{total_frames} frames...')

    cap.release()
    out.release()
    print(f'Done! Output saved: {output_path}')


def run_camera(detector, landmarker, camera_id: int = 0):
    """Run real-time detection on webcam."""
    cap = cv2.VideoCapture(camera_id)
    if not cap.isOpened():
        print(f'Cannot open camera {camera_id}')
        return

    print("Press 'q' to quit")

    while True:
        ret, frame = cap.read()
        if not ret:
            break
        frame = cv2.flip(frame, 1)

        faces = detector.detect(frame)

        for face in faces:
            bbox = face.bbox
            x1, y1, x2, y2 = map(int, bbox)
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

            landmarks = landmarker.get_landmarks(frame, bbox)
            for x, y in landmarks.astype(int):
                cv2.circle(frame, (x, y), 1, (0, 255, 0), -1)

        cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        cv2.imshow('106-Point Landmarks', frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()


def main():
    parser = argparse.ArgumentParser(description='Run facial landmark detection')
    parser.add_argument('--source', type=str, required=True, help='Image/video path or camera ID (0, 1, ...)')
    parser.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
    parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
    args = parser.parse_args()

    detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
    landmarker = Landmark106()

    source_type = get_source_type(args.source)

    if source_type == 'camera':
        run_camera(detector, landmarker, int(args.source))
    elif source_type == 'image':
        if not os.path.exists(args.source):
            print(f'Error: Image not found: {args.source}')
            return
        process_image(detector, landmarker, args.source, args.save_dir)
    elif source_type == 'video':
        if not os.path.exists(args.source):
            print(f'Error: Video not found: {args.source}')
            return
        process_video(detector, landmarker, args.source, args.save_dir)
    else:
        print(f"Error: Unknown source type for '{args.source}'")
        print('Supported formats: images (.jpg, .png, ...), videos (.mp4, .avi, ...), or camera ID (0, 1, ...)')


if __name__ == '__main__':
    main()
@@ -1,6 +1,13 @@
# Face recognition: extract embeddings or compare two faces
# Usage: python run_recognition.py --image path/to/image.jpg
#        python run_recognition.py --image1 face1.jpg --image2 face2.jpg
# Copyright 2025 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

"""Face recognition: extract embeddings or compare two faces.

Usage:
    python tools/recognition.py --image path/to/image.jpg
    python tools/recognition.py --image1 face1.jpg --image2 face2.jpg
"""

import argparse
214 tools/spoofing.py Normal file
@@ -0,0 +1,214 @@
# Copyright 2025 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

"""Face Anti-Spoofing Detection.

Usage:
    python tools/spoofing.py --source path/to/image.jpg
    python tools/spoofing.py --source path/to/video.mp4
    python tools/spoofing.py --source 0  # webcam
"""

from __future__ import annotations

import argparse
import os
from pathlib import Path

import cv2
import numpy as np

from uniface import RetinaFace
from uniface.constants import MiniFASNetWeights
from uniface.spoofing import create_spoofer

IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}


def get_source_type(source: str) -> str:
    """Determine if source is image, video, or camera."""
    if source.isdigit():
        return 'camera'
    path = Path(source)
    suffix = path.suffix.lower()
    if suffix in IMAGE_EXTENSIONS:
        return 'image'
    elif suffix in VIDEO_EXTENSIONS:
        return 'video'
    else:
        return 'unknown'


def draw_spoofing_result(
    image: np.ndarray,
    bbox: list,
    is_real: bool,
    confidence: float,
    thickness: int = 2,
) -> None:
    """Draw bounding box with anti-spoofing result.

    Args:
        image: Input image to draw on.
        bbox: Bounding box in [x1, y1, x2, y2] format.
        is_real: True if real face, False if fake.
        confidence: Confidence score (0.0 to 1.0).
        thickness: Line thickness for bounding box.
    """
    x1, y1, x2, y2 = map(int, bbox[:4])

    color = (0, 255, 0) if is_real else (0, 0, 255)

    cv2.rectangle(image, (x1, y1), (x2, y2), color, thickness)

    label = 'Real' if is_real else 'Fake'
    text = f'{label}: {confidence:.1%}'

    (tw, th), _baseline = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2)
    cv2.rectangle(image, (x1, y1 - th - 10), (x1 + tw + 10, y1), color, -1)
    cv2.putText(image, text, (x1 + 5, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)


def process_image(detector, spoofer, image_path: str, save_dir: str = 'outputs') -> None:
    """Process a single image for face anti-spoofing detection."""
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Failed to load image from '{image_path}'")
        return

    faces = detector.detect(image)
    print(f'Detected {len(faces)} face(s)')

    if not faces:
        print('No faces detected in the image.')
        return

    for i, face in enumerate(faces, 1):
        result = spoofer.predict(image, face.bbox)
        label = 'Real' if result.is_real else 'Fake'
        print(f'  Face {i}: {label} ({result.confidence:.1%})')

        draw_spoofing_result(image, face.bbox, result.is_real, result.confidence)

    os.makedirs(save_dir, exist_ok=True)
    output_path = os.path.join(save_dir, f'{Path(image_path).stem}_spoofing.jpg')
    cv2.imwrite(output_path, image)
    print(f'Output saved: {output_path}')


def process_video(detector, spoofer, video_path: str, save_dir: str = 'outputs') -> None:
    """Process a video file for face anti-spoofing detection."""
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Error: Cannot open video file '{video_path}'")
        return

    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    os.makedirs(save_dir, exist_ok=True)
    output_path = os.path.join(save_dir, f'{Path(video_path).stem}_spoofing.mp4')
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    print(f'Processing video: {video_path} ({total_frames} frames)')
    frame_count = 0

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        frame_count += 1
        faces = detector.detect(frame)

        for face in faces:
            result = spoofer.predict(frame, face.bbox)
            draw_spoofing_result(frame, face.bbox, result.is_real, result.confidence)

        out.write(frame)

        if frame_count % 100 == 0:
            print(f'  Processed {frame_count}/{total_frames} frames...')

    cap.release()
    out.release()
    print(f'Done! Output saved: {output_path}')


def run_camera(detector, spoofer, camera_id: int = 0) -> None:
    """Run real-time anti-spoofing detection on webcam."""
    cap = cv2.VideoCapture(camera_id)
    if not cap.isOpened():
        print(f'Cannot open camera {camera_id}')
        return

    print("Press 'q' to quit")

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        frame = cv2.flip(frame, 1)
        faces = detector.detect(frame)

        for face in faces:
            result = spoofer.predict(frame, face.bbox)
            draw_spoofing_result(frame, face.bbox, result.is_real, result.confidence)

        cv2.imshow('Face Anti-Spoofing', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()


def main():
    parser = argparse.ArgumentParser(description='Face Anti-Spoofing Detection')
    parser.add_argument('--source', type=str, required=True, help='Image/video path or camera ID (0, 1, ...)')
    parser.add_argument(
        '--model',
        type=str,
        default='v2',
        choices=['v1se', 'v2'],
        help='Model variant: v1se or v2 (default: v2)',
    )
    parser.add_argument('--scale', type=float, default=None, help='Custom crop scale (default: auto)')
    parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory')
    args = parser.parse_args()

    # Select model variant
    model_name = MiniFASNetWeights.V1SE if args.model == 'v1se' else MiniFASNetWeights.V2

    # Initialize models
    print(f'Initializing models (MiniFASNet {args.model.upper()})...')
    detector = RetinaFace()
    spoofer = create_spoofer(model_name=model_name, scale=args.scale)

    source_type = get_source_type(args.source)

    if source_type == 'camera':
        run_camera(detector, spoofer, int(args.source))
    elif source_type == 'image':
        if not os.path.exists(args.source):
            print(f'Error: Image not found: {args.source}')
            return
        process_image(detector, spoofer, args.source, args.save_dir)
    elif source_type == 'video':
        if not os.path.exists(args.source):
            print(f'Error: Video not found: {args.source}')
            return
        process_video(detector, spoofer, args.source, args.save_dir)
    else:
        print(f"Error: Unknown source type for '{args.source}'")
        print('Supported formats: images (.jpg, .png, ...), videos (.mp4, .avi, ...), or camera ID (0, 1, ...)')


if __name__ == '__main__':
    main()
180 tools/video_detection.py Normal file
@@ -0,0 +1,180 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Face detection on video files with progress tracking.
|
||||
|
||||
Usage:
|
||||
python tools/video_detection.py --source video.mp4
|
||||
python tools/video_detection.py --source video.mp4 --output output.mp4
|
||||
python tools/video_detection.py --source 0 # webcam
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
from tqdm import tqdm
|
||||
|
||||
from uniface import SCRFD, RetinaFace
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff'}
|
||||
VIDEO_EXTENSIONS = {'.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv'}
|
||||
|
||||
|
||||
def get_source_type(source: str) -> str:
|
||||
"""Determine if source is image, video, or camera."""
|
||||
if source.isdigit():
|
||||
return 'camera'
|
||||
path = Path(source)
|
||||
suffix = path.suffix.lower()
|
||||
if suffix in IMAGE_EXTENSIONS:
|
||||
return 'image'
|
||||
elif suffix in VIDEO_EXTENSIONS:
|
||||
return 'video'
|
||||
else:
|
||||
return 'unknown'
|
||||
|
||||
|
||||
def process_video(
|
||||
detector,
|
||||
input_path: str,
|
||||
output_path: str,
|
||||
threshold: float = 0.6,
|
||||
show_preview: bool = False,
|
||||
):
|
||||
"""Process a video file with progress bar."""
|
||||
cap = cv2.VideoCapture(input_path)
|
||||
if not cap.isOpened():
|
||||
print(f"Error: Cannot open video file '{input_path}'")
|
||||
return
|
||||
|
||||
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
||||
fps = cap.get(cv2.CAP_PROP_FPS)
|
||||
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
||||
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
||||
|
||||
print(f'Input: {input_path} ({width}x{height}, {fps:.1f} fps, {total_frames} frames)')
|
||||
print(f'Output: {output_path}')
|
||||
|
||||
Path(output_path).parent.mkdir(parents=True, exist_ok=True)
|
||||
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
|
||||
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
|
||||
|
||||
if not out.isOpened():
|
||||
print(f"Error: Cannot create output video '{output_path}'")
|
||||
cap.release()
|
||||
return
|
||||
|
||||
frame_count = 0
|
||||
total_faces = 0
|
||||
|
||||
for _ in tqdm(range(total_frames), desc='Processing', unit='frames'):
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
frame_count += 1
|
||||
faces = detector.detect(frame)
|
||||
total_faces += len(faces)
|
||||
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
draw_detections(
|
||||
image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
|
||||
)
|
||||
|
||||
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
out.write(frame)
|
||||
|
||||
if show_preview:
|
||||
cv2.imshow("Processing - Press 'q' to cancel", frame)
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
print('\nCancelled by user')
|
||||
break
|
||||
|
||||
cap.release()
|
||||
out.release()
|
||||
if show_preview:
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
avg_faces = total_faces / frame_count if frame_count > 0 else 0
|
||||
print(f'\nDone! {frame_count} frames, {total_faces} faces ({avg_faces:.1f} avg/frame)')
|
||||
print(f'Saved: {output_path}')
|
||||
|
||||
|
||||
def run_camera(detector, camera_id: int = 0, threshold: float = 0.6):
|
||||
"""Run real-time detection on webcam."""
|
||||
cap = cv2.VideoCapture(camera_id)
|
||||
if not cap.isOpened():
|
||||
print(f'Cannot open camera {camera_id}')
|
||||
return
|
||||
|
||||
print("Press 'q' to quit")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
frame = cv2.flip(frame, 1)
|
||||
if not ret:
|
||||
break
|
||||
|
||||
faces = detector.detect(frame)
|
||||
|
||||
bboxes = [f.bbox for f in faces]
|
||||
scores = [f.confidence for f in faces]
|
||||
landmarks = [f.landmarks for f in faces]
|
||||
draw_detections(
|
||||
image=frame, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=threshold, fancy_bbox=True
|
||||
)
|
||||
|
||||
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
cv2.imshow('Face Detection', frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Process video with face detection')
|
||||
parser.add_argument('--source', type=str, required=True, help='Video path or camera ID (0, 1, ...)')
|
||||
parser.add_argument('--output', type=str, default=None, help='Output video path (auto-generated if not specified)')
|
||||
parser.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
|
||||
parser.add_argument('--threshold', type=float, default=0.6, help='Visualization threshold')
|
||||
parser.add_argument('--preview', action='store_true', help='Show live preview')
|
||||
parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory (if --output not specified)')
|
||||
args = parser.parse_args()
|
||||
|
||||
detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
|
||||
|
||||
source_type = get_source_type(args.source)
|
||||
|
||||
if source_type == 'camera':
|
||||
run_camera(detector, int(args.source), args.threshold)
|
||||
elif source_type == 'video':
|
||||
if not os.path.exists(args.source):
|
||||
print(f'Error: Video not found: {args.source}')
|
||||
return
|
||||
|
||||
# Determine output path
|
||||
if args.output:
|
||||
output_path = args.output
|
||||
else:
|
||||
os.makedirs(args.save_dir, exist_ok=True)
|
||||
output_path = os.path.join(args.save_dir, f'{Path(args.source).stem}_detected.mp4')
|
||||
|
||||
process_video(detector, args.source, output_path, args.threshold, args.preview)
|
||||
else:
|
||||
print(f"Error: Unknown source type for '{args.source}'")
|
||||
print('Supported formats: videos (.mp4, .avi, ...) or camera ID (0, 1, ...)')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
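For reference, a minimal sketch of driving these helpers programmatically instead of through the CLI; it assumes the process_video and run_camera functions defined above are in scope (e.g. imported from this script) and uses illustrative input/output paths.

    from uniface import RetinaFace

    detector = RetinaFace()

    # Equivalent to: --source input.mp4 --output outputs/input_detected.mp4 --preview
    process_video(detector, 'input.mp4', 'outputs/input_detected.mp4', 0.6, True)

    # Or real-time detection from the default webcam (--source 0)
    run_camera(detector, camera_id=0, threshold=0.6)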
|
||||
@@ -11,10 +11,24 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""UniFace: A comprehensive library for face analysis.
|
||||
|
||||
This library provides unified APIs for:
|
||||
- Face detection (RetinaFace, SCRFD, YOLOv5Face)
|
||||
- Face recognition (ArcFace, MobileFace, SphereFace)
|
||||
- Facial landmarks (106-point detection)
|
||||
- Face parsing (semantic segmentation)
|
||||
- Gaze estimation
|
||||
- Age, gender, and emotion prediction
|
||||
- Face anti-spoofing
|
||||
- Privacy/anonymization
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
__license__ = 'MIT'
|
||||
__author__ = 'Yakhyokhuja Valikhujaev'
|
||||
__version__ = '1.5.0'
|
||||
|
||||
__version__ = '2.0.0'
|
||||
|
||||
from uniface.face_utils import compute_similarity, face_alignment
|
||||
from uniface.log import Logger, enable_logging
|
||||
@@ -22,13 +36,7 @@ from uniface.model_store import verify_model_weights
|
||||
from uniface.visualization import draw_detections, vis_parsing_maps
|
||||
|
||||
from .analyzer import FaceAnalyzer
|
||||
from .attribute import AgeGender
|
||||
from .face import Face
|
||||
|
||||
try:
|
||||
from .attribute import Emotion
|
||||
except ImportError:
|
||||
Emotion = None # PyTorch not installed
|
||||
from .attribute import AgeGender, FairFace
|
||||
from .detection import (
|
||||
SCRFD,
|
||||
RetinaFace,
|
||||
@@ -40,9 +48,20 @@ from .detection import (
|
||||
from .gaze import MobileGaze, create_gaze_estimator
|
||||
from .landmark import Landmark106, create_landmarker
|
||||
from .parsing import BiSeNet, create_face_parser
|
||||
from .privacy import BlurFace, anonymize_faces
|
||||
from .recognition import ArcFace, MobileFace, SphereFace, create_recognizer
|
||||
from .spoofing import MiniFASNet, create_spoofer
|
||||
from .types import AttributeResult, EmotionResult, Face, GazeResult, SpoofingResult
|
||||
|
||||
# Optional: Emotion requires PyTorch
|
||||
Emotion: type | None
|
||||
try:
|
||||
from .attribute import Emotion
|
||||
except ImportError:
|
||||
Emotion = None
|
||||
|
||||
__all__ = [
|
||||
# Metadata
|
||||
'__author__',
|
||||
'__license__',
|
||||
'__version__',
|
||||
@@ -55,6 +74,7 @@ __all__ = [
|
||||
'create_gaze_estimator',
|
||||
'create_landmarker',
|
||||
'create_recognizer',
|
||||
'create_spoofer',
|
||||
'detect_faces',
|
||||
'list_available_detectors',
|
||||
# Detection models
|
||||
@@ -68,18 +88,28 @@ __all__ = [
|
||||
# Landmark models
|
||||
'Landmark106',
|
||||
# Gaze models
|
||||
'GazeResult',
|
||||
'MobileGaze',
|
||||
# Parsing models
|
||||
'BiSeNet',
|
||||
# Attribute models
|
||||
'AgeGender',
|
||||
'AttributeResult',
|
||||
'Emotion',
|
||||
'EmotionResult',
|
||||
'FairFace',
|
||||
# Spoofing models
|
||||
'MiniFASNet',
|
||||
'SpoofingResult',
|
||||
# Privacy
|
||||
'BlurFace',
|
||||
'anonymize_faces',
|
||||
# Utilities
|
||||
'Logger',
|
||||
'compute_similarity',
|
||||
'draw_detections',
|
||||
'vis_parsing_maps',
|
||||
'enable_logging',
|
||||
'face_alignment',
|
||||
'verify_model_weights',
|
||||
'Logger',
|
||||
'enable_logging',
|
||||
'vis_parsing_maps',
|
||||
]
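Because Emotion is exported as None when PyTorch is missing, downstream code can feature-detect it at runtime. A minimal sketch, assuming only the public exports listed above:

    import uniface

    print(uniface.__version__)  # '2.0.0' after this change

    if uniface.Emotion is None:
        print('Emotion prediction unavailable (PyTorch not installed)')
    else:
        emotion_model = uniface.Emotion()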
|
||||
|
||||
@@ -2,74 +2,102 @@
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from typing import List, Optional
|
||||
from __future__ import annotations
|
||||
|
||||
import numpy as np
|
||||
|
||||
from uniface.attribute.age_gender import AgeGender
|
||||
from uniface.attribute.fairface import FairFace
|
||||
from uniface.detection.base import BaseDetector
|
||||
from uniface.face import Face
|
||||
from uniface.log import Logger
|
||||
from uniface.recognition.base import BaseRecognizer
|
||||
from uniface.types import Face
|
||||
|
||||
__all__ = ['FaceAnalyzer']
|
||||
|
||||
|
||||
class FaceAnalyzer:
|
||||
"""Unified face analyzer combining detection, recognition, and attributes."""
|
||||
"""Unified face analyzer combining detection, recognition, and attributes.
|
||||
|
||||
This class provides a high-level interface for face analysis by combining
|
||||
multiple components: face detection, recognition (embedding extraction),
|
||||
and attribute prediction (age, gender, race).
|
||||
|
||||
Args:
|
||||
detector: Face detector instance for detecting faces in images.
|
||||
recognizer: Optional face recognizer for extracting embeddings.
|
||||
age_gender: Optional age/gender predictor.
|
||||
fairface: Optional FairFace predictor for demographics.
|
||||
|
||||
Example:
|
||||
>>> from uniface import RetinaFace, ArcFace, FaceAnalyzer
|
||||
>>> detector = RetinaFace()
|
||||
>>> recognizer = ArcFace()
|
||||
>>> analyzer = FaceAnalyzer(detector, recognizer=recognizer)
|
||||
>>> faces = analyzer.analyze(image)
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
detector: BaseDetector,
|
||||
recognizer: Optional[BaseRecognizer] = None,
|
||||
age_gender: Optional[AgeGender] = None,
|
||||
recognizer: BaseRecognizer | None = None,
|
||||
age_gender: AgeGender | None = None,
|
||||
fairface: FairFace | None = None,
|
||||
) -> None:
|
||||
self.detector = detector
|
||||
self.recognizer = recognizer
|
||||
self.age_gender = age_gender
|
||||
self.fairface = fairface
|
||||
|
||||
Logger.info(f'Initialized FaceAnalyzer with detector={detector.__class__.__name__}')
|
||||
if recognizer:
|
||||
Logger.info(f' - Recognition enabled: {recognizer.__class__.__name__}')
|
||||
if age_gender:
|
||||
Logger.info(f' - Age/Gender enabled: {age_gender.__class__.__name__}')
|
||||
if fairface:
|
||||
Logger.info(f' - FairFace enabled: {fairface.__class__.__name__}')
|
||||
|
||||
def analyze(self, image: np.ndarray) -> List[Face]:
|
||||
"""Analyze faces in an image."""
|
||||
detections = self.detector.detect(image)
|
||||
Logger.debug(f'Detected {len(detections)} face(s)')
|
||||
def analyze(self, image: np.ndarray) -> list[Face]:
|
||||
"""Analyze faces in an image.
|
||||
|
||||
faces = []
|
||||
for idx, detection in enumerate(detections):
|
||||
bbox = detection['bbox']
|
||||
confidence = detection['confidence']
|
||||
landmarks = detection['landmarks']
|
||||
Performs face detection and optionally extracts embeddings and
|
||||
predicts attributes for each detected face.
|
||||
|
||||
embedding = None
|
||||
Args:
|
||||
image: Input image as numpy array with shape (H, W, C) in BGR format.
|
||||
|
||||
Returns:
|
||||
List of Face objects with detection results and any predicted attributes.
|
||||
"""
|
||||
faces = self.detector.detect(image)
|
||||
Logger.debug(f'Detected {len(faces)} face(s)')
|
||||
|
||||
for idx, face in enumerate(faces):
|
||||
if self.recognizer is not None:
|
||||
try:
|
||||
embedding = self.recognizer.get_normalized_embedding(image, landmarks)
|
||||
Logger.debug(f' Face {idx + 1}: Extracted embedding with shape {embedding.shape}')
|
||||
face.embedding = self.recognizer.get_normalized_embedding(image, face.landmarks)
|
||||
Logger.debug(f' Face {idx + 1}: Extracted embedding with shape {face.embedding.shape}')
|
||||
except Exception as e:
|
||||
Logger.warning(f' Face {idx + 1}: Failed to extract embedding: {e}')
|
||||
|
||||
age, gender = None, None
|
||||
if self.age_gender is not None:
|
||||
try:
|
||||
gender, age = self.age_gender.predict(image, bbox)
|
||||
Logger.debug(f' Face {idx + 1}: Age={age}, Gender={gender}')
|
||||
result = self.age_gender.predict(image, face.bbox)
|
||||
face.gender = result.gender
|
||||
face.age = result.age
|
||||
Logger.debug(f' Face {idx + 1}: Age={face.age}, Gender={face.sex}')
|
||||
except Exception as e:
|
||||
Logger.warning(f' Face {idx + 1}: Failed to predict age/gender: {e}')
|
||||
|
||||
face = Face(
|
||||
bbox=bbox,
|
||||
confidence=confidence,
|
||||
landmarks=landmarks,
|
||||
embedding=embedding,
|
||||
age=age,
|
||||
gender=gender,
|
||||
)
|
||||
faces.append(face)
|
||||
if self.fairface is not None:
|
||||
try:
|
||||
result = self.fairface.predict(image, face.bbox)
|
||||
face.gender = result.gender
|
||||
face.age_group = result.age_group
|
||||
face.race = result.race
|
||||
Logger.debug(f' Face {idx + 1}: AgeGroup={face.age_group}, Gender={face.sex}, Race={face.race}')
|
||||
except Exception as e:
|
||||
Logger.warning(f' Face {idx + 1}: Failed to predict FairFace attributes: {e}')
|
||||
|
||||
Logger.info(f'Analysis complete: {len(faces)} face(s) processed')
|
||||
return faces
|
||||
@@ -80,4 +108,6 @@ class FaceAnalyzer:
|
||||
parts.append(f'recognizer={self.recognizer.__class__.__name__}')
|
||||
if self.age_gender:
|
||||
parts.append(f'age_gender={self.age_gender.__class__.__name__}')
|
||||
if self.fairface:
|
||||
parts.append(f'fairface={self.fairface.__class__.__name__}')
|
||||
return ', '.join(parts) + ')'
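A usage sketch for the updated FaceAnalyzer, assuming the Face fields populated in analyze() above (bbox, confidence, age, gender, embedding); the image path is illustrative:

    import cv2

    from uniface import AgeGender, ArcFace, FaceAnalyzer, RetinaFace

    analyzer = FaceAnalyzer(
        detector=RetinaFace(),
        recognizer=ArcFace(),
        age_gender=AgeGender(),
    )

    image = cv2.imread('group_photo.jpg')  # illustrative path
    for face in analyzer.analyze(image):
        print(face.bbox, face.confidence, face.age, face.gender)
        if face.embedding is not None:
            print('embedding shape:', face.embedding.shape)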
|
||||
|
||||
@@ -2,13 +2,17 @@
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from typing import Any, Dict, List, Union
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
import numpy as np
|
||||
|
||||
from uniface.attribute.age_gender import AgeGender
|
||||
from uniface.attribute.base import Attribute
|
||||
from uniface.constants import AgeGenderWeights, DDAMFNWeights
|
||||
from uniface.attribute.fairface import FairFace
|
||||
from uniface.constants import AgeGenderWeights, DDAMFNWeights, FairFaceWeights
|
||||
from uniface.types import AttributeResult, EmotionResult, Face
|
||||
|
||||
# Emotion requires PyTorch - make it optional
|
||||
try:
|
||||
@@ -20,19 +24,30 @@ except ImportError:
|
||||
_EMOTION_AVAILABLE = False
|
||||
|
||||
# Public API for the attribute module
|
||||
__all__ = ['AgeGender', 'Emotion', 'create_attribute_predictor', 'predict_attributes']
|
||||
__all__ = [
|
||||
'AgeGender',
|
||||
'AttributeResult',
|
||||
'Emotion',
|
||||
'EmotionResult',
|
||||
'FairFace',
|
||||
'create_attribute_predictor',
|
||||
'predict_attributes',
|
||||
]
|
||||
|
||||
# A mapping from model enums to their corresponding attribute classes
|
||||
_ATTRIBUTE_MODELS = {
|
||||
**{model: AgeGender for model in AgeGenderWeights},
|
||||
**dict.fromkeys(AgeGenderWeights, AgeGender),
|
||||
**dict.fromkeys(FairFaceWeights, FairFace),
|
||||
}
|
||||
|
||||
# Add Emotion models only if PyTorch is available
|
||||
if _EMOTION_AVAILABLE:
|
||||
_ATTRIBUTE_MODELS.update({model: Emotion for model in DDAMFNWeights})
|
||||
_ATTRIBUTE_MODELS.update(dict.fromkeys(DDAMFNWeights, Emotion))
|
||||
|
||||
|
||||
def create_attribute_predictor(model_name: Union[AgeGenderWeights, DDAMFNWeights], **kwargs: Any) -> Attribute:
|
||||
def create_attribute_predictor(
|
||||
model_name: AgeGenderWeights | DDAMFNWeights | FairFaceWeights, **kwargs: Any
|
||||
) -> Attribute:
|
||||
"""
|
||||
Factory function to create an attribute predictor instance.
|
||||
|
||||
@@ -41,11 +56,13 @@ def create_attribute_predictor(model_name: Union[AgeGenderWeights, DDAMFNWeights
|
||||
|
||||
Args:
|
||||
model_name: The enum corresponding to the desired attribute model
|
||||
(e.g., AgeGenderWeights.DEFAULT or DDAMFNWeights.AFFECNET7).
|
||||
(e.g., AgeGenderWeights.DEFAULT, DDAMFNWeights.AFFECNET7,
|
||||
or FairFaceWeights.DEFAULT).
|
||||
**kwargs: Additional keyword arguments to pass to the model's constructor.
|
||||
|
||||
Returns:
|
||||
An initialized instance of an Attribute predictor class (e.g., AgeGender).
|
||||
An initialized instance of an Attribute predictor class
|
||||
(e.g., AgeGender, FairFace, or Emotion).
|
||||
|
||||
Raises:
|
||||
ValueError: If the provided model_name is not a supported enum.
|
||||
@@ -54,46 +71,44 @@ def create_attribute_predictor(model_name: Union[AgeGenderWeights, DDAMFNWeights
|
||||
|
||||
if model_class is None:
|
||||
raise ValueError(
|
||||
f'Unsupported attribute model: {model_name}. Please choose from AgeGenderWeights or DDAMFNWeights.'
|
||||
f'Unsupported attribute model: {model_name}. '
|
||||
f'Please choose from AgeGenderWeights, FairFaceWeights, or DDAMFNWeights.'
|
||||
)
|
||||
|
||||
# Pass model_name to the constructor, as some classes might need it
|
||||
return model_class(model_name=model_name, **kwargs)
|
||||
|
||||
|
||||
def predict_attributes(
|
||||
image: np.ndarray, detections: List[Dict[str, np.ndarray]], predictor: Attribute
|
||||
) -> List[Dict[str, Any]]:
|
||||
def predict_attributes(image: np.ndarray, faces: list[Face], predictor: Attribute) -> list[Face]:
|
||||
"""
|
||||
High-level API to predict attributes for multiple detected faces.
|
||||
|
||||
This function iterates through a list of face detections, runs the
|
||||
specified attribute predictor on each one, and appends the results back
|
||||
into the detection dictionary.
|
||||
This function iterates through a list of Face objects, runs the
|
||||
specified attribute predictor on each one, and updates the Face
|
||||
objects with the predicted attributes.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): The full input image in BGR format.
|
||||
detections (List[Dict]): A list of detection results, where each dict
|
||||
must contain a 'bbox' and optionally 'landmark'.
|
||||
faces (List[Face]): A list of Face objects from face detection.
|
||||
predictor (Attribute): An initialized attribute predictor instance,
|
||||
created by `create_attribute_predictor`.
|
||||
|
||||
Returns:
|
||||
The list of detections, where each dictionary is updated with a new
|
||||
'attributes' key containing the prediction result.
|
||||
List[Face]: The list of Face objects with updated attribute fields.
|
||||
"""
|
||||
for face in detections:
|
||||
# Initialize attributes dict if it doesn't exist
|
||||
if 'attributes' not in face:
|
||||
face['attributes'] = {}
|
||||
|
||||
for face in faces:
|
||||
if isinstance(predictor, AgeGender):
|
||||
gender_id, age = predictor(image, face['bbox'])
|
||||
face['attributes']['gender_id'] = gender_id
|
||||
face['attributes']['age'] = age
|
||||
result = predictor(image, face.bbox)
|
||||
face.gender = result.gender
|
||||
face.age = result.age
|
||||
elif isinstance(predictor, FairFace):
|
||||
result = predictor(image, face.bbox)
|
||||
face.gender = result.gender
|
||||
face.age_group = result.age_group
|
||||
face.race = result.race
|
||||
elif isinstance(predictor, Emotion):
|
||||
emotion, confidence = predictor(image, face['landmark'])
|
||||
face['attributes']['emotion'] = emotion
|
||||
face['attributes']['confidence'] = confidence
|
||||
result = predictor(image, face.landmarks)
|
||||
face.emotion = result.emotion
|
||||
face.emotion_confidence = result.confidence
|
||||
|
||||
return detections
|
||||
return faces
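A minimal sketch of the factory plus the high-level helper, assuming the FairFace branch above and the Face-based detector API; the image path is illustrative:

    import cv2

    from uniface.attribute import create_attribute_predictor, predict_attributes
    from uniface.constants import FairFaceWeights
    from uniface.detection import create_detector

    detector = create_detector('retinaface')
    predictor = create_attribute_predictor(FairFaceWeights.DEFAULT)

    image = cv2.imread('portrait.jpg')  # illustrative path
    faces = predict_attributes(image, detector.detect(image), predictor)
    for face in faces:
        print(face.gender, face.age_group, face.race)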
|
||||
|
||||
@@ -2,7 +2,6 @@
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from typing import List, Optional, Tuple, Union
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
@@ -13,6 +12,7 @@ from uniface.face_utils import bbox_center_alignment
|
||||
from uniface.log import Logger
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.onnx_utils import create_onnx_session
|
||||
from uniface.types import AttributeResult
|
||||
|
||||
__all__ = ['AgeGender']
|
||||
|
||||
@@ -35,7 +35,7 @@ class AgeGender(Attribute):
|
||||
def __init__(
|
||||
self,
|
||||
model_name: AgeGenderWeights = AgeGenderWeights.DEFAULT,
|
||||
input_size: Optional[Tuple[int, int]] = None,
|
||||
input_size: tuple[int, int] | None = None,
|
||||
) -> None:
|
||||
"""
|
||||
Initializes the AgeGender prediction model.
|
||||
@@ -81,7 +81,7 @@ class AgeGender(Attribute):
|
||||
)
|
||||
raise RuntimeError(f'Failed to initialize AgeGender model: {e}') from e
|
||||
|
||||
def preprocess(self, image: np.ndarray, bbox: Union[List, np.ndarray]) -> np.ndarray:
|
||||
def preprocess(self, image: np.ndarray, bbox: list | np.ndarray) -> np.ndarray:
|
||||
"""
|
||||
Aligns the face based on the bounding box and preprocesses it for inference.
|
||||
|
||||
@@ -111,7 +111,7 @@ class AgeGender(Attribute):
|
||||
)
|
||||
return blob
|
||||
|
||||
def postprocess(self, prediction: np.ndarray) -> Tuple[int, int]:
|
||||
def postprocess(self, prediction: np.ndarray) -> AttributeResult:
|
||||
"""
|
||||
Processes the raw model output to extract gender and age.
|
||||
|
||||
@@ -119,16 +119,15 @@ class AgeGender(Attribute):
|
||||
prediction (np.ndarray): The raw output from the model inference.
|
||||
|
||||
Returns:
|
||||
Tuple[int, int]: A tuple containing the predicted gender ID (0 for Female, 1 for Male)
|
||||
and age (in years).
|
||||
AttributeResult: Result containing gender (0=Female, 1=Male) and age (in years).
|
||||
"""
|
||||
# First two values are gender logits
|
||||
gender_id = int(np.argmax(prediction[:2]))
|
||||
gender = int(np.argmax(prediction[:2]))
|
||||
# Third value is normalized age, scaled by 100
|
||||
age = int(np.round(prediction[2] * 100))
|
||||
return gender_id, age
|
||||
return AttributeResult(gender=gender, age=age)
|
||||
|
||||
def predict(self, image: np.ndarray, bbox: Union[List, np.ndarray]) -> Tuple[int, int]:
|
||||
def predict(self, image: np.ndarray, bbox: list | np.ndarray) -> AttributeResult:
|
||||
"""
|
||||
Predicts age and gender for a single face specified by a bounding box.
|
||||
|
||||
@@ -137,75 +136,8 @@ class AgeGender(Attribute):
|
||||
bbox (Union[List, np.ndarray]): The face bounding box coordinates [x1, y1, x2, y2].
|
||||
|
||||
Returns:
|
||||
Tuple[int, int]: A tuple containing the predicted gender ID (0 for Female, 1 for Male) and age.
|
||||
AttributeResult: Result containing gender (0=Female, 1=Male) and age (in years).
|
||||
"""
|
||||
face_blob = self.preprocess(image, bbox)
|
||||
prediction = self.session.run(self.output_names, {self.input_name: face_blob})[0][0]
|
||||
gender_id, age = self.postprocess(prediction)
|
||||
return gender_id, age
|
||||
|
||||
|
||||
# TODO: below is only for testing, remove it later
|
||||
if __name__ == '__main__':
|
||||
# To run this script, you need to have uniface.detection installed
|
||||
# or available in your path.
|
||||
from uniface.constants import RetinaFaceWeights
|
||||
from uniface.detection import create_detector
|
||||
|
||||
print('Initializing models for live inference...')
|
||||
# 1. Initialize the face detector
|
||||
# Using a smaller model for faster real-time performance
|
||||
detector = create_detector(model_name=RetinaFaceWeights.MNET_V2)
|
||||
|
||||
# 2. Initialize the attribute predictor
|
||||
age_gender_predictor = AgeGender()
|
||||
|
||||
# 3. Start webcam capture
|
||||
cap = cv2.VideoCapture(0)
|
||||
if not cap.isOpened():
|
||||
print('Error: Could not open webcam.')
|
||||
exit()
|
||||
|
||||
print("Starting webcam feed. Press 'q' to quit.")
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
print('Error: Failed to capture frame.')
|
||||
break
|
||||
|
||||
# Detect faces in the current frame
|
||||
detections = detector.detect(frame)
|
||||
|
||||
# For each detected face, predict age and gender
|
||||
for detection in detections:
|
||||
box = detection['bbox']
|
||||
x1, y1, x2, y2 = map(int, box)
|
||||
|
||||
# Predict attributes
|
||||
gender_id, age = age_gender_predictor.predict(frame, box)
|
||||
gender_str = 'Female' if gender_id == 0 else 'Male'
|
||||
|
||||
# Prepare text and draw on the frame
|
||||
label = f'{gender_str}, {age}'
|
||||
cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
|
||||
cv2.putText(
|
||||
frame,
|
||||
label,
|
||||
(x1, y1 - 10),
|
||||
cv2.FONT_HERSHEY_SIMPLEX,
|
||||
0.8,
|
||||
(0, 255, 0),
|
||||
2,
|
||||
)
|
||||
|
||||
# Display the resulting frame
|
||||
cv2.imshow("Age and Gender Inference (Press 'q' to quit)", frame)
|
||||
|
||||
# Break the loop if 'q' is pressed
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
# Release resources
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
print('Inference stopped.')
|
||||
return self.postprocess(prediction)
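A short sketch of the new AttributeResult-based API, assuming a RetinaFace detector that returns Face objects; the image path is illustrative:

    import cv2

    from uniface import AgeGender, RetinaFace

    detector = RetinaFace()
    age_gender = AgeGender()

    image = cv2.imread('face.jpg')  # illustrative path
    for face in detector.detect(image):
        result = age_gender.predict(image, face.bbox)
        label = 'Female' if result.gender == 0 else 'Male'
        print(f'{label}, {result.age} years')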
|
||||
|
||||
@@ -7,6 +7,10 @@ from typing import Any
|
||||
|
||||
import numpy as np
|
||||
|
||||
from uniface.types import AttributeResult, EmotionResult
|
||||
|
||||
__all__ = ['Attribute', 'AttributeResult', 'EmotionResult']
|
||||
|
||||
|
||||
class Attribute(ABC):
|
||||
"""
|
||||
|
||||
@@ -2,7 +2,6 @@
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from typing import List, Tuple, Union
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
@@ -13,6 +12,7 @@ from uniface.constants import DDAMFNWeights
|
||||
from uniface.face_utils import face_alignment
|
||||
from uniface.log import Logger
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.types import EmotionResult
|
||||
|
||||
__all__ = ['Emotion']
|
||||
|
||||
@@ -29,7 +29,7 @@ class Emotion(Attribute):
|
||||
def __init__(
|
||||
self,
|
||||
model_weights: DDAMFNWeights = DDAMFNWeights.AFFECNET7,
|
||||
input_size: Tuple[int, int] = (112, 112),
|
||||
input_size: tuple[int, int] = (112, 112),
|
||||
) -> None:
|
||||
"""
|
||||
Initializes the emotion recognition model.
|
||||
@@ -81,7 +81,7 @@ class Emotion(Attribute):
|
||||
Logger.error(f"Failed to load Emotion model from '{self.model_path}'", exc_info=True)
|
||||
raise RuntimeError(f'Failed to initialize Emotion model: {e}') from e
|
||||
|
||||
def preprocess(self, image: np.ndarray, landmark: Union[List, np.ndarray]) -> torch.Tensor:
|
||||
def preprocess(self, image: np.ndarray, landmark: list | np.ndarray) -> torch.Tensor:
|
||||
"""
|
||||
Aligns the face using landmarks and preprocesses it into a tensor.
|
||||
|
||||
@@ -106,7 +106,7 @@ class Emotion(Attribute):
|
||||
|
||||
return torch.from_numpy(transposed_image).unsqueeze(0).to(self.device)
|
||||
|
||||
def postprocess(self, prediction: torch.Tensor) -> Tuple[str, float]:
|
||||
def postprocess(self, prediction: torch.Tensor) -> EmotionResult:
|
||||
"""
|
||||
Processes the raw model output to get the emotion label and confidence score.
|
||||
"""
|
||||
@@ -114,9 +114,9 @@ class Emotion(Attribute):
|
||||
pred_index = np.argmax(probabilities)
|
||||
emotion_label = self.emotion_labels[pred_index]
|
||||
confidence = float(probabilities[pred_index])
|
||||
return emotion_label, confidence
|
||||
return EmotionResult(emotion=emotion_label, confidence=confidence)
|
||||
|
||||
def predict(self, image: np.ndarray, landmark: Union[List, np.ndarray]) -> Tuple[str, float]:
|
||||
def predict(self, image: np.ndarray, landmark: list | np.ndarray) -> EmotionResult:
|
||||
"""
|
||||
Predicts the emotion from a single face specified by its landmarks.
|
||||
"""
|
||||
@@ -127,68 +127,3 @@ class Emotion(Attribute):
|
||||
output = output[0]
|
||||
|
||||
return self.postprocess(output)
|
||||
|
||||
|
||||
# TODO: below is only for testing, remove it later
|
||||
if __name__ == '__main__':
|
||||
from uniface.constants import RetinaFaceWeights
|
||||
from uniface.detection import create_detector
|
||||
|
||||
print('Initializing models for live inference...')
|
||||
# 1. Initialize the face detector
|
||||
# Using a smaller model for faster real-time performance
|
||||
detector = create_detector(model_name=RetinaFaceWeights.MNET_V2)
|
||||
|
||||
# 2. Initialize the attribute predictor
|
||||
emotion_predictor = Emotion()
|
||||
|
||||
# 3. Start webcam capture
|
||||
cap = cv2.VideoCapture(0)
|
||||
if not cap.isOpened():
|
||||
print('Error: Could not open webcam.')
|
||||
exit()
|
||||
|
||||
print("Starting webcam feed. Press 'q' to quit.")
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
print('Error: Failed to capture frame.')
|
||||
break
|
||||
|
||||
# Detect faces in the current frame.
|
||||
# This method returns a list of dictionaries for each detected face.
|
||||
detections = detector.detect(frame)
|
||||
|
||||
# For each detected face, predict the emotion
|
||||
for detection in detections:
|
||||
box = detection['bbox']
|
||||
landmark = detection['landmarks']
|
||||
x1, y1, x2, y2 = map(int, box)
|
||||
|
||||
# Predict attributes using the landmark
|
||||
emotion, confidence = emotion_predictor.predict(frame, landmark)
|
||||
|
||||
# Prepare text and draw on the frame
|
||||
label = f'{emotion} ({confidence:.2f})'
|
||||
cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)
|
||||
cv2.putText(
|
||||
frame,
|
||||
label,
|
||||
(x1, y1 - 10),
|
||||
cv2.FONT_HERSHEY_SIMPLEX,
|
||||
0.8,
|
||||
(255, 0, 0),
|
||||
2,
|
||||
)
|
||||
|
||||
# Display the resulting frame
|
||||
cv2.imshow("Emotion Inference (Press 'q' to quit)", frame)
|
||||
|
||||
# Break the loop if 'q' is pressed
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
# Release resources
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
print('Inference stopped.')
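A minimal sketch of the EmotionResult-based API (requires PyTorch), assuming a detector that returns Face objects with 5-point landmarks; the image path is illustrative:

    import cv2

    from uniface import Emotion, RetinaFace

    detector = RetinaFace()
    emotion_model = Emotion()

    image = cv2.imread('face.jpg')  # illustrative path
    for face in detector.detect(image):
        result = emotion_model.predict(image, face.landmarks)
        print(result.emotion, f'{result.confidence:.2f}')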
|
||||
|
||||
193
uniface/attribute/fairface.py
Normal file
193
uniface/attribute/fairface.py
Normal file
@@ -0,0 +1,193 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from uniface.attribute.base import Attribute
|
||||
from uniface.constants import FairFaceWeights
|
||||
from uniface.log import Logger
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.onnx_utils import create_onnx_session
|
||||
from uniface.types import AttributeResult
|
||||
|
||||
__all__ = ['AGE_LABELS', 'RACE_LABELS', 'FairFace']
|
||||
|
||||
# Label definitions
|
||||
RACE_LABELS = [
|
||||
'White',
|
||||
'Black',
|
||||
'Latino Hispanic',
|
||||
'East Asian',
|
||||
'Southeast Asian',
|
||||
'Indian',
|
||||
'Middle Eastern',
|
||||
]
|
||||
AGE_LABELS = ['0-2', '3-9', '10-19', '20-29', '30-39', '40-49', '50-59', '60-69', '70+']
|
||||
|
||||
|
||||
class FairFace(Attribute):
|
||||
"""
|
||||
FairFace attribute prediction model using ONNX Runtime.
|
||||
|
||||
This class inherits from the base `Attribute` class and implements the
|
||||
functionality for predicting race (7 categories), gender (2 categories),
|
||||
and age (9 groups) from a face image. It requires a bounding box to locate the face.
|
||||
|
||||
The model is trained on the FairFace dataset which provides balanced demographics
|
||||
for more equitable predictions across different racial and gender groups.
|
||||
|
||||
Args:
|
||||
model_name (FairFaceWeights): The enum specifying the model weights to load.
|
||||
Defaults to `FairFaceWeights.DEFAULT`.
|
||||
input_size (Optional[Tuple[int, int]]): Input size (height, width).
|
||||
If None, defaults to (224, 224). Defaults to None.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
model_name: FairFaceWeights = FairFaceWeights.DEFAULT,
|
||||
input_size: tuple[int, int] | None = None,
|
||||
) -> None:
|
||||
"""
|
||||
Initializes the FairFace prediction model.
|
||||
|
||||
Args:
|
||||
model_name (FairFaceWeights): The enum specifying the model weights to load.
|
||||
input_size (Optional[Tuple[int, int]]): Input size (height, width).
|
||||
If None, defaults to (224, 224).
|
||||
"""
|
||||
Logger.info(f'Initializing FairFace with model={model_name.name}')
|
||||
self.model_path = verify_model_weights(model_name)
|
||||
self.input_size = input_size if input_size is not None else (224, 224)
|
||||
self._initialize_model()
|
||||
|
||||
def _initialize_model(self) -> None:
|
||||
"""
|
||||
Initializes the ONNX model and creates an inference session.
|
||||
"""
|
||||
try:
|
||||
self.session = create_onnx_session(self.model_path)
|
||||
# Get model input details from the loaded model
|
||||
input_meta = self.session.get_inputs()[0]
|
||||
self.input_name = input_meta.name
|
||||
self.output_names = [output.name for output in self.session.get_outputs()]
|
||||
Logger.info(f'Successfully initialized FairFace model with input size {self.input_size}')
|
||||
except Exception as e:
|
||||
Logger.error(
|
||||
f"Failed to load FairFace model from '{self.model_path}'",
|
||||
exc_info=True,
|
||||
)
|
||||
raise RuntimeError(f'Failed to initialize FairFace model: {e}') from e
|
||||
|
||||
def preprocess(self, image: np.ndarray, bbox: list | np.ndarray | None = None) -> np.ndarray:
|
||||
"""
|
||||
Preprocesses the face image for inference.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): The input image in BGR format.
|
||||
bbox (Optional[Union[List, np.ndarray]]): Face bounding box [x1, y1, x2, y2].
|
||||
If None, uses the entire image.
|
||||
|
||||
Returns:
|
||||
np.ndarray: The preprocessed image blob ready for inference.
|
||||
"""
|
||||
# Crop face if bbox provided
|
||||
if bbox is not None:
|
||||
bbox = np.asarray(bbox, dtype=int)
|
||||
x1, y1, x2, y2 = bbox[:4]
|
||||
|
||||
# Add padding (25% of face size)
|
||||
w, h = x2 - x1, y2 - y1
|
||||
padding = 0.25
|
||||
x_pad = int(w * padding)
|
||||
y_pad = int(h * padding)
|
||||
|
||||
x1 = max(0, x1 - x_pad)
|
||||
y1 = max(0, y1 - y_pad)
|
||||
x2 = min(image.shape[1], x2 + x_pad)
|
||||
y2 = min(image.shape[0], y2 + y_pad)
|
||||
|
||||
image = image[y1:y2, x1:x2]
|
||||
|
||||
# Resize to input size (width, height for cv2.resize)
|
||||
image = cv2.resize(image, self.input_size[::-1])
|
||||
|
||||
# Convert BGR to RGB
|
||||
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
|
||||
|
||||
# Normalize with ImageNet mean and std
|
||||
image = image.astype(np.float32) / 255.0
|
||||
mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
|
||||
std = np.array([0.229, 0.224, 0.225], dtype=np.float32)
|
||||
image = (image - mean) / std
|
||||
|
||||
# Transpose to CHW format and add batch dimension
|
||||
image = np.transpose(image, (2, 0, 1))
|
||||
image = np.expand_dims(image, axis=0)
|
||||
|
||||
return image
|
||||
|
||||
def postprocess(self, prediction: tuple[np.ndarray, np.ndarray, np.ndarray]) -> AttributeResult:
|
||||
"""
|
||||
Processes the raw model output to extract race, gender, and age.
|
||||
|
||||
Args:
|
||||
prediction (Tuple[np.ndarray, np.ndarray, np.ndarray]): Raw outputs from model
|
||||
(race_logits, gender_logits, age_logits).
|
||||
|
||||
Returns:
|
||||
AttributeResult: Result containing gender (0=Female, 1=Male), age_group, and race.
|
||||
"""
|
||||
race_logits, gender_logits, age_logits = prediction
|
||||
|
||||
# Apply softmax
|
||||
race_probs = self._softmax(race_logits[0])
|
||||
gender_probs = self._softmax(gender_logits[0])
|
||||
age_probs = self._softmax(age_logits[0])
|
||||
|
||||
# Get predictions
|
||||
race_idx = int(np.argmax(race_probs))
|
||||
raw_gender_idx = int(np.argmax(gender_probs))
|
||||
age_idx = int(np.argmax(age_probs))
|
||||
|
||||
# Normalize gender: model outputs 0=Male, 1=Female → standard 0=Female, 1=Male
|
||||
gender = 1 - raw_gender_idx
|
||||
|
||||
return AttributeResult(
|
||||
gender=gender,
|
||||
age_group=AGE_LABELS[age_idx],
|
||||
race=RACE_LABELS[race_idx],
|
||||
)
|
||||
|
||||
def predict(self, image: np.ndarray, bbox: list | np.ndarray | None = None) -> AttributeResult:
|
||||
"""
|
||||
Predicts race, gender, and age for a face.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): The input image in BGR format.
|
||||
bbox (Optional[Union[List, np.ndarray]]): Face bounding box [x1, y1, x2, y2].
|
||||
If None, uses the entire image.
|
||||
|
||||
Returns:
|
||||
AttributeResult: Result containing:
|
||||
- gender: 0=Female, 1=Male
|
||||
- age_group: Age range string like "20-29"
|
||||
- race: Race/ethnicity label
|
||||
"""
|
||||
# Preprocess
|
||||
input_blob = self.preprocess(image, bbox)
|
||||
|
||||
# Inference
|
||||
outputs = self.session.run(self.output_names, {self.input_name: input_blob})
|
||||
|
||||
# Postprocess
|
||||
return self.postprocess(outputs)
|
||||
|
||||
@staticmethod
|
||||
def _softmax(x: np.ndarray) -> np.ndarray:
|
||||
"""Compute softmax values for numerical stability."""
|
||||
exp_x = np.exp(x - np.max(x))
|
||||
return exp_x / np.sum(exp_x)
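A usage sketch for the new FairFace predictor, assuming the AttributeResult fields shown above; note that gender is already normalized to the library convention (0=Female, 1=Male):

    import cv2

    from uniface import FairFace, RetinaFace

    detector = RetinaFace()
    fairface = FairFace()

    image = cv2.imread('face.jpg')  # illustrative path
    for face in detector.detect(image):
        result = fairface.predict(image, face.bbox)
        print(result.gender, result.age_group, result.race)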
|
||||
@@ -2,34 +2,42 @@
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import itertools
|
||||
import math
|
||||
from typing import List, Optional, Tuple
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
__all__ = [
|
||||
'resize_image',
|
||||
'generate_anchors',
|
||||
'non_max_suppression',
|
||||
'decode_boxes',
|
||||
'decode_landmarks',
|
||||
'distance2bbox',
|
||||
'distance2kps',
|
||||
'generate_anchors',
|
||||
'non_max_suppression',
|
||||
'resize_image',
|
||||
]
|
||||
|
||||
|
||||
def resize_image(frame, target_shape: Tuple[int, int] = (640, 640)) -> Tuple[np.ndarray, float]:
|
||||
"""
|
||||
Resize an image to fit within a target shape while keeping its aspect ratio.
|
||||
def resize_image(
|
||||
frame: np.ndarray,
|
||||
target_shape: tuple[int, int] = (640, 640),
|
||||
) -> tuple[np.ndarray, float]:
|
||||
"""Resize an image to fit within a target shape while keeping its aspect ratio.
|
||||
|
||||
The image is resized to fit within the target dimensions and placed on a
|
||||
blank canvas (zero-padded to target size).
|
||||
|
||||
Args:
|
||||
frame (np.ndarray): Input image.
|
||||
target_shape (Tuple[int, int]): Target size (width, height). Defaults to (640, 640).
|
||||
frame: Input image with shape (H, W, C).
|
||||
target_shape: Target size as (width, height). Defaults to (640, 640).
|
||||
|
||||
Returns:
|
||||
Tuple[np.ndarray, float]: Resized image on a blank canvas and the resize factor.
|
||||
A tuple containing:
|
||||
- Resized image on a blank canvas with shape (height, width, 3).
|
||||
- The resize factor as a float.
|
||||
"""
|
||||
width, height = target_shape
|
||||
|
||||
@@ -53,16 +61,16 @@ def resize_image(frame, target_shape: Tuple[int, int] = (640, 640)) -> Tuple[np.
|
||||
return image, resize_factor
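A sketch of the typical round trip, assuming resize_image is in scope and that boxes predicted on the zero-padded canvas map back to the original image by dividing by the returned resize factor (top-left padding):

    import cv2
    import numpy as np

    image = cv2.imread('your_image.jpg')
    canvas, resize_factor = resize_image(image, target_shape=(640, 640))
    print(canvas.shape, resize_factor)  # (640, 640, 3) and the applied scale

    # Map a box predicted on the canvas back to original-image coordinates.
    box_on_canvas = np.array([100.0, 120.0, 220.0, 260.0])
    box_on_original = box_on_canvas / resize_factor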
|
||||
|
||||
|
||||
def generate_anchors(image_size: Tuple[int, int] = (640, 640)) -> np.ndarray:
|
||||
"""
|
||||
Generate anchor boxes for a given image size (RetinaFace specific).
|
||||
def generate_anchors(image_size: tuple[int, int] = (640, 640)) -> np.ndarray:
|
||||
"""Generate anchor boxes for a given image size (RetinaFace specific).
|
||||
|
||||
Args:
|
||||
image_size (Tuple[int, int]): Input image size (width, height). Defaults to (640, 640).
|
||||
image_size: Input image size as (width, height). Defaults to (640, 640).
|
||||
|
||||
Returns:
|
||||
np.ndarray: Anchor box coordinates as a NumPy array with shape (num_anchors, 4).
|
||||
Anchor box coordinates as a numpy array with shape (num_anchors, 4).
|
||||
"""
|
||||
# RetinaFace FPN strides and corresponding anchor sizes per level
|
||||
steps = [8, 16, 32]
|
||||
min_sizes = [[16, 32], [64, 128], [256, 512]]
|
||||
|
||||
@@ -85,16 +93,15 @@ def generate_anchors(image_size: Tuple[int, int] = (640, 640)) -> np.ndarray:
|
||||
return output
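As a sanity check on the configuration above (strides 8/16/32 with two anchor sizes per level), a 640x640 input should yield 80*80*2 + 40*40*2 + 20*20*2 = 16800 anchors; a sketch assuming the function above is in scope:

    expected = sum((640 // s) * (640 // s) * 2 for s in (8, 16, 32))
    print(expected)                            # 16800
    print(generate_anchors((640, 640)).shape)  # expected (16800, 4)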
|
||||
|
||||
|
||||
def non_max_suppression(dets: np.ndarray, threshold: float) -> List[int]:
|
||||
"""
|
||||
Apply Non-Maximum Suppression (NMS) to reduce overlapping bounding boxes based on a threshold.
|
||||
def non_max_suppression(dets: np.ndarray, threshold: float) -> list[int]:
|
||||
"""Apply Non-Maximum Suppression (NMS) to reduce overlapping bounding boxes.
|
||||
|
||||
Args:
|
||||
dets (np.ndarray): Array of detections with each row as [x1, y1, x2, y2, score].
|
||||
threshold (float): IoU threshold for suppression.
|
||||
dets: Array of detections with each row as [x1, y1, x2, y2, score].
|
||||
threshold: IoU threshold for suppression.
|
||||
|
||||
Returns:
|
||||
List[int]: Indices of bounding boxes retained after suppression.
|
||||
Indices of bounding boxes retained after suppression.
|
||||
"""
|
||||
x1 = dets[:, 0]
|
||||
y1 = dets[:, 1]
|
||||
@@ -125,18 +132,22 @@ def non_max_suppression(dets: np.ndarray, threshold: float) -> List[int]:
|
||||
return keep
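A small sketch of the NMS helper, assuming the function above is in scope; with an IoU threshold of 0.4 the lower-scoring overlapping box should be suppressed:

    import numpy as np

    dets = np.array([
        [10, 10, 110, 110, 0.95],    # kept (highest score)
        [12, 12, 112, 112, 0.90],    # suppressed, overlaps the first box heavily
        [300, 300, 380, 380, 0.80],  # kept, no overlap
    ])
    print(non_max_suppression(dets, threshold=0.4))  # expected [0, 2]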
|
||||
|
||||
|
||||
def decode_boxes(loc: np.ndarray, priors: np.ndarray, variances: Optional[List[float]] = None) -> np.ndarray:
|
||||
"""
|
||||
Decode locations from predictions using priors to undo
|
||||
the encoding done for offset regression at train time (RetinaFace specific).
|
||||
def decode_boxes(
|
||||
loc: np.ndarray,
|
||||
priors: np.ndarray,
|
||||
variances: list[float] | None = None,
|
||||
) -> np.ndarray:
|
||||
"""Decode locations from predictions using priors (RetinaFace specific).
|
||||
|
||||
Undoes the encoding done for offset regression at train time.
|
||||
|
||||
Args:
|
||||
loc (np.ndarray): Location predictions for loc layers, shape: [num_priors, 4]
|
||||
priors (np.ndarray): Prior boxes in center-offset form, shape: [num_priors, 4]
|
||||
variances (Optional[List[float]]): Variances of prior boxes. Defaults to [0.1, 0.2].
|
||||
loc: Location predictions for loc layers, shape: [num_priors, 4].
|
||||
priors: Prior boxes in center-offset form, shape: [num_priors, 4].
|
||||
variances: Variances of prior boxes. Defaults to [0.1, 0.2].
|
||||
|
||||
Returns:
|
||||
np.ndarray: Decoded bounding box predictions with shape [num_priors, 4]
|
||||
Decoded bounding box predictions with shape [num_priors, 4].
|
||||
"""
|
||||
if variances is None:
|
||||
variances = [0.1, 0.2]
|
||||
@@ -155,18 +166,19 @@ def decode_boxes(loc: np.ndarray, priors: np.ndarray, variances: Optional[List[f
|
||||
|
||||
|
||||
def decode_landmarks(
|
||||
predictions: np.ndarray, priors: np.ndarray, variances: Optional[List[float]] = None
|
||||
predictions: np.ndarray,
|
||||
priors: np.ndarray,
|
||||
variances: list[float] | None = None,
|
||||
) -> np.ndarray:
|
||||
"""
|
||||
Decode landmark predictions using prior boxes (RetinaFace specific).
|
||||
"""Decode landmark predictions using prior boxes (RetinaFace specific).
|
||||
|
||||
Args:
|
||||
predictions (np.ndarray): Landmark predictions, shape: [num_priors, 10]
|
||||
priors (np.ndarray): Prior boxes, shape: [num_priors, 4]
|
||||
variances (Optional[List[float]]): Scaling factors for landmark offsets. Defaults to [0.1, 0.2].
|
||||
predictions: Landmark predictions, shape: [num_priors, 10].
|
||||
priors: Prior boxes, shape: [num_priors, 4].
|
||||
variances: Scaling factors for landmark offsets. Defaults to [0.1, 0.2].
|
||||
|
||||
Returns:
|
||||
np.ndarray: Decoded landmarks, shape: [num_priors, 10]
|
||||
Decoded landmarks, shape: [num_priors, 10].
|
||||
"""
|
||||
if variances is None:
|
||||
variances = [0.1, 0.2]
|
||||
@@ -187,18 +199,21 @@ def decode_landmarks(
|
||||
return landmarks
|
||||
|
||||
|
||||
def distance2bbox(points: np.ndarray, distance: np.ndarray, max_shape: Optional[Tuple[int, int]] = None) -> np.ndarray:
|
||||
"""
|
||||
Decode distance prediction to bounding box (SCRFD specific).
|
||||
def distance2bbox(
|
||||
points: np.ndarray,
|
||||
distance: np.ndarray,
|
||||
max_shape: tuple[int, int] | None = None,
|
||||
) -> np.ndarray:
|
||||
"""Decode distance prediction to bounding box (SCRFD specific).
|
||||
|
||||
Args:
|
||||
points (np.ndarray): Anchor points with shape (n, 2), [x, y].
|
||||
distance (np.ndarray): Distance from the given point to 4
|
||||
boundaries (left, top, right, bottom) with shape (n, 4).
|
||||
max_shape (Optional[Tuple[int, int]]): Shape of the image (height, width) for clipping.
|
||||
points: Anchor points with shape (n, 2), [x, y].
|
||||
distance: Distance from the given point to 4 boundaries
|
||||
(left, top, right, bottom) with shape (n, 4).
|
||||
max_shape: Shape of the image (height, width) for clipping.
|
||||
|
||||
Returns:
|
||||
np.ndarray: Decoded bounding boxes with shape (n, 4) as [x1, y1, x2, y2].
|
||||
Decoded bounding boxes with shape (n, 4) as [x1, y1, x2, y2].
|
||||
"""
|
||||
x1 = points[:, 0] - distance[:, 0]
|
||||
y1 = points[:, 1] - distance[:, 1]
|
||||
@@ -219,17 +234,20 @@ def distance2bbox(points: np.ndarray, distance: np.ndarray, max_shape: Optional[
|
||||
return np.stack([x1, y1, x2, y2], axis=-1)
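A tiny worked example, assuming distance2bbox is in scope: an anchor at (100, 100) with left/top/right/bottom distances (10, 20, 30, 40) decodes to the box [90, 80, 130, 140]:

    import numpy as np

    points = np.array([[100.0, 100.0]])
    distance = np.array([[10.0, 20.0, 30.0, 40.0]])
    print(distance2bbox(points, distance))  # [[ 90.  80. 130. 140.]]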
|
||||
|
||||
|
||||
def distance2kps(points: np.ndarray, distance: np.ndarray, max_shape: Optional[Tuple[int, int]] = None) -> np.ndarray:
|
||||
"""
|
||||
Decode distance prediction to keypoints (SCRFD specific).
|
||||
def distance2kps(
|
||||
points: np.ndarray,
|
||||
distance: np.ndarray,
|
||||
max_shape: tuple[int, int] | None = None,
|
||||
) -> np.ndarray:
|
||||
"""Decode distance prediction to keypoints (SCRFD specific).
|
||||
|
||||
Args:
|
||||
points (np.ndarray): Anchor points with shape (n, 2), [x, y].
|
||||
distance (np.ndarray): Distance from the given point to keypoints with shape (n, 2k).
|
||||
max_shape (Optional[Tuple[int, int]]): Shape of the image (height, width) for clipping.
|
||||
points: Anchor points with shape (n, 2), [x, y].
|
||||
distance: Distance from the given point to keypoints with shape (n, 2k).
|
||||
max_shape: Shape of the image (height, width) for clipping.
|
||||
|
||||
Returns:
|
||||
np.ndarray: Decoded keypoints with shape (n, 2k).
|
||||
Decoded keypoints with shape (n, 2k).
|
||||
"""
|
||||
preds = []
|
||||
for i in range(0, distance.shape[1], 2):
|
||||
|
||||
@@ -3,7 +3,6 @@
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from enum import Enum
|
||||
from typing import Dict
|
||||
|
||||
|
||||
# fmt: off
|
||||
@@ -88,6 +87,15 @@ class AgeGenderWeights(str, Enum):
|
||||
DEFAULT = "age_gender"
|
||||
|
||||
|
||||
class FairFaceWeights(str, Enum):
|
||||
"""
|
||||
FairFace attribute prediction (race, gender, age).
|
||||
Trained on FairFace dataset with balanced demographics.
|
||||
https://github.com/yakhyo/fairface-onnx
|
||||
"""
|
||||
DEFAULT = "fairface"
|
||||
|
||||
|
||||
class LandmarkWeights(str, Enum):
|
||||
"""
|
||||
MobileNet 0.5 from Insightface
|
||||
@@ -119,7 +127,21 @@ class ParsingWeights(str, Enum):
|
||||
RESNET34 = "parsing_resnet34"
|
||||
|
||||
|
||||
MODEL_URLS: Dict[Enum, str] = {
|
||||
class MiniFASNetWeights(str, Enum):
|
||||
"""
|
||||
MiniFASNet: Lightweight Face Anti-Spoofing models.
|
||||
Trained on face anti-spoofing datasets.
|
||||
https://github.com/yakhyo/face-anti-spoofing
|
||||
|
||||
Model Variants:
|
||||
- V1SE: Uses scale=4.0 for face crop (squeeze-and-excitation version)
|
||||
- V2: Uses scale=2.7 for face crop (improved version)
|
||||
"""
|
||||
V1SE = "minifasnet_v1se"
|
||||
V2 = "minifasnet_v2"
|
||||
|
||||
|
||||
MODEL_URLS: dict[Enum, str] = {
|
||||
# RetinaFace
|
||||
RetinaFaceWeights.MNET_025: 'https://github.com/yakhyo/uniface/releases/download/weights/retinaface_mv1_0.25.onnx',
|
||||
RetinaFaceWeights.MNET_050: 'https://github.com/yakhyo/uniface/releases/download/weights/retinaface_mv1_0.50.onnx',
|
||||
@@ -150,6 +172,8 @@ MODEL_URLS: Dict[Enum, str] = {
|
||||
DDAMFNWeights.AFFECNET8: 'https://github.com/yakhyo/uniface/releases/download/weights/affecnet8.script',
|
||||
# AgeGender
|
||||
AgeGenderWeights.DEFAULT: 'https://github.com/yakhyo/uniface/releases/download/weights/genderage.onnx',
|
||||
# FairFace
|
||||
FairFaceWeights.DEFAULT: 'https://github.com/yakhyo/fairface-onnx/releases/download/weights/fairface.onnx',
|
||||
# Landmarks
|
||||
LandmarkWeights.DEFAULT: 'https://github.com/yakhyo/uniface/releases/download/weights/2d106det.onnx',
|
||||
# Gaze (MobileGaze)
|
||||
@@ -161,9 +185,12 @@ MODEL_URLS: Dict[Enum, str] = {
|
||||
# Parsing
|
||||
ParsingWeights.RESNET18: 'https://github.com/yakhyo/face-parsing/releases/download/weights/resnet18.onnx',
|
||||
ParsingWeights.RESNET34: 'https://github.com/yakhyo/face-parsing/releases/download/weights/resnet34.onnx',
|
||||
# Anti-Spoofing (MiniFASNet)
|
||||
MiniFASNetWeights.V1SE: 'https://github.com/yakhyo/face-anti-spoofing/releases/download/weights/MiniFASNetV1SE.onnx',
|
||||
MiniFASNetWeights.V2: 'https://github.com/yakhyo/face-anti-spoofing/releases/download/weights/MiniFASNetV2.onnx',
|
||||
}
|
||||
|
||||
MODEL_SHA256: Dict[Enum, str] = {
|
||||
MODEL_SHA256: dict[Enum, str] = {
|
||||
# RetinaFace
|
||||
RetinaFaceWeights.MNET_025: 'b7a7acab55e104dce6f32cdfff929bd83946da5cd869b9e2e9bdffafd1b7e4a5',
|
||||
RetinaFaceWeights.MNET_050: 'd8977186f6037999af5b4113d42ba77a84a6ab0c996b17c713cc3d53b88bfc37',
|
||||
@@ -194,6 +221,8 @@ MODEL_SHA256: Dict[Enum, str] = {
|
||||
DDAMFNWeights.AFFECNET8: '8c66963bc71db42796a14dfcbfcd181b268b65a3fc16e87147d6a3a3d7e0f487',
|
||||
# AgeGender
|
||||
AgeGenderWeights.DEFAULT: '4fde69b1c810857b88c64a335084f1c3fe8f01246c9a191b48c7bb756d6652fb',
|
||||
# FairFace
|
||||
FairFaceWeights.DEFAULT: '9c8c47d437cd310538d233f2465f9ed0524cb7fb51882a37f74e8bc22437fdbf',
|
||||
# Landmark
|
||||
LandmarkWeights.DEFAULT: 'f001b856447c413801ef5c42091ed0cd516fcd21f2d6b79635b1e733a7109dbf',
|
||||
# MobileGaze (trained on Gaze360)
|
||||
@@ -205,6 +234,9 @@ MODEL_SHA256: Dict[Enum, str] = {
|
||||
# Face Parsing
|
||||
ParsingWeights.RESNET18: '0d9bd318e46987c3bdbfacae9e2c0f461cae1c6ac6ea6d43bbe541a91727e33f',
|
||||
ParsingWeights.RESNET34: '5b805bba7b5660ab7070b5a381dcf75e5b3e04199f1e9387232a77a00095102e',
|
||||
# Anti-Spoofing (MiniFASNet)
|
||||
MiniFASNetWeights.V1SE: 'ebab7f90c7833fbccd46d3a555410e78d969db5438e169b6524be444862b3676',
|
||||
MiniFASNetWeights.V2: 'b32929adc2d9c34b9486f8c4c7bc97c1b69bc0ea9befefc380e4faae4e463907',
|
||||
}
|
||||
|
||||
CHUNK_SIZE = 8192
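A sketch of how the new enums plug into weight resolution, assuming verify_model_weights behaves as used in the constructors above (download on first use, verify against MODEL_SHA256, return the local path):

    from uniface.constants import FairFaceWeights, MiniFASNetWeights
    from uniface.model_store import verify_model_weights

    fairface_path = verify_model_weights(FairFaceWeights.DEFAULT)
    spoofing_path = verify_model_weights(MiniFASNetWeights.V2)
    print(fairface_path, spoofing_path)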
|
||||
|
||||
@@ -2,47 +2,53 @@
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, List
|
||||
from typing import Any
|
||||
|
||||
import numpy as np
|
||||
|
||||
from uniface.types import Face
|
||||
|
||||
from .base import BaseDetector
|
||||
from .retinaface import RetinaFace
|
||||
from .scrfd import SCRFD
|
||||
from .yolov5 import YOLOv5Face
|
||||
|
||||
# Global cache for detector instances
|
||||
_detector_cache: Dict[str, BaseDetector] = {}
|
||||
# Global cache for detector instances (keyed by method name + config hash)
|
||||
_detector_cache: dict[str, BaseDetector] = {}
|
||||
|
||||
|
||||
def detect_faces(image: np.ndarray, method: str = 'retinaface', **kwargs) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
High-level face detection function.
|
||||
def detect_faces(image: np.ndarray, method: str = 'retinaface', **kwargs: Any) -> list[Face]:
|
||||
"""High-level face detection function.
|
||||
|
||||
Detects faces in an image using the specified detection method.
|
||||
Results are cached for repeated calls with the same configuration.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): Input image as numpy array.
|
||||
method (str): Detection method to use. Options: 'retinaface', 'scrfd', 'yolov5face'.
|
||||
image: Input image as numpy array with shape (H, W, C) in BGR format.
|
||||
method: Detection method to use. Options: 'retinaface', 'scrfd', 'yolov5face'.
|
||||
**kwargs: Additional arguments passed to the detector.
|
||||
|
||||
Returns:
|
||||
List[Dict[str, Any]]: A list of dictionaries, where each dictionary represents a detected face and contains:
|
||||
- 'bbox' (List[float]): [x1, y1, x2, y2] bounding box coordinates.
|
||||
- 'confidence' (float): The confidence score of the detection.
|
||||
- 'landmarks' (List[List[float]]): 5-point facial landmarks.
|
||||
A list of Face objects, each containing:
|
||||
- bbox: [x1, y1, x2, y2] bounding box coordinates.
|
||||
- confidence: The confidence score of the detection.
|
||||
- landmarks: 5-point facial landmarks with shape (5, 2).
|
||||
|
||||
Example:
|
||||
>>> from uniface import detect_faces
|
||||
>>> image = cv2.imread("your_image.jpg")
|
||||
>>> faces = detect_faces(image, method='retinaface', conf_thresh=0.8)
|
||||
>>> import cv2
|
||||
>>> image = cv2.imread('your_image.jpg')
|
||||
>>> faces = detect_faces(image, method='retinaface', confidence_threshold=0.8)
|
||||
>>> for face in faces:
|
||||
... print(f"Found face with confidence: {face['confidence']}")
|
||||
... print(f"BBox: {face['bbox']}")
|
||||
... print(f'Found face with confidence: {face.confidence}')
|
||||
... print(f'BBox: {face.bbox}')
|
||||
"""
|
||||
method_name = method.lower()
|
||||
|
||||
sorted_kwargs = sorted(kwargs.items())
|
||||
cache_key = f'{method_name}_{str(sorted_kwargs)}'
|
||||
cache_key = f'{method_name}_{sorted_kwargs!s}'
|
||||
|
||||
if cache_key not in _detector_cache:
|
||||
# Pass kwargs to create the correctly configured detector
|
||||
@@ -52,49 +58,36 @@ def detect_faces(image: np.ndarray, method: str = 'retinaface', **kwargs) -> Lis
|
||||
return detector.detect(image)
|
||||
|
||||
|
||||
def create_detector(method: str = 'retinaface', **kwargs) -> BaseDetector:
|
||||
"""
|
||||
Factory function to create face detectors.
|
||||
def create_detector(method: str = 'retinaface', **kwargs: Any) -> BaseDetector:
|
||||
"""Factory function to create face detectors.
|
||||
|
||||
Args:
|
||||
method (str): Detection method. Options:
|
||||
method: Detection method. Options:
|
||||
- 'retinaface': RetinaFace detector (default)
|
||||
- 'scrfd': SCRFD detector (fast and accurate)
|
||||
- 'yolov5face': YOLOv5-Face detector (accurate with landmarks)
|
||||
**kwargs: Detector-specific parameters
|
||||
**kwargs: Detector-specific parameters.
|
||||
|
||||
Returns:
|
||||
BaseDetector: Initialized detector instance
|
||||
Initialized detector instance.
|
||||
|
||||
Raises:
|
||||
ValueError: If method is not supported
|
||||
ValueError: If method is not supported.
|
||||
|
||||
Examples:
|
||||
Example:
|
||||
>>> # Basic usage
|
||||
>>> detector = create_detector('retinaface')
|
||||
|
||||
>>> # SCRFD detector with custom parameters
|
||||
>>> from uniface.constants import SCRFDWeights
|
||||
>>> detector = create_detector(
|
||||
... 'scrfd',
|
||||
... model_name=SCRFDWeights.SCRFD_10G_KPS,
|
||||
... conf_thresh=0.8,
|
||||
... input_size=(640, 640)
|
||||
... 'scrfd', model_name=SCRFDWeights.SCRFD_10G_KPS, confidence_threshold=0.8, input_size=(640, 640)
|
||||
... )
|
||||
|
||||
>>> # RetinaFace detector
|
||||
>>> from uniface.constants import RetinaFaceWeights
|
||||
>>> detector = create_detector(
|
||||
... 'retinaface',
|
||||
... model_name=RetinaFaceWeights.MNET_V2,
|
||||
... conf_thresh=0.8,
|
||||
... nms_thresh=0.4
|
||||
... )
|
||||
|
||||
>>> # YOLOv5-Face detector
|
||||
>>> detector = create_detector(
|
||||
... 'yolov5face',
|
||||
... model_name=YOLOv5FaceWeights.YOLOV5S,
|
||||
... conf_thresh=0.25,
|
||||
... nms_thresh=0.45
|
||||
... 'retinaface', model_name=RetinaFaceWeights.MNET_V2, confidence_threshold=0.8, nms_threshold=0.4
|
||||
... )
|
||||
"""
|
||||
method = method.lower()
|
||||
@@ -113,12 +106,12 @@ def create_detector(method: str = 'retinaface', **kwargs) -> BaseDetector:
|
||||
raise ValueError(f"Unsupported detection method: '{method}'. Available methods: {available_methods}")
|
||||
|
||||
|
||||
def list_available_detectors() -> Dict[str, Dict[str, Any]]:
|
||||
"""
|
||||
List all available detection methods with their descriptions and parameters.
|
||||
def list_available_detectors() -> dict[str, dict[str, Any]]:
|
||||
"""List all available detection methods with their descriptions and parameters.
|
||||
|
||||
Returns:
|
||||
Dict[str, Dict[str, Any]]: Dictionary of detector information
|
||||
Dictionary mapping detector names to their information including
|
||||
description, landmark support, paper reference, and default parameters.
|
||||
"""
|
||||
return {
|
||||
'retinaface': {
|
||||
@@ -127,8 +120,8 @@ def list_available_detectors() -> Dict[str, Dict[str, Any]]:
|
||||
'paper': 'https://arxiv.org/abs/1905.00641',
|
||||
'default_params': {
|
||||
'model_name': 'mnet_v2',
|
||||
'conf_thresh': 0.5,
|
||||
'nms_thresh': 0.4,
|
||||
'confidence_threshold': 0.5,
|
||||
'nms_threshold': 0.4,
|
||||
'input_size': (640, 640),
|
||||
},
|
||||
},
|
||||
@@ -138,8 +131,8 @@ def list_available_detectors() -> Dict[str, Dict[str, Any]]:
|
||||
'paper': 'https://arxiv.org/abs/2105.04714',
|
||||
'default_params': {
|
||||
'model_name': 'scrfd_10g_kps',
|
||||
'conf_thresh': 0.5,
|
||||
'nms_thresh': 0.4,
|
||||
'confidence_threshold': 0.5,
|
||||
'nms_threshold': 0.4,
|
||||
'input_size': (640, 640),
|
||||
},
|
||||
},
|
||||
@@ -149,8 +142,8 @@ def list_available_detectors() -> Dict[str, Dict[str, Any]]:
|
||||
'paper': 'https://arxiv.org/abs/2105.12931',
|
||||
'default_params': {
|
||||
'model_name': 'yolov5s_face',
|
||||
'conf_thresh': 0.25,
|
||||
'nms_thresh': 0.45,
|
||||
'confidence_threshold': 0.25,
|
||||
'nms_threshold': 0.45,
|
||||
'input_size': 640,
|
||||
},
|
||||
},
|
||||
@@ -158,11 +151,11 @@ def list_available_detectors() -> Dict[str, Dict[str, Any]]:
|
||||
|
||||
|
||||
__all__ = [
|
||||
'detect_faces',
|
||||
'create_detector',
|
||||
'list_available_detectors',
|
||||
'SCRFD',
|
||||
'BaseDetector',
|
||||
'RetinaFace',
|
||||
'YOLOv5Face',
|
||||
'BaseDetector',
|
||||
'create_detector',
|
||||
'detect_faces',
|
||||
'list_available_detectors',
|
||||
]
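A sketch of the factory with the renamed keyword arguments (confidence_threshold / nms_threshold), assuming they are accepted as shown in the updated defaults above:

    from uniface.detection import create_detector, list_available_detectors

    for name, info in list_available_detectors().items():
        print(name, info['default_params'])

    detector = create_detector('scrfd', confidence_threshold=0.5, nms_threshold=0.4)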
|
||||
|
||||
@@ -2,75 +2,82 @@
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Any, Dict, List
|
||||
from typing import Any
|
||||
|
||||
import numpy as np
|
||||
|
||||
from uniface.types import Face
|
||||
|
||||
__all__ = ['BaseDetector']
|
||||
|
||||
|
||||
class BaseDetector(ABC):
|
||||
"""
|
||||
Abstract base class for all face detectors.
|
||||
"""Abstract base class for all face detectors.
|
||||
|
||||
This class defines the interface that all face detectors must implement,
|
||||
ensuring consistency across different detection methods.
|
||||
|
||||
Attributes:
|
||||
config: Dictionary containing detector configuration parameters.
|
||||
_supports_landmarks: Flag indicating if detector supports landmark detection.
|
||||
"""
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
"""Initialize the detector with configuration parameters."""
|
||||
self.config = kwargs
|
||||
|
||||
@abstractmethod
|
||||
def detect(self, image: np.ndarray, **kwargs) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Detect faces in an image.
|
||||
def __init__(self, **kwargs: Any) -> None:
|
||||
"""Initialize the detector with configuration parameters.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): Input image as numpy array with shape (H, W, C)
|
||||
**kwargs: Additional detection parameters
|
||||
**kwargs: Detector-specific configuration parameters.
|
||||
"""
|
||||
self.config: dict[str, Any] = kwargs
|
||||
self._supports_landmarks: bool = False
|
||||
|
||||
@abstractmethod
|
||||
def detect(self, image: np.ndarray, **kwargs: Any) -> list[Face]:
|
||||
"""Detect faces in an image.
|
||||
|
||||
Args:
|
||||
image: Input image as numpy array with shape (H, W, C) in BGR format.
|
||||
**kwargs: Additional detection parameters.
|
||||
|
||||
Returns:
|
||||
List[Dict[str, Any]]: List of detected faces, where each dictionary contains:
|
||||
- 'bbox' (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
|
||||
- 'confidence' (float): Detection confidence score (0.0 to 1.0)
|
||||
- 'landmarks' (np.ndarray): Facial landmarks with shape (5, 2) for 5-point landmarks
|
||||
or (68, 2) for 68-point landmarks. Empty array if not supported.
|
||||
List of detected Face objects, each containing:
|
||||
- bbox: Bounding box coordinates with shape (4,) as [x1, y1, x2, y2].
|
||||
- confidence: Detection confidence score (0.0 to 1.0).
|
||||
- landmarks: Facial landmarks with shape (5, 2) for 5-point landmarks.
|
||||
|
||||
Example:
|
||||
>>> faces = detector.detect(image)
|
||||
>>> for face in faces:
|
||||
... bbox = face['bbox'] # np.ndarray with shape (4,)
|
||||
... confidence = face['confidence'] # float
|
||||
... landmarks = face['landmarks'] # np.ndarray with shape (5, 2)
|
||||
... bbox = face.bbox # np.ndarray with shape (4,)
|
||||
... confidence = face.confidence # float
|
||||
... landmarks = face.landmarks # np.ndarray with shape (5, 2)
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def preprocess(self, image: np.ndarray) -> np.ndarray:
|
||||
"""
|
||||
Preprocess input image for detection.
|
||||
"""Preprocess input image for detection.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): Input image
|
||||
image: Input image with shape (H, W, C).
|
||||
|
||||
Returns:
|
||||
np.ndarray: Preprocessed image tensor
|
||||
Preprocessed image tensor ready for inference.
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def postprocess(self, outputs, **kwargs) -> Any:
|
||||
"""
|
||||
Postprocess model outputs to get final detections.
|
||||
def postprocess(self, outputs: Any, **kwargs: Any) -> Any:
|
||||
"""Postprocess model outputs to get final detections.
|
||||
|
||||
Args:
|
||||
outputs: Raw model outputs
|
||||
**kwargs: Additional postprocessing parameters
|
||||
outputs: Raw model outputs.
|
||||
**kwargs: Additional postprocessing parameters.
|
||||
|
||||
Returns:
|
||||
Any: Processed outputs (implementation-specific format, typically tuple of arrays)
|
||||
Processed outputs (implementation-specific format, typically tuple of arrays).
|
||||
"""
|
||||
pass
|
||||
|
||||
def __str__(self) -> str:
|
||||
"""String representation of the detector."""
|
||||
@@ -82,23 +89,33 @@ class BaseDetector(ABC):
|
||||
|
||||
@property
|
||||
def supports_landmarks(self) -> bool:
|
||||
"""
|
||||
Whether this detector supports landmark detection.
|
||||
"""Whether this detector supports landmark detection.
|
||||
|
||||
Returns:
|
||||
bool: True if landmarks are supported, False otherwise
|
||||
True if landmarks are supported, False otherwise.
|
||||
"""
|
||||
return hasattr(self, '_supports_landmarks') and self._supports_landmarks
|
||||
|
||||
def get_info(self) -> Dict[str, Any]:
|
||||
"""
|
||||
Get detector information and configuration.
|
||||
def get_info(self) -> dict[str, Any]:
|
||||
"""Get detector information and configuration.
|
||||
|
||||
Returns:
|
||||
Dict[str, Any]: Detector information
|
||||
Dictionary containing detector name, landmark support, and config.
|
||||
"""
|
||||
return {
|
||||
'name': self.__class__.__name__,
|
||||
'supports_landmarks': self._supports_landmarks,
|
||||
'config': self.config,
|
||||
}
|
||||
|
||||
def __call__(self, image: np.ndarray, **kwargs: Any) -> list[Face]:
|
||||
"""Callable shortcut for the `detect` method.
|
||||
|
||||
Args:
|
||||
image: Input image as numpy array with shape (H, W, C) in BGR format.
|
||||
**kwargs: Additional detection parameters.
|
||||
|
||||
Returns:
|
||||
List of detected Face objects.
|
||||
"""
|
||||
return self.detect(image, **kwargs)
|
||||
|
||||
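To make the contract above concrete, here is a minimal do-nothing subclass sketch. The placeholder outputs and the class itself are invented for illustration; only the method names, shapes, and the Face fields come from the interface documented in this hunk.

import numpy as np

from uniface.detection import BaseDetector
from uniface.types import Face


class DummyDetector(BaseDetector):
    """Sketch only: returns one full-frame Face so the interface shape is visible."""

    def preprocess(self, image: np.ndarray) -> np.ndarray:
        # (H, W, C) uint8 -> (1, C, H, W) float32, as the docstrings above describe
        return image.astype(np.float32).transpose(2, 0, 1)[np.newaxis]

    def postprocess(self, outputs, **kwargs):
        return outputs  # real detectors decode boxes/landmarks here

    def detect(self, image: np.ndarray, **kwargs) -> list[Face]:
        h, w = image.shape[:2]
        bbox = np.array([0, 0, w, h], dtype=np.float32)
        return [Face(bbox=bbox, confidence=1.0, landmarks=np.zeros((5, 2), dtype=np.float32))]


faces = DummyDetector()(np.zeros((480, 640, 3), dtype=np.uint8))  # __call__ delegates to detect()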
@@ -2,7 +2,9 @@
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from typing import Any, Dict, List, Literal, Tuple
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Literal
|
||||
|
||||
import numpy as np
|
||||
|
||||
@@ -17,6 +19,7 @@ from uniface.constants import RetinaFaceWeights
|
||||
from uniface.log import Logger
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.onnx_utils import create_onnx_session
|
||||
from uniface.types import Face
|
||||
|
||||
from .base import BaseDetector
|
||||
|
||||
@@ -31,8 +34,8 @@ class RetinaFace(BaseDetector):
|
||||
|
||||
Args:
|
||||
model_name (RetinaFaceWeights): Model weights to use. Defaults to `RetinaFaceWeights.MNET_V2`.
|
||||
conf_thresh (float): Confidence threshold for filtering detections. Defaults to 0.5.
|
||||
nms_thresh (float): Non-maximum suppression (NMS) IoU threshold. Defaults to 0.4.
|
||||
confidence_threshold (float): Confidence threshold for filtering detections. Defaults to 0.5.
|
||||
nms_threshold (float): Non-maximum suppression (NMS) IoU threshold. Defaults to 0.4.
|
||||
input_size (Tuple[int, int]): Fixed input size (width, height) if `dynamic_size=False`.
|
||||
Defaults to (640, 640).
|
||||
Note: Non-default sizes may cause slower inference and CoreML compatibility issues.
|
||||
@@ -43,8 +46,8 @@ class RetinaFace(BaseDetector):
|
||||
|
||||
Attributes:
|
||||
model_name (RetinaFaceWeights): Selected model variant.
|
||||
conf_thresh (float): Threshold for confidence-based filtering.
|
||||
nms_thresh (float): IoU threshold used for NMS.
|
||||
confidence_threshold (float): Threshold for confidence-based filtering.
|
||||
nms_threshold (float): IoU threshold used for NMS.
|
||||
pre_nms_topk (int): Limit on proposals before applying NMS.
|
||||
post_nms_topk (int): Limit on retained detections after NMS.
|
||||
dynamic_size (bool): Flag indicating dynamic or static input sizing.
|
||||
@@ -62,23 +65,23 @@ class RetinaFace(BaseDetector):
|
||||
self,
|
||||
*,
|
||||
model_name: RetinaFaceWeights = RetinaFaceWeights.MNET_V2,
|
||||
conf_thresh: float = 0.5,
|
||||
nms_thresh: float = 0.4,
|
||||
input_size: Tuple[int, int] = (640, 640),
|
||||
confidence_threshold: float = 0.5,
|
||||
nms_threshold: float = 0.4,
|
||||
input_size: tuple[int, int] = (640, 640),
|
||||
**kwargs: Any,
|
||||
) -> None:
|
||||
super().__init__(
|
||||
model_name=model_name,
|
||||
conf_thresh=conf_thresh,
|
||||
nms_thresh=nms_thresh,
|
||||
confidence_threshold=confidence_threshold,
|
||||
nms_threshold=nms_threshold,
|
||||
input_size=input_size,
|
||||
**kwargs,
|
||||
)
|
||||
self._supports_landmarks = True # RetinaFace supports landmarks
|
||||
|
||||
self.model_name = model_name
|
||||
self.conf_thresh = conf_thresh
|
||||
self.nms_thresh = nms_thresh
|
||||
self.confidence_threshold = confidence_threshold
|
||||
self.nms_threshold = nms_threshold
|
||||
self.input_size = input_size
|
||||
|
||||
# Advanced options from kwargs
|
||||
@@ -87,8 +90,8 @@ class RetinaFace(BaseDetector):
|
||||
self.dynamic_size = kwargs.get('dynamic_size', False)
|
||||
|
||||
Logger.info(
|
||||
f'Initializing RetinaFace with model={self.model_name}, conf_thresh={self.conf_thresh}, '
|
||||
f'nms_thresh={self.nms_thresh}, input_size={self.input_size}'
|
||||
f'Initializing RetinaFace with model={self.model_name}, confidence_threshold={self.confidence_threshold}, '
|
||||
f'nms_threshold={self.nms_threshold}, input_size={self.input_size}'
|
||||
)
|
||||
|
||||
# Get path to model weights
|
||||
@@ -104,14 +107,13 @@ class RetinaFace(BaseDetector):
|
||||
self._initialize_model(self._model_path)
|
||||
|
||||
def _initialize_model(self, model_path: str) -> None:
|
||||
"""
|
||||
Initializes an ONNX model session from the given path.
|
||||
"""Initialize an ONNX model session from the given path.
|
||||
|
||||
Args:
|
||||
model_path (str): The file path to the ONNX model.
|
||||
model_path: The file path to the ONNX model.
|
||||
|
||||
Raises:
|
||||
RuntimeError: If the model fails to load, logs an error and raises an exception.
|
||||
RuntimeError: If the model fails to load.
|
||||
"""
|
||||
try:
|
||||
self.session = create_onnx_session(model_path)
|
||||
@@ -136,14 +138,14 @@ class RetinaFace(BaseDetector):
|
||||
image = np.expand_dims(image, axis=0) # Add batch dimension (1, C, H, W)
|
||||
return image
|
||||
|
||||
def inference(self, input_tensor: np.ndarray) -> List[np.ndarray]:
|
||||
def inference(self, input_tensor: np.ndarray) -> list[np.ndarray]:
|
||||
"""Perform model inference on the preprocessed image tensor.
|
||||
|
||||
Args:
|
||||
input_tensor (np.ndarray): Preprocessed input tensor.
|
||||
input_tensor: Preprocessed input tensor with shape (1, C, H, W).
|
||||
|
||||
Returns:
|
||||
Tuple[np.ndarray, np.ndarray]: Raw model outputs.
|
||||
List of raw model outputs (location, confidence, landmarks).
|
||||
"""
|
||||
return self.session.run(self.output_names, {self.input_names: input_tensor})
|
||||
|
||||
@@ -154,7 +156,7 @@ class RetinaFace(BaseDetector):
|
||||
max_num: int = 0,
|
||||
metric: Literal['default', 'max'] = 'max',
|
||||
center_weight: float = 2.0,
|
||||
) -> List[Dict[str, Any]]:
|
||||
) -> list[Face]:
|
||||
"""
|
||||
Perform face detection on an input image and return bounding boxes and facial landmarks.
|
||||
|
||||
@@ -168,19 +170,19 @@ class RetinaFace(BaseDetector):
|
||||
when using the "default" metric. Defaults to 2.0.
|
||||
|
||||
Returns:
|
||||
List[Dict[str, Any]]: List of face detection dictionaries, each containing:
|
||||
- 'bbox' (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
|
||||
- 'confidence' (float): Detection confidence score (0.0 to 1.0)
|
||||
- 'landmarks' (np.ndarray): 5-point facial landmarks with shape (5, 2)
|
||||
List[Face]: List of Face objects, each containing:
|
||||
- bbox (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
|
||||
- confidence (float): Detection confidence score (0.0 to 1.0)
|
||||
- landmarks (np.ndarray): 5-point facial landmarks with shape (5, 2)
|
||||
|
||||
Example:
|
||||
>>> faces = detector.detect(image)
|
||||
>>> for face in faces:
|
||||
... bbox = face['bbox'] # np.ndarray with shape (4,)
|
||||
... confidence = face['confidence'] # float
|
||||
... landmarks = face['landmarks'] # np.ndarray with shape (5, 2)
|
||||
... bbox = face.bbox # np.ndarray with shape (4,)
|
||||
... confidence = face.confidence # float
|
||||
... landmarks = face.landmarks # np.ndarray with shape (5, 2)
|
||||
... # Can pass landmarks directly to recognition
|
||||
... embedding = recognizer.get_normalized_embedding(image, landmarks)
|
||||
... embedding = recognizer.get_normalized_embedding(image, face.landmarks)
|
||||
"""
|
||||
|
||||
original_height, original_width = image.shape[:2]
|
||||
@@ -229,51 +231,53 @@ class RetinaFace(BaseDetector):
|
||||
|
||||
faces = []
|
||||
for i in range(detections.shape[0]):
|
||||
face_dict = {
|
||||
'bbox': detections[i, :4],
|
||||
'confidence': float(detections[i, 4]),
|
||||
'landmarks': landmarks[i],
|
||||
}
|
||||
faces.append(face_dict)
|
||||
face = Face(
|
||||
bbox=detections[i, :4],
|
||||
confidence=float(detections[i, 4]),
|
||||
landmarks=landmarks[i],
|
||||
)
|
||||
faces.append(face)
|
||||
|
||||
return faces
|
||||
|
||||
def postprocess(
|
||||
self, outputs: List[np.ndarray], resize_factor: float, shape: Tuple[int, int]
|
||||
) -> Tuple[np.ndarray, np.ndarray]:
|
||||
"""
|
||||
Process the model outputs into final detection results.
|
||||
self,
|
||||
outputs: list[np.ndarray],
|
||||
resize_factor: float,
|
||||
shape: tuple[int, int],
|
||||
) -> tuple[np.ndarray, np.ndarray]:
|
||||
"""Process the model outputs into final detection results.
|
||||
|
||||
Args:
|
||||
outputs (List[np.ndarray]): Raw outputs from the detection model.
|
||||
outputs: Raw outputs from the detection model containing:
|
||||
- outputs[0]: Location predictions (bounding box coordinates).
|
||||
- outputs[1]: Class confidence scores.
|
||||
- outputs[2]: Landmark predictions.
|
||||
resize_factor (float): Factor used to resize the input image during preprocessing.
|
||||
shape (Tuple[int, int]): Original shape of the image as (height, width).
|
||||
resize_factor: Factor used to resize the input image during preprocessing.
|
||||
shape: Original shape of the image as (width, height).
|
||||
|
||||
Returns:
|
||||
Tuple[np.ndarray, np.ndarray]: Processed results containing:
|
||||
- detections (np.ndarray): Array of detected bounding boxes with confidence scores.
|
||||
Shape: (num_detections, 5), where each row is [x_min, y_min, x_max, y_max, score].
|
||||
- landmarks (np.ndarray): Array of detected facial landmarks.
|
||||
Shape: (num_detections, 5, 2), where each row contains 5 landmark points (x, y).
|
||||
A tuple containing:
|
||||
- detections: Array of detected bounding boxes with confidence scores,
|
||||
shape (num_detections, 5), each row is [x1, y1, x2, y2, score].
|
||||
- landmarks: Array of detected facial landmarks,
|
||||
shape (num_detections, 5, 2), each row contains 5 landmark points (x, y).
|
||||
"""
|
||||
loc, conf, landmarks = (
|
||||
location_predictions, confidence_scores, landmark_predictions = (
|
||||
outputs[0].squeeze(0),
|
||||
outputs[1].squeeze(0),
|
||||
outputs[2].squeeze(0),
|
||||
)
|
||||
|
||||
# Decode boxes and landmarks
|
||||
boxes = decode_boxes(loc, self._priors)
|
||||
landmarks = decode_landmarks(landmarks, self._priors)
|
||||
boxes = decode_boxes(location_predictions, self._priors)
|
||||
landmarks = decode_landmarks(landmark_predictions, self._priors)
|
||||
|
||||
boxes, landmarks = self._scale_detections(boxes, landmarks, resize_factor, shape=(shape[0], shape[1]))
|
||||
|
||||
# Extract confidence scores for the face class
|
||||
scores = conf[:, 1]
|
||||
mask = scores > self.conf_thresh
|
||||
scores = confidence_scores[:, 1]
|
||||
mask = scores > self.confidence_threshold
|
||||
|
||||
# Filter by confidence threshold
|
||||
boxes, landmarks, scores = boxes[mask], landmarks[mask], scores[mask]
|
||||
@@ -284,7 +288,7 @@ class RetinaFace(BaseDetector):
|
||||
|
||||
# Apply NMS
|
||||
detections = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
|
||||
keep = non_max_suppression(detections, self.nms_thresh)
|
||||
keep = non_max_suppression(detections, self.nms_threshold)
|
||||
detections, landmarks = detections[keep], landmarks[keep]
|
||||
|
||||
# Keep top-k detections
|
||||
@@ -302,9 +306,9 @@ class RetinaFace(BaseDetector):
|
||||
boxes: np.ndarray,
|
||||
landmarks: np.ndarray,
|
||||
resize_factor: float,
|
||||
shape: Tuple[int, int],
|
||||
) -> Tuple[np.ndarray, np.ndarray]:
|
||||
# Scale bounding boxes and landmarks to the original image size.
|
||||
shape: tuple[int, int],
|
||||
) -> tuple[np.ndarray, np.ndarray]:
|
||||
"""Scale bounding boxes and landmarks to the original image size."""
|
||||
bbox_scale = np.array([shape[0], shape[1]] * 2)
|
||||
boxes = boxes * bbox_scale / resize_factor
|
||||
|
||||
@@ -312,72 +316,3 @@ class RetinaFace(BaseDetector):
|
||||
landmarks = landmarks * landmark_scale / resize_factor
|
||||
|
||||
return boxes, landmarks
|
||||
|
||||
|
||||
# TODO: below is only for testing, remove it later
|
||||
def draw_bbox(frame, bbox, score, color=(0, 255, 0), thickness=2):
|
||||
x1, y1, x2, y2 = map(int, bbox) # Unpack 4 bbox values
|
||||
cv2.rectangle(frame, (x1, y1), (x2, y2), color, thickness)
|
||||
cv2.putText(frame, f'{score:.2f}', (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)
|
||||
|
||||
|
||||
def draw_keypoints(frame, points, color=(0, 0, 255), radius=2):
|
||||
for x, y in points.astype(np.int32):
|
||||
cv2.circle(frame, (int(x), int(y)), radius, color, -1)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
import cv2
|
||||
|
||||
detector = RetinaFace(model_name=RetinaFaceWeights.MNET_050)
|
||||
print(detector.get_info())
|
||||
cap = cv2.VideoCapture(0)
|
||||
|
||||
if not cap.isOpened():
|
||||
print('Failed to open webcam.')
|
||||
exit()
|
||||
|
||||
print("Webcam started. Press 'q' to exit.")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
print('Failed to read frame.')
|
||||
break
|
||||
|
||||
# Get face detections as list of dictionaries
|
||||
faces = detector.detect(frame)
|
||||
|
||||
# Process each detected face
|
||||
for face in faces:
|
||||
# Extract bbox and landmarks from dictionary
|
||||
bbox = face['bbox'] # [x1, y1, x2, y2]
|
||||
landmarks = face['landmarks'] # [[x1, y1], [x2, y2], ...]
|
||||
confidence = face['confidence']
|
||||
|
||||
# Pass bbox and confidence separately
|
||||
draw_bbox(frame, bbox, confidence)
|
||||
|
||||
# Convert landmarks to numpy array format if needed
|
||||
if landmarks is not None and len(landmarks) > 0:
|
||||
# Convert list of [x, y] pairs to numpy array
|
||||
points = np.array(landmarks, dtype=np.float32) # Shape: (5, 2)
|
||||
draw_keypoints(frame, points)
|
||||
|
||||
# Display face count
|
||||
cv2.putText(
|
||||
frame,
|
||||
f'Faces: {len(faces)}',
|
||||
(10, 30),
|
||||
cv2.FONT_HERSHEY_SIMPLEX,
|
||||
0.7,
|
||||
(255, 255, 255),
|
||||
2,
|
||||
)
|
||||
|
||||
cv2.imshow('FaceDetection', frame)
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
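The postprocess path above filters proposals by confidence_threshold and then runs non_max_suppression over rows of [x1, y1, x2, y2, score]. For readers unfamiliar with that step, a generic IoU-based NMS sketch follows; it illustrates the idea and is not the library's exact implementation.

import numpy as np

def nms_sketch(dets: np.ndarray, iou_thresh: float) -> list[int]:
    # dets: (N, 5) array of [x1, y1, x2, y2, score]
    x1, y1, x2, y2, scores = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3], dets[:, 4]
    areas = (x2 - x1) * (y2 - y1)
    order = scores.argsort()[::-1]            # highest score first
    keep: list[int] = []
    while order.size > 0:
        i = order[0]
        keep.append(int(i))
        # Intersection of the best remaining box with all others
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        inter = np.maximum(0.0, xx2 - xx1) * np.maximum(0.0, yy2 - yy1)
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        order = order[1:][iou <= iou_thresh]  # drop boxes that overlap too much
    return keep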
@@ -2,9 +2,10 @@
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from typing import Any, Dict, List, Literal, Tuple
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Literal
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from uniface.common import distance2bbox, distance2kps, non_max_suppression, resize_image
|
||||
@@ -12,6 +13,7 @@ from uniface.constants import SCRFDWeights
|
||||
from uniface.log import Logger
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.onnx_utils import create_onnx_session
|
||||
from uniface.types import Face
|
||||
|
||||
from .base import BaseDetector
|
||||
|
||||
@@ -29,8 +31,8 @@ class SCRFD(BaseDetector):
|
||||
Args:
|
||||
model_name (SCRFDWeights): Predefined model enum (e.g., `SCRFD_10G_KPS`).
|
||||
Specifies the SCRFD variant to load. Defaults to SCRFD_10G_KPS.
|
||||
conf_thresh (float): Confidence threshold for filtering detections. Defaults to 0.5.
|
||||
nms_thresh (float): Non-Maximum Suppression threshold. Defaults to 0.4.
|
||||
confidence_threshold (float): Confidence threshold for filtering detections. Defaults to 0.5.
|
||||
nms_threshold (float): Non-Maximum Suppression threshold. Defaults to 0.4.
|
||||
input_size (Tuple[int, int]): Input image size (width, height).
|
||||
Defaults to (640, 640).
|
||||
Note: Non-default sizes may cause slower inference and CoreML compatibility issues.
|
||||
@@ -38,10 +40,10 @@ class SCRFD(BaseDetector):
|
||||
|
||||
Attributes:
|
||||
model_name (SCRFDWeights): Selected model variant.
|
||||
conf_thresh (float): Threshold used to filter low-confidence detections.
|
||||
nms_thresh (float): Threshold used during NMS to suppress overlapping boxes.
|
||||
confidence_threshold (float): Threshold used to filter low-confidence detections.
|
||||
nms_threshold (float): Threshold used during NMS to suppress overlapping boxes.
|
||||
input_size (Tuple[int, int]): Image size to which inputs are resized before inference.
|
||||
_fmc (int): Number of feature map levels used in the model.
|
||||
_num_feature_maps (int): Number of feature map levels used in the model.
|
||||
_feat_stride_fpn (List[int]): Feature map strides corresponding to each detection level.
|
||||
_num_anchors (int): Number of anchors per feature location.
|
||||
_center_cache (Dict): Cached anchor centers for efficient forward passes.
|
||||
@@ -56,35 +58,35 @@ class SCRFD(BaseDetector):
|
||||
self,
|
||||
*,
|
||||
model_name: SCRFDWeights = SCRFDWeights.SCRFD_10G_KPS,
|
||||
conf_thresh: float = 0.5,
|
||||
nms_thresh: float = 0.4,
|
||||
input_size: Tuple[int, int] = (640, 640),
|
||||
confidence_threshold: float = 0.5,
|
||||
nms_threshold: float = 0.4,
|
||||
input_size: tuple[int, int] = (640, 640),
|
||||
**kwargs: Any,
|
||||
) -> None:
|
||||
super().__init__(
|
||||
model_name=model_name,
|
||||
conf_thresh=conf_thresh,
|
||||
nms_thresh=nms_thresh,
|
||||
confidence_threshold=confidence_threshold,
|
||||
nms_threshold=nms_threshold,
|
||||
input_size=input_size,
|
||||
**kwargs,
|
||||
)
|
||||
self._supports_landmarks = True # SCRFD supports landmarks
|
||||
|
||||
self.model_name = model_name
|
||||
self.conf_thresh = conf_thresh
|
||||
self.nms_thresh = nms_thresh
|
||||
self.confidence_threshold = confidence_threshold
|
||||
self.nms_threshold = nms_threshold
|
||||
self.input_size = input_size
|
||||
|
||||
# ------- SCRFD model params ------
|
||||
self._fmc = 3
|
||||
self._num_feature_maps = 3
|
||||
self._feat_stride_fpn = [8, 16, 32]
|
||||
self._num_anchors = 2
|
||||
self._center_cache = {}
|
||||
# ---------------------------------
|
||||
|
||||
Logger.info(
|
||||
f'Initializing SCRFD with model={self.model_name}, conf_thresh={self.conf_thresh}, '
|
||||
f'nms_thresh={self.nms_thresh}, input_size={self.input_size}'
|
||||
f'Initializing SCRFD with model={self.model_name}, confidence_threshold={self.confidence_threshold}, '
|
||||
f'nms_threshold={self.nms_threshold}, input_size={self.input_size}'
|
||||
)
|
||||
|
||||
# Get path to model weights
|
||||
@@ -95,14 +97,13 @@ class SCRFD(BaseDetector):
|
||||
self._initialize_model(self._model_path)
|
||||
|
||||
def _initialize_model(self, model_path: str) -> None:
|
||||
"""
|
||||
Initializes an ONNX model session from the given path.
|
||||
"""Initialize an ONNX model session from the given path.
|
||||
|
||||
Args:
|
||||
model_path (str): The file path to the ONNX model.
|
||||
model_path: The file path to the ONNX model.
|
||||
|
||||
Raises:
|
||||
RuntimeError: If the model fails to load, logs an error and raises an exception.
|
||||
RuntimeError: If the model fails to load.
|
||||
"""
|
||||
try:
|
||||
self.session = create_onnx_session(model_path)
|
||||
@@ -113,14 +114,14 @@ class SCRFD(BaseDetector):
|
||||
Logger.error(f"Failed to load model from '{model_path}': {e}", exc_info=True)
|
||||
raise RuntimeError(f"Failed to initialize model session for '{model_path}'") from e
|
||||
|
||||
def preprocess(self, image: np.ndarray) -> Tuple[np.ndarray, Tuple[int, int]]:
|
||||
def preprocess(self, image: np.ndarray) -> np.ndarray:
|
||||
"""Preprocess image for inference.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): Input image
|
||||
image: Input image with shape (H, W, C).
|
||||
|
||||
Returns:
|
||||
Tuple[np.ndarray, Tuple[int, int]]: Preprocessed blob and input size
|
||||
Preprocessed image tensor with shape (1, C, H, W).
|
||||
"""
|
||||
image = image.astype(np.float32)
|
||||
image = (image - 127.5) / 127.5
|
||||
@@ -129,29 +130,42 @@ class SCRFD(BaseDetector):
|
||||
|
||||
return image
|
||||
|
||||
def inference(self, input_tensor: np.ndarray) -> List[np.ndarray]:
|
||||
def inference(self, input_tensor: np.ndarray) -> list[np.ndarray]:
|
||||
"""Perform model inference on the preprocessed image tensor.
|
||||
|
||||
Args:
|
||||
input_tensor (np.ndarray): Preprocessed input tensor.
|
||||
input_tensor: Preprocessed input tensor with shape (1, C, H, W).
|
||||
|
||||
Returns:
|
||||
Tuple[np.ndarray, np.ndarray]: Raw model outputs.
|
||||
List of raw model outputs.
|
||||
"""
|
||||
return self.session.run(self.output_names, {self.input_names: input_tensor})
|
||||
|
||||
def postprocess(self, outputs: List[np.ndarray], image_size: Tuple[int, int]):
|
||||
scores_list = []
|
||||
def postprocess(
|
||||
self,
|
||||
outputs: list[np.ndarray],
|
||||
image_size: tuple[int, int],
|
||||
) -> tuple[list[np.ndarray], list[np.ndarray], list[np.ndarray]]:
|
||||
"""Process model outputs into detection results.
|
||||
|
||||
Args:
|
||||
outputs: Raw outputs from the detection model.
|
||||
image_size: Size of the input image as (height, width).
|
||||
|
||||
Returns:
|
||||
Tuple of (scores_list, bboxes_list, landmarks_list).
|
||||
"""
|
||||
scores_list: list[np.ndarray] = []
|
||||
bboxes_list = []
|
||||
kpss_list = []
|
||||
|
||||
image_size = image_size
|
||||
|
||||
fmc = self._fmc
|
||||
num_feature_maps = self._num_feature_maps
|
||||
for idx, stride in enumerate(self._feat_stride_fpn):
|
||||
scores = outputs[idx]
|
||||
bbox_preds = outputs[fmc + idx] * stride
|
||||
kps_preds = outputs[2 * fmc + idx] * stride
|
||||
bbox_preds = outputs[num_feature_maps + idx] * stride
|
||||
kps_preds = outputs[2 * num_feature_maps + idx] * stride
|
||||
|
||||
# Generate anchors
|
||||
fm_height = image_size[0] // stride
|
||||
@@ -171,7 +185,7 @@ class SCRFD(BaseDetector):
|
||||
if len(self._center_cache) < 100:
|
||||
self._center_cache[cache_key] = anchor_centers
|
||||
|
||||
pos_indices = np.where(scores >= self.conf_thresh)[0]
|
||||
pos_indices = np.where(scores >= self.confidence_threshold)[0]
|
||||
if len(pos_indices) == 0:
|
||||
continue
|
||||
|
||||
@@ -193,7 +207,7 @@ class SCRFD(BaseDetector):
|
||||
max_num: int = 0,
|
||||
metric: Literal['default', 'max'] = 'max',
|
||||
center_weight: float = 2.0,
|
||||
) -> List[Dict[str, Any]]:
|
||||
) -> list[Face]:
|
||||
"""
|
||||
Perform face detection on an input image and return bounding boxes and facial landmarks.
|
||||
|
||||
@@ -207,19 +221,19 @@ class SCRFD(BaseDetector):
|
||||
when using the "default" metric. Defaults to 2.0.
|
||||
|
||||
Returns:
|
||||
List[Dict[str, Any]]: List of face detection dictionaries, each containing:
|
||||
- 'bbox' (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
|
||||
- 'confidence' (float): Detection confidence score (0.0 to 1.0)
|
||||
- 'landmarks' (np.ndarray): 5-point facial landmarks with shape (5, 2)
|
||||
List[Face]: List of Face objects, each containing:
|
||||
- bbox (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
|
||||
- confidence (float): Detection confidence score (0.0 to 1.0)
|
||||
- landmarks (np.ndarray): 5-point facial landmarks with shape (5, 2)
|
||||
|
||||
Example:
|
||||
>>> faces = detector.detect(image)
|
||||
>>> for face in faces:
|
||||
... bbox = face['bbox'] # np.ndarray with shape (4,)
|
||||
... confidence = face['confidence'] # float
|
||||
... landmarks = face['landmarks'] # np.ndarray with shape (5, 2)
|
||||
... bbox = face.bbox # np.ndarray with shape (4,)
|
||||
... confidence = face.confidence # float
|
||||
... landmarks = face.landmarks # np.ndarray with shape (5, 2)
|
||||
... # Can pass landmarks directly to recognition
|
||||
... embedding = recognizer.get_normalized_embedding(image, landmarks)
|
||||
... embedding = recognizer.get_normalized_embedding(image, face.landmarks)
|
||||
"""
|
||||
|
||||
original_height, original_width = image.shape[:2]
|
||||
@@ -247,7 +261,7 @@ class SCRFD(BaseDetector):
|
||||
pre_det = np.hstack((bboxes, scores)).astype(np.float32, copy=False)
|
||||
pre_det = pre_det[order, :]
|
||||
|
||||
keep = non_max_suppression(pre_det, threshold=self.nms_thresh)
|
||||
keep = non_max_suppression(pre_det, threshold=self.nms_threshold)
|
||||
|
||||
detections = pre_det[keep, :]
|
||||
landmarks = landmarks[order, :, :]
|
||||
@@ -280,78 +294,11 @@ class SCRFD(BaseDetector):
|
||||
|
||||
faces = []
|
||||
for i in range(detections.shape[0]):
|
||||
face_dict = {
|
||||
'bbox': detections[i, :4],
|
||||
'confidence': float(detections[i, 4]),
|
||||
'landmarks': landmarks[i],
|
||||
}
|
||||
faces.append(face_dict)
|
||||
face = Face(
|
||||
bbox=detections[i, :4],
|
||||
confidence=float(detections[i, 4]),
|
||||
landmarks=landmarks[i],
|
||||
)
|
||||
faces.append(face)
|
||||
|
||||
return faces
|
||||
|
||||
|
||||
# TODO: below is only for testing, remove it later
|
||||
def draw_bbox(frame, bbox, score, color=(0, 255, 0), thickness=2):
|
||||
x1, y1, x2, y2 = map(int, bbox) # Unpack 4 bbox values
|
||||
cv2.rectangle(frame, (x1, y1), (x2, y2), color, thickness)
|
||||
cv2.putText(frame, f'{score:.2f}', (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)
|
||||
|
||||
|
||||
def draw_keypoints(frame, points, color=(0, 0, 255), radius=2):
|
||||
for x, y in points.astype(np.int32):
|
||||
cv2.circle(frame, (int(x), int(y)), radius, color, -1)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
detector = SCRFD(model_name=SCRFDWeights.SCRFD_500M_KPS)
|
||||
print(detector.get_info())
|
||||
cap = cv2.VideoCapture(0)
|
||||
|
||||
if not cap.isOpened():
|
||||
print('Failed to open webcam.')
|
||||
exit()
|
||||
|
||||
print("Webcam started. Press 'q' to exit.")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
print('Failed to read frame.')
|
||||
break
|
||||
|
||||
# Get face detections as list of dictionaries
|
||||
faces = detector.detect(frame)
|
||||
|
||||
# Process each detected face
|
||||
for face in faces:
|
||||
# Extract bbox and landmarks from dictionary
|
||||
bbox = face['bbox'] # [x1, y1, x2, y2]
|
||||
landmarks = face['landmarks'] # [[x1, y1], [x2, y2], ...]
|
||||
confidence = face['confidence']
|
||||
|
||||
# Pass bbox and confidence separately
|
||||
draw_bbox(frame, bbox, confidence)
|
||||
|
||||
# Convert landmarks to numpy array format if needed
|
||||
if landmarks is not None and len(landmarks) > 0:
|
||||
# Convert list of [x, y] pairs to numpy array
|
||||
points = np.array(landmarks, dtype=np.float32) # Shape: (5, 2)
|
||||
draw_keypoints(frame, points)
|
||||
|
||||
# Display face count
|
||||
cv2.putText(
|
||||
frame,
|
||||
f'Faces: {len(faces)}',
|
||||
(10, 30),
|
||||
cv2.FONT_HERSHEY_SIMPLEX,
|
||||
0.7,
|
||||
(255, 255, 255),
|
||||
2,
|
||||
)
|
||||
|
||||
cv2.imshow('FaceDetection', frame)
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
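SCRFD's postprocess above walks the FPN strides (8, 16, 32), builds cached anchor centres per feature-map cell, and decodes stride-scaled distances into boxes with distance2bbox from uniface.common. A rough sketch of those two pieces, assuming the usual centre-plus-offsets decoding; the real helpers may differ in detail.

import numpy as np

def anchor_centers_sketch(input_size: int, stride: int, num_anchors: int = 2) -> np.ndarray:
    # One (x, y) centre per feature-map cell, repeated num_anchors times.
    fm = input_size // stride
    ys, xs = np.mgrid[:fm, :fm]
    centers = np.stack([xs, ys], axis=-1).reshape(-1, 2) * stride
    return np.repeat(centers, num_anchors, axis=0).astype(np.float32)

def distance2bbox_sketch(centers: np.ndarray, distances: np.ndarray) -> np.ndarray:
    # distances: (N, 4) offsets (left, top, right, bottom), already multiplied by stride.
    x1 = centers[:, 0] - distances[:, 0]
    y1 = centers[:, 1] - distances[:, 1]
    x2 = centers[:, 0] + distances[:, 2]
    y2 = centers[:, 1] + distances[:, 3]
    return np.stack([x1, y1, x2, y2], axis=-1)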
@@ -2,7 +2,7 @@
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from typing import Any, Dict, List, Literal, Tuple
|
||||
from typing import Any, Literal
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
@@ -12,6 +12,7 @@ from uniface.constants import YOLOv5FaceWeights
|
||||
from uniface.log import Logger
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.onnx_utils import create_onnx_session
|
||||
from uniface.types import Face
|
||||
|
||||
from .base import BaseDetector
|
||||
|
||||
@@ -29,8 +30,8 @@ class YOLOv5Face(BaseDetector):
|
||||
Args:
|
||||
model_name (YOLOv5FaceWeights): Predefined model enum (e.g., `YOLOV5S`).
|
||||
Specifies the YOLOv5-Face variant to load. Defaults to YOLOV5S.
|
||||
conf_thresh (float): Confidence threshold for filtering detections. Defaults to 0.6.
|
||||
nms_thresh (float): Non-Maximum Suppression threshold. Defaults to 0.5.
|
||||
confidence_threshold (float): Confidence threshold for filtering detections. Defaults to 0.6.
|
||||
nms_threshold (float): Non-Maximum Suppression threshold. Defaults to 0.5.
|
||||
input_size (int): Input image size. Defaults to 640.
|
||||
Note: ONNX model is fixed at 640. Changing this will cause inference errors.
|
||||
**kwargs: Advanced options:
|
||||
@@ -38,8 +39,8 @@ class YOLOv5Face(BaseDetector):
|
||||
|
||||
Attributes:
|
||||
model_name (YOLOv5FaceWeights): Selected model variant.
|
||||
conf_thresh (float): Threshold used to filter low-confidence detections.
|
||||
nms_thresh (float): Threshold used during NMS to suppress overlapping boxes.
|
||||
confidence_threshold (float): Threshold used to filter low-confidence detections.
|
||||
nms_threshold (float): Threshold used during NMS to suppress overlapping boxes.
|
||||
input_size (int): Image size to which inputs are resized before inference.
|
||||
max_det (int): Maximum number of detections to return.
|
||||
_model_path (str): Absolute path to the downloaded/verified model weights.
|
||||
@@ -53,15 +54,15 @@ class YOLOv5Face(BaseDetector):
|
||||
self,
|
||||
*,
|
||||
model_name: YOLOv5FaceWeights = YOLOv5FaceWeights.YOLOV5S,
|
||||
conf_thresh: float = 0.6,
|
||||
nms_thresh: float = 0.5,
|
||||
confidence_threshold: float = 0.6,
|
||||
nms_threshold: float = 0.5,
|
||||
input_size: int = 640,
|
||||
**kwargs: Any,
|
||||
) -> None:
|
||||
super().__init__(
|
||||
model_name=model_name,
|
||||
conf_thresh=conf_thresh,
|
||||
nms_thresh=nms_thresh,
|
||||
confidence_threshold=confidence_threshold,
|
||||
nms_threshold=nms_threshold,
|
||||
input_size=input_size,
|
||||
**kwargs,
|
||||
)
|
||||
@@ -74,16 +75,16 @@ class YOLOv5Face(BaseDetector):
|
||||
)
|
||||
|
||||
self.model_name = model_name
|
||||
self.conf_thresh = conf_thresh
|
||||
self.nms_thresh = nms_thresh
|
||||
self.confidence_threshold = confidence_threshold
|
||||
self.nms_threshold = nms_threshold
|
||||
self.input_size = input_size
|
||||
|
||||
# Advanced options from kwargs
|
||||
self.max_det = kwargs.get('max_det', 750)
|
||||
|
||||
Logger.info(
|
||||
f'Initializing YOLOv5Face with model={self.model_name}, conf_thresh={self.conf_thresh}, '
|
||||
f'nms_thresh={self.nms_thresh}, input_size={self.input_size}'
|
||||
f'Initializing YOLOv5Face with model={self.model_name}, confidence_threshold={self.confidence_threshold}, '
|
||||
f'nms_threshold={self.nms_threshold}, input_size={self.input_size}'
|
||||
)
|
||||
|
||||
# Get path to model weights
|
||||
@@ -112,7 +113,7 @@ class YOLOv5Face(BaseDetector):
|
||||
Logger.error(f"Failed to load model from '{model_path}': {e}", exc_info=True)
|
||||
raise RuntimeError(f"Failed to initialize model session for '{model_path}'") from e
|
||||
|
||||
def preprocess(self, image: np.ndarray) -> Tuple[np.ndarray, float, Tuple[int, int]]:
|
||||
def preprocess(self, image: np.ndarray) -> tuple[np.ndarray, float, tuple[int, int]]:
|
||||
"""
|
||||
Preprocess image for inference.
|
||||
|
||||
@@ -153,7 +154,7 @@ class YOLOv5Face(BaseDetector):
|
||||
|
||||
return img_batch, scale, (pad_w, pad_h)
|
||||
|
||||
def inference(self, input_tensor: np.ndarray) -> List[np.ndarray]:
|
||||
def inference(self, input_tensor: np.ndarray) -> list[np.ndarray]:
|
||||
"""Perform model inference on the preprocessed image tensor.
|
||||
|
||||
Args:
|
||||
@@ -168,8 +169,8 @@ class YOLOv5Face(BaseDetector):
|
||||
self,
|
||||
predictions: np.ndarray,
|
||||
scale: float,
|
||||
padding: Tuple[int, int],
|
||||
) -> Tuple[np.ndarray, np.ndarray]:
|
||||
padding: tuple[int, int],
|
||||
) -> tuple[np.ndarray, np.ndarray]:
|
||||
"""
|
||||
Postprocess model predictions.
|
||||
|
||||
@@ -189,7 +190,7 @@ class YOLOv5Face(BaseDetector):
|
||||
predictions = predictions[0] # Remove batch dimension
|
||||
|
||||
# Filter by confidence
|
||||
mask = predictions[:, 4] >= self.conf_thresh
|
||||
mask = predictions[:, 4] >= self.confidence_threshold
|
||||
predictions = predictions[mask]
|
||||
|
||||
if len(predictions) == 0:
|
||||
@@ -206,7 +207,7 @@ class YOLOv5Face(BaseDetector):
|
||||
|
||||
# Apply NMS
|
||||
detections_for_nms = np.hstack((boxes, scores[:, None])).astype(np.float32, copy=False)
|
||||
keep = non_max_suppression(detections_for_nms, self.nms_thresh)
|
||||
keep = non_max_suppression(detections_for_nms, self.nms_threshold)
|
||||
|
||||
if len(keep) == 0:
|
||||
return np.array([]), np.array([])
|
||||
@@ -259,7 +260,7 @@ class YOLOv5Face(BaseDetector):
|
||||
max_num: int = 0,
|
||||
metric: Literal['default', 'max'] = 'max',
|
||||
center_weight: float = 2.0,
|
||||
) -> List[Dict[str, Any]]:
|
||||
) -> list[Face]:
|
||||
"""
|
||||
Perform face detection on an input image and return bounding boxes and facial landmarks.
|
||||
|
||||
@@ -273,19 +274,19 @@ class YOLOv5Face(BaseDetector):
|
||||
when using the "default" metric. Defaults to 2.0.
|
||||
|
||||
Returns:
|
||||
List[Dict[str, Any]]: List of face detection dictionaries, each containing:
|
||||
- 'bbox' (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
|
||||
- 'confidence' (float): Detection confidence score (0.0 to 1.0)
|
||||
- 'landmarks' (np.ndarray): 5-point facial landmarks with shape (5, 2)
|
||||
List[Face]: List of Face objects, each containing:
|
||||
- bbox (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
|
||||
- confidence (float): Detection confidence score (0.0 to 1.0)
|
||||
- landmarks (np.ndarray): 5-point facial landmarks with shape (5, 2)
|
||||
|
||||
Example:
|
||||
>>> faces = detector.detect(image)
|
||||
>>> for face in faces:
|
||||
... bbox = face['bbox'] # np.ndarray with shape (4,)
|
||||
... confidence = face['confidence'] # float
|
||||
... landmarks = face['landmarks'] # np.ndarray with shape (5, 2)
|
||||
... bbox = face.bbox # np.ndarray with shape (4,)
|
||||
... confidence = face.confidence # float
|
||||
... landmarks = face.landmarks # np.ndarray with shape (5, 2)
|
||||
... # Can pass landmarks directly to recognition
|
||||
... embedding = recognizer.get_normalized_embedding(image, landmarks)
|
||||
... embedding = recognizer.get_normalized_embedding(image, face.landmarks)
|
||||
"""
|
||||
|
||||
original_height, original_width = image.shape[:2]
|
||||
@@ -330,11 +331,11 @@ class YOLOv5Face(BaseDetector):
|
||||
|
||||
faces = []
|
||||
for i in range(detections.shape[0]):
|
||||
face_dict = {
|
||||
'bbox': detections[i, :4],
|
||||
'confidence': float(detections[i, 4]),
|
||||
'landmarks': landmarks[i],
|
||||
}
|
||||
faces.append(face_dict)
|
||||
face = Face(
|
||||
bbox=detections[i, :4],
|
||||
confidence=float(detections[i, 4]),
|
||||
landmarks=landmarks[i],
|
||||
)
|
||||
faces.append(face)
|
||||
|
||||
return faces
|
||||
|
||||
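YOLOv5Face.preprocess above returns (tensor, scale, (pad_w, pad_h)), i.e. a letterbox-style resize whose scale and padding are undone again in postprocess. A generic sketch of that idea; the padding colour and centring choice here are illustrative assumptions, not taken from the diff.

import cv2
import numpy as np

def letterbox_sketch(image: np.ndarray, size: int = 640):
    # Resize to fit inside size x size while keeping aspect ratio, then pad.
    h, w = image.shape[:2]
    scale = min(size / h, size / w)
    new_w, new_h = int(round(w * scale)), int(round(h * scale))
    resized = cv2.resize(image, (new_w, new_h))
    pad_w, pad_h = (size - new_w) // 2, (size - new_h) // 2
    canvas = np.full((size, size, 3), 114, dtype=np.uint8)
    canvas[pad_h:pad_h + new_h, pad_w:pad_w + new_w] = resized
    return canvas, scale, (pad_w, pad_h)

def unletterbox_box(box, scale, padding):
    # Map a detected [x1, y1, x2, y2] from letterboxed space back to the original image.
    pad_w, pad_h = padding
    return [(box[0] - pad_w) / scale, (box[1] - pad_h) / scale,
            (box[2] - pad_w) / scale, (box[3] - pad_h) / scale]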
@@ -1,66 +0,0 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from dataclasses import asdict, dataclass
|
||||
from typing import Optional
|
||||
|
||||
import numpy as np
|
||||
|
||||
from uniface.face_utils import compute_similarity
|
||||
|
||||
__all__ = ['Face']
|
||||
|
||||
|
||||
@dataclass
|
||||
class Face:
|
||||
"""
|
||||
Detected face with analysis results.
|
||||
"""
|
||||
|
||||
# Required attributes
|
||||
bbox: np.ndarray
|
||||
confidence: float
|
||||
landmarks: np.ndarray
|
||||
|
||||
# Optional attributes
|
||||
embedding: Optional[np.ndarray] = None
|
||||
age: Optional[int] = None
|
||||
gender: Optional[int] = None # 0 or 1
|
||||
|
||||
def compute_similarity(self, other: 'Face') -> float:
|
||||
"""Compute cosine similarity with another face."""
|
||||
if self.embedding is None or other.embedding is None:
|
||||
raise ValueError('Both faces must have embeddings for similarity computation')
|
||||
return float(compute_similarity(self.embedding, other.embedding))
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
"""Convert to dictionary."""
|
||||
return asdict(self)
|
||||
|
||||
@property
|
||||
def sex(self) -> str:
|
||||
"""Get gender as a string label (Female or Male)."""
|
||||
if self.gender is None:
|
||||
return None
|
||||
return 'Female' if self.gender == 0 else 'Male'
|
||||
|
||||
@property
|
||||
def bbox_xyxy(self) -> np.ndarray:
|
||||
"""Get bounding box coordinates in (x1, y1, x2, y2) format."""
|
||||
return self.bbox.copy()
|
||||
|
||||
@property
|
||||
def bbox_xywh(self) -> np.ndarray:
|
||||
"""Get bounding box coordinates in (x1, y1, w, h) format."""
|
||||
return np.array([self.bbox[0], self.bbox[1], self.bbox[2] - self.bbox[0], self.bbox[3] - self.bbox[1]])
|
||||
|
||||
def __repr__(self) -> str:
|
||||
parts = [f'Face(confidence={self.confidence:.3f}']
|
||||
if self.age is not None:
|
||||
parts.append(f'age={self.age}')
|
||||
if self.gender is not None:
|
||||
parts.append(f'sex={self.sex}')
|
||||
if self.embedding is not None:
|
||||
parts.append(f'embedding_dim={self.embedding.shape[0]}')
|
||||
return ', '.join(parts) + ')'
|
||||
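The hunk above removes this Face dataclass from its old module; per the other hunks in this change set it is now imported from uniface.types. A short usage sketch of the same API; the 512-dimensional embedding is an illustrative assumption.

import numpy as np

from uniface.types import Face

emb = np.ones(512, dtype=np.float32) / np.sqrt(512)   # unit-norm dummy embedding
a = Face(bbox=np.array([10.0, 20.0, 110.0, 220.0]), confidence=0.98,
         landmarks=np.zeros((5, 2)), embedding=emb)
b = Face(bbox=np.array([12.0, 18.0, 108.0, 215.0]), confidence=0.95,
         landmarks=np.zeros((5, 2)), embedding=emb.copy())

print(a.bbox_xywh)              # [ 10.  20. 100. 200.]  -> (x1, y1, w, h)
print(a.sex)                    # None until an age/gender model fills face.gender
print(a.compute_similarity(b))  # ~1.0 for identical embeddings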
@@ -2,21 +2,21 @@
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from typing import Tuple, Union
|
||||
from __future__ import annotations
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
from skimage.transform import SimilarityTransform
|
||||
|
||||
__all__ = [
|
||||
'face_alignment',
|
||||
'compute_similarity',
|
||||
'bbox_center_alignment',
|
||||
'compute_similarity',
|
||||
'face_alignment',
|
||||
'transform_points_2d',
|
||||
]
|
||||
|
||||
|
||||
# Reference alignment for facial landmarks (ArcFace)
|
||||
# Standard 5-point facial landmark reference for ArcFace alignment (112x112)
|
||||
reference_alignment: np.ndarray = np.array(
|
||||
[
|
||||
[38.2946, 51.6963],
|
||||
@@ -29,22 +29,25 @@ reference_alignment: np.ndarray = np.array(
|
||||
)
|
||||
|
||||
|
||||
def estimate_norm(landmark: np.ndarray, image_size: Union[int, Tuple[int, int]] = 112) -> Tuple[np.ndarray, np.ndarray]:
|
||||
"""
|
||||
Estimate the normalization transformation matrix for facial landmarks.
|
||||
def estimate_norm(
|
||||
landmark: np.ndarray,
|
||||
image_size: int | tuple[int, int] = 112,
|
||||
) -> tuple[np.ndarray, np.ndarray]:
|
||||
"""Estimate the normalization transformation matrix for facial landmarks.
|
||||
|
||||
Args:
|
||||
landmark (np.ndarray): Array of shape (5, 2) representing the coordinates of the facial landmarks.
|
||||
image_size (Union[int, Tuple[int, int]], optional): The size of the output image.
|
||||
Can be an integer (for square images) or a tuple (width, height). Default is 112.
|
||||
landmark: Array of shape (5, 2) representing the coordinates of the facial landmarks.
|
||||
image_size: The size of the output image. Can be an integer (for square images)
|
||||
or a tuple (width, height). Default is 112.
|
||||
|
||||
Returns:
|
||||
np.ndarray: The 2x3 transformation matrix for aligning the landmarks.
|
||||
np.ndarray: The 2x3 inverse transformation matrix for aligning the landmarks.
|
||||
A tuple containing:
|
||||
- The 2x3 transformation matrix for aligning the landmarks.
|
||||
- The 2x3 inverse transformation matrix.
|
||||
|
||||
Raises:
|
||||
AssertionError: If the input landmark array does not have the shape (5, 2)
|
||||
or if image_size is not a multiple of 112 or 128.
|
||||
or if image_size is not a multiple of 112 or 128.
|
||||
"""
|
||||
assert landmark.shape == (5, 2), 'Landmark array must have shape (5, 2).'
|
||||
|
||||
@@ -80,23 +83,23 @@ def estimate_norm(landmark: np.ndarray, image_size: Union[int, Tuple[int, int]]
|
||||
def face_alignment(
|
||||
image: np.ndarray,
|
||||
landmark: np.ndarray,
|
||||
image_size: Union[int, Tuple[int, int]] = 112,
|
||||
) -> Tuple[np.ndarray, np.ndarray]:
|
||||
"""
|
||||
Align the face in the input image based on the given facial landmarks.
|
||||
image_size: int | tuple[int, int] = 112,
|
||||
) -> tuple[np.ndarray, np.ndarray]:
|
||||
"""Align the face in the input image based on the given facial landmarks.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): Input image as a NumPy array.
|
||||
landmark (np.ndarray): Array of shape (5, 2) representing the coordinates of the facial landmarks.
|
||||
image_size (Union[int, Tuple[int, int]], optional): The size of the aligned output image.
|
||||
Can be an integer (for square images) or a tuple (width, height). Default is 112.
|
||||
image: Input image as a NumPy array with shape (H, W, C).
|
||||
landmark: Array of shape (5, 2) representing the facial landmark coordinates.
|
||||
image_size: The size of the aligned output image. Can be an integer
|
||||
(for square images) or a tuple (width, height). Default is 112.
|
||||
|
||||
Returns:
|
||||
np.ndarray: The aligned face as a NumPy array.
|
||||
np.ndarray: The 2x3 transformation matrix used for alignment.
|
||||
A tuple containing:
|
||||
- The aligned face as a NumPy array.
|
||||
- The 2x3 inverse transformation matrix used for alignment.
|
||||
"""
|
||||
# Get the transformation matrix
|
||||
M, M_inv = estimate_norm(landmark, image_size)
|
||||
transform_matrix, inverse_transform = estimate_norm(landmark, image_size)
|
||||
|
||||
# Handle both int and tuple for warpAffine output size
|
||||
if isinstance(image_size, int):
|
||||
@@ -105,44 +108,50 @@ def face_alignment(
|
||||
output_size = image_size
|
||||
|
||||
# Warp the input image to align the face
|
||||
warped = cv2.warpAffine(image, M, output_size, borderValue=0.0)
|
||||
warped = cv2.warpAffine(image, transform_matrix, output_size, borderValue=0.0)
|
||||
|
||||
return warped, M_inv
|
||||
return warped, inverse_transform
|
||||
|
||||
|
||||
def compute_similarity(feat1: np.ndarray, feat2: np.ndarray, normalized: bool = False) -> np.float32:
|
||||
"""Computing Similarity between two faces.
|
||||
"""Compute cosine similarity between two face embeddings.
|
||||
|
||||
Args:
|
||||
feat1 (np.ndarray): First embedding.
|
||||
feat2 (np.ndarray): Second embedding.
|
||||
normalized (bool): Set True if the embeddings are already L2 normalized.
|
||||
feat1: First embedding vector.
|
||||
feat2: Second embedding vector.
|
||||
normalized: Set True if the embeddings are already L2 normalized.
|
||||
|
||||
Returns:
|
||||
np.float32: Cosine similarity.
|
||||
Cosine similarity score in range [-1, 1].
|
||||
"""
|
||||
feat1 = feat1.ravel()
|
||||
feat2 = feat2.ravel()
|
||||
if normalized:
|
||||
return np.dot(feat1, feat2)
|
||||
else:
|
||||
return np.dot(feat1, feat2) / (np.linalg.norm(feat1) * np.linalg.norm(feat2) + 1e-5)
|
||||
# Add small epsilon to prevent division by zero
|
||||
return np.dot(feat1, feat2) / (np.linalg.norm(feat1) * np.linalg.norm(feat2) + 1e-5)
|
||||
|
||||
|
||||
def bbox_center_alignment(image, center, output_size, scale, rotation):
|
||||
"""
|
||||
Apply center-based alignment, scaling, and rotation to an image.
|
||||
def bbox_center_alignment(
|
||||
image: np.ndarray,
|
||||
center: tuple[float, float],
|
||||
output_size: int,
|
||||
scale: float,
|
||||
rotation: float,
|
||||
) -> tuple[np.ndarray, np.ndarray]:
|
||||
"""Apply center-based alignment, scaling, and rotation to an image.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): Input image.
|
||||
center (Tuple[float, float]): Center point (e.g., face center from bbox).
|
||||
output_size (int): Desired output image size (square).
|
||||
scale (float): Scaling factor to zoom in/out.
|
||||
rotation (float): Rotation angle in degrees (clockwise).
|
||||
image: Input image with shape (H, W, C).
|
||||
center: Center point (x, y), e.g., face center from bbox.
|
||||
output_size: Desired output image size (square).
|
||||
scale: Scaling factor to zoom in/out.
|
||||
rotation: Rotation angle in degrees (clockwise).
|
||||
|
||||
Returns:
|
||||
cropped (np.ndarray): Aligned and cropped image.
|
||||
M (np.ndarray): 2x3 affine transform matrix used.
|
||||
A tuple containing:
|
||||
- Aligned and cropped image with shape (output_size, output_size, C).
|
||||
- 2x3 affine transform matrix used.
|
||||
"""
|
||||
|
||||
# Convert rotation from degrees to radians
|
||||
@@ -175,15 +184,14 @@ def bbox_center_alignment(image, center, output_size, scale, rotation):
|
||||
|
||||
|
||||
def transform_points_2d(points: np.ndarray, transform: np.ndarray) -> np.ndarray:
|
||||
"""
|
||||
Apply a 2D affine transformation to an array of 2D points.
|
||||
"""Apply a 2D affine transformation to an array of 2D points.
|
||||
|
||||
Args:
|
||||
points (np.ndarray): An (N, 2) array of 2D points.
|
||||
transform (np.ndarray): A (2, 3) affine transformation matrix.
|
||||
points: An (N, 2) array of 2D points.
|
||||
transform: A (2, 3) affine transformation matrix.
|
||||
|
||||
Returns:
|
||||
np.ndarray: Transformed (N, 2) array of points.
|
||||
Transformed (N, 2) array of points.
|
||||
"""
|
||||
transformed = np.zeros_like(points, dtype=np.float32)
|
||||
for i in range(points.shape[0]):
|
||||
|
||||
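Putting estimate_norm and face_alignment above together with a detector from earlier in this change set: a short sketch, assuming the detector supplies the (5, 2) landmarks that the 112x112 ArcFace reference expects.

import cv2

from uniface.detection import RetinaFace
from uniface.face_utils import face_alignment

detector = RetinaFace()
image = cv2.imread('person.jpg')                         # BGR
faces = detector.detect(image)
if faces:
    aligned, inverse_transform = face_alignment(image, faces[0].landmarks, image_size=112)
    print(aligned.shape)                                 # (112, 112, 3), recognition-ready crop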
@@ -2,6 +2,8 @@
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from uniface.types import GazeResult
|
||||
|
||||
from .base import BaseGazeEstimator
|
||||
from .models import MobileGaze
|
||||
|
||||
@@ -34,13 +36,11 @@ def create_gaze_estimator(method: str = 'mobilegaze', **kwargs) -> BaseGazeEstim
|
||||
|
||||
>>> # Create with MobileNetV2 backbone
|
||||
>>> from uniface.constants import GazeWeights
|
||||
>>> estimator = create_gaze_estimator(
|
||||
... 'mobilegaze',
|
||||
... model_name=GazeWeights.MOBILENET_V2
|
||||
... )
|
||||
>>> estimator = create_gaze_estimator('mobilegaze', model_name=GazeWeights.MOBILENET_V2)
|
||||
|
||||
>>> # Use the estimator
|
||||
>>> pitch, yaw = estimator.estimate(face_crop)
|
||||
>>> result = estimator.estimate(face_crop)
|
||||
>>> print(f'Pitch: {result.pitch}, Yaw: {result.yaw}')
|
||||
"""
|
||||
method = method.lower()
|
||||
|
||||
@@ -51,8 +51,4 @@ def create_gaze_estimator(method: str = 'mobilegaze', **kwargs) -> BaseGazeEstim
|
||||
raise ValueError(f"Unsupported gaze estimation method: '{method}'. Available: {available}")
|
||||
|
||||
|
||||
__all__ = [
|
||||
'create_gaze_estimator',
|
||||
'MobileGaze',
|
||||
'BaseGazeEstimator',
|
||||
]
|
||||
__all__ = ['BaseGazeEstimator', 'GazeResult', 'MobileGaze', 'create_gaze_estimator']
|
||||
|
||||
@@ -2,11 +2,16 @@
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Tuple
|
||||
|
||||
import numpy as np
|
||||
|
||||
from uniface.types import GazeResult
|
||||
|
||||
__all__ = ['BaseGazeEstimator', 'GazeResult']
|
||||
|
||||
|
||||
class BaseGazeEstimator(ABC):
|
||||
"""
|
||||
@@ -54,7 +59,7 @@ class BaseGazeEstimator(ABC):
|
||||
raise NotImplementedError('Subclasses must implement the preprocess method.')
|
||||
|
||||
@abstractmethod
|
||||
def postprocess(self, outputs: Tuple[np.ndarray, np.ndarray]) -> Tuple[float, float]:
|
||||
def postprocess(self, outputs: tuple[np.ndarray, np.ndarray]) -> GazeResult:
|
||||
"""
|
||||
Postprocess raw model outputs into gaze angles.
|
||||
|
||||
@@ -66,12 +71,12 @@ class BaseGazeEstimator(ABC):
|
||||
on the specific model architecture.
|
||||
|
||||
Returns:
|
||||
Tuple[float, float]: A tuple of (pitch, yaw) angles in radians.
|
||||
GazeResult: Result containing pitch and yaw angles in radians.
|
||||
"""
|
||||
raise NotImplementedError('Subclasses must implement the postprocess method.')
|
||||
|
||||
@abstractmethod
|
||||
def estimate(self, face_image: np.ndarray) -> Tuple[float, float]:
|
||||
def estimate(self, face_image: np.ndarray) -> GazeResult:
|
||||
"""
|
||||
Perform end-to-end gaze estimation on a face image.
|
||||
|
||||
@@ -84,18 +89,18 @@ class BaseGazeEstimator(ABC):
|
||||
well-framed within the image.
|
||||
|
||||
Returns:
|
||||
Tuple[float, float]: A tuple of (pitch, yaw) angles in radians:
|
||||
GazeResult: Result containing pitch and yaw angles in radians:
|
||||
- pitch: Vertical gaze angle (positive = up, negative = down)
|
||||
- yaw: Horizontal gaze angle (positive = right, negative = left)
|
||||
|
||||
Example:
|
||||
>>> estimator = create_gaze_estimator()
|
||||
>>> pitch, yaw = estimator.estimate(face_crop)
|
||||
>>> print(f"Looking: pitch={np.degrees(pitch):.1f}°, yaw={np.degrees(yaw):.1f}°")
|
||||
>>> result = estimator.estimate(face_crop)
|
||||
>>> print(f'Looking: pitch={np.degrees(result.pitch):.1f}°, yaw={np.degrees(result.yaw):.1f}°')
|
||||
"""
|
||||
raise NotImplementedError('Subclasses must implement the estimate method.')
|
||||
|
||||
def __call__(self, face_image: np.ndarray) -> Tuple[float, float]:
|
||||
def __call__(self, face_image: np.ndarray) -> GazeResult:
|
||||
"""
|
||||
Provides a convenient, callable shortcut for the `estimate` method.
|
||||
|
||||
@@ -103,6 +108,6 @@ class BaseGazeEstimator(ABC):
|
||||
face_image (np.ndarray): A cropped face image in BGR format.
|
||||
|
||||
Returns:
|
||||
Tuple[float, float]: A tuple of (pitch, yaw) angles in radians.
|
||||
GazeResult: Result containing pitch and yaw angles in radians.
|
||||
"""
|
||||
return self.estimate(face_image)
|
||||
|
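A small helper sketch for reading a GazeResult under the sign convention documented above (pitch positive = up, yaw positive = right); the 10 degree thresholds are arbitrary illustration values, not part of the library.

import numpy as np

from uniface.types import GazeResult

def describe_gaze(result: GazeResult) -> str:
    pitch_deg, yaw_deg = np.degrees(result.pitch), np.degrees(result.yaw)
    vertical = 'up' if pitch_deg > 10 else 'down' if pitch_deg < -10 else 'level'
    horizontal = 'right' if yaw_deg > 10 else 'left' if yaw_deg < -10 else 'straight'
    return f'{vertical} / {horizontal} ({pitch_deg:.1f} deg, {yaw_deg:.1f} deg)'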
||||
@@ -2,7 +2,6 @@
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from typing import Tuple
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
@@ -11,6 +10,7 @@ from uniface.constants import GazeWeights
|
||||
from uniface.log import Logger
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.onnx_utils import create_onnx_session
|
||||
from uniface.types import GazeResult
|
||||
|
||||
from .base import BaseGazeEstimator
|
||||
|
||||
@@ -54,17 +54,17 @@ class MobileGaze(BaseGazeEstimator):
|
||||
>>> # Detect faces and estimate gaze for each
|
||||
>>> faces = detector.detect(image)
|
||||
>>> for face in faces:
|
||||
... bbox = face['bbox']
|
||||
... bbox = face.bbox
|
||||
... x1, y1, x2, y2 = map(int, bbox[:4])
|
||||
... face_crop = image[y1:y2, x1:x2]
|
||||
... pitch, yaw = gaze_estimator.estimate(face_crop)
|
||||
... print(f"Gaze: pitch={np.degrees(pitch):.1f}°, yaw={np.degrees(yaw):.1f}°")
|
||||
... result = gaze_estimator.estimate(face_crop)
|
||||
... print(f'Gaze: pitch={np.degrees(result.pitch):.1f}°, yaw={np.degrees(result.yaw):.1f}°')
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
model_name: GazeWeights = GazeWeights.RESNET34,
|
||||
input_size: Tuple[int, int] = (448, 448),
|
||||
input_size: tuple[int, int] = (448, 448),
|
||||
) -> None:
|
||||
Logger.info(f'Initializing MobileGaze with model={model_name}, input_size={input_size}')
|
||||
|
||||
@@ -143,7 +143,7 @@ class MobileGaze(BaseGazeEstimator):
|
||||
e_x = np.exp(x - np.max(x, axis=1, keepdims=True))
|
||||
return e_x / e_x.sum(axis=1, keepdims=True)
|
||||
|
||||
def postprocess(self, outputs: Tuple[np.ndarray, np.ndarray]) -> Tuple[np.ndarray, np.ndarray]:
|
||||
def postprocess(self, outputs: tuple[np.ndarray, np.ndarray]) -> GazeResult:
|
||||
"""
|
||||
Postprocess raw model outputs into gaze angles.
|
||||
|
||||
@@ -155,7 +155,7 @@ class MobileGaze(BaseGazeEstimator):
|
||||
on the specific model architecture.
|
||||
|
||||
Returns:
|
||||
Tuple[np.ndarray, np.ndarray]: A tuple of (pitch, yaw) angles in radians.
|
||||
GazeResult: Result containing pitch and yaw angles in radians.
|
||||
"""
|
||||
pitch_logits, yaw_logits = outputs
|
||||
|
||||
@@ -168,12 +168,12 @@ class MobileGaze(BaseGazeEstimator):
|
||||
yaw_deg = np.sum(yaw_probs * self._idx_tensor, axis=1) * self._binwidth - self._angle_offset
|
||||
|
||||
# Convert degrees to radians
|
||||
pitch = np.radians(pitch_deg[0])
|
||||
yaw = np.radians(yaw_deg[0])
|
||||
pitch = float(np.radians(pitch_deg[0]))
|
||||
yaw = float(np.radians(yaw_deg[0]))
|
||||
|
||||
return pitch, yaw
|
||||
return GazeResult(pitch=pitch, yaw=yaw)
|
||||
|
||||
def estimate(self, face_image: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
|
||||
def estimate(self, face_image: np.ndarray) -> GazeResult:
|
||||
"""
|
||||
Perform end-to-end gaze estimation on a face image.
|
||||
|
||||
@@ -182,6 +182,5 @@ class MobileGaze(BaseGazeEstimator):
|
||||
"""
|
||||
input_tensor = self.preprocess(face_image)
|
||||
outputs = self.session.run(self.output_names, {self.input_name: input_tensor})
|
||||
pitch, yaw = self.postprocess((outputs[0], outputs[1]))
|
||||
|
||||
return pitch, yaw
|
||||
return self.postprocess((outputs[0], outputs[1]))
|
||||
|
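The postprocess above decodes each head by taking a softmax over angle bins and then an expectation over bin indices: angle_deg = sum(p_i * i) * binwidth - offset. A worked numeric sketch with assumed bin settings follows; the real bin count, width, and offset come from the model configuration, not from this diff.

import numpy as np

num_bins, binwidth, offset = 90, 4.0, 180.0      # assumed bin layout, for illustration only
logits = np.full((1, num_bins), -10.0)
logits[0, 50] = 10.0                             # model strongly favours bin 50

probs = np.exp(logits - logits.max(axis=1, keepdims=True))
probs /= probs.sum(axis=1, keepdims=True)        # softmax, as in MobileGaze.postprocess
idx = np.arange(num_bins)

angle_deg = (probs * idx).sum(axis=1) * binwidth - offset
angle_rad = np.radians(angle_deg)
print(angle_deg[0], angle_rad[0])                # ~20.0 degrees -> ~0.349 radians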
||||
@@ -25,4 +25,4 @@ def create_landmarker(method: str = '2d106det', **kwargs) -> BaseLandmarker:
|
||||
raise ValueError(f"Unsupported method: '{method}'. Available: {available}")
|
||||
|
||||
|
||||
__all__ = ['create_landmarker', 'Landmark106', 'BaseLandmarker']
|
||||
__all__ = ['BaseLandmarker', 'Landmark106', 'create_landmarker']
|
||||
|
||||
@@ -30,3 +30,15 @@ class BaseLandmarker(ABC):
|
||||
where N is the number of landmarks.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
def __call__(self, image: np.ndarray, bbox: np.ndarray) -> np.ndarray:
|
||||
"""Callable shortcut for the `get_landmarks` method.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): The full source image in BGR format.
|
||||
bbox (np.ndarray): A bounding box of a face [x1, y1, x2, y2].
|
||||
|
||||
Returns:
|
||||
np.ndarray: An array of predicted landmark points with shape (N, 2).
|
||||
"""
|
||||
return self.get_landmarks(image, bbox)
|
||||
|
||||
@@ -2,7 +2,6 @@
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from typing import Tuple
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
@@ -46,7 +45,7 @@ class Landmark106(BaseLandmarker):
|
||||
def __init__(
|
||||
self,
|
||||
model_name: LandmarkWeights = LandmarkWeights.DEFAULT,
|
||||
input_size: Tuple[int, int] = (192, 192),
|
||||
input_size: tuple[int, int] = (192, 192),
|
||||
) -> None:
|
||||
Logger.info(f'Initializing Facial Landmark with model={model_name}, input_size={input_size}')
|
||||
self.input_size = input_size
|
||||
@@ -85,7 +84,7 @@ class Landmark106(BaseLandmarker):
|
||||
Logger.error(f"Failed to load landmark model from '{self.model_path}'", exc_info=True)
|
||||
raise RuntimeError(f'Failed to initialize landmark model: {e}') from e
|
||||
|
||||
def preprocess(self, image: np.ndarray, bbox: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
|
||||
def preprocess(self, image: np.ndarray, bbox: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
|
||||
"""Prepares a face crop for inference.
|
||||
|
||||
This method takes a face bounding box, performs a center alignment to
|
||||
@@ -155,58 +154,3 @@ class Landmark106(BaseLandmarker):
|
||||
raw_predictions = self.session.run(self.output_names, {self.input_names[0]: face_blob})[0][0]
|
||||
landmarks = self.postprocess(raw_predictions, transform_matrix)
|
||||
return landmarks
|
||||
|
||||
|
||||
# Testing code
|
||||
if __name__ == '__main__':
|
||||
from uniface.detection import RetinaFace
|
||||
from uniface.landmark import Landmark106
|
||||
|
||||
face_detector = RetinaFace()
|
||||
landmarker = Landmark106()
|
||||
|
||||
cap = cv2.VideoCapture(0)
|
||||
if not cap.isOpened():
|
||||
print('Webcam not available.')
|
||||
exit()
|
||||
|
||||
print("Press 'q' to quit.")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
print('Frame capture failed.')
|
||||
break
|
||||
|
||||
# 2. The detect method returns a list of dictionaries
|
||||
faces = face_detector.detect(frame)
|
||||
|
||||
if not faces:
|
||||
cv2.imshow('Facial Landmark Detection', frame)
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
continue
|
||||
|
||||
# 3. Loop through the list of face dictionaries
|
||||
for face in faces:
|
||||
# Extract the bounding box
|
||||
bbox = face['bbox']
|
||||
|
||||
# 4. Get landmarks for the current face using its bounding box
|
||||
landmarks = landmarker.get_landmarks(frame, bbox)
|
||||
|
||||
# --- Drawing Logic ---
|
||||
# Draw the landmarks
|
||||
for x, y in landmarks.astype(int):
|
||||
cv2.circle(frame, (x, y), 2, (0, 255, 0), -1)
|
||||
|
||||
# Draw the bounding box
|
||||
x1, y1, x2, y2 = map(int, bbox)
|
||||
cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)
|
||||
|
||||
cv2.imshow('Facial Landmark Detection', frame)
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
@@ -1,21 +1,41 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Logging utilities for UniFace.
|
||||
|
||||
This module provides a centralized logger for the UniFace library,
|
||||
allowing users to enable verbose logging when debugging or developing.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
|
||||
__all__ = ['Logger', 'enable_logging']
|
||||
|
||||
# Create logger for uniface
|
||||
Logger = logging.getLogger('uniface')
|
||||
Logger.setLevel(logging.WARNING) # Only show warnings/errors by default
|
||||
Logger.addHandler(logging.NullHandler())
|
||||
|
||||
|
||||
def enable_logging(level=logging.INFO):
|
||||
"""
|
||||
Enable verbose logging for uniface.
|
||||
def enable_logging(level: int = logging.INFO) -> None:
|
||||
"""Enable verbose logging for uniface.
|
||||
|
||||
Configures the logger to output messages to stdout with timestamps.
|
||||
Call this function to see informational messages during model loading
|
||||
and inference.
|
||||
|
||||
Args:
|
||||
level: Logging level (logging.DEBUG, logging.INFO, etc.)
|
||||
level: Logging level. Defaults to logging.INFO.
|
||||
Common values: logging.DEBUG, logging.INFO, logging.WARNING.
|
||||
|
||||
Example:
|
||||
>>> from uniface import enable_logging
|
||||
>>> import logging
|
||||
>>> enable_logging() # Show INFO logs
|
||||
>>> enable_logging(level=logging.DEBUG) # Show DEBUG logs
|
||||
"""
|
||||
Logger.handlers.clear()
|
||||
handler = logging.StreamHandler()
|
||||
|
||||
@@ -2,6 +2,15 @@
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Model weight management for UniFace.
|
||||
|
||||
This module handles downloading, caching, and verifying model weights
|
||||
using SHA-256 checksums for integrity validation.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from enum import Enum
|
||||
import hashlib
|
||||
import os
|
||||
|
||||
@@ -14,33 +23,32 @@ from uniface.log import Logger
|
||||
__all__ = ['verify_model_weights']
|
||||
|
||||
|
||||
def verify_model_weights(model_name: str, root: str = '~/.uniface/models') -> str:
|
||||
"""
|
||||
Ensure model weights are present, downloading and verifying them using SHA-256 if necessary.
|
||||
def verify_model_weights(model_name: Enum, root: str = '~/.uniface/models') -> str:
|
||||
"""Ensure model weights are present, downloading and verifying them if necessary.
|
||||
|
||||
Given a model identifier from an Enum class (e.g., `RetinaFaceWeights.MNET_V2`), this function checks if
|
||||
the corresponding `.onnx` weight file exists locally. If not, it downloads the file from a predefined URL.
|
||||
After download, the file’s integrity is verified using a SHA-256 hash. If verification fails, the file is deleted
|
||||
and an error is raised.
|
||||
Given a model identifier from an Enum class (e.g., `RetinaFaceWeights.MNET_V2`),
|
||||
this function checks if the corresponding weight file exists locally. If not,
|
||||
it downloads the file from a predefined URL and verifies its integrity using
|
||||
a SHA-256 hash.
|
||||
|
||||
Args:
|
||||
model_name (Enum): Model weight identifier (e.g., `RetinaFaceWeights.MNET_V2`, `ArcFaceWeights.RESNET`, etc.).
|
||||
root (str, optional): Directory to store or locate the model weights. Defaults to '~/.uniface/models'.
|
||||
model_name: Model weight identifier enum (e.g., `RetinaFaceWeights.MNET_V2`).
|
||||
root: Directory to store or locate the model weights.
|
||||
Defaults to '~/.uniface/models'.
|
||||
|
||||
Returns:
|
||||
str: Absolute path to the verified model weights file.
|
||||
Absolute path to the verified model weights file.
|
||||
|
||||
Raises:
|
||||
ValueError: If the model is unknown or SHA-256 verification fails.
|
||||
ConnectionError: If downloading the file fails.
|
||||
|
||||
Examples:
|
||||
>>> from uniface.models import RetinaFaceWeights, verify_model_weights
|
||||
>>> verify_model_weights(RetinaFaceWeights.MNET_V2)
|
||||
Example:
|
||||
>>> from uniface.constants import RetinaFaceWeights
|
||||
>>> from uniface.model_store import verify_model_weights
|
||||
>>> path = verify_model_weights(RetinaFaceWeights.MNET_V2)
|
||||
>>> print(path)
|
||||
'/home/user/.uniface/models/retinaface_mnet_v2.onnx'
|
||||
|
||||
>>> verify_model_weights(RetinaFaceWeights.RESNET34, root='/custom/dir')
|
||||
'/custom/dir/retinaface_r34.onnx'
|
||||
"""
|
||||
|
||||
root = os.path.expanduser(root)
|
||||
@@ -73,10 +81,16 @@ def verify_model_weights(model_name: str, root: str = '~/.uniface/models') -> st
|
||||
return model_path
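The SHA-256 verification step described above can be reproduced independently; a minimal sketch, assuming the expected digest is known ahead of time (the library's own checksum table is not shown in this diff).

import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    """Compute the SHA-256 hex digest of a file in chunks (illustrative helper)."""
    digest = hashlib.sha256()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()

# Usage sketch: compare against a known-good digest before trusting the weights.
expected = 'aaaa...'  # hypothetical digest
if sha256_of('/home/user/.uniface/models/retinaface_mnet_v2.onnx') != expected:
    raise ValueError('SHA-256 verification failed; delete and re-download the file.')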
|
||||
|
||||
|
||||
def download_file(url: str, dest_path: str) -> None:
|
||||
"""Download a file from a URL in chunks and save it to the destination path."""
|
||||
def download_file(url: str, dest_path: str, timeout: int = 30) -> None:
|
||||
"""Download a file from a URL in chunks and save it to the destination path.
|
||||
|
||||
Args:
|
||||
url: URL to download from.
|
||||
dest_path: Local file path to save to.
|
||||
timeout: Connection timeout in seconds. Defaults to 30.
|
||||
"""
|
||||
try:
|
||||
response = requests.get(url, stream=True)
|
||||
response = requests.get(url, stream=True, timeout=timeout)
|
||||
response.raise_for_status()
|
||||
with (
|
||||
open(dest_path, 'wb') as file,
|
||||
|
||||
@@ -2,16 +2,23 @@
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from typing import List
|
||||
"""ONNX Runtime utilities for UniFace.
|
||||
|
||||
This module provides helper functions for creating and managing ONNX Runtime
|
||||
inference sessions with automatic hardware acceleration detection.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import onnxruntime as ort
|
||||
|
||||
from uniface.log import Logger
|
||||
|
||||
__all__ = ['create_onnx_session', 'get_available_providers']
|
||||
|
||||
def get_available_providers() -> List[str]:
|
||||
"""
|
||||
Get list of available ONNX Runtime execution providers for the current platform.
|
||||
|
||||
def get_available_providers() -> list[str]:
|
||||
"""Get list of available ONNX Runtime execution providers.
|
||||
|
||||
Automatically detects and prioritizes hardware acceleration:
|
||||
- CoreML on Apple Silicon (M1/M2/M3/M4)
|
||||
@@ -19,13 +26,12 @@ def get_available_providers() -> List[str]:
|
||||
- CPU as fallback (always available)
|
||||
|
||||
Returns:
|
||||
List[str]: Ordered list of execution providers to use
|
||||
Ordered list of execution providers to use.
|
||||
|
||||
Examples:
|
||||
Example:
|
||||
>>> providers = get_available_providers()
|
||||
>>> # On M4 Mac: ['CoreMLExecutionProvider', 'CPUExecutionProvider']
|
||||
>>> # On Linux with CUDA: ['CUDAExecutionProvider', 'CPUExecutionProvider']
|
||||
>>> # On CPU-only: ['CPUExecutionProvider']
|
||||
"""
|
||||
available = ort.get_available_providers()
|
||||
providers = []
|
||||
@@ -48,26 +54,28 @@ def get_available_providers() -> List[str]:
|
||||
return providers
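A small sketch of how the provider list produced above can be fed straight into onnxruntime; the import path follows the `uniface.onnx_utils` module used elsewhere in this changeset, and the model path is a placeholder.

import onnxruntime as ort
from uniface.onnx_utils import get_available_providers

providers = get_available_providers()  # e.g. ['CUDAExecutionProvider', 'CPUExecutionProvider']
session = ort.InferenceSession('model.onnx', providers=providers)  # 'model.onnx' is a placeholder path
print(session.get_providers())  # providers actually bound to this session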
|
||||
|
||||
|
||||
def create_onnx_session(model_path: str, providers: List[str] = None) -> ort.InferenceSession:
|
||||
"""
|
||||
Create an ONNX Runtime inference session with optimal provider selection.
|
||||
def create_onnx_session(
|
||||
model_path: str,
|
||||
providers: list[str] | None = None,
|
||||
) -> ort.InferenceSession:
|
||||
"""Create an ONNX Runtime inference session with optimal provider selection.
|
||||
|
||||
Args:
|
||||
model_path (str): Path to the ONNX model file
|
||||
providers (List[str], optional): List of providers to use.
|
||||
If None, automatically detects best available providers.
|
||||
model_path: Path to the ONNX model file.
|
||||
providers: List of execution providers to use. If None, automatically
|
||||
detects best available providers.
|
||||
|
||||
Returns:
|
||||
ort.InferenceSession: Configured ONNX Runtime session
|
||||
Configured ONNX Runtime session.
|
||||
|
||||
Raises:
|
||||
RuntimeError: If session creation fails
|
||||
RuntimeError: If session creation fails.
|
||||
|
||||
Examples:
|
||||
>>> session = create_onnx_session("model.onnx")
|
||||
Example:
|
||||
>>> session = create_onnx_session('model.onnx')
|
||||
>>> # Automatically uses best available providers
|
||||
|
||||
>>> session = create_onnx_session("model.onnx", providers=["CPUExecutionProvider"])
|
||||
>>> session = create_onnx_session('model.onnx', providers=['CPUExecutionProvider'])
|
||||
>>> # Force CPU-only execution
|
||||
"""
|
||||
if providers is None:
|
||||
@@ -90,8 +98,7 @@ def create_onnx_session(model_path: str, providers: List[str] = None) -> ort.Inf
|
||||
'CPUExecutionProvider': 'CPU',
|
||||
}
|
||||
provider_display = provider_names.get(active_provider, active_provider)
|
||||
Logger.debug(f'Model loaded with provider: {active_provider}')
|
||||
print(f'✓ Model loaded ({provider_display})')
|
||||
Logger.info(f'✓ Model loaded ({provider_display})')
|
||||
|
||||
return session
|
||||
except Exception as e:
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from typing import Union
|
||||
from __future__ import annotations
|
||||
|
||||
from uniface.constants import ParsingWeights
|
||||
|
||||
@@ -13,38 +13,29 @@ __all__ = ['BaseFaceParser', 'BiSeNet', 'create_face_parser']
|
||||
|
||||
|
||||
def create_face_parser(
|
||||
model_name: Union[str, ParsingWeights] = ParsingWeights.RESNET18,
|
||||
model_name: str | ParsingWeights = ParsingWeights.RESNET18,
|
||||
) -> BaseFaceParser:
|
||||
"""
|
||||
Factory function to create a face parsing model instance.
|
||||
"""Factory function to create a face parsing model instance.
|
||||
|
||||
This function provides a convenient way to instantiate face parsing models
|
||||
without directly importing the specific model classes. It supports both
|
||||
string-based and enum-based model selection.
|
||||
without directly importing the specific model classes.
|
||||
|
||||
Args:
|
||||
model_name (Union[str, ParsingWeights]): The face parsing model to create.
|
||||
Can be either a string or a ParsingWeights enum value.
|
||||
Available options:
|
||||
model_name: The face parsing model to create. Can be either a string
|
||||
or a ParsingWeights enum value. Available options:
|
||||
- 'parsing_resnet18' or ParsingWeights.RESNET18 (default)
|
||||
- 'parsing_resnet34' or ParsingWeights.RESNET34
|
||||
|
||||
Returns:
|
||||
BaseFaceParser: An instance of the requested face parsing model.
|
||||
An instance of the requested face parsing model.
|
||||
|
||||
Raises:
|
||||
ValueError: If the model_name is not recognized.
|
||||
|
||||
Examples:
|
||||
>>> # Using enum
|
||||
Example:
|
||||
>>> from uniface.parsing import create_face_parser
|
||||
>>> from uniface.constants import ParsingWeights
|
||||
>>> parser = create_face_parser(ParsingWeights.RESNET18)
|
||||
>>>
|
||||
>>> # Using string
|
||||
>>> parser = create_face_parser('parsing_resnet18')
|
||||
>>>
|
||||
>>> # Parse a face image
|
||||
>>> mask = parser.parse(face_crop)
|
||||
"""
|
||||
# Convert string to enum if necessary
|
||||
|
||||
@@ -3,7 +3,6 @@
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Tuple
|
||||
|
||||
import numpy as np
|
||||
|
||||
@@ -53,7 +52,7 @@ class BaseFaceParser(ABC):
|
||||
raise NotImplementedError('Subclasses must implement the preprocess method.')
|
||||
|
||||
@abstractmethod
|
||||
def postprocess(self, outputs: np.ndarray, original_size: Tuple[int, int]) -> np.ndarray:
|
||||
def postprocess(self, outputs: np.ndarray, original_size: tuple[int, int]) -> np.ndarray:
|
||||
"""
|
||||
Postprocess raw model outputs into a segmentation mask.
|
||||
|
||||
@@ -89,7 +88,7 @@ class BaseFaceParser(ABC):
|
||||
Example:
|
||||
>>> parser = create_face_parser()
|
||||
>>> mask = parser.parse(face_crop)
|
||||
>>> print(f"Mask shape: {mask.shape}, unique classes: {np.unique(mask)}")
|
||||
>>> print(f'Mask shape: {mask.shape}, unique classes: {np.unique(mask)}')
|
||||
"""
|
||||
raise NotImplementedError('Subclasses must implement the parse method.')
|
||||
|
||||
|
||||
@@ -2,7 +2,6 @@
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from typing import Tuple
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
@@ -54,17 +53,17 @@ class BiSeNet(BaseFaceParser):
|
||||
>>> # Detect faces and parse each face
|
||||
>>> faces = detector.detect(image)
|
||||
>>> for face in faces:
|
||||
... bbox = face['bbox']
|
||||
... bbox = face.bbox
|
||||
... x1, y1, x2, y2 = map(int, bbox[:4])
|
||||
... face_crop = image[y1:y2, x1:x2]
|
||||
... mask = parser.parse(face_crop)
|
||||
... print(f"Mask shape: {mask.shape}, unique classes: {np.unique(mask)}")
|
||||
... print(f'Mask shape: {mask.shape}, unique classes: {np.unique(mask)}')
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
model_name: ParsingWeights = ParsingWeights.RESNET18,
|
||||
input_size: Tuple[int, int] = (512, 512),
|
||||
input_size: tuple[int, int] = (512, 512),
|
||||
) -> None:
|
||||
Logger.info(f'Initializing BiSeNet with model={model_name}, input_size={input_size}')
|
||||
|
||||
@@ -127,7 +126,7 @@ class BiSeNet(BaseFaceParser):
|
||||
|
||||
return image
|
||||
|
||||
def postprocess(self, outputs: np.ndarray, original_size: Tuple[int, int]) -> np.ndarray:
|
||||
def postprocess(self, outputs: np.ndarray, original_size: tuple[int, int]) -> np.ndarray:
|
||||
"""
|
||||
Postprocess model output to segmentation mask.
|
||||
|
||||
|
||||
52 uniface/privacy/__init__.py Normal file
@@ -0,0 +1,52 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import numpy as np
|
||||
|
||||
from .blur import BlurFace
|
||||
|
||||
|
||||
def anonymize_faces(
|
||||
image: np.ndarray,
|
||||
detector: object | None = None,
|
||||
method: str = 'pixelate',
|
||||
blur_strength: float = 3.0,
|
||||
pixel_blocks: int = 10,
|
||||
confidence_threshold: float = 0.5,
|
||||
**kwargs,
|
||||
) -> np.ndarray:
|
||||
"""One-line face anonymization with automatic detection.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): Input image (BGR format).
|
||||
detector: Face detector instance. Creates RetinaFace if None.
|
||||
method (str): Blur method name. Defaults to 'pixelate'.
|
||||
blur_strength (float): Blur intensity. Defaults to 3.0.
|
||||
pixel_blocks (int): Block count for pixelate. Defaults to 10.
|
||||
confidence_threshold (float): Detection confidence threshold. Defaults to 0.5.
|
||||
**kwargs: Additional detector arguments.
|
||||
|
||||
Returns:
|
||||
np.ndarray: Anonymized image.
|
||||
|
||||
Example:
|
||||
>>> from uniface.privacy import anonymize_faces
|
||||
>>> anonymized = anonymize_faces(image, method='pixelate')
|
||||
"""
|
||||
if detector is None:
|
||||
try:
|
||||
from uniface import RetinaFace
|
||||
|
||||
detector = RetinaFace(confidence_threshold=confidence_threshold, **kwargs)
|
||||
except ImportError as err:
|
||||
raise ImportError('Could not import RetinaFace. Please ensure UniFace is properly installed.') from err
|
||||
|
||||
faces = detector.detect(image)
|
||||
blurrer = BlurFace(method=method, blur_strength=blur_strength, pixel_blocks=pixel_blocks)
|
||||
return blurrer.anonymize(image, faces)
|
||||
|
||||
|
||||
__all__ = ['BlurFace', 'anonymize_faces']
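End-to-end usage of the helper above might look like the following; the image path and method choice are illustrative.

import cv2
from uniface.privacy import anonymize_faces

image = cv2.imread('group_photo.jpg')  # placeholder input path
blurred = anonymize_faces(image, method='elliptical', blur_strength=3.0)
cv2.imwrite('group_photo_anonymized.jpg', blurred)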
|
||||
200 uniface/privacy/blur.py Normal file
@@ -0,0 +1,200 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, ClassVar
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
if TYPE_CHECKING:
|
||||
pass
|
||||
|
||||
__all__ = ['BlurFace', 'EllipticalBlur']
|
||||
|
||||
|
||||
def _gaussian_blur(region: np.ndarray, strength: float = 3.0) -> np.ndarray:
|
||||
"""Apply Gaussian blur to a region."""
|
||||
h, w = region.shape[:2]
|
||||
kernel_size = max(3, int((min(h, w) / 7) * strength)) | 1
|
||||
return cv2.GaussianBlur(region, (kernel_size, kernel_size), 0)
|
||||
|
||||
|
||||
def _median_blur(region: np.ndarray, strength: float = 3.0) -> np.ndarray:
|
||||
"""Apply median blur to a region."""
|
||||
h, w = region.shape[:2]
|
||||
kernel_size = max(3, int((min(h, w) / 7) * strength)) | 1
|
||||
return cv2.medianBlur(region, kernel_size)
|
||||
|
||||
|
||||
def _pixelate_blur(region: np.ndarray, blocks: int = 10) -> np.ndarray:
|
||||
"""Apply pixelation to a region."""
|
||||
h, w = region.shape[:2]
|
||||
temp_h, temp_w = max(1, h // blocks), max(1, w // blocks)
|
||||
temp = cv2.resize(region, (temp_w, temp_h), interpolation=cv2.INTER_LINEAR)
|
||||
return cv2.resize(temp, (w, h), interpolation=cv2.INTER_NEAREST)
|
||||
|
||||
|
||||
def _blackout_blur(region: np.ndarray, color: tuple[int, int, int] = (0, 0, 0)) -> np.ndarray:
|
||||
"""Replace region with solid color."""
|
||||
return np.full_like(region, color)
|
||||
|
||||
|
||||
class EllipticalBlur:
|
||||
"""Elliptical blur with soft, feathered edges.
|
||||
|
||||
This blur applies Gaussian blur within an elliptical mask that follows
|
||||
the natural oval shape of faces, requiring full image context for proper blending.
|
||||
|
||||
Args:
|
||||
blur_strength (float): Blur intensity multiplier. Defaults to 3.0.
|
||||
margin (int): Extra pixels to extend ellipse beyond bbox. Defaults to 20.
|
||||
"""
|
||||
|
||||
def __init__(self, blur_strength: float = 3.0, margin: int = 20):
|
||||
self.blur_strength = blur_strength
|
||||
self.margin = margin
|
||||
|
||||
def __call__(
|
||||
self,
|
||||
image: np.ndarray,
|
||||
bboxes: list[tuple | list],
|
||||
inplace: bool = False,
|
||||
) -> np.ndarray:
|
||||
if not inplace:
|
||||
image = image.copy()
|
||||
|
||||
h, w = image.shape[:2]
|
||||
|
||||
for bbox in bboxes:
|
||||
x1, y1, x2, y2 = map(int, bbox)
|
||||
center_x, center_y = (x1 + x2) // 2, (y1 + y2) // 2
|
||||
axes_x = (x2 - x1) // 2 + self.margin
|
||||
axes_y = (y2 - y1) // 2 + self.margin
|
||||
|
||||
# Create soft elliptical mask
|
||||
mask = np.zeros((h, w), dtype=np.float32)
|
||||
cv2.ellipse(mask, (center_x, center_y), (axes_x, axes_y), 0, 0, 360, 255, -1)
|
||||
mask = cv2.GaussianBlur(mask, (51, 51), 0) / 255.0
|
||||
mask = mask[:, :, np.newaxis]
|
||||
|
||||
kernel_size = max(3, int((min(axes_y, axes_x) * 2 / 7) * self.blur_strength)) | 1
|
||||
blurred = cv2.GaussianBlur(image, (kernel_size, kernel_size), 0)
|
||||
image = (blurred * mask + image * (1 - mask)).astype(np.uint8)
|
||||
|
||||
return image
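The feathered compositing used above (blurred * mask + image * (1 - mask)) can be tried in isolation; a minimal sketch on a synthetic image, with the ellipse geometry hard-coded for illustration.

import cv2
import numpy as np

image = np.full((200, 200, 3), 255, dtype=np.uint8)
cv2.circle(image, (100, 100), 60, (0, 0, 255), -1)  # something to blur

mask = np.zeros((200, 200), dtype=np.float32)
cv2.ellipse(mask, (100, 100), (70, 90), 0, 0, 360, 255, -1)
mask = cv2.GaussianBlur(mask, (51, 51), 0)[:, :, np.newaxis] / 255.0  # soft edges

blurred = cv2.GaussianBlur(image, (31, 31), 0)
out = (blurred * mask + image * (1 - mask)).astype(np.uint8)  # feathered blend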
|
||||
|
||||
|
||||
class BlurFace:
|
||||
"""Face blurring with multiple anonymization methods.
|
||||
|
||||
Args:
|
||||
method (str): Blur method - 'gaussian', 'pixelate', 'blackout', 'elliptical', or 'median'.
|
||||
Defaults to 'pixelate'.
|
||||
blur_strength (float): Intensity for gaussian/elliptical/median. Defaults to 3.0.
|
||||
pixel_blocks (int): Block count for pixelate. Defaults to 10.
|
||||
color (Tuple[int, int, int]): Fill color (BGR) for blackout. Defaults to (0, 0, 0).
|
||||
margin (int): Edge margin for elliptical. Defaults to 20.
|
||||
|
||||
Example:
|
||||
>>> blurrer = BlurFace(method='pixelate')
|
||||
>>> anonymized = blurrer.anonymize(image, faces)
|
||||
"""
|
||||
|
||||
VALID_METHODS: ClassVar[set[str]] = {'gaussian', 'pixelate', 'blackout', 'elliptical', 'median'}
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
method: str = 'pixelate',
|
||||
blur_strength: float = 3.0,
|
||||
pixel_blocks: int = 15,
|
||||
color: tuple[int, int, int] = (0, 0, 0),
|
||||
margin: int = 20,
|
||||
):
|
||||
self.method = method.lower()
|
||||
self._blur_strength = blur_strength
|
||||
self._pixel_blocks = pixel_blocks
|
||||
self._color = color
|
||||
self._margin = margin
|
||||
|
||||
if self.method not in self.VALID_METHODS:
|
||||
raise ValueError(f"Invalid blur method: '{method}'. Choose from: {sorted(self.VALID_METHODS)}")
|
||||
|
||||
if self.method == 'elliptical':
|
||||
self._elliptical = EllipticalBlur(blur_strength, margin)
|
||||
|
||||
def _blur_region(self, region: np.ndarray) -> np.ndarray:
|
||||
"""Apply blur to a single region based on the configured method."""
|
||||
if self.method == 'gaussian':
|
||||
return _gaussian_blur(region, self._blur_strength)
|
||||
elif self.method == 'median':
|
||||
return _median_blur(region, self._blur_strength)
|
||||
elif self.method == 'pixelate':
|
||||
return _pixelate_blur(region, self._pixel_blocks)
|
||||
elif self.method == 'blackout':
|
||||
return _blackout_blur(region, self._color)
|
||||
return region # Fallback (should not reach here)
|
||||
|
||||
def anonymize(
|
||||
self,
|
||||
image: np.ndarray,
|
||||
faces: list,
|
||||
inplace: bool = False,
|
||||
) -> np.ndarray:
|
||||
"""Anonymize faces in an image.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): Input image (BGR format).
|
||||
faces (List[Dict]): Face detections with 'bbox' key containing [x1, y1, x2, y2].
|
||||
inplace (bool): Modify image in-place if True. Defaults to False.
|
||||
|
||||
Returns:
|
||||
np.ndarray: Image with anonymized faces.
|
||||
"""
|
||||
if not faces:
|
||||
return image if inplace else image.copy()
|
||||
|
||||
bboxes = [face.bbox for face in faces]
|
||||
return self.blur_regions(image, bboxes, inplace)
|
||||
|
||||
def blur_regions(
|
||||
self,
|
||||
image: np.ndarray,
|
||||
bboxes: list[tuple | list],
|
||||
inplace: bool = False,
|
||||
) -> np.ndarray:
|
||||
"""Blur specific rectangular regions in an image.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): Input image (BGR format).
|
||||
bboxes (List): Bounding boxes as [x1, y1, x2, y2].
|
||||
inplace (bool): Modify image in-place if True. Defaults to False.
|
||||
|
||||
Returns:
|
||||
np.ndarray: Image with blurred regions.
|
||||
"""
|
||||
if not bboxes:
|
||||
return image if inplace else image.copy()
|
||||
|
||||
if self.method == 'elliptical':
|
||||
return self._elliptical(image, bboxes, inplace)
|
||||
|
||||
if not inplace:
|
||||
image = image.copy()
|
||||
|
||||
h, w = image.shape[:2]
|
||||
|
||||
for bbox in bboxes:
|
||||
x1, y1, x2, y2 = map(int, bbox)
|
||||
x1, y1 = max(0, x1), max(0, y1)
|
||||
x2, y2 = min(w, x2), min(h, y2)
|
||||
|
||||
if x2 > x1 and y2 > y1:
|
||||
image[y1:y2, x1:x2] = self._blur_region(image[y1:y2, x1:x2])
|
||||
|
||||
return image
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return f"BlurFace(method='{self.method}')"
|
||||
@@ -34,10 +34,7 @@ def create_recognizer(method: str = 'arcface', **kwargs) -> BaseRecognizer:
|
||||
|
||||
>>> # Create a specific MobileFace recognizer
|
||||
>>> from uniface.constants import MobileFaceWeights
|
||||
>>> recognizer = create_recognizer(
|
||||
... 'mobileface',
|
||||
... model_name=MobileFaceWeights.MNET_V2
|
||||
... )
|
||||
>>> recognizer = create_recognizer('mobileface', model_name=MobileFaceWeights.MNET_V2)
|
||||
|
||||
>>> # Create a SphereFace recognizer
|
||||
>>> recognizer = create_recognizer('sphereface')
|
||||
@@ -55,10 +52,4 @@ def create_recognizer(method: str = 'arcface', **kwargs) -> BaseRecognizer:
|
||||
raise ValueError(f"Unsupported method: '{method}'. Available: {available}")
|
||||
|
||||
|
||||
__all__ = [
|
||||
'create_recognizer',
|
||||
'ArcFace',
|
||||
'MobileFace',
|
||||
'SphereFace',
|
||||
'BaseRecognizer',
|
||||
]
|
||||
__all__ = ['ArcFace', 'BaseRecognizer', 'MobileFace', 'SphereFace', 'create_recognizer']
|
||||
|
||||
@@ -2,9 +2,10 @@
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass
|
||||
from typing import List, Tuple, Union
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
@@ -13,16 +14,22 @@ from uniface.face_utils import face_alignment
|
||||
from uniface.log import Logger
|
||||
from uniface.onnx_utils import create_onnx_session
|
||||
|
||||
__all__ = ['BaseRecognizer', 'PreprocessConfig']
|
||||
|
||||
|
||||
@dataclass
|
||||
class PreprocessConfig:
|
||||
"""
|
||||
Configuration for preprocessing images before feeding them into the model.
|
||||
"""Configuration for preprocessing images before feeding them into the model.
|
||||
|
||||
Attributes:
|
||||
input_mean: Mean value(s) for normalization.
|
||||
input_std: Standard deviation value(s) for normalization.
|
||||
input_size: Target image size as (height, width).
|
||||
"""
|
||||
|
||||
input_mean: Union[float, List[float]] = 127.5
|
||||
input_std: Union[float, List[float]] = 127.5
|
||||
input_size: Tuple[int, int] = (112, 112)
|
||||
input_mean: float | list[float] = 127.5
|
||||
input_std: float | list[float] = 127.5
|
||||
input_size: tuple[int, int] = (112, 112)
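Per-channel normalization can be selected by passing list values, as the preprocess logic below switches on list/tuple stds; a sketch assuming the ArcFace constructor shown later in this diff accepts the config via its `preprocessing` argument (the import path is assumed).

from uniface.recognition import ArcFace  # import path assumed

# Hypothetical per-channel normalization instead of the scalar 127.5 defaults.
config = PreprocessConfig(
    input_mean=[127.5, 127.5, 127.5],
    input_std=[128.0, 128.0, 128.0],
    input_size=(112, 112),
)
recognizer = ArcFace(preprocessing=config)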
|
||||
|
||||
|
||||
class BaseRecognizer(ABC):
|
||||
@@ -94,7 +101,7 @@ class BaseRecognizer(ABC):
|
||||
"""
|
||||
resized_img = cv2.resize(face_img, self.input_size)
|
||||
|
||||
if isinstance(self.input_std, (list, tuple)):
|
||||
if isinstance(self.input_std, list | tuple):
|
||||
# Per-channel normalization
|
||||
rgb_img = cv2.cvtColor(resized_img, cv2.COLOR_BGR2RGB).astype(np.float32)
|
||||
normalized_img = (rgb_img - np.array(self.input_mean, dtype=np.float32)) / np.array(
|
||||
@@ -116,13 +123,14 @@ class BaseRecognizer(ABC):
|
||||
|
||||
return blob
|
||||
|
||||
def get_embedding(self, image: np.ndarray, landmarks: np.ndarray = None) -> np.ndarray:
|
||||
"""
|
||||
Extracts face embedding from an image.
|
||||
def get_embedding(self, image: np.ndarray, landmarks: np.ndarray | None = None) -> np.ndarray:
|
||||
"""Extract face embedding from an image.
|
||||
|
||||
Args:
|
||||
image: Input face image (BGR format). If already aligned (112x112), landmarks can be None.
|
||||
landmarks: Facial landmarks (5 points for alignment). Optional if image is already aligned.
|
||||
image: Input face image in BGR format. If already aligned (112x112),
|
||||
landmarks can be None.
|
||||
landmarks: Facial landmarks (5 points for alignment). Optional if
|
||||
image is already aligned.
|
||||
|
||||
Returns:
|
||||
Face embedding vector (typically 512-dimensional).
|
||||
@@ -141,16 +149,27 @@ class BaseRecognizer(ABC):
|
||||
return embedding
|
||||
|
||||
def get_normalized_embedding(self, image: np.ndarray, landmarks: np.ndarray) -> np.ndarray:
|
||||
"""
|
||||
Extracts a l2 normalized face embedding vector from an image.
|
||||
"""Extract an L2-normalized face embedding vector from an image.
|
||||
|
||||
Args:
|
||||
image: Input face image (BGR format).
|
||||
image: Input face image in BGR format.
|
||||
landmarks: Facial landmarks (5 points for alignment).
|
||||
|
||||
Returns:
|
||||
Normalized face embedding vector (typically 512-dimensional).
|
||||
L2-normalized face embedding vector (typically 512-dimensional).
|
||||
"""
|
||||
embedding = self.get_embedding(image, landmarks)
|
||||
norm = np.linalg.norm(embedding)
|
||||
return embedding / norm if norm > 0 else embedding
|
||||
|
||||
def __call__(self, image: np.ndarray, landmarks: np.ndarray) -> np.ndarray:
|
||||
"""Callable shortcut for the `get_normalized_embedding` method.
|
||||
|
||||
Args:
|
||||
image: Input face image in BGR format.
|
||||
landmarks: Facial landmarks (5 points for alignment).
|
||||
|
||||
Returns:
|
||||
L2-normalized face embedding vector (typically 512-dimensional).
|
||||
"""
|
||||
return self.get_normalized_embedding(image, landmarks)
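Putting the embedding helpers together for a verification check; cosine similarity of L2-normalized embeddings reduces to a dot product, the image paths are placeholders, and the 0.35 threshold is an illustrative assumption.

import cv2
import numpy as np
from uniface import RetinaFace
from uniface.recognition import ArcFace  # import path assumed

detector, recognizer = RetinaFace(), ArcFace()
img_a, img_b = cv2.imread('person_a.jpg'), cv2.imread('person_b.jpg')  # placeholder paths

face_a, face_b = detector.detect(img_a)[0], detector.detect(img_b)[0]
emb_a = recognizer(img_a, face_a.landmarks)  # __call__ returns an L2-normalized embedding
emb_b = recognizer(img_b, face_b.landmarks)

similarity = float(np.dot(emb_a, emb_b))  # cosine similarity for unit vectors
print('same person' if similarity > 0.35 else 'different person')  # threshold is illustrative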
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from typing import Optional
|
||||
from __future__ import annotations
|
||||
|
||||
from uniface.constants import ArcFaceWeights, MobileFaceWeights, SphereFaceWeights
|
||||
from uniface.model_store import verify_model_weights
|
||||
@@ -34,7 +34,7 @@ class ArcFace(BaseRecognizer):
|
||||
def __init__(
|
||||
self,
|
||||
model_name: ArcFaceWeights = ArcFaceWeights.MNET,
|
||||
preprocessing: Optional[PreprocessConfig] = None,
|
||||
preprocessing: PreprocessConfig | None = None,
|
||||
) -> None:
|
||||
if preprocessing is None:
|
||||
preprocessing = PreprocessConfig(input_mean=127.5, input_std=127.5, input_size=(112, 112))
|
||||
@@ -64,7 +64,7 @@ class MobileFace(BaseRecognizer):
|
||||
def __init__(
|
||||
self,
|
||||
model_name: MobileFaceWeights = MobileFaceWeights.MNET_V2,
|
||||
preprocessing: Optional[PreprocessConfig] = None,
|
||||
preprocessing: PreprocessConfig | None = None,
|
||||
) -> None:
|
||||
if preprocessing is None:
|
||||
preprocessing = PreprocessConfig(input_mean=127.5, input_std=127.5, input_size=(112, 112))
|
||||
@@ -94,7 +94,7 @@ class SphereFace(BaseRecognizer):
|
||||
def __init__(
|
||||
self,
|
||||
model_name: SphereFaceWeights = SphereFaceWeights.SPHERE20,
|
||||
preprocessing: Optional[PreprocessConfig] = None,
|
||||
preprocessing: PreprocessConfig | None = None,
|
||||
) -> None:
|
||||
if preprocessing is None:
|
||||
preprocessing = PreprocessConfig(input_mean=127.5, input_std=127.5, input_size=(112, 112))
|
||||
|
||||
47 uniface/spoofing/__init__.py Normal file
@@ -0,0 +1,47 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from uniface.constants import MiniFASNetWeights
|
||||
from uniface.types import SpoofingResult
|
||||
|
||||
from .base import BaseSpoofer
|
||||
from .minifasnet import MiniFASNet
|
||||
|
||||
__all__ = [
|
||||
'BaseSpoofer',
|
||||
'MiniFASNet',
|
||||
'MiniFASNetWeights',
|
||||
'SpoofingResult',
|
||||
'create_spoofer',
|
||||
]
|
||||
|
||||
|
||||
def create_spoofer(
|
||||
model_name: MiniFASNetWeights = MiniFASNetWeights.V2,
|
||||
scale: float | None = None,
|
||||
) -> MiniFASNet:
|
||||
"""Factory function to create a face anti-spoofing model.
|
||||
|
||||
This is a convenience function that creates a MiniFASNet instance
|
||||
with the specified model variant and optional custom scale.
|
||||
|
||||
Args:
|
||||
model_name: The model variant to use. Options:
|
||||
- MiniFASNetWeights.V2: Improved version (default), uses scale=2.7
|
||||
- MiniFASNetWeights.V1SE: Squeeze-and-excitation version, uses scale=4.0
|
||||
scale: Custom crop scale factor for face region. If None, uses the
|
||||
default scale for the selected model variant.
|
||||
|
||||
Returns:
|
||||
An initialized face anti-spoofing model.
|
||||
|
||||
Example:
|
||||
>>> from uniface.spoofing import create_spoofer, MiniFASNetWeights
|
||||
>>> spoofer = create_spoofer()
|
||||
>>> result = spoofer.predict(image, face.bbox)
|
||||
>>> print(f'Is real: {result.is_real}, Confidence: {result.confidence:.2%}')
|
||||
"""
|
||||
return MiniFASNet(model_name=model_name, scale=scale)
|
||||
112 uniface/spoofing/base.py Normal file
@@ -0,0 +1,112 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
import numpy as np
|
||||
|
||||
from uniface.types import SpoofingResult
|
||||
|
||||
__all__ = ['BaseSpoofer', 'SpoofingResult']
|
||||
|
||||
|
||||
class BaseSpoofer(ABC):
|
||||
"""
|
||||
Abstract base class for all face anti-spoofing models.
|
||||
|
||||
This class defines the common interface that all anti-spoofing models must implement,
|
||||
ensuring consistency across different spoofing detection methods. Anti-spoofing models
|
||||
detect whether a face is real (live person) or fake (photo, video, mask, etc.).
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def _initialize_model(self) -> None:
|
||||
"""
|
||||
Initialize the underlying model for inference.
|
||||
|
||||
This method should handle loading model weights, creating the
|
||||
inference session (e.g., ONNX Runtime), and any necessary
|
||||
setup procedures to prepare the model for prediction.
|
||||
|
||||
Raises:
|
||||
RuntimeError: If the model fails to load or initialize.
|
||||
"""
|
||||
raise NotImplementedError('Subclasses must implement the _initialize_model method.')
|
||||
|
||||
@abstractmethod
|
||||
def preprocess(self, image: np.ndarray, bbox: list | np.ndarray) -> np.ndarray:
|
||||
"""
|
||||
Preprocess the input image for model inference.
|
||||
|
||||
This method should crop the face region using the bounding box,
|
||||
resize it to the model's expected input size, and normalize
|
||||
the pixel values as required by the model.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): Input image in BGR format with shape (H, W, C).
|
||||
bbox (Union[List, np.ndarray]): Face bounding box in [x1, y1, x2, y2] format.
|
||||
|
||||
Returns:
|
||||
np.ndarray: The preprocessed image tensor ready for inference,
|
||||
typically with shape (1, C, H, W).
|
||||
"""
|
||||
raise NotImplementedError('Subclasses must implement the preprocess method.')
|
||||
|
||||
@abstractmethod
|
||||
def postprocess(self, outputs: np.ndarray) -> SpoofingResult:
|
||||
"""
|
||||
Postprocess raw model outputs into prediction result.
|
||||
|
||||
This method takes the raw output from the model's inference and
|
||||
converts it into a SpoofingResult.
|
||||
|
||||
Args:
|
||||
outputs (np.ndarray): Raw outputs from the model inference (logits).
|
||||
|
||||
Returns:
|
||||
SpoofingResult: Result containing is_real flag and confidence score.
|
||||
"""
|
||||
raise NotImplementedError('Subclasses must implement the postprocess method.')
|
||||
|
||||
@abstractmethod
|
||||
def predict(self, image: np.ndarray, bbox: list | np.ndarray) -> SpoofingResult:
|
||||
"""
|
||||
Perform end-to-end anti-spoofing prediction on a face.
|
||||
|
||||
This method orchestrates the full pipeline: preprocessing the input,
|
||||
running inference, and postprocessing to return the prediction.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): Input image in BGR format containing the face.
|
||||
bbox (Union[List, np.ndarray]): Face bounding box in [x1, y1, x2, y2] format.
|
||||
This is typically obtained from a face detector.
|
||||
|
||||
Returns:
|
||||
SpoofingResult: Result containing is_real flag and confidence score.
|
||||
|
||||
Example:
|
||||
>>> spoofer = MiniFASNet()
|
||||
>>> detector = RetinaFace()
|
||||
>>> faces = detector.detect(image)
|
||||
>>> for face in faces:
|
||||
... result = spoofer.predict(image, face.bbox)
|
||||
... label = 'Real' if result.is_real else 'Fake'
|
||||
... print(f'{label}: {result.confidence:.2%}')
|
||||
"""
|
||||
raise NotImplementedError('Subclasses must implement the predict method.')
|
||||
|
||||
def __call__(self, image: np.ndarray, bbox: list | np.ndarray) -> SpoofingResult:
|
||||
"""
|
||||
Provides a convenient, callable shortcut for the `predict` method.
|
||||
|
||||
Args:
|
||||
image (np.ndarray): Input image in BGR format.
|
||||
bbox (Union[List, np.ndarray]): Face bounding box in [x1, y1, x2, y2] format.
|
||||
|
||||
Returns:
|
||||
SpoofingResult: Result containing is_real flag and confidence score.
|
||||
"""
|
||||
return self.predict(image, bbox)
|
||||
220 uniface/spoofing/minifasnet.py Normal file
@@ -0,0 +1,220 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from uniface.constants import MiniFASNetWeights
|
||||
from uniface.log import Logger
|
||||
from uniface.model_store import verify_model_weights
|
||||
from uniface.onnx_utils import create_onnx_session
|
||||
from uniface.types import SpoofingResult
|
||||
|
||||
from .base import BaseSpoofer
|
||||
|
||||
__all__ = ['MiniFASNet']
|
||||
|
||||
# Default crop scales for each model variant
|
||||
DEFAULT_SCALES = {
|
||||
MiniFASNetWeights.V1SE: 4.0,
|
||||
MiniFASNetWeights.V2: 2.7,
|
||||
}
|
||||
|
||||
|
||||
class MiniFASNet(BaseSpoofer):
|
||||
"""
|
||||
MiniFASNet: Lightweight Face Anti-Spoofing with ONNX Runtime.
|
||||
|
||||
MiniFASNet is a face anti-spoofing model that detects whether a face is real
|
||||
(live person) or fake (photo, video replay, mask, etc.). It supports two model
|
||||
variants: V1SE (with squeeze-and-excitation) and V2 (improved version).
|
||||
|
||||
The model takes a face region cropped from the image using a bounding box
|
||||
and predicts whether it's a real or spoofed face.
|
||||
|
||||
Reference:
|
||||
https://github.com/yakhyo/face-anti-spoofing
|
||||
|
||||
Args:
|
||||
model_name (MiniFASNetWeights): The enum specifying the model variant to load.
|
||||
Options: V1SE (scale=4.0), V2 (scale=2.7).
|
||||
Defaults to `MiniFASNetWeights.V2`.
|
||||
scale (Optional[float]): Custom crop scale factor for face region.
|
||||
If None, uses the default scale for the selected model variant.
|
||||
V1SE uses 4.0, V2 uses 2.7.
|
||||
|
||||
Attributes:
|
||||
scale (float): Crop scale factor for face region extraction.
|
||||
input_size (Tuple[int, int]): Model input dimensions (width, height).
|
||||
|
||||
Example:
|
||||
>>> from uniface.spoofing import MiniFASNet
|
||||
>>> from uniface import RetinaFace
|
||||
>>>
|
||||
>>> detector = RetinaFace()
|
||||
>>> spoofer = MiniFASNet()
|
||||
>>>
|
||||
>>> # Detect faces and check if they are real
|
||||
>>> faces = detector.detect(image)
|
||||
>>> for face in faces:
|
||||
... result = spoofer.predict(image, face.bbox)
|
||||
... label = 'Real' if result.is_real else 'Fake'
|
||||
... print(f'{label}: {result.confidence:.2%}')
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
model_name: MiniFASNetWeights = MiniFASNetWeights.V2,
|
||||
scale: float | None = None,
|
||||
) -> None:
|
||||
Logger.info(f'Initializing MiniFASNet with model={model_name.name}')
|
||||
|
||||
# Use default scale for the model variant if not specified
|
||||
self.scale = scale if scale is not None else DEFAULT_SCALES.get(model_name, 2.7)
|
||||
|
||||
self.model_path = verify_model_weights(model_name)
|
||||
self._initialize_model()
|
||||
|
||||
def _initialize_model(self) -> None:
|
||||
"""
|
||||
Initialize the ONNX model from the stored model path.
|
||||
|
||||
Raises:
|
||||
RuntimeError: If the model fails to load or initialize.
|
||||
"""
|
||||
try:
|
||||
self.session = create_onnx_session(self.model_path)
|
||||
|
||||
# Get input configuration
|
||||
input_cfg = self.session.get_inputs()[0]
|
||||
self.input_name = input_cfg.name
|
||||
# Input shape is (batch, channels, height, width) - we need (width, height)
|
||||
self.input_size = tuple(input_cfg.shape[2:4][::-1]) # (width, height)
|
||||
|
||||
# Get output configuration
|
||||
output_cfg = self.session.get_outputs()[0]
|
||||
self.output_name = output_cfg.name
|
||||
|
||||
Logger.info(f'MiniFASNet initialized with input size {self.input_size}, scale={self.scale}')
|
||||
|
||||
except Exception as e:
|
||||
Logger.error(f"Failed to load MiniFASNet model from '{self.model_path}'", exc_info=True)
|
||||
raise RuntimeError(f'Failed to initialize MiniFASNet model: {e}') from e
|
||||
|
||||
def _xyxy_to_xywh(self, bbox: list | np.ndarray) -> list[int]:
|
||||
"""Convert bounding box from [x1, y1, x2, y2] to [x, y, w, h] format."""
|
||||
x1, y1, x2, y2 = bbox[:4]
|
||||
return [int(x1), int(y1), int(x2 - x1), int(y2 - y1)]
|
||||
|
||||
def _crop_face(self, image: np.ndarray, bbox_xywh: list[int]) -> np.ndarray:
|
||||
"""
|
||||
Crop and resize face region from image using scale factor.
|
||||
|
||||
The crop is centered on the face bounding box and scaled to capture
|
||||
more context around the face, which is important for anti-spoofing.
|
||||
|
||||
Args:
|
||||
image: Input image in BGR format.
|
||||
bbox_xywh: Face bounding box in [x, y, w, h] format.
|
||||
|
||||
Returns:
|
||||
Cropped and resized face region.
|
||||
"""
|
||||
src_h, src_w = image.shape[:2]
|
||||
x, y, box_w, box_h = bbox_xywh
|
||||
|
||||
# Calculate the scale to apply based on image and face size
|
||||
scale = min((src_h - 1) / box_h, (src_w - 1) / box_w, self.scale)
|
||||
new_w = box_w * scale
|
||||
new_h = box_h * scale
|
||||
|
||||
# Calculate center of the bounding box
|
||||
center_x = x + box_w / 2
|
||||
center_y = y + box_h / 2
|
||||
|
||||
# Calculate new bounding box coordinates
|
||||
x1 = max(0, int(center_x - new_w / 2))
|
||||
y1 = max(0, int(center_y - new_h / 2))
|
||||
x2 = min(src_w - 1, int(center_x + new_w / 2))
|
||||
y2 = min(src_h - 1, int(center_y + new_h / 2))
|
||||
|
||||
# Crop and resize
|
||||
cropped = image[y1 : y2 + 1, x1 : x2 + 1]
|
||||
resized = cv2.resize(cropped, self.input_size)
|
||||
|
||||
return resized
|
||||
|
||||
def preprocess(self, image: np.ndarray, bbox: list | np.ndarray) -> np.ndarray:
|
||||
"""
|
||||
Preprocess the input image for model inference.
|
||||
|
||||
Crops the face region, converts to float32, and arranges
|
||||
dimensions for the model (NCHW format).
|
||||
|
||||
Args:
|
||||
image: Input image in BGR format with shape (H, W, C).
|
||||
bbox: Face bounding box in [x1, y1, x2, y2] format.
|
||||
|
||||
Returns:
|
||||
Preprocessed image tensor with shape (1, C, H, W).
|
||||
"""
|
||||
# Convert bbox format
|
||||
bbox_xywh = self._xyxy_to_xywh(bbox)
|
||||
|
||||
# Crop and resize face region
|
||||
face = self._crop_face(image, bbox_xywh)
|
||||
|
||||
# Convert to float32 (no normalization needed for this model)
|
||||
face = face.astype(np.float32)
|
||||
|
||||
# HWC -> CHW -> NCHW
|
||||
face = np.transpose(face, (2, 0, 1))
|
||||
face = np.expand_dims(face, axis=0)
|
||||
|
||||
return face
|
||||
|
||||
def _softmax(self, x: np.ndarray) -> np.ndarray:
|
||||
"""Apply softmax to logits along axis 1."""
|
||||
e_x = np.exp(x - np.max(x, axis=1, keepdims=True))
|
||||
return e_x / e_x.sum(axis=1, keepdims=True)
|
||||
|
||||
def postprocess(self, outputs: np.ndarray) -> SpoofingResult:
|
||||
"""
|
||||
Postprocess raw model outputs into prediction result.
|
||||
|
||||
Applies softmax to convert logits to probabilities and
|
||||
returns the SpoofingResult with is_real flag and confidence score.
|
||||
|
||||
Args:
|
||||
outputs: Raw outputs from the model inference (logits).
|
||||
|
||||
Returns:
|
||||
SpoofingResult: Result containing is_real flag and confidence score.
|
||||
"""
|
||||
probs = self._softmax(outputs)
|
||||
label_idx = int(np.argmax(probs))
|
||||
confidence = float(probs[0, label_idx])
|
||||
|
||||
return SpoofingResult(is_real=(label_idx == 1), confidence=confidence)
|
||||
|
||||
def predict(self, image: np.ndarray, bbox: list | np.ndarray) -> SpoofingResult:
|
||||
"""
|
||||
Perform end-to-end anti-spoofing prediction on a face.
|
||||
|
||||
Args:
|
||||
image: Input image in BGR format containing the face.
|
||||
bbox: Face bounding box in [x1, y1, x2, y2] format.
|
||||
|
||||
Returns:
|
||||
SpoofingResult: Result containing is_real flag and confidence score.
|
||||
"""
|
||||
# Preprocess
|
||||
input_tensor = self.preprocess(image, bbox)
|
||||
|
||||
# Run inference
|
||||
outputs = self.session.run([self.output_name], {self.input_name: input_tensor})[0]
|
||||
|
||||
# Postprocess and return
|
||||
return self.postprocess(outputs)
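A complete liveness-check loop built from the pieces above; the input frame path and the drawing details are illustrative.

import cv2
from uniface import RetinaFace
from uniface.spoofing import MiniFASNet

detector, spoofer = RetinaFace(), MiniFASNet()
frame = cv2.imread('webcam_frame.jpg')  # placeholder frame

for face in detector.detect(frame):
    result = spoofer.predict(frame, face.bbox)
    label = 'Real' if result.is_real else 'Fake'
    x1, y1, x2, y2 = map(int, face.bbox[:4])
    color = (0, 255, 0) if result.is_real else (0, 0, 255)
    cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
    cv2.putText(frame, f'{label} {result.confidence:.0%}', (x1, y1 - 8),
                cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)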
|
||||
216 uniface/types.py Normal file
@@ -0,0 +1,216 @@
|
||||
# Copyright 2025 Yakhyokhuja Valikhujaev
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
"""Unified type definitions for UniFace.
|
||||
|
||||
This module centralizes all result dataclasses used across the library,
|
||||
providing consistent and immutable return types for model predictions.
|
||||
|
||||
Note on mutability:
|
||||
- Result dataclasses (GazeResult, SpoofingResult, EmotionResult, AttributeResult)
|
||||
are frozen (immutable) since they represent computation outputs that shouldn't change.
|
||||
- Face dataclass is mutable because FaceAnalyzer enriches it with additional
|
||||
attributes (embedding, age, gender, etc.) after initial detection.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, fields
|
||||
|
||||
import numpy as np
|
||||
|
||||
from uniface.face_utils import compute_similarity
|
||||
|
||||
__all__ = [
|
||||
'AttributeResult',
|
||||
'EmotionResult',
|
||||
'Face',
|
||||
'GazeResult',
|
||||
'SpoofingResult',
|
||||
]
|
||||
|
||||
|
||||
@dataclass(slots=True, frozen=True)
|
||||
class GazeResult:
|
||||
"""Result of gaze estimation.
|
||||
|
||||
Attributes:
|
||||
pitch: Vertical gaze angle in radians (positive = up, negative = down).
|
||||
yaw: Horizontal gaze angle in radians (positive = right, negative = left).
|
||||
"""
|
||||
|
||||
pitch: float
|
||||
yaw: float
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return f'GazeResult(pitch={self.pitch:.4f}, yaw={self.yaw:.4f})'
|
||||
|
||||
|
||||
@dataclass(slots=True, frozen=True)
|
||||
class SpoofingResult:
|
||||
"""Result of face anti-spoofing detection.
|
||||
|
||||
Attributes:
|
||||
is_real: True if the face is real/live, False if fake/spoof.
|
||||
confidence: Confidence score for the prediction (0.0 to 1.0).
|
||||
"""
|
||||
|
||||
is_real: bool
|
||||
confidence: float
|
||||
|
||||
def __repr__(self) -> str:
|
||||
label = 'Real' if self.is_real else 'Fake'
|
||||
return f'SpoofingResult({label}, confidence={self.confidence:.4f})'
|
||||
|
||||
|
||||
@dataclass(slots=True, frozen=True)
|
||||
class EmotionResult:
|
||||
"""Result of emotion recognition.
|
||||
|
||||
Attributes:
|
||||
emotion: Predicted emotion label (e.g., 'Happy', 'Sad', 'Angry').
|
||||
confidence: Confidence score for the prediction (0.0 to 1.0).
|
||||
"""
|
||||
|
||||
emotion: str
|
||||
confidence: float
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return f"EmotionResult('{self.emotion}', confidence={self.confidence:.4f})"
|
||||
|
||||
|
||||
@dataclass(slots=True, frozen=True)
|
||||
class AttributeResult:
|
||||
"""Unified result structure for face attribute prediction.
|
||||
|
||||
This dataclass provides a consistent return type across different attribute
|
||||
prediction models (e.g., AgeGender, FairFace), enabling interoperability
|
||||
and unified handling of results.
|
||||
|
||||
Attributes:
|
||||
gender: Predicted gender (0=Female, 1=Male).
|
||||
age: Exact age in years. Provided by AgeGender model, None for FairFace.
|
||||
age_group: Age range string like "20-29". Provided by FairFace, None for AgeGender.
|
||||
race: Race/ethnicity label. Provided by FairFace only.
|
||||
|
||||
Properties:
|
||||
sex: Gender as a human-readable string ("Female" or "Male").
|
||||
|
||||
Examples:
|
||||
>>> # AgeGender result
|
||||
>>> result = AttributeResult(gender=1, age=25)
|
||||
>>> result.sex
|
||||
'Male'
|
||||
|
||||
>>> # FairFace result
|
||||
>>> result = AttributeResult(gender=0, age_group='20-29', race='East Asian')
|
||||
>>> result.sex
|
||||
'Female'
|
||||
"""
|
||||
|
||||
gender: int
|
||||
age: int | None = None
|
||||
age_group: str | None = None
|
||||
race: str | None = None
|
||||
|
||||
@property
|
||||
def sex(self) -> str:
|
||||
"""Get gender as a string label (Female or Male)."""
|
||||
return 'Female' if self.gender == 0 else 'Male'
|
||||
|
||||
def __repr__(self) -> str:
|
||||
parts = [f'gender={self.sex}']
|
||||
if self.age is not None:
|
||||
parts.append(f'age={self.age}')
|
||||
if self.age_group is not None:
|
||||
parts.append(f'age_group={self.age_group}')
|
||||
if self.race is not None:
|
||||
parts.append(f'race={self.race}')
|
||||
return f'AttributeResult({", ".join(parts)})'
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class Face:
|
||||
"""Detected face with analysis results.
|
||||
|
||||
This dataclass represents a single detected face along with optional
|
||||
analysis results such as embeddings, age, gender, and race predictions.
|
||||
|
||||
Note: This dataclass is mutable (not frozen) because FaceAnalyzer enriches
|
||||
Face objects with additional attributes after initial detection.
|
||||
|
||||
Attributes:
|
||||
bbox: Bounding box coordinates [x1, y1, x2, y2].
|
||||
confidence: Detection confidence score.
|
||||
landmarks: Facial landmark coordinates (typically 5 points).
|
||||
embedding: Face embedding vector for recognition (optional).
|
||||
gender: Predicted gender, 0=Female, 1=Male (optional).
|
||||
age: Predicted exact age in years (optional, from AgeGender model).
|
||||
age_group: Predicted age range like "20-29" (optional, from FairFace).
|
||||
race: Predicted race/ethnicity (optional, from FairFace).
|
||||
emotion: Predicted emotion label (optional, from Emotion model).
|
||||
emotion_confidence: Confidence score for emotion prediction (optional).
|
||||
|
||||
Properties:
|
||||
sex: Gender as a human-readable string ("Female" or "Male").
|
||||
bbox_xyxy: Bounding box in (x1, y1, x2, y2) format.
|
||||
bbox_xywh: Bounding box in (x1, y1, width, height) format.
|
||||
"""
|
||||
|
||||
# Required attributes (from detection)
|
||||
bbox: np.ndarray
|
||||
confidence: float
|
||||
landmarks: np.ndarray
|
||||
|
||||
# Optional attributes (enriched by analyzers)
|
||||
embedding: np.ndarray | None = None
|
||||
gender: int | None = None
|
||||
age: int | None = None
|
||||
age_group: str | None = None
|
||||
race: str | None = None
|
||||
emotion: str | None = None
|
||||
emotion_confidence: float | None = None
|
||||
|
||||
def compute_similarity(self, other: Face) -> float:
|
||||
"""Compute cosine similarity with another face."""
|
||||
if self.embedding is None or other.embedding is None:
|
||||
raise ValueError('Both faces must have embeddings for similarity computation')
|
||||
return float(compute_similarity(self.embedding, other.embedding))
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
"""Convert to dictionary."""
|
||||
return {f.name: getattr(self, f.name) for f in fields(self)}
|
||||
|
||||
@property
|
||||
def sex(self) -> str | None:
|
||||
"""Get gender as a string label (Female or Male)."""
|
||||
if self.gender is None:
|
||||
return None
|
||||
return 'Female' if self.gender == 0 else 'Male'
|
||||
|
||||
@property
|
||||
def bbox_xyxy(self) -> np.ndarray:
|
||||
"""Get bounding box coordinates in (x1, y1, x2, y2) format."""
|
||||
return self.bbox.copy()
|
||||
|
||||
@property
|
||||
def bbox_xywh(self) -> np.ndarray:
|
||||
"""Get bounding box coordinates in (x1, y1, w, h) format."""
|
||||
return np.array([self.bbox[0], self.bbox[1], self.bbox[2] - self.bbox[0], self.bbox[3] - self.bbox[1]])
|
||||
|
||||
def __repr__(self) -> str:
|
||||
parts = [f'Face(confidence={self.confidence:.3f}']
|
||||
if self.age is not None:
|
||||
parts.append(f'age={self.age}')
|
||||
if self.age_group is not None:
|
||||
parts.append(f'age_group={self.age_group}')
|
||||
if self.gender is not None:
|
||||
parts.append(f'sex={self.sex}')
|
||||
if self.race is not None:
|
||||
parts.append(f'race={self.race}')
|
||||
if self.emotion is not None:
|
||||
parts.append(f'emotion={self.emotion}')
|
||||
if self.embedding is not None:
|
||||
parts.append(f'embedding_dim={self.embedding.shape[0]}')
|
||||
return ', '.join(parts) + ')'
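A short sketch of how the enriched Face objects are meant to be used; the hand-built boxes, landmarks, and embeddings here are placeholders (normally the detector and analyzers fill these in).

import numpy as np
from uniface.types import Face

# Building Face objects by hand (normally the detector does this).
face_a = Face(bbox=np.array([10, 10, 110, 110]), confidence=0.99, landmarks=np.zeros((5, 2)))
face_b = Face(bbox=np.array([20, 20, 120, 120]), confidence=0.97, landmarks=np.zeros((5, 2)))

# FaceAnalyzer-style enrichment: attach embeddings later (the dataclass is mutable).
face_a.embedding = np.ones(512) / np.sqrt(512)
face_b.embedding = np.ones(512) / np.sqrt(512)

print(face_a.compute_similarity(face_b))  # ~1.0 for identical unit vectors
print(face_a.bbox_xywh)                   # [10, 10, 100, 100]
print(face_a.to_dict().keys())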
|
||||
@@ -2,11 +2,26 @@
|
||||
# Author: Yakhyokhuja Valikhujaev
|
||||
# GitHub: https://github.com/yakhyo
|
||||
|
||||
from typing import List, Tuple, Union
|
||||
"""Visualization utilities for UniFace.
|
||||
|
||||
This module provides functions for drawing detection results, gaze directions,
|
||||
and face parsing segmentation maps on images.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
__all__ = [
|
||||
'FACE_PARSING_COLORS',
|
||||
'FACE_PARSING_LABELS',
|
||||
'draw_detections',
|
||||
'draw_fancy_bbox',
|
||||
'draw_gaze',
|
||||
'vis_parsing_maps',
|
||||
]
|
||||
|
||||
# Face parsing component names (19 classes)
|
||||
FACE_PARSING_LABELS = [
|
||||
'background',
|
||||
@@ -57,23 +72,25 @@ FACE_PARSING_COLORS = [
|
||||
def draw_detections(
|
||||
*,
|
||||
image: np.ndarray,
|
||||
bboxes: Union[List[np.ndarray], List[List[float]]],
|
||||
scores: Union[np.ndarray, List[float]],
|
||||
landmarks: Union[List[np.ndarray], List[List[List[float]]]],
|
||||
bboxes: list[np.ndarray] | list[list[float]],
|
||||
scores: np.ndarray | list[float],
|
||||
landmarks: list[np.ndarray] | list[list[list[float]]],
|
||||
vis_threshold: float = 0.6,
|
||||
draw_score: bool = False,
|
||||
fancy_bbox: bool = True,
|
||||
):
|
||||
"""
|
||||
Draws bounding boxes, landmarks, and optional scores on an image.
|
||||
) -> None:
|
||||
"""Draw bounding boxes, landmarks, and optional scores on an image.
|
||||
|
||||
Modifies the image in-place.
|
||||
|
||||
Args:
|
||||
image: Input image to draw on.
|
||||
bboxes: List of bounding boxes [x1, y1, x2, y2].
|
||||
image: Input image to draw on (modified in-place).
|
||||
bboxes: List of bounding boxes as [x1, y1, x2, y2].
|
||||
scores: List of confidence scores.
|
||||
landmarks: List of landmark sets with shape (5, 2).
|
||||
vis_threshold: Confidence threshold for filtering. Defaults to 0.6.
|
||||
draw_score: Whether to draw confidence scores. Defaults to False.
|
||||
fancy_bbox: Use corner-style bounding boxes. Defaults to True.
|
||||
"""
|
||||
colors = [(0, 0, 255), (0, 255, 255), (255, 0, 255), (0, 255, 0), (255, 0, 0)]
|
||||
|
||||
@@ -134,19 +151,18 @@ def draw_detections(
|
||||
def draw_fancy_bbox(
|
||||
image: np.ndarray,
|
||||
bbox: np.ndarray,
|
||||
color: Tuple[int, int, int] = (0, 255, 0),
|
||||
color: tuple[int, int, int] = (0, 255, 0),
|
||||
thickness: int = 3,
|
||||
proportion: float = 0.2,
|
||||
):
|
||||
"""
|
||||
Draws a bounding box with fancy corners on an image.
|
||||
) -> None:
|
||||
"""Draw a bounding box with fancy corners on an image.
|
||||
|
||||
Args:
|
||||
image: Input image to draw on.
|
||||
image: Input image to draw on (modified in-place).
|
||||
bbox: Bounding box coordinates [x1, y1, x2, y2].
|
||||
color: Color of the bounding box. Defaults to green.
|
||||
thickness: Thickness of the bounding box lines. Defaults to 3.
|
||||
proportion: Proportion of the corner length to the width/height of the bounding box. Defaults to 0.2.
|
||||
color: Color of the bounding box in BGR. Defaults to green.
|
||||
thickness: Thickness of the corner lines. Defaults to 3.
|
||||
proportion: Proportion of corner length to box dimensions. Defaults to 0.2.
|
||||
"""
|
||||
x1, y1, x2, y2 = map(int, bbox)
|
||||
width = x2 - x1
|
||||
@@ -177,15 +193,14 @@ def draw_fancy_bbox(
|
||||
def draw_gaze(
|
||||
image: np.ndarray,
|
||||
bbox: np.ndarray,
|
||||
pitch: np.ndarray,
|
||||
yaw: np.ndarray,
|
||||
pitch: np.ndarray | float,
|
||||
yaw: np.ndarray | float,
|
||||
*,
|
||||
draw_bbox: bool = True,
|
||||
fancy_bbox: bool = True,
|
||||
draw_angles: bool = True,
|
||||
):
|
||||
"""
|
||||
Draws gaze direction with optional bounding box on an image.
|
||||
) -> None:
|
||||
"""Draw gaze direction with optional bounding box on an image.
|
||||
|
||||
Args:
|
||||
image: Input image to draw on (modified in-place).
|
||||
@@ -194,7 +209,7 @@ def draw_gaze(
|
||||
yaw: Horizontal gaze angle in radians.
|
||||
draw_bbox: Whether to draw the bounding box. Defaults to True.
|
||||
fancy_bbox: Use fancy corner-style bbox. Defaults to True.
|
||||
draw_angles: Whether to display pitch/yaw values as text. Defaults to False.
|
||||
draw_angles: Whether to display pitch/yaw values as text. Defaults to True.
|
||||
"""
|
||||
x_min, y_min, x_max, y_max = map(int, bbox[:4])
|
||||
|
||||
@@ -275,29 +290,25 @@ def vis_parsing_maps(
|
||||
save_image: bool = False,
|
||||
save_path: str = 'result.png',
|
||||
) -> np.ndarray:
|
||||
"""
|
||||
Visualizes face parsing segmentation mask by overlaying colored regions on the image.
|
||||
"""Visualize face parsing segmentation mask by overlaying colored regions.
|
||||
|
||||
Args:
|
||||
image: Input face image in RGB format with shape (H, W, 3).
|
||||
segmentation_mask: Segmentation mask with shape (H, W) where each pixel
|
||||
value represents a facial component class (0-18).
|
||||
value represents a facial component class (0-18).
|
||||
save_image: Whether to save the visualization to disk. Defaults to False.
|
||||
save_path: Path to save the visualization if save_image is True.
|
||||
|
||||
Returns:
|
||||
np.ndarray: Blended image with segmentation overlay in BGR format.
|
||||
Blended image with segmentation overlay in BGR format.
|
||||
|
||||
Example:
|
||||
>>> import cv2
|
||||
>>> from uniface.parsing import BiSeNet
|
||||
>>> from uniface.visualization import vis_parsing_maps
|
||||
>>>
|
||||
>>> parser = BiSeNet()
|
||||
>>> face_image = cv2.imread('face.jpg')
|
||||
>>> mask = parser.parse(face_image)
|
||||
>>>
|
||||
>>> # Visualize
|
||||
>>> face_rgb = cv2.cvtColor(face_image, cv2.COLOR_BGR2RGB)
|
||||
>>> result = vis_parsing_maps(face_rgb, mask)
|
||||
>>> cv2.imwrite('parsed_face.jpg', result)