Mirror of https://github.com/yakhyo/uniface.git, synced 2025-12-30 09:02:25 +00:00

Compare commits (11 commits)
| Author | SHA1 | Date |
|---|---|---|
|  | 0c93598007 |  |
|  | 779952e3f8 |  |
|  | 39b50b62bd |  |
|  | db7532ecf1 |  |
|  | 4b8dc2c0f9 |  |
|  | 0a2a10e165 |  |
|  | 84cda5f56c |  |
|  | 0771a7959a |  |
|  | 15947eb605 |  |
|  | 1ccc4f6b77 |  |
|  | 189755a1a6 |  |
QUICKSTART.md

@@ -208,8 +208,9 @@ landmarks = landmarker.get_landmarks(image, bbox)
from uniface import AgeGender

predictor = AgeGender()
gender, age = predictor.predict(image, bbox)
# Returns: ("Male"/"Female", age_in_years)
gender_id, age = predictor.predict(image, bbox)
# Returns: (gender_id, age_in_years)
# gender_id: 0 for Female, 1 for Male
```

---

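This hunk changes `AgeGender.predict` to return a numeric `gender_id` instead of a gender string. As a minimal sketch of adapting caller code (not part of the diff; the detector and sample image path are assumptions borrowed from the scripts later in this changeset):

```python
import cv2

from uniface import AgeGender, RetinaFace

image = cv2.imread('assets/test.jpg')  # hypothetical sample path
detector = RetinaFace()
predictor = AgeGender()

GENDER_LABELS = {0: 'Female', 1: 'Male'}  # mapping documented in the hunk above

for face in detector.detect(image):
    gender_id, age = predictor.predict(image, face['bbox'])
    print(f'{GENDER_LABELS[gender_id]}, {age} years old')
```
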
@@ -45,6 +45,7 @@ for i, face in enumerate(faces):
```

**Output:**

```
Face 1:
  Confidence: 0.99
@@ -122,6 +123,7 @@ else:
```

**Similarity thresholds:**

- `> 0.6`: Same person (high confidence)
- `0.4 - 0.6`: Uncertain (manual review)
- `< 0.4`: Different people
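
For orientation, a minimal sketch (not part of the diff) that applies these thresholds with `compute_similarity` from `uniface.face_utils` and the detector/recognizer classes shown elsewhere in this changeset; the image paths are placeholders:

```python
import cv2

from uniface.detection import RetinaFace
from uniface.face_utils import compute_similarity
from uniface.recognition import ArcFace

detector, recognizer = RetinaFace(), ArcFace()

def embed(path: str):
    """Return a normalized embedding for the first face in the image."""
    image = cv2.imread(path)
    face = detector.detect(image)[0]  # assumes at least one face is found
    return recognizer.get_normalized_embedding(image, face['landmarks'])

sim = compute_similarity(embed('face1.jpg'), embed('face2.jpg'))
if sim > 0.6:
    print(f'{sim:.3f}: same person (high confidence)')
elif sim >= 0.4:
    print(f'{sim:.3f}: uncertain (manual review)')
else:
    print(f'{sim:.3f}: different people')
```
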
@@ -186,11 +188,13 @@ faces = detector.detect(image)

# Predict attributes
for i, face in enumerate(faces):
    gender, age = age_gender.predict(image, face['bbox'])
    gender_id, age = age_gender.predict(image, face['bbox'])
    gender = 'Female' if gender_id == 0 else 'Male'
    print(f"Face {i+1}: {gender}, {age} years old")
```

**Output:**

```
Face 1: Male, 32 years old
Face 2: Female, 28 years old
@@ -369,4 +373,3 @@ from uniface import retinaface  # Module, not class
---

Happy coding! 🚀

README.md (52 changes)

@@ -1,10 +1,11 @@
# UniFace: All-in-One Face Analysis Library

[License: MIT](https://opensource.org/licenses/MIT)
[PyPI](https://pypi.org/project/uniface/)
[Python](https://www.python.org/)
[PyPI](https://pypi.org/project/uniface/)
[CI](https://github.com/yakhyo/uniface/actions)
[Downloads](https://pepy.tech/project/uniface)
[Ruff](https://github.com/astral-sh/ruff)

<div align="center">
  <img src=".github/logos/logo_web.webp" width=75%>
@@ -56,6 +57,7 @@ pip install uniface[gpu]
```

**Requirements:**

- CUDA 11.x or 12.x
- cuDNN 8.x
- See [ONNX Runtime GPU requirements](https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html)
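
A quick way to confirm the GPU build is active (a sketch using onnxruntime's public `get_available_providers()`, not a uniface API):

```python
import onnxruntime as ort

providers = ort.get_available_providers()
print(providers)  # expect 'CUDAExecutionProvider' with onnxruntime-gpu installed
if 'CUDAExecutionProvider' not in providers:
    print('CUDA provider unavailable; inference will fall back to CPU.')
```
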
@@ -145,7 +147,8 @@ detector = RetinaFace()
age_gender = AgeGender()

faces = detector.detect(image)
gender, age = age_gender.predict(image, faces[0]['bbox'])
gender_id, age = age_gender.predict(image, faces[0]['bbox'])
gender = 'Female' if gender_id == 0 else 'Male'
print(f"{gender}, {age} years old")
```

@@ -217,17 +220,18 @@ faces = detect_faces(image, method='retinaface', conf_thresh=0.8)

### Face Detection (WIDER FACE Dataset)

| Model              | Easy   | Medium | Hard   | Use Case                |
|--------------------|--------|--------|--------|-------------------------|
| retinaface_mnet025 | 88.48% | 87.02% | 80.61% | Mobile/Edge devices     |
| retinaface_mnet_v2 | 91.70% | 91.03% | 86.60% | Balanced (recommended)  |
| retinaface_r34     | 94.16% | 93.12% | 88.90% | High accuracy           |
| scrfd_500m         | 90.57% | 88.12% | 68.51% | Real-time applications  |
| scrfd_10g          | 95.16% | 93.87% | 83.05% | Best accuracy/speed     |
| Model              | Easy   | Medium | Hard   | Use Case               |
| ------------------ | ------ | ------ | ------ | ---------------------- |
| retinaface_mnet025 | 88.48% | 87.02% | 80.61% | Mobile/Edge devices    |
| retinaface_mnet_v2 | 91.70% | 91.03% | 86.60% | Balanced (recommended) |
| retinaface_r34     | 94.16% | 93.12% | 88.90% | High accuracy          |
| scrfd_500m         | 90.57% | 88.12% | 68.51% | Real-time applications |
| scrfd_10g          | 95.16% | 93.87% | 83.05% | Best accuracy/speed    |

*Accuracy values from original papers: [RetinaFace](https://arxiv.org/abs/1905.00641), [SCRFD](https://arxiv.org/abs/2105.04714)*
_Accuracy values from original papers: [RetinaFace](https://arxiv.org/abs/1905.00641), [SCRFD](https://arxiv.org/abs/2105.04714)_

**Benchmark on your hardware:**

```bash
python scripts/run_detection.py --image assets/test.jpg --iterations 100
```
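
Outside the diff, a minimal timing sketch against the detector API directly, useful because the reworked `run_detection.py` later in this changeset drops the `--iterations` flag; `assets/test.jpg` is the sample path used throughout these docs:

```python
import time

import cv2

from uniface.detection import RetinaFace

detector = RetinaFace()
image = cv2.imread('assets/test.jpg')

detector.detect(image)  # warm-up: the first call can include model download/load
runs = 100
start = time.perf_counter()
for _ in range(runs):
    detector.detect(image)
elapsed = time.perf_counter() - start
print(f'{runs} runs, avg {elapsed / runs * 1000:.1f} ms per image')
```
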
@@ -394,12 +398,29 @@ pip install -e ".[dev]"

# Run tests
pytest

# Format code
black uniface/
isort uniface/
```

### Code Formatting

This project uses [Ruff](https://docs.astral.sh/ruff/) for linting and formatting.

```bash
# Format code
ruff format .

# Check for linting errors
ruff check .

# Auto-fix linting errors
ruff check . --fix
```

Ruff configuration is in `pyproject.toml`. Key settings:

- Line length: 120
- Python target: 3.10+
- Import sorting: `uniface` as first-party

### Project Structure

```
@@ -438,4 +459,3 @@ uniface/
## Contributing

Contributions are welcome! Please open an issue or submit a pull request on [GitHub](https://github.com/yakhyo/uniface).

examples/face_analyzer.ipynb (322 changes, new file)
File diff suppressed because one or more lines are too long

pyproject.toml

@@ -1,12 +1,41 @@
[project]
name = "uniface"
version = "1.1.0"
version = "1.1.2"
description = "UniFace: A Comprehensive Library for Face Detection, Recognition, Landmark Analysis, Age, and Gender Detection"
readme = "README.md"
license = { text = "MIT" }
authors = [
    { name = "Yakhyokhuja Valikhujaev", email = "yakhyo9696@gmail.com" }
authors = [{ name = "Yakhyokhuja Valikhujaev", email = "yakhyo9696@gmail.com" }]
maintainers = [
    { name = "Yakhyokhuja Valikhujaev", email = "yakhyo9696@gmail.com" },
]

requires-python = ">=3.10,<3.14"
keywords = [
    "face-detection",
    "face-recognition",
    "facial-landmarks",
    "age-detection",
    "gender-detection",
    "computer-vision",
    "deep-learning",
    "onnx",
    "onnxruntime",
    "face-analysis",
]

classifiers = [
    "Development Status :: 4 - Beta",
    "Intended Audience :: Developers",
    "Intended Audience :: Science/Research",
    "License :: OSI Approved :: MIT License",
    "Operating System :: OS Independent",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Programming Language :: Python :: 3.13",
]

dependencies = [
    "numpy>=1.21.0",
    "opencv-python>=4.5.0",
@@ -14,24 +43,56 @@ dependencies = [
    "onnxruntime>=1.16.0",
    "scikit-image>=0.19.0",
    "requests>=2.28.0",
    "tqdm>=4.64.0"
    "tqdm>=4.64.0",
]
requires-python = ">=3.10"

[project.optional-dependencies]
dev = ["pytest>=7.0.0"]
dev = ["pytest>=7.0.0", "ruff>=0.4.0"]
gpu = ["onnxruntime-gpu>=1.16.0"]

[project.urls]
Homepage = "https://github.com/yakhyo/uniface"
Repository = "https://github.com/yakhyo/uniface"
Documentation = "https://github.com/yakhyo/uniface/blob/main/README.md"
"Quick Start" = "https://github.com/yakhyo/uniface/blob/main/QUICKSTART.md"
"Model Zoo" = "https://github.com/yakhyo/uniface/blob/main/MODELS.md"

[build-system]
requires = ["setuptools>=64", "wheel"]
build-backend = "setuptools.build_meta"

[tool.setuptools]
packages = { find = {} }
packages = { find = { where = ["."], include = ["uniface*"] } }

[tool.setuptools.package-data]
"uniface" = ["*.txt", "*.md"]
uniface = ["py.typed"]

[tool.ruff]
line-length = 120
target-version = "py310"
exclude = [
    ".git",
    ".ruff_cache",
    "__pycache__",
    "build",
    "dist",
    "*.egg-info",
    ".venv",
    "venv",
    ".pytest_cache",
    ".mypy_cache",
    "*.ipynb",
]

[tool.ruff.format]
quote-style = "single"


[tool.ruff.lint]
select = ["E", "F", "I", "W"]

[tool.ruff.lint.flake8-quotes]
docstring-quotes = "double"

[tool.ruff.lint.isort]
known-first-party = ["uniface"]

scripts/README.md

@@ -1,97 +1,74 @@
# Scripts

Collection of example scripts demonstrating UniFace functionality.
Scripts for testing UniFace features.

## Available Scripts

- `run_detection.py` - Face detection on images
- `run_age_gender.py` - Age and gender prediction
- `run_landmarks.py` - Facial landmark detection
- `run_recognition.py` - Face recognition and embeddings
- `run_face_search.py` - Face search and matching
- `run_video_detection.py` - Video processing with face detection
- `batch_process.py` - Batch processing of image folders
- `download_model.py` - Download and manage models
| Script | Description |
|--------|-------------|
| `run_detection.py` | Face detection on image or webcam |
| `run_age_gender.py` | Age and gender prediction |
| `run_emotion.py` | Emotion detection (7 or 8 emotions) |
| `run_landmarks.py` | 106-point facial landmark detection |
| `run_recognition.py` | Face embedding extraction and comparison |
| `run_face_analyzer.py` | Complete face analysis (detection + recognition + attributes) |
| `run_face_search.py` | Real-time face matching against reference |
| `run_video_detection.py` | Face detection on video files |
| `batch_process.py` | Batch process folder of images |
| `download_model.py` | Download model weights |
| `sha256_generate.py` | Generate SHA256 hash for model files |

## Quick Start
## Usage Examples

```bash
# Face detection
python scripts/run_detection.py --image assets/test.jpg
python scripts/run_detection.py --webcam

# Age and gender detection
# Age and gender
python scripts/run_age_gender.py --image assets/test.jpg

# Webcam demo
python scripts/run_age_gender.py --webcam

# Emotion detection
python scripts/run_emotion.py --image assets/test.jpg
python scripts/run_emotion.py --webcam

# Landmarks
python scripts/run_landmarks.py --image assets/test.jpg
python scripts/run_landmarks.py --webcam

# Face recognition (extract embedding)
python scripts/run_recognition.py --image assets/test.jpg

# Face comparison
python scripts/run_recognition.py --image1 face1.jpg --image2 face2.jpg

# Face search (match webcam against reference)
python scripts/run_face_search.py --image reference.jpg

# Video processing
python scripts/run_video_detection.py --input video.mp4 --output output.mp4

# Batch processing
python scripts/batch_process.py --input images/ --output results/

# Download models
python scripts/download_model.py --model-type retinaface
python scripts/download_model.py  # downloads all
```

## Import Examples

The scripts use direct class imports for better developer experience:

```python
# Face Detection
from uniface.detection import RetinaFace, SCRFD

detector = RetinaFace()  # or SCRFD()
faces = detector.detect(image)

# Face Recognition
from uniface.recognition import ArcFace, MobileFace, SphereFace

recognizer = ArcFace()  # or MobileFace(), SphereFace()
embedding = recognizer.get_embedding(image, landmarks)

# Age & Gender
from uniface.attribute import AgeGender

age_gender = AgeGender()
gender, age = age_gender.predict(image, bbox)

# Landmarks
from uniface.landmark import Landmark106

landmarker = Landmark106()
landmarks = landmarker.get_landmarks(image, bbox)
```

## Available Classes

**Detection:**
- `RetinaFace` - High accuracy face detection
- `SCRFD` - Fast face detection

**Recognition:**
- `ArcFace` - High accuracy face recognition
- `MobileFace` - Lightweight face recognition
- `SphereFace` - Alternative face recognition

**Attributes:**
- `AgeGender` - Age and gender prediction

**Landmarks:**
- `Landmark106` - 106-point facial landmarks

## Common Options

Most scripts support:
- `--help` - Show usage information
- `--verbose` - Enable detailed logging
- `--detector` - Choose detector (retinaface, scrfd)
- `--threshold` - Set confidence threshold
| Option | Description |
|--------|-------------|
| `--image` | Path to input image |
| `--webcam` | Use webcam instead of image |
| `--detector` | Choose detector: `retinaface` or `scrfd` |
| `--threshold` | Visualization confidence threshold (default: 0.6) |
| `--save_dir` | Output directory (default: `outputs`) |

## Testing
## Quick Test

Run basic functionality test:
```bash
python scripts/run_detection.py --image assets/test.jpg
```

For comprehensive testing, see the main project tests:
```bash
pytest tests/
```

scripts/batch_process.py

@@ -1,157 +1,96 @@
"""Batch Image Processing Script"""
# Batch face detection on a folder of images
# Usage: python batch_process.py --input images/ --output results/

import os
import cv2
import argparse
from pathlib import Path

import cv2
from tqdm import tqdm

from uniface import RetinaFace, SCRFD
from uniface import SCRFD, RetinaFace
from uniface.visualization import draw_detections


def get_image_files(input_dir: Path, extensions: tuple) -> list:
    image_files = []
    files = []
    for ext in extensions:
        image_files.extend(input_dir.glob(f"*.{ext}"))
        image_files.extend(input_dir.glob(f"*.{ext.upper()}"))

    return sorted(image_files)
        files.extend(input_dir.glob(f'*.{ext}'))
        files.extend(input_dir.glob(f'*.{ext.upper()}'))
    return sorted(files)


def process_single_image(detector, image_path: Path, output_dir: Path,
                         vis_threshold: float, skip_existing: bool) -> dict:
    output_path = output_dir / f"{image_path.stem}_detected{image_path.suffix}"

    # Skip if already processed
    if skip_existing and output_path.exists():
        return {"status": "skipped", "faces": 0}

    # Load image
def process_image(detector, image_path: Path, output_path: Path, threshold: float) -> int:
    """Process single image. Returns face count or -1 on error."""
    image = cv2.imread(str(image_path))
    if image is None:
        return {"status": "error", "error": "Failed to load image"}
        return -1

    # Detect faces
    try:
        faces = detector.detect(image)
    except Exception as e:
        return {"status": "error", "error": str(e)}
    faces = detector.detect(image)

    # Draw detections
    # unpack face data for visualization
    bboxes = [f['bbox'] for f in faces]
    scores = [f['confidence'] for f in faces]
    landmarks = [f['landmarks'] for f in faces]
    draw_detections(image, bboxes, scores, landmarks, vis_threshold=vis_threshold)
    draw_detections(image, bboxes, scores, landmarks, vis_threshold=threshold)

    # Add face count
    cv2.putText(image, f"Faces: {len(faces)}", (10, 30),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

    # Save result
    cv2.putText(
        image,
        f'Faces: {len(faces)}',
        (10, 30),
        cv2.FONT_HERSHEY_SIMPLEX,
        1,
        (0, 255, 0),
        2,
    )
    cv2.imwrite(str(output_path), image)

    return {"status": "success", "faces": len(faces)}


def batch_process(detector, input_dir: str, output_dir: str, extensions: tuple,
                  vis_threshold: float, skip_existing: bool):
    input_path = Path(input_dir)
    output_path = Path(output_dir)

    # Create output directory
    output_path.mkdir(parents=True, exist_ok=True)

    # Get image files
    image_files = get_image_files(input_path, extensions)

    if not image_files:
        print(f"No image files found in '{input_dir}' with extensions {extensions}")
        return

    print(f"Input: {input_dir}")
    print(f"Output: {output_dir}")
    print(f"Found {len(image_files)} images\n")

    # Process images
    results = {
        "success": 0,
        "skipped": 0,
        "error": 0,
        "total_faces": 0
    }

    with tqdm(image_files, desc="Processing images", unit="img") as pbar:
        for image_path in pbar:
            result = process_single_image(
                detector, image_path, output_path,
                vis_threshold, skip_existing
            )

            if result["status"] == "success":
                results["success"] += 1
                results["total_faces"] += result["faces"]
                pbar.set_postfix({"faces": result["faces"]})
            elif result["status"] == "skipped":
                results["skipped"] += 1
            else:
                results["error"] += 1
                print(f"\nError processing {image_path.name}: {result.get('error', 'Unknown error')}")

    # Print summary
    print(f"\nBatch processing complete!")
    print(f"  Total images: {len(image_files)}")
    print(f"  Successfully processed: {results['success']}")
    print(f"  Skipped: {results['skipped']}")
    print(f"  Errors: {results['error']}")
    print(f"  Total faces detected: {results['total_faces']}")
    if results['success'] > 0:
        print(f"  Average faces per image: {results['total_faces']/results['success']:.2f}")
    print(f"\nResults saved to: {output_dir}")
    return len(faces)


def main():
    parser = argparse.ArgumentParser(description="Batch process images with face detection")
    parser.add_argument("--input", type=str, required=True,
                        help="Input directory containing images")
    parser.add_argument("--output", type=str, required=True,
                        help="Output directory for processed images")
    parser.add_argument("--detector", type=str, default="retinaface",
                        choices=['retinaface', 'scrfd'], help="Face detector to use")
    parser.add_argument("--threshold", type=float, default=0.6,
                        help="Confidence threshold for visualization")
    parser.add_argument("--extensions", type=str, default="jpg,jpeg,png,bmp",
                        help="Comma-separated list of image extensions")
    parser.add_argument("--skip_existing", action="store_true",
                        help="Skip files that already exist in output directory")
    parser.add_argument("--verbose", action="store_true", help="Enable verbose logging")

    parser = argparse.ArgumentParser(description='Batch process images with face detection')
    parser.add_argument('--input', type=str, required=True, help='Input directory')
    parser.add_argument('--output', type=str, required=True, help='Output directory')
    parser.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
    parser.add_argument('--threshold', type=float, default=0.6, help='Visualization threshold')
    parser.add_argument('--extensions', type=str, default='jpg,jpeg,png,bmp', help='Image extensions')
    args = parser.parse_args()

    # Check input directory exists
    if not Path(args.input).exists():
    input_path = Path(args.input)
    output_path = Path(args.output)

    if not input_path.exists():
        print(f"Error: Input directory '{args.input}' does not exist")
        return

    if args.verbose:
        from uniface import enable_logging
        enable_logging()
    output_path.mkdir(parents=True, exist_ok=True)

    # Parse extensions
    extensions = tuple(ext.strip() for ext in args.extensions.split(','))
    image_files = get_image_files(input_path, extensions)

    # Initialize detector
    print(f"Initializing detector: {args.detector}")
    if args.detector == 'retinaface':
        detector = RetinaFace()
    else:
        detector = SCRFD()
    print("Detector initialized\n")
    if not image_files:
        print(f'No images found with extensions {extensions}')
        return

    # Process batch
    batch_process(detector, args.input, args.output, extensions,
                  args.threshold, args.skip_existing)
    print(f'Found {len(image_files)} images')

    detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()

    success, errors, total_faces = 0, 0, 0

    for img_path in tqdm(image_files, desc='Processing', unit='img'):
        out_path = output_path / f'{img_path.stem}_detected{img_path.suffix}'
        result = process_image(detector, img_path, out_path, args.threshold)

        if result >= 0:
            success += 1
            total_faces += result
        else:
            errors += 1
            print(f'\nFailed: {img_path.name}')

    print(f'\nDone! {success} processed, {errors} errors, {total_faces} faces total')


if __name__ == "__main__":
if __name__ == '__main__':
    main()

scripts/download_model.py

@@ -1,13 +1,18 @@
import argparse

from uniface.constants import (
    RetinaFaceWeights, SphereFaceWeights, MobileFaceWeights, ArcFaceWeights,
    SCRFDWeights, DDAMFNWeights, AgeGenderWeights, LandmarkWeights
    AgeGenderWeights,
    ArcFaceWeights,
    DDAMFNWeights,
    LandmarkWeights,
    MobileFaceWeights,
    RetinaFaceWeights,
    SCRFDWeights,
    SphereFaceWeights,
)
from uniface.model_store import verify_model_weights


# All available model types
ALL_MODEL_TYPES = {
MODEL_TYPES = {
    'retinaface': RetinaFaceWeights,
    'sphereface': SphereFaceWeights,
    'mobileface': MobileFaceWeights,
@@ -19,59 +24,37 @@ ALL_MODEL_TYPES = {
}


def download_models(model_enum):
    for weight in model_enum:
        print(f'Downloading: {weight.value}')
        try:
            verify_model_weights(weight)
            print(f'  Done: {weight.value}')
        except Exception as e:
            print(f'  Failed: {e}')


def main():
    parser = argparse.ArgumentParser(description="Download and verify model weights.")
    parser = argparse.ArgumentParser(description='Download model weights')
    parser.add_argument(
        "--model-type",
        '--model-type',
        type=str,
        choices=list(ALL_MODEL_TYPES.keys()),
        help="Model type to download (e.g. retinaface, arcface). If not specified, all models will be downloaded.",
    )
    parser.add_argument(
        "--model",
        type=str,
        help="Specific model to download (e.g. MNET_V2). For RetinaFace backward compatibility.",
        choices=list(MODEL_TYPES.keys()),
        help='Model type to download. If not specified, downloads all.',
    )
    args = parser.parse_args()

    if args.model and not args.model_type:
        # Backward compatibility - assume RetinaFace
        try:
            weight = RetinaFaceWeights[args.model]
            print(f"Downloading RetinaFace model: {weight.value}")
            verify_model_weights(weight)
            print("Model downloaded successfully.")
        except KeyError:
            print(f"Invalid RetinaFace model: {args.model}")
            print(f"Available models: {[m.name for m in RetinaFaceWeights]}")
        return

    if args.model_type:
        # Download all models from specific type
        model_enum = ALL_MODEL_TYPES[args.model_type]
        print(f"Downloading all {args.model_type} models...")
        for weight in model_enum:
            print(f"Downloading: {weight.value}")
            try:
                verify_model_weights(weight)
                print(f"Downloaded: {weight.value}")
            except Exception as e:
                print(f"Failed to download {weight.value}: {e}")
        print(f'Downloading {args.model_type} models...')
        download_models(MODEL_TYPES[args.model_type])
    else:
        # Download all models from all types
        print("Downloading all models...")
        for model_type, model_enum in ALL_MODEL_TYPES.items():
            print(f"\nDownloading {model_type} models...")
            for weight in model_enum:
                print(f"Downloading: {weight.value}")
                try:
                    verify_model_weights(weight)
                    print(f"Downloaded: {weight.value}")
                except Exception as e:
                    print(f"Failed to download {weight.value}: {e}")
        print('Downloading all models...')
        for name, model_enum in MODEL_TYPES.items():
            print(f'\n{name}:')
            download_models(model_enum)

    print("\nDownload process completed.")
    print('\nDone!')


if __name__ == "__main__":
if __name__ == '__main__':
    main()

scripts/run_age_gender.py

@@ -1,163 +1,126 @@
"""Age and Gender Detection Demo Script"""
# Age and gender prediction on detected faces
# Usage: python run_age_gender.py --image path/to/image.jpg
#        python run_age_gender.py --webcam

import os
import cv2
import argparse
import os
from pathlib import Path

from uniface import RetinaFace, SCRFD, AgeGender
import cv2

from uniface import SCRFD, AgeGender, RetinaFace
from uniface.visualization import draw_detections


def process_image(detector, age_gender, image_path: str, save_dir: str = "outputs", vis_threshold: float = 0.6):
def draw_age_gender_label(image, bbox, gender_id: int, age: int):
    """Draw age/gender label above the bounding box."""
    x1, y1 = int(bbox[0]), int(bbox[1])
    gender_str = 'Female' if gender_id == 0 else 'Male'
    text = f'{gender_str}, {age}y'
    (tw, th), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
    cv2.rectangle(image, (x1, y1 - th - 10), (x1 + tw + 10, y1), (0, 255, 0), -1)
    cv2.putText(image, text, (x1 + 5, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 2)


def process_image(
    detector,
    age_gender,
    image_path: str,
    save_dir: str = 'outputs',
    threshold: float = 0.6,
):
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Failed to load image from '{image_path}'")
        return

    print(f"Processing: {image_path}")

    # Detect faces
    faces = detector.detect(image)
    print(f"  Detected {len(faces)} face(s)")
    print(f'Detected {len(faces)} face(s)')

    if not faces:
        print("  No faces detected")
        return

    # Draw detections
    bboxes = [f['bbox'] for f in faces]
    scores = [f['confidence'] for f in faces]
    landmarks = [f['landmarks'] for f in faces]
    draw_detections(image, bboxes, scores, landmarks, vis_threshold=vis_threshold)
    draw_detections(image, bboxes, scores, landmarks, vis_threshold=threshold)

    # Predict and draw age/gender for each face
    for i, face in enumerate(faces):
        gender, age = age_gender.predict(image, face['bbox'])
        print(f"  Face {i+1}: {gender}, {age} years old")
        gender_id, age = age_gender.predict(image, face['bbox'])
        gender_str = 'Female' if gender_id == 0 else 'Male'
        print(f'  Face {i + 1}: {gender_str}, {age} years old')
        draw_age_gender_label(image, face['bbox'], gender_id, age)

        # Draw age and gender text
        bbox = face['bbox']
        x1, y1 = int(bbox[0]), int(bbox[1])
        text = f"{gender}, {age}y"

        # Background rectangle for text
        (text_width, text_height), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
        cv2.rectangle(image, (x1, y1 - text_height - 10),
                      (x1 + text_width + 10, y1), (0, 255, 0), -1)
        cv2.putText(image, text, (x1 + 5, y1 - 5),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 2)

    # Save result
    os.makedirs(save_dir, exist_ok=True)
    output_path = os.path.join(save_dir, f"{Path(image_path).stem}_age_gender.jpg")
    output_path = os.path.join(save_dir, f'{Path(image_path).stem}_age_gender.jpg')
    cv2.imwrite(output_path, image)
    print(f"Output saved: {output_path}")
    print(f'Output saved: {output_path}')


def run_webcam(detector, age_gender, vis_threshold: float = 0.6):
    cap = cv2.VideoCapture(0)

def run_webcam(detector, age_gender, threshold: float = 0.6):
    cap = cv2.VideoCapture(0)  # 0 = default webcam
    if not cap.isOpened():
        print("Cannot open webcam")
        print('Cannot open webcam')
        return

    print("Webcam opened")
    print("Press 'q' to quit\n")
    print("Press 'q' to quit")

    frame_count = 0
    while True:
        ret, frame = cap.read()
        frame = cv2.flip(frame, 1)  # mirror for natural interaction
        if not ret:
            break

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
        faces = detector.detect(frame)

            frame_count += 1
        # unpack face data for visualization
        bboxes = [f['bbox'] for f in faces]
        scores = [f['confidence'] for f in faces]
        landmarks = [f['landmarks'] for f in faces]
        draw_detections(frame, bboxes, scores, landmarks, vis_threshold=threshold)

            # Detect faces
            faces = detector.detect(frame)
        for face in faces:
            gender_id, age = age_gender.predict(frame, face['bbox'])  # predict per face
            draw_age_gender_label(frame, face['bbox'], gender_id, age)

            # Draw detections
            bboxes = [f['bbox'] for f in faces]
            scores = [f['confidence'] for f in faces]
            landmarks = [f['landmarks'] for f in faces]
            draw_detections(frame, bboxes, scores, landmarks, vis_threshold=vis_threshold)
        cv2.putText(
            frame,
            f'Faces: {len(faces)}',
            (10, 30),
            cv2.FONT_HERSHEY_SIMPLEX,
            1,
            (0, 255, 0),
            2,
        )
        cv2.imshow('Age & Gender Detection', frame)

            # Predict and draw age/gender for each face
            for face in faces:
                gender, age = age_gender.predict(frame, face['bbox'])
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

                # Draw age and gender text
                bbox = face['bbox']
                x1, y1 = int(bbox[0]), int(bbox[1])
                text = f"{gender}, {age}y"

                # Background rectangle for text
                (text_width, text_height), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
                cv2.rectangle(frame, (x1, y1 - text_height - 10),
                              (x1 + text_width + 10, y1), (0, 255, 0), -1)
                cv2.putText(frame, text, (x1 + 5, y1 - 5),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 2)

            # Add info
            cv2.putText(frame, f"Faces: {len(faces)}", (10, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            cv2.putText(frame, "Press 'q' to quit", (10, frame.shape[0] - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)

            cv2.imshow("Age & Gender Detection", frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

    except KeyboardInterrupt:
        print("\nInterrupted")
    finally:
        cap.release()
        cv2.destroyAllWindows()
        print(f"\nProcessed {frame_count} frames")
    cap.release()
    cv2.destroyAllWindows()


def main():
    parser = argparse.ArgumentParser(description="Run age and gender detection")
    parser.add_argument("--image", type=str, help="Path to input image")
    parser.add_argument("--webcam", action="store_true", help="Use webcam instead of image")
    parser.add_argument("--detector", type=str, default="retinaface",
                        choices=['retinaface', 'scrfd'], help="Face detector to use")
    parser.add_argument("--threshold", type=float, default=0.6,
                        help="Confidence threshold for visualization")
    parser.add_argument("--save_dir", type=str, default="outputs",
                        help="Directory to save output images")
    parser.add_argument("--verbose", action="store_true", help="Enable verbose logging")

    parser = argparse.ArgumentParser(description='Run age and gender detection')
    parser.add_argument('--image', type=str, help='Path to input image')
    parser.add_argument('--webcam', action='store_true', help='Use webcam')
    parser.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
    parser.add_argument('--threshold', type=float, default=0.6, help='Visualization threshold')
    parser.add_argument('--save_dir', type=str, default='outputs')
    args = parser.parse_args()

    # Validate input
    if not args.image and not args.webcam:
        parser.error("Either --image or --webcam must be specified")
        parser.error('Either --image or --webcam must be specified')

    if args.verbose:
        from uniface import enable_logging
        enable_logging()

    # Initialize models
    print(f"Initializing detector: {args.detector}")
    if args.detector == 'retinaface':
        detector = RetinaFace()
    else:
        detector = SCRFD()

    print("Initializing age/gender model...")
    detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
    age_gender = AgeGender()
    print("Models initialized\n")

    # Process
    if args.webcam:
        run_webcam(detector, age_gender, args.threshold)
    else:
        process_image(detector, age_gender, args.image, args.save_dir, args.threshold)


if __name__ == "__main__":
if __name__ == '__main__':
    main()

scripts/run_detection.py

@@ -1,80 +1,95 @@
import os
import cv2
import time
import argparse
import numpy as np
# Face detection on image or webcam
# Usage: python run_detection.py --image path/to/image.jpg
#        python run_detection.py --webcam

from uniface.detection import RetinaFace, SCRFD
import argparse
import os

import cv2

from uniface.detection import SCRFD, RetinaFace
from uniface.visualization import draw_detections


def run_inference(detector, image_path: str, vis_threshold: float = 0.6, save_dir: str = "outputs"):
def process_image(detector, image_path: str, threshold: float = 0.6, save_dir: str = 'outputs'):
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Failed to load image from '{image_path}'")
        return

    # 1. Get the list of face dictionaries from the detector
    faces = detector.detect(image)

    if faces:
        # 2. Unpack the data into separate lists
        bboxes = [face['bbox'] for face in faces]
        scores = [face['confidence'] for face in faces]
        landmarks = [face['landmarks'] for face in faces]

        # 3. Pass the unpacked lists to the drawing function
        draw_detections(image, bboxes, scores, landmarks, vis_threshold=0.6)

        draw_detections(image, bboxes, scores, landmarks, vis_threshold=threshold)

    os.makedirs(save_dir, exist_ok=True)
    output_path = os.path.join(save_dir, f"{os.path.splitext(os.path.basename(image_path))[0]}_out.jpg")
    output_path = os.path.join(save_dir, f'{os.path.splitext(os.path.basename(image_path))[0]}_out.jpg')
    cv2.imwrite(output_path, image)
    print(f"Output saved at: {output_path}")
    print(f'Output saved: {output_path}')


def run_webcam(detector, threshold: float = 0.6):
    cap = cv2.VideoCapture(0)  # 0 = default webcam
    if not cap.isOpened():
        print('Cannot open webcam')
        return

    print("Press 'q' to quit")

    while True:
        ret, frame = cap.read()
        frame = cv2.flip(frame, 1)  # mirror for natural interaction
        if not ret:
            break

        faces = detector.detect(frame)

        # unpack face data for visualization
        bboxes = [f['bbox'] for f in faces]
        scores = [f['confidence'] for f in faces]
        landmarks = [f['landmarks'] for f in faces]
        draw_detections(frame, bboxes, scores, landmarks, vis_threshold=threshold)

        cv2.putText(
            frame,
            f'Faces: {len(faces)}',
            (10, 30),
            cv2.FONT_HERSHEY_SIMPLEX,
            1,
            (0, 255, 0),
            2,
        )
        cv2.imshow('Face Detection', frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()


def main():
    parser = argparse.ArgumentParser(description="Run face detection on an image.")
    parser.add_argument("--image", type=str, required=True, help="Path to the input image")
    parser.add_argument(
        "--method",
        type=str,
        default="retinaface",
        choices=['retinaface', 'scrfd'],
        help="Detection method to use."
    )
    parser.add_argument("--threshold", type=float, default=0.6, help="Visualization confidence threshold")
    parser.add_argument("--iterations", type=int, default=1, help="Number of inference runs for benchmarking")
    parser.add_argument("--save_dir", type=str, default="outputs", help="Directory to save output images")
    parser.add_argument("--verbose", action="store_true", help="Enable verbose logging")

    parser = argparse.ArgumentParser(description='Run face detection')
    parser.add_argument('--image', type=str, help='Path to input image')
    parser.add_argument('--webcam', action='store_true', help='Use webcam')
    parser.add_argument('--method', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
    parser.add_argument('--threshold', type=float, default=0.6, help='Visualization threshold')
    parser.add_argument('--save_dir', type=str, default='outputs')
    args = parser.parse_args()

    if args.verbose:
        from uniface import enable_logging
        enable_logging()
    if not args.image and not args.webcam:
        parser.error('Either --image or --webcam must be specified')

    print(f"Initializing detector: {args.method}")
    if args.method == 'retinaface':
        detector = RetinaFace()
    detector = RetinaFace() if args.method == 'retinaface' else SCRFD()

    if args.webcam:
        run_webcam(detector, args.threshold)
    else:
        detector = SCRFD()

    avg_time = 0
    for i in range(args.iterations):
        start = time.time()
        run_inference(detector, args.image, args.threshold, args.save_dir)
        elapsed = time.time() - start
        print(f"[{i + 1}/{args.iterations}] Inference time: {elapsed:.4f} seconds")
        if i >= 0:  # Avoid counting the first run if it includes model loading time
            avg_time += elapsed

    if args.iterations > 1:
        # Adjust average calculation to exclude potential first-run overhead
        effective_iterations = max(1, args.iterations)
        print(
            f"\nAverage inference time over {effective_iterations} runs: {avg_time / effective_iterations:.4f} seconds")
        process_image(detector, args.image, args.threshold, args.save_dir)


if __name__ == "__main__":
if __name__ == '__main__':
    main()

scripts/run_emotion.py (124 changes, new file)

@@ -0,0 +1,124 @@
# Emotion detection on detected faces
# Usage: python run_emotion.py --image path/to/image.jpg
#        python run_emotion.py --webcam

import argparse
import os
from pathlib import Path

import cv2

from uniface import SCRFD, Emotion, RetinaFace
from uniface.visualization import draw_detections


def draw_emotion_label(image, bbox, emotion: str, confidence: float):
    """Draw emotion label above the bounding box."""
    x1, y1 = int(bbox[0]), int(bbox[1])
    text = f'{emotion} ({confidence:.2f})'
    (tw, th), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
    cv2.rectangle(image, (x1, y1 - th - 10), (x1 + tw + 10, y1), (255, 0, 0), -1)
    cv2.putText(image, text, (x1 + 5, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)


def process_image(
    detector,
    emotion_predictor,
    image_path: str,
    save_dir: str = 'outputs',
    threshold: float = 0.6,
):
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Failed to load image from '{image_path}'")
        return

    faces = detector.detect(image)
    print(f'Detected {len(faces)} face(s)')

    if not faces:
        return

    bboxes = [f['bbox'] for f in faces]
    scores = [f['confidence'] for f in faces]
    landmarks = [f['landmarks'] for f in faces]
    draw_detections(image, bboxes, scores, landmarks, vis_threshold=threshold)

    for i, face in enumerate(faces):
        emotion, confidence = emotion_predictor.predict(image, face['landmarks'])
        print(f'  Face {i + 1}: {emotion} (confidence: {confidence:.3f})')
        draw_emotion_label(image, face['bbox'], emotion, confidence)

    os.makedirs(save_dir, exist_ok=True)
    output_path = os.path.join(save_dir, f'{Path(image_path).stem}_emotion.jpg')
    cv2.imwrite(output_path, image)
    print(f'Output saved: {output_path}')


def run_webcam(detector, emotion_predictor, threshold: float = 0.6):
    cap = cv2.VideoCapture(0)  # 0 = default webcam
    if not cap.isOpened():
        print('Cannot open webcam')
        return

    print("Press 'q' to quit")

    while True:
        ret, frame = cap.read()
        frame = cv2.flip(frame, 1)  # mirror for natural interaction
        if not ret:
            break

        faces = detector.detect(frame)

        # unpack face data for visualization
        bboxes = [f['bbox'] for f in faces]
        scores = [f['confidence'] for f in faces]
        landmarks = [f['landmarks'] for f in faces]
        draw_detections(frame, bboxes, scores, landmarks, vis_threshold=threshold)

        for face in faces:
            emotion, confidence = emotion_predictor.predict(frame, face['landmarks'])
            draw_emotion_label(frame, face['bbox'], emotion, confidence)

        cv2.putText(
            frame,
            f'Faces: {len(faces)}',
            (10, 30),
            cv2.FONT_HERSHEY_SIMPLEX,
            1,
            (0, 255, 0),
            2,
        )
        cv2.imshow('Emotion Detection', frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()


def main():
    parser = argparse.ArgumentParser(description='Run emotion detection')
    parser.add_argument('--image', type=str, help='Path to input image')
    parser.add_argument('--webcam', action='store_true', help='Use webcam')
    parser.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
    parser.add_argument('--threshold', type=float, default=0.6, help='Visualization threshold')
    parser.add_argument('--save_dir', type=str, default='outputs')
    args = parser.parse_args()

    if not args.image and not args.webcam:
        parser.error('Either --image or --webcam must be specified')

    detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
    emotion_predictor = Emotion()

    if args.webcam:
        run_webcam(detector, emotion_predictor, args.threshold)
    else:
        process_image(detector, emotion_predictor, args.image, args.save_dir, args.threshold)


if __name__ == '__main__':
    main()

scripts/run_face_analyzer.py (116 changes, new file)

@@ -0,0 +1,116 @@
# Face analysis using FaceAnalyzer
# Usage: python run_face_analyzer.py --image path/to/image.jpg

import argparse
import os
from pathlib import Path

import cv2
import numpy as np

from uniface import AgeGender, ArcFace, FaceAnalyzer, RetinaFace
from uniface.visualization import draw_detections


def draw_face_info(image, face, face_id):
    """Draw face ID and attributes above bounding box."""
    x1, y1, x2, y2 = map(int, face.bbox)
    lines = [f'ID: {face_id}', f'Conf: {face.confidence:.2f}']
    if face.age and face.gender:
        lines.append(f'{face.gender}, {face.age}y')

    for i, line in enumerate(lines):
        y_pos = y1 - 10 - (len(lines) - 1 - i) * 25
        if y_pos < 20:
            y_pos = y2 + 20 + i * 25
        (tw, th), _ = cv2.getTextSize(line, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
        cv2.rectangle(image, (x1, y_pos - th - 5), (x1 + tw + 10, y_pos + 5), (0, 255, 0), -1)
        cv2.putText(image, line, (x1 + 5, y_pos), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 2)


def process_image(analyzer, image_path: str, save_dir: str = 'outputs', show_similarity: bool = True):
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Failed to load image from '{image_path}'")
        return

    faces = analyzer.analyze(image)
    print(f'Detected {len(faces)} face(s)')

    if not faces:
        return

    for i, face in enumerate(faces, 1):
        info = f'  Face {i}: {face.gender}, {face.age}y' if face.age and face.gender else f'  Face {i}'
        if face.embedding is not None:
            info += f' (embedding: {face.embedding.shape})'
        print(info)

    if show_similarity and len(faces) >= 2:
        print('\nSimilarity Matrix:')
        n = len(faces)
        sim_matrix = np.zeros((n, n))

        for i in range(n):
            for j in range(i, n):
                if i == j:
                    sim_matrix[i][j] = 1.0
                else:
                    sim = faces[i].compute_similarity(faces[j])
                    sim_matrix[i][j] = sim
                    sim_matrix[j][i] = sim

        print('     ', end='')
        for i in range(n):
            print(f'  F{i + 1:2d}  ', end='')
        print('\n    ' + '-' * (7 * n))

        for i in range(n):
            print(f'F{i + 1:2d} | ', end='')
            for j in range(n):
                print(f'{sim_matrix[i][j]:6.3f} ', end='')
            print()

        pairs = [(i, j, sim_matrix[i][j]) for i in range(n) for j in range(i + 1, n)]
        pairs.sort(key=lambda x: x[2], reverse=True)

        print('\nTop matches (>0.4 = same person):')
        for i, j, sim in pairs[:3]:
            status = 'Same' if sim > 0.4 else 'Different'
            print(f'  Face {i + 1} ↔ Face {j + 1}: {sim:.3f} ({status})')

    bboxes = [f.bbox for f in faces]
    scores = [f.confidence for f in faces]
    landmarks = [f.landmarks for f in faces]
    draw_detections(image, bboxes, scores, landmarks)

    for i, face in enumerate(faces, 1):
        draw_face_info(image, face, i)

    os.makedirs(save_dir, exist_ok=True)
    output_path = os.path.join(save_dir, f'{Path(image_path).stem}_analysis.jpg')
    cv2.imwrite(output_path, image)
    print(f'Output saved: {output_path}')


def main():
    parser = argparse.ArgumentParser(description='Face analysis with detection, recognition, and attributes')
    parser.add_argument('--image', type=str, required=True, help='Path to input image')
    parser.add_argument('--save_dir', type=str, default='outputs', help='Output directory')
    parser.add_argument('--no-similarity', action='store_true', help='Skip similarity matrix computation')
    args = parser.parse_args()

    if not os.path.exists(args.image):
        print(f'Error: Image not found: {args.image}')
        return

    detector = RetinaFace()
    recognizer = ArcFace()
    age_gender = AgeGender()
    analyzer = FaceAnalyzer(detector, recognizer, age_gender)

    process_image(analyzer, args.image, args.save_dir, show_similarity=not args.no_similarity)


if __name__ == '__main__':
    main()

scripts/run_face_search.py

@@ -1,65 +1,70 @@
# Real-time face search: match webcam faces against a reference image
# Usage: python run_face_search.py --image reference.jpg

import argparse

import cv2
import numpy as np

from uniface.detection import RetinaFace, SCRFD
from uniface.detection import SCRFD, RetinaFace
from uniface.face_utils import compute_similarity
from uniface.recognition import ArcFace, MobileFace, SphereFace


def get_recognizer(name: str):
    if name == 'arcface':
        return ArcFace()
    elif name == 'mobileface':
        return MobileFace()
    else:
        return SphereFace()


def extract_reference_embedding(detector, recognizer, image_path: str) -> np.ndarray:
    image = cv2.imread(image_path)
    if image is None:
        raise RuntimeError(f"Failed to load image: {image_path}")
        raise RuntimeError(f'Failed to load image: {image_path}')

    faces = detector.detect(image)
    if not faces:
        raise RuntimeError("No faces found in reference image.")
        raise RuntimeError('No faces found in reference image.')

    # Get landmarks from the first detected face dictionary
    landmarks = np.array(faces[0]["landmarks"])

    # Use normalized embedding for more reliable similarity comparison
    embedding = recognizer.get_normalized_embedding(image, landmarks)
    return embedding
    landmarks = faces[0]['landmarks']
    return recognizer.get_normalized_embedding(image, landmarks)


def run_video(detector, recognizer, ref_embedding: np.ndarray, threshold: float = 0.4):
    cap = cv2.VideoCapture(0)
def run_webcam(detector, recognizer, ref_embedding: np.ndarray, threshold: float = 0.4):
    cap = cv2.VideoCapture(0)  # 0 = default webcam
    if not cap.isOpened():
        raise RuntimeError("Webcam could not be opened.")
    print("Webcam started. Press 'q' to quit.")
        raise RuntimeError('Webcam could not be opened.')

    print("Press 'q' to quit")

    while True:
        ret, frame = cap.read()
        frame = cv2.flip(frame, 1)  # mirror for natural interaction
        if not ret:
            break

        faces = detector.detect(frame)

        # Loop through each detected face
        for face in faces:
            # Extract bbox and landmarks from the dictionary
            bbox = face["bbox"]
            landmarks = np.array(face["landmarks"])

            bbox = face['bbox']
            landmarks = face['landmarks']
            x1, y1, x2, y2 = map(int, bbox)

            # Get the normalized embedding for the current face
            embedding = recognizer.get_normalized_embedding(frame, landmarks)
            sim = compute_similarity(ref_embedding, embedding)  # compare with reference

            # Compare with the reference embedding
            sim = compute_similarity(ref_embedding, embedding)

            # Draw results
            label = f"Match ({sim:.2f})" if sim > threshold else f"Unknown ({sim:.2f})"
            # green = match, red = unknown
            label = f'Match ({sim:.2f})' if sim > threshold else f'Unknown ({sim:.2f})'
            color = (0, 255, 0) if sim > threshold else (0, 0, 255)

            cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
            cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)

        cv2.imshow("Face Recognition", frame)
        if cv2.waitKey(1) & 0xFF == ord("q"):
        cv2.imshow('Face Recognition', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
@@ -67,44 +72,26 @@ def run_video(detector, recognizer, ref_embedding: np.ndarray, threshold: float


def main():
    parser = argparse.ArgumentParser(description="Face recognition using a reference image.")
    parser.add_argument("--image", type=str, required=True, help="Path to the reference face image.")
    parser = argparse.ArgumentParser(description='Face search using a reference image')
    parser.add_argument('--image', type=str, required=True, help='Reference face image')
    parser.add_argument('--threshold', type=float, default=0.4, help='Match threshold')
    parser.add_argument('--detector', type=str, default='scrfd', choices=['retinaface', 'scrfd'])
    parser.add_argument(
        "--detector", type=str, default="scrfd", choices=["retinaface", "scrfd"], help="Face detection method."
    )
    parser.add_argument(
        "--recognizer",
        '--recognizer',
        type=str,
        default="arcface",
        choices=["arcface", "mobileface", "sphereface"],
        help="Face recognition method.",
        default='arcface',
        choices=['arcface', 'mobileface', 'sphereface'],
    )
    parser.add_argument("--verbose", action="store_true", help="Enable verbose logging")
    args = parser.parse_args()

    if args.verbose:
        from uniface import enable_logging
    detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
    recognizer = get_recognizer(args.recognizer)

        enable_logging()

    print("Initializing models...")
    if args.detector == 'retinaface':
        detector = RetinaFace()
    else:
        detector = SCRFD()

    if args.recognizer == 'arcface':
        recognizer = ArcFace()
    elif args.recognizer == 'mobileface':
        recognizer = MobileFace()
    else:
        recognizer = SphereFace()

    print("Extracting reference embedding...")
    print(f'Loading reference: {args.image}')
    ref_embedding = extract_reference_embedding(detector, recognizer, args.image)

    run_video(detector, recognizer, ref_embedding)
    run_webcam(detector, recognizer, ref_embedding, args.threshold)


if __name__ == "__main__":
if __name__ == '__main__':
    main()

@@ -1,149 +1,117 @@
"""Facial Landmark Detection Demo Script"""
# 106-point facial landmark detection
# Usage: python run_landmarks.py --image path/to/image.jpg
#        python run_landmarks.py --webcam

import os
import cv2
import argparse
import os
from pathlib import Path

from uniface import RetinaFace, SCRFD, Landmark106
import cv2

from uniface import SCRFD, Landmark106, RetinaFace


def process_image(detector, landmarker, image_path: str, save_dir: str = "outputs"):
def process_image(detector, landmarker, image_path: str, save_dir: str = 'outputs'):
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Failed to load image from '{image_path}'")
        return

    print(f"Processing: {image_path}")

    # Detect faces
    faces = detector.detect(image)
    print(f" Detected {len(faces)} face(s)")
    print(f'Detected {len(faces)} face(s)')

    if not faces:
        print(" No faces detected")
        return

    # Process each face
    for i, face in enumerate(faces):
        # Draw bounding box
        bbox = face['bbox']
        x1, y1, x2, y2 = map(int, bbox)
        cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)

        # Get and draw 106 landmarks
        landmarks = landmarker.get_landmarks(image, bbox)
        print(f" Face {i+1}: Extracted {len(landmarks)} landmarks")
        print(f' Face {i + 1}: {len(landmarks)} landmarks')

        for x, y in landmarks.astype(int):
            cv2.circle(image, (x, y), 1, (0, 255, 0), -1)

        # Add face count
        cv2.putText(image, f"Face {i+1}", (x1, y1 - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
        cv2.putText(
            image,
            f'Face {i + 1}',
            (x1, y1 - 10),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.5,
            (0, 255, 0),
            2,
        )

    # Add total count
    cv2.putText(image, f"Faces: {len(faces)}", (10, 30),
                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

    # Save result
    os.makedirs(save_dir, exist_ok=True)
    output_path = os.path.join(save_dir, f"{Path(image_path).stem}_landmarks.jpg")
    output_path = os.path.join(save_dir, f'{Path(image_path).stem}_landmarks.jpg')
    cv2.imwrite(output_path, image)
    print(f"Output saved: {output_path}")
    print(f'Output saved: {output_path}')


def run_webcam(detector, landmarker):
    cap = cv2.VideoCapture(0)

    cap = cv2.VideoCapture(0)  # 0 = default webcam
    if not cap.isOpened():
        print("Cannot open webcam")
        print('Cannot open webcam')
        return

    print("Webcam opened")
    print("Press 'q' to quit\n")
    print("Press 'q' to quit")

    frame_count = 0
    while True:
        ret, frame = cap.read()
        frame = cv2.flip(frame, 1)  # mirror for natural interaction
        if not ret:
            break

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
        faces = detector.detect(frame)

            frame_count += 1
        for face in faces:
            bbox = face['bbox']
            x1, y1, x2, y2 = map(int, bbox)
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

            # Detect faces
            faces = detector.detect(frame)
            landmarks = landmarker.get_landmarks(frame, bbox)  # 106 points
            for x, y in landmarks.astype(int):
                cv2.circle(frame, (x, y), 1, (0, 255, 0), -1)

            # Process each face
            for face in faces:
                # Draw bounding box
                bbox = face['bbox']
                x1, y1, x2, y2 = map(int, bbox)
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.putText(
            frame,
            f'Faces: {len(faces)}',
            (10, 30),
            cv2.FONT_HERSHEY_SIMPLEX,
            1,
            (0, 255, 0),
            2,
        )
        cv2.imshow('106-Point Landmarks', frame)

                # Get and draw 106 landmarks
                landmarks = landmarker.get_landmarks(frame, bbox)
                for x, y in landmarks.astype(int):
                    cv2.circle(frame, (x, y), 1, (0, 255, 0), -1)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

                # Add info
                cv2.putText(frame, f"Faces: {len(faces)}", (10, 30),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
                cv2.putText(frame, "Press 'q' to quit", (10, frame.shape[0] - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)

                cv2.imshow("106-Point Landmarks", frame)

                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break

    except KeyboardInterrupt:
        print("\nInterrupted")
    finally:
        cap.release()
        cv2.destroyAllWindows()
        print(f"\nProcessed {frame_count} frames")
    cap.release()
    cv2.destroyAllWindows()


def main():
    parser = argparse.ArgumentParser(description="Run facial landmark detection")
    parser.add_argument("--image", type=str, help="Path to input image")
    parser.add_argument("--webcam", action="store_true", help="Use webcam instead of image")
    parser.add_argument("--detector", type=str, default="retinaface",
                        choices=['retinaface', 'scrfd'], help="Face detector to use")
    parser.add_argument("--save_dir", type=str, default="outputs",
                        help="Directory to save output images")
    parser.add_argument("--verbose", action="store_true", help="Enable verbose logging")

    parser = argparse.ArgumentParser(description='Run facial landmark detection')
    parser.add_argument('--image', type=str, help='Path to input image')
    parser.add_argument('--webcam', action='store_true', help='Use webcam')
    parser.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
    parser.add_argument('--save_dir', type=str, default='outputs')
    args = parser.parse_args()

    # Validate input
    if not args.image and not args.webcam:
        parser.error("Either --image or --webcam must be specified")
        parser.error('Either --image or --webcam must be specified')

    if args.verbose:
        from uniface import enable_logging
        enable_logging()

    # Initialize models
    print(f"Initializing detector: {args.detector}")
    if args.detector == 'retinaface':
        detector = RetinaFace()
    else:
        detector = SCRFD()

    print("Initializing landmark detector...")
    detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
    landmarker = Landmark106()
    print("Models initialized\n")

    # Process
    if args.webcam:
        run_webcam(detector, landmarker)
    else:
        process_image(detector, landmarker, args.image, args.save_dir)


if __name__ == "__main__":
if __name__ == '__main__':
    main()

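Stripped of CLI parsing and drawing details, the landmark flow in this demo reduces to two calls per face. A minimal sketch of that flow, assuming the post-refactor `uniface` API shown above; the input path `photo.jpg` is a placeholder:

```python
import cv2

from uniface import Landmark106, RetinaFace

detector = RetinaFace()
landmarker = Landmark106()

image = cv2.imread('photo.jpg')  # placeholder input path
for face in detector.detect(image):
    # Each detection dict carries a bbox; the landmarker refines it to 106 (x, y) points
    points = landmarker.get_landmarks(image, face['bbox'])
    for x, y in points.astype(int):
        cv2.circle(image, (x, y), 1, (0, 255, 0), -1)
cv2.imwrite('photo_landmarks.jpg', image)
```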
@@ -1,10 +1,24 @@
import cv2
# Face recognition: extract embeddings or compare two faces
# Usage: python run_recognition.py --image path/to/image.jpg
#        python run_recognition.py --image1 face1.jpg --image2 face2.jpg

import argparse

import cv2
import numpy as np

from uniface.detection import RetinaFace, SCRFD
from uniface.recognition import ArcFace, MobileFace, SphereFace
from uniface.detection import SCRFD, RetinaFace
from uniface.face_utils import compute_similarity
from uniface.recognition import ArcFace, MobileFace, SphereFace


def get_recognizer(name: str):
    if name == 'arcface':
        return ArcFace()
    elif name == 'mobileface':
        return MobileFace()
    else:
        return SphereFace()


def run_inference(detector, recognizer, image_path: str):
@@ -14,115 +28,76 @@ def run_inference(detector, recognizer, image_path: str):
        return

    faces = detector.detect(image)

    if not faces:
        print("No faces detected.")
        print('No faces detected.')
        return

    print(f"Detected {len(faces)} face(s). Extracting embeddings for the first face...")
    print(f'Detected {len(faces)} face(s). Extracting embedding for the first face...')

    # Process the first detected face
    first_face = faces[0]
    landmarks = np.array(first_face['landmarks'])  # Convert landmarks to numpy array

    # Extract embedding using the landmarks from the face dictionary
    landmarks = faces[0]['landmarks']  # 5-point landmarks for alignment (already np.ndarray)
    embedding = recognizer.get_embedding(image, landmarks)
    norm_embedding = recognizer.get_normalized_embedding(image, landmarks)
    norm_embedding = recognizer.get_normalized_embedding(image, landmarks)  # L2 normalized

    # Print some info about the embeddings
    print(f" - Embedding shape: {embedding.shape}")
    print(f" - L2 norm of unnormalized embedding: {np.linalg.norm(embedding):.4f}")
    print(f" - L2 norm of normalized embedding: {np.linalg.norm(norm_embedding):.4f}")
    print(f'  Embedding shape: {embedding.shape}')
    print(f'  L2 norm (raw): {np.linalg.norm(embedding):.4f}')
    print(f'  L2 norm (normalized): {np.linalg.norm(norm_embedding):.4f}')


def compare_faces(detector, recognizer, image1_path: str, image2_path: str, threshold: float = 0.35):

    # Load images
    img1 = cv2.imread(image1_path)
    img2 = cv2.imread(image2_path)

    if img1 is None or img2 is None:
        print(f"Error: Failed to load images")
        print('Error: Failed to load one or both images')
        return

    # Detect faces
    faces1 = detector.detect(img1)
    faces2 = detector.detect(img2)

    if not faces1 or not faces2:
        print("Error: No faces detected in one or both images")
        print('Error: No faces detected in one or both images')
        return

    # Get landmarks for first face in each image
    landmarks1 = np.array(faces1[0]['landmarks'])
    landmarks2 = np.array(faces2[0]['landmarks'])
    landmarks1 = faces1[0]['landmarks']
    landmarks2 = faces2[0]['landmarks']

    # Get normalized embeddings
    embedding1 = recognizer.get_normalized_embedding(img1, landmarks1)
    embedding2 = recognizer.get_normalized_embedding(img2, landmarks2)

    # Compute similarity
    # cosine similarity for normalized embeddings
    similarity = compute_similarity(embedding1, embedding2, normalized=True)
    is_match = similarity > threshold

    print(f"Similarity: {similarity:.4f}")
    print(f"Result: {'Same person' if is_match else 'Different person'}")
    print(f"Threshold: {threshold}")
    print(f'Similarity: {similarity:.4f}')
    print(f'Result: {"Same person" if is_match else "Different person"} (threshold: {threshold})')


def main():
    parser = argparse.ArgumentParser(description="Face recognition and comparison.")
    parser.add_argument("--image", type=str, help="Path to single image for embedding extraction.")
    parser.add_argument("--image1", type=str, help="Path to first image for comparison.")
    parser.add_argument("--image2", type=str, help="Path to second image for comparison.")
    parser.add_argument("--threshold", type=float, default=0.35, help="Similarity threshold for face matching.")
    parser = argparse.ArgumentParser(description='Face recognition and comparison')
    parser.add_argument('--image', type=str, help='Single image for embedding extraction')
    parser.add_argument('--image1', type=str, help='First image for comparison')
    parser.add_argument('--image2', type=str, help='Second image for comparison')
    parser.add_argument('--threshold', type=float, default=0.35, help='Similarity threshold')
    parser.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
    parser.add_argument(
        "--detector",
        '--recognizer',
        type=str,
        default="retinaface",
        choices=['retinaface', 'scrfd'],
        help="Face detection method to use."
    )
    parser.add_argument(
        "--recognizer",
        type=str,
        default="arcface",
        default='arcface',
        choices=['arcface', 'mobileface', 'sphereface'],
        help="Face recognition method to use."
    )
    parser.add_argument("--verbose", action="store_true", help="Enable verbose logging")

    args = parser.parse_args()

    if args.verbose:
        from uniface import enable_logging
        enable_logging()

    print(f"Initializing detector: {args.detector}")
    if args.detector == 'retinaface':
        detector = RetinaFace()
    else:
        detector = SCRFD()

    print(f"Initializing recognizer: {args.recognizer}")
    if args.recognizer == 'arcface':
        recognizer = ArcFace()
    elif args.recognizer == 'mobileface':
        recognizer = MobileFace()
    else:
        recognizer = SphereFace()
    detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
    recognizer = get_recognizer(args.recognizer)

    if args.image1 and args.image2:
        # Face comparison mode
        print(f"Comparing faces: {args.image1} vs {args.image2}")
        compare_faces(detector, recognizer, args.image1, args.image2, args.threshold)
    elif args.image:
        # Single image embedding extraction mode
        run_inference(detector, recognizer, args.image)
    else:
        print("Error: Provide either --image for single image processing or --image1 and --image2 for comparison")
        print('Error: Provide --image or both --image1 and --image2')
        parser.print_help()


if __name__ == "__main__":
if __name__ == '__main__':
    main()

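For readers following the refactor, the new comparison path in this script reduces to a handful of calls. A minimal sketch, assuming the post-refactor API shown above; the image paths are placeholders:

```python
import cv2

from uniface.detection import SCRFD
from uniface.face_utils import compute_similarity
from uniface.recognition import ArcFace

detector, recognizer = SCRFD(), ArcFace()

# Detect the first face in each image and embed it using its 5-point landmarks
embeddings = []
for path in ('face1.jpg', 'face2.jpg'):  # placeholder paths
    image = cv2.imread(path)
    faces = detector.detect(image)
    embeddings.append(recognizer.get_normalized_embedding(image, faces[0]['landmarks']))

# Cosine similarity of L2-normalized embeddings; 0.35 is the script's default threshold
similarity = compute_similarity(embeddings[0], embeddings[1], normalized=True)
print('Same person' if similarity > 0.35 else 'Different person')
```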
@@ -1,142 +1,107 @@
"""Video Face Detection Script"""
# Face detection on video files
# Usage: python run_video_detection.py --input video.mp4 --output output.mp4

import cv2
import argparse
from pathlib import Path

import cv2
from tqdm import tqdm

from uniface import RetinaFace, SCRFD
from uniface import SCRFD, RetinaFace
from uniface.visualization import draw_detections


def process_video(detector, input_path: str, output_path: str, vis_threshold: float = 0.6,
                  fps: int = None, show_preview: bool = False):
    # Open input video
def process_video(
    detector,
    input_path: str,
    output_path: str,
    threshold: float = 0.6,
    show_preview: bool = False,
):
    cap = cv2.VideoCapture(input_path)
    if not cap.isOpened():
        print(f"Error: Cannot open video file '{input_path}'")
        return

    # Get video properties
    # get video properties
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    source_fps = cap.get(cv2.CAP_PROP_FPS)
    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    output_fps = fps if fps is not None else source_fps
    print(f'Input: {input_path} ({width}x{height}, {fps:.1f} fps, {total_frames} frames)')
    print(f'Output: {output_path}')

    print(f"📹 Input: {input_path}")
    print(f" Resolution: {width}x{height}")
    print(f" FPS: {source_fps:.2f}")
    print(f" Total frames: {total_frames}")
    print(f"\n📹 Output: {output_path}")
    print(f" FPS: {output_fps:.2f}\n")

    # Initialize video writer
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, output_fps, (width, height))
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # codec for .mp4
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    if not out.isOpened():
        print(f"Error: Cannot create output video '{output_path}'")
        cap.release()
        return

    # Process frames
    frame_count = 0
    total_faces = 0

    try:
        with tqdm(total=total_frames, desc="Processing", unit="frames") as pbar:
            while True:
                ret, frame = cap.read()
                if not ret:
                    break
    for _ in tqdm(range(total_frames), desc='Processing', unit='frames'):
        ret, frame = cap.read()
        if not ret:
            break

                frame_count += 1
        frame_count += 1
        faces = detector.detect(frame)
        total_faces += len(faces)

                # Detect faces
                faces = detector.detect(frame)
                total_faces += len(faces)
        bboxes = [f['bbox'] for f in faces]
        scores = [f['confidence'] for f in faces]
        landmarks = [f['landmarks'] for f in faces]
        draw_detections(frame, bboxes, scores, landmarks, vis_threshold=threshold)

                # Draw detections
                bboxes = [f['bbox'] for f in faces]
                scores = [f['confidence'] for f in faces]
                landmarks = [f['landmarks'] for f in faces]
                draw_detections(frame, bboxes, scores, landmarks, vis_threshold=vis_threshold)
        cv2.putText(
            frame,
            f'Faces: {len(faces)}',
            (10, 30),
            cv2.FONT_HERSHEY_SIMPLEX,
            1,
            (0, 255, 0),
            2,
        )
        out.write(frame)

                # Add frame info
                cv2.putText(frame, f"Faces: {len(faces)}", (10, 30),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

                # Write frame
                out.write(frame)

                # Show preview if requested
                if show_preview:
                    cv2.imshow("Processing Video - Press 'q' to cancel", frame)
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        print("\nProcessing cancelled by user")
                        break

                pbar.update(1)

    except KeyboardInterrupt:
        print("\nProcessing interrupted")
    finally:
        cap.release()
        out.release()
        if show_preview:
            cv2.destroyAllWindows()
        if show_preview:
            cv2.imshow("Processing - Press 'q' to cancel", frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                print('\nCancelled by user')
                break

    # Summary
    print(f"\nProcessing complete!")
    print(f" Processed: {frame_count} frames")
    print(f" Total faces detected: {total_faces}")
    print(f" Average faces per frame: {total_faces/frame_count:.2f}" if frame_count > 0 else "")
    print(f" Output saved: {output_path}")
    cap.release()
    out.release()
    if show_preview:
        cv2.destroyAllWindows()

    avg_faces = total_faces / frame_count if frame_count > 0 else 0
    print(f'\nDone! {frame_count} frames, {total_faces} faces ({avg_faces:.1f} avg/frame)')
    print(f'Saved: {output_path}')


def main():
    parser = argparse.ArgumentParser(description="Process video with face detection")
    parser.add_argument("--input", type=str, required=True, help="Path to input video")
    parser.add_argument("--output", type=str, required=True, help="Path to output video")
    parser.add_argument("--detector", type=str, default="retinaface",
                        choices=['retinaface', 'scrfd'], help="Face detector to use")
    parser.add_argument("--threshold", type=float, default=0.6,
                        help="Confidence threshold for visualization")
    parser.add_argument("--fps", type=int, default=None,
                        help="Output FPS (default: same as input)")
    parser.add_argument("--preview", action="store_true",
                        help="Show live preview during processing")
    parser.add_argument("--verbose", action="store_true", help="Enable verbose logging")

    parser = argparse.ArgumentParser(description='Process video with face detection')
    parser.add_argument('--input', type=str, required=True, help='Input video path')
    parser.add_argument('--output', type=str, required=True, help='Output video path')
    parser.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
    parser.add_argument('--threshold', type=float, default=0.6, help='Visualization threshold')
    parser.add_argument('--preview', action='store_true', help='Show live preview')
    args = parser.parse_args()

    # Check input exists
    if not Path(args.input).exists():
        print(f"Error: Input file '{args.input}' does not exist")
        return

    # Create output directory if needed
    output_dir = Path(args.output).parent
    if output_dir != Path('.'):
        output_dir.mkdir(parents=True, exist_ok=True)
    Path(args.output).parent.mkdir(parents=True, exist_ok=True)

    if args.verbose:
        from uniface import enable_logging
        enable_logging()

    # Initialize detector
    print(f"Initializing detector: {args.detector}")
    if args.detector == 'retinaface':
        detector = RetinaFace()
    else:
        detector = SCRFD()
    print("Detector initialized\n")

    # Process video
    process_video(detector, args.input, args.output, args.threshold, args.fps, args.preview)
    detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
    process_video(detector, args.input, args.output, args.threshold, args.preview)


if __name__ == "__main__":
if __name__ == '__main__':
    main()

@@ -5,31 +5,24 @@ from pathlib import Path

def compute_sha256(file_path: Path, chunk_size: int = 8192) -> str:
    sha256_hash = hashlib.sha256()
    with file_path.open("rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
    with file_path.open('rb') as f:
        for chunk in iter(lambda: f.read(chunk_size), b''):
            sha256_hash.update(chunk)
    return sha256_hash.hexdigest()


def main():
    parser = argparse.ArgumentParser(
        description="Compute SHA256 hash of a model weight file."
    )
    parser.add_argument(
        "file",
        type=Path,
        help="Path to the model weight file (.onnx, .pth, etc)."
    )

    parser = argparse.ArgumentParser(description='Compute SHA256 hash of a file')
    parser.add_argument('file', type=Path, help='Path to file')
    args = parser.parse_args()

    if not args.file.exists() or not args.file.is_file():
        print(f"File does not exist: {args.file}")
        print(f'File does not exist: {args.file}')
        return

    sha256 = compute_sha256(args.file)
    print(f"`SHA256 hash for '{args.file.name}':\n{sha256}")
    print(f"SHA256 hash for '{args.file.name}':\n{sha256}")


if __name__ == "__main__":
if __name__ == '__main__':
    main()

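A natural companion use of this helper is verifying downloaded weights against a published digest. A minimal self-contained sketch; the path and expected digest below are placeholders, not values from this repository:

```python
import hashlib
from pathlib import Path


def compute_sha256(file_path: Path, chunk_size: int = 8192) -> str:
    # Stream the file in chunks so large .onnx weights never load into memory at once
    sha256_hash = hashlib.sha256()
    with file_path.open('rb') as f:
        for chunk in iter(lambda: f.read(chunk_size), b''):
            sha256_hash.update(chunk)
    return sha256_hash.hexdigest()


EXPECTED = 'put-the-published-digest-here'  # placeholder digest
weights = Path('weights/model.onnx')        # placeholder path
if compute_sha256(weights) != EXPECTED:
    raise RuntimeError(f'Checksum mismatch for {weights.name}')
```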
@@ -20,23 +20,23 @@ def mock_bbox():


def test_model_initialization(age_gender_model):
    assert age_gender_model is not None, "AgeGender model initialization failed."
    assert age_gender_model is not None, 'AgeGender model initialization failed.'


def test_prediction_output_format(age_gender_model, mock_image, mock_bbox):
    gender, age = age_gender_model.predict(mock_image, mock_bbox)
    assert isinstance(gender, str), f"Gender should be string, got {type(gender)}"
    assert isinstance(age, int), f"Age should be int, got {type(age)}"
    gender_id, age = age_gender_model.predict(mock_image, mock_bbox)
    assert isinstance(gender_id, int), f'Gender ID should be int, got {type(gender_id)}'
    assert isinstance(age, int), f'Age should be int, got {type(age)}'


def test_gender_values(age_gender_model, mock_image, mock_bbox):
    gender, age = age_gender_model.predict(mock_image, mock_bbox)
    assert gender in ['Male', 'Female'], f"Gender should be 'Male' or 'Female', got '{gender}'"
    gender_id, age = age_gender_model.predict(mock_image, mock_bbox)
    assert gender_id in [0, 1], f'Gender ID should be 0 (Female) or 1 (Male), got {gender_id}'


def test_age_range(age_gender_model, mock_image, mock_bbox):
    gender, age = age_gender_model.predict(mock_image, mock_bbox)
    assert 0 <= age <= 120, f"Age should be between 0 and 120, got {age}"
    gender_id, age = age_gender_model.predict(mock_image, mock_bbox)
    assert 0 <= age <= 120, f'Age should be between 0 and 120, got {age}'


def test_different_bbox_sizes(age_gender_model, mock_image):
@@ -47,9 +47,9 @@ def test_different_bbox_sizes(age_gender_model, mock_image):
    ]

    for bbox in test_bboxes:
        gender, age = age_gender_model.predict(mock_image, bbox)
        assert gender in ['Male', 'Female'], f"Failed for bbox {bbox}"
        assert 0 <= age <= 120, f"Age out of range for bbox {bbox}"
        gender_id, age = age_gender_model.predict(mock_image, bbox)
        assert gender_id in [0, 1], f'Failed for bbox {bbox}'
        assert 0 <= age <= 120, f'Age out of range for bbox {bbox}'


def test_different_image_sizes(age_gender_model, mock_bbox):
@@ -57,31 +57,31 @@ def test_different_image_sizes(age_gender_model, mock_bbox):

    for size in test_sizes:
        mock_image = np.random.randint(0, 255, size, dtype=np.uint8)
        gender, age = age_gender_model.predict(mock_image, mock_bbox)
        assert gender in ['Male', 'Female'], f"Failed for image size {size}"
        assert 0 <= age <= 120, f"Age out of range for image size {size}"
        gender_id, age = age_gender_model.predict(mock_image, mock_bbox)
        assert gender_id in [0, 1], f'Failed for image size {size}'
        assert 0 <= age <= 120, f'Age out of range for image size {size}'


def test_consistency(age_gender_model, mock_image, mock_bbox):
    gender1, age1 = age_gender_model.predict(mock_image, mock_bbox)
    gender2, age2 = age_gender_model.predict(mock_image, mock_bbox)
    gender_id1, age1 = age_gender_model.predict(mock_image, mock_bbox)
    gender_id2, age2 = age_gender_model.predict(mock_image, mock_bbox)

    assert gender1 == gender2, "Same input should produce same gender prediction"
    assert age1 == age2, "Same input should produce same age prediction"
    assert gender_id1 == gender_id2, 'Same input should produce same gender prediction'
    assert age1 == age2, 'Same input should produce same age prediction'


def test_bbox_list_format(age_gender_model, mock_image):
    bbox_list = [100, 100, 300, 300]
    gender, age = age_gender_model.predict(mock_image, bbox_list)
    assert gender in ['Male', 'Female'], "Should work with bbox as list"
    assert 0 <= age <= 120, "Age should be in valid range"
    gender_id, age = age_gender_model.predict(mock_image, bbox_list)
    assert gender_id in [0, 1], 'Should work with bbox as list'
    assert 0 <= age <= 120, 'Age should be in valid range'


def test_bbox_array_format(age_gender_model, mock_image):
    bbox_array = np.array([100, 100, 300, 300])
    gender, age = age_gender_model.predict(mock_image, bbox_array)
    assert gender in ['Male', 'Female'], "Should work with bbox as numpy array"
    assert 0 <= age <= 120, "Age should be in valid range"
    gender_id, age = age_gender_model.predict(mock_image, bbox_array)
    assert gender_id in [0, 1], 'Should work with bbox as numpy array'
    assert 0 <= age <= 120, 'Age should be in valid range'


def test_multiple_predictions(age_gender_model, mock_image):
@@ -93,24 +93,25 @@ def test_multiple_predictions(age_gender_model, mock_image):

    results = []
    for bbox in bboxes:
        gender, age = age_gender_model.predict(mock_image, bbox)
        results.append((gender, age))
        gender_id, age = age_gender_model.predict(mock_image, bbox)
        results.append((gender_id, age))

    assert len(results) == 3, "Should have 3 predictions"
    for gender, age in results:
        assert gender in ['Male', 'Female']
    assert len(results) == 3, 'Should have 3 predictions'
    for gender_id, age in results:
        assert gender_id in [0, 1]
        assert 0 <= age <= 120


def test_age_is_positive(age_gender_model, mock_image, mock_bbox):
    for _ in range(5):
        gender, age = age_gender_model.predict(mock_image, mock_bbox)
        assert age >= 0, f"Age should be non-negative, got {age}"
        gender_id, age = age_gender_model.predict(mock_image, mock_bbox)
        assert age >= 0, f'Age should be non-negative, got {age}'


def test_output_format_for_visualization(age_gender_model, mock_image, mock_bbox):
    gender, age = age_gender_model.predict(mock_image, mock_bbox)
    text = f"{gender}, {age}y"
    assert isinstance(text, str), "Should be able to format as string"
    assert "Male" in text or "Female" in text, "Text should contain gender"
    assert "y" in text, "Text should contain 'y' for years"
    gender_id, age = age_gender_model.predict(mock_image, mock_bbox)
    gender_str = 'Female' if gender_id == 0 else 'Male'
    text = f'{gender_str}, {age}y'
    assert isinstance(text, str), 'Should be able to format as string'
    assert 'Male' in text or 'Female' in text, 'Text should contain gender'
    assert 'y' in text, "Text should contain 'y' for years"

@@ -17,7 +17,7 @@ def test_create_detector_retinaface():
    Test creating a RetinaFace detector using factory function.
    """
    detector = create_detector('retinaface')
    assert detector is not None, "Failed to create RetinaFace detector"
    assert detector is not None, 'Failed to create RetinaFace detector'


def test_create_detector_scrfd():
@@ -25,7 +25,7 @@ def test_create_detector_scrfd():
    Test creating a SCRFD detector using factory function.
    """
    detector = create_detector('scrfd')
    assert detector is not None, "Failed to create SCRFD detector"
    assert detector is not None, 'Failed to create SCRFD detector'


def test_create_detector_with_config():
@@ -36,9 +36,9 @@ def test_create_detector_with_config():
        'retinaface',
        model_name=RetinaFaceWeights.MNET_V2,
        conf_thresh=0.8,
        nms_thresh=0.3
        nms_thresh=0.3,
    )
    assert detector is not None, "Failed to create detector with custom config"
    assert detector is not None, 'Failed to create detector with custom config'


def test_create_detector_invalid_method():
@@ -53,12 +53,8 @@ def test_create_detector_scrfd_with_model():
    """
    Test creating SCRFD detector with specific model.
    """
    detector = create_detector(
        'scrfd',
        model_name=SCRFDWeights.SCRFD_10G_KPS,
        conf_thresh=0.5
    )
    assert detector is not None, "Failed to create SCRFD with specific model"
    detector = create_detector('scrfd', model_name=SCRFDWeights.SCRFD_10G_KPS, conf_thresh=0.5)
    assert detector is not None, 'Failed to create SCRFD with specific model'


# create_recognizer tests
@@ -67,7 +63,7 @@ def test_create_recognizer_arcface():
    Test creating an ArcFace recognizer using factory function.
    """
    recognizer = create_recognizer('arcface')
    assert recognizer is not None, "Failed to create ArcFace recognizer"
    assert recognizer is not None, 'Failed to create ArcFace recognizer'


def test_create_recognizer_mobileface():
@@ -75,7 +71,7 @@ def test_create_recognizer_mobileface():
    Test creating a MobileFace recognizer using factory function.
    """
    recognizer = create_recognizer('mobileface')
    assert recognizer is not None, "Failed to create MobileFace recognizer"
    assert recognizer is not None, 'Failed to create MobileFace recognizer'


def test_create_recognizer_sphereface():
@@ -83,7 +79,7 @@ def test_create_recognizer_sphereface():
    Test creating a SphereFace recognizer using factory function.
    """
    recognizer = create_recognizer('sphereface')
    assert recognizer is not None, "Failed to create SphereFace recognizer"
    assert recognizer is not None, 'Failed to create SphereFace recognizer'


def test_create_recognizer_invalid_method():
@@ -100,7 +96,7 @@ def test_create_landmarker():
    Test creating a Landmark106 detector using factory function.
    """
    landmarker = create_landmarker('2d106det')
    assert landmarker is not None, "Failed to create Landmark106 detector"
    assert landmarker is not None, 'Failed to create Landmark106 detector'


def test_create_landmarker_default():
@@ -108,7 +104,7 @@ def test_create_landmarker_default():
    Test creating landmarker with default parameters.
    """
    landmarker = create_landmarker()
    assert landmarker is not None, "Failed to create default landmarker"
    assert landmarker is not None, 'Failed to create default landmarker'


def test_create_landmarker_invalid_method():
@@ -127,7 +123,7 @@ def test_detect_faces_retinaface():
    mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
    faces = detect_faces(mock_image, method='retinaface')

    assert isinstance(faces, list), "detect_faces should return a list"
    assert isinstance(faces, list), 'detect_faces should return a list'


def test_detect_faces_scrfd():
@@ -137,7 +133,7 @@ def test_detect_faces_scrfd():
    mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
    faces = detect_faces(mock_image, method='scrfd')

    assert isinstance(faces, list), "detect_faces should return a list"
    assert isinstance(faces, list), 'detect_faces should return a list'


def test_detect_faces_with_threshold():
@@ -147,11 +143,11 @@ def test_detect_faces_with_threshold():
    mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
    faces = detect_faces(mock_image, method='retinaface', conf_thresh=0.8)

    assert isinstance(faces, list), "detect_faces should return a list"
    assert isinstance(faces, list), 'detect_faces should return a list'

    # All detections should respect threshold
    for face in faces:
        assert face['confidence'] >= 0.8, "All detections should meet confidence threshold"
        assert face['confidence'] >= 0.8, 'All detections should meet confidence threshold'


def test_detect_faces_default_method():
@@ -161,7 +157,7 @@ def test_detect_faces_default_method():
    mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
    faces = detect_faces(mock_image)  # No method specified

    assert isinstance(faces, list), "detect_faces should return a list with default method"
    assert isinstance(faces, list), 'detect_faces should return a list with default method'


def test_detect_faces_empty_image():
@@ -171,8 +167,8 @@ def test_detect_faces_empty_image():
    empty_image = np.zeros((640, 640, 3), dtype=np.uint8)
    faces = detect_faces(empty_image, method='retinaface')

    assert isinstance(faces, list), "Should return a list even for empty image"
    assert len(faces) == 0, "Should detect no faces in blank image"
    assert isinstance(faces, list), 'Should return a list even for empty image'
    assert len(faces) == 0, 'Should detect no faces in blank image'


# list_available_detectors tests
@@ -182,8 +178,8 @@ def test_list_available_detectors():
    """
    detectors = list_available_detectors()

    assert isinstance(detectors, dict), "Should return a dictionary of detectors"
    assert len(detectors) > 0, "Should have at least one detector available"
    assert isinstance(detectors, dict), 'Should return a dictionary of detectors'
    assert len(detectors) > 0, 'Should have at least one detector available'


def test_list_available_detectors_contents():
@@ -206,7 +202,7 @@ def test_detector_inference_from_factory():
    mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)

    faces = detector.detect(mock_image)
    assert isinstance(faces, list), "Detector should return list of faces"
    assert isinstance(faces, list), 'Detector should return list of faces'


def test_recognizer_inference_from_factory():
@@ -217,8 +213,8 @@ def test_recognizer_inference_from_factory():
    mock_image = np.random.randint(0, 255, (112, 112, 3), dtype=np.uint8)

    embedding = recognizer.get_embedding(mock_image)
    assert embedding is not None, "Recognizer should return embedding"
    assert embedding.shape[1] == 512, "Should return 512-dimensional embedding"
    assert embedding is not None, 'Recognizer should return embedding'
    assert embedding.shape[1] == 512, 'Should return 512-dimensional embedding'


def test_landmarker_inference_from_factory():
@@ -230,8 +226,8 @@ def test_landmarker_inference_from_factory():
    mock_bbox = [100, 100, 300, 300]

    landmarks = landmarker.get_landmarks(mock_image, mock_bbox)
    assert landmarks is not None, "Landmarker should return landmarks"
    assert landmarks.shape == (106, 2), "Should return 106 landmarks"
    assert landmarks is not None, 'Landmarker should return landmarks'
    assert landmarks.shape == (106, 2), 'Should return 106 landmarks'


def test_multiple_detector_creation():
@@ -243,7 +239,7 @@ def test_multiple_detector_creation():

    assert detector1 is not None
    assert detector2 is not None
    assert detector1 is not detector2, "Should create separate instances"
    assert detector1 is not detector2, 'Should create separate instances'


def test_detector_with_different_configs():
@@ -273,6 +269,6 @@ def test_factory_returns_correct_types():
    recognizer = create_recognizer('arcface')
    landmarker = create_landmarker('2d106det')

    assert isinstance(detector, RetinaFace), "Should return RetinaFace instance"
    assert isinstance(recognizer, ArcFace), "Should return ArcFace instance"
    assert isinstance(landmarker, Landmark106), "Should return Landmark106 instance"
    assert isinstance(detector, RetinaFace), 'Should return RetinaFace instance'
    assert isinstance(recognizer, ArcFace), 'Should return ArcFace instance'
    assert isinstance(landmarker, Landmark106), 'Should return Landmark106 instance'

@@ -20,17 +20,17 @@ def mock_bbox():


def test_model_initialization(landmark_model):
    assert landmark_model is not None, "Landmark106 model initialization failed."
    assert landmark_model is not None, 'Landmark106 model initialization failed.'


def test_landmark_detection(landmark_model, mock_image, mock_bbox):
    landmarks = landmark_model.get_landmarks(mock_image, mock_bbox)
    assert landmarks.shape == (106, 2), f"Expected shape (106, 2), got {landmarks.shape}"
    assert landmarks.shape == (106, 2), f'Expected shape (106, 2), got {landmarks.shape}'


def test_landmark_dtype(landmark_model, mock_image, mock_bbox):
    landmarks = landmark_model.get_landmarks(mock_image, mock_bbox)
    assert landmarks.dtype == np.float32, f"Expected float32, got {landmarks.dtype}"
    assert landmarks.dtype == np.float32, f'Expected float32, got {landmarks.dtype}'


def test_landmark_coordinates_within_image(landmark_model, mock_image, mock_bbox):
@@ -45,8 +45,8 @@ def test_landmark_coordinates_within_image(landmark_model, mock_image, mock_bbox
    x_in_bounds = np.sum((x_coords >= x1 - margin) & (x_coords <= x2 + margin))
    y_in_bounds = np.sum((y_coords >= y1 - margin) & (y_coords <= y2 + margin))

    assert x_in_bounds >= 95, f"Only {x_in_bounds}/106 x-coordinates within bounds"
    assert y_in_bounds >= 95, f"Only {y_in_bounds}/106 y-coordinates within bounds"
    assert x_in_bounds >= 95, f'Only {x_in_bounds}/106 x-coordinates within bounds'
    assert y_in_bounds >= 95, f'Only {y_in_bounds}/106 y-coordinates within bounds'


def test_different_bbox_sizes(landmark_model, mock_image):
@@ -58,22 +58,22 @@ def test_different_bbox_sizes(landmark_model, mock_image):

    for bbox in test_bboxes:
        landmarks = landmark_model.get_landmarks(mock_image, bbox)
        assert landmarks.shape == (106, 2), f"Failed for bbox {bbox}"
        assert landmarks.shape == (106, 2), f'Failed for bbox {bbox}'


def test_landmark_array_format(landmark_model, mock_image, mock_bbox):
    landmarks = landmark_model.get_landmarks(mock_image, mock_bbox)
    landmarks_int = landmarks.astype(int)

    assert landmarks_int.shape == (106, 2), "Integer conversion should preserve shape"
    assert landmarks_int.dtype in [np.int32, np.int64], "Should convert to integer type"
    assert landmarks_int.shape == (106, 2), 'Integer conversion should preserve shape'
    assert landmarks_int.dtype in [np.int32, np.int64], 'Should convert to integer type'


def test_consistency(landmark_model, mock_image, mock_bbox):
    landmarks1 = landmark_model.get_landmarks(mock_image, mock_bbox)
    landmarks2 = landmark_model.get_landmarks(mock_image, mock_bbox)

    assert np.allclose(landmarks1, landmarks2), "Same input should produce same landmarks"
    assert np.allclose(landmarks1, landmarks2), 'Same input should produce same landmarks'


def test_different_image_sizes(landmark_model, mock_bbox):
@@ -82,19 +82,19 @@ def test_different_image_sizes(landmark_model, mock_bbox):
    for size in test_sizes:
        mock_image = np.random.randint(0, 255, size, dtype=np.uint8)
        landmarks = landmark_model.get_landmarks(mock_image, mock_bbox)
        assert landmarks.shape == (106, 2), f"Failed for image size {size}"
        assert landmarks.shape == (106, 2), f'Failed for image size {size}'


def test_bbox_list_format(landmark_model, mock_image):
    bbox_list = [100, 100, 300, 300]
    landmarks = landmark_model.get_landmarks(mock_image, bbox_list)
    assert landmarks.shape == (106, 2), "Should work with bbox as list"
    assert landmarks.shape == (106, 2), 'Should work with bbox as list'


def test_bbox_array_format(landmark_model, mock_image):
    bbox_array = np.array([100, 100, 300, 300])
    landmarks = landmark_model.get_landmarks(mock_image, bbox_array)
    assert landmarks.shape == (106, 2), "Should work with bbox as numpy array"
    assert landmarks.shape == (106, 2), 'Should work with bbox as numpy array'


def test_landmark_distribution(landmark_model, mock_image, mock_bbox):
@@ -103,5 +103,5 @@ def test_landmark_distribution(landmark_model, mock_image, mock_bbox):
    x_variance = np.var(landmarks[:, 0])
    y_variance = np.var(landmarks[:, 1])

    assert x_variance > 0, "Landmarks should have variation in x-coordinates"
    assert y_variance > 0, "Landmarks should have variation in y-coordinates"
    assert x_variance > 0, 'Landmarks should have variation in x-coordinates'
    assert y_variance > 0, 'Landmarks should have variation in y-coordinates'

@@ -41,13 +41,16 @@ def mock_landmarks():
    """
    Create mock 5-point facial landmarks.
    """
    return np.array([
        [38.2946, 51.6963],
        [73.5318, 51.5014],
        [56.0252, 71.7366],
        [41.5493, 92.3655],
        [70.7299, 92.2041]
    ], dtype=np.float32)
    return np.array(
        [
            [38.2946, 51.6963],
            [73.5318, 51.5014],
            [56.0252, 71.7366],
            [41.5493, 92.3655],
            [70.7299, 92.2041],
        ],
        dtype=np.float32,
    )


# ArcFace Tests
@@ -55,7 +58,7 @@ def test_arcface_initialization(arcface_model):
    """
    Test that the ArcFace model initializes correctly.
    """
    assert arcface_model is not None, "ArcFace model initialization failed."
    assert arcface_model is not None, 'ArcFace model initialization failed.'


def test_arcface_embedding_shape(arcface_model, mock_aligned_face):
@@ -65,8 +68,8 @@ def test_arcface_embedding_shape(arcface_model, mock_aligned_face):
    embedding = arcface_model.get_embedding(mock_aligned_face)

    # ArcFace typically produces 512-dimensional embeddings
    assert embedding.shape[1] == 512, f"Expected 512-dim embedding, got {embedding.shape[1]}"
    assert embedding.shape[0] == 1, "Embedding should have batch dimension of 1"
    assert embedding.shape[1] == 512, f'Expected 512-dim embedding, got {embedding.shape[1]}'
    assert embedding.shape[0] == 1, 'Embedding should have batch dimension of 1'


def test_arcface_normalized_embedding(arcface_model, mock_landmarks):
@@ -80,7 +83,7 @@ def test_arcface_normalized_embedding(arcface_model, mock_landmarks):

    # Check that embedding is normalized (L2 norm ≈ 1.0)
    norm = np.linalg.norm(embedding)
    assert np.isclose(norm, 1.0, atol=1e-5), f"Normalized embedding should have norm 1.0, got {norm}"
    assert np.isclose(norm, 1.0, atol=1e-5), f'Normalized embedding should have norm 1.0, got {norm}'


def test_arcface_embedding_dtype(arcface_model, mock_aligned_face):
@@ -88,7 +91,7 @@ def test_arcface_embedding_dtype(arcface_model, mock_aligned_face):
    Test that embeddings have the correct data type.
    """
    embedding = arcface_model.get_embedding(mock_aligned_face)
    assert embedding.dtype == np.float32, f"Expected float32, got {embedding.dtype}"
    assert embedding.dtype == np.float32, f'Expected float32, got {embedding.dtype}'


def test_arcface_consistency(arcface_model, mock_aligned_face):
@@ -98,7 +101,7 @@ def test_arcface_consistency(arcface_model, mock_aligned_face):
    embedding1 = arcface_model.get_embedding(mock_aligned_face)
    embedding2 = arcface_model.get_embedding(mock_aligned_face)

    assert np.allclose(embedding1, embedding2), "Same input should produce same embedding"
    assert np.allclose(embedding1, embedding2), 'Same input should produce same embedding'


# MobileFace Tests
@@ -106,7 +109,7 @@ def test_mobileface_initialization(mobileface_model):
    """
    Test that the MobileFace model initializes correctly.
    """
    assert mobileface_model is not None, "MobileFace model initialization failed."
    assert mobileface_model is not None, 'MobileFace model initialization failed.'


def test_mobileface_embedding_shape(mobileface_model, mock_aligned_face):
@@ -116,8 +119,8 @@ def test_mobileface_embedding_shape(mobileface_model, mock_aligned_face):
    embedding = mobileface_model.get_embedding(mock_aligned_face)

    # MobileFace typically produces 512-dimensional embeddings
    assert embedding.shape[1] == 512, f"Expected 512-dim embedding, got {embedding.shape[1]}"
    assert embedding.shape[0] == 1, "Embedding should have batch dimension of 1"
    assert embedding.shape[1] == 512, f'Expected 512-dim embedding, got {embedding.shape[1]}'
    assert embedding.shape[0] == 1, 'Embedding should have batch dimension of 1'


def test_mobileface_normalized_embedding(mobileface_model, mock_landmarks):
@@ -129,7 +132,7 @@ def test_mobileface_normalized_embedding(mobileface_model, mock_landmarks):
    embedding = mobileface_model.get_normalized_embedding(mock_image, mock_landmarks)

    norm = np.linalg.norm(embedding)
    assert np.isclose(norm, 1.0, atol=1e-5), f"Normalized embedding should have norm 1.0, got {norm}"
    assert np.isclose(norm, 1.0, atol=1e-5), f'Normalized embedding should have norm 1.0, got {norm}'


# SphereFace Tests
@@ -137,7 +140,7 @@ def test_sphereface_initialization(sphereface_model):
    """
    Test that the SphereFace model initializes correctly.
    """
    assert sphereface_model is not None, "SphereFace model initialization failed."
    assert sphereface_model is not None, 'SphereFace model initialization failed.'


def test_sphereface_embedding_shape(sphereface_model, mock_aligned_face):
@@ -147,8 +150,8 @@ def test_sphereface_embedding_shape(sphereface_model, mock_aligned_face):
    embedding = sphereface_model.get_embedding(mock_aligned_face)

    # SphereFace typically produces 512-dimensional embeddings
    assert embedding.shape[1] == 512, f"Expected 512-dim embedding, got {embedding.shape[1]}"
    assert embedding.shape[0] == 1, "Embedding should have batch dimension of 1"
    assert embedding.shape[1] == 512, f'Expected 512-dim embedding, got {embedding.shape[1]}'
    assert embedding.shape[0] == 1, 'Embedding should have batch dimension of 1'


def test_sphereface_normalized_embedding(sphereface_model, mock_landmarks):
@@ -160,7 +163,7 @@ def test_sphereface_normalized_embedding(sphereface_model, mock_landmarks):
    embedding = sphereface_model.get_normalized_embedding(mock_image, mock_landmarks)

    norm = np.linalg.norm(embedding)
    assert np.isclose(norm, 1.0, atol=1e-5), f"Normalized embedding should have norm 1.0, got {norm}"
    assert np.isclose(norm, 1.0, atol=1e-5), f'Normalized embedding should have norm 1.0, got {norm}'


# Cross-model comparison tests
@@ -173,8 +176,7 @@ def test_different_models_different_embeddings(arcface_model, mobileface_model,

    # Embeddings should be different (with high probability for random input)
    # We check that they're not identical
    assert not np.allclose(arcface_emb, mobileface_emb), \
        "Different models should produce different embeddings"
    assert not np.allclose(arcface_emb, mobileface_emb), 'Different models should produce different embeddings'


def test_embedding_similarity_computation(arcface_model, mock_aligned_face):
@@ -191,10 +193,11 @@ def test_embedding_similarity_computation(arcface_model, mock_aligned_face):

    # Compute cosine similarity
    from uniface import compute_similarity

    similarity = compute_similarity(emb1, emb2)

    # Similarity should be between -1 and 1
    assert -1.0 <= similarity <= 1.0, f"Similarity should be in [-1, 1], got {similarity}"
    assert -1.0 <= similarity <= 1.0, f'Similarity should be in [-1, 1], got {similarity}'


def test_same_face_high_similarity(arcface_model, mock_aligned_face):
@@ -205,7 +208,8 @@ def test_same_face_high_similarity(arcface_model, mock_aligned_face):
    emb2 = arcface_model.get_embedding(mock_aligned_face)

    from uniface import compute_similarity

    similarity = compute_similarity(emb1, emb2)

    # Same image should have similarity close to 1.0
    assert similarity > 0.99, f"Same face should have similarity > 0.99, got {similarity}"
    assert similarity > 0.99, f'Same face should have similarity > 0.99, got {similarity}'

@@ -17,27 +17,27 @@ def retinaface_model():


def test_model_initialization(retinaface_model):
    assert retinaface_model is not None, "Model initialization failed."
    assert retinaface_model is not None, 'Model initialization failed.'


def test_inference_on_640x640_image(retinaface_model):
    mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
    faces = retinaface_model.detect(mock_image)

    assert isinstance(faces, list), "Detections should be a list."
    assert isinstance(faces, list), 'Detections should be a list.'

    for face in faces:
        assert isinstance(face, dict), "Each detection should be a dictionary."
        assert "bbox" in face, "Each detection should have a 'bbox' key."
        assert "confidence" in face, "Each detection should have a 'confidence' key."
        assert "landmarks" in face, "Each detection should have a 'landmarks' key."
        assert isinstance(face, dict), 'Each detection should be a dictionary.'
        assert 'bbox' in face, "Each detection should have a 'bbox' key."
        assert 'confidence' in face, "Each detection should have a 'confidence' key."
        assert 'landmarks' in face, "Each detection should have a 'landmarks' key."

        bbox = face["bbox"]
        assert len(bbox) == 4, "BBox should have 4 values (x1, y1, x2, y2)."
        bbox = face['bbox']
        assert len(bbox) == 4, 'BBox should have 4 values (x1, y1, x2, y2).'

        landmarks = face["landmarks"]
        assert len(landmarks) == 5, "Should have 5 landmark points."
        assert all(len(pt) == 2 for pt in landmarks), "Each landmark should be (x, y)."
        landmarks = face['landmarks']
        assert len(landmarks) == 5, 'Should have 5 landmark points.'
        assert all(len(pt) == 2 for pt in landmarks), 'Each landmark should be (x, y).'


def test_confidence_threshold(retinaface_model):
@@ -45,11 +45,11 @@ def test_confidence_threshold(retinaface_model):
    faces = retinaface_model.detect(mock_image)

    for face in faces:
        confidence = face["confidence"]
        assert confidence >= 0.5, f"Detection has confidence {confidence} below threshold 0.5"
        confidence = face['confidence']
        assert confidence >= 0.5, f'Detection has confidence {confidence} below threshold 0.5'


def test_no_faces_detected(retinaface_model):
    empty_image = np.zeros((640, 640, 3), dtype=np.uint8)
    faces = retinaface_model.detect(empty_image)
    assert len(faces) == 0, "Should detect no faces in a blank image."
    assert len(faces) == 0, 'Should detect no faces in a blank image.'

@@ -15,27 +15,27 @@ def scrfd_model():
|
||||
|
||||
|
||||
def test_model_initialization(scrfd_model):
|
||||
assert scrfd_model is not None, "Model initialization failed."
|
||||
assert scrfd_model is not None, 'Model initialization failed.'
|
||||
|
||||
|
||||
def test_inference_on_640x640_image(scrfd_model):
|
||||
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
faces = scrfd_model.detect(mock_image)
|
||||
|
||||
assert isinstance(faces, list), "Detections should be a list."
|
||||
assert isinstance(faces, list), 'Detections should be a list.'
|
||||
|
||||
for face in faces:
|
||||
assert isinstance(face, dict), "Each detection should be a dictionary."
|
||||
assert "bbox" in face, "Each detection should have a 'bbox' key."
|
||||
assert "confidence" in face, "Each detection should have a 'confidence' key."
|
||||
assert "landmarks" in face, "Each detection should have a 'landmarks' key."
|
||||
assert isinstance(face, dict), 'Each detection should be a dictionary.'
|
||||
assert 'bbox' in face, "Each detection should have a 'bbox' key."
|
||||
assert 'confidence' in face, "Each detection should have a 'confidence' key."
|
||||
assert 'landmarks' in face, "Each detection should have a 'landmarks' key."
|
||||
|
||||
bbox = face["bbox"]
|
||||
assert len(bbox) == 4, "BBox should have 4 values (x1, y1, x2, y2)."
|
||||
bbox = face['bbox']
|
||||
assert len(bbox) == 4, 'BBox should have 4 values (x1, y1, x2, y2).'
|
||||
|
||||
landmarks = face["landmarks"]
|
||||
assert len(landmarks) == 5, "Should have 5 landmark points."
|
||||
assert all(len(pt) == 2 for pt in landmarks), "Each landmark should be (x, y)."
|
||||
landmarks = face['landmarks']
|
||||
assert len(landmarks) == 5, 'Should have 5 landmark points.'
|
||||
assert all(len(pt) == 2 for pt in landmarks), 'Each landmark should be (x, y).'
|
||||
|
||||
|
||||
def test_confidence_threshold(scrfd_model):
|
||||
@@ -43,14 +43,14 @@ def test_confidence_threshold(scrfd_model):
|
||||
faces = scrfd_model.detect(mock_image)
|
||||
|
||||
for face in faces:
|
||||
confidence = face["confidence"]
|
||||
assert confidence >= 0.5, f"Detection has confidence {confidence} below threshold 0.5"
|
||||
confidence = face['confidence']
|
||||
assert confidence >= 0.5, f'Detection has confidence {confidence} below threshold 0.5'
|
||||
|
||||
|
||||
def test_no_faces_detected(scrfd_model):
|
||||
empty_image = np.zeros((640, 640, 3), dtype=np.uint8)
|
||||
faces = scrfd_model.detect(empty_image)
|
||||
assert len(faces) == 0, "Should detect no faces in a blank image."
|
||||
assert len(faces) == 0, 'Should detect no faces in a blank image.'
|
||||
|
||||
|
||||
def test_different_input_sizes(scrfd_model):
|
||||
@@ -59,13 +59,13 @@ def test_different_input_sizes(scrfd_model):
|
||||
for size in test_sizes:
|
||||
mock_image = np.random.randint(0, 255, size, dtype=np.uint8)
|
||||
faces = scrfd_model.detect(mock_image)
|
||||
assert isinstance(faces, list), f"Should return list for size {size}"
|
||||
assert isinstance(faces, list), f'Should return list for size {size}'
|
||||
|
||||
|
||||
def test_scrfd_10g_model():
|
||||
model = SCRFD(model_name=SCRFDWeights.SCRFD_10G_KPS, conf_thresh=0.5)
|
||||
assert model is not None, "SCRFD 10G model initialization failed."
|
||||
assert model is not None, 'SCRFD 10G model initialization failed.'
|
||||
|
||||
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
|
||||
faces = model.detect(mock_image)
|
||||
assert isinstance(faces, list), "SCRFD 10G should return list of detections."
|
||||
assert isinstance(faces, list), 'SCRFD 10G should return list of detections.'
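The `scrfd_model` fixture these tests rely on is elided by the hunk above; a plausible minimal version is sketched below (the specific weights variant is an assumption, not the repo's actual fixture):

```python
# Hypothetical fixture sketch -- the real test file's fixture body is not shown in this diff.
import pytest

from uniface import SCRFD
from uniface.constants import SCRFDWeights


@pytest.fixture(scope='module')
def scrfd_model():
    # SCRFD_500M_KPS is an assumption; any SCRFDWeights variant would work here.
    return SCRFD(model_name=SCRFDWeights.SCRFD_500M_KPS, conf_thresh=0.5)
```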
@@ -18,13 +18,16 @@ def mock_landmarks():
    Create mock 5-point facial landmarks.
    Standard positions for a face roughly centered at (112/2, 112/2).
    """
    return np.array([
        [38.2946, 51.6963],   # Left eye
        [73.5318, 51.5014],   # Right eye
        [56.0252, 71.7366],   # Nose
        [41.5493, 92.3655],   # Left mouth corner
        [70.7299, 92.2041]    # Right mouth corner
    ], dtype=np.float32)
    return np.array(
        [
            [38.2946, 51.6963],  # Left eye
            [73.5318, 51.5014],  # Right eye
            [56.0252, 71.7366],  # Nose
            [41.5493, 92.3655],  # Left mouth corner
            [70.7299, 92.2041],  # Right mouth corner
        ],
        dtype=np.float32,
    )


# compute_similarity tests
@@ -36,7 +39,7 @@ def test_compute_similarity_same_embedding():
    embedding = embedding / np.linalg.norm(embedding)  # Normalize

    similarity = compute_similarity(embedding, embedding)
    assert np.isclose(similarity, 1.0, atol=1e-5), f"Self-similarity should be 1.0, got {similarity}"
    assert np.isclose(similarity, 1.0, atol=1e-5), f'Self-similarity should be 1.0, got {similarity}'


def test_compute_similarity_range():
@@ -53,7 +56,7 @@ def test_compute_similarity_range():
    emb2 = emb2 / np.linalg.norm(emb2)

    similarity = compute_similarity(emb1, emb2)
    assert -1.0 <= similarity <= 1.0, f"Similarity should be in [-1, 1], got {similarity}"
    assert -1.0 <= similarity <= 1.0, f'Similarity should be in [-1, 1], got {similarity}'


def test_compute_similarity_orthogonal():
@@ -68,7 +71,7 @@ def test_compute_similarity_orthogonal():
    emb2[0, 1] = 1.0  # [0, 1, 0, ..., 0]

    similarity = compute_similarity(emb1, emb2)
    assert np.isclose(similarity, 0.0, atol=1e-5), f"Orthogonal embeddings should have similarity 0.0, got {similarity}"
    assert np.isclose(similarity, 0.0, atol=1e-5), f'Orthogonal embeddings should have similarity 0.0, got {similarity}'


def test_compute_similarity_opposite():
@@ -81,7 +84,7 @@ def test_compute_similarity_opposite():
    emb2 = -emb1  # Opposite direction

    similarity = compute_similarity(emb1, emb2)
    assert np.isclose(similarity, -1.0, atol=1e-5), f"Opposite embeddings should have similarity -1.0, got {similarity}"
    assert np.isclose(similarity, -1.0, atol=1e-5), f'Opposite embeddings should have similarity -1.0, got {similarity}'


def test_compute_similarity_symmetry():
@@ -98,7 +101,7 @@ def test_compute_similarity_symmetry():
    sim_12 = compute_similarity(emb1, emb2)
    sim_21 = compute_similarity(emb2, emb1)

    assert np.isclose(sim_12, sim_21), "Similarity should be symmetric"
    assert np.isclose(sim_12, sim_21), 'Similarity should be symmetric'


def test_compute_similarity_dtype():
@@ -113,7 +116,7 @@ def test_compute_similarity_dtype():
    emb2 = emb2 / np.linalg.norm(emb2)

    similarity = compute_similarity(emb1, emb2)
    assert isinstance(similarity, (float, np.floating)), f"Similarity should be float, got {type(similarity)}"
    assert isinstance(similarity, (float, np.floating)), f'Similarity should be float, got {type(similarity)}'
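Taken together, the self/range/orthogonal/opposite/symmetry cases above pin `compute_similarity` down to plain cosine similarity (the implementation itself is not shown in this diff, but only this definition satisfies all five properties):

```latex
\operatorname{sim}(e_1, e_2) \;=\; \frac{e_1 \cdot e_2}{\lVert e_1 \rVert \, \lVert e_2 \rVert} \;\in\; [-1, 1]
```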
# face_alignment tests
@@ -123,7 +126,7 @@ def test_face_alignment_output_shape(mock_image, mock_landmarks):
    """
    aligned, _ = face_alignment(mock_image, mock_landmarks, image_size=(112, 112))

    assert aligned.shape == (112, 112, 3), f"Expected shape (112, 112, 3), got {aligned.shape}"
    assert aligned.shape == (112, 112, 3), f'Expected shape (112, 112, 3), got {aligned.shape}'


def test_face_alignment_dtype(mock_image, mock_landmarks):
@@ -132,7 +135,7 @@ def test_face_alignment_dtype(mock_image, mock_landmarks):
    """
    aligned, _ = face_alignment(mock_image, mock_landmarks, image_size=(112, 112))

    assert aligned.dtype == np.uint8, f"Expected uint8, got {aligned.dtype}"
    assert aligned.dtype == np.uint8, f'Expected uint8, got {aligned.dtype}'


def test_face_alignment_different_sizes(mock_image, mock_landmarks):
@@ -144,7 +147,7 @@ def test_face_alignment_different_sizes(mock_image, mock_landmarks):

    for size in test_sizes:
        aligned, _ = face_alignment(mock_image, mock_landmarks, image_size=size)
        assert aligned.shape == (*size, 3), f"Failed for size {size}"
        assert aligned.shape == (*size, 3), f'Failed for size {size}'


def test_face_alignment_consistency(mock_image, mock_landmarks):
@@ -154,7 +157,7 @@ def test_face_alignment_consistency(mock_image, mock_landmarks):
    aligned1, _ = face_alignment(mock_image, mock_landmarks, image_size=(112, 112))
    aligned2, _ = face_alignment(mock_image, mock_landmarks, image_size=(112, 112))

    assert np.allclose(aligned1, aligned2), "Same input should produce same aligned face"
    assert np.allclose(aligned1, aligned2), 'Same input should produce same aligned face'


def test_face_alignment_landmarks_as_list(mock_image):
@@ -166,13 +169,13 @@ def test_face_alignment_landmarks_as_list(mock_image):
        [73.5318, 51.5014],
        [56.0252, 71.7366],
        [41.5493, 92.3655],
        [70.7299, 92.2041]
        [70.7299, 92.2041],
    ]

    # Convert list to numpy array before passing to face_alignment
    landmarks_array = np.array(landmarks_list, dtype=np.float32)
    aligned, _ = face_alignment(mock_image, landmarks_array, image_size=(112, 112))
    assert aligned.shape == (112, 112, 3), "Should work with landmarks as array"
    assert aligned.shape == (112, 112, 3), 'Should work with landmarks as array'


def test_face_alignment_value_range(mock_image, mock_landmarks):
@@ -181,8 +184,8 @@ def test_face_alignment_value_range(mock_image, mock_landmarks):
    """
    aligned, _ = face_alignment(mock_image, mock_landmarks, image_size=(112, 112))

    assert np.all(aligned >= 0), "Pixel values should be >= 0"
    assert np.all(aligned <= 255), "Pixel values should be <= 255"
    assert np.all(aligned >= 0), 'Pixel values should be >= 0'
    assert np.all(aligned <= 255), 'Pixel values should be <= 255'


def test_face_alignment_not_all_zeros(mock_image, mock_landmarks):
@@ -192,7 +195,7 @@ def test_face_alignment_not_all_zeros(mock_image, mock_landmarks):
    aligned, _ = face_alignment(mock_image, mock_landmarks, image_size=(112, 112))

    # At least some pixels should be non-zero
    assert np.any(aligned > 0), "Aligned face should have some non-zero pixels"
    assert np.any(aligned > 0), 'Aligned face should have some non-zero pixels'


def test_face_alignment_from_different_positions(mock_image):
@@ -201,14 +204,23 @@ def test_face_alignment_from_different_positions(mock_image):
    """
    # Landmarks at different positions
    positions = [
        np.array([[100, 100], [150, 100], [125, 130], [110, 150], [140, 150]], dtype=np.float32),
        np.array([[300, 200], [350, 200], [325, 230], [310, 250], [340, 250]], dtype=np.float32),
        np.array([[500, 400], [550, 400], [525, 430], [510, 450], [540, 450]], dtype=np.float32),
        np.array(
            [[100, 100], [150, 100], [125, 130], [110, 150], [140, 150]],
            dtype=np.float32,
        ),
        np.array(
            [[300, 200], [350, 200], [325, 230], [310, 250], [340, 250]],
            dtype=np.float32,
        ),
        np.array(
            [[500, 400], [550, 400], [525, 430], [510, 450], [540, 450]],
            dtype=np.float32,
        ),
    ]

    for landmarks in positions:
        aligned, _ = face_alignment(mock_image, landmarks, image_size=(112, 112))
        assert aligned.shape == (112, 112, 3), f"Failed for landmarks at {landmarks[0]}"
        assert aligned.shape == (112, 112, 3), f'Failed for landmarks at {landmarks[0]}'


def test_face_alignment_landmark_count(mock_image):
@@ -216,16 +228,19 @@ def test_face_alignment_landmark_count(mock_image):
    Test that face_alignment works specifically with 5-point landmarks.
    """
    # Standard 5-point landmarks
    landmarks_5pt = np.array([
        [38.2946, 51.6963],
        [73.5318, 51.5014],
        [56.0252, 71.7366],
        [41.5493, 92.3655],
        [70.7299, 92.2041]
    ], dtype=np.float32)
    landmarks_5pt = np.array(
        [
            [38.2946, 51.6963],
            [73.5318, 51.5014],
            [56.0252, 71.7366],
            [41.5493, 92.3655],
            [70.7299, 92.2041],
        ],
        dtype=np.float32,
    )

    aligned, _ = face_alignment(mock_image, landmarks_5pt, image_size=(112, 112))
    assert aligned.shape == (112, 112, 3), "Should work with 5-point landmarks"
    assert aligned.shape == (112, 112, 3), 'Should work with 5-point landmarks'


def test_compute_similarity_with_recognition_embeddings():
@@ -11,9 +11,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.

__license__ = "MIT"
__author__ = "Yakhyokhuja Valikhujaev"
__version__ = "1.1.0"
__license__ = 'MIT'
__author__ = 'Yakhyokhuja Valikhujaev'
__version__ = '1.1.2'


from uniface.face_utils import compute_similarity, face_alignment
@@ -21,42 +21,54 @@ from uniface.log import Logger, enable_logging
from uniface.model_store import verify_model_weights
from uniface.visualization import draw_detections

from .analyzer import FaceAnalyzer
from .attribute import AgeGender
from .face import Face

try:
    from .attribute import Emotion
except ImportError:
    Emotion = None  # PyTorch not installed
from .detection import SCRFD, RetinaFace, create_detector, detect_faces, list_available_detectors
from .detection import (
    SCRFD,
    RetinaFace,
    create_detector,
    detect_faces,
    list_available_detectors,
)
from .landmark import Landmark106, create_landmarker
from .recognition import ArcFace, MobileFace, SphereFace, create_recognizer

__all__ = [
    "__author__",
    "__license__",
    "__version__",
    '__author__',
    '__license__',
    '__version__',
    # Core classes
    'Face',
    'FaceAnalyzer',
    # Factory functions
    "create_detector",
    "create_landmarker",
    "create_recognizer",
    "detect_faces",
    "list_available_detectors",
    'create_detector',
    'create_landmarker',
    'create_recognizer',
    'detect_faces',
    'list_available_detectors',
    # Detection models
    "RetinaFace",
    "SCRFD",
    'RetinaFace',
    'SCRFD',
    # Recognition models
    "ArcFace",
    "MobileFace",
    "SphereFace",
    'ArcFace',
    'MobileFace',
    'SphereFace',
    # Landmark models
    "Landmark106",
    'Landmark106',
    # Attribute models
    "AgeGender",
    "Emotion",
    'AgeGender',
    'Emotion',
    # Utilities
    "compute_similarity",
    "draw_detections",
    "face_alignment",
    "verify_model_weights",
    "Logger",
    "enable_logging",
    'compute_similarity',
    'draw_detections',
    'face_alignment',
    'verify_model_weights',
    'Logger',
    'enable_logging',
]
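Because `Emotion` is re-exported as `None` when PyTorch is not installed (per the optional import above), downstream code should guard before instantiating it. A minimal sketch, assuming `Emotion` can be constructed with default arguments:

```python
from uniface import Emotion

if Emotion is None:
    raise RuntimeError('Emotion support requires PyTorch; install it first (e.g. pip install torch).')

emotion_predictor = Emotion()  # assumes all constructor arguments have defaults
```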
uniface/analyzer.py (new file, 84 lines)
@@ -0,0 +1,84 @@
# Copyright 2025 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

from typing import List, Optional

import numpy as np

from uniface.attribute.age_gender import AgeGender
from uniface.detection.base import BaseDetector
from uniface.face import Face
from uniface.log import Logger
from uniface.recognition.base import BaseRecognizer

__all__ = ['FaceAnalyzer']


class FaceAnalyzer:
    """Unified face analyzer combining detection, recognition, and attributes."""

    def __init__(
        self,
        detector: BaseDetector,
        recognizer: Optional[BaseRecognizer] = None,
        age_gender: Optional[AgeGender] = None,
    ) -> None:
        self.detector = detector
        self.recognizer = recognizer
        self.age_gender = age_gender

        Logger.info(f'Initialized FaceAnalyzer with detector={detector.__class__.__name__}')
        if recognizer:
            Logger.info(f' - Recognition enabled: {recognizer.__class__.__name__}')
        if age_gender:
            Logger.info(f' - Age/Gender enabled: {age_gender.__class__.__name__}')

    def analyze(self, image: np.ndarray) -> List[Face]:
        """Analyze faces in an image."""
        detections = self.detector.detect(image)
        Logger.debug(f'Detected {len(detections)} face(s)')

        faces = []
        for idx, detection in enumerate(detections):
            bbox = detection['bbox']
            confidence = detection['confidence']
            landmarks = detection['landmarks']

            embedding = None
            if self.recognizer is not None:
                try:
                    embedding = self.recognizer.get_normalized_embedding(image, landmarks)
                    Logger.debug(f' Face {idx + 1}: Extracted embedding with shape {embedding.shape}')
                except Exception as e:
                    Logger.warning(f' Face {idx + 1}: Failed to extract embedding: {e}')

            age, gender_id = None, None
            if self.age_gender is not None:
                try:
                    gender_id, age = self.age_gender.predict(image, bbox)
                    gender_str = 'Female' if gender_id == 0 else 'Male'
                    Logger.debug(f' Face {idx + 1}: Age={age}, Gender={gender_str}')
                except Exception as e:
                    Logger.warning(f' Face {idx + 1}: Failed to predict age/gender: {e}')

            face = Face(
                bbox=bbox,
                confidence=confidence,
                landmarks=landmarks,
                embedding=embedding,
                age=age,
                gender_id=gender_id,
            )
            faces.append(face)

        Logger.info(f'Analysis complete: {len(faces)} face(s) processed')
        return faces

    def __repr__(self) -> str:
        parts = [f'FaceAnalyzer(detector={self.detector.__class__.__name__}']
        if self.recognizer:
            parts.append(f'recognizer={self.recognizer.__class__.__name__}')
        if self.age_gender:
            parts.append(f'age_gender={self.age_gender.__class__.__name__}')
        return ', '.join(parts) + ')'
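A usage sketch for the new class (not part of the changeset; it assumes the detector/recognizer/attribute classes are constructible with their documented defaults, and that `Face` exposes its constructor fields as attributes):

```python
import cv2

from uniface import AgeGender, ArcFace, FaceAnalyzer, RetinaFace

analyzer = FaceAnalyzer(
    detector=RetinaFace(),
    recognizer=ArcFace(),    # optional: adds an embedding per face
    age_gender=AgeGender(),  # optional: adds age and gender_id per face
)

image = cv2.imread('group_photo.jpg')  # hypothetical input path
for face in analyzer.analyze(image):
    gender = 'Female' if face.gender_id == 0 else 'Male'
    print(face.bbox, face.confidence, gender, face.age)
```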
@@ -2,7 +2,8 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

from typing import Dict, Any, List, Union
from typing import Any, Dict, List, Union

import numpy as np

from uniface.attribute.age_gender import AgeGender
@@ -12,18 +13,14 @@ from uniface.constants import AgeGenderWeights, DDAMFNWeights
# Emotion requires PyTorch - make it optional
try:
    from uniface.attribute.emotion import Emotion

    _EMOTION_AVAILABLE = True
except ImportError:
    Emotion = None
    _EMOTION_AVAILABLE = False

# Public API for the attribute module
__all__ = [
    "AgeGender",
    "Emotion",
    "create_attribute_predictor",
    "predict_attributes"
]
__all__ = ['AgeGender', 'Emotion', 'create_attribute_predictor', 'predict_attributes']

# A mapping from model enums to their corresponding attribute classes
_ATTRIBUTE_MODELS = {
@@ -35,10 +32,7 @@ if _EMOTION_AVAILABLE:
    _ATTRIBUTE_MODELS.update({model: Emotion for model in DDAMFNWeights})


def create_attribute_predictor(
    model_name: Union[AgeGenderWeights, DDAMFNWeights],
    **kwargs: Any
) -> Attribute:
def create_attribute_predictor(model_name: Union[AgeGenderWeights, DDAMFNWeights], **kwargs: Any) -> Attribute:
    """
    Factory function to create an attribute predictor instance.

@@ -59,17 +53,16 @@ def create_attribute_predictor(
    model_class = _ATTRIBUTE_MODELS.get(model_name)

    if model_class is None:
        raise ValueError(f"Unsupported attribute model: {model_name}. "
                         f"Please choose from AgeGenderWeights or DDAMFNWeights.")
        raise ValueError(
            f'Unsupported attribute model: {model_name}. Please choose from AgeGenderWeights or DDAMFNWeights.'
        )

    # Pass model_name to the constructor, as some classes might need it
    return model_class(model_name=model_name, **kwargs)


def predict_attributes(
    image: np.ndarray,
    detections: List[Dict[str, np.ndarray]],
    predictor: Attribute
    image: np.ndarray, detections: List[Dict[str, np.ndarray]], predictor: Attribute
) -> List[Dict[str, Any]]:
    """
    High-level API to predict attributes for multiple detected faces.
@@ -95,8 +88,8 @@ def predict_attributes(
        face['attributes'] = {}

        if isinstance(predictor, AgeGender):
            gender, age = predictor(image, face['bbox'])
            face['attributes']['gender'] = gender
            gender_id, age = predictor(image, face['bbox'])
            face['attributes']['gender_id'] = gender_id
            face['attributes']['age'] = age
        elif isinstance(predictor, Emotion):
            emotion, confidence = predictor(image, face['landmark'])
@@ -14,7 +14,7 @@ from uniface.log import Logger
from uniface.model_store import verify_model_weights
from uniface.onnx_utils import create_onnx_session

__all__ = ["AgeGender"]
__all__ = ['AgeGender']


class AgeGender(Attribute):
@@ -22,8 +22,8 @@ class AgeGender(Attribute):
    Age and gender prediction model using ONNX Runtime.

    This class inherits from the base `Attribute` class and implements the
    functionality for predicting age (in years) and gender (0 for female,
    1 for male) from a face image. It requires a bounding box to locate the face.
    functionality for predicting age (in years) and gender ID (0 for Female,
    1 for Male) from a face image. It requires a bounding box to locate the face.
    """

    def __init__(self, model_name: AgeGenderWeights = AgeGenderWeights.DEFAULT) -> None:
@@ -34,7 +34,7 @@ class AgeGender(Attribute):
            model_name (AgeGenderWeights): The enum specifying the model weights
                to load.
        """
        Logger.info(f"Initializing AgeGender with model={model_name.name}")
        Logger.info(f'Initializing AgeGender with model={model_name.name}')
        self.model_path = verify_model_weights(model_name)
        self._initialize_model()

@@ -49,10 +49,13 @@ class AgeGender(Attribute):
            self.input_name = input_meta.name
            self.input_size = tuple(input_meta.shape[2:4])  # (height, width)
            self.output_names = [output.name for output in self.session.get_outputs()]
            Logger.info(f"Successfully initialized AgeGender model with input size {self.input_size}")
            Logger.info(f'Successfully initialized AgeGender model with input size {self.input_size}')
        except Exception as e:
            Logger.error(f"Failed to load AgeGender model from '{self.model_path}'", exc_info=True)
            raise RuntimeError(f"Failed to initialize AgeGender model: {e}")
            Logger.error(
                f"Failed to load AgeGender model from '{self.model_path}'",
                exc_info=True,
            )
            raise RuntimeError(f'Failed to initialize AgeGender model: {e}') from e

    def preprocess(self, image: np.ndarray, bbox: Union[List, np.ndarray]) -> np.ndarray:
        """
@@ -76,11 +79,15 @@ class AgeGender(Attribute):
        aligned_face, _ = bbox_center_alignment(image, center, self.input_size[1], scale, rotation)

        blob = cv2.dnn.blobFromImage(
            aligned_face, scalefactor=1.0, size=self.input_size[::-1], mean=(0.0, 0.0, 0.0), swapRB=True
            aligned_face,
            scalefactor=1.0,
            size=self.input_size[::-1],
            mean=(0.0, 0.0, 0.0),
            swapRB=True,
        )
        return blob

    def postprocess(self, prediction: np.ndarray) -> Tuple[str, int]:
    def postprocess(self, prediction: np.ndarray) -> Tuple[int, int]:
        """
        Processes the raw model output to extract gender and age.

@@ -88,17 +95,16 @@ class AgeGender(Attribute):
            prediction (np.ndarray): The raw output from the model inference.

        Returns:
            Tuple[str, int]: A tuple containing the predicted gender label ("Female" or "Male")
            Tuple[int, int]: A tuple containing the predicted gender ID (0 for Female, 1 for Male)
                and age (in years).
        """
        # First two values are gender logits
        gender_id = int(np.argmax(prediction[:2]))
        gender = "Female" if gender_id == 0 else "Male"
        # Third value is normalized age, scaled by 100
        age = int(np.round(prediction[2] * 100))
        return gender, age
        return gender_id, age

    def predict(self, image: np.ndarray, bbox: Union[List, np.ndarray]) -> Tuple[str, int]:
    def predict(self, image: np.ndarray, bbox: Union[List, np.ndarray]) -> Tuple[int, int]:
        """
        Predicts age and gender for a single face specified by a bounding box.

@@ -107,22 +113,22 @@ class AgeGender(Attribute):
            bbox (Union[List, np.ndarray]): The face bounding box coordinates [x1, y1, x2, y2].

        Returns:
            Tuple[str, int]: A tuple containing the predicted gender label and age.
            Tuple[int, int]: A tuple containing the predicted gender ID (0 for Female, 1 for Male) and age.
        """
        face_blob = self.preprocess(image, bbox)
        prediction = self.session.run(self.output_names, {self.input_name: face_blob})[0][0]
        gender, age = self.postprocess(prediction)
        return gender, age
        gender_id, age = self.postprocess(prediction)
        return gender_id, age


# TODO: below is only for testing, remove it later
if __name__ == "__main__":
if __name__ == '__main__':
    # To run this script, you need to have uniface.detection installed
    # or available in your path.
    from uniface.constants import RetinaFaceWeights
    from uniface.detection import create_detector

    print("Initializing models for live inference...")
    print('Initializing models for live inference...')
    # 1. Initialize the face detector
    # Using a smaller model for faster real-time performance
    detector = create_detector(model_name=RetinaFaceWeights.MNET_V2)
@@ -133,14 +139,14 @@ if __name__ == "__main__":
    # 3. Start webcam capture
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        print("Error: Could not open webcam.")
        print('Error: Could not open webcam.')
        exit()

    print("Starting webcam feed. Press 'q' to quit.")
    while True:
        ret, frame = cap.read()
        if not ret:
            print("Error: Failed to capture frame.")
            print('Error: Failed to capture frame.')
            break

        # Detect faces in the current frame
@@ -148,25 +154,34 @@ if __name__ == "__main__":

        # For each detected face, predict age and gender
        for detection in detections:
            box = detection["bbox"]
            box = detection['bbox']
            x1, y1, x2, y2 = map(int, box)

            # Predict attributes
            gender, age = age_gender_predictor.predict(frame, box)
            gender_id, age = age_gender_predictor.predict(frame, box)
            gender_str = 'Female' if gender_id == 0 else 'Male'

            # Prepare text and draw on the frame
            label = f"{gender}, {age}"
            label = f'{gender_str}, {age}'
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
            cv2.putText(
                frame,
                label,
                (x1, y1 - 10),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.8,
                (0, 255, 0),
                2,
            )

        # Display the resulting frame
        cv2.imshow("Age and Gender Inference (Press 'q' to quit)", frame)

        # Break the loop if 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord("q"):
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    # Release resources
    cap.release()
    cv2.destroyAllWindows()
    print("Inference stopped.")
    print('Inference stopped.')
@@ -4,6 +4,7 @@

from abc import ABC, abstractmethod
from typing import Any

import numpy as np


@@ -26,7 +27,7 @@ class Attribute(ABC):
        inference session (e.g., ONNX Runtime, PyTorch), and any necessary
        warm-up procedures to prepare the model for prediction.
        """
        raise NotImplementedError("Subclasses must implement the _initialize_model method.")
        raise NotImplementedError('Subclasses must implement the _initialize_model method.')

    @abstractmethod
    def preprocess(self, image: np.ndarray, *args: Any) -> Any:
@@ -46,7 +47,7 @@ class Attribute(ABC):
        Returns:
            The preprocessed data ready for model inference.
        """
        raise NotImplementedError("Subclasses must implement the preprocess method.")
        raise NotImplementedError('Subclasses must implement the preprocess method.')

    @abstractmethod
    def postprocess(self, prediction: Any) -> Any:
@@ -63,7 +64,7 @@ class Attribute(ABC):
        Returns:
            The final, processed attributes.
        """
        raise NotImplementedError("Subclasses must implement the postprocess method.")
        raise NotImplementedError('Subclasses must implement the postprocess method.')

    @abstractmethod
    def predict(self, image: np.ndarray, *args: Any) -> Any:
@@ -82,7 +83,7 @@ class Attribute(ABC):
        Returns:
            The final predicted attributes.
        """
        raise NotImplementedError("Subclasses must implement the predict method.")
        raise NotImplementedError('Subclasses must implement the predict method.')

    def __call__(self, *args, **kwargs) -> Any:
        """
@@ -2,18 +2,19 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

from typing import List, Tuple, Union

import cv2
import torch
import numpy as np
from typing import Tuple, Union, List
import torch

from uniface.attribute.base import Attribute
from uniface.log import Logger
from uniface.constants import DDAMFNWeights
from uniface.face_utils import face_alignment
from uniface.log import Logger
from uniface.model_store import verify_model_weights

__all__ = ["Emotion"]
__all__ = ['Emotion']


class Emotion(Attribute):
@@ -37,15 +38,30 @@ class Emotion(Attribute):
            model_weights (DDAMFNWeights): The enum for the model weights to load.
            input_size (Tuple[int, int]): The expected input size for the model.
        """
        Logger.info(f"Initializing Emotion with model={model_weights.name}")
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        Logger.info(f'Initializing Emotion with model={model_weights.name}')

        if torch.backends.mps.is_available():
            self.device = torch.device('mps')
        elif torch.cuda.is_available():
            self.device = torch.device('cuda')
        else:
            self.device = torch.device('cpu')

        self.input_size = input_size
        self.model_path = verify_model_weights(model_weights)

        # Define emotion labels based on the selected model
        self.emotion_labels = ["Neutral", "Happy", "Sad", "Surprise", "Fear", "Disgust", "Angry"]
        self.emotion_labels = [
            'Neutral',
            'Happy',
            'Sad',
            'Surprise',
            'Fear',
            'Disgust',
            'Angry',
        ]
        if model_weights == DDAMFNWeights.AFFECNET8:
            self.emotion_labels.append("Contempt")
            self.emotion_labels.append('Contempt')

        self._initialize_model()

@@ -60,10 +76,10 @@ class Emotion(Attribute):
            dummy_input = torch.randn(1, 3, *self.input_size).to(self.device)
            with torch.no_grad():
                self.model(dummy_input)
            Logger.info(f"Successfully initialized Emotion model on {self.device}")
            Logger.info(f'Successfully initialized Emotion model on {self.device}')
        except Exception as e:
            Logger.error(f"Failed to load Emotion model from '{self.model_path}'", exc_info=True)
            raise RuntimeError(f"Failed to initialize Emotion model: {e}")
            raise RuntimeError(f'Failed to initialize Emotion model: {e}') from e

    def preprocess(self, image: np.ndarray, landmark: Union[List, np.ndarray]) -> torch.Tensor:
        """
@@ -77,7 +93,7 @@ class Emotion(Attribute):
            torch.Tensor: The preprocessed image tensor ready for inference.
        """
        landmark = np.asarray(landmark)

        aligned_image, _ = face_alignment(image, landmark)

        # Convert BGR to RGB, resize, normalize, and convert to a CHW tensor
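The body of this conversion step is elided by the diff; a minimal sketch of what such preprocessing typically looks like is below (the helper name and the `/255.0` normalization are assumptions, not the library's actual values):

```python
# Illustrative only -- not the actual uniface implementation.
import cv2
import numpy as np
import torch


def to_chw_tensor(aligned_bgr: np.ndarray, size=(112, 112)) -> torch.Tensor:
    rgb = cv2.cvtColor(aligned_bgr, cv2.COLOR_BGR2RGB)
    rgb = cv2.resize(rgb, size).astype(np.float32) / 255.0  # assumed scaling
    return torch.from_numpy(rgb).permute(2, 0, 1).unsqueeze(0)  # HWC -> NCHW
```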
@@ -114,11 +130,11 @@ class Emotion(Attribute):


# TODO: below is only for testing, remove it later
if __name__ == "__main__":
    from uniface.detection import create_detector
if __name__ == '__main__':
    from uniface.constants import RetinaFaceWeights
    from uniface.detection import create_detector

    print("Initializing models for live inference...")
    print('Initializing models for live inference...')
    # 1. Initialize the face detector
    # Using a smaller model for faster real-time performance
    detector = create_detector(model_name=RetinaFaceWeights.MNET_V2)
@@ -129,14 +145,14 @@ if __name__ == "__main__":
    # 3. Start webcam capture
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        print("Error: Could not open webcam.")
        print('Error: Could not open webcam.')
        exit()

    print("Starting webcam feed. Press 'q' to quit.")
    while True:
        ret, frame = cap.read()
        if not ret:
            print("Error: Failed to capture frame.")
            print('Error: Failed to capture frame.')
            break

        # Detect faces in the current frame.
@@ -151,11 +167,19 @@ if __name__ == "__main__":

            # Predict attributes using the landmark
            emotion, confidence = emotion_predictor.predict(frame, landmark)

            # Prepare text and draw on the frame
            label = f"{emotion} ({confidence:.2f})"
            label = f'{emotion} ({confidence:.2f})'
            cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)
            cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 0, 0), 2)
            cv2.putText(
                frame,
                label,
                (x1, y1 - 10),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.8,
                (255, 0, 0),
                2,
            )

        # Display the resulting frame
        cv2.imshow("Emotion Inference (Press 'q' to quit)", frame)
@@ -167,4 +191,4 @@ if __name__ == "__main__":
    # Release resources
    cap.release()
    cv2.destroyAllWindows()
    print("Inference stopped.")
    print('Inference stopped.')
@@ -2,12 +2,22 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

import cv2
import math
import itertools
import math
from typing import List, Optional, Tuple

import cv2
import numpy as np

from typing import Tuple, List
__all__ = [
    'resize_image',
    'generate_anchors',
    'non_max_suppression',
    'decode_boxes',
    'decode_landmarks',
    'distance2bbox',
    'distance2kps',
]


def resize_image(frame, target_shape: Tuple[int, int] = (640, 640)) -> Tuple[np.ndarray, float]:
@@ -45,26 +55,19 @@ def resize_image(frame, target_shape: Tuple[int, int] = (640, 640)) -> Tuple[np.

def generate_anchors(image_size: Tuple[int, int] = (640, 640)) -> np.ndarray:
    """
    Generate anchor boxes for a given image size.
    Generate anchor boxes for a given image size (RetinaFace specific).

    Args:
        image_size (Tuple[int, int]): Input image size (width, height). Defaults to (640, 640).

    Returns:
        np.ndarray: Anchor box coordinates as a NumPy array.
        np.ndarray: Anchor box coordinates as a NumPy array with shape (num_anchors, 4).
    """
    image_size = image_size

    steps = [8, 16, 32]
    min_sizes = [[16, 32], [64, 128], [256, 512]]

    anchors = []
    feature_maps = [
        [
            math.ceil(image_size[0] / step),
            math.ceil(image_size[1] / step)
        ] for step in steps
    ]
    feature_maps = [[math.ceil(image_size[0] / step), math.ceil(image_size[1] / step)] for step in steps]

    for k, (map_height, map_width) in enumerate(feature_maps):
        step = steps[k]
@@ -82,16 +85,16 @@ def generate_anchors(image_size: Tuple[int, int] = (640, 640)) -> np.ndarray:
    return output
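A quick sanity check on the geometry above: with strides 8/16/32 and two `min_sizes` per cell, a 640x640 input yields 16,800 anchors.

```python
# 80*80*2 + 40*40*2 + 20*20*2 anchors for a 640x640 input
num_anchors = sum((640 // step) ** 2 * 2 for step in (8, 16, 32))
assert num_anchors == 16800
```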

def non_max_supression(dets: List[np.ndarray], threshold: float):
def non_max_suppression(dets: np.ndarray, threshold: float) -> List[int]:
    """
    Apply Non-Maximum Suppression (NMS) to reduce overlapping bounding boxes based on a threshold.

    Args:
        dets (numpy.ndarray): Array of detections with each row as [x1, y1, x2, y2, score].
        dets (np.ndarray): Array of detections with each row as [x1, y1, x2, y2, score].
        threshold (float): IoU threshold for suppression.

    Returns:
        list: Indices of bounding boxes retained after suppression.
        List[int]: Indices of bounding boxes retained after suppression.
    """
    x1 = dets[:, 0]
    y1 = dets[:, 1]
@@ -122,19 +125,21 @@ def non_max_supression(dets: List[np.ndarray], threshold: float):
    return keep
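The loop between these lines is elided by the hunk; for reference, greedy IoU-based NMS has this well-known shape (a self-contained sketch, not the library's exact body):

```python
import numpy as np


def nms_sketch(dets: np.ndarray, threshold: float) -> list:
    """Keep the highest-scoring box, drop remaining boxes whose IoU exceeds `threshold`, repeat."""
    x1, y1, x2, y2, scores = dets.T
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]  # indices sorted by descending score
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(int(i))
        # Intersection of the top box with all remaining boxes
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        inter = np.maximum(0.0, xx2 - xx1 + 1) * np.maximum(0.0, yy2 - yy1 + 1)
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        order = order[1:][iou <= threshold]  # suppress strong overlaps
    return keep
```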

def decode_boxes(loc, priors, variances=[0.1, 0.2]) -> np.ndarray:
def decode_boxes(loc: np.ndarray, priors: np.ndarray, variances: Optional[List[float]] = None) -> np.ndarray:
    """
    Decode locations from predictions using priors to undo
    the encoding done for offset regression at train time.
    the encoding done for offset regression at train time (RetinaFace specific).

    Args:
        loc (np.ndarray): Location predictions for loc layers, shape: [num_priors, 4]
        priors (np.ndarray): Prior boxes in center-offset form, shape: [num_priors, 4]
        variances (list[float]): Variances of prior boxes
        variances (Optional[List[float]]): Variances of prior boxes. Defaults to [0.1, 0.2].

    Returns:
        np.ndarray: Decoded bounding box predictions
        np.ndarray: Decoded bounding box predictions with shape [num_priors, 4]
    """
    if variances is None:
        variances = [0.1, 0.2]
    # Compute centers of predicted boxes
    cxcy = priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:]

@@ -149,18 +154,22 @@ def decode_boxes(loc, priors, variances=[0.1, 0.2]) -> np.ndarray:
    return boxes
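In equation form, the decoding this function performs (with priors $(p_{cx}, p_{cy}, p_w, p_h)$ in center-offset form, offsets $l$, and variances $v = (v_0, v_1)$; the center line is visible in the hunk above, the size/corner lines follow the standard SSD-style scheme) is:

```latex
c_x = p_{cx} + l_x v_0 p_w, \quad
c_y = p_{cy} + l_y v_0 p_h, \quad
w = p_w e^{l_w v_1}, \quad
h = p_h e^{l_h v_1},
```

with corners recovered as $(x_1, y_1) = (c_x - w/2,\ c_y - h/2)$ and $(x_2, y_2) = (c_x + w/2,\ c_y + h/2)$.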

def decode_landmarks(predictions, priors, variances=[0.1, 0.2]) -> np.ndarray:
def decode_landmarks(
    predictions: np.ndarray, priors: np.ndarray, variances: Optional[List[float]] = None
) -> np.ndarray:
    """
    Decode landmark predictions using prior boxes.
    Decode landmark predictions using prior boxes (RetinaFace specific).

    Args:
        predictions (np.ndarray): Landmark predictions, shape: [num_priors, 10]
        priors (np.ndarray): Prior boxes, shape: [num_priors, 4]
        variances (list): Scaling factors for landmark offsets.
        variances (Optional[List[float]]): Scaling factors for landmark offsets. Defaults to [0.1, 0.2].

    Returns:
        np.ndarray: Decoded landmarks, shape: [num_priors, 10]
    """
    if variances is None:
        variances = [0.1, 0.2]

    # Reshape predictions to [num_priors, 5, 2] to process landmark points
    predictions = predictions.reshape(predictions.shape[0], 5, 2)
@@ -176,3 +185,59 @@ def decode_landmarks(predictions, priors, variances=[0.1, 0.2]) -> np.ndarray:
    landmarks = landmarks.reshape(landmarks.shape[0], -1)

    return landmarks


def distance2bbox(points: np.ndarray, distance: np.ndarray, max_shape: Optional[Tuple[int, int]] = None) -> np.ndarray:
    """
    Decode distance prediction to bounding box (SCRFD specific).

    Args:
        points (np.ndarray): Anchor points with shape (n, 2), [x, y].
        distance (np.ndarray): Distance from the given point to 4
            boundaries (left, top, right, bottom) with shape (n, 4).
        max_shape (Optional[Tuple[int, int]]): Shape of the image (height, width) for clipping.

    Returns:
        np.ndarray: Decoded bounding boxes with shape (n, 4) as [x1, y1, x2, y2].
    """
    x1 = points[:, 0] - distance[:, 0]
    y1 = points[:, 1] - distance[:, 1]
    x2 = points[:, 0] + distance[:, 2]
    y2 = points[:, 1] + distance[:, 3]

    if max_shape is not None:
        x1 = np.clip(x1, 0, max_shape[1])
        y1 = np.clip(y1, 0, max_shape[0])
        x2 = np.clip(x2, 0, max_shape[1])
        y2 = np.clip(y2, 0, max_shape[0])
    else:
        x1 = np.maximum(x1, 0)
        y1 = np.maximum(y1, 0)
        x2 = np.maximum(x2, 0)
        y2 = np.maximum(y2, 0)

    return np.stack([x1, y1, x2, y2], axis=-1)


def distance2kps(points: np.ndarray, distance: np.ndarray, max_shape: Optional[Tuple[int, int]] = None) -> np.ndarray:
    """
    Decode distance prediction to keypoints (SCRFD specific).

    Args:
        points (np.ndarray): Anchor points with shape (n, 2), [x, y].
        distance (np.ndarray): Distance from the given point to keypoints with shape (n, 2k).
        max_shape (Optional[Tuple[int, int]]): Shape of the image (height, width) for clipping.

    Returns:
        np.ndarray: Decoded keypoints with shape (n, 2k).
    """
    preds = []
    for i in range(0, distance.shape[1], 2):
        px = points[:, i % 2] + distance[:, i]
        py = points[:, i % 2 + 1] + distance[:, i + 1]
        if max_shape is not None:
            px = np.clip(px, 0, max_shape[1])
            py = np.clip(py, 0, max_shape[0])
        preds.append(px)
        preds.append(py)
    return np.stack(preds, axis=-1)
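A tiny worked example of the two SCRFD decoders above (input values invented for illustration):

```python
import numpy as np

points = np.array([[100.0, 100.0]])          # one anchor center (x, y)
ltrb = np.array([[10.0, 20.0, 30.0, 40.0]])  # distances: left, top, right, bottom
print(distance2bbox(points, ltrb))           # [[ 90.  80. 130. 140.]]

kps = np.array([[5.0, -3.0, -5.0, 3.0]])     # two keypoints' (dx, dy) offsets, 2k = 4
print(distance2kps(points, kps))             # [[105.  97.  95. 103.]]
```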

@@ -5,6 +5,7 @@
from enum import Enum
from typing import Dict


# fmt: off
class SphereFaceWeights(str, Enum):
    """
@@ -78,87 +79,67 @@ class LandmarkWeights(str, Enum):
    """
    DEFAULT = "2d_106"

# fmt: on


MODEL_URLS: Dict[Enum, str] = {
    # RetinaFace
    RetinaFaceWeights.MNET_025: 'https://github.com/yakhyo/uniface/releases/download/weights/retinaface_mv1_0.25.onnx',
    RetinaFaceWeights.MNET_050: 'https://github.com/yakhyo/uniface/releases/download/weights/retinaface_mv1_0.50.onnx',
    RetinaFaceWeights.MNET_V1: 'https://github.com/yakhyo/uniface/releases/download/weights/retinaface_mv1.onnx',
    RetinaFaceWeights.MNET_V2: 'https://github.com/yakhyo/uniface/releases/download/weights/retinaface_mv2.onnx',
    RetinaFaceWeights.RESNET18: 'https://github.com/yakhyo/uniface/releases/download/weights/retinaface_r18.onnx',
    RetinaFaceWeights.RESNET34: 'https://github.com/yakhyo/uniface/releases/download/weights/retinaface_r34.onnx',
    # MobileFace
    MobileFaceWeights.MNET_025: 'https://github.com/yakhyo/uniface/releases/download/weights/mobilenetv1_0.25.onnx',
    MobileFaceWeights.MNET_V2: 'https://github.com/yakhyo/uniface/releases/download/weights/mobilenetv2.onnx',
    MobileFaceWeights.MNET_V3_SMALL: 'https://github.com/yakhyo/uniface/releases/download/weights/mobilenetv3_small.onnx',
    MobileFaceWeights.MNET_V3_LARGE: 'https://github.com/yakhyo/uniface/releases/download/weights/mobilenetv3_large.onnx',
    # SphereFace
    SphereFaceWeights.SPHERE20: 'https://github.com/yakhyo/uniface/releases/download/weights/sphere20.onnx',
    SphereFaceWeights.SPHERE36: 'https://github.com/yakhyo/uniface/releases/download/weights/sphere36.onnx',
    # ArcFace
    ArcFaceWeights.MNET: 'https://github.com/yakhyo/uniface/releases/download/weights/w600k_mbf.onnx',
    ArcFaceWeights.RESNET: 'https://github.com/yakhyo/uniface/releases/download/weights/w600k_r50.onnx',
    # SCRFD
    SCRFDWeights.SCRFD_10G_KPS: 'https://github.com/yakhyo/uniface/releases/download/weights/scrfd_10g_kps.onnx',
    SCRFDWeights.SCRFD_500M_KPS: 'https://github.com/yakhyo/uniface/releases/download/weights/scrfd_500m_kps.onnx',
    # DDAFM
    DDAMFNWeights.AFFECNET7: 'https://github.com/yakhyo/uniface/releases/download/weights/affecnet7.script',
    DDAMFNWeights.AFFECNET8: 'https://github.com/yakhyo/uniface/releases/download/weights/affecnet8.script',
    # AgeGender
    AgeGenderWeights.DEFAULT: 'https://github.com/yakhyo/uniface/releases/download/weights/genderage.onnx',
    # Landmarks
    LandmarkWeights.DEFAULT: 'https://github.com/yakhyo/uniface/releases/download/weights/2d106det.onnx',
}

MODEL_SHA256: Dict[Enum, str] = {
    # RetinaFace
    RetinaFaceWeights.MNET_025: 'b7a7acab55e104dce6f32cdfff929bd83946da5cd869b9e2e9bdffafd1b7e4a5',
    RetinaFaceWeights.MNET_050: 'd8977186f6037999af5b4113d42ba77a84a6ab0c996b17c713cc3d53b88bfc37',
    RetinaFaceWeights.MNET_V1: '75c961aaf0aff03d13c074e9ec656e5510e174454dd4964a161aab4fe5f04153',
    RetinaFaceWeights.MNET_V2: '3ca44c045651cabeed1193a1fae8946ad1f3a55da8fa74b341feab5a8319f757',
    RetinaFaceWeights.RESNET18: 'e8b5ddd7d2c3c8f7c942f9f10cec09d8e319f78f09725d3f709631de34fb649d',
    RetinaFaceWeights.RESNET34: 'bd0263dc2a465d32859555cb1741f2d98991eb0053696e8ee33fec583d30e630',
    # MobileFace
    MobileFaceWeights.MNET_025: 'eeda7d23d9c2b40cf77fa8da8e895b5697465192648852216074679657f8ee8b',
    MobileFaceWeights.MNET_V2: '38b148284dd48cc898d5d4453104252fbdcbacc105fe3f0b80e78954d9d20d89',
    MobileFaceWeights.MNET_V3_SMALL: 'd4acafa1039a82957aa8a9a1dac278a401c353a749c39df43de0e29cc1c127c3',
    MobileFaceWeights.MNET_V3_LARGE: '0e48f8e11f070211716d03e5c65a3db35a5e917cfb5bc30552358629775a142a',
    # SphereFace
    SphereFaceWeights.SPHERE20: 'c02878cf658eb1861f580b7e7144b0d27cc29c440bcaa6a99d466d2854f14c9d',
    SphereFaceWeights.SPHERE36: '13b3890cd5d7dec2b63f7c36fd7ce07403e5a0bbb701d9647c0289e6cbe7bb20',
    # ArcFace
    ArcFaceWeights.MNET: '9cc6e4a75f0e2bf0b1aed94578f144d15175f357bdc05e815e5c4a02b319eb4f',
    ArcFaceWeights.RESNET: '4c06341c33c2ca1f86781dab0e829f88ad5b64be9fba56e56bc9ebdefc619e43',
    # SCRFD
    SCRFDWeights.SCRFD_10G_KPS: '5838f7fe053675b1c7a08b633df49e7af5495cee0493c7dcf6697200b85b5b91',
    SCRFDWeights.SCRFD_500M_KPS: '5e4447f50245bbd7966bd6c0fa52938c61474a04ec7def48753668a9d8b4ea3a',
    # DDAFM
    DDAMFNWeights.AFFECNET7: '10535bf8b6afe8e9d6ae26cea6c3add9a93036e9addb6adebfd4a972171d015d',
    DDAMFNWeights.AFFECNET8: '8c66963bc71db42796a14dfcbfcd181b268b65a3fc16e87147d6a3a3d7e0f487',
    # AgeGender
    AgeGenderWeights.DEFAULT: '4fde69b1c810857b88c64a335084f1c3fe8f01246c9a191b48c7bb756d6652fb',
    # Landmark
    LandmarkWeights.DEFAULT: 'f001b856447c413801ef5c42091ed0cd516fcd21f2d6b79635b1e733a7109dbf',
}

CHUNK_SIZE = 8192
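These two tables feed the download-and-verify logic in `verify_model_weights`, whose body is not part of this diff; the checksum side of that logic looks roughly like the sketch below (the helper name is hypothetical):

```python
import hashlib
from pathlib import Path


def file_sha256(path: Path, chunk_size: int = CHUNK_SIZE) -> str:
    """Stream the file in CHUNK_SIZE blocks so large weights don't load into memory at once."""
    digest = hashlib.sha256()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()


# e.g. file_sha256(local_weights_path) == MODEL_SHA256[RetinaFaceWeights.MNET_V2]
```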

@@ -3,12 +3,13 @@
# GitHub: https://github.com/yakhyo


import numpy as np
from typing import Tuple, Dict, Any, List
from typing import Any, Dict, List

import numpy as np

from .scrfd import SCRFD
from .base import BaseDetector
from .retinaface import RetinaFace
from .scrfd import SCRFD

# Global cache for detector instances
_detector_cache: Dict[str, BaseDetector] = {}
@@ -38,9 +39,9 @@ def detect_faces(image: np.ndarray, method: str = 'retinaface', **kwargs) -> Lis
        ... print(f"BBox: {face['bbox']}")
    """
    method_name = method.lower()

    sorted_kwargs = sorted(kwargs.items())
    cache_key = f"{method_name}_{str(sorted_kwargs)}"
    cache_key = f'{method_name}_{str(sorted_kwargs)}'

    if cache_key not in _detector_cache:
        # Pass kwargs to create the correctly configured detector
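In practice the cache key means repeated calls with identical settings share one detector instance, while changing any kwarg creates (and caches) a new one:

```python
detect_faces(frame_a, method='scrfd', conf_thresh=0.6)  # builds and caches a SCRFD detector
detect_faces(frame_b, method='scrfd', conf_thresh=0.6)  # same key -> reuses the cached instance
detect_faces(frame_b, method='scrfd', conf_thresh=0.7)  # different kwargs -> new cache entry
```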
@@ -96,10 +97,7 @@ def create_detector(method: str = 'retinaface', **kwargs) -> BaseDetector:

    else:
        available_methods = ['retinaface', 'scrfd']
        raise ValueError(
            f"Unsupported detection method: '{method}'. "
            f"Available methods: {available_methods}"
        )
        raise ValueError(f"Unsupported detection method: '{method}'. Available methods: {available_methods}")


def list_available_detectors() -> Dict[str, Dict[str, Any]]:
@@ -118,8 +116,8 @@ def list_available_detectors() -> Dict[str, Dict[str, Any]]:
            'model_name': 'mnet_v2',
            'conf_thresh': 0.5,
            'nms_thresh': 0.4,
            'input_size': (640, 640)
        }
            'input_size': (640, 640),
        },
    },
    'scrfd': {
        'description': 'SCRFD detector - fast and accurate with efficient architecture',
@@ -129,9 +127,9 @@ def list_available_detectors() -> Dict[str, Dict[str, Any]]:
            'model_name': 'scrfd_10g_kps',
            'conf_thresh': 0.5,
            'nms_thresh': 0.4,
            'input_size': (640, 640)
        }
    }
            'input_size': (640, 640),
        },
    },
}
@@ -2,13 +2,10 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

"""
Base classes for face detection.
"""
from abc import ABC, abstractmethod
from typing import Any, Dict, List

import numpy as np
from abc import ABC, abstractmethod
from typing import Tuple, Dict, Any


class BaseDetector(ABC):
@@ -24,7 +21,7 @@ class BaseDetector(ABC):
        self.config = kwargs

    @abstractmethod
    def detect(self, image: np.ndarray, **kwargs) -> Tuple[np.ndarray, np.ndarray]:
    def detect(self, image: np.ndarray, **kwargs) -> List[Dict[str, Any]]:
        """
        Detect faces in an image.

@@ -33,11 +30,18 @@ class BaseDetector(ABC):
            **kwargs: Additional detection parameters

        Returns:
            Tuple[np.ndarray, np.ndarray]: (detections, landmarks)
                - detections: Bounding boxes with confidence scores, shape (N, 5)
                  Format: [x_min, y_min, x_max, y_max, confidence]
                - landmarks: Facial landmark points, shape (N, 5, 2) for 5-point landmarks
                  or (N, 68, 2) for 68-point landmarks. Empty array if not supported.
            List[Dict[str, Any]]: List of detected faces, where each dictionary contains:
                - 'bbox' (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
                - 'confidence' (float): Detection confidence score (0.0 to 1.0)
                - 'landmarks' (np.ndarray): Facial landmarks with shape (5, 2) for 5-point landmarks
                  or (68, 2) for 68-point landmarks. Empty array if not supported.

        Example:
            >>> faces = detector.detect(image)
            >>> for face in faces:
            ...     bbox = face['bbox']  # np.ndarray with shape (4,)
            ...     confidence = face['confidence']  # float
            ...     landmarks = face['landmarks']  # np.ndarray with shape (5, 2)
        """
        pass

@@ -55,7 +59,7 @@ class BaseDetector(ABC):
        pass

    @abstractmethod
    def postprocess(self, outputs, **kwargs) -> Tuple[np.ndarray, np.ndarray]:
    def postprocess(self, outputs, **kwargs) -> Any:
        """
        Postprocess model outputs to get final detections.

@@ -64,13 +68,13 @@ class BaseDetector(ABC):
            **kwargs: Additional postprocessing parameters

        Returns:
            Tuple[np.ndarray, np.ndarray]: (detections, landmarks)
            Any: Processed outputs (implementation-specific format, typically tuple of arrays)
        """
        pass

    def __str__(self) -> str:
        """String representation of the detector."""
        return f"{self.__class__.__name__}({self.config})"
        return f'{self.__class__.__name__}({self.config})'

    def __repr__(self) -> str:
        """Detailed string representation."""
@@ -96,5 +100,5 @@ class BaseDetector(ABC):
        return {
            'name': self.__class__.__name__,
            'supports_landmarks': self._supports_landmarks,
            'config': self.config
            'config': self.config,
        }
@@ -2,23 +2,23 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

from typing import Any, Dict, List, Literal, Tuple

import numpy as np

from typing import Tuple, List, Literal, Dict, Any

from uniface.common import (
    decode_boxes,
    decode_landmarks,
    generate_anchors,
    non_max_suppression,
    resize_image,
)
from uniface.constants import RetinaFaceWeights
from uniface.log import Logger
from uniface.model_store import verify_model_weights
from uniface.constants import RetinaFaceWeights
from uniface.onnx_utils import create_onnx_session

from .base import BaseDetector
from .utils import (
    non_max_supression,
    resize_image,
    decode_boxes,
    generate_anchors,
    decode_landmarks
)


class RetinaFace(BaseDetector):
@@ -36,7 +36,8 @@ class RetinaFace(BaseDetector):
        pre_nms_topk (int, optional): Number of top-scoring boxes considered before NMS. Defaults to 5000.
        post_nms_topk (int, optional): Max number of detections kept after NMS. Defaults to 750.
        dynamic_size (bool, optional): If True, generate anchors dynamically per input image. Defaults to False.
        input_size (Tuple[int, int], optional): Fixed input size (width, height) if `dynamic_size=False`. Defaults to (640, 640).
        input_size (Tuple[int, int], optional): Fixed input size (width, height) if `dynamic_size=False`.
            Defaults to (640, 640).

    Attributes:
        model_name (RetinaFaceWeights): Selected model variant.
@@ -68,18 +69,18 @@ class RetinaFace(BaseDetector):
        self.input_size = kwargs.get('input_size', (640, 640))

        Logger.info(
            f"Initializing RetinaFace with model={self.model_name}, conf_thresh={self.conf_thresh}, nms_thresh={self.nms_thresh}, "
            f"input_size={self.input_size}"
            f'Initializing RetinaFace with model={self.model_name}, conf_thresh={self.conf_thresh}, '
            f'nms_thresh={self.nms_thresh}, input_size={self.input_size}'
        )

        # Get path to model weights
        self._model_path = verify_model_weights(self.model_name)
        Logger.info(f"Verified model weights located at: {self._model_path}")
        Logger.info(f'Verified model weights located at: {self._model_path}')

        # Precompute anchors if using static size
        if not self.dynamic_size and self.input_size is not None:
            self._priors = generate_anchors(image_size=self.input_size)
            Logger.debug("Generated anchors for static input size.")
            Logger.debug('Generated anchors for static input size.')

        # Initialize model
        self._initialize_model(self._model_path)
@@ -98,7 +99,7 @@ class RetinaFace(BaseDetector):
            self.session = create_onnx_session(model_path)
            self.input_names = self.session.get_inputs()[0].name
            self.output_names = [x.name for x in self.session.get_outputs()]
            Logger.info(f"Successfully initialized the model from {model_path}")
            Logger.info(f'Successfully initialized the model from {model_path}')
        except Exception as e:
            Logger.error(f"Failed to load model from '{model_path}': {e}", exc_info=True)
            raise RuntimeError(f"Failed to initialize model session for '{model_path}'") from e
@@ -132,8 +133,8 @@ class RetinaFace(BaseDetector):
        self,
        image: np.ndarray,
        max_num: int = 0,
        metric: Literal["default", "max"] = "max",
        center_weight: float = 2.0
        metric: Literal['default', 'max'] = 'max',
        center_weight: float = 2.0,
    ) -> List[Dict[str, Any]]:
        """
        Perform face detection on an input image and return bounding boxes and facial landmarks.
@@ -149,9 +150,18 @@ class RetinaFace(BaseDetector):

        Returns:
            List[Dict[str, Any]]: List of face detection dictionaries, each containing:
                - 'bbox': [x1, y1, x2, y2] - Bounding box coordinates
                - 'confidence': float - Detection confidence score
                - 'landmarks': [[x1, y1], [x2, y2], [x3, y3], [x4, y4], [x5, y5]] - 5-point facial landmarks
                - 'bbox' (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
                - 'confidence' (float): Detection confidence score (0.0 to 1.0)
                - 'landmarks' (np.ndarray): 5-point facial landmarks with shape (5, 2)

        Example:
            >>> faces = detector.detect(image)
            >>> for face in faces:
            ...     bbox = face['bbox']  # np.ndarray with shape (4,)
            ...     confidence = face['confidence']  # float
            ...     landmarks = face['landmarks']  # np.ndarray with shape (5, 2)
            ...     # Can pass landmarks directly to recognition
            ...     embedding = recognizer.get_normalized_embedding(image, landmarks)
        """

        original_height, original_width = image.shape[:2]
@@ -178,10 +188,12 @@ class RetinaFace(BaseDetector):

        # Calculate offsets from image center
        center = (original_height // 2, original_width // 2)
        offsets = np.vstack([
            (detections[:, 0] + detections[:, 2]) / 2 - center[1],
            (detections[:, 1] + detections[:, 3]) / 2 - center[0]
        ])
        offsets = np.vstack(
            [
                (detections[:, 0] + detections[:, 2]) / 2 - center[1],
                (detections[:, 1] + detections[:, 3]) / 2 - center[0],
            ]
        )
        offset_dist_squared = np.sum(np.power(offsets, 2.0), axis=0)

        # Calculate scores based on the chosen metric
@@ -199,15 +211,17 @@ class RetinaFace(BaseDetector):
        faces = []
        for i in range(detections.shape[0]):
            face_dict = {
                'bbox': detections[i, :4].astype(float).tolist(),
                'confidence': detections[i, 4].item(),
                'landmarks': landmarks[i].astype(float).tolist()
                'bbox': detections[i, :4].astype(np.float32),
                'confidence': float(detections[i, 4]),
                'landmarks': landmarks[i].astype(np.float32),
            }
            faces.append(face_dict)

        return faces

    def postprocess(self, outputs: List[np.ndarray], resize_factor: float, shape: Tuple[int, int]) -> Tuple[np.ndarray, np.ndarray]:
    def postprocess(
        self, outputs: List[np.ndarray], resize_factor: float, shape: Tuple[int, int]
    ) -> Tuple[np.ndarray, np.ndarray]:
        """
        Process the model outputs into final detection results.

@@ -226,7 +240,11 @@ class RetinaFace(BaseDetector):
            - landmarks (np.ndarray): Array of detected facial landmarks.
              Shape: (num_detections, 5, 2), where each row contains 5 landmark points (x, y).
|
||||
"""
|
||||
loc, conf, landmarks = outputs[0].squeeze(0), outputs[1].squeeze(0), outputs[2].squeeze(0)
|
||||
loc, conf, landmarks = (
|
||||
outputs[0].squeeze(0),
|
||||
outputs[1].squeeze(0),
|
||||
outputs[2].squeeze(0),
|
||||
)
|
||||
|
||||
# Decode boxes and landmarks
|
||||
boxes = decode_boxes(loc, self._priors)
|
||||
@@ -242,22 +260,31 @@ class RetinaFace(BaseDetector):
|
||||
boxes, landmarks, scores = boxes[mask], landmarks[mask], scores[mask]
|
||||
|
||||
# Sort by scores
|
||||
order = scores.argsort()[::-1][:self.pre_nms_topk]
|
||||
order = scores.argsort()[::-1][: self.pre_nms_topk]
|
||||
boxes, landmarks, scores = boxes[order], landmarks[order], scores[order]
|
||||
|
||||
# Apply NMS
|
||||
detections = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
|
||||
keep = non_max_supression(detections, self.nms_thresh)
|
||||
keep = non_max_suppression(detections, self.nms_thresh)
|
||||
detections, landmarks = detections[keep], landmarks[keep]
|
||||
|
||||
# Keep top-k detections
|
||||
detections, landmarks = detections[:self.post_nms_topk], landmarks[:self.post_nms_topk]
|
||||
detections, landmarks = (
|
||||
detections[: self.post_nms_topk],
|
||||
landmarks[: self.post_nms_topk],
|
||||
)
|
||||
|
||||
landmarks = landmarks.reshape(-1, 5, 2).astype(np.int32)
|
||||
|
||||
return detections, landmarks
|
||||
|
||||
def _scale_detections(self, boxes: np.ndarray, landmarks: np.ndarray, resize_factor: float, shape: Tuple[int, int]) -> Tuple[np.ndarray, np.ndarray]:
|
||||
def _scale_detections(
|
||||
self,
|
||||
boxes: np.ndarray,
|
||||
landmarks: np.ndarray,
|
||||
resize_factor: float,
|
||||
shape: Tuple[int, int],
|
||||
) -> Tuple[np.ndarray, np.ndarray]:
|
||||
# Scale bounding boxes and landmarks to the original image size.
|
||||
bbox_scale = np.array([shape[0], shape[1]] * 2)
|
||||
boxes = boxes * bbox_scale / resize_factor
|
||||
@@ -272,22 +299,23 @@ class RetinaFace(BaseDetector):
|
||||
def draw_bbox(frame, bbox, score, color=(0, 255, 0), thickness=2):
|
||||
x1, y1, x2, y2 = map(int, bbox) # Unpack 4 bbox values
|
||||
cv2.rectangle(frame, (x1, y1), (x2, y2), color, thickness)
|
||||
cv2.putText(frame, f"{score:.2f}", (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)
|
||||
cv2.putText(frame, f'{score:.2f}', (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)
|
||||
|
||||
|
||||
def draw_keypoints(frame, points, color=(0, 0, 255), radius=2):
|
||||
for (x, y) in points.astype(np.int32):
|
||||
for x, y in points.astype(np.int32):
|
||||
cv2.circle(frame, (int(x), int(y)), radius, color, -1)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
if __name__ == '__main__':
|
||||
import cv2
|
||||
|
||||
detector = RetinaFace(model_name=RetinaFaceWeights.MNET_050)
|
||||
print(detector.get_info())
|
||||
cap = cv2.VideoCapture(0)
|
||||
|
||||
if not cap.isOpened():
|
||||
print("Failed to open webcam.")
|
||||
print('Failed to open webcam.')
|
||||
exit()
|
||||
|
||||
print("Webcam started. Press 'q' to exit.")
|
||||
@@ -295,7 +323,7 @@ if __name__ == "__main__":
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
print("Failed to read frame.")
|
||||
print('Failed to read frame.')
|
||||
break
|
||||
|
||||
# Get face detections as list of dictionaries
|
||||
@@ -318,11 +346,18 @@ if __name__ == "__main__":
|
||||
draw_keypoints(frame, points)
|
||||
|
||||
# Display face count
|
||||
cv2.putText(frame, f"Faces: {len(faces)}", (10, 30),
|
||||
cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
|
||||
cv2.putText(
|
||||
frame,
|
||||
f'Faces: {len(faces)}',
|
||||
(10, 30),
|
||||
cv2.FONT_HERSHEY_SIMPLEX,
|
||||
0.7,
|
||||
(255, 255, 255),
|
||||
2,
|
||||
)
|
||||
|
||||
cv2.imshow("FaceDetection", frame)
|
||||
if cv2.waitKey(1) & 0xFF == ord("q"):
|
||||
cv2.imshow('FaceDetection', frame)
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
cap.release()

@@ -7,15 +7,15 @@ from typing import Any, Dict, List, Literal, Tuple
import cv2
import numpy as np

from uniface.common import distance2bbox, distance2kps, non_max_suppression, resize_image
from uniface.constants import SCRFDWeights
from uniface.log import Logger
from uniface.model_store import verify_model_weights
from uniface.onnx_utils import create_onnx_session

from .base import BaseDetector
from .utils import distance2bbox, distance2kps, non_max_supression, resize_image

__all__ = ["SCRFD"]
__all__ = ['SCRFD']


class SCRFD(BaseDetector):
@@ -52,10 +52,10 @@ class SCRFD(BaseDetector):
super().__init__(**kwargs)
self._supports_landmarks = True  # SCRFD supports landmarks

model_name = kwargs.get("model_name", SCRFDWeights.SCRFD_10G_KPS)
conf_thresh = kwargs.get("conf_thresh", 0.5)
nms_thresh = kwargs.get("nms_thresh", 0.4)
input_size = kwargs.get("input_size", (640, 640))
model_name = kwargs.get('model_name', SCRFDWeights.SCRFD_10G_KPS)
conf_thresh = kwargs.get('conf_thresh', 0.5)
nms_thresh = kwargs.get('nms_thresh', 0.4)
input_size = kwargs.get('input_size', (640, 640))

self.conf_thresh = conf_thresh
self.nms_thresh = nms_thresh
@@ -69,13 +69,13 @@ class SCRFD(BaseDetector):
# ---------------------------------

Logger.info(
f"Initializing SCRFD with model={model_name}, conf_thresh={conf_thresh}, nms_thresh={nms_thresh}, "
f"input_size={input_size}"
f'Initializing SCRFD with model={model_name}, conf_thresh={conf_thresh}, nms_thresh={nms_thresh}, '
f'input_size={input_size}'
)

# Get path to model weights
self._model_path = verify_model_weights(model_name)
Logger.info(f"Verified model weights located at: {self._model_path}")
Logger.info(f'Verified model weights located at: {self._model_path}')

# Initialize model
self._initialize_model(self._model_path)
@@ -94,7 +94,7 @@ class SCRFD(BaseDetector):
self.session = create_onnx_session(model_path)
self.input_names = self.session.get_inputs()[0].name
self.output_names = [x.name for x in self.session.get_outputs()]
Logger.info(f"Successfully initialized the model from {model_path}")
Logger.info(f'Successfully initialized the model from {model_path}')
except Exception as e:
Logger.error(f"Failed to load model from '{model_path}': {e}", exc_info=True)
raise RuntimeError(f"Failed to initialize model session for '{model_path}'") from e
@@ -173,7 +173,11 @@ class SCRFD(BaseDetector):
return scores_list, bboxes_list, kpss_list

def detect(
self, image: np.ndarray, max_num: int = 0, metric: Literal["default", "max"] = "max", center_weight: float = 2
self,
image: np.ndarray,
max_num: int = 0,
metric: Literal['default', 'max'] = 'max',
center_weight: float = 2,
) -> List[Dict[str, Any]]:
"""
Perform face detection on an input image and return bounding boxes and facial landmarks.
@@ -189,9 +193,18 @@ class SCRFD(BaseDetector):

Returns:
List[Dict[str, Any]]: List of face detection dictionaries, each containing:
- 'bbox': [x1, y1, x2, y2] - Bounding box coordinates
- 'confidence': float - Detection confidence score
- 'landmarks': [[x1, y1], [x2, y2], [x3, y3], [x4, y4], [x5, y5]] - 5-point facial landmarks
- 'bbox' (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
- 'confidence' (float): Detection confidence score (0.0 to 1.0)
- 'landmarks' (np.ndarray): 5-point facial landmarks with shape (5, 2)

Example:
>>> faces = detector.detect(image)
>>> for face in faces:
... bbox = face['bbox']  # np.ndarray with shape (4,)
... confidence = face['confidence']  # float
... landmarks = face['landmarks']  # np.ndarray with shape (5, 2)
... # Can pass landmarks directly to recognition
... embedding = recognizer.get_normalized_embedding(image, landmarks)
"""

original_height, original_width = image.shape[:2]
@@ -219,7 +232,7 @@ class SCRFD(BaseDetector):
pre_det = np.hstack((bboxes, scores)).astype(np.float32, copy=False)
pre_det = pre_det[order, :]

keep = non_max_supression(pre_det, threshold=self.nms_thresh)
keep = non_max_suppression(pre_det, threshold=self.nms_thresh)

detections = pre_det[keep, :]
landmarks = landmarks[order, :, :]
@@ -240,7 +253,7 @@ class SCRFD(BaseDetector):

# Calculate scores based on the chosen metric
offset_dist_squared = np.sum(np.power(offsets, 2.0), axis=0)
if metric == "max":
if metric == 'max':
values = area
else:
values = area - offset_dist_squared * center_weight
@@ -253,9 +266,9 @@ class SCRFD(BaseDetector):
faces = []
for i in range(detections.shape[0]):
face_dict = {
"bbox": detections[i, :4].astype(float).tolist(),
"confidence": detections[i, 4].item(),
"landmarks": landmarks[i].astype(float).tolist(),
'bbox': detections[i, :4].astype(np.float32),
'confidence': float(detections[i, 4]),
'landmarks': landmarks[i].astype(np.float32),
}
faces.append(face_dict)

@@ -266,7 +279,7 @@ class SCRFD(BaseDetector):
def draw_bbox(frame, bbox, score, color=(0, 255, 0), thickness=2):
x1, y1, x2, y2 = map(int, bbox)  # Unpack 4 bbox values
cv2.rectangle(frame, (x1, y1), (x2, y2), color, thickness)
cv2.putText(frame, f"{score:.2f}", (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)
cv2.putText(frame, f'{score:.2f}', (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)


def draw_keypoints(frame, points, color=(0, 0, 255), radius=2):
@@ -274,13 +287,13 @@ def draw_keypoints(frame, points, color=(0, 0, 255), radius=2):
cv2.circle(frame, (int(x), int(y)), radius, color, -1)


if __name__ == "__main__":
if __name__ == '__main__':
detector = SCRFD(model_name=SCRFDWeights.SCRFD_500M_KPS)
print(detector.get_info())
cap = cv2.VideoCapture(0)

if not cap.isOpened():
print("Failed to open webcam.")
print('Failed to open webcam.')
exit()

print("Webcam started. Press 'q' to exit.")
@@ -288,7 +301,7 @@ if __name__ == "__main__":
while True:
ret, frame = cap.read()
if not ret:
print("Failed to read frame.")
print('Failed to read frame.')
break

# Get face detections as list of dictionaries
@@ -297,9 +310,9 @@ if __name__ == "__main__":
# Process each detected face
for face in faces:
# Extract bbox and landmarks from dictionary
bbox = face["bbox"]  # [x1, y1, x2, y2]
landmarks = face["landmarks"]  # [[x1, y1], [x2, y2], ...]
confidence = face["confidence"]
bbox = face['bbox']  # [x1, y1, x2, y2]
landmarks = face['landmarks']  # [[x1, y1], [x2, y2], ...]
confidence = face['confidence']

# Pass bbox and confidence separately
draw_bbox(frame, bbox, confidence)
@@ -311,10 +324,18 @@ if __name__ == "__main__":
draw_keypoints(frame, points)

# Display face count
cv2.putText(frame, f"Faces: {len(faces)}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
cv2.putText(
frame,
f'Faces: {len(faces)}',
(10, 30),
cv2.FONT_HERSHEY_SIMPLEX,
0.7,
(255, 255, 255),
2,
)

cv2.imshow("FaceDetection", frame)
if cv2.waitKey(1) & 0xFF == ord("q"):
cv2.imshow('FaceDetection', frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break

cap.release()

@@ -1,232 +0,0 @@
# Copyright 2025 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

import cv2
import math
import itertools
import numpy as np

from typing import Tuple, List


def resize_image(frame, target_shape: Tuple[int, int] = (640, 640)) -> Tuple[np.ndarray, float]:
"""
Resize an image to fit within a target shape while keeping its aspect ratio.

Args:
frame (np.ndarray): Input image.
target_shape (Tuple[int, int]): Target size (width, height). Defaults to (640, 640).

Returns:
Tuple[np.ndarray, float]: Resized image on a blank canvas and the resize factor.
"""
width, height = target_shape

# Aspect-ratio preserving resize
im_ratio = float(frame.shape[0]) / frame.shape[1]
model_ratio = height / width
if im_ratio > model_ratio:
new_height = height
new_width = int(new_height / im_ratio)
else:
new_width = width
new_height = int(new_width * im_ratio)

resize_factor = float(new_height) / frame.shape[0]
resized_frame = cv2.resize(frame, (new_width, new_height))

# Create blank image and place resized image on it
image = np.zeros((height, width, 3), dtype=np.uint8)
image[:new_height, :new_width, :] = resized_frame
return image, resize_factor
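
A worked example of the letterboxing arithmetic above (a sketch with illustrative numbers; the helper now lives in `uniface.common` per the imports earlier in this diff):

```python
import numpy as np

# For a 1080x1920 (HxW) frame and a (640, 640) target: im_ratio = 1080/1920 = 0.5625
# is below model_ratio = 1.0, so new_width = 640, new_height = int(640 * 0.5625) = 360,
# and resize_factor = 360 / 1080 = 1/3. Boxes found on the canvas map back by division:
canvas_box = np.array([100.0, 50.0, 300.0, 200.0])
resize_factor = 360 / 1080
print(canvas_box / resize_factor)  # [300. 150. 900. 600.] in original-frame pixels
```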


def generate_anchors(image_size: Tuple[int, int] = (640, 640)) -> np.ndarray:
"""
Generate anchor boxes for a given image size.

Args:
image_size (Tuple[int, int]): Input image size (width, height). Defaults to (640, 640).

Returns:
np.ndarray: Anchor box coordinates as a NumPy array.
"""
image_size = image_size

steps = [8, 16, 32]
min_sizes = [[16, 32], [64, 128], [256, 512]]

anchors = []
feature_maps = [
[
math.ceil(image_size[0] / step),
math.ceil(image_size[1] / step)
] for step in steps
]

for k, (map_height, map_width) in enumerate(feature_maps):
step = steps[k]
for i, j in itertools.product(range(map_height), range(map_width)):
for min_size in min_sizes[k]:
s_kx = min_size / image_size[1]
s_ky = min_size / image_size[0]

dense_cx = [x * step / image_size[1] for x in [j + 0.5]]
dense_cy = [y * step / image_size[0] for y in [i + 0.5]]
for cy, cx in itertools.product(dense_cy, dense_cx):
anchors += [cx, cy, s_kx, s_ky]

output = np.array(anchors, dtype=np.float32).reshape(-1, 4)
return output
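
A quick sanity check on the anchor count this produces (the arithmetic follows the `steps` and `min_sizes` above; two anchors per feature-map cell):

```python
import math

steps = [8, 16, 32]
# One anchor per (cell, min_size) pair, two min_sizes per level:
total = sum(2 * math.ceil(640 / s) ** 2 for s in steps)
print(total)  # 16800, so generate_anchors((640, 640)) returns shape (16800, 4)
```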


def non_max_supression(dets: List[np.ndarray], threshold: float):
"""
Apply Non-Maximum Suppression (NMS) to reduce overlapping bounding boxes based on a threshold.

Args:
dets (numpy.ndarray): Array of detections with each row as [x1, y1, x2, y2, score].
threshold (float): IoU threshold for suppression.

Returns:
list: Indices of bounding boxes retained after suppression.
"""
x1 = dets[:, 0]
y1 = dets[:, 1]
x2 = dets[:, 2]
y2 = dets[:, 3]
scores = dets[:, 4]

areas = (x2 - x1 + 1) * (y2 - y1 + 1)
order = scores.argsort()[::-1]

keep = []
while order.size > 0:
i = order[0]
keep.append(i)
xx1 = np.maximum(x1[i], x1[order[1:]])
yy1 = np.maximum(y1[i], y1[order[1:]])
xx2 = np.minimum(x2[i], x2[order[1:]])
yy2 = np.minimum(y2[i], y2[order[1:]])

w = np.maximum(0.0, xx2 - xx1 + 1)
h = np.maximum(0.0, yy2 - yy1 + 1)
inter = w * h
ovr = inter / (areas[i] + areas[order[1:]] - inter)

inds = np.where(ovr <= threshold)[0]
order = order[inds + 1]

return keep
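
A toy run of the function above (illustrative numbers; the renamed `uniface.common.non_max_suppression` keeps the same semantics): the second box overlaps the first almost entirely and is suppressed, the third survives.

```python
import numpy as np

dets = np.array([
    [10, 10, 100, 100, 0.95],    # kept: highest score
    [12, 12, 102, 102, 0.90],    # suppressed: IoU with the first box is ~0.92 > 0.4
    [200, 200, 280, 280, 0.80],  # kept: no overlap with the first box
])
print(non_max_supression(dets, threshold=0.4))  # [0, 2]
```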


def decode_boxes(loc, priors, variances=[0.1, 0.2]) -> np.ndarray:
"""
Decode locations from predictions using priors to undo
the encoding done for offset regression at train time.

Args:
loc (np.ndarray): Location predictions for loc layers, shape: [num_priors, 4]
priors (np.ndarray): Prior boxes in center-offset form, shape: [num_priors, 4]
variances (list[float]): Variances of prior boxes

Returns:
np.ndarray: Decoded bounding box predictions
"""
# Compute centers of predicted boxes
cxcy = priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:]

# Compute widths and heights of predicted boxes
wh = priors[:, 2:] * np.exp(loc[:, 2:] * variances[1])

# Convert center, size to corner coordinates
boxes = np.zeros_like(loc)
boxes[:, :2] = cxcy - wh / 2  # xmin, ymin
boxes[:, 2:] = cxcy + wh / 2  # xmax, ymax

return boxes
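
A small check of the decoding above (a sketch with made-up numbers): a zero offset decodes back to the prior itself, converted to corner form.

```python
import numpy as np

priors = np.array([[0.5, 0.5, 0.2, 0.2]], dtype=np.float32)  # cx, cy, w, h
loc = np.zeros((1, 4), dtype=np.float32)
print(decode_boxes(loc, priors))  # [[0.4 0.4 0.6 0.6]] -> corners of the prior
```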


def decode_landmarks(predictions, priors, variances=[0.1, 0.2]) -> np.ndarray:
"""
Decode landmark predictions using prior boxes.

Args:
predictions (np.ndarray): Landmark predictions, shape: [num_priors, 10]
priors (np.ndarray): Prior boxes, shape: [num_priors, 4]
variances (list): Scaling factors for landmark offsets.

Returns:
np.ndarray: Decoded landmarks, shape: [num_priors, 10]
"""

# Reshape predictions to [num_priors, 5, 2] to process landmark points
predictions = predictions.reshape(predictions.shape[0], 5, 2)

# Expand priors to match (num_priors, 5, 2)
priors_xy = np.repeat(priors[:, :2][:, np.newaxis, :], 5, axis=1)  # (num_priors, 5, 2)
priors_wh = np.repeat(priors[:, 2:][:, np.newaxis, :], 5, axis=1)  # (num_priors, 5, 2)

# Compute absolute landmark positions
landmarks = priors_xy + predictions * variances[0] * priors_wh

# Flatten back to [num_priors, 10]
landmarks = landmarks.reshape(landmarks.shape[0], -1)

return landmarks


def distance2bbox(points, distance, max_shape=None):
"""Decode distance prediction to bounding box.

Args:
points (Tensor): Shape (n, 2), [x, y].
distance (Tensor): Distance from the given point to 4
boundaries (left, top, right, bottom).
max_shape (tuple): Shape of the image.

Returns:
Tensor: Decoded bounding boxes with shape (n, 4).
"""
x1 = points[:, 0] - distance[:, 0]
y1 = points[:, 1] - distance[:, 1]
x2 = points[:, 0] + distance[:, 2]
y2 = points[:, 1] + distance[:, 3]
if max_shape is not None:
x1 = x1.clamp(min=0, max=max_shape[1])
y1 = y1.clamp(min=0, max=max_shape[0])
x2 = x2.clamp(min=0, max=max_shape[1])
y2 = y2.clamp(min=0, max=max_shape[0])
else:
x1 = np.maximum(x1, 0)
y1 = np.maximum(y1, 0)
x2 = np.maximum(x2, 0)
y2 = np.maximum(y2, 0)

return np.stack([x1, y1, x2, y2], axis=-1)


def distance2kps(points, distance, max_shape=None):
"""Decode distance prediction to keypoints.

Args:
points (Tensor): Shape (n, 2), [x, y].
distance (Tensor): Distance from the given point to 4
boundaries (left, top, right, bottom).
max_shape (tuple): Shape of the image.

Returns:
Tensor: Decoded keypoints with shape (n, 2k).
"""
preds = []
for i in range(0, distance.shape[1], 2):
px = points[:, i % 2] + distance[:, i]
py = points[:, i % 2 + 1] + distance[:, i + 1]
if max_shape is not None:
px = px.clamp(min=0, max=max_shape[1])
py = py.clamp(min=0, max=max_shape[0])
preds.append(px)
preds.append(py)
return np.stack(preds, axis=-1)
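
A worked example for `distance2bbox` (illustrative values; the companion `distance2kps` applies the same point-plus-offset idea per keypoint pair):

```python
import numpy as np

# An anchor point at (50, 50) with distances 10/20/30/40 to the
# left/top/right/bottom edges decodes to the box [40, 30, 80, 90]:
points = np.array([[50.0, 50.0]])
distance = np.array([[10.0, 20.0, 30.0, 40.0]])
print(distance2bbox(points, distance))  # [[40. 30. 80. 90.]]
```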

uniface/face.py (new file, +51 lines)
@@ -0,0 +1,51 @@
# Copyright 2025 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

from dataclasses import asdict, dataclass
from typing import Optional

import numpy as np

from uniface.face_utils import compute_similarity

__all__ = ['Face']


@dataclass
class Face:
"""Detected face with analysis results."""

bbox: np.ndarray
confidence: float
landmarks: np.ndarray
embedding: Optional[np.ndarray] = None
age: Optional[int] = None
gender_id: Optional[int] = None  # 0: Female, 1: Male

def compute_similarity(self, other: 'Face') -> float:
"""Compute cosine similarity with another face."""
if self.embedding is None or other.embedding is None:
raise ValueError('Both faces must have embeddings for similarity computation')
return float(compute_similarity(self.embedding, other.embedding))

def to_dict(self) -> dict:
"""Convert to dictionary."""
return asdict(self)

@property
def gender(self) -> str:
"""Get gender as a string label (Female or Male)."""
if self.gender_id is None:
return None
return 'Female' if self.gender_id == 0 else 'Male'

def __repr__(self) -> str:
parts = [f'Face(confidence={self.confidence:.3f}']
if self.age is not None:
parts.append(f'age={self.age}')
if self.gender_id is not None:
parts.append(f'gender={self.gender}')
if self.embedding is not None:
parts.append(f'embedding_dim={self.embedding.shape[0]}')
return ', '.join(parts) + ')'
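
A short usage sketch of the new dataclass (values are made up): `gender_id` is stored as an int, and the `.gender` property resolves it to a label.

```python
import numpy as np

face = Face(
    bbox=np.array([10, 20, 110, 140], dtype=np.float32),
    confidence=0.98,
    landmarks=np.zeros((5, 2), dtype=np.float32),
    age=31,
    gender_id=1,
)
print(face.gender)  # 'Male' (gender_id 1); 0 would resolve to 'Female'
print(face)         # Face(confidence=0.980, age=31, gender=Male)
```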

@@ -2,13 +2,18 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

from typing import Tuple, Union

import cv2
import numpy as np
from skimage.transform import SimilarityTransform
from typing import Tuple, Union


__all__ = ["face_alignment", "compute_similarity", "bbox_center_alignment", "transform_points_2d"]
__all__ = [
'face_alignment',
'compute_similarity',
'bbox_center_alignment',
'transform_points_2d',
]


# Reference alignment for facial landmarks (ArcFace)
@@ -18,9 +23,9 @@ reference_alignment: np.ndarray = np.array(
[73.5318, 51.5014],
[56.0252, 71.7366],
[41.5493, 92.3655],
[70.7299, 92.2041]
[70.7299, 92.2041],
],
dtype=np.float32
dtype=np.float32,
)


@@ -41,7 +46,7 @@ def estimate_norm(landmark: np.ndarray, image_size: Union[int, Tuple[int, int]]
AssertionError: If the input landmark array does not have the shape (5, 2)
or if image_size is not a multiple of 112 or 128.
"""
assert landmark.shape == (5, 2), "Landmark array must have shape (5, 2)."
assert landmark.shape == (5, 2), 'Landmark array must have shape (5, 2).'

# Handle both int and tuple inputs
if isinstance(image_size, tuple):
@@ -49,7 +54,7 @@ def estimate_norm(landmark: np.ndarray, image_size: Union[int, Tuple[int, int]]
else:
size = image_size

assert size % 112 == 0 or size % 128 == 0, "Image size must be a multiple of 112 or 128."
assert size % 112 == 0 or size % 128 == 0, 'Image size must be a multiple of 112 or 128.'

if size % 112 == 0:
ratio = float(size) / 112.0
@@ -72,7 +77,11 @@ def estimate_norm(landmark: np.ndarray, image_size: Union[int, Tuple[int, int]]
return matrix, inverse_matrix


def face_alignment(image: np.ndarray, landmark: np.ndarray, image_size: Union[int, Tuple[int, int]] = 112) -> Tuple[np.ndarray, np.ndarray]:
def face_alignment(
image: np.ndarray,
landmark: np.ndarray,
image_size: Union[int, Tuple[int, int]] = 112,
) -> Tuple[np.ndarray, np.ndarray]:
"""
Align the face in the input image based on the given facial landmarks.


@@ -2,8 +2,8 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

from .models import Landmark106
from .base import BaseLandmarker
from .models import Landmark106


def create_landmarker(method: str = '2d106det', **kwargs) -> BaseLandmarker:
@@ -25,8 +25,4 @@ def create_landmarker(method: str = '2d106det', **kwargs) -> BaseLandmarker:
raise ValueError(f"Unsupported method: '{method}'. Available: {available}")


__all__ = [
"create_landmarker",
"Landmark106",
"BaseLandmarker"
]
__all__ = ['create_landmarker', 'Landmark106', 'BaseLandmarker']

@@ -3,6 +3,7 @@
# GitHub: https://github.com/yakhyo

from abc import ABC, abstractmethod

import numpy as np


@@ -10,6 +11,7 @@ class BaseLandmarker(ABC):
"""
Abstract Base Class for all facial landmark models.
"""

@abstractmethod
def get_landmarks(self, image: np.ndarray, bbox: np.ndarray) -> np.ndarray:
"""

@@ -2,18 +2,20 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

import cv2
import numpy as np
from typing import Tuple

from uniface.log import Logger
import cv2
import numpy as np

from uniface.constants import LandmarkWeights
from uniface.model_store import verify_model_weights
from uniface.face_utils import bbox_center_alignment, transform_points_2d
from uniface.log import Logger
from uniface.model_store import verify_model_weights
from uniface.onnx_utils import create_onnx_session

from .base import BaseLandmarker

__all__ = ['Landmark']
__all__ = ['Landmark106']


class Landmark106(BaseLandmarker):
@@ -40,15 +42,13 @@ class Landmark106(BaseLandmarker):
>>> print(landmarks.shape)
(106, 2)
"""

def __init__(
self,
model_name: LandmarkWeights = LandmarkWeights.DEFAULT,
input_size: Tuple[int, int] = (192, 192)
input_size: Tuple[int, int] = (192, 192),
) -> None:
Logger.info(
f"Initializing Facial Landmark with model={model_name}, "
f"input_size={input_size}"
)
Logger.info(f'Initializing Facial Landmark with model={model_name}, input_size={input_size}')
self.input_size = input_size
self.input_std = 1.0
self.input_mean = 0.0
@@ -79,11 +79,11 @@ class Landmark106(BaseLandmarker):
self.lmk_dim = 2  # x,y coordinates
self.lmk_num = output_shape[1] // self.lmk_dim  # Number of landmarks

Logger.info(f"Model initialized with {self.lmk_num} landmarks")
Logger.info(f'Model initialized with {self.lmk_num} landmarks')

except Exception as e:
Logger.error(f"Failed to load landmark model from '{self.model_path}'", exc_info=True)
raise RuntimeError(f"Failed to initialize landmark model: {e}")
raise RuntimeError(f'Failed to initialize landmark model: {e}') from e

def preprocess(self, image: np.ndarray, bbox: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
"""Prepares a face crop for inference.
@@ -108,8 +108,11 @@ class Landmark106(BaseLandmarker):
aligned_face, transform_matrix = bbox_center_alignment(image, center, self.input_size[0], scale, 0.0)

face_blob = cv2.dnn.blobFromImage(
aligned_face, 1.0 / self.input_std, self.input_size,
(self.input_mean, self.input_mean, self.input_mean), swapRB=True
aligned_face,
1.0 / self.input_std,
self.input_size,
(self.input_mean, self.input_mean, self.input_mean),
swapRB=True,
)
return face_blob, transform_matrix

@@ -129,7 +132,7 @@ class Landmark106(BaseLandmarker):
"""
landmarks = predictions.reshape((-1, 2))
landmarks[:, 0:2] += 1
landmarks[:, 0:2] *= (self.input_size[0] // 2)
landmarks[:, 0:2] *= self.input_size[0] // 2

inverse_matrix = cv2.invertAffineTransform(transform_matrix)
landmarks = transform_points_2d(landmarks, inverse_matrix)
@@ -149,16 +152,13 @@ class Landmark106(BaseLandmarker):
np.ndarray: An array of predicted landmark points with shape (106, 2).
"""
face_blob, transform_matrix = self.preprocess(image, bbox)
raw_predictions = self.session.run(
self.output_names, {self.input_names[0]: face_blob}
)[0][0]
raw_predictions = self.session.run(self.output_names, {self.input_names[0]: face_blob})[0][0]
landmarks = self.postprocess(raw_predictions, transform_matrix)
return landmarks
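
A usage sketch for the landmarker ('face.jpg' and the bbox values are placeholders): the output is already mapped back to original-image coordinates because `postprocess` applies the inverse of the alignment transform.

```python
import cv2
import numpy as np

image = cv2.imread('face.jpg')  # placeholder path
landmarker = Landmark106()
landmarks = landmarker.get_landmarks(image, np.array([80, 60, 240, 260]))
print(landmarks.shape)  # (106, 2)
```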


# Testing code
if __name__ == "__main__":
if __name__ == '__main__':
from uniface.detection import RetinaFace
from uniface.landmark import Landmark106

@@ -167,7 +167,7 @@ if __name__ == "__main__":

cap = cv2.VideoCapture(0)
if not cap.isOpened():
print("Webcam not available.")
print('Webcam not available.')
exit()

print("Press 'q' to quit.")
@@ -175,14 +175,14 @@ if __name__ == "__main__":
while True:
ret, frame = cap.read()
if not ret:
print("Frame capture failed.")
print('Frame capture failed.')
break

# 2. The detect method returns a list of dictionaries
faces = face_detector.detect(frame)

if not faces:
cv2.imshow("Facial Landmark Detection", frame)
cv2.imshow('Facial Landmark Detection', frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
continue
@@ -197,14 +197,14 @@ if __name__ == "__main__":

# --- Drawing Logic ---
# Draw the landmarks
for (x, y) in landmarks.astype(int):
for x, y in landmarks.astype(int):
cv2.circle(frame, (x, y), 2, (0, 255, 0), -1)

# Draw the bounding box
x1, y1, x2, y2 = map(int, bbox)
cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)

cv2.imshow("Facial Landmark Detection", frame)
cv2.imshow('Facial Landmark Detection', frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break


@@ -1,7 +1,7 @@
import logging

# Create logger for uniface
Logger = logging.getLogger("uniface")
Logger = logging.getLogger('uniface')
Logger.setLevel(logging.WARNING)  # Only show warnings/errors by default
Logger.addHandler(logging.NullHandler())

@@ -19,10 +19,7 @@ def enable_logging(level=logging.INFO):
"""
Logger.handlers.clear()
handler = logging.StreamHandler()
handler.setFormatter(logging.Formatter(
"%(asctime)s - %(levelname)s - %(message)s",
datefmt="%Y-%m-%d %H:%M:%S"
))
handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s', datefmt='%Y-%m-%d %H:%M:%S'))
Logger.addHandler(handler)
Logger.setLevel(level)
|

@@ -2,14 +2,14 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

import os
import hashlib
import os

import requests
from tqdm import tqdm

from uniface.log import Logger
import uniface.constants as const

from uniface.log import Logger

__all__ = ['verify_model_weights']

@@ -62,12 +62,12 @@ def verify_model_weights(model_name: str, root: str = '~/.uniface/models') -> st
Logger.info(f"Successfully downloaded '{model_name}' to {model_path}")
except Exception as e:
Logger.error(f"Failed to download model '{model_name}': {e}")
raise ConnectionError(f"Download failed for '{model_name}'")
raise ConnectionError(f"Download failed for '{model_name}'") from e

expected_hash = const.MODEL_SHA256.get(model_name)
if expected_hash and not verify_file_hash(model_path, expected_hash):
os.remove(model_path)  # Remove corrupted file
Logger.warning("Corrupted weight detected. Removing...")
Logger.warning('Corrupted weight detected. Removing...')
raise ValueError(f"Hash mismatch for '{model_name}'. The file may be corrupted; please try downloading again.")

return model_path

@@ -78,33 +78,36 @@ def download_file(url: str, dest_path: str) -> None:
try:
response = requests.get(url, stream=True)
response.raise_for_status()
with open(dest_path, "wb") as file, tqdm(
desc=f"Downloading {dest_path}",
unit='B',
unit_scale=True,
unit_divisor=1024
) as progress:
with (
open(dest_path, 'wb') as file,
tqdm(
desc=f'Downloading {dest_path}',
unit='B',
unit_scale=True,
unit_divisor=1024,
) as progress,
):
for chunk in response.iter_content(chunk_size=const.CHUNK_SIZE):
if chunk:
file.write(chunk)
progress.update(len(chunk))
except requests.RequestException as e:
raise ConnectionError(f"Failed to download file from {url}. Error: {e}")
raise ConnectionError(f'Failed to download file from {url}. Error: {e}') from e


def verify_file_hash(file_path: str, expected_hash: str) -> bool:
"""Compute the SHA-256 hash of the file and compare it with the expected hash."""
file_hash = hashlib.sha256()
with open(file_path, "rb") as f:
for chunk in iter(lambda: f.read(const.CHUNK_SIZE), b""):
with open(file_path, 'rb') as f:
for chunk in iter(lambda: f.read(const.CHUNK_SIZE), b''):
file_hash.update(chunk)
actual_hash = file_hash.hexdigest()
if actual_hash != expected_hash:
Logger.warning(f"Expected hash: {expected_hash}, but got: {actual_hash}")
Logger.warning(f'Expected hash: {expected_hash}, but got: {actual_hash}')
return actual_hash == expected_hash
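
A standalone sketch of the same integrity check (the chunk size here is illustrative; uniface reads `const.CHUNK_SIZE`):

```python
import hashlib

def sha256_of(path: str, chunk_size: int = 1024 * 1024) -> str:
    digest = hashlib.sha256()
    with open(path, 'rb') as f:
        # Stream in chunks so large weight files never sit fully in memory
        for chunk in iter(lambda: f.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()

# verify_file_hash then reduces to: sha256_of(file_path) == expected_hash
```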


if __name__ == "__main__":
if __name__ == '__main__':
model_names = [model.value for model in const.RetinaFaceWeights]

# Download each model in the list

@@ -2,10 +2,6 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

"""
Utilities for ONNX Runtime configuration and provider selection.
"""

from typing import List

import onnxruntime as ort
@@ -35,19 +31,19 @@ def get_available_providers() -> List[str]:
providers = []

# Priority order: CoreML > CUDA > CPU
if "CoreMLExecutionProvider" in available:
providers.append("CoreMLExecutionProvider")
Logger.info("CoreML acceleration enabled (Apple Silicon)")
if 'CoreMLExecutionProvider' in available:
providers.append('CoreMLExecutionProvider')
Logger.info('CoreML acceleration enabled (Apple Silicon)')

if "CUDAExecutionProvider" in available:
providers.append("CUDAExecutionProvider")
Logger.info("CUDA acceleration enabled (NVIDIA GPU)")
if 'CUDAExecutionProvider' in available:
providers.append('CUDAExecutionProvider')
Logger.info('CUDA acceleration enabled (NVIDIA GPU)')

# CPU is always available as fallback
providers.append("CPUExecutionProvider")
providers.append('CPUExecutionProvider')

if len(providers) == 1:
Logger.info("Using CPU execution (no hardware acceleration detected)")
Logger.info('Using CPU execution (no hardware acceleration detected)')

return providers

@@ -77,11 +73,27 @@ def create_onnx_session(model_path: str, providers: List[str] = None) -> ort.Inf
if providers is None:
providers = get_available_providers()

# Suppress ONNX Runtime warnings (e.g., CoreML partition warnings)
# Log levels: 0=VERBOSE, 1=INFO, 2=WARNING, 3=ERROR, 4=FATAL
sess_options = ort.SessionOptions()
sess_options.log_severity_level = 3  # Only show ERROR and FATAL

try:
session = ort.InferenceSession(model_path, providers=providers)
session = ort.InferenceSession(model_path, sess_options=sess_options, providers=providers)
active_provider = session.get_providers()[0]
Logger.debug(f"Session created with provider: {active_provider}")
Logger.debug(f'Session created with provider: {active_provider}')

# Show user-friendly message about which provider is being used
provider_names = {
'CoreMLExecutionProvider': 'CoreML (Apple Silicon)',
'CUDAExecutionProvider': 'CUDA (NVIDIA GPU)',
'CPUExecutionProvider': 'CPU',
}
provider_display = provider_names.get(active_provider, active_provider)
Logger.debug(f'Model loaded with provider: {active_provider}')
print(f'✓ Model loaded ({provider_display})')

return session
except Exception as e:
Logger.error(f"Failed to create ONNX session: {e}", exc_info=True)
raise RuntimeError(f"Failed to initialize ONNX Runtime session: {e}") from e
Logger.error(f'Failed to create ONNX session: {e}', exc_info=True)
raise RuntimeError(f'Failed to initialize ONNX Runtime session: {e}') from e
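
A usage sketch: the `providers` parameter shown in the signature above can be pinned explicitly to skip auto-selection ('model.onnx' is a placeholder path).

```python
session = create_onnx_session('model.onnx', providers=['CPUExecutionProvider'])
print(session.get_providers()[0])  # 'CPUExecutionProvider'
```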

uniface/py.typed (new, empty file)

@@ -2,10 +2,10 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

from typing import Dict
from .models import ArcFace, MobileFace, SphereFace

from .base import BaseRecognizer
from uniface.constants import ArcFaceWeights, MobileFaceWeights, SphereFaceWeights
from .models import ArcFace, MobileFace, SphereFace


def create_recognizer(method: str = 'arcface', **kwargs) -> BaseRecognizer:
"""
@@ -54,10 +54,11 @@ def create_recognizer(method: str = 'arcface', **kwargs) -> BaseRecognizer:
available = ['arcface', 'mobileface', 'sphereface']
raise ValueError(f"Unsupported method: '{method}'. Available: {available}")


__all__ = [
"create_recognizer",
"ArcFace",
"MobileFace",
"SphereFace",
"BaseRecognizer",
]
'create_recognizer',
'ArcFace',
'MobileFace',
'SphereFace',
'BaseRecognizer',
]
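
A factory usage sketch ('face.jpg' is a placeholder; `get_normalized_embedding` is the method the detector docstrings above pass landmarks into):

```python
import cv2
import numpy as np

recognizer = create_recognizer('arcface')
image = cv2.imread('face.jpg')                  # placeholder image
landmarks = np.zeros((5, 2), dtype=np.float32)  # 5-point landmarks from a detector
embedding = recognizer.get_normalized_embedding(image, landmarks)
```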

@@ -3,13 +3,14 @@
# GitHub: https://github.com/yakhyo

from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import List, Tuple, Union

import cv2
import numpy as np
from dataclasses import dataclass
from typing import Tuple, Union, List

from uniface.log import Logger
from uniface.face_utils import face_alignment
from uniface.log import Logger
from uniface.onnx_utils import create_onnx_session


@@ -18,6 +19,7 @@ class PreprocessConfig:
"""
Configuration for preprocessing images before feeding them into the model.
"""

input_mean: Union[float, List[float]] = 127.5
input_std: Union[float, List[float]] = 127.5
input_size: Tuple[int, int] = (112, 112)
@@ -28,6 +30,7 @@ class BaseRecognizer(ABC):
Abstract Base Class for all face recognition models.
It provides the core functionality for preprocessing, inference, and embedding extraction.
"""

@abstractmethod
def __init__(self, model_path: str, preprocessing: PreprocessConfig) -> None:
"""
@@ -63,17 +66,20 @@ class BaseRecognizer(ABC):
input_shape = input_cfg.shape
model_input_size = tuple(input_shape[2:4][::-1])  # (width, height)
if model_input_size != self.input_size:
Logger.warning(f"Model input size {model_input_size} differs from configured size {self.input_size}")
Logger.warning(f'Model input size {model_input_size} differs from configured size {self.input_size}')

# Extract output configuration
self.output_names = [output.name for output in self.session.get_outputs()]
self.output_shape = self.session.get_outputs()[0].shape

assert len(self.output_names) == 1, "Expected only one output node."
Logger.info(f"Successfully initialized face encoder from {self.model_path}")
assert len(self.output_names) == 1, 'Expected only one output node.'
Logger.info(f'Successfully initialized face encoder from {self.model_path}')

except Exception as e:
Logger.error(f"Failed to load face encoder model from '{self.model_path}'", exc_info=True)
Logger.error(
f"Failed to load face encoder model from '{self.model_path}'",
exc_info=True,
)
raise RuntimeError(f"Failed to initialize model session for '{self.model_path}'") from e

def preprocess(self, face_img: np.ndarray) -> np.ndarray:
@@ -91,8 +97,9 @@ class BaseRecognizer(ABC):
if isinstance(self.input_std, (list, tuple)):
# Per-channel normalization
rgb_img = cv2.cvtColor(resized_img, cv2.COLOR_BGR2RGB).astype(np.float32)
normalized_img = (rgb_img - np.array(self.input_mean, dtype=np.float32)) / \
np.array(self.input_std, dtype=np.float32)
normalized_img = (rgb_img - np.array(self.input_mean, dtype=np.float32)) / np.array(
self.input_std, dtype=np.float32
)

# Change to NCHW (batch, channels, height, width)
blob = np.transpose(normalized_img, (2, 0, 1))  # CHW
@@ -104,7 +111,7 @@ class BaseRecognizer(ABC):
scalefactor=1.0 / self.input_std,
size=self.input_size,
mean=(self.input_mean, self.input_mean, self.input_mean),
swapRB=True  # Convert BGR to RGB
swapRB=True,  # Convert BGR to RGB
)

|


@@ -6,9 +6,10 @@ from typing import Optional

from uniface.constants import ArcFaceWeights, MobileFaceWeights, SphereFaceWeights
from uniface.model_store import verify_model_weights

from .base import BaseRecognizer, PreprocessConfig

__all__ = ["ArcFace", "MobileFace", "SphereFace"]
__all__ = ['ArcFace', 'MobileFace', 'SphereFace']


class ArcFace(BaseRecognizer):
@@ -33,14 +34,10 @@ class ArcFace(BaseRecognizer):
def __init__(
self,
model_name: ArcFaceWeights = ArcFaceWeights.MNET,
preprocessing: Optional[PreprocessConfig] = None
preprocessing: Optional[PreprocessConfig] = None,
) -> None:
if preprocessing is None:
preprocessing = PreprocessConfig(
input_mean=127.5,
input_std=127.5,
input_size=(112, 112)
)
preprocessing = PreprocessConfig(input_mean=127.5, input_std=127.5, input_size=(112, 112))
model_path = verify_model_weights(model_name)
super().__init__(model_path=model_path, preprocessing=preprocessing)

@@ -67,14 +64,10 @@ class MobileFace(BaseRecognizer):
def __init__(
self,
model_name: MobileFaceWeights = MobileFaceWeights.MNET_V2,
preprocessing: Optional[PreprocessConfig] = None
preprocessing: Optional[PreprocessConfig] = None,
) -> None:
if preprocessing is None:
preprocessing = PreprocessConfig(
input_mean=127.5,
input_std=127.5,
input_size=(112, 112)
)
preprocessing = PreprocessConfig(input_mean=127.5, input_std=127.5, input_size=(112, 112))
model_path = verify_model_weights(model_name)
super().__init__(model_path=model_path, preprocessing=preprocessing)

@@ -101,14 +94,10 @@ class SphereFace(BaseRecognizer):
def __init__(
self,
model_name: SphereFaceWeights = SphereFaceWeights.SPHERE20,
preprocessing: Optional[PreprocessConfig] = None
preprocessing: Optional[PreprocessConfig] = None,
) -> None:
if preprocessing is None:
preprocessing = PreprocessConfig(
input_mean=127.5,
input_std=127.5,
input_size=(112, 112)
)
preprocessing = PreprocessConfig(input_mean=127.5, input_std=127.5, input_size=(112, 112))

model_path = verify_model_weights(model_name)
super().__init__(model_path=model_path, preprocessing=preprocessing)

@@ -2,26 +2,29 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

from typing import List, Union

import cv2
import numpy as np
from typing import List, Union


def draw_detections(
image: np.ndarray,
bboxes: Union[np.ndarray, List[List[float]]],
bboxes: Union[List[np.ndarray], List[List[float]]],
scores: Union[np.ndarray, List[float]],
landmarks: Union[np.ndarray, List[List[List[float]]]],
vis_threshold: float = 0.6
landmarks: Union[List[np.ndarray], List[List[List[float]]]],
vis_threshold: float = 0.6,
):
"""
Draws bounding boxes, scores, and landmarks from separate lists onto an image.

Args:
image (np.ndarray): The image to draw on.
bboxes (list or np.ndarray): A list of bounding boxes, e.g., [[x1,y1,x2,y2], ...].
scores (list or np.ndarray): A list of confidence scores.
landmarks (list or np.ndarray): A list of landmark sets, e.g., [[[x,y],...],...].
bboxes (List[np.ndarray] or List[List[float]]): List of bounding boxes. Each bbox can be
np.ndarray with shape (4,) or list [x1, y1, x2, y2].
scores (List[float] or np.ndarray): List or array of confidence scores.
landmarks (List[np.ndarray] or List[List[List[float]]]): List of landmark sets. Each landmark
set can be np.ndarray with shape (5, 2) or nested list [[[x,y],...],...].
vis_threshold (float): Confidence threshold for filtering which detections to draw.
"""
_colors = [(0, 0, 255), (0, 255, 255), (255, 0, 255), (0, 255, 0), (255, 0, 0)]
@@ -42,8 +45,15 @@ def draw_detections(
cv2.rectangle(image, tuple(bbox[:2]), tuple(bbox[2:]), (0, 0, 255), thickness)

# Draw score
cv2.putText(image, f"{score:.2f}", (bbox[0], bbox[1] - 10),
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), thickness)
cv2.putText(
image,
f'{score:.2f}',
(bbox[0], bbox[1] - 10),
cv2.FONT_HERSHEY_SIMPLEX,
0.5,
(255, 255, 255),
thickness,
)

# Draw landmarks
for j, point in enumerate(landmark_set):