From 2c78f39e5d27e4a8ddfed659a579235780ee006b Mon Sep 17 00:00:00 2001 From: yakhyo Date: Sat, 15 Nov 2025 21:09:37 +0900 Subject: [PATCH] ref: Add comprehensive test suite and enhance model functionality - Add new test files for age_gender, factory, landmark, recognition, scrfd, and utils - Add new scripts for age_gender, landmarks, and video detection - Update documentation in README.md, MODELS.md, QUICKSTART.md - Improve model constants and face utilities - Update detection models (retinaface, scrfd) with enhanced functionality - Update project configuration in pyproject.toml --- MODELS.md | 30 +-- QUICKSTART.md | 51 +++-- README.md | 34 ++- pyproject.toml | 3 +- scripts/README.md | 97 +++++++- scripts/TESTING.md | 389 -------------------------------- scripts/batch_process.py | 157 +++++++++++++ scripts/download_model.py | 76 +++++-- scripts/run_age_gender.py | 163 +++++++++++++ scripts/run_detection.py | 25 +- scripts/run_face_search.py | 20 +- scripts/run_landmarks.py | 149 ++++++++++++ scripts/run_recognition.py | 79 +++++-- scripts/run_video_detection.py | 142 ++++++++++++ tests/test_age_gender.py | 116 ++++++++++ tests/test_factory.py | 278 +++++++++++++++++++++++ tests/test_landmark.py | 107 +++++++++ tests/test_recognition.py | 211 +++++++++++++++++ tests/test_retinaface.py | 31 --- tests/test_scrfd.py | 71 ++++++ tests/test_utils.py | 247 ++++++++++++++++++++ uniface/__init__.py | 2 +- uniface/constants.py | 54 ++--- uniface/detection/retinaface.py | 6 +- uniface/detection/scrfd.py | 6 +- uniface/face_utils.py | 35 ++- uniface/landmark/models.py | 12 +- uniface/recognition/base.py | 14 +- 28 files changed, 2014 insertions(+), 591 deletions(-) delete mode 100644 scripts/TESTING.md create mode 100644 scripts/batch_process.py create mode 100644 scripts/run_age_gender.py create mode 100644 scripts/run_landmarks.py create mode 100644 scripts/run_video_detection.py create mode 100644 tests/test_age_gender.py create mode 100644 tests/test_factory.py create mode 100644 tests/test_landmark.py create mode 100644 tests/test_recognition.py create mode 100644 tests/test_scrfd.py create mode 100644 tests/test_utils.py diff --git a/MODELS.md b/MODELS.md index 964991d..7ffa396 100644 --- a/MODELS.md +++ b/MODELS.md @@ -113,13 +113,15 @@ embedding = recognizer.get_normalized_embedding(image, landmarks) Lightweight face recognition optimized for mobile devices. 
-| Model Name | Backbone | Params | Size | Use Case | -|-----------------|-----------------|--------|------|--------------------| -| `MNET_025` | MobileNetV1 0.25| 0.2M | 1MB | Ultra-lightweight | -| `MNET_V2` ⭐ | MobileNetV2 | 1.0M | 4MB | **Mobile/Edge** | -| `MNET_V3_SMALL` | MobileNetV3-S | 0.8M | 3MB | Mobile optimized | -| `MNET_V3_LARGE` | MobileNetV3-L | 2.5M | 10MB | Balanced mobile | +| Model Name | Backbone | Params | Size | LFW | CALFW | CPLFW | AgeDB-30 | Use Case | +|-----------------|-----------------|--------|------|-------|-------|-------|----------|--------------------| +| `MNET_025` | MobileNetV1 0.25| 0.36M | 1MB | 98.76%| 92.02%| 82.37%| 90.02% | Ultra-lightweight | +| `MNET_V2` ⭐ | MobileNetV2 | 2.29M | 4MB | 99.55%| 94.87%| 86.89%| 95.16% | **Mobile/Edge** | +| `MNET_V3_SMALL` | MobileNetV3-S | 1.25M | 3MB | 99.30%| 93.77%| 85.29%| 92.79% | Mobile optimized | +| `MNET_V3_LARGE` | MobileNetV3-L | 3.52M | 10MB | 99.53%| 94.56%| 86.79%| 95.13% | Balanced mobile | +**Dataset**: Trained on MS1M-V2 (5.8M images, 85K identities) +**Accuracy**: Evaluated on LFW, CALFW, CPLFW, and AgeDB-30 benchmarks **Note**: These models are lightweight alternatives to ArcFace for resource-constrained environments #### Usage @@ -138,12 +140,14 @@ recognizer = MobileFace(model_name=MobileFaceWeights.MNET_V2) Face recognition using angular softmax loss. -| Model Name | Backbone | Params | Size | Use Case | -|-------------|----------|--------|------|----------------------| -| `SPHERE20` | Sphere20 | 13.0M | 50MB | Research/Comparison | -| `SPHERE36` | Sphere36 | 24.2M | 92MB | Research/Comparison | +| Model Name | Backbone | Params | Size | LFW | CALFW | CPLFW | AgeDB-30 | Use Case | +|-------------|----------|--------|------|-------|-------|-------|----------|----------------------| +| `SPHERE20` | Sphere20 | 24.5M | 50MB | 99.67%| 95.61%| 88.75%| 96.58% | Research/Comparison | +| `SPHERE36` | Sphere36 | 34.6M | 92MB | 99.72%| 95.64%| 89.92%| 96.83% | Research/Comparison | -**Note**: SphereFace uses angular softmax loss, an earlier approach before ArcFace +**Dataset**: Trained on MS1M-V2 (5.8M images, 85K identities) +**Accuracy**: Evaluated on LFW, CALFW, CPLFW, and AgeDB-30 benchmarks +**Note**: SphereFace uses angular softmax loss, an earlier approach before ArcFace. These models provide good accuracy with moderate resource requirements. #### Usage @@ -264,10 +268,10 @@ emotion, confidence = predictor.predict(image, landmarks) ### By Hardware #### Apple Silicon (M1/M2/M3/M4) -**Recommended**: All models work well with CoreML acceleration +**Recommended**: All models work well with ARM64 optimizations (automatically included) ```bash -pip install uniface[silicon] +pip install uniface ``` **Recommended models**: diff --git a/QUICKSTART.md b/QUICKSTART.md index 0d0aa0e..c065736 100644 --- a/QUICKSTART.md +++ b/QUICKSTART.md @@ -7,8 +7,8 @@ Get up and running with UniFace in 5 minutes! 
This guide covers the most common ## Installation ```bash -# macOS (Apple Silicon) -pip install uniface[silicon] +# macOS (Apple Silicon) - automatically includes ARM64 optimizations +pip install uniface # Linux/Windows with NVIDIA GPU pip install uniface[gpu] @@ -114,9 +114,9 @@ if faces1 and faces2: # Interpret result if similarity > 0.6: - print(f"✅ Same person (similarity: {similarity:.3f})") + print(f"Same person (similarity: {similarity:.3f})") else: - print(f"❌ Different people (similarity: {similarity:.3f})") + print(f"Different people (similarity: {similarity:.3f})") else: print("No faces detected") ``` @@ -264,31 +264,46 @@ print("Done!") Choose the right model for your use case: +### Detection Models + ```python -from uniface import create_detector +from uniface.detection import RetinaFace, SCRFD from uniface.constants import RetinaFaceWeights, SCRFDWeights # Fast detection (mobile/edge devices) -detector = create_detector( - 'retinaface', +detector = RetinaFace( model_name=RetinaFaceWeights.MNET_025, conf_thresh=0.7 ) # Balanced (recommended) -detector = create_detector( - 'retinaface', +detector = RetinaFace( model_name=RetinaFaceWeights.MNET_V2 ) # High accuracy (server/GPU) -detector = create_detector( - 'scrfd', +detector = SCRFD( model_name=SCRFDWeights.SCRFD_10G_KPS, conf_thresh=0.5 ) ``` +### Recognition Models + +```python +from uniface import ArcFace, MobileFace, SphereFace +from uniface.constants import MobileFaceWeights, SphereFaceWeights + +# ArcFace (recommended for most use cases) +recognizer = ArcFace() # Best accuracy + +# MobileFace (lightweight for mobile/edge) +recognizer = MobileFace(model_name=MobileFaceWeights.MNET_V2) # Fast, small size + +# SphereFace (angular margin approach) +recognizer = SphereFace(model_name=SphereFaceWeights.SPHERE20) # Alternative method +``` + --- ## Common Issues @@ -316,20 +331,22 @@ print("Available providers:", ort.get_available_providers()) ### 3. Slow Performance on Mac -Make sure you installed with CoreML support: +The standard installation includes ARM64 optimizations for Apple Silicon. If performance is slow, verify you're using the ARM64 build of Python: ```bash -pip install uniface[silicon] +python -c "import platform; print(platform.machine())" +# Should show: arm64 (not x86_64) ``` ### 4. 
Import Errors
 
 ```python
-# ✅ Correct imports
-from uniface import RetinaFace, ArcFace, Landmark106
-from uniface.detection import create_detector
+# Correct imports
+from uniface.detection import RetinaFace
+from uniface.recognition import ArcFace
+from uniface.landmark import Landmark106
 
-# ❌ Wrong imports
+# Wrong imports
 from uniface import retinaface  # Module, not class
 ```
 
diff --git a/README.md b/README.md
index f267377..1d94db7 100644
--- a/README.md
+++ b/README.md
@@ -21,7 +21,7 @@
 
 - **Face Recognition**: ArcFace, MobileFace, and SphereFace embeddings
 - **Attribute Analysis**: Age, gender, and emotion detection
 - **Face Alignment**: Precise alignment for downstream tasks
-- **Hardware Acceleration**: CoreML (Apple Silicon), CUDA (NVIDIA), CPU fallback
+- **Hardware Acceleration**: ARM64 optimizations (Apple Silicon), CUDA (NVIDIA), CPU fallback
 - **Simple API**: Intuitive factory functions and clean interfaces
 - **Production-Ready**: Type hints, comprehensive logging, PEP8 compliant
 
@@ -39,27 +39,19 @@ pip install uniface
 
 #### macOS (Apple Silicon - M1/M2/M3/M4)
 
-For optimal performance with **CoreML acceleration** (3-5x faster):
+For Apple Silicon Macs, the standard installation automatically includes optimized ARM64 support:
 
 ```bash
-# Standard installation (CPU only)
 pip install uniface
-
-# With CoreML acceleration (recommended for M-series chips)
-pip install uniface[silicon]
 ```
 
-**Verify CoreML is available:**
-```python
-import onnxruntime as ort
-print(ort.get_available_providers())
-# Should show: ['CoreMLExecutionProvider', 'CPUExecutionProvider']
-```
+The base `onnxruntime` package (included with uniface) has native Apple Silicon support with ARM64 optimizations built-in since version 1.13+.
 
 #### Linux/Windows with NVIDIA GPU
 
+For CUDA acceleration on NVIDIA GPUs:
+
 ```bash
-# With CUDA acceleration
 pip install uniface[gpu]
 ```
 
@@ -172,28 +164,29 @@ print(f"{gender}, {age} years old")
 
-### Factory Functions (Recommended)
+### Basic Usage (Recommended)
 
 ```python
-from uniface import create_detector, create_recognizer, create_landmarker
+from uniface.detection import RetinaFace, SCRFD
+from uniface.recognition import ArcFace
+from uniface.landmark import Landmark106
 
 # Create detector with default settings
-detector = create_detector('retinaface')
+detector = RetinaFace()
 
 # Create with custom config
-detector = create_detector(
-    'scrfd',
+detector = SCRFD(
     model_name='scrfd_10g_kps',
     conf_thresh=0.8,
     input_size=(640, 640)
 )
 
 # Recognition and landmarks
-recognizer = create_recognizer('arcface')
-landmarker = create_landmarker('2d106det')
+recognizer = ArcFace()
+landmarker = Landmark106()
 ```
 
 ### Direct Model Instantiation
 
 ```python
-from uniface import RetinaFace, SCRFD, ArcFace, MobileFace
+from uniface import RetinaFace, SCRFD, ArcFace, MobileFace, SphereFace
 from uniface.constants import RetinaFaceWeights
 
 # Detection
@@ -206,6 +199,7 @@ detector = RetinaFace(
 # Recognition
 recognizer = ArcFace()  # Uses default weights
 recognizer = MobileFace()  # Lightweight alternative
+recognizer = SphereFace()  # Angular softmax alternative
 ```
 
 ### High-Level Detection API
diff --git a/pyproject.toml b/pyproject.toml
index 59ba8f5..1ebc38c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "uniface"
-version = "1.0.0"
+version = "1.0.1"
 description = "UniFace: A Comprehensive Library for Face Detection, Recognition, Landmark Analysis, Age, and Gender Detection"
 readme = "README.md"
 license = { text = "MIT" }
@@ -21,7 +21,6 @@ requires-python = ">=3.10"
 
 [project.optional-dependencies]
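+# Note: the former 'silicon' extra is gone; the stock onnxruntime dependency
+# (>=1.13) ships native ARM64 wheels for Apple Silicon, so no extra is needed.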
dev = ["pytest>=7.0.0"] gpu = ["onnxruntime-gpu>=1.16.0"] -silicon = ["onnxruntime-silicon>=1.16.0"] [project.urls] Homepage = "https://github.com/yakhyo/uniface" diff --git a/scripts/README.md b/scripts/README.md index d2cb3c5..aa952e6 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -1,18 +1,97 @@ -### `download_model.py` +# Scripts -# Download all models +Collection of example scripts demonstrating UniFace functionality. + +## Available Scripts + +- `run_detection.py` - Face detection on images +- `run_age_gender.py` - Age and gender prediction +- `run_landmarks.py` - Facial landmark detection +- `run_recognition.py` - Face recognition and embeddings +- `run_face_search.py` - Face search and matching +- `run_video_detection.py` - Video processing with face detection +- `batch_process.py` - Batch processing of image folders +- `download_model.py` - Download and manage models + +## Quick Start ```bash -python scripts/download_model.py +# Face detection +python scripts/run_detection.py --image assets/test.jpg + +# Age and gender detection +python scripts/run_age_gender.py --image assets/test.jpg + +# Webcam demo +python scripts/run_age_gender.py --webcam + +# Batch processing +python scripts/batch_process.py --input images/ --output results/ ``` -# Download just RESNET18 +## Import Examples -```bash -python scripts/download_model.py --model RESNET18 +The scripts use direct class imports for better developer experience: + +```python +# Face Detection +from uniface.detection import RetinaFace, SCRFD + +detector = RetinaFace() # or SCRFD() +faces = detector.detect(image) + +# Face Recognition +from uniface.recognition import ArcFace, MobileFace, SphereFace + +recognizer = ArcFace() # or MobileFace(), SphereFace() +embedding = recognizer.get_embedding(image, landmarks) + +# Age & Gender +from uniface.attribute import AgeGender + +age_gender = AgeGender() +gender, age = age_gender.predict(image, bbox) + +# Landmarks +from uniface.landmark import Landmark106 + +landmarker = Landmark106() +landmarks = landmarker.get_landmarks(image, bbox) ``` -### `run_inference.py` +## Available Classes + +**Detection:** +- `RetinaFace` - High accuracy face detection +- `SCRFD` - Fast face detection + +**Recognition:** +- `ArcFace` - High accuracy face recognition +- `MobileFace` - Lightweight face recognition +- `SphereFace` - Alternative face recognition + +**Attributes:** +- `AgeGender` - Age and gender prediction + +**Landmarks:** +- `Landmark106` - 106-point facial landmarks + +## Common Options + +Most scripts support: +- `--help` - Show usage information +- `--verbose` - Enable detailed logging +- `--detector` - Choose detector (retinaface, scrfd) +- `--threshold` - Set confidence threshold + +## Testing + +Run basic functionality test: ```bash -python scripts/run_inference.py --image assets/test.jpg --model MNET_V2 --iterations 10 -``` \ No newline at end of file +python scripts/run_detection.py --image assets/test.jpg +``` + +For comprehensive testing, see the main project tests: +```bash +pytest tests/ +``` diff --git a/scripts/TESTING.md b/scripts/TESTING.md deleted file mode 100644 index 38b8e43..0000000 --- a/scripts/TESTING.md +++ /dev/null @@ -1,389 +0,0 @@ -# Testing Scripts Guide - -Complete guide to testing all scripts in the `scripts/` directory. - ---- - -## 📁 Available Scripts - -1. **download_model.py** - Download and verify model weights -2. **run_detection.py** - Face detection on images -3. **run_recognition.py** - Face recognition (extract embeddings) -4. 
**run_face_search.py** - Real-time face matching with webcam -5. **sha256_generate.py** - Generate SHA256 checksums for models - ---- - -## Testing Each Script - -### 1. Test Model Download - -```bash -# Download a specific model -python scripts/download_model.py --model MNET_V2 - -# Download all RetinaFace models (takes ~5 minutes, ~200MB) -python scripts/download_model.py - -# Verify models are cached -ls -lh ~/.uniface/models/ -``` - -**Expected Output:** -``` -📥 Downloading model: retinaface_mnet_v2 -2025-11-08 00:00:00 - INFO - Downloading model 'RetinaFaceWeights.MNET_V2' from https://... -Downloading ~/.uniface/models/retinaface_mnet_v2.onnx: 100%|████| 3.5M/3.5M -2025-11-08 00:00:05 - INFO - Successfully downloaded 'RetinaFaceWeights.MNET_V2' -✅ All requested weights are ready and verified. -``` - ---- - -### 2. Test Face Detection - -```bash -# Basic detection -python scripts/run_detection.py --image assets/test.jpg - -# With custom settings -python scripts/run_detection.py \ - --image assets/test.jpg \ - --method scrfd \ - --threshold 0.7 \ - --save_dir outputs - -# Benchmark mode (100 iterations) -python scripts/run_detection.py \ - --image assets/test.jpg \ - --iterations 100 -``` - -**Expected Output:** -``` -Initializing detector: retinaface -2025-11-08 00:00:00 - INFO - Initializing RetinaFace with model=RetinaFaceWeights.MNET_V2... -2025-11-08 00:00:01 - INFO - CoreML acceleration enabled (Apple Silicon) -✅ Output saved at: outputs/test_out.jpg -[1/1] ⏱️ Inference time: 0.0234 seconds -``` - -**Verify Output:** -```bash -# Check output image was created -ls -lh outputs/test_out.jpg - -# View the image (macOS) -open outputs/test_out.jpg -``` - ---- - -### 3. Test Face Recognition (Embedding Extraction) - -```bash -# Extract embeddings from an image -python scripts/run_recognition.py --image assets/test.jpg - -# With different models -python scripts/run_recognition.py \ - --image assets/test.jpg \ - --detector scrfd \ - --recognizer mobileface -``` - -**Expected Output:** -``` -Initializing detector: retinaface -Initializing recognizer: arcface -2025-11-08 00:00:00 - INFO - Successfully initialized face encoder from ~/.uniface/models/w600k_mbf.onnx -Detected 1 face(s). Extracting embeddings for the first face... - - Embedding shape: (1, 512) - - L2 norm of unnormalized embedding: 64.2341 - - L2 norm of normalized embedding: 1.0000 -``` - ---- - -### 4. Test Real-Time Face Search (Webcam) - -**Prerequisites:** -- Webcam connected -- Reference image with a clear face - -```bash -# Basic usage -python scripts/run_face_search.py --image assets/test.jpg - -# With custom models -python scripts/run_face_search.py \ - --image assets/test.jpg \ - --detector scrfd \ - --recognizer arcface -``` - -**Expected Behavior:** -1. Webcam window opens -2. Faces are detected in real-time -3. Green box = Match (similarity > 0.4) -4. Red box = Unknown (similarity < 0.4) -5. Press 'q' to quit - -**Expected Output:** -``` -Initializing models... -2025-11-08 00:00:00 - INFO - CoreML acceleration enabled (Apple Silicon) -Extracting reference embedding... -Webcam started. Press 'q' to quit. -``` - -**Troubleshooting:** -```bash -# If webcam doesn't open -python -c "import cv2; cap = cv2.VideoCapture(0); print('Webcam OK' if cap.isOpened() else 'Webcam FAIL')" - -# If no faces detected -# - Ensure good lighting -# - Face should be frontal and clearly visible -# - Try lowering threshold: edit script line 29, change 0.4 to 0.3 -``` - ---- - -### 5. 
Test SHA256 Generator (For Developers) - -```bash -# Generate checksum for a model file -python scripts/sha256_generate.py ~/.uniface/models/retinaface_mnet_v2.onnx - -# Generate for all models -for model in ~/.uniface/models/*.onnx; do - python scripts/sha256_generate.py "$model" -done -``` - ---- - -## 🔍 Quick Verification Tests - -### Test 1: Imports Work - -```bash -python -c " -from uniface.detection import create_detector -from uniface.recognition import create_recognizer -print('✅ Imports successful') -" -``` - -### Test 2: Models Download - -```bash -python -c " -from uniface import RetinaFace -detector = RetinaFace() -print('✅ Model downloaded and loaded') -" -``` - -### Test 3: Detection Works - -```bash -python -c " -import cv2 -import numpy as np -from uniface import RetinaFace - -detector = RetinaFace() -image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8) -faces = detector.detect(image) -print(f'✅ Detection works, found {len(faces)} faces') -" -``` - -### Test 4: Recognition Works - -```bash -python -c " -import cv2 -import numpy as np -from uniface import RetinaFace, ArcFace - -detector = RetinaFace() -recognizer = ArcFace() -image = cv2.imread('assets/test.jpg') -faces = detector.detect(image) -if faces: - landmarks = np.array(faces[0]['landmarks']) - embedding = recognizer.get_normalized_embedding(image, landmarks) - print(f'✅ Recognition works, embedding shape: {embedding.shape}') -else: - print('⚠️ No faces detected in test image') -" -``` - ---- - -## End-to-End Test Workflow - -Run this complete workflow to verify everything works: - -```bash -#!/bin/bash -# Save as test_all_scripts.sh - -echo "=== Testing UniFace Scripts ===" -echo "" - -# Test 1: Download models -echo "1️⃣ Testing model download..." -python scripts/download_model.py --model MNET_V2 -if [ $? -eq 0 ]; then - echo "✅ Model download: PASS" -else - echo "❌ Model download: FAIL" - exit 1 -fi -echo "" - -# Test 2: Face detection -echo "2️⃣ Testing face detection..." -python scripts/run_detection.py --image assets/test.jpg --save_dir /tmp/uniface_test -if [ $? -eq 0 ] && [ -f /tmp/uniface_test/test_out.jpg ]; then - echo "✅ Face detection: PASS" -else - echo "❌ Face detection: FAIL" - exit 1 -fi -echo "" - -# Test 3: Face recognition -echo "3️⃣ Testing face recognition..." -python scripts/run_recognition.py --image assets/test.jpg > /tmp/uniface_recognition.log -if [ $? -eq 0 ] && grep -q "Embedding shape" /tmp/uniface_recognition.log; then - echo "✅ Face recognition: PASS" -else - echo "❌ Face recognition: FAIL" - exit 1 -fi -echo "" - -echo "=== All Tests Passed! 🎉 ===" -``` - -**Run the test suite:** -```bash -chmod +x test_all_scripts.sh -./test_all_scripts.sh -``` - ---- - -## Performance Benchmarking - -### Benchmark Detection Speed - -```bash -# Test different models -for model in retinaface scrfd; do - echo "Testing $model..." - python scripts/run_detection.py \ - --image assets/test.jpg \ - --method $model \ - --iterations 50 -done -``` - -### Benchmark Recognition Speed - -```bash -# Test different recognizers -for recognizer in arcface mobileface; do - echo "Testing $recognizer..." - time python scripts/run_recognition.py \ - --image assets/test.jpg \ - --recognizer $recognizer -done -``` - ---- - -## 🐛 Common Issues - -### Issue: "No module named 'uniface'" - -```bash -# Solution: Install in editable mode -pip install -e . 
-``` - -### Issue: "Failed to load image" - -```bash -# Check image exists -ls -lh assets/test.jpg - -# Try with absolute path -python scripts/run_detection.py --image $(pwd)/assets/test.jpg -``` - -### Issue: "No faces detected" - -```bash -# Lower confidence threshold -python scripts/run_detection.py \ - --image assets/test.jpg \ - --threshold 0.3 -``` - -### Issue: Models downloading slowly - -```bash -# Check internet connection -curl -I https://github.com/yakhyo/uniface/releases - -# Or download manually -wget https://github.com/yakhyo/uniface/releases/download/v0.1.2/retinaface_mv2.onnx \ - -O ~/.uniface/models/retinaface_mnet_v2.onnx -``` - -### Issue: CoreML not available on Mac - -```bash -# Install CoreML-enabled ONNX Runtime -pip uninstall onnxruntime -pip install onnxruntime-silicon - -# Verify -python -c "import onnxruntime as ort; print(ort.get_available_providers())" -# Should show: ['CoreMLExecutionProvider', 'CPUExecutionProvider'] -``` - ---- - -## ✅ Script Status Summary - -| Script | Status | API Updated | Tested | -|-----------------------|--------|-------------|--------| -| download_model.py | ✅ | ✅ | ✅ | -| run_detection.py | ✅ | ✅ | ✅ | -| run_recognition.py | ✅ | ✅ | ✅ | -| run_face_search.py | ✅ | ✅ | ✅ | -| sha256_generate.py | ✅ | N/A | ✅ | - -All scripts are updated and working with the new dict-based API! 🎉 - ---- - -## 📝 Notes - -- All scripts now use the factory functions (`create_detector`, `create_recognizer`) -- Scripts work with the new dict-based detection API -- Model download bug is fixed (enum vs string issue) -- CoreML acceleration is automatically detected on Apple Silicon -- All scripts include proper error handling - ---- - -Need help with a specific script? Check the main [README.md](../README.md) or [QUICKSTART.md](../QUICKSTART.md)! 
- diff --git a/scripts/batch_process.py b/scripts/batch_process.py new file mode 100644 index 0000000..abd3320 --- /dev/null +++ b/scripts/batch_process.py @@ -0,0 +1,157 @@ +"""Batch Image Processing Script""" + +import os +import cv2 +import argparse +from pathlib import Path +from tqdm import tqdm + +from uniface import RetinaFace, SCRFD +from uniface.visualization import draw_detections + + +def get_image_files(input_dir: Path, extensions: tuple) -> list: + image_files = [] + for ext in extensions: + image_files.extend(input_dir.glob(f"*.{ext}")) + image_files.extend(input_dir.glob(f"*.{ext.upper()}")) + + return sorted(image_files) + + +def process_single_image(detector, image_path: Path, output_dir: Path, + vis_threshold: float, skip_existing: bool) -> dict: + output_path = output_dir / f"{image_path.stem}_detected{image_path.suffix}" + + # Skip if already processed + if skip_existing and output_path.exists(): + return {"status": "skipped", "faces": 0} + + # Load image + image = cv2.imread(str(image_path)) + if image is None: + return {"status": "error", "error": "Failed to load image"} + + # Detect faces + try: + faces = detector.detect(image) + except Exception as e: + return {"status": "error", "error": str(e)} + + # Draw detections + bboxes = [f['bbox'] for f in faces] + scores = [f['confidence'] for f in faces] + landmarks = [f['landmarks'] for f in faces] + draw_detections(image, bboxes, scores, landmarks, vis_threshold=vis_threshold) + + # Add face count + cv2.putText(image, f"Faces: {len(faces)}", (10, 30), + cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2) + + # Save result + cv2.imwrite(str(output_path), image) + + return {"status": "success", "faces": len(faces)} + + +def batch_process(detector, input_dir: str, output_dir: str, extensions: tuple, + vis_threshold: float, skip_existing: bool): + input_path = Path(input_dir) + output_path = Path(output_dir) + + # Create output directory + output_path.mkdir(parents=True, exist_ok=True) + + # Get image files + image_files = get_image_files(input_path, extensions) + + if not image_files: + print(f"No image files found in '{input_dir}' with extensions {extensions}") + return + + print(f"Input: {input_dir}") + print(f"Output: {output_dir}") + print(f"Found {len(image_files)} images\n") + + # Process images + results = { + "success": 0, + "skipped": 0, + "error": 0, + "total_faces": 0 + } + + with tqdm(image_files, desc="Processing images", unit="img") as pbar: + for image_path in pbar: + result = process_single_image( + detector, image_path, output_path, + vis_threshold, skip_existing + ) + + if result["status"] == "success": + results["success"] += 1 + results["total_faces"] += result["faces"] + pbar.set_postfix({"faces": result["faces"]}) + elif result["status"] == "skipped": + results["skipped"] += 1 + else: + results["error"] += 1 + print(f"\nError processing {image_path.name}: {result.get('error', 'Unknown error')}") + + # Print summary + print(f"\nBatch processing complete!") + print(f" Total images: {len(image_files)}") + print(f" Successfully processed: {results['success']}") + print(f" Skipped: {results['skipped']}") + print(f" Errors: {results['error']}") + print(f" Total faces detected: {results['total_faces']}") + if results['success'] > 0: + print(f" Average faces per image: {results['total_faces']/results['success']:.2f}") + print(f"\nResults saved to: {output_dir}") + + +def main(): + parser = argparse.ArgumentParser(description="Batch process images with face detection") + parser.add_argument("--input", type=str, 
required=True, + help="Input directory containing images") + parser.add_argument("--output", type=str, required=True, + help="Output directory for processed images") + parser.add_argument("--detector", type=str, default="retinaface", + choices=['retinaface', 'scrfd'], help="Face detector to use") + parser.add_argument("--threshold", type=float, default=0.6, + help="Confidence threshold for visualization") + parser.add_argument("--extensions", type=str, default="jpg,jpeg,png,bmp", + help="Comma-separated list of image extensions") + parser.add_argument("--skip_existing", action="store_true", + help="Skip files that already exist in output directory") + parser.add_argument("--verbose", action="store_true", help="Enable verbose logging") + + args = parser.parse_args() + + # Check input directory exists + if not Path(args.input).exists(): + print(f"Error: Input directory '{args.input}' does not exist") + return + + if args.verbose: + from uniface import enable_logging + enable_logging() + + # Parse extensions + extensions = tuple(ext.strip() for ext in args.extensions.split(',')) + + # Initialize detector + print(f"Initializing detector: {args.detector}") + if args.detector == 'retinaface': + detector = RetinaFace() + else: + detector = SCRFD() + print("Detector initialized\n") + + # Process batch + batch_process(detector, args.input, args.output, extensions, + args.threshold, args.skip_existing) + + +if __name__ == "__main__": + main() diff --git a/scripts/download_model.py b/scripts/download_model.py index 8f5d0b2..269198e 100644 --- a/scripts/download_model.py +++ b/scripts/download_model.py @@ -1,31 +1,77 @@ import argparse -from uniface.constants import RetinaFaceWeights +from uniface.constants import ( + RetinaFaceWeights, SphereFaceWeights, MobileFaceWeights, ArcFaceWeights, + SCRFDWeights, DDAMFNWeights, AgeGenderWeights, LandmarkWeights +) from uniface.model_store import verify_model_weights +# All available model types +ALL_MODEL_TYPES = { + 'retinaface': RetinaFaceWeights, + 'sphereface': SphereFaceWeights, + 'mobileface': MobileFaceWeights, + 'arcface': ArcFaceWeights, + 'scrfd': SCRFDWeights, + 'ddamfn': DDAMFNWeights, + 'agegender': AgeGenderWeights, + 'landmark': LandmarkWeights, +} + + def main(): - parser = argparse.ArgumentParser(description="Download and verify RetinaFace model weights.") + parser = argparse.ArgumentParser(description="Download and verify model weights.") + parser.add_argument( + "--model-type", + type=str, + choices=list(ALL_MODEL_TYPES.keys()), + help="Model type to download (e.g. retinaface, arcface). If not specified, all models will be downloaded.", + ) parser.add_argument( "--model", type=str, - choices=[m.name for m in RetinaFaceWeights], - help="Model to download (e.g. MNET_V2). If not specified, all models will be downloaded.", + help="Specific model to download (e.g. MNET_V2). 
RetinaFace models only; kept for backward compatibility.",
     )
     args = parser.parse_args()
 
-    if args.model:
-        weight = RetinaFaceWeights[args.model]
-        print(f"📥 Downloading model: {weight.value}")
-        verify_model_weights(weight)  # Pass enum, not string
-    else:
-        print("📥 Downloading all models...")
-        for weight in RetinaFaceWeights:
-            verify_model_weights(weight)  # Pass enum, not string
+    if args.model and not args.model_type:
+        # Backward compatibility: a bare --model is treated as a RetinaFace weight
+        try:
+            weight = RetinaFaceWeights[args.model]
+            print(f"Downloading RetinaFace model: {weight.value}")
+            verify_model_weights(weight)
+            print("Model downloaded successfully.")
+        except KeyError:
+            print(f"Invalid RetinaFace model: {args.model}")
+            print(f"Available models: {[m.name for m in RetinaFaceWeights]}")
+        return  # Done either way; don't fall through to the bulk download below
 
-    print("✅ All requested weights are ready and verified.")
+    if args.model_type:
+        # Download all models from specific type
+        model_enum = ALL_MODEL_TYPES[args.model_type]
+        print(f"Downloading all {args.model_type} models...")
+        for weight in model_enum:
+            print(f"Downloading: {weight.value}")
+            try:
+                verify_model_weights(weight)
+                print(f"Downloaded: {weight.value}")
+            except Exception as e:
+                print(f"Failed to download {weight.value}: {e}")
+    else:
+        # Download all models from all types
+        print("Downloading all models...")
+        for model_type, model_enum in ALL_MODEL_TYPES.items():
+            print(f"\nDownloading {model_type} models...")
+            for weight in model_enum:
+                print(f"Downloading: {weight.value}")
+                try:
+                    verify_model_weights(weight)
+                    print(f"Downloaded: {weight.value}")
+                except Exception as e:
+                    print(f"Failed to download {weight.value}: {e}")
+
+    print("\nDownload process completed.")
 
 
 if __name__ == "__main__":
     main()
-
-
diff --git a/scripts/run_age_gender.py b/scripts/run_age_gender.py
new file mode 100644
index 0000000..f0e57cd
--- /dev/null
+++ b/scripts/run_age_gender.py
@@ -0,0 +1,163 @@
+"""Age and Gender Detection Demo Script"""
+
+import os
+import cv2
+import argparse
+from pathlib import Path
+
+from uniface import RetinaFace, SCRFD, AgeGender
+from uniface.visualization import draw_detections
+
+
+def process_image(detector, age_gender, image_path: str, save_dir: str = "outputs", vis_threshold: float = 0.6):
+    image = cv2.imread(image_path)
+    if image is None:
+        print(f"Error: Failed to load image from '{image_path}'")
+        return
+
+    print(f"Processing: {image_path}")
+
+    # Detect faces
+    faces = detector.detect(image)
+    print(f"  Detected {len(faces)} face(s)")
+
+    if not faces:
+        print("  No faces detected")
+        return
+
+    # Draw detections
+    bboxes = [f['bbox'] for f in faces]
+    scores = [f['confidence'] for f in faces]
+    landmarks = [f['landmarks'] for f in faces]
+    draw_detections(image, bboxes, scores, landmarks, vis_threshold=vis_threshold)
+
+    # Predict and draw age/gender for each face
+    for i, face in enumerate(faces):
+        gender, age = age_gender.predict(image, face['bbox'])
+        print(f"  Face {i+1}: {gender}, {age} years old")
+
+        # Draw age and gender text
+        bbox = face['bbox']
+        x1, y1 = int(bbox[0]), int(bbox[1])
+        text = f"{gender}, {age}y"
+
+        # Background rectangle for text
+        (text_width, text_height), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
+        cv2.rectangle(image, (x1, y1 - text_height - 10),
+                      (x1 + text_width + 10, y1), (0, 255, 0), -1)
+        cv2.putText(image, text, (x1 + 5, y1 - 5),
+                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 2)
+
+    # Save result
+    os.makedirs(save_dir, exist_ok=True)
+    output_path = os.path.join(save_dir, f"{Path(image_path).stem}_age_gender.jpg")
+    
cv2.imwrite(output_path, image) + print(f"Output saved: {output_path}") + + +def run_webcam(detector, age_gender, vis_threshold: float = 0.6): + cap = cv2.VideoCapture(0) + + if not cap.isOpened(): + print("Cannot open webcam") + return + + print("Webcam opened") + print("Press 'q' to quit\n") + + frame_count = 0 + + try: + while True: + ret, frame = cap.read() + if not ret: + break + + frame_count += 1 + + # Detect faces + faces = detector.detect(frame) + + # Draw detections + bboxes = [f['bbox'] for f in faces] + scores = [f['confidence'] for f in faces] + landmarks = [f['landmarks'] for f in faces] + draw_detections(frame, bboxes, scores, landmarks, vis_threshold=vis_threshold) + + # Predict and draw age/gender for each face + for face in faces: + gender, age = age_gender.predict(frame, face['bbox']) + + # Draw age and gender text + bbox = face['bbox'] + x1, y1 = int(bbox[0]), int(bbox[1]) + text = f"{gender}, {age}y" + + # Background rectangle for text + (text_width, text_height), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2) + cv2.rectangle(frame, (x1, y1 - text_height - 10), + (x1 + text_width + 10, y1), (0, 255, 0), -1) + cv2.putText(frame, text, (x1 + 5, y1 - 5), + cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 2) + + # Add info + cv2.putText(frame, f"Faces: {len(faces)}", (10, 30), + cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2) + cv2.putText(frame, "Press 'q' to quit", (10, frame.shape[0] - 10), + cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1) + + cv2.imshow("Age & Gender Detection", frame) + + if cv2.waitKey(1) & 0xFF == ord('q'): + break + + except KeyboardInterrupt: + print("\nInterrupted") + finally: + cap.release() + cv2.destroyAllWindows() + print(f"\nProcessed {frame_count} frames") + + +def main(): + parser = argparse.ArgumentParser(description="Run age and gender detection") + parser.add_argument("--image", type=str, help="Path to input image") + parser.add_argument("--webcam", action="store_true", help="Use webcam instead of image") + parser.add_argument("--detector", type=str, default="retinaface", + choices=['retinaface', 'scrfd'], help="Face detector to use") + parser.add_argument("--threshold", type=float, default=0.6, + help="Confidence threshold for visualization") + parser.add_argument("--save_dir", type=str, default="outputs", + help="Directory to save output images") + parser.add_argument("--verbose", action="store_true", help="Enable verbose logging") + + args = parser.parse_args() + + # Validate input + if not args.image and not args.webcam: + parser.error("Either --image or --webcam must be specified") + + if args.verbose: + from uniface import enable_logging + enable_logging() + + # Initialize models + print(f"Initializing detector: {args.detector}") + if args.detector == 'retinaface': + detector = RetinaFace() + else: + detector = SCRFD() + + print("Initializing age/gender model...") + age_gender = AgeGender() + print("Models initialized\n") + + # Process + if args.webcam: + run_webcam(detector, age_gender, args.threshold) + else: + process_image(detector, age_gender, args.image, args.save_dir, args.threshold) + + +if __name__ == "__main__": + main() diff --git a/scripts/run_detection.py b/scripts/run_detection.py index b89a296..44b273e 100644 --- a/scripts/run_detection.py +++ b/scripts/run_detection.py @@ -4,24 +4,14 @@ import time import argparse import numpy as np -# UPDATED: Use the factory function and import from the new location -from uniface.detection import create_detector +from uniface.detection import RetinaFace, SCRFD from 
uniface.visualization import draw_detections
 
 
 def run_inference(detector, image_path: str, vis_threshold: float = 0.6, save_dir: str = "outputs"):
-    """
-    Run face detection on a single image.
-
-    Args:
-        detector: Initialized face detector.
-        image_path (str): Path to input image.
-        vis_threshold (float): Threshold for drawing detections.
-        save_dir (str): Directory to save output image.
-    """
     image = cv2.imread(image_path)
     if image is None:
-        print(f"❌ Error: Failed to load image from '{image_path}'")
+        print(f"Error: Failed to load image from '{image_path}'")
         return
 
     # 1. Get the list of face dictionaries from the detector
@@ -40,7 +30,7 @@ def run_inference(detector, image_path: str, vis_threshold: float = 0.6, save_di
     os.makedirs(save_dir, exist_ok=True)
     output_path = os.path.join(save_dir, f"{os.path.splitext(os.path.basename(image_path))[0]}_out.jpg")
     cv2.imwrite(output_path, image)
-    print(f"✅ Output saved at: {output_path}")
+    print(f"Output saved at: {output_path}")
 
 
 def main():
@@ -65,14 +55,17 @@ def main():
         enable_logging()
 
     print(f"Initializing detector: {args.method}")
-    detector = create_detector(method=args.method)
+    if args.method == 'retinaface':
+        detector = RetinaFace()
+    else:
+        detector = SCRFD()
 
     avg_time = 0
     for i in range(args.iterations):
         start = time.time()
         run_inference(detector, args.image, args.threshold, args.save_dir)
         elapsed = time.time() - start
-        print(f"[{i + 1}/{args.iterations}] ⏱️ Inference time: {elapsed:.4f} seconds")
+        print(f"[{i + 1}/{args.iterations}] Inference time: {elapsed:.4f} seconds")
 
-        if i >= 0:  # Avoid counting the first run if it includes model loading time
+        if i > 0:  # Skip the first run, which typically includes model loading time
             avg_time += elapsed
 
-    # Adjust average calculation to exclude potential first-run overhead
-    effective_iterations = max(1, args.iterations)
+    # Average over the timed runs, excluding the first (max(1, ...) guards the single-run case)
+    effective_iterations = max(1, args.iterations - 1)
     print(
-        f"\n🔥 Average inference time over {effective_iterations} runs: {avg_time / effective_iterations:.4f} seconds")
+        f"\nAverage inference time over {effective_iterations} runs: {avg_time / effective_iterations:.4f} seconds")
 
 
 if __name__ == "__main__":
diff --git a/scripts/run_face_search.py b/scripts/run_face_search.py
index 1f93e2c..5530b7f 100644
--- a/scripts/run_face_search.py
+++ b/scripts/run_face_search.py
@@ -3,14 +3,12 @@ import argparse
 import cv2
 import numpy as np
 
-# Use the new high-level factory functions
-from uniface.detection import create_detector
+from uniface.detection import RetinaFace, SCRFD
 from uniface.face_utils import compute_similarity
-from uniface.recognition import create_recognizer
+from uniface.recognition import ArcFace, MobileFace, SphereFace
 
 
 def extract_reference_embedding(detector, recognizer, image_path: str) -> np.ndarray:
-    """Extracts a normalized embedding from the first face found in an image."""
     image = cv2.imread(image_path)
     if image is None:
         raise RuntimeError(f"Failed to load image: {image_path}")
@@ -28,7 +26,6 @@ def extract_reference_embedding(detector, recognizer, image_path: str) -> np.nda
 
 
 def run_video(detector, recognizer, ref_embedding: np.ndarray, threshold: float = 0.4):
-    """Run real-time face recognition from a webcam feed."""
     cap = cv2.VideoCapture(0)
     if not cap.isOpened():
         raise RuntimeError("Webcam could not be opened.")
@@ -91,8 +88,17 @@ def main():
         enable_logging()
 
     print("Initializing models...")
-    detector = create_detector(method=args.detector)
-    recognizer = create_recognizer(method=args.recognizer)
+    if args.detector == 'retinaface':
+        detector = RetinaFace()
+    else:
+        detector = SCRFD()
+
+    if args.recognizer == 'arcface':
+        recognizer = ArcFace()
+    
elif args.recognizer == 'mobileface': + recognizer = MobileFace() + else: + recognizer = SphereFace() print("Extracting reference embedding...") ref_embedding = extract_reference_embedding(detector, recognizer, args.image) diff --git a/scripts/run_landmarks.py b/scripts/run_landmarks.py new file mode 100644 index 0000000..a14958d --- /dev/null +++ b/scripts/run_landmarks.py @@ -0,0 +1,149 @@ +"""Facial Landmark Detection Demo Script""" + +import os +import cv2 +import argparse +from pathlib import Path + +from uniface import RetinaFace, SCRFD, Landmark106 + + +def process_image(detector, landmarker, image_path: str, save_dir: str = "outputs"): + image = cv2.imread(image_path) + if image is None: + print(f"Error: Failed to load image from '{image_path}'") + return + + print(f"Processing: {image_path}") + + # Detect faces + faces = detector.detect(image) + print(f" Detected {len(faces)} face(s)") + + if not faces: + print(" No faces detected") + return + + # Process each face + for i, face in enumerate(faces): + # Draw bounding box + bbox = face['bbox'] + x1, y1, x2, y2 = map(int, bbox) + cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2) + + # Get and draw 106 landmarks + landmarks = landmarker.get_landmarks(image, bbox) + print(f" Face {i+1}: Extracted {len(landmarks)} landmarks") + + for x, y in landmarks.astype(int): + cv2.circle(image, (x, y), 1, (0, 255, 0), -1) + + # Add face count + cv2.putText(image, f"Face {i+1}", (x1, y1 - 10), + cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2) + + # Add total count + cv2.putText(image, f"Faces: {len(faces)}", (10, 30), + cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2) + + # Save result + os.makedirs(save_dir, exist_ok=True) + output_path = os.path.join(save_dir, f"{Path(image_path).stem}_landmarks.jpg") + cv2.imwrite(output_path, image) + print(f"Output saved: {output_path}") + + +def run_webcam(detector, landmarker): + cap = cv2.VideoCapture(0) + + if not cap.isOpened(): + print("Cannot open webcam") + return + + print("Webcam opened") + print("Press 'q' to quit\n") + + frame_count = 0 + + try: + while True: + ret, frame = cap.read() + if not ret: + break + + frame_count += 1 + + # Detect faces + faces = detector.detect(frame) + + # Process each face + for face in faces: + # Draw bounding box + bbox = face['bbox'] + x1, y1, x2, y2 = map(int, bbox) + cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2) + + # Get and draw 106 landmarks + landmarks = landmarker.get_landmarks(frame, bbox) + for x, y in landmarks.astype(int): + cv2.circle(frame, (x, y), 1, (0, 255, 0), -1) + + # Add info + cv2.putText(frame, f"Faces: {len(faces)}", (10, 30), + cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2) + cv2.putText(frame, "Press 'q' to quit", (10, frame.shape[0] - 10), + cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1) + + cv2.imshow("106-Point Landmarks", frame) + + if cv2.waitKey(1) & 0xFF == ord('q'): + break + + except KeyboardInterrupt: + print("\nInterrupted") + finally: + cap.release() + cv2.destroyAllWindows() + print(f"\nProcessed {frame_count} frames") + + +def main(): + parser = argparse.ArgumentParser(description="Run facial landmark detection") + parser.add_argument("--image", type=str, help="Path to input image") + parser.add_argument("--webcam", action="store_true", help="Use webcam instead of image") + parser.add_argument("--detector", type=str, default="retinaface", + choices=['retinaface', 'scrfd'], help="Face detector to use") + parser.add_argument("--save_dir", type=str, default="outputs", + help="Directory to save output images") + 
parser.add_argument("--verbose", action="store_true", help="Enable verbose logging") + + args = parser.parse_args() + + # Validate input + if not args.image and not args.webcam: + parser.error("Either --image or --webcam must be specified") + + if args.verbose: + from uniface import enable_logging + enable_logging() + + # Initialize models + print(f"Initializing detector: {args.detector}") + if args.detector == 'retinaface': + detector = RetinaFace() + else: + detector = SCRFD() + + print("Initializing landmark detector...") + landmarker = Landmark106() + print("Models initialized\n") + + # Process + if args.webcam: + run_webcam(detector, landmarker) + else: + process_image(detector, landmarker, args.image, args.save_dir) + + +if __name__ == "__main__": + main() diff --git a/scripts/run_recognition.py b/scripts/run_recognition.py index b7bf8fc..c7209f5 100644 --- a/scripts/run_recognition.py +++ b/scripts/run_recognition.py @@ -2,20 +2,12 @@ import cv2 import argparse import numpy as np -# Use the new high-level factory functions for consistency -from uniface.detection import create_detector -from uniface.recognition import create_recognizer +from uniface.detection import RetinaFace, SCRFD +from uniface.recognition import ArcFace, MobileFace, SphereFace +from uniface.face_utils import compute_similarity def run_inference(detector, recognizer, image_path: str): - """ - Detect faces and extract embeddings from a single image. - - Args: - detector: Initialized face detector. - recognizer: Initialized face recognition model. - image_path (str): Path to the input image. - """ image = cv2.imread(image_path) if image is None: print(f"Error: Failed to load image from '{image_path}'") @@ -43,9 +35,47 @@ def run_inference(detector, recognizer, image_path: str): print(f" - L2 norm of normalized embedding: {np.linalg.norm(norm_embedding):.4f}") +def compare_faces(detector, recognizer, image1_path: str, image2_path: str, threshold: float = 0.35): + + # Load images + img1 = cv2.imread(image1_path) + img2 = cv2.imread(image2_path) + + if img1 is None or img2 is None: + print(f"Error: Failed to load images") + return + + # Detect faces + faces1 = detector.detect(img1) + faces2 = detector.detect(img2) + + if not faces1 or not faces2: + print("Error: No faces detected in one or both images") + return + + # Get landmarks for first face in each image + landmarks1 = np.array(faces1[0]['landmarks']) + landmarks2 = np.array(faces2[0]['landmarks']) + + # Get normalized embeddings + embedding1 = recognizer.get_normalized_embedding(img1, landmarks1) + embedding2 = recognizer.get_normalized_embedding(img2, landmarks2) + + # Compute similarity + similarity = compute_similarity(embedding1, embedding2, normalized=True) + is_match = similarity > threshold + + print(f"Similarity: {similarity:.4f}") + print(f"Result: {'Same person' if is_match else 'Different person'}") + print(f"Threshold: {threshold}") + + def main(): - parser = argparse.ArgumentParser(description="Extract face embeddings from a single image.") - parser.add_argument("--image", type=str, required=True, help="Path to the input image.") + parser = argparse.ArgumentParser(description="Face recognition and comparison.") + parser.add_argument("--image", type=str, help="Path to single image for embedding extraction.") + parser.add_argument("--image1", type=str, help="Path to first image for comparison.") + parser.add_argument("--image2", type=str, help="Path to second image for comparison.") + parser.add_argument("--threshold", type=float, default=0.35, 
help="Similarity threshold for face matching.") parser.add_argument( "--detector", type=str, @@ -69,12 +99,29 @@ def main(): enable_logging() print(f"Initializing detector: {args.detector}") - detector = create_detector(method=args.detector) + if args.detector == 'retinaface': + detector = RetinaFace() + else: + detector = SCRFD() print(f"Initializing recognizer: {args.recognizer}") - recognizer = create_recognizer(method=args.recognizer) + if args.recognizer == 'arcface': + recognizer = ArcFace() + elif args.recognizer == 'mobileface': + recognizer = MobileFace() + else: + recognizer = SphereFace() - run_inference(detector, recognizer, args.image) + if args.image1 and args.image2: + # Face comparison mode + print(f"Comparing faces: {args.image1} vs {args.image2}") + compare_faces(detector, recognizer, args.image1, args.image2, args.threshold) + elif args.image: + # Single image embedding extraction mode + run_inference(detector, recognizer, args.image) + else: + print("Error: Provide either --image for single image processing or --image1 and --image2 for comparison") + parser.print_help() if __name__ == "__main__": diff --git a/scripts/run_video_detection.py b/scripts/run_video_detection.py new file mode 100644 index 0000000..02930c3 --- /dev/null +++ b/scripts/run_video_detection.py @@ -0,0 +1,142 @@ +"""Video Face Detection Script""" + +import cv2 +import argparse +from pathlib import Path +from tqdm import tqdm + +from uniface import RetinaFace, SCRFD +from uniface.visualization import draw_detections + + +def process_video(detector, input_path: str, output_path: str, vis_threshold: float = 0.6, + fps: int = None, show_preview: bool = False): + # Open input video + cap = cv2.VideoCapture(input_path) + if not cap.isOpened(): + print(f"Error: Cannot open video file '{input_path}'") + return + + # Get video properties + total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) + source_fps = cap.get(cv2.CAP_PROP_FPS) + width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) + height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) + + output_fps = fps if fps is not None else source_fps + + print(f"📹 Input: {input_path}") + print(f" Resolution: {width}x{height}") + print(f" FPS: {source_fps:.2f}") + print(f" Total frames: {total_frames}") + print(f"\n📹 Output: {output_path}") + print(f" FPS: {output_fps:.2f}\n") + + # Initialize video writer + fourcc = cv2.VideoWriter_fourcc(*'mp4v') + out = cv2.VideoWriter(output_path, fourcc, output_fps, (width, height)) + + if not out.isOpened(): + print(f"Error: Cannot create output video '{output_path}'") + cap.release() + return + + # Process frames + frame_count = 0 + total_faces = 0 + + try: + with tqdm(total=total_frames, desc="Processing", unit="frames") as pbar: + while True: + ret, frame = cap.read() + if not ret: + break + + frame_count += 1 + + # Detect faces + faces = detector.detect(frame) + total_faces += len(faces) + + # Draw detections + bboxes = [f['bbox'] for f in faces] + scores = [f['confidence'] for f in faces] + landmarks = [f['landmarks'] for f in faces] + draw_detections(frame, bboxes, scores, landmarks, vis_threshold=vis_threshold) + + # Add frame info + cv2.putText(frame, f"Faces: {len(faces)}", (10, 30), + cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2) + + # Write frame + out.write(frame) + + # Show preview if requested + if show_preview: + cv2.imshow("Processing Video - Press 'q' to cancel", frame) + if cv2.waitKey(1) & 0xFF == ord('q'): + print("\nProcessing cancelled by user") + break + + pbar.update(1) + + except KeyboardInterrupt: + 
print("\nProcessing interrupted") + finally: + cap.release() + out.release() + if show_preview: + cv2.destroyAllWindows() + + # Summary + print(f"\nProcessing complete!") + print(f" Processed: {frame_count} frames") + print(f" Total faces detected: {total_faces}") + print(f" Average faces per frame: {total_faces/frame_count:.2f}" if frame_count > 0 else "") + print(f" Output saved: {output_path}") + + +def main(): + parser = argparse.ArgumentParser(description="Process video with face detection") + parser.add_argument("--input", type=str, required=True, help="Path to input video") + parser.add_argument("--output", type=str, required=True, help="Path to output video") + parser.add_argument("--detector", type=str, default="retinaface", + choices=['retinaface', 'scrfd'], help="Face detector to use") + parser.add_argument("--threshold", type=float, default=0.6, + help="Confidence threshold for visualization") + parser.add_argument("--fps", type=int, default=None, + help="Output FPS (default: same as input)") + parser.add_argument("--preview", action="store_true", + help="Show live preview during processing") + parser.add_argument("--verbose", action="store_true", help="Enable verbose logging") + + args = parser.parse_args() + + # Check input exists + if not Path(args.input).exists(): + print(f"Error: Input file '{args.input}' does not exist") + return + + # Create output directory if needed + output_dir = Path(args.output).parent + if output_dir != Path('.'): + output_dir.mkdir(parents=True, exist_ok=True) + + if args.verbose: + from uniface import enable_logging + enable_logging() + + # Initialize detector + print(f"Initializing detector: {args.detector}") + if args.detector == 'retinaface': + detector = RetinaFace() + else: + detector = SCRFD() + print("Detector initialized\n") + + # Process video + process_video(detector, args.input, args.output, args.threshold, args.fps, args.preview) + + +if __name__ == "__main__": + main() diff --git a/tests/test_age_gender.py b/tests/test_age_gender.py new file mode 100644 index 0000000..9816995 --- /dev/null +++ b/tests/test_age_gender.py @@ -0,0 +1,116 @@ +import numpy as np +import pytest + +from uniface.attribute import AgeGender + + +@pytest.fixture +def age_gender_model(): + return AgeGender() + + +@pytest.fixture +def mock_image(): + return np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8) + + +@pytest.fixture +def mock_bbox(): + return [100, 100, 300, 300] + + +def test_model_initialization(age_gender_model): + assert age_gender_model is not None, "AgeGender model initialization failed." 
+ + +def test_prediction_output_format(age_gender_model, mock_image, mock_bbox): + gender, age = age_gender_model.predict(mock_image, mock_bbox) + assert isinstance(gender, str), f"Gender should be string, got {type(gender)}" + assert isinstance(age, int), f"Age should be int, got {type(age)}" + + +def test_gender_values(age_gender_model, mock_image, mock_bbox): + gender, age = age_gender_model.predict(mock_image, mock_bbox) + assert gender in ['Male', 'Female'], f"Gender should be 'Male' or 'Female', got '{gender}'" + + +def test_age_range(age_gender_model, mock_image, mock_bbox): + gender, age = age_gender_model.predict(mock_image, mock_bbox) + assert 0 <= age <= 120, f"Age should be between 0 and 120, got {age}" + + +def test_different_bbox_sizes(age_gender_model, mock_image): + test_bboxes = [ + [50, 50, 150, 150], + [100, 100, 300, 300], + [50, 50, 400, 400], + ] + + for bbox in test_bboxes: + gender, age = age_gender_model.predict(mock_image, bbox) + assert gender in ['Male', 'Female'], f"Failed for bbox {bbox}" + assert 0 <= age <= 120, f"Age out of range for bbox {bbox}" + + +def test_different_image_sizes(age_gender_model, mock_bbox): + test_sizes = [(480, 640, 3), (720, 1280, 3), (1080, 1920, 3)] + + for size in test_sizes: + mock_image = np.random.randint(0, 255, size, dtype=np.uint8) + gender, age = age_gender_model.predict(mock_image, mock_bbox) + assert gender in ['Male', 'Female'], f"Failed for image size {size}" + assert 0 <= age <= 120, f"Age out of range for image size {size}" + + +def test_consistency(age_gender_model, mock_image, mock_bbox): + gender1, age1 = age_gender_model.predict(mock_image, mock_bbox) + gender2, age2 = age_gender_model.predict(mock_image, mock_bbox) + + assert gender1 == gender2, "Same input should produce same gender prediction" + assert age1 == age2, "Same input should produce same age prediction" + + +def test_bbox_list_format(age_gender_model, mock_image): + bbox_list = [100, 100, 300, 300] + gender, age = age_gender_model.predict(mock_image, bbox_list) + assert gender in ['Male', 'Female'], "Should work with bbox as list" + assert 0 <= age <= 120, "Age should be in valid range" + + +def test_bbox_array_format(age_gender_model, mock_image): + bbox_array = np.array([100, 100, 300, 300]) + gender, age = age_gender_model.predict(mock_image, bbox_array) + assert gender in ['Male', 'Female'], "Should work with bbox as numpy array" + assert 0 <= age <= 120, "Age should be in valid range" + + +def test_multiple_predictions(age_gender_model, mock_image): + bboxes = [ + [50, 50, 150, 150], + [200, 200, 350, 350], + [400, 400, 550, 550], + ] + + results = [] + for bbox in bboxes: + gender, age = age_gender_model.predict(mock_image, bbox) + results.append((gender, age)) + + assert len(results) == 3, "Should have 3 predictions" + for gender, age in results: + assert gender in ['Male', 'Female'] + assert 0 <= age <= 120 + + +def test_age_is_positive(age_gender_model, mock_image, mock_bbox): + for _ in range(5): + gender, age = age_gender_model.predict(mock_image, mock_bbox) + assert age >= 0, f"Age should be non-negative, got {age}" + + +def test_output_format_for_visualization(age_gender_model, mock_image, mock_bbox): + gender, age = age_gender_model.predict(mock_image, mock_bbox) + text = f"{gender}, {age}y" + assert isinstance(text, str), "Should be able to format as string" + assert "Male" in text or "Female" in text, "Text should contain gender" + assert "y" in text, "Text should contain 'y' for years" diff --git a/tests/test_factory.py 
b/tests/test_factory.py new file mode 100644 index 0000000..911fe48 --- /dev/null +++ b/tests/test_factory.py @@ -0,0 +1,278 @@ +import numpy as np +import pytest + +from uniface import ( + create_detector, + create_landmarker, + create_recognizer, + detect_faces, + list_available_detectors, +) +from uniface.constants import RetinaFaceWeights, SCRFDWeights + + +# create_detector tests +def test_create_detector_retinaface(): + """ + Test creating a RetinaFace detector using factory function. + """ + detector = create_detector('retinaface') + assert detector is not None, "Failed to create RetinaFace detector" + + +def test_create_detector_scrfd(): + """ + Test creating a SCRFD detector using factory function. + """ + detector = create_detector('scrfd') + assert detector is not None, "Failed to create SCRFD detector" + + +def test_create_detector_with_config(): + """ + Test creating detector with custom configuration. + """ + detector = create_detector( + 'retinaface', + model_name=RetinaFaceWeights.MNET_V2, + conf_thresh=0.8, + nms_thresh=0.3 + ) + assert detector is not None, "Failed to create detector with custom config" + + +def test_create_detector_invalid_method(): + """ + Test that invalid detector method raises an error. + """ + with pytest.raises((ValueError, KeyError)): + create_detector('invalid_method') + + +def test_create_detector_scrfd_with_model(): + """ + Test creating SCRFD detector with specific model. + """ + detector = create_detector( + 'scrfd', + model_name=SCRFDWeights.SCRFD_10G_KPS, + conf_thresh=0.5 + ) + assert detector is not None, "Failed to create SCRFD with specific model" + + +# create_recognizer tests +def test_create_recognizer_arcface(): + """ + Test creating an ArcFace recognizer using factory function. + """ + recognizer = create_recognizer('arcface') + assert recognizer is not None, "Failed to create ArcFace recognizer" + + +def test_create_recognizer_mobileface(): + """ + Test creating a MobileFace recognizer using factory function. + """ + recognizer = create_recognizer('mobileface') + assert recognizer is not None, "Failed to create MobileFace recognizer" + + +def test_create_recognizer_sphereface(): + """ + Test creating a SphereFace recognizer using factory function. + """ + recognizer = create_recognizer('sphereface') + assert recognizer is not None, "Failed to create SphereFace recognizer" + + +def test_create_recognizer_invalid_method(): + """ + Test that invalid recognizer method raises an error. + """ + with pytest.raises((ValueError, KeyError)): + create_recognizer('invalid_method') + + +# create_landmarker tests +def test_create_landmarker(): + """ + Test creating a Landmark106 detector using factory function. + """ + landmarker = create_landmarker('2d106det') + assert landmarker is not None, "Failed to create Landmark106 detector" + + +def test_create_landmarker_default(): + """ + Test creating landmarker with default parameters. + """ + landmarker = create_landmarker() + assert landmarker is not None, "Failed to create default landmarker" + + +def test_create_landmarker_invalid_method(): + """ + Test that invalid landmarker method raises an error. + """ + with pytest.raises((ValueError, KeyError)): + create_landmarker('invalid_method') + + +# detect_faces tests +def test_detect_faces_retinaface(): + """ + Test high-level detect_faces function with RetinaFace. 
+ """ + mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8) + faces = detect_faces(mock_image, method='retinaface') + + assert isinstance(faces, list), "detect_faces should return a list" + + +def test_detect_faces_scrfd(): + """ + Test high-level detect_faces function with SCRFD. + """ + mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8) + faces = detect_faces(mock_image, method='scrfd') + + assert isinstance(faces, list), "detect_faces should return a list" + + +def test_detect_faces_with_threshold(): + """ + Test detect_faces with custom confidence threshold. + """ + mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8) + faces = detect_faces(mock_image, method='retinaface', conf_thresh=0.8) + + assert isinstance(faces, list), "detect_faces should return a list" + + # All detections should respect threshold + for face in faces: + assert face['confidence'] >= 0.8, "All detections should meet confidence threshold" + + +def test_detect_faces_default_method(): + """ + Test detect_faces with default method (should use retinaface). + """ + mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8) + faces = detect_faces(mock_image) # No method specified + + assert isinstance(faces, list), "detect_faces should return a list with default method" + + +def test_detect_faces_empty_image(): + """ + Test detect_faces on a blank image. + """ + empty_image = np.zeros((640, 640, 3), dtype=np.uint8) + faces = detect_faces(empty_image, method='retinaface') + + assert isinstance(faces, list), "Should return a list even for empty image" + assert len(faces) == 0, "Should detect no faces in blank image" + + +# list_available_detectors tests +def test_list_available_detectors(): + """ + Test that list_available_detectors returns a dictionary. + """ + detectors = list_available_detectors() + + assert isinstance(detectors, dict), "Should return a dictionary of detectors" + assert len(detectors) > 0, "Should have at least one detector available" + + +def test_list_available_detectors_contents(): + """ + Test that list includes known detectors. + """ + detectors = list_available_detectors() + + # Should include at least these detectors + assert 'retinaface' in detectors, "Should include 'retinaface'" + assert 'scrfd' in detectors, "Should include 'scrfd'" + + +# Integration tests +def test_detector_inference_from_factory(): + """ + Test that detector created from factory can perform inference. + """ + detector = create_detector('retinaface') + mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8) + + faces = detector.detect(mock_image) + assert isinstance(faces, list), "Detector should return list of faces" + + +def test_recognizer_inference_from_factory(): + """ + Test that recognizer created from factory can perform inference. + """ + recognizer = create_recognizer('arcface') + mock_image = np.random.randint(0, 255, (112, 112, 3), dtype=np.uint8) + + embedding = recognizer.get_embedding(mock_image) + assert embedding is not None, "Recognizer should return embedding" + assert embedding.shape[1] == 512, "Should return 512-dimensional embedding" + + +def test_landmarker_inference_from_factory(): + """ + Test that landmarker created from factory can perform inference. 
+ """ + landmarker = create_landmarker('2d106det') + mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8) + mock_bbox = [100, 100, 300, 300] + + landmarks = landmarker.get_landmarks(mock_image, mock_bbox) + assert landmarks is not None, "Landmarker should return landmarks" + assert landmarks.shape == (106, 2), "Should return 106 landmarks" + + +def test_multiple_detector_creation(): + """ + Test that multiple detectors can be created independently. + """ + detector1 = create_detector('retinaface') + detector2 = create_detector('scrfd') + + assert detector1 is not None + assert detector2 is not None + assert detector1 is not detector2, "Should create separate instances" + + +def test_detector_with_different_configs(): + """ + Test creating multiple detectors with different configurations. + """ + detector_high_thresh = create_detector('retinaface', conf_thresh=0.9) + detector_low_thresh = create_detector('retinaface', conf_thresh=0.3) + + mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8) + + faces_high = detector_high_thresh.detect(mock_image) + faces_low = detector_low_thresh.detect(mock_image) + + # Both should work + assert isinstance(faces_high, list) + assert isinstance(faces_low, list) + + +def test_factory_returns_correct_types(): + """ + Test that factory functions return instances of the correct types. + """ + from uniface import RetinaFace, ArcFace, Landmark106 + + detector = create_detector('retinaface') + recognizer = create_recognizer('arcface') + landmarker = create_landmarker('2d106det') + + assert isinstance(detector, RetinaFace), "Should return RetinaFace instance" + assert isinstance(recognizer, ArcFace), "Should return ArcFace instance" + assert isinstance(landmarker, Landmark106), "Should return Landmark106 instance" diff --git a/tests/test_landmark.py b/tests/test_landmark.py new file mode 100644 index 0000000..a5c2a4b --- /dev/null +++ b/tests/test_landmark.py @@ -0,0 +1,107 @@ +import numpy as np +import pytest + +from uniface.landmark import Landmark106 + + +@pytest.fixture +def landmark_model(): + return Landmark106() + + +@pytest.fixture +def mock_image(): + return np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8) + + +@pytest.fixture +def mock_bbox(): + return [100, 100, 300, 300] + + +def test_model_initialization(landmark_model): + assert landmark_model is not None, "Landmark106 model initialization failed." 
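+
+
+# The tests below exercise Landmark106.get_landmarks(image, bbox), which maps
+# a face region to a (106, 2) float32 array of (x, y) points. A minimal usage
+# sketch, assuming a RetinaFace detector and an illustrative input path (both
+# are assumptions, not part of this test suite):
+#
+#     import cv2
+#     from uniface.detection import RetinaFace
+#     from uniface.landmark import Landmark106
+#
+#     detector = RetinaFace()
+#     landmarker = Landmark106()
+#     image = cv2.imread("face.jpg")  # hypothetical path
+#     for face in detector.detect(image):
+#         for x, y in landmarker.get_landmarks(image, face["bbox"]).astype(int):
+#             cv2.circle(image, (int(x), int(y)), 1, (0, 255, 0), -1)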
+
+
+def test_landmark_detection(landmark_model, mock_image, mock_bbox):
+    landmarks = landmark_model.get_landmarks(mock_image, mock_bbox)
+    assert landmarks.shape == (106, 2), f"Expected shape (106, 2), got {landmarks.shape}"
+
+
+def test_landmark_dtype(landmark_model, mock_image, mock_bbox):
+    landmarks = landmark_model.get_landmarks(mock_image, mock_bbox)
+    assert landmarks.dtype == np.float32, f"Expected float32, got {landmarks.dtype}"
+
+
+def test_landmark_coordinates_within_image(landmark_model, mock_image, mock_bbox):
+    landmarks = landmark_model.get_landmarks(mock_image, mock_bbox)
+
+    x_coords = landmarks[:, 0]
+    y_coords = landmarks[:, 1]
+
+    x1, y1, x2, y2 = mock_bbox
+    margin = 50
+
+    x_in_bounds = np.sum((x_coords >= x1 - margin) & (x_coords <= x2 + margin))
+    y_in_bounds = np.sum((y_coords >= y1 - margin) & (y_coords <= y2 + margin))
+
+    assert x_in_bounds >= 95, f"Only {x_in_bounds}/106 x-coordinates within bounds"
+    assert y_in_bounds >= 95, f"Only {y_in_bounds}/106 y-coordinates within bounds"
+
+
+def test_different_bbox_sizes(landmark_model, mock_image):
+    test_bboxes = [
+        [50, 50, 150, 150],
+        [100, 100, 300, 300],
+        [50, 50, 400, 400],
+    ]
+
+    for bbox in test_bboxes:
+        landmarks = landmark_model.get_landmarks(mock_image, bbox)
+        assert landmarks.shape == (106, 2), f"Failed for bbox {bbox}"
+
+
+def test_landmark_array_format(landmark_model, mock_image, mock_bbox):
+    landmarks = landmark_model.get_landmarks(mock_image, mock_bbox)
+    landmarks_int = landmarks.astype(int)
+
+    assert landmarks_int.shape == (106, 2), "Integer conversion should preserve shape"
+    assert landmarks_int.dtype in [np.int32, np.int64], "Should convert to integer type"
+
+
+def test_consistency(landmark_model, mock_image, mock_bbox):
+    landmarks1 = landmark_model.get_landmarks(mock_image, mock_bbox)
+    landmarks2 = landmark_model.get_landmarks(mock_image, mock_bbox)
+
+    assert np.allclose(landmarks1, landmarks2), "Same input should produce same landmarks"
+
+
+def test_different_image_sizes(landmark_model, mock_bbox):
+    test_sizes = [(480, 640, 3), (720, 1280, 3), (1080, 1920, 3)]
+
+    for size in test_sizes:
+        mock_image = np.random.randint(0, 255, size, dtype=np.uint8)
+        landmarks = landmark_model.get_landmarks(mock_image, mock_bbox)
+        assert landmarks.shape == (106, 2), f"Failed for image size {size}"
+
+
+def test_bbox_list_format(landmark_model, mock_image):
+    bbox_list = [100, 100, 300, 300]
+    landmarks = landmark_model.get_landmarks(mock_image, bbox_list)
+    assert landmarks.shape == (106, 2), "Should work with bbox as list"
+
+
+def test_bbox_array_format(landmark_model, mock_image):
+    bbox_array = np.array([100, 100, 300, 300])
+    landmarks = landmark_model.get_landmarks(mock_image, bbox_array)
+    assert landmarks.shape == (106, 2), "Should work with bbox as numpy array"
+
+
+def test_landmark_distribution(landmark_model, mock_image, mock_bbox):
+    landmarks = landmark_model.get_landmarks(mock_image, mock_bbox)
+
+    x_variance = np.var(landmarks[:, 0])
+    y_variance = np.var(landmarks[:, 1])
+
+    assert x_variance > 0, "Landmarks should have variation in x-coordinates"
+    assert y_variance > 0, "Landmarks should have variation in y-coordinates"
diff --git a/tests/test_recognition.py b/tests/test_recognition.py
new file mode 100644
index 0000000..91d373c
--- /dev/null
+++ b/tests/test_recognition.py
@@ -0,0 +1,211 @@
+import numpy as np
+import pytest
+
+from uniface.recognition import ArcFace, MobileFace, SphereFace
+
+
+@pytest.fixture
+def arcface_model():
+    """
+    Fixture to initialize the
ArcFace model for testing. + """ + return ArcFace() + + +@pytest.fixture +def mobileface_model(): + """ + Fixture to initialize the MobileFace model for testing. + """ + return MobileFace() + + +@pytest.fixture +def sphereface_model(): + """ + Fixture to initialize the SphereFace model for testing. + """ + return SphereFace() + + +@pytest.fixture +def mock_aligned_face(): + """ + Create a mock 112x112 aligned face image. + """ + return np.random.randint(0, 255, (112, 112, 3), dtype=np.uint8) + + +@pytest.fixture +def mock_landmarks(): + """ + Create mock 5-point facial landmarks. + """ + return np.array([ + [38.2946, 51.6963], + [73.5318, 51.5014], + [56.0252, 71.7366], + [41.5493, 92.3655], + [70.7299, 92.2041] + ], dtype=np.float32) + + +# ArcFace Tests +def test_arcface_initialization(arcface_model): + """ + Test that the ArcFace model initializes correctly. + """ + assert arcface_model is not None, "ArcFace model initialization failed." + + +def test_arcface_embedding_shape(arcface_model, mock_aligned_face): + """ + Test that ArcFace produces embeddings with the correct shape. + """ + embedding = arcface_model.get_embedding(mock_aligned_face) + + # ArcFace typically produces 512-dimensional embeddings + assert embedding.shape[1] == 512, f"Expected 512-dim embedding, got {embedding.shape[1]}" + assert embedding.shape[0] == 1, "Embedding should have batch dimension of 1" + + +def test_arcface_normalized_embedding(arcface_model, mock_landmarks): + """ + Test that normalized embeddings have unit length. + """ + # Create a larger mock image for alignment + mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8) + + embedding = arcface_model.get_normalized_embedding(mock_image, mock_landmarks) + + # Check that embedding is normalized (L2 norm ≈ 1.0) + norm = np.linalg.norm(embedding) + assert np.isclose(norm, 1.0, atol=1e-5), f"Normalized embedding should have norm 1.0, got {norm}" + + +def test_arcface_embedding_dtype(arcface_model, mock_aligned_face): + """ + Test that embeddings have the correct data type. + """ + embedding = arcface_model.get_embedding(mock_aligned_face) + assert embedding.dtype == np.float32, f"Expected float32, got {embedding.dtype}" + + +def test_arcface_consistency(arcface_model, mock_aligned_face): + """ + Test that the same input produces the same embedding. + """ + embedding1 = arcface_model.get_embedding(mock_aligned_face) + embedding2 = arcface_model.get_embedding(mock_aligned_face) + + assert np.allclose(embedding1, embedding2), "Same input should produce same embedding" + + +# MobileFace Tests +def test_mobileface_initialization(mobileface_model): + """ + Test that the MobileFace model initializes correctly. + """ + assert mobileface_model is not None, "MobileFace model initialization failed." + + +def test_mobileface_embedding_shape(mobileface_model, mock_aligned_face): + """ + Test that MobileFace produces embeddings with the correct shape. + """ + embedding = mobileface_model.get_embedding(mock_aligned_face) + + # MobileFace typically produces 512-dimensional embeddings + assert embedding.shape[1] == 512, f"Expected 512-dim embedding, got {embedding.shape[1]}" + assert embedding.shape[0] == 1, "Embedding should have batch dimension of 1" + + +def test_mobileface_normalized_embedding(mobileface_model, mock_landmarks): + """ + Test that MobileFace normalized embeddings have unit length. 
+ """ + mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8) + + embedding = mobileface_model.get_normalized_embedding(mock_image, mock_landmarks) + + norm = np.linalg.norm(embedding) + assert np.isclose(norm, 1.0, atol=1e-5), f"Normalized embedding should have norm 1.0, got {norm}" + + +# SphereFace Tests +def test_sphereface_initialization(sphereface_model): + """ + Test that the SphereFace model initializes correctly. + """ + assert sphereface_model is not None, "SphereFace model initialization failed." + + +def test_sphereface_embedding_shape(sphereface_model, mock_aligned_face): + """ + Test that SphereFace produces embeddings with the correct shape. + """ + embedding = sphereface_model.get_embedding(mock_aligned_face) + + # SphereFace typically produces 512-dimensional embeddings + assert embedding.shape[1] == 512, f"Expected 512-dim embedding, got {embedding.shape[1]}" + assert embedding.shape[0] == 1, "Embedding should have batch dimension of 1" + + +def test_sphereface_normalized_embedding(sphereface_model, mock_landmarks): + """ + Test that SphereFace normalized embeddings have unit length. + """ + mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8) + + embedding = sphereface_model.get_normalized_embedding(mock_image, mock_landmarks) + + norm = np.linalg.norm(embedding) + assert np.isclose(norm, 1.0, atol=1e-5), f"Normalized embedding should have norm 1.0, got {norm}" + + +# Cross-model comparison tests +def test_different_models_different_embeddings(arcface_model, mobileface_model, mock_aligned_face): + """ + Test that different models produce different embeddings for the same input. + """ + arcface_emb = arcface_model.get_embedding(mock_aligned_face) + mobileface_emb = mobileface_model.get_embedding(mock_aligned_face) + + # Embeddings should be different (with high probability for random input) + # We check that they're not identical + assert not np.allclose(arcface_emb, mobileface_emb), \ + "Different models should produce different embeddings" + + +def test_embedding_similarity_computation(arcface_model, mock_aligned_face): + """ + Test computing similarity between embeddings. + """ + # Get two embeddings + emb1 = arcface_model.get_embedding(mock_aligned_face) + + # Create a slightly different image + mock_aligned_face2 = mock_aligned_face.copy() + mock_aligned_face2[:10, :10] = 0 # Modify a small region + emb2 = arcface_model.get_embedding(mock_aligned_face2) + + # Compute cosine similarity + from uniface import compute_similarity + similarity = compute_similarity(emb1, emb2) + + # Similarity should be between -1 and 1 + assert -1.0 <= similarity <= 1.0, f"Similarity should be in [-1, 1], got {similarity}" + + +def test_same_face_high_similarity(arcface_model, mock_aligned_face): + """ + Test that the same face produces high similarity. + """ + emb1 = arcface_model.get_embedding(mock_aligned_face) + emb2 = arcface_model.get_embedding(mock_aligned_face) + + from uniface import compute_similarity + similarity = compute_similarity(emb1, emb2) + + # Same image should have similarity close to 1.0 + assert similarity > 0.99, f"Same face should have similarity > 0.99, got {similarity}" diff --git a/tests/test_retinaface.py b/tests/test_retinaface.py index 1ec4f27..6ed1d28 100644 --- a/tests/test_retinaface.py +++ b/tests/test_retinaface.py @@ -7,9 +7,6 @@ from uniface.detection import RetinaFace @pytest.fixture def retinaface_model(): - """ - Fixture to initialize the RetinaFace model for testing. 
- """ return RetinaFace( model_name=RetinaFaceWeights.MNET_V2, conf_thresh=0.5, @@ -20,67 +17,39 @@ def retinaface_model(): def test_model_initialization(retinaface_model): - """ - Test that the RetinaFace model initializes correctly. - """ assert retinaface_model is not None, "Model initialization failed." def test_inference_on_640x640_image(retinaface_model): - """ - Test inference on a 640x640 BGR image. - """ - # Generate a mock 640x640 BGR image mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8) - - # Run inference - returns list of dictionaries faces = retinaface_model.detect(mock_image) - # Check output type assert isinstance(faces, list), "Detections should be a list." - # Check that each face has the expected structure for face in faces: assert isinstance(face, dict), "Each detection should be a dictionary." assert "bbox" in face, "Each detection should have a 'bbox' key." assert "confidence" in face, "Each detection should have a 'confidence' key." assert "landmarks" in face, "Each detection should have a 'landmarks' key." - # Check bbox format bbox = face["bbox"] assert len(bbox) == 4, "BBox should have 4 values (x1, y1, x2, y2)." - # Check landmarks format landmarks = face["landmarks"] assert len(landmarks) == 5, "Should have 5 landmark points." assert all(len(pt) == 2 for pt in landmarks), "Each landmark should be (x, y)." def test_confidence_threshold(retinaface_model): - """ - Test that detections respect the confidence threshold. - """ - # Generate a mock 640x640 BGR image mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8) - - # Run inference faces = retinaface_model.detect(mock_image) - # Ensure all detections have confidence scores above the threshold for face in faces: confidence = face["confidence"] assert confidence >= 0.5, f"Detection has confidence {confidence} below threshold 0.5" def test_no_faces_detected(retinaface_model): - """ - Test inference on an image without detectable faces. - """ - # Generate an empty (black) 640x640 image empty_image = np.zeros((640, 640, 3), dtype=np.uint8) - - # Run inference faces = retinaface_model.detect(empty_image) - - # Ensure no detections are found assert len(faces) == 0, "Should detect no faces in a blank image." diff --git a/tests/test_scrfd.py b/tests/test_scrfd.py new file mode 100644 index 0000000..c25101d --- /dev/null +++ b/tests/test_scrfd.py @@ -0,0 +1,71 @@ +import numpy as np +import pytest + +from uniface.constants import SCRFDWeights +from uniface.detection import SCRFD + + +@pytest.fixture +def scrfd_model(): + return SCRFD( + model_name=SCRFDWeights.SCRFD_500M_KPS, + conf_thresh=0.5, + nms_thresh=0.4, + ) + + +def test_model_initialization(scrfd_model): + assert scrfd_model is not None, "Model initialization failed." + + +def test_inference_on_640x640_image(scrfd_model): + mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8) + faces = scrfd_model.detect(mock_image) + + assert isinstance(faces, list), "Detections should be a list." + + for face in faces: + assert isinstance(face, dict), "Each detection should be a dictionary." + assert "bbox" in face, "Each detection should have a 'bbox' key." + assert "confidence" in face, "Each detection should have a 'confidence' key." + assert "landmarks" in face, "Each detection should have a 'landmarks' key." + + bbox = face["bbox"] + assert len(bbox) == 4, "BBox should have 4 values (x1, y1, x2, y2)." + + landmarks = face["landmarks"] + assert len(landmarks) == 5, "Should have 5 landmark points." 
+ assert all(len(pt) == 2 for pt in landmarks), "Each landmark should be (x, y)." + + +def test_confidence_threshold(scrfd_model): + mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8) + faces = scrfd_model.detect(mock_image) + + for face in faces: + confidence = face["confidence"] + assert confidence >= 0.5, f"Detection has confidence {confidence} below threshold 0.5" + + +def test_no_faces_detected(scrfd_model): + empty_image = np.zeros((640, 640, 3), dtype=np.uint8) + faces = scrfd_model.detect(empty_image) + assert len(faces) == 0, "Should detect no faces in a blank image." + + +def test_different_input_sizes(scrfd_model): + test_sizes = [(480, 640, 3), (720, 1280, 3), (1080, 1920, 3)] + + for size in test_sizes: + mock_image = np.random.randint(0, 255, size, dtype=np.uint8) + faces = scrfd_model.detect(mock_image) + assert isinstance(faces, list), f"Should return list for size {size}" + + +def test_scrfd_10g_model(): + model = SCRFD(model_name=SCRFDWeights.SCRFD_10G_KPS, conf_thresh=0.5) + assert model is not None, "SCRFD 10G model initialization failed." + + mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8) + faces = model.detect(mock_image) + assert isinstance(faces, list), "SCRFD 10G should return list of detections." diff --git a/tests/test_utils.py b/tests/test_utils.py new file mode 100644 index 0000000..653fa7f --- /dev/null +++ b/tests/test_utils.py @@ -0,0 +1,247 @@ +import numpy as np +import pytest + +from uniface import compute_similarity, face_alignment + + +@pytest.fixture +def mock_image(): + """ + Create a mock 640x640 BGR image. + """ + return np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8) + + +@pytest.fixture +def mock_landmarks(): + """ + Create mock 5-point facial landmarks. + Standard positions for a face roughly centered at (112/2, 112/2). + """ + return np.array([ + [38.2946, 51.6963], # Left eye + [73.5318, 51.5014], # Right eye + [56.0252, 71.7366], # Nose + [41.5493, 92.3655], # Left mouth corner + [70.7299, 92.2041] # Right mouth corner + ], dtype=np.float32) + + +# compute_similarity tests +def test_compute_similarity_same_embedding(): + """ + Test that similarity of an embedding with itself is 1.0. + """ + embedding = np.random.randn(1, 512).astype(np.float32) + embedding = embedding / np.linalg.norm(embedding) # Normalize + + similarity = compute_similarity(embedding, embedding) + assert np.isclose(similarity, 1.0, atol=1e-5), f"Self-similarity should be 1.0, got {similarity}" + + +def test_compute_similarity_range(): + """ + Test that similarity is always in the range [-1, 1]. + """ + # Test with multiple random embeddings + for _ in range(10): + emb1 = np.random.randn(1, 512).astype(np.float32) + emb2 = np.random.randn(1, 512).astype(np.float32) + + # Normalize + emb1 = emb1 / np.linalg.norm(emb1) + emb2 = emb2 / np.linalg.norm(emb2) + + similarity = compute_similarity(emb1, emb2) + assert -1.0 <= similarity <= 1.0, f"Similarity should be in [-1, 1], got {similarity}" + + +def test_compute_similarity_orthogonal(): + """ + Test that orthogonal embeddings have similarity close to 0. 
+ """ + # Create orthogonal embeddings + emb1 = np.zeros((1, 512), dtype=np.float32) + emb1[0, 0] = 1.0 # [1, 0, 0, ..., 0] + + emb2 = np.zeros((1, 512), dtype=np.float32) + emb2[0, 1] = 1.0 # [0, 1, 0, ..., 0] + + similarity = compute_similarity(emb1, emb2) + assert np.isclose(similarity, 0.0, atol=1e-5), f"Orthogonal embeddings should have similarity 0.0, got {similarity}" + + +def test_compute_similarity_opposite(): + """ + Test that opposite embeddings have similarity close to -1. + """ + emb1 = np.ones((1, 512), dtype=np.float32) + emb1 = emb1 / np.linalg.norm(emb1) + + emb2 = -emb1 # Opposite direction + + similarity = compute_similarity(emb1, emb2) + assert np.isclose(similarity, -1.0, atol=1e-5), f"Opposite embeddings should have similarity -1.0, got {similarity}" + + +def test_compute_similarity_symmetry(): + """ + Test that similarity(A, B) == similarity(B, A). + """ + emb1 = np.random.randn(1, 512).astype(np.float32) + emb2 = np.random.randn(1, 512).astype(np.float32) + + # Normalize + emb1 = emb1 / np.linalg.norm(emb1) + emb2 = emb2 / np.linalg.norm(emb2) + + sim_12 = compute_similarity(emb1, emb2) + sim_21 = compute_similarity(emb2, emb1) + + assert np.isclose(sim_12, sim_21), "Similarity should be symmetric" + + +def test_compute_similarity_dtype(): + """ + Test that compute_similarity returns a float. + """ + emb1 = np.random.randn(1, 512).astype(np.float32) + emb2 = np.random.randn(1, 512).astype(np.float32) + + # Normalize + emb1 = emb1 / np.linalg.norm(emb1) + emb2 = emb2 / np.linalg.norm(emb2) + + similarity = compute_similarity(emb1, emb2) + assert isinstance(similarity, (float, np.floating)), f"Similarity should be float, got {type(similarity)}" + + +# face_alignment tests +def test_face_alignment_output_shape(mock_image, mock_landmarks): + """ + Test that face_alignment produces output with the correct shape. + """ + aligned, _ = face_alignment(mock_image, mock_landmarks, image_size=(112, 112)) + + assert aligned.shape == (112, 112, 3), f"Expected shape (112, 112, 3), got {aligned.shape}" + + +def test_face_alignment_dtype(mock_image, mock_landmarks): + """ + Test that aligned face has the correct data type. + """ + aligned, _ = face_alignment(mock_image, mock_landmarks, image_size=(112, 112)) + + assert aligned.dtype == np.uint8, f"Expected uint8, got {aligned.dtype}" + + +def test_face_alignment_different_sizes(mock_image, mock_landmarks): + """ + Test face alignment with different output sizes. + """ + # Only test sizes that are multiples of 112 or 128 as required by the function + test_sizes = [(112, 112), (128, 128), (224, 224)] + + for size in test_sizes: + aligned, _ = face_alignment(mock_image, mock_landmarks, image_size=size) + assert aligned.shape == (*size, 3), f"Failed for size {size}" + + +def test_face_alignment_consistency(mock_image, mock_landmarks): + """ + Test that the same input produces the same aligned face. + """ + aligned1, _ = face_alignment(mock_image, mock_landmarks, image_size=(112, 112)) + aligned2, _ = face_alignment(mock_image, mock_landmarks, image_size=(112, 112)) + + assert np.allclose(aligned1, aligned2), "Same input should produce same aligned face" + + +def test_face_alignment_landmarks_as_list(mock_image): + """ + Test that landmarks can be passed as a list of lists (converted to array). 
+ """ + landmarks_list = [ + [38.2946, 51.6963], + [73.5318, 51.5014], + [56.0252, 71.7366], + [41.5493, 92.3655], + [70.7299, 92.2041] + ] + + # Convert list to numpy array before passing to face_alignment + landmarks_array = np.array(landmarks_list, dtype=np.float32) + aligned, _ = face_alignment(mock_image, landmarks_array, image_size=(112, 112)) + assert aligned.shape == (112, 112, 3), "Should work with landmarks as array" + + +def test_face_alignment_value_range(mock_image, mock_landmarks): + """ + Test that aligned face pixel values are in valid range [0, 255]. + """ + aligned, _ = face_alignment(mock_image, mock_landmarks, image_size=(112, 112)) + + assert np.all(aligned >= 0), "Pixel values should be >= 0" + assert np.all(aligned <= 255), "Pixel values should be <= 255" + + +def test_face_alignment_not_all_zeros(mock_image, mock_landmarks): + """ + Test that aligned face is not all zeros (actual transformation occurred). + """ + aligned, _ = face_alignment(mock_image, mock_landmarks, image_size=(112, 112)) + + # At least some pixels should be non-zero + assert np.any(aligned > 0), "Aligned face should have some non-zero pixels" + + +def test_face_alignment_from_different_positions(mock_image): + """ + Test alignment with landmarks at different positions in the image. + """ + # Landmarks at different positions + positions = [ + np.array([[100, 100], [150, 100], [125, 130], [110, 150], [140, 150]], dtype=np.float32), + np.array([[300, 200], [350, 200], [325, 230], [310, 250], [340, 250]], dtype=np.float32), + np.array([[500, 400], [550, 400], [525, 430], [510, 450], [540, 450]], dtype=np.float32), + ] + + for landmarks in positions: + aligned, _ = face_alignment(mock_image, landmarks, image_size=(112, 112)) + assert aligned.shape == (112, 112, 3), f"Failed for landmarks at {landmarks[0]}" + + +def test_face_alignment_landmark_count(mock_image): + """ + Test that face_alignment works specifically with 5-point landmarks. + """ + # Standard 5-point landmarks + landmarks_5pt = np.array([ + [38.2946, 51.6963], + [73.5318, 51.5014], + [56.0252, 71.7366], + [41.5493, 92.3655], + [70.7299, 92.2041] + ], dtype=np.float32) + + aligned, _ = face_alignment(mock_image, landmarks_5pt, image_size=(112, 112)) + assert aligned.shape == (112, 112, 3), "Should work with 5-point landmarks" + + +def test_compute_similarity_with_recognition_embeddings(): + """ + Test compute_similarity with realistic embedding dimensions. + """ + # Simulate ArcFace/MobileFace/SphereFace embeddings (512-dim) + emb1 = np.random.randn(1, 512).astype(np.float32) + emb2 = np.random.randn(1, 512).astype(np.float32) + + # Normalize (as done in get_normalized_embedding) + emb1 = emb1 / np.linalg.norm(emb1) + emb2 = emb2 / np.linalg.norm(emb2) + + similarity = compute_similarity(emb1, emb2) + + # Should be a valid similarity score + assert -1.0 <= similarity <= 1.0 + assert isinstance(similarity, (float, np.floating)) diff --git a/uniface/__init__.py b/uniface/__init__.py index 3d32c0e..a20d487 100644 --- a/uniface/__init__.py +++ b/uniface/__init__.py @@ -13,7 +13,7 @@ __license__ = "MIT" __author__ = "Yakhyokhuja Valikhujaev" -__version__ = "1.0.0" +__version__ = "1.0.1" from uniface.face_utils import compute_similarity, face_alignment diff --git a/uniface/constants.py b/uniface/constants.py index bef205c..de6b806 100644 --- a/uniface/constants.py +++ b/uniface/constants.py @@ -29,7 +29,7 @@ class ArcFaceWeights(str, Enum): Pretrained weights from ArcFace model (insightface). 
https://github.com/deepinsight/insightface """ - MNET = "arcface_mnet" + MNET = "arcface_mnet" RESNET = "arcface_resnet" class RetinaFaceWeights(str, Enum): @@ -84,42 +84,42 @@ class LandmarkWeights(str, Enum): MODEL_URLS: Dict[Enum, str] = { # RetinaFace - RetinaFaceWeights.MNET_025: 'https://github.com/yakhyo/uniface/releases/download/v0.1.2/retinaface_mv1_0.25.onnx', - RetinaFaceWeights.MNET_050: 'https://github.com/yakhyo/uniface/releases/download/v0.1.2/retinaface_mv1_0.50.onnx', - RetinaFaceWeights.MNET_V1: 'https://github.com/yakhyo/uniface/releases/download/v0.1.2/retinaface_mv1.onnx', - RetinaFaceWeights.MNET_V2: 'https://github.com/yakhyo/uniface/releases/download/v0.1.2/retinaface_mv2.onnx', - RetinaFaceWeights.RESNET18: 'https://github.com/yakhyo/uniface/releases/download/v0.1.2/retinaface_r18.onnx', - RetinaFaceWeights.RESNET34: 'https://github.com/yakhyo/uniface/releases/download/v0.1.2/retinaface_r34.onnx', + RetinaFaceWeights.MNET_025: 'https://github.com/yakhyo/uniface/releases/download/weights/retinaface_mv1_0.25.onnx', + RetinaFaceWeights.MNET_050: 'https://github.com/yakhyo/uniface/releases/download/weights/retinaface_mv1_0.50.onnx', + RetinaFaceWeights.MNET_V1: 'https://github.com/yakhyo/uniface/releases/download/weights/retinaface_mv1.onnx', + RetinaFaceWeights.MNET_V2: 'https://github.com/yakhyo/uniface/releases/download/weights/retinaface_mv2.onnx', + RetinaFaceWeights.RESNET18: 'https://github.com/yakhyo/uniface/releases/download/weights/retinaface_r18.onnx', + RetinaFaceWeights.RESNET34: 'https://github.com/yakhyo/uniface/releases/download/weights/retinaface_r34.onnx', # MobileFace - MobileFaceWeights.MNET_025: 'https://github.com/yakhyo/uniface/releases/download/v0.1.2/###', - MobileFaceWeights.MNET_V2: 'https://github.com/yakhyo/uniface/releases/download/v0.1.2/###', - MobileFaceWeights.MNET_V3_SMALL: 'https://github.com/yakhyo/uniface/releases/download/v0.1.2/###', - MobileFaceWeights.MNET_V3_LARGE: 'https://github.com/yakhyo/uniface/releases/download/v0.1.2/###', + MobileFaceWeights.MNET_025: 'https://github.com/yakhyo/uniface/releases/download/weights/mobilenetv1_0.25.onnx', + MobileFaceWeights.MNET_V2: 'https://github.com/yakhyo/uniface/releases/download/weights/mobilenetv2.onnx', + MobileFaceWeights.MNET_V3_SMALL: 'https://github.com/yakhyo/uniface/releases/download/weights/mobilenetv3_small.onnx', + MobileFaceWeights.MNET_V3_LARGE: 'https://github.com/yakhyo/uniface/releases/download/weights/mobilenetv3_large.onnx', # SphereFace - SphereFaceWeights.SPHERE20: 'https://github.com/yakhyo/uniface/releases/download/v0.1.2/###', - SphereFaceWeights.SPHERE36: 'https://github.com/yakhyo/uniface/releases/download/v0.1.2/###', + SphereFaceWeights.SPHERE20: 'https://github.com/yakhyo/uniface/releases/download/weights/sphere20.onnx', + SphereFaceWeights.SPHERE36: 'https://github.com/yakhyo/uniface/releases/download/weights/sphere36.onnx', # ArcFace - ArcFaceWeights.MNET: 'https://github.com/yakhyo/uniface/releases/download/v0.1.2/w600k_mbf.onnx', - ArcFaceWeights.RESNET: 'https://github.com/yakhyo/uniface/releases/download/v0.1.2/w600k_r50.onnx', + ArcFaceWeights.MNET: 'https://github.com/yakhyo/uniface/releases/download/weights/w600k_mbf.onnx', + ArcFaceWeights.RESNET: 'https://github.com/yakhyo/uniface/releases/download/weights/w600k_r50.onnx', # SCRFD - SCRFDWeights.SCRFD_10G_KPS: 'https://github.com/yakhyo/uniface/releases/download/v0.1.2/scrfd_10g_kps.onnx', - SCRFDWeights.SCRFD_500M_KPS: 
'https://github.com/yakhyo/uniface/releases/download/v0.1.2/scrfd_500m_kps.onnx', + SCRFDWeights.SCRFD_10G_KPS: 'https://github.com/yakhyo/uniface/releases/download/weights/scrfd_10g_kps.onnx', + SCRFDWeights.SCRFD_500M_KPS: 'https://github.com/yakhyo/uniface/releases/download/weights/scrfd_500m_kps.onnx', # DDAFM - DDAMFNWeights.AFFECNET7: 'https://github.com/yakhyo/uniface/releases/download/v0.1.2/affecnet7.script', - DDAMFNWeights.AFFECNET8: 'https://github.com/yakhyo/uniface/releases/download/v0.1.2/affecnet8.script', + DDAMFNWeights.AFFECNET7: 'https://github.com/yakhyo/uniface/releases/download/weights/affecnet7.script', + DDAMFNWeights.AFFECNET8: 'https://github.com/yakhyo/uniface/releases/download/weights/affecnet8.script', # AgeGender - AgeGenderWeights.DEFAULT: 'https://github.com/yakhyo/uniface/releases/download/v0.1.2/genderage.onnx', + AgeGenderWeights.DEFAULT: 'https://github.com/yakhyo/uniface/releases/download/weights/genderage.onnx', # Landmarks - LandmarkWeights.DEFAULT: 'https://github.com/yakhyo/uniface/releases/download/v0.1.2/2d106det.onnx', + LandmarkWeights.DEFAULT: 'https://github.com/yakhyo/uniface/releases/download/weights/2d106det.onnx', } MODEL_SHA256: Dict[Enum, str] = { @@ -132,14 +132,14 @@ MODEL_SHA256: Dict[Enum, str] = { RetinaFaceWeights.RESNET34: 'bd0263dc2a465d32859555cb1741f2d98991eb0053696e8ee33fec583d30e630', # MobileFace - MobileFaceWeights.MNET_025: '#', - MobileFaceWeights.MNET_V2: '#', - MobileFaceWeights.MNET_V3_SMALL: '#', - MobileFaceWeights.MNET_V3_LARGE: '#', + MobileFaceWeights.MNET_025: 'eeda7d23d9c2b40cf77fa8da8e895b5697465192648852216074679657f8ee8b', + MobileFaceWeights.MNET_V2: '38b148284dd48cc898d5d4453104252fbdcbacc105fe3f0b80e78954d9d20d89', + MobileFaceWeights.MNET_V3_SMALL: 'd4acafa1039a82957aa8a9a1dac278a401c353a749c39df43de0e29cc1c127c3', + MobileFaceWeights.MNET_V3_LARGE: '0e48f8e11f070211716d03e5c65a3db35a5e917cfb5bc30552358629775a142a', # SphereFace - SphereFaceWeights.SPHERE20: '#', - SphereFaceWeights.SPHERE36: '#', + SphereFaceWeights.SPHERE20: 'c02878cf658eb1861f580b7e7144b0d27cc29c440bcaa6a99d466d2854f14c9d', + SphereFaceWeights.SPHERE36: '13b3890cd5d7dec2b63f7c36fd7ce07403e5a0bbb701d9647c0289e6cbe7bb20', # ArcFace diff --git a/uniface/detection/retinaface.py b/uniface/detection/retinaface.py index 7957262..659c0e4 100644 --- a/uniface/detection/retinaface.py +++ b/uniface/detection/retinaface.py @@ -287,15 +287,15 @@ if __name__ == "__main__": cap = cv2.VideoCapture(0) if not cap.isOpened(): - print("❌ Failed to open webcam.") + print("Failed to open webcam.") exit() - print("📷 Webcam started. Press 'q' to exit.") + print("Webcam started. Press 'q' to exit.") while True: ret, frame = cap.read() if not ret: - print("❌ Failed to read frame.") + print("Failed to read frame.") break # Get face detections as list of dictionaries diff --git a/uniface/detection/scrfd.py b/uniface/detection/scrfd.py index c8ceab3..78b497a 100644 --- a/uniface/detection/scrfd.py +++ b/uniface/detection/scrfd.py @@ -280,15 +280,15 @@ if __name__ == "__main__": cap = cv2.VideoCapture(0) if not cap.isOpened(): - print("❌ Failed to open webcam.") + print("Failed to open webcam.") exit() - print("📷 Webcam started. Press 'q' to exit.") + print("Webcam started. 
Press 'q' to exit.") while True: ret, frame = cap.read() if not ret: - print("❌ Failed to read frame.") + print("Failed to read frame.") break # Get face detections as list of dictionaries diff --git a/uniface/face_utils.py b/uniface/face_utils.py index 96b8eb5..d50a95d 100644 --- a/uniface/face_utils.py +++ b/uniface/face_utils.py @@ -5,7 +5,7 @@ import cv2 import numpy as np from skimage.transform import SimilarityTransform -from typing import Tuple +from typing import Tuple, Union __all__ = ["face_alignment", "compute_similarity", "bbox_center_alignment", "transform_points_2d"] @@ -24,13 +24,14 @@ reference_alignment: np.ndarray = np.array( ) -def estimate_norm(landmark: np.ndarray, image_size: int = 112) -> Tuple[np.ndarray, np.ndarray]: +def estimate_norm(landmark: np.ndarray, image_size: Union[int, Tuple[int, int]] = 112) -> Tuple[np.ndarray, np.ndarray]: """ Estimate the normalization transformation matrix for facial landmarks. Args: landmark (np.ndarray): Array of shape (5, 2) representing the coordinates of the facial landmarks. - image_size (int, optional): The size of the output image. Default is 112. + image_size (Union[int, Tuple[int, int]], optional): The size of the output image. + Can be an integer (for square images) or a tuple (width, height). Default is 112. Returns: np.ndarray: The 2x3 transformation matrix for aligning the landmarks. @@ -41,13 +42,20 @@ def estimate_norm(landmark: np.ndarray, image_size: int = 112) -> Tuple[np.ndarr or if image_size is not a multiple of 112 or 128. """ assert landmark.shape == (5, 2), "Landmark array must have shape (5, 2)." - assert image_size % 112 == 0 or image_size % 128 == 0, "Image size must be a multiple of 112 or 128." - if image_size % 112 == 0: - ratio = float(image_size) / 112.0 + # Handle both int and tuple inputs + if isinstance(image_size, tuple): + size = image_size[0] # Use width for ratio calculation + else: + size = image_size + + assert size % 112 == 0 or size % 128 == 0, "Image size must be a multiple of 112 or 128." + + if size % 112 == 0: + ratio = float(size) / 112.0 diff_x = 0.0 else: - ratio = float(image_size) / 128.0 + ratio = float(size) / 128.0 diff_x = 8.0 * ratio # Adjust reference alignment based on ratio and diff_x @@ -64,14 +72,15 @@ def estimate_norm(landmark: np.ndarray, image_size: int = 112) -> Tuple[np.ndarr return matrix, inverse_matrix -def face_alignment(image: np.ndarray, landmark: np.ndarray, image_size: int = 112) -> Tuple[np.ndarray, np.ndarray]: +def face_alignment(image: np.ndarray, landmark: np.ndarray, image_size: Union[int, Tuple[int, int]] = 112) -> Tuple[np.ndarray, np.ndarray]: """ Align the face in the input image based on the given facial landmarks. Args: image (np.ndarray): Input image as a NumPy array. landmark (np.ndarray): Array of shape (5, 2) representing the coordinates of the facial landmarks. - image_size (int, optional): The size of the aligned output image. Default is 112. + image_size (Union[int, Tuple[int, int]], optional): The size of the aligned output image. + Can be an integer (for square images) or a tuple (width, height). Default is 112. Returns: np.ndarray: The aligned face as a NumPy array. 
@@ -80,8 +89,14 @@ def face_alignment(image: np.ndarray, landmark: np.ndarray, image_size: int = 11 # Get the transformation matrix M, M_inv = estimate_norm(landmark, image_size) + # Handle both int and tuple for warpAffine output size + if isinstance(image_size, int): + output_size = (image_size, image_size) + else: + output_size = image_size + # Warp the input image to align the face - warped = cv2.warpAffine(image, M, (image_size, image_size), borderValue=0.0) + warped = cv2.warpAffine(image, M, output_size, borderValue=0.0) return warped, M_inv diff --git a/uniface/landmark/models.py b/uniface/landmark/models.py index 8dcd100..40c3cf8 100644 --- a/uniface/landmark/models.py +++ b/uniface/landmark/models.py @@ -157,15 +157,13 @@ class Landmark106(BaseLandmarker): -# TODO: For testing purposes only, remote later +# Testing code if __name__ == "__main__": - # UPDATED: Use the high-level factory functions - from uniface.detection import create_detector - from uniface.landmark import create_landmarker + from uniface.detection import RetinaFace + from uniface.landmark import Landmark106 - # 1. Create the detector and landmarker using the new API - face_detector = create_detector('retinaface') - landmarker = create_landmarker() # Uses the default '2d106det' method + face_detector = RetinaFace() + landmarker = Landmark106() cap = cv2.VideoCapture(0) if not cap.isOpened(): diff --git a/uniface/recognition/base.py b/uniface/recognition/base.py index f581d5e..e38cbca 100644 --- a/uniface/recognition/base.py +++ b/uniface/recognition/base.py @@ -109,19 +109,23 @@ class BaseRecognizer(ABC): return blob - def get_embedding(self, image: np.ndarray, landmarks: np.ndarray) -> np.ndarray: + def get_embedding(self, image: np.ndarray, landmarks: np.ndarray = None) -> np.ndarray: """ Extracts face embedding from an image. Args: - image: Input face image (BGR format). - landmarks: Facial landmarks (5 points for alignment). + image: Input face image (BGR format). If already aligned (112x112), landmarks can be None. + landmarks: Facial landmarks (5 points for alignment). Optional if image is already aligned. Returns: Face embedding vector (typically 512-dimensional). """ - # Align face using landmarks - aligned_face, _ = face_alignment(image, landmarks) + # If landmarks are provided, align the face first + if landmarks is not None: + aligned_face, _ = face_alignment(image, landmarks, image_size=self.input_size) + else: + # Assume image is already aligned + aligned_face = image # Generate embedding from aligned face face_blob = self.preprocess(aligned_face)
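+        # Note: when landmarks is None, aligned_face is the caller-supplied image
+        # and is expected to be an already-aligned face crop (typically 112x112);
+        # no alignment or size check is performed in that case.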