Merge pull request #23 from yakhyo/test-files-update

feat: Some minor changes to code style and warning suppression
Yakhyokhuja Valikhujaev
2025-11-26 00:16:49 +09:00
committed by GitHub
41 changed files with 1164 additions and 1300 deletions


@@ -394,12 +394,28 @@ pip install -e ".[dev]"
# Run tests
pytest
# Format code
black uniface/
isort uniface/
```
### Code Formatting
This project uses [Ruff](https://docs.astral.sh/ruff/) for linting and formatting.
```bash
# Format code
ruff format .
# Check for linting errors
ruff check .
# Auto-fix linting errors
ruff check . --fix
```
Ruff configuration is in `pyproject.toml`. Key settings:
- Line length: 120
- Python target: 3.10+
- Import sorting: `uniface` as first-party
### Project Structure
```

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long


@@ -1,6 +1,6 @@
[project]
name = "uniface"
version = "1.1.0"
version = "1.1.1"
description = "UniFace: A Comprehensive Library for Face Detection, Recognition, Landmark Analysis, Age, and Gender Detection"
readme = "README.md"
license = { text = "MIT" }
@@ -19,7 +19,7 @@ dependencies = [
requires-python = ">=3.10"
[project.optional-dependencies]
dev = ["pytest>=7.0.0"]
dev = ["pytest>=7.0.0", "ruff>=0.4.0"]
gpu = ["onnxruntime-gpu>=1.16.0"]
[project.urls]
@@ -35,3 +35,13 @@ packages = { find = {} }
[tool.setuptools.package-data]
"uniface" = ["*.txt", "*.md"]
[tool.ruff]
line-length = 120
target-version = "py310"
[tool.ruff.lint]
select = ["E", "F", "I", "W"]
[tool.ruff.lint.isort]
known-first-party = ["uniface"]


@@ -1,97 +1,68 @@
# Scripts
Collection of example scripts demonstrating UniFace functionality.
Scripts for testing UniFace features.
## Available Scripts
- `run_detection.py` - Face detection on images
- `run_age_gender.py` - Age and gender prediction
- `run_landmarks.py` - Facial landmark detection
- `run_recognition.py` - Face recognition and embeddings
- `run_face_search.py` - Face search and matching
- `run_video_detection.py` - Video processing with face detection
- `batch_process.py` - Batch processing of image folders
- `download_model.py` - Download and manage models
| Script | Description |
|--------|-------------|
| `run_detection.py` | Face detection on image or webcam |
| `run_age_gender.py` | Age and gender prediction |
| `run_landmarks.py` | 106-point facial landmark detection |
| `run_recognition.py` | Face embedding extraction and comparison |
| `run_face_search.py` | Real-time face matching against reference |
| `run_video_detection.py` | Face detection on video files |
| `batch_process.py` | Batch process folder of images |
| `download_model.py` | Download model weights |
| `sha256_generate.py` | Generate SHA256 hash for model files |
## Quick Start
## Usage Examples
```bash
# Face detection
python scripts/run_detection.py --image assets/test.jpg
python scripts/run_detection.py --webcam
# Age and gender detection
# Age and gender
python scripts/run_age_gender.py --image assets/test.jpg
# Webcam demo
python scripts/run_age_gender.py --webcam
# Landmarks
python scripts/run_landmarks.py --image assets/test.jpg
python scripts/run_landmarks.py --webcam
# Face recognition (extract embedding)
python scripts/run_recognition.py --image assets/test.jpg
# Face comparison
python scripts/run_recognition.py --image1 face1.jpg --image2 face2.jpg
# Face search (match webcam against reference)
python scripts/run_face_search.py --image reference.jpg
# Video processing
python scripts/run_video_detection.py --input video.mp4 --output output.mp4
# Batch processing
python scripts/batch_process.py --input images/ --output results/
# Download models
python scripts/download_model.py --model-type retinaface
python scripts/download_model.py # downloads all
```
## Import Examples
The scripts use direct class imports for better developer experience:
```python
# Face Detection
from uniface.detection import RetinaFace, SCRFD
detector = RetinaFace() # or SCRFD()
faces = detector.detect(image)
# Face Recognition
from uniface.recognition import ArcFace, MobileFace, SphereFace
recognizer = ArcFace() # or MobileFace(), SphereFace()
embedding = recognizer.get_embedding(image, landmarks)
# Age & Gender
from uniface.attribute import AgeGender
age_gender = AgeGender()
gender, age = age_gender.predict(image, bbox)
# Landmarks
from uniface.landmark import Landmark106
landmarker = Landmark106()
landmarks = landmarker.get_landmarks(image, bbox)
```
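Putting the pieces together, here is a minimal end-to-end sketch of the detect → embed flow, using only the APIs shown above (the image path is a placeholder):
```python
import cv2
import numpy as np

from uniface.detection import RetinaFace
from uniface.recognition import ArcFace

detector = RetinaFace()
recognizer = ArcFace()

image = cv2.imread("face.jpg")  # placeholder path
faces = detector.detect(image)
if faces:
    # 5-point landmarks from the detector drive alignment before embedding
    landmarks = np.array(faces[0]["landmarks"])
    embedding = recognizer.get_normalized_embedding(image, landmarks)
    print(embedding.shape)
```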
## Available Classes
**Detection:**
- `RetinaFace` - High accuracy face detection
- `SCRFD` - Fast face detection
**Recognition:**
- `ArcFace` - High accuracy face recognition
- `MobileFace` - Lightweight face recognition
- `SphereFace` - Alternative face recognition
**Attributes:**
- `AgeGender` - Age and gender prediction
**Landmarks:**
- `Landmark106` - 106-point facial landmarks
## Common Options
Most scripts support:
- `--help` - Show usage information
- `--verbose` - Enable detailed logging
- `--detector` - Choose detector (retinaface, scrfd)
- `--threshold` - Set confidence threshold
| Option | Description |
|--------|-------------|
| `--image` | Path to input image |
| `--webcam` | Use webcam instead of image |
| `--detector` | Choose detector: `retinaface` or `scrfd` |
| `--threshold` | Visualization confidence threshold (default: 0.6) |
| `--save_dir` | Output directory (default: `outputs`) |
## Testing
## Quick Test
Run basic functionality test:
```bash
python scripts/run_detection.py --image assets/test.jpg
```
For comprehensive testing, see the main project tests:
```bash
pytest tests/
```


@@ -1,156 +1,95 @@
"""Batch Image Processing Script"""
# Batch face detection on a folder of images
# Usage: python batch_process.py --input images/ --output results/
import os
import cv2
import argparse
from pathlib import Path
import cv2
from tqdm import tqdm
from uniface import RetinaFace, SCRFD
from uniface import SCRFD, RetinaFace
from uniface.visualization import draw_detections
def get_image_files(input_dir: Path, extensions: tuple) -> list:
image_files = []
files = []
for ext in extensions:
image_files.extend(input_dir.glob(f"*.{ext}"))
image_files.extend(input_dir.glob(f"*.{ext.upper()}"))
return sorted(image_files)
files.extend(input_dir.glob(f"*.{ext}"))
files.extend(input_dir.glob(f"*.{ext.upper()}"))
return sorted(files)
def process_single_image(detector, image_path: Path, output_dir: Path,
vis_threshold: float, skip_existing: bool) -> dict:
output_path = output_dir / f"{image_path.stem}_detected{image_path.suffix}"
# Skip if already processed
if skip_existing and output_path.exists():
return {"status": "skipped", "faces": 0}
# Load image
def process_image(detector, image_path: Path, output_path: Path, threshold: float) -> int:
"""Process single image. Returns face count or -1 on error."""
image = cv2.imread(str(image_path))
if image is None:
return {"status": "error", "error": "Failed to load image"}
return -1
# Detect faces
try:
faces = detector.detect(image)
except Exception as e:
return {"status": "error", "error": str(e)}
faces = detector.detect(image)
# Draw detections
bboxes = [f['bbox'] for f in faces]
scores = [f['confidence'] for f in faces]
landmarks = [f['landmarks'] for f in faces]
draw_detections(image, bboxes, scores, landmarks, vis_threshold=vis_threshold)
# unpack face data for visualization
bboxes = [f["bbox"] for f in faces]
scores = [f["confidence"] for f in faces]
landmarks = [f["landmarks"] for f in faces]
draw_detections(image, bboxes, scores, landmarks, vis_threshold=threshold)
# Add face count
cv2.putText(image, f"Faces: {len(faces)}", (10, 30),
cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
# Save result
cv2.putText(
image,
f"Faces: {len(faces)}",
(10, 30),
cv2.FONT_HERSHEY_SIMPLEX,
1,
(0, 255, 0),
2,
)
cv2.imwrite(str(output_path), image)
return {"status": "success", "faces": len(faces)}
def batch_process(detector, input_dir: str, output_dir: str, extensions: tuple,
vis_threshold: float, skip_existing: bool):
input_path = Path(input_dir)
output_path = Path(output_dir)
# Create output directory
output_path.mkdir(parents=True, exist_ok=True)
# Get image files
image_files = get_image_files(input_path, extensions)
if not image_files:
print(f"No image files found in '{input_dir}' with extensions {extensions}")
return
print(f"Input: {input_dir}")
print(f"Output: {output_dir}")
print(f"Found {len(image_files)} images\n")
# Process images
results = {
"success": 0,
"skipped": 0,
"error": 0,
"total_faces": 0
}
with tqdm(image_files, desc="Processing images", unit="img") as pbar:
for image_path in pbar:
result = process_single_image(
detector, image_path, output_path,
vis_threshold, skip_existing
)
if result["status"] == "success":
results["success"] += 1
results["total_faces"] += result["faces"]
pbar.set_postfix({"faces": result["faces"]})
elif result["status"] == "skipped":
results["skipped"] += 1
else:
results["error"] += 1
print(f"\nError processing {image_path.name}: {result.get('error', 'Unknown error')}")
# Print summary
print(f"\nBatch processing complete!")
print(f" Total images: {len(image_files)}")
print(f" Successfully processed: {results['success']}")
print(f" Skipped: {results['skipped']}")
print(f" Errors: {results['error']}")
print(f" Total faces detected: {results['total_faces']}")
if results['success'] > 0:
print(f" Average faces per image: {results['total_faces']/results['success']:.2f}")
print(f"\nResults saved to: {output_dir}")
return len(faces)
def main():
parser = argparse.ArgumentParser(description="Batch process images with face detection")
parser.add_argument("--input", type=str, required=True,
help="Input directory containing images")
parser.add_argument("--output", type=str, required=True,
help="Output directory for processed images")
parser.add_argument("--detector", type=str, default="retinaface",
choices=['retinaface', 'scrfd'], help="Face detector to use")
parser.add_argument("--threshold", type=float, default=0.6,
help="Confidence threshold for visualization")
parser.add_argument("--extensions", type=str, default="jpg,jpeg,png,bmp",
help="Comma-separated list of image extensions")
parser.add_argument("--skip_existing", action="store_true",
help="Skip files that already exist in output directory")
parser.add_argument("--verbose", action="store_true", help="Enable verbose logging")
parser.add_argument("--input", type=str, required=True, help="Input directory")
parser.add_argument("--output", type=str, required=True, help="Output directory")
parser.add_argument("--detector", type=str, default="retinaface", choices=["retinaface", "scrfd"])
parser.add_argument("--threshold", type=float, default=0.6, help="Visualization threshold")
parser.add_argument("--extensions", type=str, default="jpg,jpeg,png,bmp", help="Image extensions")
args = parser.parse_args()
# Check input directory exists
if not Path(args.input).exists():
input_path = Path(args.input)
output_path = Path(args.output)
if not input_path.exists():
print(f"Error: Input directory '{args.input}' does not exist")
return
if args.verbose:
from uniface import enable_logging
enable_logging()
output_path.mkdir(parents=True, exist_ok=True)
# Parse extensions
extensions = tuple(ext.strip() for ext in args.extensions.split(','))
extensions = tuple(ext.strip() for ext in args.extensions.split(","))
image_files = get_image_files(input_path, extensions)
# Initialize detector
print(f"Initializing detector: {args.detector}")
if args.detector == 'retinaface':
detector = RetinaFace()
else:
detector = SCRFD()
print("Detector initialized\n")
if not image_files:
print(f"No images found with extensions {extensions}")
return
# Process batch
batch_process(detector, args.input, args.output, extensions,
args.threshold, args.skip_existing)
print(f"Found {len(image_files)} images")
detector = RetinaFace() if args.detector == "retinaface" else SCRFD()
success, errors, total_faces = 0, 0, 0
for img_path in tqdm(image_files, desc="Processing", unit="img"):
out_path = output_path / f"{img_path.stem}_detected{img_path.suffix}"
result = process_image(detector, img_path, out_path, args.threshold)
if result >= 0:
success += 1
total_faces += result
else:
errors += 1
print(f"\nFailed: {img_path.name}")
print(f"\nDone! {success} processed, {errors} errors, {total_faces} faces total")
if __name__ == "__main__":


@@ -1,76 +1,59 @@
import argparse
from uniface.constants import (
RetinaFaceWeights, SphereFaceWeights, MobileFaceWeights, ArcFaceWeights,
SCRFDWeights, DDAMFNWeights, AgeGenderWeights, LandmarkWeights
AgeGenderWeights,
ArcFaceWeights,
DDAMFNWeights,
LandmarkWeights,
MobileFaceWeights,
RetinaFaceWeights,
SCRFDWeights,
SphereFaceWeights,
)
from uniface.model_store import verify_model_weights
# All available model types
ALL_MODEL_TYPES = {
'retinaface': RetinaFaceWeights,
'sphereface': SphereFaceWeights,
'mobileface': MobileFaceWeights,
'arcface': ArcFaceWeights,
'scrfd': SCRFDWeights,
'ddamfn': DDAMFNWeights,
'agegender': AgeGenderWeights,
'landmark': LandmarkWeights,
MODEL_TYPES = {
"retinaface": RetinaFaceWeights,
"sphereface": SphereFaceWeights,
"mobileface": MobileFaceWeights,
"arcface": ArcFaceWeights,
"scrfd": SCRFDWeights,
"ddamfn": DDAMFNWeights,
"agegender": AgeGenderWeights,
"landmark": LandmarkWeights,
}
def download_models(model_enum):
for weight in model_enum:
print(f"Downloading: {weight.value}")
try:
verify_model_weights(weight)
print(f" Done: {weight.value}")
except Exception as e:
print(f" Failed: {e}")
def main():
parser = argparse.ArgumentParser(description="Download and verify model weights.")
parser = argparse.ArgumentParser(description="Download model weights")
parser.add_argument(
"--model-type",
type=str,
choices=list(ALL_MODEL_TYPES.keys()),
help="Model type to download (e.g. retinaface, arcface). If not specified, all models will be downloaded.",
)
parser.add_argument(
"--model",
type=str,
help="Specific model to download (e.g. MNET_V2). For RetinaFace backward compatibility.",
choices=list(MODEL_TYPES.keys()),
help="Model type to download. If not specified, downloads all.",
)
args = parser.parse_args()
if args.model and not args.model_type:
# Backward compatibility - assume RetinaFace
try:
weight = RetinaFaceWeights[args.model]
print(f"Downloading RetinaFace model: {weight.value}")
verify_model_weights(weight)
print("Model downloaded successfully.")
except KeyError:
print(f"Invalid RetinaFace model: {args.model}")
print(f"Available models: {[m.name for m in RetinaFaceWeights]}")
return
if args.model_type:
# Download all models from specific type
model_enum = ALL_MODEL_TYPES[args.model_type]
print(f"Downloading all {args.model_type} models...")
for weight in model_enum:
print(f"Downloading: {weight.value}")
try:
verify_model_weights(weight)
print(f"Downloaded: {weight.value}")
except Exception as e:
print(f"Failed to download {weight.value}: {e}")
print(f"Downloading {args.model_type} models...")
download_models(MODEL_TYPES[args.model_type])
else:
# Download all models from all types
print("Downloading all models...")
for model_type, model_enum in ALL_MODEL_TYPES.items():
print(f"\nDownloading {model_type} models...")
for weight in model_enum:
print(f"Downloading: {weight.value}")
try:
verify_model_weights(weight)
print(f"Downloaded: {weight.value}")
except Exception as e:
print(f"Failed to download {weight.value}: {e}")
for name, model_enum in MODEL_TYPES.items():
print(f"\n{name}:")
download_models(model_enum)
print("\nDownload process completed.")
print("\nDone!")
if __name__ == "__main__":


@@ -1,158 +1,119 @@
"""Age and Gender Detection Demo Script"""
# Age and gender prediction on detected faces
# Usage: python run_age_gender.py --image path/to/image.jpg
# python run_age_gender.py --webcam
import os
import cv2
import argparse
import os
from pathlib import Path
from uniface import RetinaFace, SCRFD, AgeGender
import cv2
from uniface import SCRFD, AgeGender, RetinaFace
from uniface.visualization import draw_detections
def process_image(detector, age_gender, image_path: str, save_dir: str = "outputs", vis_threshold: float = 0.6):
def draw_age_gender_label(image, bbox, gender: str, age: int):
"""Draw age/gender label above the bounding box."""
x1, y1 = int(bbox[0]), int(bbox[1])
text = f"{gender}, {age}y"
(tw, th), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
cv2.rectangle(image, (x1, y1 - th - 10), (x1 + tw + 10, y1), (0, 255, 0), -1)
cv2.putText(image, text, (x1 + 5, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 2)
def process_image(
detector,
age_gender,
image_path: str,
save_dir: str = "outputs",
threshold: float = 0.6,
):
image = cv2.imread(image_path)
if image is None:
print(f"Error: Failed to load image from '{image_path}'")
return
print(f"Processing: {image_path}")
# Detect faces
faces = detector.detect(image)
print(f" Detected {len(faces)} face(s)")
print(f"Detected {len(faces)} face(s)")
if not faces:
print(" No faces detected")
return
# Draw detections
bboxes = [f['bbox'] for f in faces]
scores = [f['confidence'] for f in faces]
landmarks = [f['landmarks'] for f in faces]
draw_detections(image, bboxes, scores, landmarks, vis_threshold=vis_threshold)
bboxes = [f["bbox"] for f in faces]
scores = [f["confidence"] for f in faces]
landmarks = [f["landmarks"] for f in faces]
draw_detections(image, bboxes, scores, landmarks, vis_threshold=threshold)
# Predict and draw age/gender for each face
for i, face in enumerate(faces):
gender, age = age_gender.predict(image, face['bbox'])
print(f" Face {i+1}: {gender}, {age} years old")
gender, age = age_gender.predict(image, face["bbox"])
print(f" Face {i + 1}: {gender}, {age} years old")
draw_age_gender_label(image, face["bbox"], gender, age)
# Draw age and gender text
bbox = face['bbox']
x1, y1 = int(bbox[0]), int(bbox[1])
text = f"{gender}, {age}y"
# Background rectangle for text
(text_width, text_height), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
cv2.rectangle(image, (x1, y1 - text_height - 10),
(x1 + text_width + 10, y1), (0, 255, 0), -1)
cv2.putText(image, text, (x1 + 5, y1 - 5),
cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 2)
# Save result
os.makedirs(save_dir, exist_ok=True)
output_path = os.path.join(save_dir, f"{Path(image_path).stem}_age_gender.jpg")
cv2.imwrite(output_path, image)
print(f"Output saved: {output_path}")
def run_webcam(detector, age_gender, vis_threshold: float = 0.6):
cap = cv2.VideoCapture(0)
def run_webcam(detector, age_gender, threshold: float = 0.6):
cap = cv2.VideoCapture(0) # 0 = default webcam
if not cap.isOpened():
print("Cannot open webcam")
return
print("Webcam opened")
print("Press 'q' to quit\n")
print("Press 'q' to quit")
frame_count = 0
while True:
ret, frame = cap.read()
if not ret:
break
frame = cv2.flip(frame, 1) # mirror for natural interaction
try:
while True:
ret, frame = cap.read()
if not ret:
break
faces = detector.detect(frame)
frame_count += 1
# unpack face data for visualization
bboxes = [f["bbox"] for f in faces]
scores = [f["confidence"] for f in faces]
landmarks = [f["landmarks"] for f in faces]
draw_detections(frame, bboxes, scores, landmarks, vis_threshold=threshold)
# Detect faces
faces = detector.detect(frame)
for face in faces:
gender, age = age_gender.predict(frame, face["bbox"]) # predict per face
draw_age_gender_label(frame, face["bbox"], gender, age)
# Draw detections
bboxes = [f['bbox'] for f in faces]
scores = [f['confidence'] for f in faces]
landmarks = [f['landmarks'] for f in faces]
draw_detections(frame, bboxes, scores, landmarks, vis_threshold=vis_threshold)
cv2.putText(
frame,
f"Faces: {len(faces)}",
(10, 30),
cv2.FONT_HERSHEY_SIMPLEX,
1,
(0, 255, 0),
2,
)
cv2.imshow("Age & Gender Detection", frame)
# Predict and draw age/gender for each face
for face in faces:
gender, age = age_gender.predict(frame, face['bbox'])
if cv2.waitKey(1) & 0xFF == ord("q"):
break
# Draw age and gender text
bbox = face['bbox']
x1, y1 = int(bbox[0]), int(bbox[1])
text = f"{gender}, {age}y"
# Background rectangle for text
(text_width, text_height), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
cv2.rectangle(frame, (x1, y1 - text_height - 10),
(x1 + text_width + 10, y1), (0, 255, 0), -1)
cv2.putText(frame, text, (x1 + 5, y1 - 5),
cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 2)
# Add info
cv2.putText(frame, f"Faces: {len(faces)}", (10, 30),
cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
cv2.putText(frame, "Press 'q' to quit", (10, frame.shape[0] - 10),
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
cv2.imshow("Age & Gender Detection", frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
except KeyboardInterrupt:
print("\nInterrupted")
finally:
cap.release()
cv2.destroyAllWindows()
print(f"\nProcessed {frame_count} frames")
cap.release()
cv2.destroyAllWindows()
def main():
parser = argparse.ArgumentParser(description="Run age and gender detection")
parser.add_argument("--image", type=str, help="Path to input image")
parser.add_argument("--webcam", action="store_true", help="Use webcam instead of image")
parser.add_argument("--detector", type=str, default="retinaface",
choices=['retinaface', 'scrfd'], help="Face detector to use")
parser.add_argument("--threshold", type=float, default=0.6,
help="Confidence threshold for visualization")
parser.add_argument("--save_dir", type=str, default="outputs",
help="Directory to save output images")
parser.add_argument("--verbose", action="store_true", help="Enable verbose logging")
parser.add_argument("--webcam", action="store_true", help="Use webcam")
parser.add_argument("--detector", type=str, default="retinaface", choices=["retinaface", "scrfd"])
parser.add_argument("--threshold", type=float, default=0.6, help="Visualization threshold")
parser.add_argument("--save_dir", type=str, default="outputs")
args = parser.parse_args()
# Validate input
if not args.image and not args.webcam:
parser.error("Either --image or --webcam must be specified")
if args.verbose:
from uniface import enable_logging
enable_logging()
# Initialize models
print(f"Initializing detector: {args.detector}")
if args.detector == 'retinaface':
detector = RetinaFace()
else:
detector = SCRFD()
print("Initializing age/gender model...")
detector = RetinaFace() if args.detector == "retinaface" else SCRFD()
age_gender = AgeGender()
print("Models initialized\n")
# Process
if args.webcam:
run_webcam(detector, age_gender, args.threshold)
else:


@@ -1,79 +1,94 @@
import os
import cv2
import time
import argparse
import numpy as np
# Face detection on image or webcam
# Usage: python run_detection.py --image path/to/image.jpg
# python run_detection.py --webcam
from uniface.detection import RetinaFace, SCRFD
import argparse
import os
import cv2
from uniface.detection import SCRFD, RetinaFace
from uniface.visualization import draw_detections
def run_inference(detector, image_path: str, vis_threshold: float = 0.6, save_dir: str = "outputs"):
def process_image(detector, image_path: str, threshold: float = 0.6, save_dir: str = "outputs"):
image = cv2.imread(image_path)
if image is None:
print(f"Error: Failed to load image from '{image_path}'")
return
# 1. Get the list of face dictionaries from the detector
faces = detector.detect(image)
if faces:
# 2. Unpack the data into separate lists
bboxes = [face['bbox'] for face in faces]
scores = [face['confidence'] for face in faces]
landmarks = [face['landmarks'] for face in faces]
# 3. Pass the unpacked lists to the drawing function
draw_detections(image, bboxes, scores, landmarks, vis_threshold=0.6)
bboxes = [face["bbox"] for face in faces]
scores = [face["confidence"] for face in faces]
landmarks = [face["landmarks"] for face in faces]
draw_detections(image, bboxes, scores, landmarks, vis_threshold=threshold)
os.makedirs(save_dir, exist_ok=True)
output_path = os.path.join(save_dir, f"{os.path.splitext(os.path.basename(image_path))[0]}_out.jpg")
cv2.imwrite(output_path, image)
print(f"Output saved at: {output_path}")
print(f"Output saved: {output_path}")
def run_webcam(detector, threshold: float = 0.6):
cap = cv2.VideoCapture(0) # 0 = default webcam
if not cap.isOpened():
print("Cannot open webcam")
return
print("Press 'q' to quit")
while True:
ret, frame = cap.read()
if not ret:
break
frame = cv2.flip(frame, 1) # mirror for natural interaction
faces = detector.detect(frame)
# unpack face data for visualization
bboxes = [f["bbox"] for f in faces]
scores = [f["confidence"] for f in faces]
landmarks = [f["landmarks"] for f in faces]
draw_detections(frame, bboxes, scores, landmarks, vis_threshold=threshold)
cv2.putText(
frame,
f"Faces: {len(faces)}",
(10, 30),
cv2.FONT_HERSHEY_SIMPLEX,
1,
(0, 255, 0),
2,
)
cv2.imshow("Face Detection", frame)
if cv2.waitKey(1) & 0xFF == ord("q"):
break
cap.release()
cv2.destroyAllWindows()
def main():
parser = argparse.ArgumentParser(description="Run face detection on an image.")
parser.add_argument("--image", type=str, required=True, help="Path to the input image")
parser.add_argument(
"--method",
type=str,
default="retinaface",
choices=['retinaface', 'scrfd'],
help="Detection method to use."
)
parser.add_argument("--threshold", type=float, default=0.6, help="Visualization confidence threshold")
parser.add_argument("--iterations", type=int, default=1, help="Number of inference runs for benchmarking")
parser.add_argument("--save_dir", type=str, default="outputs", help="Directory to save output images")
parser.add_argument("--verbose", action="store_true", help="Enable verbose logging")
parser = argparse.ArgumentParser(description="Run face detection")
parser.add_argument("--image", type=str, help="Path to input image")
parser.add_argument("--webcam", action="store_true", help="Use webcam")
parser.add_argument("--method", type=str, default="retinaface", choices=["retinaface", "scrfd"])
parser.add_argument("--threshold", type=float, default=0.6, help="Visualization threshold")
parser.add_argument("--save_dir", type=str, default="outputs")
args = parser.parse_args()
if args.verbose:
from uniface import enable_logging
enable_logging()
if not args.image and not args.webcam:
parser.error("Either --image or --webcam must be specified")
print(f"Initializing detector: {args.method}")
if args.method == 'retinaface':
detector = RetinaFace()
detector = RetinaFace() if args.method == "retinaface" else SCRFD()
if args.webcam:
run_webcam(detector, args.threshold)
else:
detector = SCRFD()
avg_time = 0
for i in range(args.iterations):
start = time.time()
run_inference(detector, args.image, args.threshold, args.save_dir)
elapsed = time.time() - start
print(f"[{i + 1}/{args.iterations}] Inference time: {elapsed:.4f} seconds")
if i >= 0: # Avoid counting the first run if it includes model loading time
avg_time += elapsed
if args.iterations > 1:
# Adjust average calculation to exclude potential first-run overhead
effective_iterations = max(1, args.iterations)
print(
f"\nAverage inference time over {effective_iterations} runs: {avg_time / effective_iterations:.4f} seconds")
process_image(detector, args.image, args.threshold, args.save_dir)
if __name__ == "__main__":


@@ -1,13 +1,25 @@
# Real-time face search: match webcam faces against a reference image
# Usage: python run_face_search.py --image reference.jpg
import argparse
import cv2
import numpy as np
from uniface.detection import RetinaFace, SCRFD
from uniface.detection import SCRFD, RetinaFace
from uniface.face_utils import compute_similarity
from uniface.recognition import ArcFace, MobileFace, SphereFace
def get_recognizer(name: str):
if name == "arcface":
return ArcFace()
elif name == "mobileface":
return MobileFace()
else:
return SphereFace()
def extract_reference_embedding(detector, recognizer, image_path: str) -> np.ndarray:
image = cv2.imread(image_path)
if image is None:
@@ -17,44 +29,37 @@ def extract_reference_embedding(detector, recognizer, image_path: str) -> np.nda
if not faces:
raise RuntimeError("No faces found in reference image.")
# Get landmarks from the first detected face dictionary
landmarks = np.array(faces[0]["landmarks"])
# Use normalized embedding for more reliable similarity comparison
embedding = recognizer.get_normalized_embedding(image, landmarks)
return embedding
return recognizer.get_normalized_embedding(image, landmarks)
def run_video(detector, recognizer, ref_embedding: np.ndarray, threshold: float = 0.4):
cap = cv2.VideoCapture(0)
def run_webcam(detector, recognizer, ref_embedding: np.ndarray, threshold: float = 0.4):
cap = cv2.VideoCapture(0) # 0 = default webcam
if not cap.isOpened():
raise RuntimeError("Webcam could not be opened.")
print("Webcam started. Press 'q' to quit.")
print("Press 'q' to quit")
while True:
ret, frame = cap.read()
if not ret:
break
frame = cv2.flip(frame, 1) # mirror for natural interaction
faces = detector.detect(frame)
# Loop through each detected face
for face in faces:
# Extract bbox and landmarks from the dictionary
bbox = face["bbox"]
landmarks = np.array(face["landmarks"])
x1, y1, x2, y2 = map(int, bbox)
# Get the normalized embedding for the current face
embedding = recognizer.get_normalized_embedding(frame, landmarks)
sim = compute_similarity(ref_embedding, embedding) # compare with reference
# Compare with the reference embedding
sim = compute_similarity(ref_embedding, embedding)
# Draw results
# green = match, red = unknown
label = f"Match ({sim:.2f})" if sim > threshold else f"Unknown ({sim:.2f})"
color = (0, 255, 0) if sim > threshold else (0, 0, 255)
cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)
@@ -67,43 +72,25 @@ def run_video(detector, recognizer, ref_embedding: np.ndarray, threshold: float
def main():
parser = argparse.ArgumentParser(description="Face recognition using a reference image.")
parser.add_argument("--image", type=str, required=True, help="Path to the reference face image.")
parser.add_argument(
"--detector", type=str, default="scrfd", choices=["retinaface", "scrfd"], help="Face detection method."
)
parser = argparse.ArgumentParser(description="Face search using a reference image")
parser.add_argument("--image", type=str, required=True, help="Reference face image")
parser.add_argument("--threshold", type=float, default=0.4, help="Match threshold")
parser.add_argument("--detector", type=str, default="scrfd", choices=["retinaface", "scrfd"])
parser.add_argument(
"--recognizer",
type=str,
default="arcface",
choices=["arcface", "mobileface", "sphereface"],
help="Face recognition method.",
)
parser.add_argument("--verbose", action="store_true", help="Enable verbose logging")
args = parser.parse_args()
if args.verbose:
from uniface import enable_logging
detector = RetinaFace() if args.detector == "retinaface" else SCRFD()
recognizer = get_recognizer(args.recognizer)
enable_logging()
print("Initializing models...")
if args.detector == 'retinaface':
detector = RetinaFace()
else:
detector = SCRFD()
if args.recognizer == 'arcface':
recognizer = ArcFace()
elif args.recognizer == 'mobileface':
recognizer = MobileFace()
else:
recognizer = SphereFace()
print("Extracting reference embedding...")
print(f"Loading reference: {args.image}")
ref_embedding = extract_reference_embedding(detector, recognizer, args.image)
run_video(detector, recognizer, ref_embedding)
run_webcam(detector, recognizer, ref_embedding, args.threshold)
if __name__ == "__main__":


@@ -1,11 +1,14 @@
"""Facial Landmark Detection Demo Script"""
# 106-point facial landmark detection
# Usage: python run_landmarks.py --image path/to/image.jpg
# python run_landmarks.py --webcam
import os
import cv2
import argparse
import os
from pathlib import Path
from uniface import RetinaFace, SCRFD, Landmark106
import cv2
from uniface import SCRFD, Landmark106, RetinaFace
def process_image(detector, landmarker, image_path: str, save_dir: str = "outputs"):
@@ -14,39 +17,33 @@ def process_image(detector, landmarker, image_path: str, save_dir: str = "output
print(f"Error: Failed to load image from '{image_path}'")
return
print(f"Processing: {image_path}")
# Detect faces
faces = detector.detect(image)
print(f" Detected {len(faces)} face(s)")
print(f"Detected {len(faces)} face(s)")
if not faces:
print(" No faces detected")
return
# Process each face
for i, face in enumerate(faces):
# Draw bounding box
bbox = face['bbox']
bbox = face["bbox"]
x1, y1, x2, y2 = map(int, bbox)
cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
# Get and draw 106 landmarks
landmarks = landmarker.get_landmarks(image, bbox)
print(f" Face {i+1}: Extracted {len(landmarks)} landmarks")
print(f" Face {i + 1}: {len(landmarks)} landmarks")
for x, y in landmarks.astype(int):
cv2.circle(image, (x, y), 1, (0, 255, 0), -1)
# Add face count
cv2.putText(image, f"Face {i+1}", (x1, y1 - 10),
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
cv2.putText(
image,
f"Face {i + 1}",
(x1, y1 - 10),
cv2.FONT_HERSHEY_SIMPLEX,
0.5,
(0, 255, 0),
2,
)
# Add total count
cv2.putText(image, f"Faces: {len(faces)}", (10, 30),
cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
# Save result
os.makedirs(save_dir, exist_ok=True)
output_path = os.path.join(save_dir, f"{Path(image_path).stem}_landmarks.jpg")
cv2.imwrite(output_path, image)
@@ -54,91 +51,62 @@ def process_image(detector, landmarker, image_path: str, save_dir: str = "output
def run_webcam(detector, landmarker):
cap = cv2.VideoCapture(0)
cap = cv2.VideoCapture(0) # 0 = default webcam
if not cap.isOpened():
print("Cannot open webcam")
return
print("Webcam opened")
print("Press 'q' to quit\n")
print("Press 'q' to quit")
frame_count = 0
while True:
ret, frame = cap.read()
if not ret:
break
frame = cv2.flip(frame, 1) # mirror for natural interaction
try:
while True:
ret, frame = cap.read()
if not ret:
break
faces = detector.detect(frame)
frame_count += 1
for face in faces:
bbox = face["bbox"]
x1, y1, x2, y2 = map(int, bbox)
cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
# Detect faces
faces = detector.detect(frame)
landmarks = landmarker.get_landmarks(frame, bbox) # 106 points
for x, y in landmarks.astype(int):
cv2.circle(frame, (x, y), 1, (0, 255, 0), -1)
# Process each face
for face in faces:
# Draw bounding box
bbox = face['bbox']
x1, y1, x2, y2 = map(int, bbox)
cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
cv2.putText(
frame,
f"Faces: {len(faces)}",
(10, 30),
cv2.FONT_HERSHEY_SIMPLEX,
1,
(0, 255, 0),
2,
)
cv2.imshow("106-Point Landmarks", frame)
# Get and draw 106 landmarks
landmarks = landmarker.get_landmarks(frame, bbox)
for x, y in landmarks.astype(int):
cv2.circle(frame, (x, y), 1, (0, 255, 0), -1)
if cv2.waitKey(1) & 0xFF == ord("q"):
break
# Add info
cv2.putText(frame, f"Faces: {len(faces)}", (10, 30),
cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
cv2.putText(frame, "Press 'q' to quit", (10, frame.shape[0] - 10),
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
cv2.imshow("106-Point Landmarks", frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
except KeyboardInterrupt:
print("\nInterrupted")
finally:
cap.release()
cv2.destroyAllWindows()
print(f"\nProcessed {frame_count} frames")
cap.release()
cv2.destroyAllWindows()
def main():
parser = argparse.ArgumentParser(description="Run facial landmark detection")
parser.add_argument("--image", type=str, help="Path to input image")
parser.add_argument("--webcam", action="store_true", help="Use webcam instead of image")
parser.add_argument("--detector", type=str, default="retinaface",
choices=['retinaface', 'scrfd'], help="Face detector to use")
parser.add_argument("--save_dir", type=str, default="outputs",
help="Directory to save output images")
parser.add_argument("--verbose", action="store_true", help="Enable verbose logging")
parser.add_argument("--webcam", action="store_true", help="Use webcam")
parser.add_argument("--detector", type=str, default="retinaface", choices=["retinaface", "scrfd"])
parser.add_argument("--save_dir", type=str, default="outputs")
args = parser.parse_args()
# Validate input
if not args.image and not args.webcam:
parser.error("Either --image or --webcam must be specified")
if args.verbose:
from uniface import enable_logging
enable_logging()
# Initialize models
print(f"Initializing detector: {args.detector}")
if args.detector == 'retinaface':
detector = RetinaFace()
else:
detector = SCRFD()
print("Initializing landmark detector...")
detector = RetinaFace() if args.detector == "retinaface" else SCRFD()
landmarker = Landmark106()
print("Models initialized\n")
# Process
if args.webcam:
run_webcam(detector, landmarker)
else:


@@ -1,10 +1,24 @@
import cv2
# Face recognition: extract embeddings or compare two faces
# Usage: python run_recognition.py --image path/to/image.jpg
# python run_recognition.py --image1 face1.jpg --image2 face2.jpg
import argparse
import cv2
import numpy as np
from uniface.detection import RetinaFace, SCRFD
from uniface.recognition import ArcFace, MobileFace, SphereFace
from uniface.detection import SCRFD, RetinaFace
from uniface.face_utils import compute_similarity
from uniface.recognition import ArcFace, MobileFace, SphereFace
def get_recognizer(name: str):
if name == "arcface":
return ArcFace()
elif name == "mobileface":
return MobileFace()
else:
return SphereFace()
def run_inference(detector, recognizer, image_path: str):
@@ -14,38 +28,29 @@ def run_inference(detector, recognizer, image_path: str):
return
faces = detector.detect(image)
if not faces:
print("No faces detected.")
return
print(f"Detected {len(faces)} face(s). Extracting embeddings for the first face...")
print(f"Detected {len(faces)} face(s). Extracting embedding for the first face...")
# Process the first detected face
first_face = faces[0]
landmarks = np.array(first_face['landmarks']) # Convert landmarks to numpy array
# Extract embedding using the landmarks from the face dictionary
landmarks = np.array(faces[0]["landmarks"]) # 5-point landmarks for alignment
embedding = recognizer.get_embedding(image, landmarks)
norm_embedding = recognizer.get_normalized_embedding(image, landmarks)
norm_embedding = recognizer.get_normalized_embedding(image, landmarks) # L2 normalized
# Print some info about the embeddings
print(f" - Embedding shape: {embedding.shape}")
print(f" - L2 norm of unnormalized embedding: {np.linalg.norm(embedding):.4f}")
print(f" - L2 norm of normalized embedding: {np.linalg.norm(norm_embedding):.4f}")
print(f" Embedding shape: {embedding.shape}")
print(f" L2 norm (raw): {np.linalg.norm(embedding):.4f}")
print(f" L2 norm (normalized): {np.linalg.norm(norm_embedding):.4f}")
def compare_faces(detector, recognizer, image1_path: str, image2_path: str, threshold: float = 0.35):
# Load images
img1 = cv2.imread(image1_path)
img2 = cv2.imread(image2_path)
if img1 is None or img2 is None:
print(f"Error: Failed to load images")
print("Error: Failed to load one or both images")
return
# Detect faces
faces1 = detector.detect(img1)
faces2 = detector.detect(img2)
@@ -53,74 +58,44 @@ def compare_faces(detector, recognizer, image1_path: str, image2_path: str, thre
print("Error: No faces detected in one or both images")
return
# Get landmarks for first face in each image
landmarks1 = np.array(faces1[0]['landmarks'])
landmarks2 = np.array(faces2[0]['landmarks'])
landmarks1 = np.array(faces1[0]["landmarks"])
landmarks2 = np.array(faces2[0]["landmarks"])
# Get normalized embeddings
embedding1 = recognizer.get_normalized_embedding(img1, landmarks1)
embedding2 = recognizer.get_normalized_embedding(img2, landmarks2)
# Compute similarity
# cosine similarity for normalized embeddings
similarity = compute_similarity(embedding1, embedding2, normalized=True)
is_match = similarity > threshold
print(f"Similarity: {similarity:.4f}")
print(f"Result: {'Same person' if is_match else 'Different person'}")
print(f"Threshold: {threshold}")
print(f"Result: {'Same person' if is_match else 'Different person'} (threshold: {threshold})")
def main():
parser = argparse.ArgumentParser(description="Face recognition and comparison.")
parser.add_argument("--image", type=str, help="Path to single image for embedding extraction.")
parser.add_argument("--image1", type=str, help="Path to first image for comparison.")
parser.add_argument("--image2", type=str, help="Path to second image for comparison.")
parser.add_argument("--threshold", type=float, default=0.35, help="Similarity threshold for face matching.")
parser.add_argument(
"--detector",
type=str,
default="retinaface",
choices=['retinaface', 'scrfd'],
help="Face detection method to use."
)
parser = argparse.ArgumentParser(description="Face recognition and comparison")
parser.add_argument("--image", type=str, help="Single image for embedding extraction")
parser.add_argument("--image1", type=str, help="First image for comparison")
parser.add_argument("--image2", type=str, help="Second image for comparison")
parser.add_argument("--threshold", type=float, default=0.35, help="Similarity threshold")
parser.add_argument("--detector", type=str, default="retinaface", choices=["retinaface", "scrfd"])
parser.add_argument(
"--recognizer",
type=str,
default="arcface",
choices=['arcface', 'mobileface', 'sphereface'],
help="Face recognition method to use."
choices=["arcface", "mobileface", "sphereface"],
)
parser.add_argument("--verbose", action="store_true", help="Enable verbose logging")
args = parser.parse_args()
if args.verbose:
from uniface import enable_logging
enable_logging()
print(f"Initializing detector: {args.detector}")
if args.detector == 'retinaface':
detector = RetinaFace()
else:
detector = SCRFD()
print(f"Initializing recognizer: {args.recognizer}")
if args.recognizer == 'arcface':
recognizer = ArcFace()
elif args.recognizer == 'mobileface':
recognizer = MobileFace()
else:
recognizer = SphereFace()
detector = RetinaFace() if args.detector == "retinaface" else SCRFD()
recognizer = get_recognizer(args.recognizer)
if args.image1 and args.image2:
# Face comparison mode
print(f"Comparing faces: {args.image1} vs {args.image2}")
compare_faces(detector, recognizer, args.image1, args.image2, args.threshold)
elif args.image:
# Single image embedding extraction mode
run_inference(detector, recognizer, args.image)
else:
print("Error: Provide either --image for single image processing or --image1 and --image2 for comparison")
print("Error: Provide --image or both --image1 and --image2")
parser.print_help()


@@ -1,141 +1,106 @@
"""Video Face Detection Script"""
# Face detection on video files
# Usage: python run_video_detection.py --input video.mp4 --output output.mp4
import cv2
import argparse
from pathlib import Path
import cv2
from tqdm import tqdm
from uniface import RetinaFace, SCRFD
from uniface import SCRFD, RetinaFace
from uniface.visualization import draw_detections
def process_video(detector, input_path: str, output_path: str, vis_threshold: float = 0.6,
fps: int = None, show_preview: bool = False):
# Open input video
def process_video(
detector,
input_path: str,
output_path: str,
threshold: float = 0.6,
show_preview: bool = False,
):
cap = cv2.VideoCapture(input_path)
if not cap.isOpened():
print(f"Error: Cannot open video file '{input_path}'")
return
# Get video properties
# get video properties
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
source_fps = cap.get(cv2.CAP_PROP_FPS)
fps = cap.get(cv2.CAP_PROP_FPS)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
output_fps = fps if fps is not None else source_fps
print(f"Input: {input_path} ({width}x{height}, {fps:.1f} fps, {total_frames} frames)")
print(f"Output: {output_path}")
print(f"📹 Input: {input_path}")
print(f" Resolution: {width}x{height}")
print(f" FPS: {source_fps:.2f}")
print(f" Total frames: {total_frames}")
print(f"\n📹 Output: {output_path}")
print(f" FPS: {output_fps:.2f}\n")
# Initialize video writer
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_path, fourcc, output_fps, (width, height))
fourcc = cv2.VideoWriter_fourcc(*"mp4v") # codec for .mp4
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
if not out.isOpened():
print(f"Error: Cannot create output video '{output_path}'")
cap.release()
return
# Process frames
frame_count = 0
total_faces = 0
try:
with tqdm(total=total_frames, desc="Processing", unit="frames") as pbar:
while True:
ret, frame = cap.read()
if not ret:
break
for _ in tqdm(range(total_frames), desc="Processing", unit="frames"):
ret, frame = cap.read()
if not ret:
break
frame_count += 1
frame_count += 1
faces = detector.detect(frame)
total_faces += len(faces)
# Detect faces
faces = detector.detect(frame)
total_faces += len(faces)
bboxes = [f["bbox"] for f in faces]
scores = [f["confidence"] for f in faces]
landmarks = [f["landmarks"] for f in faces]
draw_detections(frame, bboxes, scores, landmarks, vis_threshold=threshold)
# Draw detections
bboxes = [f['bbox'] for f in faces]
scores = [f['confidence'] for f in faces]
landmarks = [f['landmarks'] for f in faces]
draw_detections(frame, bboxes, scores, landmarks, vis_threshold=vis_threshold)
cv2.putText(
frame,
f"Faces: {len(faces)}",
(10, 30),
cv2.FONT_HERSHEY_SIMPLEX,
1,
(0, 255, 0),
2,
)
out.write(frame)
# Add frame info
cv2.putText(frame, f"Faces: {len(faces)}", (10, 30),
cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
# Write frame
out.write(frame)
# Show preview if requested
if show_preview:
cv2.imshow("Processing Video - Press 'q' to cancel", frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
print("\nProcessing cancelled by user")
break
pbar.update(1)
except KeyboardInterrupt:
print("\nProcessing interrupted")
finally:
cap.release()
out.release()
if show_preview:
cv2.destroyAllWindows()
cv2.imshow("Processing - Press 'q' to cancel", frame)
if cv2.waitKey(1) & 0xFF == ord("q"):
print("\nCancelled by user")
break
# Summary
print(f"\nProcessing complete!")
print(f" Processed: {frame_count} frames")
print(f" Total faces detected: {total_faces}")
print(f" Average faces per frame: {total_faces/frame_count:.2f}" if frame_count > 0 else "")
print(f" Output saved: {output_path}")
cap.release()
out.release()
if show_preview:
cv2.destroyAllWindows()
avg_faces = total_faces / frame_count if frame_count > 0 else 0
print(f"\nDone! {frame_count} frames, {total_faces} faces ({avg_faces:.1f} avg/frame)")
print(f"Saved: {output_path}")
def main():
parser = argparse.ArgumentParser(description="Process video with face detection")
parser.add_argument("--input", type=str, required=True, help="Path to input video")
parser.add_argument("--output", type=str, required=True, help="Path to output video")
parser.add_argument("--detector", type=str, default="retinaface",
choices=['retinaface', 'scrfd'], help="Face detector to use")
parser.add_argument("--threshold", type=float, default=0.6,
help="Confidence threshold for visualization")
parser.add_argument("--fps", type=int, default=None,
help="Output FPS (default: same as input)")
parser.add_argument("--preview", action="store_true",
help="Show live preview during processing")
parser.add_argument("--verbose", action="store_true", help="Enable verbose logging")
parser.add_argument("--input", type=str, required=True, help="Input video path")
parser.add_argument("--output", type=str, required=True, help="Output video path")
parser.add_argument("--detector", type=str, default="retinaface", choices=["retinaface", "scrfd"])
parser.add_argument("--threshold", type=float, default=0.6, help="Visualization threshold")
parser.add_argument("--preview", action="store_true", help="Show live preview")
args = parser.parse_args()
# Check input exists
if not Path(args.input).exists():
print(f"Error: Input file '{args.input}' does not exist")
return
# Create output directory if needed
output_dir = Path(args.output).parent
if output_dir != Path('.'):
output_dir.mkdir(parents=True, exist_ok=True)
Path(args.output).parent.mkdir(parents=True, exist_ok=True)
if args.verbose:
from uniface import enable_logging
enable_logging()
# Initialize detector
print(f"Initializing detector: {args.detector}")
if args.detector == 'retinaface':
detector = RetinaFace()
else:
detector = SCRFD()
print("Detector initialized\n")
# Process video
process_video(detector, args.input, args.output, args.threshold, args.fps, args.preview)
detector = RetinaFace() if args.detector == "retinaface" else SCRFD()
process_video(detector, args.input, args.output, args.threshold, args.preview)
if __name__ == "__main__":


@@ -12,15 +12,8 @@ def compute_sha256(file_path: Path, chunk_size: int = 8192) -> str:
def main():
parser = argparse.ArgumentParser(
description="Compute SHA256 hash of a model weight file."
)
parser.add_argument(
"file",
type=Path,
help="Path to the model weight file (.onnx, .pth, etc)."
)
parser = argparse.ArgumentParser(description="Compute SHA256 hash of a file")
parser.add_argument("file", type=Path, help="Path to file")
args = parser.parse_args()
if not args.file.exists() or not args.file.is_file():
@@ -28,7 +21,7 @@ def main():
return
sha256 = compute_sha256(args.file)
print(f"`SHA256 hash for '{args.file.name}':\n{sha256}")
print(f"SHA256 hash for '{args.file.name}':\n{sha256}")
if __name__ == "__main__":


@@ -31,7 +31,7 @@ def test_prediction_output_format(age_gender_model, mock_image, mock_bbox):
def test_gender_values(age_gender_model, mock_image, mock_bbox):
gender, age = age_gender_model.predict(mock_image, mock_bbox)
assert gender in ['Male', 'Female'], f"Gender should be 'Male' or 'Female', got '{gender}'"
assert gender in ["Male", "Female"], f"Gender should be 'Male' or 'Female', got '{gender}'"
def test_age_range(age_gender_model, mock_image, mock_bbox):
@@ -48,7 +48,7 @@ def test_different_bbox_sizes(age_gender_model, mock_image):
for bbox in test_bboxes:
gender, age = age_gender_model.predict(mock_image, bbox)
assert gender in ['Male', 'Female'], f"Failed for bbox {bbox}"
assert gender in ["Male", "Female"], f"Failed for bbox {bbox}"
assert 0 <= age <= 120, f"Age out of range for bbox {bbox}"
@@ -58,7 +58,7 @@ def test_different_image_sizes(age_gender_model, mock_bbox):
for size in test_sizes:
mock_image = np.random.randint(0, 255, size, dtype=np.uint8)
gender, age = age_gender_model.predict(mock_image, mock_bbox)
assert gender in ['Male', 'Female'], f"Failed for image size {size}"
assert gender in ["Male", "Female"], f"Failed for image size {size}"
assert 0 <= age <= 120, f"Age out of range for image size {size}"
@@ -73,14 +73,14 @@ def test_consistency(age_gender_model, mock_image, mock_bbox):
def test_bbox_list_format(age_gender_model, mock_image):
bbox_list = [100, 100, 300, 300]
gender, age = age_gender_model.predict(mock_image, bbox_list)
assert gender in ['Male', 'Female'], "Should work with bbox as list"
assert gender in ["Male", "Female"], "Should work with bbox as list"
assert 0 <= age <= 120, "Age should be in valid range"
def test_bbox_array_format(age_gender_model, mock_image):
bbox_array = np.array([100, 100, 300, 300])
gender, age = age_gender_model.predict(mock_image, bbox_array)
assert gender in ['Male', 'Female'], "Should work with bbox as numpy array"
assert gender in ["Male", "Female"], "Should work with bbox as numpy array"
assert 0 <= age <= 120, "Age should be in valid range"
@@ -98,7 +98,7 @@ def test_multiple_predictions(age_gender_model, mock_image):
assert len(results) == 3, "Should have 3 predictions"
for gender, age in results:
assert gender in ['Male', 'Female']
assert gender in ["Male", "Female"]
assert 0 <= age <= 120


@@ -16,7 +16,7 @@ def test_create_detector_retinaface():
"""
Test creating a RetinaFace detector using factory function.
"""
detector = create_detector('retinaface')
detector = create_detector("retinaface")
assert detector is not None, "Failed to create RetinaFace detector"
@@ -24,7 +24,7 @@ def test_create_detector_scrfd():
"""
Test creating a SCRFD detector using factory function.
"""
detector = create_detector('scrfd')
detector = create_detector("scrfd")
assert detector is not None, "Failed to create SCRFD detector"
@@ -33,10 +33,10 @@ def test_create_detector_with_config():
Test creating detector with custom configuration.
"""
detector = create_detector(
'retinaface',
"retinaface",
model_name=RetinaFaceWeights.MNET_V2,
conf_thresh=0.8,
nms_thresh=0.3
nms_thresh=0.3,
)
assert detector is not None, "Failed to create detector with custom config"
@@ -46,18 +46,14 @@ def test_create_detector_invalid_method():
Test that invalid detector method raises an error.
"""
with pytest.raises((ValueError, KeyError)):
create_detector('invalid_method')
create_detector("invalid_method")
def test_create_detector_scrfd_with_model():
"""
Test creating SCRFD detector with specific model.
"""
detector = create_detector(
'scrfd',
model_name=SCRFDWeights.SCRFD_10G_KPS,
conf_thresh=0.5
)
detector = create_detector("scrfd", model_name=SCRFDWeights.SCRFD_10G_KPS, conf_thresh=0.5)
assert detector is not None, "Failed to create SCRFD with specific model"
@@ -66,7 +62,7 @@ def test_create_recognizer_arcface():
"""
Test creating an ArcFace recognizer using factory function.
"""
recognizer = create_recognizer('arcface')
recognizer = create_recognizer("arcface")
assert recognizer is not None, "Failed to create ArcFace recognizer"
@@ -74,7 +70,7 @@ def test_create_recognizer_mobileface():
"""
Test creating a MobileFace recognizer using factory function.
"""
recognizer = create_recognizer('mobileface')
recognizer = create_recognizer("mobileface")
assert recognizer is not None, "Failed to create MobileFace recognizer"
@@ -82,7 +78,7 @@ def test_create_recognizer_sphereface():
"""
Test creating a SphereFace recognizer using factory function.
"""
recognizer = create_recognizer('sphereface')
recognizer = create_recognizer("sphereface")
assert recognizer is not None, "Failed to create SphereFace recognizer"
@@ -91,7 +87,7 @@ def test_create_recognizer_invalid_method():
Test that invalid recognizer method raises an error.
"""
with pytest.raises((ValueError, KeyError)):
create_recognizer('invalid_method')
create_recognizer("invalid_method")
# create_landmarker tests
@@ -99,7 +95,7 @@ def test_create_landmarker():
"""
Test creating a Landmark106 detector using factory function.
"""
landmarker = create_landmarker('2d106det')
landmarker = create_landmarker("2d106det")
assert landmarker is not None, "Failed to create Landmark106 detector"
@@ -116,7 +112,7 @@ def test_create_landmarker_invalid_method():
Test that invalid landmarker method raises an error.
"""
with pytest.raises((ValueError, KeyError)):
create_landmarker('invalid_method')
create_landmarker("invalid_method")
# detect_faces tests
@@ -125,7 +121,7 @@ def test_detect_faces_retinaface():
Test high-level detect_faces function with RetinaFace.
"""
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
faces = detect_faces(mock_image, method='retinaface')
faces = detect_faces(mock_image, method="retinaface")
assert isinstance(faces, list), "detect_faces should return a list"
@@ -135,7 +131,7 @@ def test_detect_faces_scrfd():
Test high-level detect_faces function with SCRFD.
"""
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
faces = detect_faces(mock_image, method='scrfd')
faces = detect_faces(mock_image, method="scrfd")
assert isinstance(faces, list), "detect_faces should return a list"
@@ -145,13 +141,13 @@ def test_detect_faces_with_threshold():
Test detect_faces with custom confidence threshold.
"""
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
faces = detect_faces(mock_image, method='retinaface', conf_thresh=0.8)
faces = detect_faces(mock_image, method="retinaface", conf_thresh=0.8)
assert isinstance(faces, list), "detect_faces should return a list"
# All detections should respect threshold
for face in faces:
assert face['confidence'] >= 0.8, "All detections should meet confidence threshold"
assert face["confidence"] >= 0.8, "All detections should meet confidence threshold"
def test_detect_faces_default_method():
@@ -169,7 +165,7 @@ def test_detect_faces_empty_image():
Test detect_faces on a blank image.
"""
empty_image = np.zeros((640, 640, 3), dtype=np.uint8)
faces = detect_faces(empty_image, method='retinaface')
faces = detect_faces(empty_image, method="retinaface")
assert isinstance(faces, list), "Should return a list even for empty image"
assert len(faces) == 0, "Should detect no faces in blank image"
@@ -193,8 +189,8 @@ def test_list_available_detectors_contents():
detectors = list_available_detectors()
# Should include at least these detectors
assert 'retinaface' in detectors, "Should include 'retinaface'"
assert 'scrfd' in detectors, "Should include 'scrfd'"
assert "retinaface" in detectors, "Should include 'retinaface'"
assert "scrfd" in detectors, "Should include 'scrfd'"
# Integration tests
@@ -202,7 +198,7 @@ def test_detector_inference_from_factory():
"""
Test that detector created from factory can perform inference.
"""
detector = create_detector('retinaface')
detector = create_detector("retinaface")
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
faces = detector.detect(mock_image)
@@ -213,7 +209,7 @@ def test_recognizer_inference_from_factory():
"""
Test that recognizer created from factory can perform inference.
"""
recognizer = create_recognizer('arcface')
recognizer = create_recognizer("arcface")
mock_image = np.random.randint(0, 255, (112, 112, 3), dtype=np.uint8)
embedding = recognizer.get_embedding(mock_image)
@@ -225,7 +221,7 @@ def test_landmarker_inference_from_factory():
"""
Test that landmarker created from factory can perform inference.
"""
landmarker = create_landmarker('2d106det')
landmarker = create_landmarker("2d106det")
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
mock_bbox = [100, 100, 300, 300]
@@ -238,8 +234,8 @@ def test_multiple_detector_creation():
"""
Test that multiple detectors can be created independently.
"""
detector1 = create_detector('retinaface')
detector2 = create_detector('scrfd')
detector1 = create_detector("retinaface")
detector2 = create_detector("scrfd")
assert detector1 is not None
assert detector2 is not None
@@ -250,8 +246,8 @@ def test_detector_with_different_configs():
"""
Test creating multiple detectors with different configurations.
"""
detector_high_thresh = create_detector('retinaface', conf_thresh=0.9)
detector_low_thresh = create_detector('retinaface', conf_thresh=0.3)
detector_high_thresh = create_detector("retinaface", conf_thresh=0.9)
detector_low_thresh = create_detector("retinaface", conf_thresh=0.3)
mock_image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)
@@ -269,9 +265,9 @@ def test_factory_returns_correct_types():
"""
from uniface import RetinaFace, ArcFace, Landmark106
detector = create_detector('retinaface')
recognizer = create_recognizer('arcface')
landmarker = create_landmarker('2d106det')
detector = create_detector("retinaface")
recognizer = create_recognizer("arcface")
landmarker = create_landmarker("2d106det")
assert isinstance(detector, RetinaFace), "Should return RetinaFace instance"
assert isinstance(recognizer, ArcFace), "Should return ArcFace instance"

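The factory tests above exercise the library's main entry points end to end. As a minimal sketch of how the three factories compose (assuming weights auto-download on first use, per `verify_model_weights`):

```python
import numpy as np

from uniface import create_detector, create_landmarker, create_recognizer

detector = create_detector("retinaface", conf_thresh=0.5)
recognizer = create_recognizer("arcface")
landmarker = create_landmarker("2d106det")

image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)

# Each detection is a dict with "bbox", "confidence", and "landmarks" keys
for face in detector.detect(image):
    points = landmarker.get_landmarks(image, face["bbox"])
    print(face["confidence"], points.shape)  # points.shape == (106, 2)
```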
View File

@@ -41,13 +41,16 @@ def mock_landmarks():
"""
Create mock 5-point facial landmarks.
"""
return np.array([
[38.2946, 51.6963],
[73.5318, 51.5014],
[56.0252, 71.7366],
[41.5493, 92.3655],
[70.7299, 92.2041]
], dtype=np.float32)
return np.array(
[
[38.2946, 51.6963],
[73.5318, 51.5014],
[56.0252, 71.7366],
[41.5493, 92.3655],
[70.7299, 92.2041],
],
dtype=np.float32,
)
# ArcFace Tests
@@ -173,8 +176,7 @@ def test_different_models_different_embeddings(arcface_model, mobileface_model,
# Embeddings should be different (with high probability for random input)
# We check that they're not identical
assert not np.allclose(arcface_emb, mobileface_emb), \
"Different models should produce different embeddings"
assert not np.allclose(arcface_emb, mobileface_emb), "Different models should produce different embeddings"
def test_embedding_similarity_computation(arcface_model, mock_aligned_face):
@@ -191,6 +193,7 @@ def test_embedding_similarity_computation(arcface_model, mock_aligned_face):
# Compute cosine similarity
from uniface import compute_similarity
similarity = compute_similarity(emb1, emb2)
# Similarity should be between -1 and 1
@@ -205,6 +208,7 @@ def test_same_face_high_similarity(arcface_model, mock_aligned_face):
emb2 = arcface_model.get_embedding(mock_aligned_face)
from uniface import compute_similarity
similarity = compute_similarity(emb1, emb2)
# Same image should have similarity close to 1.0

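These similarity tests pin down the contract of `compute_similarity`: cosine similarity in [-1, 1], with identical inputs scoring near 1.0. A minimal sketch against random crops (assuming 112x112 aligned inputs, as the fixtures use):

```python
import numpy as np

from uniface import compute_similarity, create_recognizer

recognizer = create_recognizer("arcface")

face_a = np.random.randint(0, 255, (112, 112, 3), dtype=np.uint8)
face_b = np.random.randint(0, 255, (112, 112, 3), dtype=np.uint8)

emb_a = recognizer.get_embedding(face_a)
emb_b = recognizer.get_embedding(face_b)

print(compute_similarity(emb_a, emb_b))  # somewhere in [-1, 1]
print(compute_similarity(emb_a, emb_a))  # ~1.0 for identical inputs
```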
View File

@@ -18,13 +18,16 @@ def mock_landmarks():
Create mock 5-point facial landmarks.
Standard positions for a face roughly centered at (112/2, 112/2).
"""
return np.array([
[38.2946, 51.6963], # Left eye
[73.5318, 51.5014], # Right eye
[56.0252, 71.7366], # Nose
[41.5493, 92.3655], # Left mouth corner
[70.7299, 92.2041] # Right mouth corner
], dtype=np.float32)
return np.array(
[
[38.2946, 51.6963], # Left eye
[73.5318, 51.5014], # Right eye
[56.0252, 71.7366], # Nose
[41.5493, 92.3655], # Left mouth corner
[70.7299, 92.2041], # Right mouth corner
],
dtype=np.float32,
)
# compute_similarity tests
@@ -166,7 +169,7 @@ def test_face_alignment_landmarks_as_list(mock_image):
[73.5318, 51.5014],
[56.0252, 71.7366],
[41.5493, 92.3655],
[70.7299, 92.2041]
[70.7299, 92.2041],
]
# Convert list to numpy array before passing to face_alignment
@@ -201,9 +204,18 @@ def test_face_alignment_from_different_positions(mock_image):
"""
# Landmarks at different positions
positions = [
np.array([[100, 100], [150, 100], [125, 130], [110, 150], [140, 150]], dtype=np.float32),
np.array([[300, 200], [350, 200], [325, 230], [310, 250], [340, 250]], dtype=np.float32),
np.array([[500, 400], [550, 400], [525, 430], [510, 450], [540, 450]], dtype=np.float32),
np.array(
[[100, 100], [150, 100], [125, 130], [110, 150], [140, 150]],
dtype=np.float32,
),
np.array(
[[300, 200], [350, 200], [325, 230], [310, 250], [340, 250]],
dtype=np.float32,
),
np.array(
[[500, 400], [550, 400], [525, 430], [510, 450], [540, 450]],
dtype=np.float32,
),
]
for landmarks in positions:
@@ -216,13 +228,16 @@ def test_face_alignment_landmark_count(mock_image):
Test that face_alignment works specifically with 5-point landmarks.
"""
# Standard 5-point landmarks
landmarks_5pt = np.array([
[38.2946, 51.6963],
[73.5318, 51.5014],
[56.0252, 71.7366],
[41.5493, 92.3655],
[70.7299, 92.2041]
], dtype=np.float32)
landmarks_5pt = np.array(
[
[38.2946, 51.6963],
[73.5318, 51.5014],
[56.0252, 71.7366],
[41.5493, 92.3655],
[70.7299, 92.2041],
],
dtype=np.float32,
)
aligned, _ = face_alignment(mock_image, landmarks_5pt, image_size=(112, 112))
assert aligned.shape == (112, 112, 3), "Should work with 5-point landmarks"

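For reference, the call pattern these alignment tests rely on, sketched standalone (the landmark coordinates are arbitrary illustrative values taken from the tests above):

```python
import numpy as np

from uniface import face_alignment

image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)

# 5-point layout: left eye, right eye, nose, left/right mouth corners
landmarks = np.array(
    [[100, 100], [150, 100], [125, 130], [110, 150], [140, 150]],
    dtype=np.float32,
)

# Returns the warped 112x112 crop plus the associated transform
# (the tests ignore the second value)
aligned, matrix = face_alignment(image, landmarks, image_size=(112, 112))
assert aligned.shape == (112, 112, 3)
```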
View File

@@ -13,7 +13,7 @@
__license__ = "MIT"
__author__ = "Yakhyokhuja Valikhujaev"
__version__ = "1.1.0"
__version__ = "1.1.1"
from uniface.face_utils import compute_similarity, face_alignment
@@ -22,11 +22,18 @@ from uniface.model_store import verify_model_weights
from uniface.visualization import draw_detections
from .attribute import AgeGender
try:
from .attribute import Emotion
except ImportError:
Emotion = None # PyTorch not installed
from .detection import SCRFD, RetinaFace, create_detector, detect_faces, list_available_detectors
from .detection import (
SCRFD,
RetinaFace,
create_detector,
detect_faces,
list_available_detectors,
)
from .landmark import Landmark106, create_landmarker
from .recognition import ArcFace, MobileFace, SphereFace, create_recognizer

View File

@@ -2,7 +2,8 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
from typing import Dict, Any, List, Union
from typing import Any, Dict, List, Union
import numpy as np
from uniface.attribute.age_gender import AgeGender
@@ -12,18 +13,14 @@ from uniface.constants import AgeGenderWeights, DDAMFNWeights
# Emotion requires PyTorch - make it optional
try:
from uniface.attribute.emotion import Emotion
_EMOTION_AVAILABLE = True
except ImportError:
Emotion = None
_EMOTION_AVAILABLE = False
# Public API for the attribute module
__all__ = [
"AgeGender",
"Emotion",
"create_attribute_predictor",
"predict_attributes"
]
__all__ = ["AgeGender", "Emotion", "create_attribute_predictor", "predict_attributes"]
# A mapping from model enums to their corresponding attribute classes
_ATTRIBUTE_MODELS = {
@@ -35,10 +32,7 @@ if _EMOTION_AVAILABLE:
_ATTRIBUTE_MODELS.update({model: Emotion for model in DDAMFNWeights})
def create_attribute_predictor(
model_name: Union[AgeGenderWeights, DDAMFNWeights],
**kwargs: Any
) -> Attribute:
def create_attribute_predictor(model_name: Union[AgeGenderWeights, DDAMFNWeights], **kwargs: Any) -> Attribute:
"""
Factory function to create an attribute predictor instance.
@@ -59,17 +53,16 @@ def create_attribute_predictor(
model_class = _ATTRIBUTE_MODELS.get(model_name)
if model_class is None:
raise ValueError(f"Unsupported attribute model: {model_name}. "
f"Please choose from AgeGenderWeights or DDAMFNWeights.")
raise ValueError(
f"Unsupported attribute model: {model_name}. Please choose from AgeGenderWeights or DDAMFNWeights."
)
# Pass model_name to the constructor, as some classes might need it
return model_class(model_name=model_name, **kwargs)
def predict_attributes(
image: np.ndarray,
detections: List[Dict[str, np.ndarray]],
predictor: Attribute
image: np.ndarray, detections: List[Dict[str, np.ndarray]], predictor: Attribute
) -> List[Dict[str, Any]]:
"""
High-level API to predict attributes for multiple detected faces.
@@ -91,16 +84,16 @@ def predict_attributes(
"""
for face in detections:
# Initialize attributes dict if it doesn't exist
if 'attributes' not in face:
face['attributes'] = {}
if "attributes" not in face:
face["attributes"] = {}
if isinstance(predictor, AgeGender):
gender, age = predictor(image, face['bbox'])
face['attributes']['gender'] = gender
face['attributes']['age'] = age
gender, age = predictor(image, face["bbox"])
face["attributes"]["gender"] = gender
face["attributes"]["age"] = age
elif isinstance(predictor, Emotion):
emotion, confidence = predictor(image, face['landmark'])
face['attributes']['emotion'] = emotion
face['attributes']['confidence'] = confidence
emotion, confidence = predictor(image, face["landmark"])
face["attributes"]["emotion"] = emotion
face["attributes"]["confidence"] = confidence
return detections

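The high-level flow this module enables, sketched with the age/gender predictor (the image path is hypothetical; `Emotion` would additionally require PyTorch and the face's landmarks):

```python
import cv2

from uniface import detect_faces
from uniface.attribute import create_attribute_predictor, predict_attributes
from uniface.constants import AgeGenderWeights

image = cv2.imread("face.jpg")  # hypothetical input

predictor = create_attribute_predictor(AgeGenderWeights.DEFAULT)
detections = detect_faces(image, method="retinaface")

# predict_attributes fills face["attributes"] in place and returns the list
for face in predict_attributes(image, detections, predictor):
    print(face["attributes"]["gender"], face["attributes"]["age"])
```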
View File

@@ -51,8 +51,11 @@ class AgeGender(Attribute):
self.output_names = [output.name for output in self.session.get_outputs()]
Logger.info(f"Successfully initialized AgeGender model with input size {self.input_size}")
except Exception as e:
Logger.error(f"Failed to load AgeGender model from '{self.model_path}'", exc_info=True)
raise RuntimeError(f"Failed to initialize AgeGender model: {e}")
Logger.error(
f"Failed to load AgeGender model from '{self.model_path}'",
exc_info=True,
)
raise RuntimeError(f"Failed to initialize AgeGender model: {e}") from e
def preprocess(self, image: np.ndarray, bbox: Union[List, np.ndarray]) -> np.ndarray:
"""
@@ -76,7 +79,11 @@ class AgeGender(Attribute):
aligned_face, _ = bbox_center_alignment(image, center, self.input_size[1], scale, rotation)
blob = cv2.dnn.blobFromImage(
aligned_face, scalefactor=1.0, size=self.input_size[::-1], mean=(0.0, 0.0, 0.0), swapRB=True
aligned_face,
scalefactor=1.0,
size=self.input_size[::-1],
mean=(0.0, 0.0, 0.0),
swapRB=True,
)
return blob
@@ -157,7 +164,15 @@ if __name__ == "__main__":
# Prepare text and draw on the frame
label = f"{gender}, {age}"
cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
cv2.putText(
frame,
label,
(x1, y1 - 10),
cv2.FONT_HERSHEY_SIMPLEX,
0.8,
(0, 255, 0),
2,
)
# Display the resulting frame
cv2.imshow("Age and Gender Inference (Press 'q' to quit)", frame)

View File

@@ -4,6 +4,7 @@
from abc import ABC, abstractmethod
from typing import Any
import numpy as np

View File

@@ -2,15 +2,16 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
from typing import List, Tuple, Union
import cv2
import torch
import numpy as np
from typing import Tuple, Union, List
import torch
from uniface.attribute.base import Attribute
from uniface.log import Logger
from uniface.constants import DDAMFNWeights
from uniface.face_utils import face_alignment
from uniface.log import Logger
from uniface.model_store import verify_model_weights
__all__ = ["Emotion"]
@@ -43,7 +44,15 @@ class Emotion(Attribute):
self.model_path = verify_model_weights(model_weights)
# Define emotion labels based on the selected model
self.emotion_labels = ["Neutral", "Happy", "Sad", "Surprise", "Fear", "Disgust", "Angry"]
self.emotion_labels = [
"Neutral",
"Happy",
"Sad",
"Surprise",
"Fear",
"Disgust",
"Angry",
]
if model_weights == DDAMFNWeights.AFFECNET8:
self.emotion_labels.append("Contempt")
@@ -63,7 +72,7 @@ class Emotion(Attribute):
Logger.info(f"Successfully initialized Emotion model on {self.device}")
except Exception as e:
Logger.error(f"Failed to load Emotion model from '{self.model_path}'", exc_info=True)
raise RuntimeError(f"Failed to initialize Emotion model: {e}")
raise RuntimeError(f"Failed to initialize Emotion model: {e}") from e
def preprocess(self, image: np.ndarray, landmark: Union[List, np.ndarray]) -> torch.Tensor:
"""
@@ -77,7 +86,7 @@ class Emotion(Attribute):
torch.Tensor: The preprocessed image tensor ready for inference.
"""
landmark = np.asarray(landmark)
aligned_image, _ = face_alignment(image, landmark)
# Convert BGR to RGB, resize, normalize, and convert to a CHW tensor
@@ -115,8 +124,8 @@ class Emotion(Attribute):
# TODO: below is only for testing, remove it later
if __name__ == "__main__":
from uniface.detection import create_detector
from uniface.constants import RetinaFaceWeights
from uniface.detection import create_detector
print("Initializing models for live inference...")
# 1. Initialize the face detector
@@ -145,26 +154,34 @@ if __name__ == "__main__":
# For each detected face, predict the emotion
for detection in detections:
box = detection['bbox']
landmark = detection['landmarks']
box = detection["bbox"]
landmark = detection["landmarks"]
x1, y1, x2, y2 = map(int, box)
# Predict attributes using the landmark
emotion, confidence = emotion_predictor.predict(frame, landmark)
# Prepare text and draw on the frame
label = f"{emotion} ({confidence:.2f})"
cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)
cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 0, 0), 2)
cv2.putText(
frame,
label,
(x1, y1 - 10),
cv2.FONT_HERSHEY_SIMPLEX,
0.8,
(255, 0, 0),
2,
)
# Display the resulting frame
cv2.imshow("Emotion Inference (Press 'q' to quit)", frame)
# Break the loop if 'q' is pressed
if cv2.waitKey(1) & 0xFF == ord('q'):
if cv2.waitKey(1) & 0xFF == ord("q"):
break
# Release resources
cap.release()
cv2.destroyAllWindows()
print("Inference stopped.")
print("Inference stopped.")

View File

@@ -2,12 +2,12 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
import cv2
import math
import itertools
import numpy as np
import math
from typing import List, Tuple
from typing import Tuple, List
import cv2
import numpy as np
def resize_image(frame, target_shape: Tuple[int, int] = (640, 640)) -> Tuple[np.ndarray, float]:
@@ -59,12 +59,7 @@ def generate_anchors(image_size: Tuple[int, int] = (640, 640)) -> np.ndarray:
min_sizes = [[16, 32], [64, 128], [256, 512]]
anchors = []
feature_maps = [
[
math.ceil(image_size[0] / step),
math.ceil(image_size[1] / step)
] for step in steps
]
feature_maps = [[math.ceil(image_size[0] / step), math.ceil(image_size[1] / step)] for step in steps]
for k, (map_height, map_width) in enumerate(feature_maps):
step = steps[k]

View File
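The collapsed `feature_maps` comprehension computes one anchor grid per stride. A worked check, assuming the standard RetinaFace strides of 8, 16, and 32 (`steps` is defined outside this hunk):

```python
import math

steps = [8, 16, 32]  # assumed strides; not shown in this hunk
image_size = (640, 640)

feature_maps = [[math.ceil(image_size[0] / s), math.ceil(image_size[1] / s)] for s in steps]
print(feature_maps)  # [[80, 80], [40, 40], [20, 20]]

# Two min_sizes per cell at each level -> total anchor count
print(sum(2 * h * w for h, w in feature_maps))  # 16800
```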

@@ -5,6 +5,7 @@
from enum import Enum
from typing import Dict
# fmt: off
class SphereFaceWeights(str, Enum):
"""
@@ -82,83 +83,65 @@ class LandmarkWeights(str, Enum):
MODEL_URLS: Dict[Enum, str] = {
# RetinaFace
RetinaFaceWeights.MNET_025: 'https://github.com/yakhyo/uniface/releases/download/weights/retinaface_mv1_0.25.onnx',
RetinaFaceWeights.MNET_050: 'https://github.com/yakhyo/uniface/releases/download/weights/retinaface_mv1_0.50.onnx',
RetinaFaceWeights.MNET_V1: 'https://github.com/yakhyo/uniface/releases/download/weights/retinaface_mv1.onnx',
RetinaFaceWeights.MNET_V2: 'https://github.com/yakhyo/uniface/releases/download/weights/retinaface_mv2.onnx',
RetinaFaceWeights.RESNET18: 'https://github.com/yakhyo/uniface/releases/download/weights/retinaface_r18.onnx',
RetinaFaceWeights.RESNET34: 'https://github.com/yakhyo/uniface/releases/download/weights/retinaface_r34.onnx',
RetinaFaceWeights.MNET_025: "https://github.com/yakhyo/uniface/releases/download/weights/retinaface_mv1_0.25.onnx",
RetinaFaceWeights.MNET_050: "https://github.com/yakhyo/uniface/releases/download/weights/retinaface_mv1_0.50.onnx",
RetinaFaceWeights.MNET_V1: "https://github.com/yakhyo/uniface/releases/download/weights/retinaface_mv1.onnx",
RetinaFaceWeights.MNET_V2: "https://github.com/yakhyo/uniface/releases/download/weights/retinaface_mv2.onnx",
RetinaFaceWeights.RESNET18: "https://github.com/yakhyo/uniface/releases/download/weights/retinaface_r18.onnx",
RetinaFaceWeights.RESNET34: "https://github.com/yakhyo/uniface/releases/download/weights/retinaface_r34.onnx",
# MobileFace
MobileFaceWeights.MNET_025: 'https://github.com/yakhyo/uniface/releases/download/weights/mobilenetv1_0.25.onnx',
MobileFaceWeights.MNET_V2: 'https://github.com/yakhyo/uniface/releases/download/weights/mobilenetv2.onnx',
MobileFaceWeights.MNET_V3_SMALL: 'https://github.com/yakhyo/uniface/releases/download/weights/mobilenetv3_small.onnx',
MobileFaceWeights.MNET_V3_LARGE: 'https://github.com/yakhyo/uniface/releases/download/weights/mobilenetv3_large.onnx',
MobileFaceWeights.MNET_025: "https://github.com/yakhyo/uniface/releases/download/weights/mobilenetv1_0.25.onnx",
MobileFaceWeights.MNET_V2: "https://github.com/yakhyo/uniface/releases/download/weights/mobilenetv2.onnx",
MobileFaceWeights.MNET_V3_SMALL: "https://github.com/yakhyo/uniface/releases/download/weights/mobilenetv3_small.onnx",
MobileFaceWeights.MNET_V3_LARGE: "https://github.com/yakhyo/uniface/releases/download/weights/mobilenetv3_large.onnx",
# SphereFace
SphereFaceWeights.SPHERE20: 'https://github.com/yakhyo/uniface/releases/download/weights/sphere20.onnx',
SphereFaceWeights.SPHERE36: 'https://github.com/yakhyo/uniface/releases/download/weights/sphere36.onnx',
SphereFaceWeights.SPHERE20: "https://github.com/yakhyo/uniface/releases/download/weights/sphere20.onnx",
SphereFaceWeights.SPHERE36: "https://github.com/yakhyo/uniface/releases/download/weights/sphere36.onnx",
# ArcFace
ArcFaceWeights.MNET: 'https://github.com/yakhyo/uniface/releases/download/weights/w600k_mbf.onnx',
ArcFaceWeights.RESNET: 'https://github.com/yakhyo/uniface/releases/download/weights/w600k_r50.onnx',
ArcFaceWeights.MNET: "https://github.com/yakhyo/uniface/releases/download/weights/w600k_mbf.onnx",
ArcFaceWeights.RESNET: "https://github.com/yakhyo/uniface/releases/download/weights/w600k_r50.onnx",
# SCRFD
SCRFDWeights.SCRFD_10G_KPS: 'https://github.com/yakhyo/uniface/releases/download/weights/scrfd_10g_kps.onnx',
SCRFDWeights.SCRFD_500M_KPS: 'https://github.com/yakhyo/uniface/releases/download/weights/scrfd_500m_kps.onnx',
SCRFDWeights.SCRFD_10G_KPS: "https://github.com/yakhyo/uniface/releases/download/weights/scrfd_10g_kps.onnx",
SCRFDWeights.SCRFD_500M_KPS: "https://github.com/yakhyo/uniface/releases/download/weights/scrfd_500m_kps.onnx",
# DDAMFN
DDAMFNWeights.AFFECNET7: 'https://github.com/yakhyo/uniface/releases/download/weights/affecnet7.script',
DDAMFNWeights.AFFECNET8: 'https://github.com/yakhyo/uniface/releases/download/weights/affecnet8.script',
DDAMFNWeights.AFFECNET7: "https://github.com/yakhyo/uniface/releases/download/weights/affecnet7.script",
DDAMFNWeights.AFFECNET8: "https://github.com/yakhyo/uniface/releases/download/weights/affecnet8.script",
# AgeGender
AgeGenderWeights.DEFAULT: 'https://github.com/yakhyo/uniface/releases/download/weights/genderage.onnx',
AgeGenderWeights.DEFAULT: "https://github.com/yakhyo/uniface/releases/download/weights/genderage.onnx",
# Landmarks
LandmarkWeights.DEFAULT: 'https://github.com/yakhyo/uniface/releases/download/weights/2d106det.onnx',
LandmarkWeights.DEFAULT: "https://github.com/yakhyo/uniface/releases/download/weights/2d106det.onnx",
}
MODEL_SHA256: Dict[Enum, str] = {
# RetinaFace
RetinaFaceWeights.MNET_025: 'b7a7acab55e104dce6f32cdfff929bd83946da5cd869b9e2e9bdffafd1b7e4a5',
RetinaFaceWeights.MNET_050: 'd8977186f6037999af5b4113d42ba77a84a6ab0c996b17c713cc3d53b88bfc37',
RetinaFaceWeights.MNET_V1: '75c961aaf0aff03d13c074e9ec656e5510e174454dd4964a161aab4fe5f04153',
RetinaFaceWeights.MNET_V2: '3ca44c045651cabeed1193a1fae8946ad1f3a55da8fa74b341feab5a8319f757',
RetinaFaceWeights.RESNET18: 'e8b5ddd7d2c3c8f7c942f9f10cec09d8e319f78f09725d3f709631de34fb649d',
RetinaFaceWeights.RESNET34: 'bd0263dc2a465d32859555cb1741f2d98991eb0053696e8ee33fec583d30e630',
RetinaFaceWeights.MNET_025: "b7a7acab55e104dce6f32cdfff929bd83946da5cd869b9e2e9bdffafd1b7e4a5",
RetinaFaceWeights.MNET_050: "d8977186f6037999af5b4113d42ba77a84a6ab0c996b17c713cc3d53b88bfc37",
RetinaFaceWeights.MNET_V1: "75c961aaf0aff03d13c074e9ec656e5510e174454dd4964a161aab4fe5f04153",
RetinaFaceWeights.MNET_V2: "3ca44c045651cabeed1193a1fae8946ad1f3a55da8fa74b341feab5a8319f757",
RetinaFaceWeights.RESNET18: "e8b5ddd7d2c3c8f7c942f9f10cec09d8e319f78f09725d3f709631de34fb649d",
RetinaFaceWeights.RESNET34: "bd0263dc2a465d32859555cb1741f2d98991eb0053696e8ee33fec583d30e630",
# MobileFace
MobileFaceWeights.MNET_025: 'eeda7d23d9c2b40cf77fa8da8e895b5697465192648852216074679657f8ee8b',
MobileFaceWeights.MNET_V2: '38b148284dd48cc898d5d4453104252fbdcbacc105fe3f0b80e78954d9d20d89',
MobileFaceWeights.MNET_V3_SMALL: 'd4acafa1039a82957aa8a9a1dac278a401c353a749c39df43de0e29cc1c127c3',
MobileFaceWeights.MNET_V3_LARGE: '0e48f8e11f070211716d03e5c65a3db35a5e917cfb5bc30552358629775a142a',
MobileFaceWeights.MNET_025: "eeda7d23d9c2b40cf77fa8da8e895b5697465192648852216074679657f8ee8b",
MobileFaceWeights.MNET_V2: "38b148284dd48cc898d5d4453104252fbdcbacc105fe3f0b80e78954d9d20d89",
MobileFaceWeights.MNET_V3_SMALL: "d4acafa1039a82957aa8a9a1dac278a401c353a749c39df43de0e29cc1c127c3",
MobileFaceWeights.MNET_V3_LARGE: "0e48f8e11f070211716d03e5c65a3db35a5e917cfb5bc30552358629775a142a",
# SphereFace
SphereFaceWeights.SPHERE20: 'c02878cf658eb1861f580b7e7144b0d27cc29c440bcaa6a99d466d2854f14c9d',
SphereFaceWeights.SPHERE36: '13b3890cd5d7dec2b63f7c36fd7ce07403e5a0bbb701d9647c0289e6cbe7bb20',
SphereFaceWeights.SPHERE20: "c02878cf658eb1861f580b7e7144b0d27cc29c440bcaa6a99d466d2854f14c9d",
SphereFaceWeights.SPHERE36: "13b3890cd5d7dec2b63f7c36fd7ce07403e5a0bbb701d9647c0289e6cbe7bb20",
# ArcFace
ArcFaceWeights.MNET: '9cc6e4a75f0e2bf0b1aed94578f144d15175f357bdc05e815e5c4a02b319eb4f',
ArcFaceWeights.RESNET: '4c06341c33c2ca1f86781dab0e829f88ad5b64be9fba56e56bc9ebdefc619e43',
ArcFaceWeights.MNET: "9cc6e4a75f0e2bf0b1aed94578f144d15175f357bdc05e815e5c4a02b319eb4f",
ArcFaceWeights.RESNET: "4c06341c33c2ca1f86781dab0e829f88ad5b64be9fba56e56bc9ebdefc619e43",
# SCRFD
SCRFDWeights.SCRFD_10G_KPS: '5838f7fe053675b1c7a08b633df49e7af5495cee0493c7dcf6697200b85b5b91',
SCRFDWeights.SCRFD_500M_KPS: '5e4447f50245bbd7966bd6c0fa52938c61474a04ec7def48753668a9d8b4ea3a',
SCRFDWeights.SCRFD_10G_KPS: "5838f7fe053675b1c7a08b633df49e7af5495cee0493c7dcf6697200b85b5b91",
SCRFDWeights.SCRFD_500M_KPS: "5e4447f50245bbd7966bd6c0fa52938c61474a04ec7def48753668a9d8b4ea3a",
# DDAMFN
DDAMFNWeights.AFFECNET7: '10535bf8b6afe8e9d6ae26cea6c3add9a93036e9addb6adebfd4a972171d015d',
DDAMFNWeights.AFFECNET8: '8c66963bc71db42796a14dfcbfcd181b268b65a3fc16e87147d6a3a3d7e0f487',
DDAMFNWeights.AFFECNET7: "10535bf8b6afe8e9d6ae26cea6c3add9a93036e9addb6adebfd4a972171d015d",
DDAMFNWeights.AFFECNET8: "8c66963bc71db42796a14dfcbfcd181b268b65a3fc16e87147d6a3a3d7e0f487",
# AgeGender
AgeGenderWeights.DEFAULT: '4fde69b1c810857b88c64a335084f1c3fe8f01246c9a191b48c7bb756d6652fb',
AgeGenderWeights.DEFAULT: "4fde69b1c810857b88c64a335084f1c3fe8f01246c9a191b48c7bb756d6652fb",
# Landmark
LandmarkWeights.DEFAULT: 'f001b856447c413801ef5c42091ed0cd516fcd21f2d6b79635b1e733a7109dbf',
LandmarkWeights.DEFAULT: "f001b856447c413801ef5c42091ed0cd516fcd21f2d6b79635b1e733a7109dbf",
}
CHUNK_SIZE = 8192

View File

@@ -3,18 +3,19 @@
# GitHub: https://github.com/yakhyo
import numpy as np
from typing import Tuple, Dict, Any, List
from typing import Any, Dict, List
import numpy as np
from .scrfd import SCRFD
from .base import BaseDetector
from .retinaface import RetinaFace
from .scrfd import SCRFD
# Global cache for detector instances
_detector_cache: Dict[str, BaseDetector] = {}
def detect_faces(image: np.ndarray, method: str = 'retinaface', **kwargs) -> List[Dict[str, Any]]:
def detect_faces(image: np.ndarray, method: str = "retinaface", **kwargs) -> List[Dict[str, Any]]:
"""
High-level face detection function.
@@ -38,7 +39,7 @@ def detect_faces(image: np.ndarray, method: str = 'retinaface', **kwargs) -> Lis
... print(f"BBox: {face['bbox']}")
"""
method_name = method.lower()
sorted_kwargs = sorted(kwargs.items())
cache_key = f"{method_name}_{str(sorted_kwargs)}"
@@ -50,7 +51,7 @@ def detect_faces(image: np.ndarray, method: str = 'retinaface', **kwargs) -> Lis
return detector.detect(image)
def create_detector(method: str = 'retinaface', **kwargs) -> BaseDetector:
def create_detector(method: str = "retinaface", **kwargs) -> BaseDetector:
"""
Factory function to create face detectors.
@@ -88,18 +89,15 @@ def create_detector(method: str = 'retinaface', **kwargs) -> BaseDetector:
"""
method = method.lower()
if method == 'retinaface':
if method == "retinaface":
return RetinaFace(**kwargs)
elif method == 'scrfd':
elif method == "scrfd":
return SCRFD(**kwargs)
else:
available_methods = ['retinaface', 'scrfd']
raise ValueError(
f"Unsupported detection method: '{method}'. "
f"Available methods: {available_methods}"
)
available_methods = ["retinaface", "scrfd"]
raise ValueError(f"Unsupported detection method: '{method}'. Available methods: {available_methods}")
def list_available_detectors() -> Dict[str, Dict[str, Any]]:
@@ -110,36 +108,36 @@ def list_available_detectors() -> Dict[str, Dict[str, Any]]:
Dict[str, Dict[str, Any]]: Dictionary of detector information
"""
return {
'retinaface': {
'description': 'RetinaFace detector with high accuracy',
'supports_landmarks': True,
'paper': 'https://arxiv.org/abs/1905.00641',
'default_params': {
'model_name': 'mnet_v2',
'conf_thresh': 0.5,
'nms_thresh': 0.4,
'input_size': (640, 640)
}
"retinaface": {
"description": "RetinaFace detector with high accuracy",
"supports_landmarks": True,
"paper": "https://arxiv.org/abs/1905.00641",
"default_params": {
"model_name": "mnet_v2",
"conf_thresh": 0.5,
"nms_thresh": 0.4,
"input_size": (640, 640),
},
},
"scrfd": {
"description": "SCRFD detector - fast and accurate with efficient architecture",
"supports_landmarks": True,
"paper": "https://arxiv.org/abs/2105.04714",
"default_params": {
"model_name": "scrfd_10g_kps",
"conf_thresh": 0.5,
"nms_thresh": 0.4,
"input_size": (640, 640),
},
},
'scrfd': {
'description': 'SCRFD detector - fast and accurate with efficient architecture',
'supports_landmarks': True,
'paper': 'https://arxiv.org/abs/2105.04714',
'default_params': {
'model_name': 'scrfd_10g_kps',
'conf_thresh': 0.5,
'nms_thresh': 0.4,
'input_size': (640, 640)
}
}
}
__all__ = [
'detect_faces',
'create_detector',
'list_available_detectors',
'SCRFD',
'RetinaFace',
'BaseDetector',
"detect_faces",
"create_detector",
"list_available_detectors",
"SCRFD",
"RetinaFace",
"BaseDetector",
]

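Because `detect_faces` keys its cache on the method name plus the sorted kwargs, repeated calls with identical arguments reuse one detector instance rather than reloading the model. A quick sketch:

```python
import numpy as np

from uniface import detect_faces

image = np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)

# First call builds and caches a RetinaFace under the key
# "retinaface_[('conf_thresh', 0.8)]"; the second call reuses it.
faces = detect_faces(image, method="retinaface", conf_thresh=0.8)
faces = detect_faces(image, method="retinaface", conf_thresh=0.8)

for face in faces:
    print(face["bbox"], face["confidence"])
```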
View File

@@ -6,9 +6,10 @@
Base classes for face detection.
"""
import numpy as np
from abc import ABC, abstractmethod
from typing import Tuple, Dict, Any
from typing import Any, Dict, Tuple
import numpy as np
class BaseDetector(ABC):
@@ -84,7 +85,7 @@ class BaseDetector(ABC):
Returns:
bool: True if landmarks are supported, False otherwise
"""
return hasattr(self, '_supports_landmarks') and self._supports_landmarks
return hasattr(self, "_supports_landmarks") and self._supports_landmarks
def get_info(self) -> Dict[str, Any]:
"""
@@ -94,7 +95,7 @@ class BaseDetector(ABC):
Dict[str, Any]: Detector information
"""
return {
'name': self.__class__.__name__,
'supports_landmarks': self._supports_landmarks,
'config': self.config
"name": self.__class__.__name__,
"supports_landmarks": self._supports_landmarks,
"config": self.config,
}

View File

@@ -2,22 +2,22 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
from typing import Any, Dict, List, Literal, Tuple
import numpy as np
from typing import Tuple, List, Literal, Dict, Any
from uniface.constants import RetinaFaceWeights
from uniface.log import Logger
from uniface.model_store import verify_model_weights
from uniface.constants import RetinaFaceWeights
from uniface.onnx_utils import create_onnx_session
from .base import BaseDetector
from .utils import (
decode_boxes,
decode_landmarks,
generate_anchors,
non_max_supression,
resize_image,
decode_boxes,
generate_anchors,
decode_landmarks
)
@@ -59,13 +59,13 @@ class RetinaFace(BaseDetector):
super().__init__(**kwargs)
self._supports_landmarks = True # RetinaFace supports landmarks
self.model_name = kwargs.get('model_name', RetinaFaceWeights.MNET_V2)
self.conf_thresh = kwargs.get('conf_thresh', 0.5)
self.nms_thresh = kwargs.get('nms_thresh', 0.4)
self.pre_nms_topk = kwargs.get('pre_nms_topk', 5000)
self.post_nms_topk = kwargs.get('post_nms_topk', 750)
self.dynamic_size = kwargs.get('dynamic_size', False)
self.input_size = kwargs.get('input_size', (640, 640))
self.model_name = kwargs.get("model_name", RetinaFaceWeights.MNET_V2)
self.conf_thresh = kwargs.get("conf_thresh", 0.5)
self.nms_thresh = kwargs.get("nms_thresh", 0.4)
self.pre_nms_topk = kwargs.get("pre_nms_topk", 5000)
self.post_nms_topk = kwargs.get("post_nms_topk", 750)
self.dynamic_size = kwargs.get("dynamic_size", False)
self.input_size = kwargs.get("input_size", (640, 640))
Logger.info(
f"Initializing RetinaFace with model={self.model_name}, conf_thresh={self.conf_thresh}, nms_thresh={self.nms_thresh}, "
@@ -133,7 +133,7 @@ class RetinaFace(BaseDetector):
image: np.ndarray,
max_num: int = 0,
metric: Literal["default", "max"] = "max",
center_weight: float = 2.0
center_weight: float = 2.0,
) -> List[Dict[str, Any]]:
"""
Perform face detection on an input image and return bounding boxes and facial landmarks.
@@ -178,14 +178,16 @@ class RetinaFace(BaseDetector):
# Calculate offsets from image center
center = (original_height // 2, original_width // 2)
offsets = np.vstack([
(detections[:, 0] + detections[:, 2]) / 2 - center[1],
(detections[:, 1] + detections[:, 3]) / 2 - center[0]
])
offsets = np.vstack(
[
(detections[:, 0] + detections[:, 2]) / 2 - center[1],
(detections[:, 1] + detections[:, 3]) / 2 - center[0],
]
)
offset_dist_squared = np.sum(np.power(offsets, 2.0), axis=0)
# Calculate scores based on the chosen metric
if metric == 'max':
if metric == "max":
scores = areas
else:
scores = areas - offset_dist_squared * center_weight
@@ -199,15 +201,17 @@ class RetinaFace(BaseDetector):
faces = []
for i in range(detections.shape[0]):
face_dict = {
'bbox': detections[i, :4].astype(float).tolist(),
'confidence': detections[i, 4].item(),
'landmarks': landmarks[i].astype(float).tolist()
"bbox": detections[i, :4].astype(float).tolist(),
"confidence": detections[i, 4].item(),
"landmarks": landmarks[i].astype(float).tolist(),
}
faces.append(face_dict)
return faces
def postprocess(self, outputs: List[np.ndarray], resize_factor: float, shape: Tuple[int, int]) -> Tuple[np.ndarray, np.ndarray]:
def postprocess(
self, outputs: List[np.ndarray], resize_factor: float, shape: Tuple[int, int]
) -> Tuple[np.ndarray, np.ndarray]:
"""
Process the model outputs into final detection results.
@@ -226,7 +230,11 @@ class RetinaFace(BaseDetector):
- landmarks (np.ndarray): Array of detected facial landmarks.
Shape: (num_detections, 5, 2), where each row contains 5 landmark points (x, y).
"""
loc, conf, landmarks = outputs[0].squeeze(0), outputs[1].squeeze(0), outputs[2].squeeze(0)
loc, conf, landmarks = (
outputs[0].squeeze(0),
outputs[1].squeeze(0),
outputs[2].squeeze(0),
)
# Decode boxes and landmarks
boxes = decode_boxes(loc, self._priors)
@@ -242,7 +250,7 @@ class RetinaFace(BaseDetector):
boxes, landmarks, scores = boxes[mask], landmarks[mask], scores[mask]
# Sort by scores
order = scores.argsort()[::-1][:self.pre_nms_topk]
order = scores.argsort()[::-1][: self.pre_nms_topk]
boxes, landmarks, scores = boxes[order], landmarks[order], scores[order]
# Apply NMS
@@ -251,13 +259,22 @@ class RetinaFace(BaseDetector):
detections, landmarks = detections[keep], landmarks[keep]
# Keep top-k detections
detections, landmarks = detections[:self.post_nms_topk], landmarks[:self.post_nms_topk]
detections, landmarks = (
detections[: self.post_nms_topk],
landmarks[: self.post_nms_topk],
)
landmarks = landmarks.reshape(-1, 5, 2).astype(np.int32)
return detections, landmarks
def _scale_detections(self, boxes: np.ndarray, landmarks: np.ndarray, resize_factor: float, shape: Tuple[int, int]) -> Tuple[np.ndarray, np.ndarray]:
def _scale_detections(
self,
boxes: np.ndarray,
landmarks: np.ndarray,
resize_factor: float,
shape: Tuple[int, int],
) -> Tuple[np.ndarray, np.ndarray]:
# Scale bounding boxes and landmarks to the original image size.
bbox_scale = np.array([shape[0], shape[1]] * 2)
boxes = boxes * bbox_scale / resize_factor
@@ -276,12 +293,13 @@ def draw_bbox(frame, bbox, score, color=(0, 255, 0), thickness=2):
def draw_keypoints(frame, points, color=(0, 0, 255), radius=2):
for (x, y) in points.astype(np.int32):
for x, y in points.astype(np.int32):
cv2.circle(frame, (int(x), int(y)), radius, color, -1)
if __name__ == "__main__":
import cv2
detector = RetinaFace(model_name=RetinaFaceWeights.MNET_050)
print(detector.get_info())
cap = cv2.VideoCapture(0)
@@ -304,9 +322,9 @@ if __name__ == "__main__":
# Process each detected face
for face in faces:
# Extract bbox and landmarks from dictionary
bbox = face['bbox'] # [x1, y1, x2, y2]
landmarks = face['landmarks'] # [[x1, y1], [x2, y2], ...]
confidence = face['confidence']
bbox = face["bbox"] # [x1, y1, x2, y2]
landmarks = face["landmarks"] # [[x1, y1], [x2, y2], ...]
confidence = face["confidence"]
# Pass bbox and confidence separately
draw_bbox(frame, bbox, confidence)
@@ -318,8 +336,15 @@ if __name__ == "__main__":
draw_keypoints(frame, points)
# Display face count
cv2.putText(frame, f"Faces: {len(faces)}", (10, 30),
cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
cv2.putText(
frame,
f"Faces: {len(faces)}",
(10, 30),
cv2.FONT_HERSHEY_SIMPLEX,
0.7,
(255, 255, 255),
2,
)
cv2.imshow("FaceDetection", frame)
if cv2.waitKey(1) & 0xFF == ord("q"):

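The `max_num` filtering above ranks detections by raw area when `metric="max"`, otherwise by area penalized by squared distance from the image center. The same scoring, isolated into a standalone sketch with made-up boxes:

```python
import numpy as np

# Columns: x1, y1, x2, y2, confidence
detections = np.array(
    [
        [10, 10, 110, 110, 0.9],    # large face near a corner
        [300, 220, 360, 280, 0.8],  # smaller face near the center
    ]
)
height, width, center_weight = 480, 640, 2.0

areas = (detections[:, 2] - detections[:, 0]) * (detections[:, 3] - detections[:, 1])
center = (height // 2, width // 2)
offsets = np.vstack(
    [
        (detections[:, 0] + detections[:, 2]) / 2 - center[1],
        (detections[:, 1] + detections[:, 3]) / 2 - center[0],
    ]
)
offset_dist_squared = np.sum(np.power(offsets, 2.0), axis=0)

# "default" metric: the small centered face now outranks the big corner one
scores = areas - offset_dist_squared * center_weight
print(scores.argsort()[::-1])  # [1 0]
```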
View File

@@ -173,7 +173,11 @@ class SCRFD(BaseDetector):
return scores_list, bboxes_list, kpss_list
def detect(
self, image: np.ndarray, max_num: int = 0, metric: Literal["default", "max"] = "max", center_weight: float = 2
self,
image: np.ndarray,
max_num: int = 0,
metric: Literal["default", "max"] = "max",
center_weight: float = 2,
) -> List[Dict[str, Any]]:
"""
Perform face detection on an input image and return bounding boxes and facial landmarks.
@@ -311,7 +315,15 @@ if __name__ == "__main__":
draw_keypoints(frame, points)
# Display face count
cv2.putText(frame, f"Faces: {len(faces)}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
cv2.putText(
frame,
f"Faces: {len(faces)}",
(10, 30),
cv2.FONT_HERSHEY_SIMPLEX,
0.7,
(255, 255, 255),
2,
)
cv2.imshow("FaceDetection", frame)
if cv2.waitKey(1) & 0xFF == ord("q"):

View File

@@ -2,12 +2,12 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
import cv2
import math
import itertools
import numpy as np
import math
from typing import List, Tuple
from typing import Tuple, List
import cv2
import numpy as np
def resize_image(frame, target_shape: Tuple[int, int] = (640, 640)) -> Tuple[np.ndarray, float]:
@@ -59,12 +59,7 @@ def generate_anchors(image_size: Tuple[int, int] = (640, 640)) -> np.ndarray:
min_sizes = [[16, 32], [64, 128], [256, 512]]
anchors = []
feature_maps = [
[
math.ceil(image_size[0] / step),
math.ceil(image_size[1] / step)
] for step in steps
]
feature_maps = [[math.ceil(image_size[0] / step), math.ceil(image_size[1] / step)] for step in steps]
for k, (map_height, map_width) in enumerate(feature_maps):
step = steps[k]

View File

@@ -2,13 +2,18 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
from typing import Tuple, Union
import cv2
import numpy as np
from skimage.transform import SimilarityTransform
from typing import Tuple, Union
__all__ = ["face_alignment", "compute_similarity", "bbox_center_alignment", "transform_points_2d"]
__all__ = [
"face_alignment",
"compute_similarity",
"bbox_center_alignment",
"transform_points_2d",
]
# Reference alignment for facial landmarks (ArcFace)
@@ -18,9 +23,9 @@ reference_alignment: np.ndarray = np.array(
[73.5318, 51.5014],
[56.0252, 71.7366],
[41.5493, 92.3655],
[70.7299, 92.2041]
[70.7299, 92.2041],
],
dtype=np.float32
dtype=np.float32,
)
@@ -72,7 +77,11 @@ def estimate_norm(landmark: np.ndarray, image_size: Union[int, Tuple[int, int]]
return matrix, inverse_matrix
def face_alignment(image: np.ndarray, landmark: np.ndarray, image_size: Union[int, Tuple[int, int]] = 112) -> Tuple[np.ndarray, np.ndarray]:
def face_alignment(
image: np.ndarray,
landmark: np.ndarray,
image_size: Union[int, Tuple[int, int]] = 112,
) -> Tuple[np.ndarray, np.ndarray]:
"""
Align the face in the input image based on the given facial landmarks.

View File

@@ -2,11 +2,11 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
from .models import Landmark106
from .base import BaseLandmarker
from .models import Landmark106
def create_landmarker(method: str = '2d106det', **kwargs) -> BaseLandmarker:
def create_landmarker(method: str = "2d106det", **kwargs) -> BaseLandmarker:
"""
Factory function to create facial landmark predictors.
@@ -18,15 +18,11 @@ def create_landmarker(method: str = '2d106det', **kwargs) -> BaseLandmarker:
Initialized landmarker instance.
"""
method = method.lower()
if method == '2d106det':
if method == "2d106det":
return Landmark106(**kwargs)
else:
available = ['2d106det']
available = ["2d106det"]
raise ValueError(f"Unsupported method: '{method}'. Available: {available}")
__all__ = [
"create_landmarker",
"Landmark106",
"BaseLandmarker"
]
__all__ = ["create_landmarker", "Landmark106", "BaseLandmarker"]

View File

@@ -3,6 +3,7 @@
# GitHub: https://github.com/yakhyo
from abc import ABC, abstractmethod
import numpy as np
@@ -10,6 +11,7 @@ class BaseLandmarker(ABC):
"""
Abstract Base Class for all facial landmark models.
"""
@abstractmethod
def get_landmarks(self, image: np.ndarray, bbox: np.ndarray) -> np.ndarray:
"""

View File

@@ -2,18 +2,20 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
import cv2
import numpy as np
from typing import Tuple
from uniface.log import Logger
import cv2
import numpy as np
from uniface.constants import LandmarkWeights
from uniface.model_store import verify_model_weights
from uniface.face_utils import bbox_center_alignment, transform_points_2d
from uniface.log import Logger
from uniface.model_store import verify_model_weights
from uniface.onnx_utils import create_onnx_session
from .base import BaseLandmarker
__all__ = ['Landmark']
__all__ = ["Landmark"]
class Landmark106(BaseLandmarker):
@@ -40,15 +42,13 @@ class Landmark106(BaseLandmarker):
>>> print(landmarks.shape)
(106, 2)
"""
def __init__(
self,
model_name: LandmarkWeights = LandmarkWeights.DEFAULT,
input_size: Tuple[int, int] = (192, 192)
input_size: Tuple[int, int] = (192, 192),
) -> None:
Logger.info(
f"Initializing Facial Landmark with model={model_name}, "
f"input_size={input_size}"
)
Logger.info(f"Initializing Facial Landmark with model={model_name}, input_size={input_size}")
self.input_size = input_size
self.input_std = 1.0
self.input_mean = 0.0
@@ -83,7 +83,7 @@ class Landmark106(BaseLandmarker):
except Exception as e:
Logger.error(f"Failed to load landmark model from '{self.model_path}'", exc_info=True)
raise RuntimeError(f"Failed to initialize landmark model: {e}")
raise RuntimeError(f"Failed to initialize landmark model: {e}") from e
def preprocess(self, image: np.ndarray, bbox: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
"""Prepares a face crop for inference.
@@ -108,8 +108,11 @@ class Landmark106(BaseLandmarker):
aligned_face, transform_matrix = bbox_center_alignment(image, center, self.input_size[0], scale, 0.0)
face_blob = cv2.dnn.blobFromImage(
aligned_face, 1.0 / self.input_std, self.input_size,
(self.input_mean, self.input_mean, self.input_mean), swapRB=True
aligned_face,
1.0 / self.input_std,
self.input_size,
(self.input_mean, self.input_mean, self.input_mean),
swapRB=True,
)
return face_blob, transform_matrix
@@ -129,7 +132,7 @@ class Landmark106(BaseLandmarker):
"""
landmarks = predictions.reshape((-1, 2))
landmarks[:, 0:2] += 1
landmarks[:, 0:2] *= (self.input_size[0] // 2)
landmarks[:, 0:2] *= self.input_size[0] // 2
inverse_matrix = cv2.invertAffineTransform(transform_matrix)
landmarks = transform_points_2d(landmarks, inverse_matrix)
@@ -149,14 +152,11 @@ class Landmark106(BaseLandmarker):
np.ndarray: An array of predicted landmark points with shape (106, 2).
"""
face_blob, transform_matrix = self.preprocess(image, bbox)
raw_predictions = self.session.run(
self.output_names, {self.input_names[0]: face_blob}
)[0][0]
raw_predictions = self.session.run(self.output_names, {self.input_names[0]: face_blob})[0][0]
landmarks = self.postprocess(raw_predictions, transform_matrix)
return landmarks
# Testing code
if __name__ == "__main__":
from uniface.detection import RetinaFace
@@ -183,21 +183,21 @@ if __name__ == "__main__":
if not faces:
cv2.imshow("Facial Landmark Detection", frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
if cv2.waitKey(1) & 0xFF == ord("q"):
break
continue
# 3. Loop through the list of face dictionaries
for face in faces:
# Extract the bounding box
bbox = face['bbox']
bbox = face["bbox"]
# 4. Get landmarks for the current face using its bounding box
landmarks = landmarker.get_landmarks(frame, bbox)
# --- Drawing Logic ---
# Draw the landmarks
for (x, y) in landmarks.astype(int):
for x, y in landmarks.astype(int):
cv2.circle(frame, (x, y), 2, (0, 255, 0), -1)
# Draw the bounding box
@@ -205,7 +205,7 @@ if __name__ == "__main__":
cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)
cv2.imshow("Facial Landmark Detection", frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
if cv2.waitKey(1) & 0xFF == ord("q"):
break
cap.release()

View File

@@ -19,10 +19,7 @@ def enable_logging(level=logging.INFO):
"""
Logger.handlers.clear()
handler = logging.StreamHandler()
handler.setFormatter(logging.Formatter(
"%(asctime)s - %(levelname)s - %(message)s",
datefmt="%Y-%m-%d %H:%M:%S"
))
handler.setFormatter(logging.Formatter("%(asctime)s - %(levelname)s - %(message)s", datefmt="%Y-%m-%d %H:%M:%S"))
Logger.addHandler(handler)
Logger.setLevel(level)
Logger.propagate = False

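Usage is a single opt-in call; the handler carries the timestamped format set above:

```python
import logging

from uniface.log import enable_logging

# Attach the timestamped stream handler and set the level on uniface's Logger
enable_logging(logging.DEBUG)
```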
View File

@@ -2,19 +2,19 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
import os
import hashlib
import os
import requests
from tqdm import tqdm
from uniface.log import Logger
import uniface.constants as const
from uniface.log import Logger
__all__ = ["verify_model_weights"]
__all__ = ['verify_model_weights']
def verify_model_weights(model_name: str, root: str = '~/.uniface/models') -> str:
def verify_model_weights(model_name: str, root: str = "~/.uniface/models") -> str:
"""
Ensure model weights are present, downloading and verifying them using SHA-256 if necessary.
@@ -53,7 +53,7 @@ def verify_model_weights(model_name: str, root: str = '~/.uniface/models') -> st
raise ValueError(f"No URL found for model '{model_name}'")
file_ext = os.path.splitext(url)[1]
model_path = os.path.normpath(os.path.join(root, f'{model_name.value}{file_ext}'))
model_path = os.path.normpath(os.path.join(root, f"{model_name.value}{file_ext}"))
if not os.path.exists(model_path):
Logger.info(f"Downloading model '{model_name}' from {url}")
@@ -62,7 +62,7 @@ def verify_model_weights(model_name: str, root: str = '~/.uniface/models') -> st
Logger.info(f"Successfully downloaded '{model_name}' to {model_path}")
except Exception as e:
Logger.error(f"Failed to download model '{model_name}': {e}")
raise ConnectionError(f"Download failed for '{model_name}'")
raise ConnectionError(f"Download failed for '{model_name}'") from e
expected_hash = const.MODEL_SHA256.get(model_name)
if expected_hash and not verify_file_hash(model_path, expected_hash):
@@ -78,18 +78,21 @@ def download_file(url: str, dest_path: str) -> None:
try:
response = requests.get(url, stream=True)
response.raise_for_status()
with open(dest_path, "wb") as file, tqdm(
desc=f"Downloading {dest_path}",
unit='B',
unit_scale=True,
unit_divisor=1024
) as progress:
with (
open(dest_path, "wb") as file,
tqdm(
desc=f"Downloading {dest_path}",
unit="B",
unit_scale=True,
unit_divisor=1024,
) as progress,
):
for chunk in response.iter_content(chunk_size=const.CHUNK_SIZE):
if chunk:
file.write(chunk)
progress.update(len(chunk))
except requests.RequestException as e:
raise ConnectionError(f"Failed to download file from {url}. Error: {e}")
raise ConnectionError(f"Failed to download file from {url}. Error: {e}") from e
def verify_file_hash(file_path: str, expected_hash: str) -> bool:

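`verify_file_hash` (defined just below this hunk) is what gates downloads on the pinned SHA-256 values. A standalone sketch of the chunked hashing it implies, reusing the `CHUNK_SIZE` of 8192 from `constants.py` (not the library's actual implementation):

```python
import hashlib

CHUNK_SIZE = 8192  # mirrors uniface.constants.CHUNK_SIZE

def sha256_of_file(path: str) -> str:
    """Hash a file in fixed-size chunks so large weights never load whole."""
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(CHUNK_SIZE), b""):
            digest.update(chunk)
    return digest.hexdigest()

# Compare against the pinned value in MODEL_SHA256 before trusting a file,
# e.g. AgeGenderWeights.DEFAULT:
expected = "4fde69b1c810857b88c64a335084f1c3fe8f01246c9a191b48c7bb756d6652fb"
# print(sha256_of_file("genderage.onnx") == expected)
```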
View File

@@ -77,10 +77,25 @@ def create_onnx_session(model_path: str, providers: List[str] = None) -> ort.Inf
if providers is None:
providers = get_available_providers()
# Suppress ONNX Runtime warnings (e.g., CoreML partition warnings)
# Log levels: 0=VERBOSE, 1=INFO, 2=WARNING, 3=ERROR, 4=FATAL
sess_options = ort.SessionOptions()
sess_options.log_severity_level = 3 # Only show ERROR and FATAL
try:
session = ort.InferenceSession(model_path, providers=providers)
session = ort.InferenceSession(model_path, sess_options=sess_options, providers=providers)
active_provider = session.get_providers()[0]
Logger.debug(f"Session created with provider: {active_provider}")
# Show user-friendly message about which provider is being used
provider_names = {
"CoreMLExecutionProvider": "CoreML (Apple Silicon)",
"CUDAExecutionProvider": "CUDA (NVIDIA GPU)",
"CPUExecutionProvider": "CPU",
}
provider_display = provider_names.get(active_provider, active_provider)
print(f"Model loaded ({provider_display})")
return session
except Exception as e:
Logger.error(f"Failed to create ONNX session: {e}", exc_info=True)

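The change above silences ONNX Runtime's partition warnings and reports the active provider. Equivalent standalone session setup, with a hypothetical model path:

```python
import onnxruntime as ort

# Raise the log threshold so only ERROR and FATAL messages surface
# (levels: 0=VERBOSE, 1=INFO, 2=WARNING, 3=ERROR, 4=FATAL)
sess_options = ort.SessionOptions()
sess_options.log_severity_level = 3

session = ort.InferenceSession(
    "model.onnx",  # hypothetical path
    sess_options=sess_options,
    providers=["CPUExecutionProvider"],
)
print(session.get_providers()[0])  # e.g. "CPUExecutionProvider"
```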
View File

@@ -2,12 +2,12 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
from typing import Dict
from .models import ArcFace, MobileFace, SphereFace
from .base import BaseRecognizer
from uniface.constants import ArcFaceWeights, MobileFaceWeights, SphereFaceWeights
def create_recognizer(method: str = 'arcface', **kwargs) -> BaseRecognizer:
from .base import BaseRecognizer
from .models import ArcFace, MobileFace, SphereFace
def create_recognizer(method: str = "arcface", **kwargs) -> BaseRecognizer:
"""
Factory function to create face recognizers.
@@ -44,20 +44,21 @@ def create_recognizer(method: str = 'arcface', **kwargs) -> BaseRecognizer:
"""
method = method.lower()
if method == 'arcface':
if method == "arcface":
return ArcFace(**kwargs)
elif method == 'mobileface':
elif method == "mobileface":
return MobileFace(**kwargs)
elif method == 'sphereface':
elif method == "sphereface":
return SphereFace(**kwargs)
else:
available = ['arcface', 'mobileface', 'sphereface']
available = ["arcface", "mobileface", "sphereface"]
raise ValueError(f"Unsupported method: '{method}'. Available: {available}")
__all__ = [
"create_recognizer",
"ArcFace",
"MobileFace",
"SphereFace",
"BaseRecognizer",
]
]

View File

@@ -3,13 +3,14 @@
# GitHub: https://github.com/yakhyo
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import List, Tuple, Union
import cv2
import numpy as np
from dataclasses import dataclass
from typing import Tuple, Union, List
from uniface.log import Logger
from uniface.face_utils import face_alignment
from uniface.log import Logger
from uniface.onnx_utils import create_onnx_session
@@ -18,6 +19,7 @@ class PreprocessConfig:
"""
Configuration for preprocessing images before feeding them into the model.
"""
input_mean: Union[float, List[float]] = 127.5
input_std: Union[float, List[float]] = 127.5
input_size: Tuple[int, int] = (112, 112)
@@ -28,6 +30,7 @@ class BaseRecognizer(ABC):
Abstract Base Class for all face recognition models.
It provides the core functionality for preprocessing, inference, and embedding extraction.
"""
@abstractmethod
def __init__(self, model_path: str, preprocessing: PreprocessConfig) -> None:
"""
@@ -73,7 +76,10 @@ class BaseRecognizer(ABC):
Logger.info(f"Successfully initialized face encoder from {self.model_path}")
except Exception as e:
Logger.error(f"Failed to load face encoder model from '{self.model_path}'", exc_info=True)
Logger.error(
f"Failed to load face encoder model from '{self.model_path}'",
exc_info=True,
)
raise RuntimeError(f"Failed to initialize model session for '{self.model_path}'") from e
def preprocess(self, face_img: np.ndarray) -> np.ndarray:
@@ -91,8 +97,9 @@ class BaseRecognizer(ABC):
if isinstance(self.input_std, (list, tuple)):
# Per-channel normalization
rgb_img = cv2.cvtColor(resized_img, cv2.COLOR_BGR2RGB).astype(np.float32)
normalized_img = (rgb_img - np.array(self.input_mean, dtype=np.float32)) / \
np.array(self.input_std, dtype=np.float32)
normalized_img = (rgb_img - np.array(self.input_mean, dtype=np.float32)) / np.array(
self.input_std, dtype=np.float32
)
# Change to NCHW (batch, channels, height, width)
blob = np.transpose(normalized_img, (2, 0, 1)) # CHW
@@ -104,7 +111,7 @@ class BaseRecognizer(ABC):
scalefactor=1.0 / self.input_std,
size=self.input_size,
mean=(self.input_mean, self.input_mean, self.input_mean),
swapRB=True # Convert BGR to RGB
swapRB=True, # Convert BGR to RGB
)
return blob

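The branch above picks per-channel normalization when mean/std are lists, and otherwise falls back to the scalar `cv2.dnn.blobFromImage` path. The per-channel path, sketched for a 112x112 BGR crop:

```python
import cv2
import numpy as np

input_mean = [127.5, 127.5, 127.5]
input_std = [127.5, 127.5, 127.5]

face_img = np.random.randint(0, 255, (112, 112, 3), dtype=np.uint8)  # BGR

rgb = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB).astype(np.float32)
normalized = (rgb - np.array(input_mean, dtype=np.float32)) / np.array(input_std, dtype=np.float32)

# HWC -> CHW, then add a batch axis: NCHW as ONNX Runtime expects
blob = np.transpose(normalized, (2, 0, 1))[np.newaxis, ...]
print(blob.shape)  # (1, 3, 112, 112)
```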
View File

@@ -6,6 +6,7 @@ from typing import Optional
from uniface.constants import ArcFaceWeights, MobileFaceWeights, SphereFaceWeights
from uniface.model_store import verify_model_weights
from .base import BaseRecognizer, PreprocessConfig
__all__ = ["ArcFace", "MobileFace", "SphereFace"]
@@ -33,14 +34,10 @@ class ArcFace(BaseRecognizer):
def __init__(
self,
model_name: ArcFaceWeights = ArcFaceWeights.MNET,
preprocessing: Optional[PreprocessConfig] = None
preprocessing: Optional[PreprocessConfig] = None,
) -> None:
if preprocessing is None:
preprocessing = PreprocessConfig(
input_mean=127.5,
input_std=127.5,
input_size=(112, 112)
)
preprocessing = PreprocessConfig(input_mean=127.5, input_std=127.5, input_size=(112, 112))
model_path = verify_model_weights(model_name)
super().__init__(model_path=model_path, preprocessing=preprocessing)
@@ -67,14 +64,10 @@ class MobileFace(BaseRecognizer):
def __init__(
self,
model_name: MobileFaceWeights = MobileFaceWeights.MNET_V2,
preprocessing: Optional[PreprocessConfig] = None
preprocessing: Optional[PreprocessConfig] = None,
) -> None:
if preprocessing is None:
preprocessing = PreprocessConfig(
input_mean=127.5,
input_std=127.5,
input_size=(112, 112)
)
preprocessing = PreprocessConfig(input_mean=127.5, input_std=127.5, input_size=(112, 112))
model_path = verify_model_weights(model_name)
super().__init__(model_path=model_path, preprocessing=preprocessing)
@@ -101,14 +94,10 @@ class SphereFace(BaseRecognizer):
def __init__(
self,
model_name: SphereFaceWeights = SphereFaceWeights.SPHERE20,
preprocessing: Optional[PreprocessConfig] = None
preprocessing: Optional[PreprocessConfig] = None,
) -> None:
if preprocessing is None:
preprocessing = PreprocessConfig(
input_mean=127.5,
input_std=127.5,
input_size=(112, 112)
)
preprocessing = PreprocessConfig(input_mean=127.5, input_std=127.5, input_size=(112, 112))
model_path = verify_model_weights(model_name)
super().__init__(model_path=model_path, preprocessing=preprocessing)

View File

@@ -2,9 +2,10 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
from typing import List, Union
import cv2
import numpy as np
from typing import List, Union
def draw_detections(
@@ -12,7 +13,7 @@ def draw_detections(
bboxes: Union[np.ndarray, List[List[float]]],
scores: Union[np.ndarray, List[float]],
landmarks: Union[np.ndarray, List[List[List[float]]]],
vis_threshold: float = 0.6
vis_threshold: float = 0.6,
):
"""
Draws bounding boxes, scores, and landmarks from separate lists onto an image.
@@ -42,8 +43,15 @@ def draw_detections(
cv2.rectangle(image, tuple(bbox[:2]), tuple(bbox[2:]), (0, 0, 255), thickness)
# Draw score
cv2.putText(image, f"{score:.2f}", (bbox[0], bbox[1] - 10),
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), thickness)
cv2.putText(
image,
f"{score:.2f}",
(bbox[0], bbox[1] - 10),
cv2.FONT_HERSHEY_SIMPLEX,
0.5,
(255, 255, 255),
thickness,
)
# Draw landmarks
for j, point in enumerate(landmark_set):