mirror of
https://github.com/yakhyo/uniface.git
synced 2025-12-30 00:52:25 +00:00
ref: Update some refactoring files for testing
This commit is contained in:
@@ -1,97 +1,68 @@
|
||||
# Scripts
|
||||
|
||||
Collection of example scripts demonstrating UniFace functionality.
|
||||
Scripts for testing UniFace features.
|
||||
|
||||
## Available Scripts
|
||||
|
||||
- `run_detection.py` - Face detection on images
|
||||
- `run_age_gender.py` - Age and gender prediction
|
||||
- `run_landmarks.py` - Facial landmark detection
|
||||
- `run_recognition.py` - Face recognition and embeddings
|
||||
- `run_face_search.py` - Face search and matching
|
||||
- `run_video_detection.py` - Video processing with face detection
|
||||
- `batch_process.py` - Batch processing of image folders
|
||||
- `download_model.py` - Download and manage models
|
||||
| Script | Description |
|
||||
|--------|-------------|
|
||||
| `run_detection.py` | Face detection on image or webcam |
|
||||
| `run_age_gender.py` | Age and gender prediction |
|
||||
| `run_landmarks.py` | 106-point facial landmark detection |
|
||||
| `run_recognition.py` | Face embedding extraction and comparison |
|
||||
| `run_face_search.py` | Real-time face matching against reference |
|
||||
| `run_video_detection.py` | Face detection on video files |
|
||||
| `batch_process.py` | Batch process folder of images |
|
||||
| `download_model.py` | Download model weights |
|
||||
| `sha256_generate.py` | Generate SHA256 hash for model files |
|
||||
|
||||
## Quick Start
|
||||
## Usage Examples
|
||||
|
||||
```bash
|
||||
# Face detection
|
||||
python scripts/run_detection.py --image assets/test.jpg
|
||||
python scripts/run_detection.py --webcam
|
||||
|
||||
# Age and gender detection
|
||||
# Age and gender
|
||||
python scripts/run_age_gender.py --image assets/test.jpg
|
||||
|
||||
# Webcam demo
|
||||
python scripts/run_age_gender.py --webcam
|
||||
|
||||
# Landmarks
|
||||
python scripts/run_landmarks.py --image assets/test.jpg
|
||||
python scripts/run_landmarks.py --webcam
|
||||
|
||||
# Face recognition (extract embedding)
|
||||
python scripts/run_recognition.py --image assets/test.jpg
|
||||
|
||||
# Face comparison
|
||||
python scripts/run_recognition.py --image1 face1.jpg --image2 face2.jpg
|
||||
|
||||
# Face search (match webcam against reference)
|
||||
python scripts/run_face_search.py --image reference.jpg
|
||||
|
||||
# Video processing
|
||||
python scripts/run_video_detection.py --input video.mp4 --output output.mp4
|
||||
|
||||
# Batch processing
|
||||
python scripts/batch_process.py --input images/ --output results/
|
||||
|
||||
# Download models
|
||||
python scripts/download_model.py --model-type retinaface
|
||||
python scripts/download_model.py # downloads all
|
||||
```
|
||||
|
||||
## Import Examples
|
||||
|
||||
The scripts use direct class imports for better developer experience:
|
||||
|
||||
```python
|
||||
# Face Detection
|
||||
from uniface.detection import RetinaFace, SCRFD
|
||||
|
||||
detector = RetinaFace() # or SCRFD()
|
||||
faces = detector.detect(image)
|
||||
|
||||
# Face Recognition
|
||||
from uniface.recognition import ArcFace, MobileFace, SphereFace
|
||||
|
||||
recognizer = ArcFace() # or MobileFace(), SphereFace()
|
||||
embedding = recognizer.get_embedding(image, landmarks)
|
||||
|
||||
# Age & Gender
|
||||
from uniface.attribute import AgeGender
|
||||
|
||||
age_gender = AgeGender()
|
||||
gender, age = age_gender.predict(image, bbox)
|
||||
|
||||
# Landmarks
|
||||
from uniface.landmark import Landmark106
|
||||
|
||||
landmarker = Landmark106()
|
||||
landmarks = landmarker.get_landmarks(image, bbox)
|
||||
```
|
||||
|
||||
## Available Classes
|
||||
|
||||
**Detection:**
|
||||
- `RetinaFace` - High accuracy face detection
|
||||
- `SCRFD` - Fast face detection
|
||||
|
||||
**Recognition:**
|
||||
- `ArcFace` - High accuracy face recognition
|
||||
- `MobileFace` - Lightweight face recognition
|
||||
- `SphereFace` - Alternative face recognition
|
||||
|
||||
**Attributes:**
|
||||
- `AgeGender` - Age and gender prediction
|
||||
|
||||
**Landmarks:**
|
||||
- `Landmark106` - 106-point facial landmarks
|
||||
|
||||
## Common Options
|
||||
|
||||
Most scripts support:
|
||||
- `--help` - Show usage information
|
||||
- `--verbose` - Enable detailed logging
|
||||
- `--detector` - Choose detector (retinaface, scrfd)
|
||||
- `--threshold` - Set confidence threshold
|
||||
| Option | Description |
|
||||
|--------|-------------|
|
||||
| `--image` | Path to input image |
|
||||
| `--webcam` | Use webcam instead of image |
|
||||
| `--detector` | Choose detector: `retinaface` or `scrfd` |
|
||||
| `--threshold` | Visualization confidence threshold (default: 0.6) |
|
||||
| `--save_dir` | Output directory (default: `outputs`) |
|
||||
|
||||
## Testing
|
||||
## Quick Test
|
||||
|
||||
Run basic functionality test:
|
||||
```bash
|
||||
python scripts/run_detection.py --image assets/test.jpg
|
||||
```
|
||||
|
||||
For comprehensive testing, see the main project tests:
|
||||
```bash
|
||||
pytest tests/
|
||||
```
|
||||
|
||||
@@ -1,156 +1,87 @@
|
||||
"""Batch Image Processing Script"""
|
||||
# Batch face detection on a folder of images
|
||||
# Usage: python batch_process.py --input images/ --output results/
|
||||
|
||||
import os
|
||||
import cv2
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
from tqdm import tqdm
|
||||
|
||||
from uniface import RetinaFace, SCRFD
|
||||
from uniface import SCRFD, RetinaFace
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
|
||||
def get_image_files(input_dir: Path, extensions: tuple) -> list:
|
||||
image_files = []
|
||||
files = []
|
||||
for ext in extensions:
|
||||
image_files.extend(input_dir.glob(f"*.{ext}"))
|
||||
image_files.extend(input_dir.glob(f"*.{ext.upper()}"))
|
||||
|
||||
return sorted(image_files)
|
||||
files.extend(input_dir.glob(f"*.{ext}"))
|
||||
files.extend(input_dir.glob(f"*.{ext.upper()}"))
|
||||
return sorted(files)
|
||||
|
||||
|
||||
def process_single_image(detector, image_path: Path, output_dir: Path,
|
||||
vis_threshold: float, skip_existing: bool) -> dict:
|
||||
output_path = output_dir / f"{image_path.stem}_detected{image_path.suffix}"
|
||||
|
||||
# Skip if already processed
|
||||
if skip_existing and output_path.exists():
|
||||
return {"status": "skipped", "faces": 0}
|
||||
|
||||
# Load image
|
||||
def process_image(detector, image_path: Path, output_path: Path, threshold: float) -> int:
|
||||
"""Process single image. Returns face count or -1 on error."""
|
||||
image = cv2.imread(str(image_path))
|
||||
if image is None:
|
||||
return {"status": "error", "error": "Failed to load image"}
|
||||
return -1
|
||||
|
||||
# Detect faces
|
||||
try:
|
||||
faces = detector.detect(image)
|
||||
except Exception as e:
|
||||
return {"status": "error", "error": str(e)}
|
||||
faces = detector.detect(image)
|
||||
|
||||
# Draw detections
|
||||
bboxes = [f['bbox'] for f in faces]
|
||||
scores = [f['confidence'] for f in faces]
|
||||
landmarks = [f['landmarks'] for f in faces]
|
||||
draw_detections(image, bboxes, scores, landmarks, vis_threshold=vis_threshold)
|
||||
# unpack face data for visualization
|
||||
bboxes = [f["bbox"] for f in faces]
|
||||
scores = [f["confidence"] for f in faces]
|
||||
landmarks = [f["landmarks"] for f in faces]
|
||||
draw_detections(image, bboxes, scores, landmarks, vis_threshold=threshold)
|
||||
|
||||
# Add face count
|
||||
cv2.putText(image, f"Faces: {len(faces)}", (10, 30),
|
||||
cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
|
||||
# Save result
|
||||
cv2.putText(image, f"Faces: {len(faces)}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
cv2.imwrite(str(output_path), image)
|
||||
|
||||
return {"status": "success", "faces": len(faces)}
|
||||
|
||||
|
||||
def batch_process(detector, input_dir: str, output_dir: str, extensions: tuple,
|
||||
vis_threshold: float, skip_existing: bool):
|
||||
input_path = Path(input_dir)
|
||||
output_path = Path(output_dir)
|
||||
|
||||
# Create output directory
|
||||
output_path.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Get image files
|
||||
image_files = get_image_files(input_path, extensions)
|
||||
|
||||
if not image_files:
|
||||
print(f"No image files found in '{input_dir}' with extensions {extensions}")
|
||||
return
|
||||
|
||||
print(f"Input: {input_dir}")
|
||||
print(f"Output: {output_dir}")
|
||||
print(f"Found {len(image_files)} images\n")
|
||||
|
||||
# Process images
|
||||
results = {
|
||||
"success": 0,
|
||||
"skipped": 0,
|
||||
"error": 0,
|
||||
"total_faces": 0
|
||||
}
|
||||
|
||||
with tqdm(image_files, desc="Processing images", unit="img") as pbar:
|
||||
for image_path in pbar:
|
||||
result = process_single_image(
|
||||
detector, image_path, output_path,
|
||||
vis_threshold, skip_existing
|
||||
)
|
||||
|
||||
if result["status"] == "success":
|
||||
results["success"] += 1
|
||||
results["total_faces"] += result["faces"]
|
||||
pbar.set_postfix({"faces": result["faces"]})
|
||||
elif result["status"] == "skipped":
|
||||
results["skipped"] += 1
|
||||
else:
|
||||
results["error"] += 1
|
||||
print(f"\nError processing {image_path.name}: {result.get('error', 'Unknown error')}")
|
||||
|
||||
# Print summary
|
||||
print(f"\nBatch processing complete!")
|
||||
print(f" Total images: {len(image_files)}")
|
||||
print(f" Successfully processed: {results['success']}")
|
||||
print(f" Skipped: {results['skipped']}")
|
||||
print(f" Errors: {results['error']}")
|
||||
print(f" Total faces detected: {results['total_faces']}")
|
||||
if results['success'] > 0:
|
||||
print(f" Average faces per image: {results['total_faces']/results['success']:.2f}")
|
||||
print(f"\nResults saved to: {output_dir}")
|
||||
return len(faces)
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="Batch process images with face detection")
|
||||
parser.add_argument("--input", type=str, required=True,
|
||||
help="Input directory containing images")
|
||||
parser.add_argument("--output", type=str, required=True,
|
||||
help="Output directory for processed images")
|
||||
parser.add_argument("--detector", type=str, default="retinaface",
|
||||
choices=['retinaface', 'scrfd'], help="Face detector to use")
|
||||
parser.add_argument("--threshold", type=float, default=0.6,
|
||||
help="Confidence threshold for visualization")
|
||||
parser.add_argument("--extensions", type=str, default="jpg,jpeg,png,bmp",
|
||||
help="Comma-separated list of image extensions")
|
||||
parser.add_argument("--skip_existing", action="store_true",
|
||||
help="Skip files that already exist in output directory")
|
||||
parser.add_argument("--verbose", action="store_true", help="Enable verbose logging")
|
||||
|
||||
parser.add_argument("--input", type=str, required=True, help="Input directory")
|
||||
parser.add_argument("--output", type=str, required=True, help="Output directory")
|
||||
parser.add_argument("--detector", type=str, default="retinaface", choices=["retinaface", "scrfd"])
|
||||
parser.add_argument("--threshold", type=float, default=0.6, help="Visualization threshold")
|
||||
parser.add_argument("--extensions", type=str, default="jpg,jpeg,png,bmp", help="Image extensions")
|
||||
args = parser.parse_args()
|
||||
|
||||
# Check input directory exists
|
||||
if not Path(args.input).exists():
|
||||
input_path = Path(args.input)
|
||||
output_path = Path(args.output)
|
||||
|
||||
if not input_path.exists():
|
||||
print(f"Error: Input directory '{args.input}' does not exist")
|
||||
return
|
||||
|
||||
if args.verbose:
|
||||
from uniface import enable_logging
|
||||
enable_logging()
|
||||
output_path.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Parse extensions
|
||||
extensions = tuple(ext.strip() for ext in args.extensions.split(','))
|
||||
extensions = tuple(ext.strip() for ext in args.extensions.split(","))
|
||||
image_files = get_image_files(input_path, extensions)
|
||||
|
||||
# Initialize detector
|
||||
print(f"Initializing detector: {args.detector}")
|
||||
if args.detector == 'retinaface':
|
||||
detector = RetinaFace()
|
||||
else:
|
||||
detector = SCRFD()
|
||||
print("Detector initialized\n")
|
||||
if not image_files:
|
||||
print(f"No images found with extensions {extensions}")
|
||||
return
|
||||
|
||||
# Process batch
|
||||
batch_process(detector, args.input, args.output, extensions,
|
||||
args.threshold, args.skip_existing)
|
||||
print(f"Found {len(image_files)} images")
|
||||
|
||||
detector = RetinaFace() if args.detector == "retinaface" else SCRFD()
|
||||
|
||||
success, errors, total_faces = 0, 0, 0
|
||||
|
||||
for img_path in tqdm(image_files, desc="Processing", unit="img"):
|
||||
out_path = output_path / f"{img_path.stem}_detected{img_path.suffix}"
|
||||
result = process_image(detector, img_path, out_path, args.threshold)
|
||||
|
||||
if result >= 0:
|
||||
success += 1
|
||||
total_faces += result
|
||||
else:
|
||||
errors += 1
|
||||
print(f"\nFailed: {img_path.name}")
|
||||
|
||||
print(f"\nDone! {success} processed, {errors} errors, {total_faces} faces total")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -1,76 +1,59 @@
|
||||
import argparse
|
||||
|
||||
from uniface.constants import (
|
||||
RetinaFaceWeights, SphereFaceWeights, MobileFaceWeights, ArcFaceWeights,
|
||||
SCRFDWeights, DDAMFNWeights, AgeGenderWeights, LandmarkWeights
|
||||
AgeGenderWeights,
|
||||
ArcFaceWeights,
|
||||
DDAMFNWeights,
|
||||
LandmarkWeights,
|
||||
MobileFaceWeights,
|
||||
RetinaFaceWeights,
|
||||
SCRFDWeights,
|
||||
SphereFaceWeights,
|
||||
)
|
||||
from uniface.model_store import verify_model_weights
|
||||
|
||||
|
||||
# All available model types
|
||||
ALL_MODEL_TYPES = {
|
||||
'retinaface': RetinaFaceWeights,
|
||||
'sphereface': SphereFaceWeights,
|
||||
'mobileface': MobileFaceWeights,
|
||||
'arcface': ArcFaceWeights,
|
||||
'scrfd': SCRFDWeights,
|
||||
'ddamfn': DDAMFNWeights,
|
||||
'agegender': AgeGenderWeights,
|
||||
'landmark': LandmarkWeights,
|
||||
MODEL_TYPES = {
|
||||
"retinaface": RetinaFaceWeights,
|
||||
"sphereface": SphereFaceWeights,
|
||||
"mobileface": MobileFaceWeights,
|
||||
"arcface": ArcFaceWeights,
|
||||
"scrfd": SCRFDWeights,
|
||||
"ddamfn": DDAMFNWeights,
|
||||
"agegender": AgeGenderWeights,
|
||||
"landmark": LandmarkWeights,
|
||||
}
|
||||
|
||||
|
||||
def download_models(model_enum):
|
||||
for weight in model_enum:
|
||||
print(f"Downloading: {weight.value}")
|
||||
try:
|
||||
verify_model_weights(weight)
|
||||
print(f" Done: {weight.value}")
|
||||
except Exception as e:
|
||||
print(f" Failed: {e}")
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="Download and verify model weights.")
|
||||
parser = argparse.ArgumentParser(description="Download model weights")
|
||||
parser.add_argument(
|
||||
"--model-type",
|
||||
type=str,
|
||||
choices=list(ALL_MODEL_TYPES.keys()),
|
||||
help="Model type to download (e.g. retinaface, arcface). If not specified, all models will be downloaded.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--model",
|
||||
type=str,
|
||||
help="Specific model to download (e.g. MNET_V2). For RetinaFace backward compatibility.",
|
||||
choices=list(MODEL_TYPES.keys()),
|
||||
help="Model type to download. If not specified, downloads all.",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.model and not args.model_type:
|
||||
# Backward compatibility - assume RetinaFace
|
||||
try:
|
||||
weight = RetinaFaceWeights[args.model]
|
||||
print(f"Downloading RetinaFace model: {weight.value}")
|
||||
verify_model_weights(weight)
|
||||
print("Model downloaded successfully.")
|
||||
except KeyError:
|
||||
print(f"Invalid RetinaFace model: {args.model}")
|
||||
print(f"Available models: {[m.name for m in RetinaFaceWeights]}")
|
||||
return
|
||||
|
||||
if args.model_type:
|
||||
# Download all models from specific type
|
||||
model_enum = ALL_MODEL_TYPES[args.model_type]
|
||||
print(f"Downloading all {args.model_type} models...")
|
||||
for weight in model_enum:
|
||||
print(f"Downloading: {weight.value}")
|
||||
try:
|
||||
verify_model_weights(weight)
|
||||
print(f"Downloaded: {weight.value}")
|
||||
except Exception as e:
|
||||
print(f"Failed to download {weight.value}: {e}")
|
||||
print(f"Downloading {args.model_type} models...")
|
||||
download_models(MODEL_TYPES[args.model_type])
|
||||
else:
|
||||
# Download all models from all types
|
||||
print("Downloading all models...")
|
||||
for model_type, model_enum in ALL_MODEL_TYPES.items():
|
||||
print(f"\nDownloading {model_type} models...")
|
||||
for weight in model_enum:
|
||||
print(f"Downloading: {weight.value}")
|
||||
try:
|
||||
verify_model_weights(weight)
|
||||
print(f"Downloaded: {weight.value}")
|
||||
except Exception as e:
|
||||
print(f"Failed to download {weight.value}: {e}")
|
||||
for name, model_enum in MODEL_TYPES.items():
|
||||
print(f"\n{name}:")
|
||||
download_models(model_enum)
|
||||
|
||||
print("\nDownload process completed.")
|
||||
print("\nDone!")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -1,158 +1,105 @@
|
||||
"""Age and Gender Detection Demo Script"""
|
||||
# Age and gender prediction on detected faces
|
||||
# Usage: python run_age_gender.py --image path/to/image.jpg
|
||||
# python run_age_gender.py --webcam
|
||||
|
||||
import os
|
||||
import cv2
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
from uniface import RetinaFace, SCRFD, AgeGender
|
||||
import cv2
|
||||
|
||||
from uniface import SCRFD, AgeGender, RetinaFace
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
|
||||
def process_image(detector, age_gender, image_path: str, save_dir: str = "outputs", vis_threshold: float = 0.6):
|
||||
def draw_age_gender_label(image, bbox, gender: str, age: int):
|
||||
"""Draw age/gender label above the bounding box."""
|
||||
x1, y1 = int(bbox[0]), int(bbox[1])
|
||||
text = f"{gender}, {age}y"
|
||||
(tw, th), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
|
||||
cv2.rectangle(image, (x1, y1 - th - 10), (x1 + tw + 10, y1), (0, 255, 0), -1)
|
||||
cv2.putText(image, text, (x1 + 5, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 2)
|
||||
|
||||
|
||||
def process_image(detector, age_gender, image_path: str, save_dir: str = "outputs", threshold: float = 0.6):
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
print(f"Error: Failed to load image from '{image_path}'")
|
||||
return
|
||||
|
||||
print(f"Processing: {image_path}")
|
||||
|
||||
# Detect faces
|
||||
faces = detector.detect(image)
|
||||
print(f" Detected {len(faces)} face(s)")
|
||||
print(f"Detected {len(faces)} face(s)")
|
||||
|
||||
if not faces:
|
||||
print(" No faces detected")
|
||||
return
|
||||
|
||||
# Draw detections
|
||||
bboxes = [f['bbox'] for f in faces]
|
||||
scores = [f['confidence'] for f in faces]
|
||||
landmarks = [f['landmarks'] for f in faces]
|
||||
draw_detections(image, bboxes, scores, landmarks, vis_threshold=vis_threshold)
|
||||
bboxes = [f["bbox"] for f in faces]
|
||||
scores = [f["confidence"] for f in faces]
|
||||
landmarks = [f["landmarks"] for f in faces]
|
||||
draw_detections(image, bboxes, scores, landmarks, vis_threshold=threshold)
|
||||
|
||||
# Predict and draw age/gender for each face
|
||||
for i, face in enumerate(faces):
|
||||
gender, age = age_gender.predict(image, face['bbox'])
|
||||
print(f" Face {i+1}: {gender}, {age} years old")
|
||||
gender, age = age_gender.predict(image, face["bbox"])
|
||||
print(f" Face {i + 1}: {gender}, {age} years old")
|
||||
draw_age_gender_label(image, face["bbox"], gender, age)
|
||||
|
||||
# Draw age and gender text
|
||||
bbox = face['bbox']
|
||||
x1, y1 = int(bbox[0]), int(bbox[1])
|
||||
text = f"{gender}, {age}y"
|
||||
|
||||
# Background rectangle for text
|
||||
(text_width, text_height), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
|
||||
cv2.rectangle(image, (x1, y1 - text_height - 10),
|
||||
(x1 + text_width + 10, y1), (0, 255, 0), -1)
|
||||
cv2.putText(image, text, (x1 + 5, y1 - 5),
|
||||
cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 2)
|
||||
|
||||
# Save result
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f"{Path(image_path).stem}_age_gender.jpg")
|
||||
cv2.imwrite(output_path, image)
|
||||
print(f"Output saved: {output_path}")
|
||||
|
||||
|
||||
def run_webcam(detector, age_gender, vis_threshold: float = 0.6):
|
||||
cap = cv2.VideoCapture(0)
|
||||
|
||||
def run_webcam(detector, age_gender, threshold: float = 0.6):
|
||||
cap = cv2.VideoCapture(0) # 0 = default webcam
|
||||
if not cap.isOpened():
|
||||
print("Cannot open webcam")
|
||||
return
|
||||
|
||||
print("Webcam opened")
|
||||
print("Press 'q' to quit\n")
|
||||
print("Press 'q' to quit")
|
||||
|
||||
frame_count = 0
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
frame = cv2.flip(frame, 1) # mirror for natural interaction
|
||||
if not ret:
|
||||
break
|
||||
|
||||
try:
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
faces = detector.detect(frame)
|
||||
|
||||
frame_count += 1
|
||||
# unpack face data for visualization
|
||||
bboxes = [f["bbox"] for f in faces]
|
||||
scores = [f["confidence"] for f in faces]
|
||||
landmarks = [f["landmarks"] for f in faces]
|
||||
draw_detections(frame, bboxes, scores, landmarks, vis_threshold=threshold)
|
||||
|
||||
# Detect faces
|
||||
faces = detector.detect(frame)
|
||||
for face in faces:
|
||||
gender, age = age_gender.predict(frame, face["bbox"]) # predict per face
|
||||
draw_age_gender_label(frame, face["bbox"], gender, age)
|
||||
|
||||
# Draw detections
|
||||
bboxes = [f['bbox'] for f in faces]
|
||||
scores = [f['confidence'] for f in faces]
|
||||
landmarks = [f['landmarks'] for f in faces]
|
||||
draw_detections(frame, bboxes, scores, landmarks, vis_threshold=vis_threshold)
|
||||
cv2.putText(frame, f"Faces: {len(faces)}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
cv2.imshow("Age & Gender Detection", frame)
|
||||
|
||||
# Predict and draw age/gender for each face
|
||||
for face in faces:
|
||||
gender, age = age_gender.predict(frame, face['bbox'])
|
||||
if cv2.waitKey(1) & 0xFF == ord("q"):
|
||||
break
|
||||
|
||||
# Draw age and gender text
|
||||
bbox = face['bbox']
|
||||
x1, y1 = int(bbox[0]), int(bbox[1])
|
||||
text = f"{gender}, {age}y"
|
||||
|
||||
# Background rectangle for text
|
||||
(text_width, text_height), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
|
||||
cv2.rectangle(frame, (x1, y1 - text_height - 10),
|
||||
(x1 + text_width + 10, y1), (0, 255, 0), -1)
|
||||
cv2.putText(frame, text, (x1 + 5, y1 - 5),
|
||||
cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 2)
|
||||
|
||||
# Add info
|
||||
cv2.putText(frame, f"Faces: {len(faces)}", (10, 30),
|
||||
cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
cv2.putText(frame, "Press 'q' to quit", (10, frame.shape[0] - 10),
|
||||
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
|
||||
|
||||
cv2.imshow("Age & Gender Detection", frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
except KeyboardInterrupt:
|
||||
print("\nInterrupted")
|
||||
finally:
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
print(f"\nProcessed {frame_count} frames")
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="Run age and gender detection")
|
||||
parser.add_argument("--image", type=str, help="Path to input image")
|
||||
parser.add_argument("--webcam", action="store_true", help="Use webcam instead of image")
|
||||
parser.add_argument("--detector", type=str, default="retinaface",
|
||||
choices=['retinaface', 'scrfd'], help="Face detector to use")
|
||||
parser.add_argument("--threshold", type=float, default=0.6,
|
||||
help="Confidence threshold for visualization")
|
||||
parser.add_argument("--save_dir", type=str, default="outputs",
|
||||
help="Directory to save output images")
|
||||
parser.add_argument("--verbose", action="store_true", help="Enable verbose logging")
|
||||
|
||||
parser.add_argument("--webcam", action="store_true", help="Use webcam")
|
||||
parser.add_argument("--detector", type=str, default="retinaface", choices=["retinaface", "scrfd"])
|
||||
parser.add_argument("--threshold", type=float, default=0.6, help="Visualization threshold")
|
||||
parser.add_argument("--save_dir", type=str, default="outputs")
|
||||
args = parser.parse_args()
|
||||
|
||||
# Validate input
|
||||
if not args.image and not args.webcam:
|
||||
parser.error("Either --image or --webcam must be specified")
|
||||
|
||||
if args.verbose:
|
||||
from uniface import enable_logging
|
||||
enable_logging()
|
||||
|
||||
# Initialize models
|
||||
print(f"Initializing detector: {args.detector}")
|
||||
if args.detector == 'retinaface':
|
||||
detector = RetinaFace()
|
||||
else:
|
||||
detector = SCRFD()
|
||||
|
||||
print("Initializing age/gender model...")
|
||||
detector = RetinaFace() if args.detector == "retinaface" else SCRFD()
|
||||
age_gender = AgeGender()
|
||||
print("Models initialized\n")
|
||||
|
||||
# Process
|
||||
if args.webcam:
|
||||
run_webcam(detector, age_gender, args.threshold)
|
||||
else:
|
||||
|
||||
@@ -1,79 +1,86 @@
|
||||
import os
|
||||
import cv2
|
||||
import time
|
||||
import argparse
|
||||
import numpy as np
|
||||
# Face detection on image or webcam
|
||||
# Usage: python run_detection.py --image path/to/image.jpg
|
||||
# python run_detection.py --webcam
|
||||
|
||||
from uniface.detection import RetinaFace, SCRFD
|
||||
import argparse
|
||||
import os
|
||||
|
||||
import cv2
|
||||
|
||||
from uniface.detection import SCRFD, RetinaFace
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
|
||||
def run_inference(detector, image_path: str, vis_threshold: float = 0.6, save_dir: str = "outputs"):
|
||||
def process_image(detector, image_path: str, threshold: float = 0.6, save_dir: str = "outputs"):
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
print(f"Error: Failed to load image from '{image_path}'")
|
||||
return
|
||||
|
||||
# 1. Get the list of face dictionaries from the detector
|
||||
faces = detector.detect(image)
|
||||
|
||||
if faces:
|
||||
# 2. Unpack the data into separate lists
|
||||
bboxes = [face['bbox'] for face in faces]
|
||||
scores = [face['confidence'] for face in faces]
|
||||
landmarks = [face['landmarks'] for face in faces]
|
||||
|
||||
# 3. Pass the unpacked lists to the drawing function
|
||||
draw_detections(image, bboxes, scores, landmarks, vis_threshold=0.6)
|
||||
|
||||
bboxes = [face["bbox"] for face in faces]
|
||||
scores = [face["confidence"] for face in faces]
|
||||
landmarks = [face["landmarks"] for face in faces]
|
||||
draw_detections(image, bboxes, scores, landmarks, vis_threshold=threshold)
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f"{os.path.splitext(os.path.basename(image_path))[0]}_out.jpg")
|
||||
cv2.imwrite(output_path, image)
|
||||
print(f"Output saved at: {output_path}")
|
||||
print(f"Output saved: {output_path}")
|
||||
|
||||
|
||||
def run_webcam(detector, threshold: float = 0.6):
|
||||
cap = cv2.VideoCapture(0) # 0 = default webcam
|
||||
if not cap.isOpened():
|
||||
print("Cannot open webcam")
|
||||
return
|
||||
|
||||
print("Press 'q' to quit")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
frame = cv2.flip(frame, 1) # mirror for natural interaction
|
||||
if not ret:
|
||||
break
|
||||
|
||||
faces = detector.detect(frame)
|
||||
|
||||
# unpack face data for visualization
|
||||
bboxes = [f["bbox"] for f in faces]
|
||||
scores = [f["confidence"] for f in faces]
|
||||
landmarks = [f["landmarks"] for f in faces]
|
||||
draw_detections(frame, bboxes, scores, landmarks, vis_threshold=threshold)
|
||||
|
||||
cv2.putText(frame, f"Faces: {len(faces)}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
cv2.imshow("Face Detection", frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord("q"):
|
||||
break
|
||||
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="Run face detection on an image.")
|
||||
parser.add_argument("--image", type=str, required=True, help="Path to the input image")
|
||||
parser.add_argument(
|
||||
"--method",
|
||||
type=str,
|
||||
default="retinaface",
|
||||
choices=['retinaface', 'scrfd'],
|
||||
help="Detection method to use."
|
||||
)
|
||||
parser.add_argument("--threshold", type=float, default=0.6, help="Visualization confidence threshold")
|
||||
parser.add_argument("--iterations", type=int, default=1, help="Number of inference runs for benchmarking")
|
||||
parser.add_argument("--save_dir", type=str, default="outputs", help="Directory to save output images")
|
||||
parser.add_argument("--verbose", action="store_true", help="Enable verbose logging")
|
||||
|
||||
parser = argparse.ArgumentParser(description="Run face detection")
|
||||
parser.add_argument("--image", type=str, help="Path to input image")
|
||||
parser.add_argument("--webcam", action="store_true", help="Use webcam")
|
||||
parser.add_argument("--method", type=str, default="retinaface", choices=["retinaface", "scrfd"])
|
||||
parser.add_argument("--threshold", type=float, default=0.6, help="Visualization threshold")
|
||||
parser.add_argument("--save_dir", type=str, default="outputs")
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.verbose:
|
||||
from uniface import enable_logging
|
||||
enable_logging()
|
||||
if not args.image and not args.webcam:
|
||||
parser.error("Either --image or --webcam must be specified")
|
||||
|
||||
print(f"Initializing detector: {args.method}")
|
||||
if args.method == 'retinaface':
|
||||
detector = RetinaFace()
|
||||
detector = RetinaFace() if args.method == "retinaface" else SCRFD()
|
||||
|
||||
if args.webcam:
|
||||
run_webcam(detector, args.threshold)
|
||||
else:
|
||||
detector = SCRFD()
|
||||
|
||||
avg_time = 0
|
||||
for i in range(args.iterations):
|
||||
start = time.time()
|
||||
run_inference(detector, args.image, args.threshold, args.save_dir)
|
||||
elapsed = time.time() - start
|
||||
print(f"[{i + 1}/{args.iterations}] Inference time: {elapsed:.4f} seconds")
|
||||
if i >= 0: # Avoid counting the first run if it includes model loading time
|
||||
avg_time += elapsed
|
||||
|
||||
if args.iterations > 1:
|
||||
# Adjust average calculation to exclude potential first-run overhead
|
||||
effective_iterations = max(1, args.iterations)
|
||||
print(
|
||||
f"\nAverage inference time over {effective_iterations} runs: {avg_time / effective_iterations:.4f} seconds")
|
||||
process_image(detector, args.image, args.threshold, args.save_dir)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -1,13 +1,25 @@
|
||||
# Real-time face search: match webcam faces against a reference image
|
||||
# Usage: python run_face_search.py --image reference.jpg
|
||||
|
||||
import argparse
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from uniface.detection import RetinaFace, SCRFD
|
||||
from uniface.detection import SCRFD, RetinaFace
|
||||
from uniface.face_utils import compute_similarity
|
||||
from uniface.recognition import ArcFace, MobileFace, SphereFace
|
||||
|
||||
|
||||
def get_recognizer(name: str):
|
||||
if name == "arcface":
|
||||
return ArcFace()
|
||||
elif name == "mobileface":
|
||||
return MobileFace()
|
||||
else:
|
||||
return SphereFace()
|
||||
|
||||
|
||||
def extract_reference_embedding(detector, recognizer, image_path: str) -> np.ndarray:
|
||||
image = cv2.imread(image_path)
|
||||
if image is None:
|
||||
@@ -17,44 +29,37 @@ def extract_reference_embedding(detector, recognizer, image_path: str) -> np.nda
|
||||
if not faces:
|
||||
raise RuntimeError("No faces found in reference image.")
|
||||
|
||||
# Get landmarks from the first detected face dictionary
|
||||
landmarks = np.array(faces[0]["landmarks"])
|
||||
|
||||
# Use normalized embedding for more reliable similarity comparison
|
||||
embedding = recognizer.get_normalized_embedding(image, landmarks)
|
||||
return embedding
|
||||
return recognizer.get_normalized_embedding(image, landmarks)
|
||||
|
||||
|
||||
def run_video(detector, recognizer, ref_embedding: np.ndarray, threshold: float = 0.4):
|
||||
cap = cv2.VideoCapture(0)
|
||||
cap = cv2.VideoCapture(0) # 0 = default webcam
|
||||
if not cap.isOpened():
|
||||
raise RuntimeError("Webcam could not be opened.")
|
||||
print("Webcam started. Press 'q' to quit.")
|
||||
|
||||
print("Press 'q' to quit")
|
||||
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
frame = cv2.flip(frame, 1) # mirror for natural interaction
|
||||
if not ret:
|
||||
break
|
||||
|
||||
faces = detector.detect(frame)
|
||||
|
||||
# Loop through each detected face
|
||||
for face in faces:
|
||||
# Extract bbox and landmarks from the dictionary
|
||||
bbox = face["bbox"]
|
||||
landmarks = np.array(face["landmarks"])
|
||||
|
||||
x1, y1, x2, y2 = map(int, bbox)
|
||||
|
||||
# Get the normalized embedding for the current face
|
||||
embedding = recognizer.get_normalized_embedding(frame, landmarks)
|
||||
sim = compute_similarity(ref_embedding, embedding) # compare with reference
|
||||
|
||||
# Compare with the reference embedding
|
||||
sim = compute_similarity(ref_embedding, embedding)
|
||||
|
||||
# Draw results
|
||||
# green = match, red = unknown
|
||||
label = f"Match ({sim:.2f})" if sim > threshold else f"Unknown ({sim:.2f})"
|
||||
color = (0, 255, 0) if sim > threshold else (0, 0, 255)
|
||||
|
||||
cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
|
||||
cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)
|
||||
|
||||
@@ -67,43 +72,20 @@ def run_video(detector, recognizer, ref_embedding: np.ndarray, threshold: float
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="Face recognition using a reference image.")
|
||||
parser.add_argument("--image", type=str, required=True, help="Path to the reference face image.")
|
||||
parser.add_argument(
|
||||
"--detector", type=str, default="scrfd", choices=["retinaface", "scrfd"], help="Face detection method."
|
||||
)
|
||||
parser.add_argument(
|
||||
"--recognizer",
|
||||
type=str,
|
||||
default="arcface",
|
||||
choices=["arcface", "mobileface", "sphereface"],
|
||||
help="Face recognition method.",
|
||||
)
|
||||
parser.add_argument("--verbose", action="store_true", help="Enable verbose logging")
|
||||
parser = argparse.ArgumentParser(description="Face search using a reference image")
|
||||
parser.add_argument("--image", type=str, required=True, help="Reference face image")
|
||||
parser.add_argument("--threshold", type=float, default=0.4, help="Match threshold")
|
||||
parser.add_argument("--detector", type=str, default="scrfd", choices=["retinaface", "scrfd"])
|
||||
parser.add_argument("--recognizer", type=str, default="arcface", choices=["arcface", "mobileface", "sphereface"])
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.verbose:
|
||||
from uniface import enable_logging
|
||||
|
||||
enable_logging()
|
||||
|
||||
print("Initializing models...")
|
||||
if args.detector == 'retinaface':
|
||||
detector = RetinaFace()
|
||||
else:
|
||||
detector = SCRFD()
|
||||
|
||||
if args.recognizer == 'arcface':
|
||||
recognizer = ArcFace()
|
||||
elif args.recognizer == 'mobileface':
|
||||
recognizer = MobileFace()
|
||||
else:
|
||||
recognizer = SphereFace()
|
||||
detector = RetinaFace() if args.detector == "retinaface" else SCRFD()
|
||||
recognizer = get_recognizer(args.recognizer)
|
||||
|
||||
print("Extracting reference embedding...")
|
||||
ref_embedding = extract_reference_embedding(detector, recognizer, args.image)
|
||||
|
||||
run_video(detector, recognizer, ref_embedding)
|
||||
run_video(detector, recognizer, ref_embedding, args.threshold)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -1,11 +1,14 @@
|
||||
"""Facial Landmark Detection Demo Script"""
|
||||
# 106-point facial landmark detection
|
||||
# Usage: python run_landmarks.py --image path/to/image.jpg
|
||||
# python run_landmarks.py --webcam
|
||||
|
||||
import os
|
||||
import cv2
|
||||
import argparse
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
from uniface import RetinaFace, SCRFD, Landmark106
|
||||
import cv2
|
||||
|
||||
from uniface import SCRFD, Landmark106, RetinaFace
|
||||
|
||||
|
||||
def process_image(detector, landmarker, image_path: str, save_dir: str = "outputs"):
|
||||
@@ -14,39 +17,25 @@ def process_image(detector, landmarker, image_path: str, save_dir: str = "output
|
||||
print(f"Error: Failed to load image from '{image_path}'")
|
||||
return
|
||||
|
||||
print(f"Processing: {image_path}")
|
||||
|
||||
# Detect faces
|
||||
faces = detector.detect(image)
|
||||
print(f" Detected {len(faces)} face(s)")
|
||||
print(f"Detected {len(faces)} face(s)")
|
||||
|
||||
if not faces:
|
||||
print(" No faces detected")
|
||||
return
|
||||
|
||||
# Process each face
|
||||
for i, face in enumerate(faces):
|
||||
# Draw bounding box
|
||||
bbox = face['bbox']
|
||||
bbox = face["bbox"]
|
||||
x1, y1, x2, y2 = map(int, bbox)
|
||||
cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
|
||||
|
||||
# Get and draw 106 landmarks
|
||||
landmarks = landmarker.get_landmarks(image, bbox)
|
||||
print(f" Face {i+1}: Extracted {len(landmarks)} landmarks")
|
||||
print(f" Face {i + 1}: {len(landmarks)} landmarks")
|
||||
|
||||
for x, y in landmarks.astype(int):
|
||||
cv2.circle(image, (x, y), 1, (0, 255, 0), -1)
|
||||
|
||||
# Add face count
|
||||
cv2.putText(image, f"Face {i+1}", (x1, y1 - 10),
|
||||
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
|
||||
cv2.putText(image, f"Face {i + 1}", (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
|
||||
|
||||
# Add total count
|
||||
cv2.putText(image, f"Faces: {len(faces)}", (10, 30),
|
||||
cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
|
||||
# Save result
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
output_path = os.path.join(save_dir, f"{Path(image_path).stem}_landmarks.jpg")
|
||||
cv2.imwrite(output_path, image)
|
||||
@@ -54,91 +43,54 @@ def process_image(detector, landmarker, image_path: str, save_dir: str = "output
|
||||
|
||||
|
||||
def run_webcam(detector, landmarker):
|
||||
cap = cv2.VideoCapture(0)
|
||||
|
||||
cap = cv2.VideoCapture(0) # 0 = default webcam
|
||||
if not cap.isOpened():
|
||||
print("Cannot open webcam")
|
||||
return
|
||||
|
||||
print("Webcam opened")
|
||||
print("Press 'q' to quit\n")
|
||||
print("Press 'q' to quit")
|
||||
|
||||
frame_count = 0
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
frame = cv2.flip(frame, 1) # mirror for natural interaction
|
||||
if not ret:
|
||||
break
|
||||
|
||||
try:
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
faces = detector.detect(frame)
|
||||
|
||||
frame_count += 1
|
||||
for face in faces:
|
||||
bbox = face["bbox"]
|
||||
x1, y1, x2, y2 = map(int, bbox)
|
||||
cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
|
||||
|
||||
# Detect faces
|
||||
faces = detector.detect(frame)
|
||||
landmarks = landmarker.get_landmarks(frame, bbox) # 106 points
|
||||
for x, y in landmarks.astype(int):
|
||||
cv2.circle(frame, (x, y), 1, (0, 255, 0), -1)
|
||||
|
||||
# Process each face
|
||||
for face in faces:
|
||||
# Draw bounding box
|
||||
bbox = face['bbox']
|
||||
x1, y1, x2, y2 = map(int, bbox)
|
||||
cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
|
||||
cv2.putText(frame, f"Faces: {len(faces)}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
cv2.imshow("106-Point Landmarks", frame)
|
||||
|
||||
# Get and draw 106 landmarks
|
||||
landmarks = landmarker.get_landmarks(frame, bbox)
|
||||
for x, y in landmarks.astype(int):
|
||||
cv2.circle(frame, (x, y), 1, (0, 255, 0), -1)
|
||||
if cv2.waitKey(1) & 0xFF == ord("q"):
|
||||
break
|
||||
|
||||
# Add info
|
||||
cv2.putText(frame, f"Faces: {len(faces)}", (10, 30),
|
||||
cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
cv2.putText(frame, "Press 'q' to quit", (10, frame.shape[0] - 10),
|
||||
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
|
||||
|
||||
cv2.imshow("106-Point Landmarks", frame)
|
||||
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
break
|
||||
|
||||
except KeyboardInterrupt:
|
||||
print("\nInterrupted")
|
||||
finally:
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
print(f"\nProcessed {frame_count} frames")
|
||||
cap.release()
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="Run facial landmark detection")
|
||||
parser.add_argument("--image", type=str, help="Path to input image")
|
||||
parser.add_argument("--webcam", action="store_true", help="Use webcam instead of image")
|
||||
parser.add_argument("--detector", type=str, default="retinaface",
|
||||
choices=['retinaface', 'scrfd'], help="Face detector to use")
|
||||
parser.add_argument("--save_dir", type=str, default="outputs",
|
||||
help="Directory to save output images")
|
||||
parser.add_argument("--verbose", action="store_true", help="Enable verbose logging")
|
||||
|
||||
parser.add_argument("--webcam", action="store_true", help="Use webcam")
|
||||
parser.add_argument("--detector", type=str, default="retinaface", choices=["retinaface", "scrfd"])
|
||||
parser.add_argument("--save_dir", type=str, default="outputs")
|
||||
args = parser.parse_args()
|
||||
|
||||
# Validate input
|
||||
if not args.image and not args.webcam:
|
||||
parser.error("Either --image or --webcam must be specified")
|
||||
|
||||
if args.verbose:
|
||||
from uniface import enable_logging
|
||||
enable_logging()
|
||||
|
||||
# Initialize models
|
||||
print(f"Initializing detector: {args.detector}")
|
||||
if args.detector == 'retinaface':
|
||||
detector = RetinaFace()
|
||||
else:
|
||||
detector = SCRFD()
|
||||
|
||||
print("Initializing landmark detector...")
|
||||
detector = RetinaFace() if args.detector == "retinaface" else SCRFD()
|
||||
landmarker = Landmark106()
|
||||
print("Models initialized\n")
|
||||
|
||||
# Process
|
||||
if args.webcam:
|
||||
run_webcam(detector, landmarker)
|
||||
else:
|
||||
|
||||
@@ -1,10 +1,24 @@
|
||||
import cv2
|
||||
# Face recognition: extract embeddings or compare two faces
|
||||
# Usage: python run_recognition.py --image path/to/image.jpg
|
||||
# python run_recognition.py --image1 face1.jpg --image2 face2.jpg
|
||||
|
||||
import argparse
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
from uniface.detection import RetinaFace, SCRFD
|
||||
from uniface.recognition import ArcFace, MobileFace, SphereFace
|
||||
from uniface.detection import SCRFD, RetinaFace
|
||||
from uniface.face_utils import compute_similarity
|
||||
from uniface.recognition import ArcFace, MobileFace, SphereFace
|
||||
|
||||
|
||||
def get_recognizer(name: str):
|
||||
if name == "arcface":
|
||||
return ArcFace()
|
||||
elif name == "mobileface":
|
||||
return MobileFace()
|
||||
else:
|
||||
return SphereFace()
|
||||
|
||||
|
||||
def run_inference(detector, recognizer, image_path: str):
|
||||
@@ -14,38 +28,29 @@ def run_inference(detector, recognizer, image_path: str):
|
||||
return
|
||||
|
||||
faces = detector.detect(image)
|
||||
|
||||
if not faces:
|
||||
print("No faces detected.")
|
||||
return
|
||||
|
||||
print(f"Detected {len(faces)} face(s). Extracting embeddings for the first face...")
|
||||
print(f"Detected {len(faces)} face(s). Extracting embedding for the first face...")
|
||||
|
||||
# Process the first detected face
|
||||
first_face = faces[0]
|
||||
landmarks = np.array(first_face['landmarks']) # Convert landmarks to numpy array
|
||||
|
||||
# Extract embedding using the landmarks from the face dictionary
|
||||
landmarks = np.array(faces[0]["landmarks"]) # 5-point landmarks for alignment
|
||||
embedding = recognizer.get_embedding(image, landmarks)
|
||||
norm_embedding = recognizer.get_normalized_embedding(image, landmarks)
|
||||
norm_embedding = recognizer.get_normalized_embedding(image, landmarks) # L2 normalized
|
||||
|
||||
# Print some info about the embeddings
|
||||
print(f" - Embedding shape: {embedding.shape}")
|
||||
print(f" - L2 norm of unnormalized embedding: {np.linalg.norm(embedding):.4f}")
|
||||
print(f" - L2 norm of normalized embedding: {np.linalg.norm(norm_embedding):.4f}")
|
||||
print(f" Embedding shape: {embedding.shape}")
|
||||
print(f" L2 norm (raw): {np.linalg.norm(embedding):.4f}")
|
||||
print(f" L2 norm (normalized): {np.linalg.norm(norm_embedding):.4f}")
|
||||
|
||||
|
||||
def compare_faces(detector, recognizer, image1_path: str, image2_path: str, threshold: float = 0.35):
|
||||
|
||||
# Load images
|
||||
img1 = cv2.imread(image1_path)
|
||||
img2 = cv2.imread(image2_path)
|
||||
|
||||
if img1 is None or img2 is None:
|
||||
print(f"Error: Failed to load images")
|
||||
print("Error: Failed to load one or both images")
|
||||
return
|
||||
|
||||
# Detect faces
|
||||
faces1 = detector.detect(img1)
|
||||
faces2 = detector.detect(img2)
|
||||
|
||||
@@ -53,74 +58,39 @@ def compare_faces(detector, recognizer, image1_path: str, image2_path: str, thre
|
||||
print("Error: No faces detected in one or both images")
|
||||
return
|
||||
|
||||
# Get landmarks for first face in each image
|
||||
landmarks1 = np.array(faces1[0]['landmarks'])
|
||||
landmarks2 = np.array(faces2[0]['landmarks'])
|
||||
landmarks1 = np.array(faces1[0]["landmarks"])
|
||||
landmarks2 = np.array(faces2[0]["landmarks"])
|
||||
|
||||
# Get normalized embeddings
|
||||
embedding1 = recognizer.get_normalized_embedding(img1, landmarks1)
|
||||
embedding2 = recognizer.get_normalized_embedding(img2, landmarks2)
|
||||
|
||||
# Compute similarity
|
||||
# cosine similarity for normalized embeddings
|
||||
similarity = compute_similarity(embedding1, embedding2, normalized=True)
|
||||
is_match = similarity > threshold
|
||||
|
||||
print(f"Similarity: {similarity:.4f}")
|
||||
print(f"Result: {'Same person' if is_match else 'Different person'}")
|
||||
print(f"Threshold: {threshold}")
|
||||
print(f"Result: {'Same person' if is_match else 'Different person'} (threshold: {threshold})")
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="Face recognition and comparison.")
|
||||
parser.add_argument("--image", type=str, help="Path to single image for embedding extraction.")
|
||||
parser.add_argument("--image1", type=str, help="Path to first image for comparison.")
|
||||
parser.add_argument("--image2", type=str, help="Path to second image for comparison.")
|
||||
parser.add_argument("--threshold", type=float, default=0.35, help="Similarity threshold for face matching.")
|
||||
parser.add_argument(
|
||||
"--detector",
|
||||
type=str,
|
||||
default="retinaface",
|
||||
choices=['retinaface', 'scrfd'],
|
||||
help="Face detection method to use."
|
||||
)
|
||||
parser.add_argument(
|
||||
"--recognizer",
|
||||
type=str,
|
||||
default="arcface",
|
||||
choices=['arcface', 'mobileface', 'sphereface'],
|
||||
help="Face recognition method to use."
|
||||
)
|
||||
parser.add_argument("--verbose", action="store_true", help="Enable verbose logging")
|
||||
|
||||
parser = argparse.ArgumentParser(description="Face recognition and comparison")
|
||||
parser.add_argument("--image", type=str, help="Single image for embedding extraction")
|
||||
parser.add_argument("--image1", type=str, help="First image for comparison")
|
||||
parser.add_argument("--image2", type=str, help="Second image for comparison")
|
||||
parser.add_argument("--threshold", type=float, default=0.35, help="Similarity threshold")
|
||||
parser.add_argument("--detector", type=str, default="retinaface", choices=["retinaface", "scrfd"])
|
||||
parser.add_argument("--recognizer", type=str, default="arcface", choices=["arcface", "mobileface", "sphereface"])
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.verbose:
|
||||
from uniface import enable_logging
|
||||
enable_logging()
|
||||
|
||||
print(f"Initializing detector: {args.detector}")
|
||||
if args.detector == 'retinaface':
|
||||
detector = RetinaFace()
|
||||
else:
|
||||
detector = SCRFD()
|
||||
|
||||
print(f"Initializing recognizer: {args.recognizer}")
|
||||
if args.recognizer == 'arcface':
|
||||
recognizer = ArcFace()
|
||||
elif args.recognizer == 'mobileface':
|
||||
recognizer = MobileFace()
|
||||
else:
|
||||
recognizer = SphereFace()
|
||||
detector = RetinaFace() if args.detector == "retinaface" else SCRFD()
|
||||
recognizer = get_recognizer(args.recognizer)
|
||||
|
||||
if args.image1 and args.image2:
|
||||
# Face comparison mode
|
||||
print(f"Comparing faces: {args.image1} vs {args.image2}")
|
||||
compare_faces(detector, recognizer, args.image1, args.image2, args.threshold)
|
||||
elif args.image:
|
||||
# Single image embedding extraction mode
|
||||
run_inference(detector, recognizer, args.image)
|
||||
else:
|
||||
print("Error: Provide either --image for single image processing or --image1 and --image2 for comparison")
|
||||
print("Error: Provide --image or both --image1 and --image2")
|
||||
parser.print_help()
|
||||
|
||||
|
||||
|
||||
@@ -1,141 +1,92 @@
|
||||
"""Video Face Detection Script"""
|
||||
# Face detection on video files
|
||||
# Usage: python run_video_detection.py --input video.mp4 --output output.mp4
|
||||
|
||||
import cv2
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
from tqdm import tqdm
|
||||
|
||||
from uniface import RetinaFace, SCRFD
|
||||
from uniface import SCRFD, RetinaFace
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
|
||||
def process_video(detector, input_path: str, output_path: str, vis_threshold: float = 0.6,
|
||||
fps: int = None, show_preview: bool = False):
|
||||
# Open input video
|
||||
def process_video(detector, input_path: str, output_path: str, threshold: float = 0.6, show_preview: bool = False):
|
||||
cap = cv2.VideoCapture(input_path)
|
||||
if not cap.isOpened():
|
||||
print(f"Error: Cannot open video file '{input_path}'")
|
||||
return
|
||||
|
||||
# Get video properties
|
||||
# get video properties
|
||||
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
||||
source_fps = cap.get(cv2.CAP_PROP_FPS)
|
||||
fps = cap.get(cv2.CAP_PROP_FPS)
|
||||
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
||||
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
||||
|
||||
output_fps = fps if fps is not None else source_fps
|
||||
print(f"Input: {input_path} ({width}x{height}, {fps:.1f} fps, {total_frames} frames)")
|
||||
print(f"Output: {output_path}")
|
||||
|
||||
print(f"📹 Input: {input_path}")
|
||||
print(f" Resolution: {width}x{height}")
|
||||
print(f" FPS: {source_fps:.2f}")
|
||||
print(f" Total frames: {total_frames}")
|
||||
print(f"\n📹 Output: {output_path}")
|
||||
print(f" FPS: {output_fps:.2f}\n")
|
||||
|
||||
# Initialize video writer
|
||||
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
|
||||
out = cv2.VideoWriter(output_path, fourcc, output_fps, (width, height))
|
||||
fourcc = cv2.VideoWriter_fourcc(*"mp4v") # codec for .mp4
|
||||
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
|
||||
|
||||
if not out.isOpened():
|
||||
print(f"Error: Cannot create output video '{output_path}'")
|
||||
cap.release()
|
||||
return
|
||||
|
||||
# Process frames
|
||||
frame_count = 0
|
||||
total_faces = 0
|
||||
|
||||
try:
|
||||
with tqdm(total=total_frames, desc="Processing", unit="frames") as pbar:
|
||||
while True:
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
for _ in tqdm(range(total_frames), desc="Processing", unit="frames"):
|
||||
ret, frame = cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
frame_count += 1
|
||||
frame_count += 1
|
||||
faces = detector.detect(frame)
|
||||
total_faces += len(faces)
|
||||
|
||||
# Detect faces
|
||||
faces = detector.detect(frame)
|
||||
total_faces += len(faces)
|
||||
bboxes = [f["bbox"] for f in faces]
|
||||
scores = [f["confidence"] for f in faces]
|
||||
landmarks = [f["landmarks"] for f in faces]
|
||||
draw_detections(frame, bboxes, scores, landmarks, vis_threshold=threshold)
|
||||
|
||||
# Draw detections
|
||||
bboxes = [f['bbox'] for f in faces]
|
||||
scores = [f['confidence'] for f in faces]
|
||||
landmarks = [f['landmarks'] for f in faces]
|
||||
draw_detections(frame, bboxes, scores, landmarks, vis_threshold=vis_threshold)
|
||||
cv2.putText(frame, f"Faces: {len(faces)}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
out.write(frame)
|
||||
|
||||
# Add frame info
|
||||
cv2.putText(frame, f"Faces: {len(faces)}", (10, 30),
|
||||
cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
|
||||
|
||||
# Write frame
|
||||
out.write(frame)
|
||||
|
||||
# Show preview if requested
|
||||
if show_preview:
|
||||
cv2.imshow("Processing Video - Press 'q' to cancel", frame)
|
||||
if cv2.waitKey(1) & 0xFF == ord('q'):
|
||||
print("\nProcessing cancelled by user")
|
||||
break
|
||||
|
||||
pbar.update(1)
|
||||
|
||||
except KeyboardInterrupt:
|
||||
print("\nProcessing interrupted")
|
||||
finally:
|
||||
cap.release()
|
||||
out.release()
|
||||
if show_preview:
|
||||
cv2.destroyAllWindows()
|
||||
cv2.imshow("Processing - Press 'q' to cancel", frame)
|
||||
if cv2.waitKey(1) & 0xFF == ord("q"):
|
||||
print("\nCancelled by user")
|
||||
break
|
||||
|
||||
# Summary
|
||||
print(f"\nProcessing complete!")
|
||||
print(f" Processed: {frame_count} frames")
|
||||
print(f" Total faces detected: {total_faces}")
|
||||
print(f" Average faces per frame: {total_faces/frame_count:.2f}" if frame_count > 0 else "")
|
||||
print(f" Output saved: {output_path}")
|
||||
cap.release()
|
||||
out.release()
|
||||
if show_preview:
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
avg_faces = total_faces / frame_count if frame_count > 0 else 0
|
||||
print(f"\nDone! {frame_count} frames, {total_faces} faces ({avg_faces:.1f} avg/frame)")
|
||||
print(f"Saved: {output_path}")
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="Process video with face detection")
|
||||
parser.add_argument("--input", type=str, required=True, help="Path to input video")
|
||||
parser.add_argument("--output", type=str, required=True, help="Path to output video")
|
||||
parser.add_argument("--detector", type=str, default="retinaface",
|
||||
choices=['retinaface', 'scrfd'], help="Face detector to use")
|
||||
parser.add_argument("--threshold", type=float, default=0.6,
|
||||
help="Confidence threshold for visualization")
|
||||
parser.add_argument("--fps", type=int, default=None,
|
||||
help="Output FPS (default: same as input)")
|
||||
parser.add_argument("--preview", action="store_true",
|
||||
help="Show live preview during processing")
|
||||
parser.add_argument("--verbose", action="store_true", help="Enable verbose logging")
|
||||
|
||||
parser.add_argument("--input", type=str, required=True, help="Input video path")
|
||||
parser.add_argument("--output", type=str, required=True, help="Output video path")
|
||||
parser.add_argument("--detector", type=str, default="retinaface", choices=["retinaface", "scrfd"])
|
||||
parser.add_argument("--threshold", type=float, default=0.6, help="Visualization threshold")
|
||||
parser.add_argument("--preview", action="store_true", help="Show live preview")
|
||||
args = parser.parse_args()
|
||||
|
||||
# Check input exists
|
||||
if not Path(args.input).exists():
|
||||
print(f"Error: Input file '{args.input}' does not exist")
|
||||
return
|
||||
|
||||
# Create output directory if needed
|
||||
output_dir = Path(args.output).parent
|
||||
if output_dir != Path('.'):
|
||||
output_dir.mkdir(parents=True, exist_ok=True)
|
||||
Path(args.output).parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
if args.verbose:
|
||||
from uniface import enable_logging
|
||||
enable_logging()
|
||||
|
||||
# Initialize detector
|
||||
print(f"Initializing detector: {args.detector}")
|
||||
if args.detector == 'retinaface':
|
||||
detector = RetinaFace()
|
||||
else:
|
||||
detector = SCRFD()
|
||||
print("Detector initialized\n")
|
||||
|
||||
# Process video
|
||||
process_video(detector, args.input, args.output, args.threshold, args.fps, args.preview)
|
||||
detector = RetinaFace() if args.detector == "retinaface" else SCRFD()
|
||||
process_video(detector, args.input, args.output, args.threshold, args.preview)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -12,15 +12,8 @@ def compute_sha256(file_path: Path, chunk_size: int = 8192) -> str:
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Compute SHA256 hash of a model weight file."
|
||||
)
|
||||
parser.add_argument(
|
||||
"file",
|
||||
type=Path,
|
||||
help="Path to the model weight file (.onnx, .pth, etc)."
|
||||
)
|
||||
|
||||
parser = argparse.ArgumentParser(description="Compute SHA256 hash of a file")
|
||||
parser.add_argument("file", type=Path, help="Path to file")
|
||||
args = parser.parse_args()
|
||||
|
||||
if not args.file.exists() or not args.file.is_file():
|
||||
@@ -28,7 +21,7 @@ def main():
|
||||
return
|
||||
|
||||
sha256 = compute_sha256(args.file)
|
||||
print(f"`SHA256 hash for '{args.file.name}':\n{sha256}")
|
||||
print(f"SHA256 hash for '{args.file.name}':\n{sha256}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
Reference in New Issue
Block a user