diff --git a/scripts/README.md b/scripts/README.md
index aa952e6..9052f3b 100644
--- a/scripts/README.md
+++ b/scripts/README.md
@@ -1,97 +1,68 @@
 # Scripts
 
-Collection of example scripts demonstrating UniFace functionality.
+Scripts for testing UniFace features.
 
 ## Available Scripts
 
-- `run_detection.py` - Face detection on images
-- `run_age_gender.py` - Age and gender prediction
-- `run_landmarks.py` - Facial landmark detection
-- `run_recognition.py` - Face recognition and embeddings
-- `run_face_search.py` - Face search and matching
-- `run_video_detection.py` - Video processing with face detection
-- `batch_process.py` - Batch processing of image folders
-- `download_model.py` - Download and manage models
+| Script | Description |
+|--------|-------------|
+| `run_detection.py` | Face detection on image or webcam |
+| `run_age_gender.py` | Age and gender prediction |
+| `run_landmarks.py` | 106-point facial landmark detection |
+| `run_recognition.py` | Face embedding extraction and comparison |
+| `run_face_search.py` | Real-time face matching against a reference image |
+| `run_video_detection.py` | Face detection on video files |
+| `batch_process.py` | Batch process a folder of images |
+| `download_model.py` | Download model weights |
+| `sha256_generate.py` | Generate SHA256 hash for model files |
 
-## Quick Start
+## Usage Examples
 
 ```bash
 # Face detection
 python scripts/run_detection.py --image assets/test.jpg
+python scripts/run_detection.py --webcam
 
-# Age and gender detection
+# Age and gender
 python scripts/run_age_gender.py --image assets/test.jpg
-
-# Webcam demo
 python scripts/run_age_gender.py --webcam
 
+# Landmarks
+python scripts/run_landmarks.py --image assets/test.jpg
+python scripts/run_landmarks.py --webcam
+
+# Face recognition (extract embedding)
+python scripts/run_recognition.py --image assets/test.jpg
+
+# Face comparison
+python scripts/run_recognition.py --image1 face1.jpg --image2 face2.jpg
+
+# Face search (match webcam against reference)
+python scripts/run_face_search.py --image reference.jpg
+
+# Video processing
+python scripts/run_video_detection.py --input video.mp4 --output output.mp4
+
 # Batch processing
 python scripts/batch_process.py --input images/ --output results/
+
+# Download models
+python scripts/download_model.py --model-type retinaface
+python scripts/download_model.py  # downloads all
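+
+# SHA256 hash of a downloaded model file (any file path works)
+python scripts/sha256_generate.py path/to/model.onnx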
 ```
 
-## Import Examples
-
-The scripts use direct class imports for better developer experience:
-
-```python
-# Face Detection
-from uniface.detection import RetinaFace, SCRFD
-
-detector = RetinaFace()  # or SCRFD()
-faces = detector.detect(image)
-
-# Face Recognition
-from uniface.recognition import ArcFace, MobileFace, SphereFace
-
-recognizer = ArcFace()  # or MobileFace(), SphereFace()
-embedding = recognizer.get_embedding(image, landmarks)
-
-# Age & Gender
-from uniface.attribute import AgeGender
-
-age_gender = AgeGender()
-gender, age = age_gender.predict(image, bbox)
-
-# Landmarks
-from uniface.landmark import Landmark106
-
-landmarker = Landmark106()
-landmarks = landmarker.get_landmarks(image, bbox)
-```
-
-## Available Classes
-
-**Detection:**
-- `RetinaFace` - High accuracy face detection
-- `SCRFD` - Fast face detection
-
-**Recognition:**
-- `ArcFace` - High accuracy face recognition
-- `MobileFace` - Lightweight face recognition
-- `SphereFace` - Alternative face recognition
-
-**Attributes:**
-- `AgeGender` - Age and gender prediction
-
-**Landmarks:**
-- `Landmark106` - 106-point facial landmarks
-
 ## Common Options
 
-Most scripts support:
-- `--help` - Show usage information
-- `--verbose` - Enable detailed logging
-- `--detector` - Choose detector (retinaface, scrfd)
-- `--threshold` - Set confidence threshold
+| Option | Description |
+|--------|-------------|
+| `--image` | Path to input image |
+| `--webcam` | Use webcam instead of image |
+| `--detector` | Choose detector: `retinaface` or `scrfd` |
+| `--threshold` | Confidence threshold: visualization (default 0.6) or match similarity (0.35-0.4), depending on the script |
+| `--save_dir` | Output directory (default: `outputs`) |
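+
+Options can be combined; for example, SCRFD with a lower visualization threshold:
+
+```bash
+python scripts/run_age_gender.py --image assets/test.jpg --detector scrfd --threshold 0.5
+```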
 
-## Testing
+## Quick Test
 
-Run basic functionality test:
 ```bash
 python scripts/run_detection.py --image assets/test.jpg
 ```
-
-For comprehensive testing, see the main project tests:
-```bash
-pytest tests/
-```
diff --git a/scripts/batch_process.py b/scripts/batch_process.py
index abd3320..ed67a41 100644
--- a/scripts/batch_process.py
+++ b/scripts/batch_process.py
@@ -1,156 +1,87 @@
-"""Batch Image Processing Script"""
+# Batch face detection on a folder of images
+# Usage: python batch_process.py --input images/ --output results/
 
-import os
-import cv2
 import argparse
 from pathlib import Path
+
+import cv2
 from tqdm import tqdm
 
-from uniface import RetinaFace, SCRFD
+from uniface import SCRFD, RetinaFace
 from uniface.visualization import draw_detections
 
 
 def get_image_files(input_dir: Path, extensions: tuple) -> list:
-    image_files = []
+    files = []
     for ext in extensions:
-        image_files.extend(input_dir.glob(f"*.{ext}"))
-        image_files.extend(input_dir.glob(f"*.{ext.upper()}"))
-
-    return sorted(image_files)
+        files.extend(input_dir.glob(f"*.{ext}"))
+        files.extend(input_dir.glob(f"*.{ext.upper()}"))
+    return sorted(set(files))  # set() avoids duplicates on case-insensitive filesystems
 
 
-def process_single_image(detector, image_path: Path, output_dir: Path,
-                         vis_threshold: float, skip_existing: bool) -> dict:
-    output_path = output_dir / f"{image_path.stem}_detected{image_path.suffix}"
-
-    # Skip if already processed
-    if skip_existing and output_path.exists():
-        return {"status": "skipped", "faces": 0}
-
-    # Load image
+def process_image(detector, image_path: Path, output_path: Path, threshold: float) -> int:
+    """Process single image. Returns face count or -1 on error."""
     image = cv2.imread(str(image_path))
     if image is None:
-        return {"status": "error", "error": "Failed to load image"}
+        return -1
 
-    # Detect faces
-    try:
-        faces = detector.detect(image)
-    except Exception as e:
-        return {"status": "error", "error": str(e)}
+    faces = detector.detect(image)
 
-    # Draw detections
-    bboxes = [f['bbox'] for f in faces]
-    scores = [f['confidence'] for f in faces]
-    landmarks = [f['landmarks'] for f in faces]
-    draw_detections(image, bboxes, scores, landmarks, vis_threshold=vis_threshold)
+    # unpack face data for visualization
+    bboxes = [f["bbox"] for f in faces]
+    scores = [f["confidence"] for f in faces]
+    landmarks = [f["landmarks"] for f in faces]
+    draw_detections(image, bboxes, scores, landmarks, vis_threshold=threshold)
 
-    # Add face count
-    cv2.putText(image, f"Faces: {len(faces)}", (10, 30),
-                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
-
-    # Save result
+    cv2.putText(image, f"Faces: {len(faces)}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
     cv2.imwrite(str(output_path), image)
 
-    return {"status": "success", "faces": len(faces)}
-
-
-def batch_process(detector, input_dir: str, output_dir: str, extensions: tuple,
-                  vis_threshold: float, skip_existing: bool):
-    input_path = Path(input_dir)
-    output_path = Path(output_dir)
-
-    # Create output directory
-    output_path.mkdir(parents=True, exist_ok=True)
-
-    # Get image files
-    image_files = get_image_files(input_path, extensions)
-
-    if not image_files:
-        print(f"No image files found in '{input_dir}' with extensions {extensions}")
-        return
-
-    print(f"Input: {input_dir}")
-    print(f"Output: {output_dir}")
-    print(f"Found {len(image_files)} images\n")
-
-    # Process images
-    results = {
-        "success": 0,
-        "skipped": 0,
-        "error": 0,
-        "total_faces": 0
-    }
-
-    with tqdm(image_files, desc="Processing images", unit="img") as pbar:
-        for image_path in pbar:
-            result = process_single_image(
-                detector, image_path, output_path,
-                vis_threshold, skip_existing
-            )
-
-            if result["status"] == "success":
-                results["success"] += 1
-                results["total_faces"] += result["faces"]
-                pbar.set_postfix({"faces": result["faces"]})
-            elif result["status"] == "skipped":
-                results["skipped"] += 1
-            else:
-                results["error"] += 1
-                print(f"\nError processing {image_path.name}: {result.get('error', 'Unknown error')}")
-
-    # Print summary
-    print(f"\nBatch processing complete!")
-    print(f"  Total images: {len(image_files)}")
-    print(f"  Successfully processed: {results['success']}")
-    print(f"  Skipped: {results['skipped']}")
-    print(f"  Errors: {results['error']}")
-    print(f"  Total faces detected: {results['total_faces']}")
-    if results['success'] > 0:
-        print(f"  Average faces per image: {results['total_faces']/results['success']:.2f}")
-    print(f"\nResults saved to: {output_dir}")
+    return len(faces)
 
 
 def main():
     parser = argparse.ArgumentParser(description="Batch process images with face detection")
-    parser.add_argument("--input", type=str, required=True,
-                        help="Input directory containing images")
-    parser.add_argument("--output", type=str, required=True,
-                        help="Output directory for processed images")
-    parser.add_argument("--detector", type=str, default="retinaface",
-                        choices=['retinaface', 'scrfd'], help="Face detector to use")
-    parser.add_argument("--threshold", type=float, default=0.6,
-                        help="Confidence threshold for visualization")
-    parser.add_argument("--extensions", type=str, default="jpg,jpeg,png,bmp",
-                        help="Comma-separated list of image extensions")
parser.add_argument("--skip_existing", action="store_true", - help="Skip files that already exist in output directory") - parser.add_argument("--verbose", action="store_true", help="Enable verbose logging") - + parser.add_argument("--input", type=str, required=True, help="Input directory") + parser.add_argument("--output", type=str, required=True, help="Output directory") + parser.add_argument("--detector", type=str, default="retinaface", choices=["retinaface", "scrfd"]) + parser.add_argument("--threshold", type=float, default=0.6, help="Visualization threshold") + parser.add_argument("--extensions", type=str, default="jpg,jpeg,png,bmp", help="Image extensions") args = parser.parse_args() - # Check input directory exists - if not Path(args.input).exists(): + input_path = Path(args.input) + output_path = Path(args.output) + + if not input_path.exists(): print(f"Error: Input directory '{args.input}' does not exist") return - if args.verbose: - from uniface import enable_logging - enable_logging() + output_path.mkdir(parents=True, exist_ok=True) - # Parse extensions - extensions = tuple(ext.strip() for ext in args.extensions.split(',')) + extensions = tuple(ext.strip() for ext in args.extensions.split(",")) + image_files = get_image_files(input_path, extensions) - # Initialize detector - print(f"Initializing detector: {args.detector}") - if args.detector == 'retinaface': - detector = RetinaFace() - else: - detector = SCRFD() - print("Detector initialized\n") + if not image_files: + print(f"No images found with extensions {extensions}") + return - # Process batch - batch_process(detector, args.input, args.output, extensions, - args.threshold, args.skip_existing) + print(f"Found {len(image_files)} images") + + detector = RetinaFace() if args.detector == "retinaface" else SCRFD() + + success, errors, total_faces = 0, 0, 0 + + for img_path in tqdm(image_files, desc="Processing", unit="img"): + out_path = output_path / f"{img_path.stem}_detected{img_path.suffix}" + result = process_image(detector, img_path, out_path, args.threshold) + + if result >= 0: + success += 1 + total_faces += result + else: + errors += 1 + print(f"\nFailed: {img_path.name}") + + print(f"\nDone! 
 
 
 if __name__ == "__main__":
diff --git a/scripts/download_model.py b/scripts/download_model.py
index 269198e..c3768b0 100644
--- a/scripts/download_model.py
+++ b/scripts/download_model.py
@@ -1,76 +1,59 @@
 import argparse
+
 from uniface.constants import (
-    RetinaFaceWeights, SphereFaceWeights, MobileFaceWeights, ArcFaceWeights,
-    SCRFDWeights, DDAMFNWeights, AgeGenderWeights, LandmarkWeights
+    AgeGenderWeights,
+    ArcFaceWeights,
+    DDAMFNWeights,
+    LandmarkWeights,
+    MobileFaceWeights,
+    RetinaFaceWeights,
+    SCRFDWeights,
+    SphereFaceWeights,
 )
 from uniface.model_store import verify_model_weights
 
-
-# All available model types
-ALL_MODEL_TYPES = {
-    'retinaface': RetinaFaceWeights,
-    'sphereface': SphereFaceWeights,
-    'mobileface': MobileFaceWeights,
-    'arcface': ArcFaceWeights,
-    'scrfd': SCRFDWeights,
-    'ddamfn': DDAMFNWeights,
-    'agegender': AgeGenderWeights,
-    'landmark': LandmarkWeights,
+MODEL_TYPES = {
+    "retinaface": RetinaFaceWeights,
+    "sphereface": SphereFaceWeights,
+    "mobileface": MobileFaceWeights,
+    "arcface": ArcFaceWeights,
+    "scrfd": SCRFDWeights,
+    "ddamfn": DDAMFNWeights,
+    "agegender": AgeGenderWeights,
+    "landmark": LandmarkWeights,
 }
 
 
+def download_models(model_enum):
+    for weight in model_enum:
+        print(f"Downloading: {weight.value}")
+        try:
+            verify_model_weights(weight)
+            print(f"  Done: {weight.value}")
+        except Exception as e:
+            print(f"  Failed: {e}")
+
+
 def main():
-    parser = argparse.ArgumentParser(description="Download and verify model weights.")
+    parser = argparse.ArgumentParser(description="Download model weights")
     parser.add_argument(
         "--model-type",
         type=str,
-        choices=list(ALL_MODEL_TYPES.keys()),
-        help="Model type to download (e.g. retinaface, arcface). If not specified, all models will be downloaded.",
-    )
-    parser.add_argument(
-        "--model",
-        type=str,
-        help="Specific model to download (e.g. MNET_V2). For RetinaFace backward compatibility.",
+        choices=list(MODEL_TYPES.keys()),
+        help="Model type to download. If not specified, downloads all.",
     )
     args = parser.parse_args()
 
-    if args.model and not args.model_type:
-        # Backward compatibility - assume RetinaFace
-        try:
-            weight = RetinaFaceWeights[args.model]
-            print(f"Downloading RetinaFace model: {weight.value}")
-            verify_model_weights(weight)
-            print("Model downloaded successfully.")
-        except KeyError:
-            print(f"Invalid RetinaFace model: {args.model}")
-            print(f"Available models: {[m.name for m in RetinaFaceWeights]}")
-            return
-
     if args.model_type:
-        # Download all models from specific type
-        model_enum = ALL_MODEL_TYPES[args.model_type]
-        print(f"Downloading all {args.model_type} models...")
-        for weight in model_enum:
-            print(f"Downloading: {weight.value}")
-            try:
-                verify_model_weights(weight)
-                print(f"Downloaded: {weight.value}")
-            except Exception as e:
-                print(f"Failed to download {weight.value}: {e}")
+        print(f"Downloading {args.model_type} models...")
+        download_models(MODEL_TYPES[args.model_type])
     else:
-        # Download all models from all types
         print("Downloading all models...")
-        for model_type, model_enum in ALL_MODEL_TYPES.items():
-            print(f"\nDownloading {model_type} models...")
-            for weight in model_enum:
-                print(f"Downloading: {weight.value}")
-                try:
-                    verify_model_weights(weight)
-                    print(f"Downloaded: {weight.value}")
-                except Exception as e:
-                    print(f"Failed to download {weight.value}: {e}")
+        for name, model_enum in MODEL_TYPES.items():
+            print(f"\n{name}:")
+            download_models(model_enum)
 
-    print("\nDownload process completed.")
+    print("\nDone!")
 
 
 if __name__ == "__main__":
diff --git a/scripts/run_age_gender.py b/scripts/run_age_gender.py
index f0e57cd..126c176 100644
--- a/scripts/run_age_gender.py
+++ b/scripts/run_age_gender.py
@@ -1,158 +1,105 @@
-"""Age and Gender Detection Demo Script"""
+# Age and gender prediction on detected faces
+# Usage: python run_age_gender.py --image path/to/image.jpg
+#        python run_age_gender.py --webcam
 
-import os
-import cv2
 import argparse
+import os
 from pathlib import Path
 
-from uniface import RetinaFace, SCRFD, AgeGender
+import cv2
+
+from uniface import SCRFD, AgeGender, RetinaFace
 from uniface.visualization import draw_detections
 
 
-def process_image(detector, age_gender, image_path: str, save_dir: str = "outputs", vis_threshold: float = 0.6):
+def draw_age_gender_label(image, bbox, gender: str, age: int):
+    """Draw age/gender label above the bounding box."""
+    x1, y1 = int(bbox[0]), int(bbox[1])
+    text = f"{gender}, {age}y"
+    (tw, th), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
+    cv2.rectangle(image, (x1, y1 - th - 10), (x1 + tw + 10, y1), (0, 255, 0), -1)
+    cv2.putText(image, text, (x1 + 5, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 2)
+
+
+def process_image(detector, age_gender, image_path: str, save_dir: str = "outputs", threshold: float = 0.6):
     image = cv2.imread(image_path)
     if image is None:
         print(f"Error: Failed to load image from '{image_path}'")
         return
 
-    print(f"Processing: {image_path}")
-
-    # Detect faces
     faces = detector.detect(image)
-    print(f"  Detected {len(faces)} face(s)")
+    print(f"Detected {len(faces)} face(s)")
 
     if not faces:
-        print("  No faces detected")
        return
 
-    # Draw detections
-    bboxes = [f['bbox'] for f in faces]
-    scores = [f['confidence'] for f in faces]
-    landmarks = [f['landmarks'] for f in faces]
-    draw_detections(image, bboxes, scores, landmarks, vis_threshold=vis_threshold)
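+    # unpack face data for visualization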
+    bboxes = [f["bbox"] for f in faces]
+    scores = [f["confidence"] for f in faces]
+    landmarks = [f["landmarks"] for f in faces]
+    draw_detections(image, bboxes, scores, landmarks, vis_threshold=threshold)
 
-    # Predict and draw age/gender for each face
     for i, face in enumerate(faces):
-        gender, age = age_gender.predict(image, face['bbox'])
-        print(f"  Face {i+1}: {gender}, {age} years old")
+        gender, age = age_gender.predict(image, face["bbox"])
+        print(f"  Face {i + 1}: {gender}, {age} years old")
+        draw_age_gender_label(image, face["bbox"], gender, age)
 
-        # Draw age and gender text
-        bbox = face['bbox']
-        x1, y1 = int(bbox[0]), int(bbox[1])
-        text = f"{gender}, {age}y"
-
-        # Background rectangle for text
-        (text_width, text_height), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
-        cv2.rectangle(image, (x1, y1 - text_height - 10),
-                      (x1 + text_width + 10, y1), (0, 255, 0), -1)
-        cv2.putText(image, text, (x1 + 5, y1 - 5),
-                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 2)
-
-    # Save result
     os.makedirs(save_dir, exist_ok=True)
     output_path = os.path.join(save_dir, f"{Path(image_path).stem}_age_gender.jpg")
     cv2.imwrite(output_path, image)
     print(f"Output saved: {output_path}")
 
 
-def run_webcam(detector, age_gender, vis_threshold: float = 0.6):
-    cap = cv2.VideoCapture(0)
-
+def run_webcam(detector, age_gender, threshold: float = 0.6):
+    cap = cv2.VideoCapture(0)  # 0 = default webcam
     if not cap.isOpened():
         print("Cannot open webcam")
         return
 
-    print("Webcam opened")
-    print("Press 'q' to quit\n")
+    print("Press 'q' to quit")
 
-    frame_count = 0
+    while True:
+        ret, frame = cap.read()
+        if not ret:
+            break
+        frame = cv2.flip(frame, 1)  # mirror for natural interaction
 
-    try:
-        while True:
-            ret, frame = cap.read()
-            if not ret:
-                break
+        faces = detector.detect(frame)
 
-            frame_count += 1
+        # unpack face data for visualization
+        bboxes = [f["bbox"] for f in faces]
+        scores = [f["confidence"] for f in faces]
+        landmarks = [f["landmarks"] for f in faces]
+        draw_detections(frame, bboxes, scores, landmarks, vis_threshold=threshold)
 
-            # Detect faces
-            faces = detector.detect(frame)
+        for face in faces:
+            gender, age = age_gender.predict(frame, face["bbox"])  # predict per face
+            draw_age_gender_label(frame, face["bbox"], gender, age)
 
-            # Draw detections
-            bboxes = [f['bbox'] for f in faces]
-            scores = [f['confidence'] for f in faces]
-            landmarks = [f['landmarks'] for f in faces]
-            draw_detections(frame, bboxes, scores, landmarks, vis_threshold=vis_threshold)
+        cv2.putText(frame, f"Faces: {len(faces)}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
+        cv2.imshow("Age & Gender Detection", frame)
 
-            # Predict and draw age/gender for each face
-            for face in faces:
-                gender, age = age_gender.predict(frame, face['bbox'])
+        if cv2.waitKey(1) & 0xFF == ord("q"):
+            break
 
-                # Draw age and gender text
-                bbox = face['bbox']
-                x1, y1 = int(bbox[0]), int(bbox[1])
-                text = f"{gender}, {age}y"
-
-                # Background rectangle for text
-                (text_width, text_height), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
-                cv2.rectangle(frame, (x1, y1 - text_height - 10),
-                              (x1 + text_width + 10, y1), (0, 255, 0), -1)
-                cv2.putText(frame, text, (x1 + 5, y1 - 5),
-                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 2)
-
-            # Add info
-            cv2.putText(frame, f"Faces: {len(faces)}", (10, 30),
-                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
-            cv2.putText(frame, "Press 'q' to quit", (10, frame.shape[0] - 10),
-                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
-
-            cv2.imshow("Age & Gender Detection", frame)
-
-            if cv2.waitKey(1) & 0xFF == ord('q'):
-                break
-
-    except KeyboardInterrupt:
-        print("\nInterrupted")
-    finally:
-        cap.release()
-        cv2.destroyAllWindows()
-        print(f"\nProcessed {frame_count} frames")
{frame_count} frames") + cap.release() + cv2.destroyAllWindows() def main(): parser = argparse.ArgumentParser(description="Run age and gender detection") parser.add_argument("--image", type=str, help="Path to input image") - parser.add_argument("--webcam", action="store_true", help="Use webcam instead of image") - parser.add_argument("--detector", type=str, default="retinaface", - choices=['retinaface', 'scrfd'], help="Face detector to use") - parser.add_argument("--threshold", type=float, default=0.6, - help="Confidence threshold for visualization") - parser.add_argument("--save_dir", type=str, default="outputs", - help="Directory to save output images") - parser.add_argument("--verbose", action="store_true", help="Enable verbose logging") - + parser.add_argument("--webcam", action="store_true", help="Use webcam") + parser.add_argument("--detector", type=str, default="retinaface", choices=["retinaface", "scrfd"]) + parser.add_argument("--threshold", type=float, default=0.6, help="Visualization threshold") + parser.add_argument("--save_dir", type=str, default="outputs") args = parser.parse_args() - # Validate input if not args.image and not args.webcam: parser.error("Either --image or --webcam must be specified") - if args.verbose: - from uniface import enable_logging - enable_logging() - - # Initialize models - print(f"Initializing detector: {args.detector}") - if args.detector == 'retinaface': - detector = RetinaFace() - else: - detector = SCRFD() - - print("Initializing age/gender model...") + detector = RetinaFace() if args.detector == "retinaface" else SCRFD() age_gender = AgeGender() - print("Models initialized\n") - # Process if args.webcam: run_webcam(detector, age_gender, args.threshold) else: diff --git a/scripts/run_detection.py b/scripts/run_detection.py index 44b273e..192872b 100644 --- a/scripts/run_detection.py +++ b/scripts/run_detection.py @@ -1,79 +1,86 @@ -import os -import cv2 -import time -import argparse -import numpy as np +# Face detection on image or webcam +# Usage: python run_detection.py --image path/to/image.jpg +# python run_detection.py --webcam -from uniface.detection import RetinaFace, SCRFD +import argparse +import os + +import cv2 + +from uniface.detection import SCRFD, RetinaFace from uniface.visualization import draw_detections -def run_inference(detector, image_path: str, vis_threshold: float = 0.6, save_dir: str = "outputs"): +def process_image(detector, image_path: str, threshold: float = 0.6, save_dir: str = "outputs"): image = cv2.imread(image_path) if image is None: print(f"Error: Failed to load image from '{image_path}'") return - # 1. Get the list of face dictionaries from the detector faces = detector.detect(image) if faces: - # 2. Unpack the data into separate lists - bboxes = [face['bbox'] for face in faces] - scores = [face['confidence'] for face in faces] - landmarks = [face['landmarks'] for face in faces] - - # 3. 
-        draw_detections(image, bboxes, scores, landmarks, vis_threshold=0.6)
-
+        bboxes = [face["bbox"] for face in faces]
+        scores = [face["confidence"] for face in faces]
+        landmarks = [face["landmarks"] for face in faces]
+        draw_detections(image, bboxes, scores, landmarks, vis_threshold=threshold)
 
     os.makedirs(save_dir, exist_ok=True)
     output_path = os.path.join(save_dir, f"{os.path.splitext(os.path.basename(image_path))[0]}_out.jpg")
     cv2.imwrite(output_path, image)
-    print(f"Output saved at: {output_path}")
+    print(f"Output saved: {output_path}")
+
+
+def run_webcam(detector, threshold: float = 0.6):
+    cap = cv2.VideoCapture(0)  # 0 = default webcam
+    if not cap.isOpened():
+        print("Cannot open webcam")
+        return
+
+    print("Press 'q' to quit")
+
+    while True:
+        ret, frame = cap.read()
+        if not ret:
+            break
+        frame = cv2.flip(frame, 1)  # mirror for natural interaction
+
+        faces = detector.detect(frame)
+
+        # unpack face data for visualization
+        bboxes = [f["bbox"] for f in faces]
+        scores = [f["confidence"] for f in faces]
+        landmarks = [f["landmarks"] for f in faces]
+        draw_detections(frame, bboxes, scores, landmarks, vis_threshold=threshold)
+
+        cv2.putText(frame, f"Faces: {len(faces)}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
+        cv2.imshow("Face Detection", frame)
+
+        if cv2.waitKey(1) & 0xFF == ord("q"):
+            break
+
+    cap.release()
+    cv2.destroyAllWindows()
 
 
 def main():
-    parser = argparse.ArgumentParser(description="Run face detection on an image.")
-    parser.add_argument("--image", type=str, required=True, help="Path to the input image")
-    parser.add_argument(
-        "--method",
-        type=str,
-        default="retinaface",
-        choices=['retinaface', 'scrfd'],
-        help="Detection method to use."
-    )
-    parser.add_argument("--threshold", type=float, default=0.6, help="Visualization confidence threshold")
-    parser.add_argument("--iterations", type=int, default=1, help="Number of inference runs for benchmarking")
-    parser.add_argument("--save_dir", type=str, default="outputs", help="Directory to save output images")
-    parser.add_argument("--verbose", action="store_true", help="Enable verbose logging")
-
+    parser = argparse.ArgumentParser(description="Run face detection")
+    parser.add_argument("--image", type=str, help="Path to input image")
+    parser.add_argument("--webcam", action="store_true", help="Use webcam")
+    parser.add_argument("--detector", type=str, default="retinaface", choices=["retinaface", "scrfd"])
+    parser.add_argument("--threshold", type=float, default=0.6, help="Visualization threshold")
+    parser.add_argument("--save_dir", type=str, default="outputs")
     args = parser.parse_args()
 
-    if args.verbose:
-        from uniface import enable_logging
-        enable_logging()
+    if not args.image and not args.webcam:
+        parser.error("Either --image or --webcam must be specified")
 
-    print(f"Initializing detector: {args.method}")
-    if args.method == 'retinaface':
-        detector = RetinaFace()
+    detector = RetinaFace() if args.detector == "retinaface" else SCRFD()
+
+    if args.webcam:
+        run_webcam(detector, args.threshold)
     else:
-        detector = SCRFD()
-
-    avg_time = 0
-    for i in range(args.iterations):
-        start = time.time()
-        run_inference(detector, args.image, args.threshold, args.save_dir)
-        elapsed = time.time() - start
-        print(f"[{i + 1}/{args.iterations}] Inference time: {elapsed:.4f} seconds")
-        if i >= 0:  # Avoid counting the first run if it includes model loading time
-            avg_time += elapsed
-
-    if args.iterations > 1:
-        # Adjust average calculation to exclude potential first-run overhead
-        effective_iterations = max(1, args.iterations)
-        print(
-            f"\nAverage inference time over {effective_iterations} runs: {avg_time / effective_iterations:.4f} seconds")
+        process_image(detector, args.image, args.threshold, args.save_dir)
 
 
 if __name__ == "__main__":
diff --git a/scripts/run_face_search.py b/scripts/run_face_search.py
index 5530b7f..648af67 100644
--- a/scripts/run_face_search.py
+++ b/scripts/run_face_search.py
@@ -1,13 +1,25 @@
+# Real-time face search: match webcam faces against a reference image
+# Usage: python run_face_search.py --image reference.jpg
+
 import argparse
 
 import cv2
 import numpy as np
 
-from uniface.detection import RetinaFace, SCRFD
+from uniface.detection import SCRFD, RetinaFace
 from uniface.face_utils import compute_similarity
 from uniface.recognition import ArcFace, MobileFace, SphereFace
 
 
+def get_recognizer(name: str):
+    if name == "arcface":
+        return ArcFace()
+    elif name == "mobileface":
+        return MobileFace()
+    else:
+        return SphereFace()
+
+
 def extract_reference_embedding(detector, recognizer, image_path: str) -> np.ndarray:
     image = cv2.imread(image_path)
     if image is None:
@@ -17,44 +29,37 @@ def extract_reference_embedding(detector, recognizer, image_path: str) -> np.nda
     if not faces:
         raise RuntimeError("No faces found in reference image.")
 
-    # Get landmarks from the first detected face dictionary
     landmarks = np.array(faces[0]["landmarks"])
-
-    # Use normalized embedding for more reliable similarity comparison
-    embedding = recognizer.get_normalized_embedding(image, landmarks)
-    return embedding
+    return recognizer.get_normalized_embedding(image, landmarks)
 
 
 def run_video(detector, recognizer, ref_embedding: np.ndarray, threshold: float = 0.4):
-    cap = cv2.VideoCapture(0)
+    cap = cv2.VideoCapture(0)  # 0 = default webcam
     if not cap.isOpened():
         raise RuntimeError("Webcam could not be opened.")
-    print("Webcam started. Press 'q' to quit.")
+
+    print("Press 'q' to quit")
 
     while True:
         ret, frame = cap.read()
         if not ret:
             break
+        frame = cv2.flip(frame, 1)  # mirror for natural interaction
 
         faces = detector.detect(frame)
 
-        # Loop through each detected face
         for face in faces:
-            # Extract bbox and landmarks from the dictionary
             bbox = face["bbox"]
             landmarks = np.array(face["landmarks"])
-
             x1, y1, x2, y2 = map(int, bbox)
 
-            # Get the normalized embedding for the current face
             embedding = recognizer.get_normalized_embedding(frame, landmarks)
+            sim = compute_similarity(ref_embedding, embedding)  # compare with reference
 
-            # Compare with the reference embedding
-            sim = compute_similarity(ref_embedding, embedding)
-
-            # Draw results
+            # green = match, red = unknown
             label = f"Match ({sim:.2f})" if sim > threshold else f"Unknown ({sim:.2f})"
             color = (0, 255, 0) if sim > threshold else (0, 0, 255)
+
             cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
             cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)
@@ -67,43 +72,20 @@ def run_video(detector, recognizer, ref_embedding: np.ndarray, threshold: float
 
 
 def main():
-    parser = argparse.ArgumentParser(description="Face recognition using a reference image.")
-    parser.add_argument("--image", type=str, required=True, help="Path to the reference face image.")
-    parser.add_argument(
-        "--detector", type=str, default="scrfd", choices=["retinaface", "scrfd"], help="Face detection method."
-    )
-    parser.add_argument(
-        "--recognizer",
-        type=str,
-        default="arcface",
-        choices=["arcface", "mobileface", "sphereface"],
-        help="Face recognition method.",
-    )
-    parser.add_argument("--verbose", action="store_true", help="Enable verbose logging")
+    parser = argparse.ArgumentParser(description="Face search using a reference image")
+    parser.add_argument("--image", type=str, required=True, help="Reference face image")
+    parser.add_argument("--threshold", type=float, default=0.4, help="Match threshold")
+    parser.add_argument("--detector", type=str, default="scrfd", choices=["retinaface", "scrfd"])
+    parser.add_argument("--recognizer", type=str, default="arcface", choices=["arcface", "mobileface", "sphereface"])
     args = parser.parse_args()
 
-    if args.verbose:
-        from uniface import enable_logging
-
-        enable_logging()
-
-    print("Initializing models...")
-    if args.detector == 'retinaface':
-        detector = RetinaFace()
-    else:
-        detector = SCRFD()
-
-    if args.recognizer == 'arcface':
-        recognizer = ArcFace()
-    elif args.recognizer == 'mobileface':
-        recognizer = MobileFace()
-    else:
-        recognizer = SphereFace()
+    detector = RetinaFace() if args.detector == "retinaface" else SCRFD()
+    recognizer = get_recognizer(args.recognizer)
 
     print("Extracting reference embedding...")
     ref_embedding = extract_reference_embedding(detector, recognizer, args.image)
-    run_video(detector, recognizer, ref_embedding)
+    run_video(detector, recognizer, ref_embedding, args.threshold)
 
 
 if __name__ == "__main__":
diff --git a/scripts/run_landmarks.py b/scripts/run_landmarks.py
index a14958d..4e8869a 100644
--- a/scripts/run_landmarks.py
+++ b/scripts/run_landmarks.py
@@ -1,11 +1,14 @@
-"""Facial Landmark Detection Demo Script"""
+# 106-point facial landmark detection
+# Usage: python run_landmarks.py --image path/to/image.jpg
+#        python run_landmarks.py --webcam
 
-import os
-import cv2
 import argparse
+import os
 from pathlib import Path
 
-from uniface import RetinaFace, SCRFD, Landmark106
+import cv2
+
+from uniface import SCRFD, Landmark106, RetinaFace
 
 
 def process_image(detector, landmarker, image_path: str, save_dir: str = "outputs"):
@@ -14,39 +17,25 @@ def process_image(detector, landmarker, image_path: str, save_dir: str = "output
         print(f"Error: Failed to load image from '{image_path}'")
         return
 
-    print(f"Processing: {image_path}")
-
-    # Detect faces
     faces = detector.detect(image)
-    print(f"  Detected {len(faces)} face(s)")
+    print(f"Detected {len(faces)} face(s)")
 
     if not faces:
-        print("  No faces detected")
         return
 
-    # Process each face
     for i, face in enumerate(faces):
-        # Draw bounding box
-        bbox = face['bbox']
+        bbox = face["bbox"]
         x1, y1, x2, y2 = map(int, bbox)
         cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
 
-        # Get and draw 106 landmarks
         landmarks = landmarker.get_landmarks(image, bbox)
-        print(f"  Face {i+1}: Extracted {len(landmarks)} landmarks")
+        print(f"  Face {i + 1}: {len(landmarks)} landmarks")
 
         for x, y in landmarks.astype(int):
             cv2.circle(image, (x, y), 1, (0, 255, 0), -1)
 
-        # Add face count
-        cv2.putText(image, f"Face {i+1}", (x1, y1 - 10),
-                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
+        cv2.putText(image, f"Face {i + 1}", (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
 
-    # Add total count
-    cv2.putText(image, f"Faces: {len(faces)}", (10, 30),
-                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
-
-    # Save result
     os.makedirs(save_dir, exist_ok=True)
     output_path = os.path.join(save_dir, f"{Path(image_path).stem}_landmarks.jpg")
     cv2.imwrite(output_path, image)
@@ -54,91 +43,54 @@ def process_image(detector, landmarker, image_path: str, save_dir: str = "output
 
 
 def run_webcam(detector, landmarker):
-    cap = cv2.VideoCapture(0)
-
+    cap = cv2.VideoCapture(0)  # 0 = default webcam
     if not cap.isOpened():
         print("Cannot open webcam")
         return
 
-    print("Webcam opened")
-    print("Press 'q' to quit\n")
+    print("Press 'q' to quit")
 
-    frame_count = 0
+    while True:
+        ret, frame = cap.read()
+        if not ret:
+            break
+        frame = cv2.flip(frame, 1)  # mirror for natural interaction
 
-    try:
-        while True:
-            ret, frame = cap.read()
-            if not ret:
-                break
+        faces = detector.detect(frame)
 
-            frame_count += 1
+        for face in faces:
+            bbox = face["bbox"]
+            x1, y1, x2, y2 = map(int, bbox)
+            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
 
-            # Detect faces
-            faces = detector.detect(frame)
+            landmarks = landmarker.get_landmarks(frame, bbox)  # 106 points
+            for x, y in landmarks.astype(int):
+                cv2.circle(frame, (x, y), 1, (0, 255, 0), -1)
 
-            # Process each face
-            for face in faces:
-                # Draw bounding box
-                bbox = face['bbox']
-                x1, y1, x2, y2 = map(int, bbox)
-                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
+        cv2.putText(frame, f"Faces: {len(faces)}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
+        cv2.imshow("106-Point Landmarks", frame)
 
-                # Get and draw 106 landmarks
-                landmarks = landmarker.get_landmarks(frame, bbox)
-                for x, y in landmarks.astype(int):
-                    cv2.circle(frame, (x, y), 1, (0, 255, 0), -1)
+        if cv2.waitKey(1) & 0xFF == ord("q"):
+            break
 
-            # Add info
-            cv2.putText(frame, f"Faces: {len(faces)}", (10, 30),
-                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
-            cv2.putText(frame, "Press 'q' to quit", (10, frame.shape[0] - 10),
-                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
-
-            cv2.imshow("106-Point Landmarks", frame)
-
-            if cv2.waitKey(1) & 0xFF == ord('q'):
-                break
-
-    except KeyboardInterrupt:
-        print("\nInterrupted")
-    finally:
-        cap.release()
-        cv2.destroyAllWindows()
-        print(f"\nProcessed {frame_count} frames")
+    cap.release()
+    cv2.destroyAllWindows()
 
 
 def main():
     parser = argparse.ArgumentParser(description="Run facial landmark detection")
     parser.add_argument("--image", type=str, help="Path to input image")
-    parser.add_argument("--webcam", action="store_true", help="Use webcam instead of image")
-    parser.add_argument("--detector", type=str, default="retinaface",
-                        choices=['retinaface', 'scrfd'], help="Face detector to use")
-    parser.add_argument("--save_dir", type=str, default="outputs",
-                        help="Directory to save output images")
-    parser.add_argument("--verbose", action="store_true", help="Enable verbose logging")
-
+    parser.add_argument("--webcam", action="store_true", help="Use webcam")
+    parser.add_argument("--detector", type=str, default="retinaface", choices=["retinaface", "scrfd"])
+    parser.add_argument("--save_dir", type=str, default="outputs")
     args = parser.parse_args()
 
-    # Validate input
     if not args.image and not args.webcam:
         parser.error("Either --image or --webcam must be specified")
 
-    if args.verbose:
-        from uniface import enable_logging
-        enable_logging()
-
-    # Initialize models
-    print(f"Initializing detector: {args.detector}")
-    if args.detector == 'retinaface':
-        detector = RetinaFace()
-    else:
-        detector = SCRFD()
-
-    print("Initializing landmark detector...")
+    detector = RetinaFace() if args.detector == "retinaface" else SCRFD()
     landmarker = Landmark106()
-    print("Models initialized\n")
 
-    # Process
     if args.webcam:
         run_webcam(detector, landmarker)
     else:
diff --git a/scripts/run_recognition.py b/scripts/run_recognition.py
index c7209f5..18f839e 100644
--- a/scripts/run_recognition.py
+++ b/scripts/run_recognition.py
@@ -1,10 +1,24 @@
-import cv2
+# Face recognition: extract embeddings or compare two faces
+# Usage: python run_recognition.py --image path/to/image.jpg
+#        python run_recognition.py --image1 face1.jpg --image2 face2.jpg
+
 import argparse
+
+import cv2
 import numpy as np
 
-from uniface.detection import RetinaFace, SCRFD
-from uniface.recognition import ArcFace, MobileFace, SphereFace
+from uniface.detection import SCRFD, RetinaFace
 from uniface.face_utils import compute_similarity
+from uniface.recognition import ArcFace, MobileFace, SphereFace
+
+
+def get_recognizer(name: str):
+    if name == "arcface":
+        return ArcFace()
+    elif name == "mobileface":
+        return MobileFace()
+    else:
+        return SphereFace()
 
 
 def run_inference(detector, recognizer, image_path: str):
@@ -14,38 +28,29 @@ def run_inference(detector, recognizer, image_path: str):
         return
 
     faces = detector.detect(image)
-
     if not faces:
         print("No faces detected.")
         return
 
-    print(f"Detected {len(faces)} face(s). Extracting embeddings for the first face...")
+    print(f"Detected {len(faces)} face(s). Extracting embedding for the first face...")
 
-    # Process the first detected face
-    first_face = faces[0]
-    landmarks = np.array(first_face['landmarks'])  # Convert landmarks to numpy array
-
-    # Extract embedding using the landmarks from the face dictionary
+    landmarks = np.array(faces[0]["landmarks"])  # 5-point landmarks for alignment
     embedding = recognizer.get_embedding(image, landmarks)
-    norm_embedding = recognizer.get_normalized_embedding(image, landmarks)
+    norm_embedding = recognizer.get_normalized_embedding(image, landmarks)  # L2 normalized
 
-    # Print some info about the embeddings
-    print(f"  - Embedding shape: {embedding.shape}")
-    print(f"  - L2 norm of unnormalized embedding: {np.linalg.norm(embedding):.4f}")
-    print(f"  - L2 norm of normalized embedding: {np.linalg.norm(norm_embedding):.4f}")
+    print(f"  Embedding shape: {embedding.shape}")
+    print(f"  L2 norm (raw): {np.linalg.norm(embedding):.4f}")
+    print(f"  L2 norm (normalized): {np.linalg.norm(norm_embedding):.4f}")
 
 
 def compare_faces(detector, recognizer, image1_path: str, image2_path: str, threshold: float = 0.35):
-
-    # Load images
     img1 = cv2.imread(image1_path)
     img2 = cv2.imread(image2_path)
 
     if img1 is None or img2 is None:
-        print(f"Error: Failed to load images")
+        print("Error: Failed to load one or both images")
         return
 
-    # Detect faces
     faces1 = detector.detect(img1)
     faces2 = detector.detect(img2)
 
@@ -53,74 +58,39 @@ def compare_faces(detector, recognizer, image1_path: str, image2_path: str, thre
         print("Error: No faces detected in one or both images")
         return
 
-    # Get landmarks for first face in each image
-    landmarks1 = np.array(faces1[0]['landmarks'])
-    landmarks2 = np.array(faces2[0]['landmarks'])
+    landmarks1 = np.array(faces1[0]["landmarks"])
+    landmarks2 = np.array(faces2[0]["landmarks"])
 
-    # Get normalized embeddings
     embedding1 = recognizer.get_normalized_embedding(img1, landmarks1)
     embedding2 = recognizer.get_normalized_embedding(img2, landmarks2)
 
-    # Compute similarity
+    # cosine similarity for normalized embeddings
     similarity = compute_similarity(embedding1, embedding2, normalized=True)
     is_match = similarity > threshold
 
     print(f"Similarity: {similarity:.4f}")
-    print(f"Result: {'Same person' if is_match else 'Different person'}")
-    print(f"Threshold: {threshold}")
+    print(f"Result: {'Same person' if is_match else 'Different person'} (threshold: {threshold})")
 
 
 def main():
-    parser = argparse.ArgumentParser(description="Face recognition and comparison.")
argparse.ArgumentParser(description="Face recognition and comparison.") - parser.add_argument("--image", type=str, help="Path to single image for embedding extraction.") - parser.add_argument("--image1", type=str, help="Path to first image for comparison.") - parser.add_argument("--image2", type=str, help="Path to second image for comparison.") - parser.add_argument("--threshold", type=float, default=0.35, help="Similarity threshold for face matching.") - parser.add_argument( - "--detector", - type=str, - default="retinaface", - choices=['retinaface', 'scrfd'], - help="Face detection method to use." - ) - parser.add_argument( - "--recognizer", - type=str, - default="arcface", - choices=['arcface', 'mobileface', 'sphereface'], - help="Face recognition method to use." - ) - parser.add_argument("--verbose", action="store_true", help="Enable verbose logging") - + parser = argparse.ArgumentParser(description="Face recognition and comparison") + parser.add_argument("--image", type=str, help="Single image for embedding extraction") + parser.add_argument("--image1", type=str, help="First image for comparison") + parser.add_argument("--image2", type=str, help="Second image for comparison") + parser.add_argument("--threshold", type=float, default=0.35, help="Similarity threshold") + parser.add_argument("--detector", type=str, default="retinaface", choices=["retinaface", "scrfd"]) + parser.add_argument("--recognizer", type=str, default="arcface", choices=["arcface", "mobileface", "sphereface"]) args = parser.parse_args() - if args.verbose: - from uniface import enable_logging - enable_logging() - - print(f"Initializing detector: {args.detector}") - if args.detector == 'retinaface': - detector = RetinaFace() - else: - detector = SCRFD() - - print(f"Initializing recognizer: {args.recognizer}") - if args.recognizer == 'arcface': - recognizer = ArcFace() - elif args.recognizer == 'mobileface': - recognizer = MobileFace() - else: - recognizer = SphereFace() + detector = RetinaFace() if args.detector == "retinaface" else SCRFD() + recognizer = get_recognizer(args.recognizer) if args.image1 and args.image2: - # Face comparison mode - print(f"Comparing faces: {args.image1} vs {args.image2}") compare_faces(detector, recognizer, args.image1, args.image2, args.threshold) elif args.image: - # Single image embedding extraction mode run_inference(detector, recognizer, args.image) else: - print("Error: Provide either --image for single image processing or --image1 and --image2 for comparison") + print("Error: Provide --image or both --image1 and --image2") parser.print_help() diff --git a/scripts/run_video_detection.py b/scripts/run_video_detection.py index 02930c3..de3547d 100644 --- a/scripts/run_video_detection.py +++ b/scripts/run_video_detection.py @@ -1,141 +1,92 @@ -"""Video Face Detection Script""" +# Face detection on video files +# Usage: python run_video_detection.py --input video.mp4 --output output.mp4 -import cv2 import argparse from pathlib import Path + +import cv2 from tqdm import tqdm -from uniface import RetinaFace, SCRFD +from uniface import SCRFD, RetinaFace from uniface.visualization import draw_detections -def process_video(detector, input_path: str, output_path: str, vis_threshold: float = 0.6, - fps: int = None, show_preview: bool = False): - # Open input video +def process_video(detector, input_path: str, output_path: str, threshold: float = 0.6, show_preview: bool = False): cap = cv2.VideoCapture(input_path) if not cap.isOpened(): print(f"Error: Cannot open video file '{input_path}'") 
     width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
     height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
-    output_fps = fps if fps is not None else source_fps
+    print(f"Input: {input_path} ({width}x{height}, {fps:.1f} fps, {total_frames} frames)")
+    print(f"Output: {output_path}")
 
-    print(f"šŸ“¹ Input: {input_path}")
-    print(f"   Resolution: {width}x{height}")
-    print(f"   FPS: {source_fps:.2f}")
-    print(f"   Total frames: {total_frames}")
-    print(f"\nšŸ“¹ Output: {output_path}")
-    print(f"   FPS: {output_fps:.2f}\n")
-
-    # Initialize video writer
-    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
-    out = cv2.VideoWriter(output_path, fourcc, output_fps, (width, height))
+    fourcc = cv2.VideoWriter_fourcc(*"mp4v")  # codec for .mp4
+    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
 
     if not out.isOpened():
         print(f"Error: Cannot create output video '{output_path}'")
         cap.release()
         return
 
-    # Process frames
     frame_count = 0
     total_faces = 0
 
-    try:
-        with tqdm(total=total_frames, desc="Processing", unit="frames") as pbar:
-            while True:
-                ret, frame = cap.read()
-                if not ret:
-                    break
+    for _ in tqdm(range(total_frames), desc="Processing", unit="frames"):
+        ret, frame = cap.read()
+        if not ret:
+            break
 
-                frame_count += 1
+        frame_count += 1
+        faces = detector.detect(frame)
+        total_faces += len(faces)
 
-                # Detect faces
-                faces = detector.detect(frame)
-                total_faces += len(faces)
+        bboxes = [f["bbox"] for f in faces]
+        scores = [f["confidence"] for f in faces]
+        landmarks = [f["landmarks"] for f in faces]
+        draw_detections(frame, bboxes, scores, landmarks, vis_threshold=threshold)
 
-                # Draw detections
-                bboxes = [f['bbox'] for f in faces]
-                scores = [f['confidence'] for f in faces]
-                landmarks = [f['landmarks'] for f in faces]
-                draw_detections(frame, bboxes, scores, landmarks, vis_threshold=vis_threshold)
+        cv2.putText(frame, f"Faces: {len(faces)}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
+        out.write(frame)
 
-                # Add frame info
-                cv2.putText(frame, f"Faces: {len(faces)}", (10, 30),
-                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
-
-                # Write frame
-                out.write(frame)
-
-                # Show preview if requested
-                if show_preview:
-                    cv2.imshow("Processing Video - Press 'q' to cancel", frame)
-                    if cv2.waitKey(1) & 0xFF == ord('q'):
-                        print("\nProcessing cancelled by user")
-                        break
-
-                pbar.update(1)
-
-    except KeyboardInterrupt:
-        print("\nProcessing interrupted")
-    finally:
-        cap.release()
-        out.release()
         if show_preview:
-            cv2.destroyAllWindows()
+            cv2.imshow("Processing - Press 'q' to cancel", frame)
+            if cv2.waitKey(1) & 0xFF == ord("q"):
+                print("\nCancelled by user")
+                break
 
-    # Summary
-    print(f"\nProcessing complete!")
-    print(f"  Processed: {frame_count} frames")
-    print(f"  Total faces detected: {total_faces}")
-    print(f"  Average faces per frame: {total_faces/frame_count:.2f}" if frame_count > 0 else "")
-    print(f"  Output saved: {output_path}")
+    cap.release()
+    out.release()
+    if show_preview:
+        cv2.destroyAllWindows()
+
+    avg_faces = total_faces / frame_count if frame_count > 0 else 0
+    print(f"\nDone! {frame_count} frames, {total_faces} faces ({avg_faces:.1f} avg/frame)")
+    print(f"Saved: {output_path}")
 
 
 def main():
     parser = argparse.ArgumentParser(description="Process video with face detection")
-    parser.add_argument("--input", type=str, required=True, help="Path to input video")
-    parser.add_argument("--output", type=str, required=True, help="Path to output video")
-    parser.add_argument("--detector", type=str, default="retinaface",
-                        choices=['retinaface', 'scrfd'], help="Face detector to use")
-    parser.add_argument("--threshold", type=float, default=0.6,
-                        help="Confidence threshold for visualization")
-    parser.add_argument("--fps", type=int, default=None,
-                        help="Output FPS (default: same as input)")
-    parser.add_argument("--preview", action="store_true",
-                        help="Show live preview during processing")
-    parser.add_argument("--verbose", action="store_true", help="Enable verbose logging")
-
+    parser.add_argument("--input", type=str, required=True, help="Input video path")
+    parser.add_argument("--output", type=str, required=True, help="Output video path")
+    parser.add_argument("--detector", type=str, default="retinaface", choices=["retinaface", "scrfd"])
+    parser.add_argument("--threshold", type=float, default=0.6, help="Visualization threshold")
+    parser.add_argument("--preview", action="store_true", help="Show live preview")
     args = parser.parse_args()
 
-    # Check input exists
    if not Path(args.input).exists():
         print(f"Error: Input file '{args.input}' does not exist")
         return
 
-    # Create output directory if needed
-    output_dir = Path(args.output).parent
-    if output_dir != Path('.'):
-        output_dir.mkdir(parents=True, exist_ok=True)
+    Path(args.output).parent.mkdir(parents=True, exist_ok=True)
 
-    if args.verbose:
-        from uniface import enable_logging
-        enable_logging()
-
-    # Initialize detector
-    print(f"Initializing detector: {args.detector}")
-    if args.detector == 'retinaface':
-        detector = RetinaFace()
-    else:
-        detector = SCRFD()
-    print("Detector initialized\n")
-
-    # Process video
-    process_video(detector, args.input, args.output, args.threshold, args.fps, args.preview)
+    detector = RetinaFace() if args.detector == "retinaface" else SCRFD()
+    process_video(detector, args.input, args.output, args.threshold, args.preview)
 
 
 if __name__ == "__main__":
diff --git a/scripts/sha256_generate.py b/scripts/sha256_generate.py
index 7213f67..9377c1e 100644
--- a/scripts/sha256_generate.py
+++ b/scripts/sha256_generate.py
@@ -12,15 +12,8 @@ def compute_sha256(file_path: Path, chunk_size: int = 8192) -> str:
 
 
 def main():
-    parser = argparse.ArgumentParser(
-        description="Compute SHA256 hash of a model weight file."
-    )
-    parser.add_argument(
-        "file",
-        type=Path,
-        help="Path to the model weight file (.onnx, .pth, etc)."
-    )
-
+    parser = argparse.ArgumentParser(description="Compute SHA256 hash of a file")
+    parser.add_argument("file", type=Path, help="Path to file")
     args = parser.parse_args()
 
     if not args.file.exists() or not args.file.is_file():
@@ -28,7 +21,7 @@ def main():
         return
 
     sha256 = compute_sha256(args.file)
-    print(f"`SHA256 hash for '{args.file.name}':\n{sha256}")
+    print(f"SHA256 hash for '{args.file.name}':\n{sha256}")
 
 
 if __name__ == "__main__":