# Copyright 2025-2026 Yakhyokhuja Valikhujaev # Author: Yakhyokhuja Valikhujaev # GitHub: https://github.com/yakhyo """Face detection on image, video, or webcam. Usage: python tools/detect.py --source path/to/image.jpg python tools/detect.py --source path/to/video.mp4 python tools/detect.py --source 0 # webcam """ from __future__ import annotations import argparse import os from pathlib import Path import time from _common import get_source_type import cv2 from tqdm import tqdm from uniface.detection import SCRFD, RetinaFace, YOLOv5Face, YOLOv8Face from uniface.draw import draw_detections def process_image(detector, image_path: str, threshold: float = 0.6, save_dir: str = 'outputs'): """Process a single image.""" image = cv2.imread(image_path) if image is None: print(f"Error: Failed to load image from '{image_path}'") return faces = detector.detect(image) if faces: draw_detections(image=image, faces=faces, vis_threshold=threshold) os.makedirs(save_dir, exist_ok=True) output_path = os.path.join(save_dir, f'{os.path.splitext(os.path.basename(image_path))[0]}_out.jpg') cv2.imwrite(output_path, image) print(f'Detected {len(faces)} face(s). Output saved: {output_path}') def process_video( detector, input_path: str, output_path: str, threshold: float = 0.6, show_preview: bool = False, ): """Process a video file with progress bar.""" cap = cv2.VideoCapture(input_path) if not cap.isOpened(): print(f"Error: Cannot open video file '{input_path}'") return total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) fps = cap.get(cv2.CAP_PROP_FPS) width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) print(f'Input: {input_path} ({width}x{height}, {fps:.1f} fps, {total_frames} frames)') print(f'Output: {output_path}') Path(output_path).parent.mkdir(parents=True, exist_ok=True) fourcc = cv2.VideoWriter_fourcc(*'mp4v') out = cv2.VideoWriter(output_path, fourcc, fps, (width, height)) if not out.isOpened(): print(f"Error: Cannot create output video '{output_path}'") cap.release() return frame_count = 0 total_faces = 0 for _ in tqdm(range(total_frames), desc='Processing', unit='frames'): ret, frame = cap.read() if not ret: break t0 = time.perf_counter() frame_count += 1 faces = detector.detect(frame) total_faces += len(faces) draw_detections( image=frame, faces=faces, vis_threshold=threshold, draw_score=True, corner_bbox=True, ) inference_fps = 1.0 / max(time.perf_counter() - t0, 1e-9) cv2.putText(frame, f'FPS: {inference_fps:.1f}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2) cv2.putText(frame, f'Faces: {len(faces)}', (10, 65), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2) out.write(frame) if show_preview: cv2.imshow("Processing - Press 'q' to cancel", frame) if cv2.waitKey(1) & 0xFF == ord('q'): print('\nCancelled by user') break cap.release() out.release() if show_preview: cv2.destroyAllWindows() avg_faces = total_faces / frame_count if frame_count > 0 else 0 print(f'\nDone! {frame_count} frames, {total_faces} faces ({avg_faces:.1f} avg/frame)') print(f'Saved: {output_path}') def run_camera(detector, camera_id: int = 0, threshold: float = 0.6): """Run real-time detection on webcam.""" cap = cv2.VideoCapture(camera_id) if not cap.isOpened(): print(f'Cannot open camera {camera_id}') return print("Press 'q' to quit") prev_time = time.perf_counter() while True: ret, frame = cap.read() if not ret: break frame = cv2.flip(frame, 1) faces = detector.detect(frame) draw_detections( image=frame, faces=faces, vis_threshold=threshold, draw_score=True, corner_bbox=True, ) curr_time = time.perf_counter() fps = 1.0 / max(curr_time - prev_time, 1e-9) prev_time = curr_time cv2.putText(frame, f'FPS: {fps:.1f}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2) cv2.putText(frame, f'Faces: {len(faces)}', (10, 65), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2) cv2.imshow('Face Detection', frame) if cv2.waitKey(1) & 0xFF == ord('q'): break cap.release() cv2.destroyAllWindows() def main(): parser = argparse.ArgumentParser(description='Run face detection') parser.add_argument('--source', type=str, required=True, help='Image/video path or camera ID (0, 1, ...)') parser.add_argument( '--detector', '--method', type=str, default='retinaface', choices=['retinaface', 'scrfd', 'yolov5face', 'yolov8face'], ) parser.add_argument('--threshold', type=float, default=0.25, help='Visualization threshold') parser.add_argument('--preview', action='store_true', help='Show live preview during video processing') parser.add_argument('--save-dir', type=str, default='outputs', help='Output directory') parser.add_argument('--output', type=str, default=None, help='Output video path (auto-generated if not specified)') args = parser.parse_args() # Initialize detector if args.detector == 'retinaface': detector = RetinaFace() elif args.detector == 'scrfd': detector = SCRFD() elif args.detector == 'yolov5face': from uniface.constants import YOLOv5FaceWeights detector = YOLOv5Face(model_name=YOLOv5FaceWeights.YOLOV5M) else: # yolov8face from uniface.constants import YOLOv8FaceWeights detector = YOLOv8Face(model_name=YOLOv8FaceWeights.YOLOV8N) source_type = get_source_type(args.source) if source_type == 'camera': run_camera(detector, int(args.source), args.threshold) elif source_type == 'image': if not os.path.exists(args.source): print(f'Error: Image not found: {args.source}') return process_image(detector, args.source, args.threshold, args.save_dir) elif source_type == 'video': if not os.path.exists(args.source): print(f'Error: Video not found: {args.source}') return if args.output: output_path = args.output else: os.makedirs(args.save_dir, exist_ok=True) output_path = os.path.join(args.save_dir, f'{Path(args.source).stem}_detected.mp4') process_video(detector, args.source, output_path, args.threshold, args.preview) else: print(f"Error: Unknown source type for '{args.source}'") print('Supported formats: images (.jpg, .png, ...), videos (.mp4, .avi, ...), or camera ID (0, 1, ...)') if __name__ == '__main__': main()