Mirror of https://github.com/yakhyo/uniface.git, last synced 2025-12-30 09:02:25 +00:00.
ref: Update some refactoring files for testing
This commit is contained in:
@@ -1,141 +1,92 @@
|
||||
"""Video Face Detection Script"""
|
||||
# Face detection on video files
|
||||
# Usage: python run_video_detection.py --input video.mp4 --output output.mp4
|
||||
|
||||
import cv2
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
from tqdm import tqdm
|
||||
|
||||
from uniface import RetinaFace, SCRFD
|
||||
from uniface import SCRFD, RetinaFace
|
||||
from uniface.visualization import draw_detections
|
||||
|
||||
|
||||
def process_video(
    detector,
    input_path: str,
    output_path: str,
    threshold: float = 0.6,
    show_preview: bool = False,
    fps: float | None = None,
) -> None:
    """Run face detection on every frame of a video and write an annotated copy.

    Args:
        detector: Face detector exposing ``detect(frame)`` and returning a list of
            dicts with ``"bbox"``, ``"confidence"`` and ``"landmarks"`` keys.
        input_path: Path to the source video file.
        output_path: Path of the annotated ``.mp4`` to create.
        threshold: Minimum confidence for a detection to be drawn.
        show_preview: If True, display frames while processing; 'q' cancels.
        fps: Optional output frame rate; defaults to the source video's FPS.
    """
    cap = cv2.VideoCapture(input_path)
    if not cap.isOpened():
        print(f"Error: Cannot open video file '{input_path}'")
        return

    # Source properties drive both the progress bar and the writer geometry.
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    source_fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    output_fps = fps if fps is not None else source_fps

    print(f"Input: {input_path} ({width}x{height}, {source_fps:.1f} fps, {total_frames} frames)")
    print(f"Output: {output_path}")

    fourcc = cv2.VideoWriter_fourcc(*"mp4v")  # codec for .mp4
    out = cv2.VideoWriter(output_path, fourcc, output_fps, (width, height))
    if not out.isOpened():
        print(f"Error: Cannot create output video '{output_path}'")
        cap.release()
        return

    frame_count = 0
    total_faces = 0
    try:
        # tqdm bounds the loop by the reported frame count; cap.read() may
        # still return False early for truncated files, hence the break.
        for _ in tqdm(range(total_frames), desc="Processing", unit="frames"):
            ret, frame = cap.read()
            if not ret:
                break
            frame_count += 1

            faces = detector.detect(frame)
            total_faces += len(faces)

            bboxes = [f["bbox"] for f in faces]
            scores = [f["confidence"] for f in faces]
            landmarks = [f["landmarks"] for f in faces]
            draw_detections(frame, bboxes, scores, landmarks, vis_threshold=threshold)
            cv2.putText(frame, f"Faces: {len(faces)}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            out.write(frame)

            if show_preview:
                cv2.imshow("Processing - Press 'q' to cancel", frame)
                if cv2.waitKey(1) & 0xFF == ord("q"):
                    print("\nCancelled by user")
                    break
    except KeyboardInterrupt:
        print("\nProcessing interrupted")
    finally:
        # Always release handles so a partial output file is still playable.
        cap.release()
        out.release()
        if show_preview:
            cv2.destroyAllWindows()

    avg_faces = total_faces / frame_count if frame_count > 0 else 0
    print(f"\nDone! {frame_count} frames, {total_faces} faces ({avg_faces:.1f} avg/frame)")
    print(f"Saved: {output_path}")
||||
|
||||
|
||||
def main() -> None:
    """Parse CLI arguments, build the chosen detector, and process the video."""
    parser = argparse.ArgumentParser(description="Process video with face detection")
    parser.add_argument("--input", type=str, required=True, help="Input video path")
    parser.add_argument("--output", type=str, required=True, help="Output video path")
    parser.add_argument("--detector", type=str, default="retinaface", choices=["retinaface", "scrfd"])
    parser.add_argument("--threshold", type=float, default=0.6, help="Visualization threshold")
    parser.add_argument("--preview", action="store_true", help="Show live preview")
    args = parser.parse_args()

    # Fail fast on a missing input instead of letting cv2 fail opaquely.
    if not Path(args.input).exists():
        print(f"Error: Input file '{args.input}' does not exist")
        return

    # Ensure the output directory exists (no-op for the current directory).
    Path(args.output).parent.mkdir(parents=True, exist_ok=True)

    detector = RetinaFace() if args.detector == "retinaface" else SCRFD()
    process_video(detector, args.input, args.output, args.threshold, args.preview)
|
||||
|
||||
|
||||
# Script entry point: run the CLI only when executed directly, not on import.
if __name__ == "__main__":
    main()
||||
Reference in New Issue
Block a user