feat: Enhance emotion inference speed on ARM and add FaceAnalyzer and Face classes for ease of use. (#25)

* feat: Update linting and type annotations, return types in detect

* feat: add face analyzer and face classes (usage sketch after this list)

* chore: Update the format and clean up some docstrings

* docs: Update usage documentation

* feat: Change AgeGender model output to 0, 1 instead of string (Female, Male)

* test: Update testing code

* feat: Add Apple silicon backend for torchscript inference (illustrative sketch after this list)

* feat: Add face analyzer example and add run emotion for testing
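
A minimal usage sketch of the new FaceAnalyzer and Face classes, pieced together from the scripts/run_face_analyzer.py example added in this commit. The constructor arguments and the Face attributes (bbox, confidence, age, gender, embedding, landmarks, compute_similarity) all appear in the diffs below; whether Face.gender carries the new 0/1 id or a readable string is an assumption here.

    import cv2
    from uniface import AgeGender, ArcFace, FaceAnalyzer, RetinaFace

    # Compose the analyzer from a detector, a recognizer, and an attribute model,
    # mirroring the wiring in scripts/run_face_analyzer.py.
    analyzer = FaceAnalyzer(RetinaFace(), ArcFace(), AgeGender())
    image = cv2.imread('assets/test.jpg')
    faces = analyzer.analyze(image)  # list of Face objects

    for face in faces:
        # Per this commit, AgeGender predicts 0 (Female) / 1 (Male) instead of strings.
        print(face.bbox, face.confidence, face.age, face.gender)

    if len(faces) >= 2:
        # Cosine similarity of embeddings; the example script treats > 0.4 as "same person".
        print(f'similarity: {faces[0].compute_similarity(faces[1]):.3f}')

The Apple silicon backend itself is not shown in this excerpt; the usual pattern for TorchScript inference on ARM Macs is to route the scripted model through the MPS device. A generic sketch of that pattern follows, as an illustration rather than the actual uniface internals:

    import torch

    # Fall back to CPU when MPS is unavailable (Intel Macs, Linux, older PyTorch builds).
    device = 'mps' if torch.backends.mps.is_available() else 'cpu'
    model = torch.jit.load('model.pt', map_location=device).eval()  # 'model.pt' is a placeholder path
    with torch.no_grad():
        # Dummy 112x112 RGB input; the real input size depends on the model.
        output = model(torch.randn(1, 3, 112, 112, device=device))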
Yakhyokhuja Valikhujaev
2025-11-30 20:32:07 +09:00
committed by GitHub
parent 779952e3f8
commit 0c93598007
51 changed files with 1605 additions and 966 deletions

View File: scripts/README.md

@@ -8,8 +8,10 @@ Scripts for testing UniFace features.
|--------|-------------|
| `run_detection.py` | Face detection on image or webcam |
| `run_age_gender.py` | Age and gender prediction |
| `run_emotion.py` | Emotion detection (7 or 8 emotions) |
| `run_landmarks.py` | 106-point facial landmark detection |
| `run_recognition.py` | Face embedding extraction and comparison |
| `run_face_analyzer.py` | Complete face analysis (detection + recognition + attributes) |
| `run_face_search.py` | Real-time face matching against reference |
| `run_video_detection.py` | Face detection on video files |
| `batch_process.py` | Batch process folder of images |
@@ -27,6 +29,10 @@ python scripts/run_detection.py --webcam
python scripts/run_age_gender.py --image assets/test.jpg
python scripts/run_age_gender.py --webcam
# Emotion detection
python scripts/run_emotion.py --image assets/test.jpg
python scripts/run_emotion.py --webcam
# Landmarks
python scripts/run_landmarks.py --image assets/test.jpg
python scripts/run_landmarks.py --webcam
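The new analyzer script follows the same pattern; this invocation is taken from the usage header of the file added later in this commit:
# Face analyzer (detection + recognition + attributes)
python scripts/run_face_analyzer.py --image assets/test.jpg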

View File: scripts/batch_process.py

@@ -14,8 +14,8 @@ from uniface.visualization import draw_detections
def get_image_files(input_dir: Path, extensions: tuple) -> list:
files = []
for ext in extensions:
files.extend(input_dir.glob(f"*.{ext}"))
files.extend(input_dir.glob(f"*.{ext.upper()}"))
files.extend(input_dir.glob(f'*.{ext}'))
files.extend(input_dir.glob(f'*.{ext.upper()}'))
return sorted(files)
@@ -28,14 +28,14 @@ def process_image(detector, image_path: Path, output_path: Path, threshold: floa
faces = detector.detect(image)
# unpack face data for visualization
bboxes = [f["bbox"] for f in faces]
scores = [f["confidence"] for f in faces]
landmarks = [f["landmarks"] for f in faces]
bboxes = [f['bbox'] for f in faces]
scores = [f['confidence'] for f in faces]
landmarks = [f['landmarks'] for f in faces]
draw_detections(image, bboxes, scores, landmarks, vis_threshold=threshold)
cv2.putText(
image,
f"Faces: {len(faces)}",
f'Faces: {len(faces)}',
(10, 30),
cv2.FONT_HERSHEY_SIMPLEX,
1,
@@ -48,12 +48,12 @@ def process_image(detector, image_path: Path, output_path: Path, threshold: floa
def main():
parser = argparse.ArgumentParser(description="Batch process images with face detection")
parser.add_argument("--input", type=str, required=True, help="Input directory")
parser.add_argument("--output", type=str, required=True, help="Output directory")
parser.add_argument("--detector", type=str, default="retinaface", choices=["retinaface", "scrfd"])
parser.add_argument("--threshold", type=float, default=0.6, help="Visualization threshold")
parser.add_argument("--extensions", type=str, default="jpg,jpeg,png,bmp", help="Image extensions")
parser = argparse.ArgumentParser(description='Batch process images with face detection')
parser.add_argument('--input', type=str, required=True, help='Input directory')
parser.add_argument('--output', type=str, required=True, help='Output directory')
parser.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
parser.add_argument('--threshold', type=float, default=0.6, help='Visualization threshold')
parser.add_argument('--extensions', type=str, default='jpg,jpeg,png,bmp', help='Image extensions')
args = parser.parse_args()
input_path = Path(args.input)
@@ -65,21 +65,21 @@ def main():
output_path.mkdir(parents=True, exist_ok=True)
extensions = tuple(ext.strip() for ext in args.extensions.split(","))
extensions = tuple(ext.strip() for ext in args.extensions.split(','))
image_files = get_image_files(input_path, extensions)
if not image_files:
print(f"No images found with extensions {extensions}")
print(f'No images found with extensions {extensions}')
return
print(f"Found {len(image_files)} images")
print(f'Found {len(image_files)} images')
detector = RetinaFace() if args.detector == "retinaface" else SCRFD()
detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
success, errors, total_faces = 0, 0, 0
for img_path in tqdm(image_files, desc="Processing", unit="img"):
out_path = output_path / f"{img_path.stem}_detected{img_path.suffix}"
for img_path in tqdm(image_files, desc='Processing', unit='img'):
out_path = output_path / f'{img_path.stem}_detected{img_path.suffix}'
result = process_image(detector, img_path, out_path, args.threshold)
if result >= 0:
@@ -87,10 +87,10 @@ def main():
total_faces += result
else:
errors += 1
print(f"\nFailed: {img_path.name}")
print(f'\nFailed: {img_path.name}')
print(f"\nDone! {success} processed, {errors} errors, {total_faces} faces total")
print(f'\nDone! {success} processed, {errors} errors, {total_faces} faces total')
if __name__ == "__main__":
if __name__ == '__main__':
main()

View File

@@ -13,48 +13,48 @@ from uniface.constants import (
from uniface.model_store import verify_model_weights
MODEL_TYPES = {
"retinaface": RetinaFaceWeights,
"sphereface": SphereFaceWeights,
"mobileface": MobileFaceWeights,
"arcface": ArcFaceWeights,
"scrfd": SCRFDWeights,
"ddamfn": DDAMFNWeights,
"agegender": AgeGenderWeights,
"landmark": LandmarkWeights,
'retinaface': RetinaFaceWeights,
'sphereface': SphereFaceWeights,
'mobileface': MobileFaceWeights,
'arcface': ArcFaceWeights,
'scrfd': SCRFDWeights,
'ddamfn': DDAMFNWeights,
'agegender': AgeGenderWeights,
'landmark': LandmarkWeights,
}
def download_models(model_enum):
for weight in model_enum:
print(f"Downloading: {weight.value}")
print(f'Downloading: {weight.value}')
try:
verify_model_weights(weight)
print(f" Done: {weight.value}")
print(f' Done: {weight.value}')
except Exception as e:
print(f" Failed: {e}")
print(f' Failed: {e}')
def main():
parser = argparse.ArgumentParser(description="Download model weights")
parser = argparse.ArgumentParser(description='Download model weights')
parser.add_argument(
"--model-type",
'--model-type',
type=str,
choices=list(MODEL_TYPES.keys()),
help="Model type to download. If not specified, downloads all.",
help='Model type to download. If not specified, downloads all.',
)
args = parser.parse_args()
if args.model_type:
print(f"Downloading {args.model_type} models...")
print(f'Downloading {args.model_type} models...')
download_models(MODEL_TYPES[args.model_type])
else:
print("Downloading all models...")
print('Downloading all models...')
for name, model_enum in MODEL_TYPES.items():
print(f"\n{name}:")
print(f'\n{name}:')
download_models(model_enum)
print("\nDone!")
print('\nDone!')
if __name__ == "__main__":
if __name__ == '__main__':
main()

View File: scripts/run_age_gender.py

@@ -12,10 +12,11 @@ from uniface import SCRFD, AgeGender, RetinaFace
from uniface.visualization import draw_detections
def draw_age_gender_label(image, bbox, gender: str, age: int):
def draw_age_gender_label(image, bbox, gender_id: int, age: int):
"""Draw age/gender label above the bounding box."""
x1, y1 = int(bbox[0]), int(bbox[1])
text = f"{gender}, {age}y"
gender_str = 'Female' if gender_id == 0 else 'Male'
text = f'{gender_str}, {age}y'
(tw, th), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
cv2.rectangle(image, (x1, y1 - th - 10), (x1 + tw + 10, y1), (0, 255, 0), -1)
cv2.putText(image, text, (x1 + 5, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 2)
@@ -25,7 +26,7 @@ def process_image(
detector,
age_gender,
image_path: str,
save_dir: str = "outputs",
save_dir: str = 'outputs',
threshold: float = 0.6,
):
image = cv2.imread(image_path)
@@ -34,31 +35,32 @@ def process_image(
return
faces = detector.detect(image)
print(f"Detected {len(faces)} face(s)")
print(f'Detected {len(faces)} face(s)')
if not faces:
return
bboxes = [f["bbox"] for f in faces]
scores = [f["confidence"] for f in faces]
landmarks = [f["landmarks"] for f in faces]
bboxes = [f['bbox'] for f in faces]
scores = [f['confidence'] for f in faces]
landmarks = [f['landmarks'] for f in faces]
draw_detections(image, bboxes, scores, landmarks, vis_threshold=threshold)
for i, face in enumerate(faces):
gender, age = age_gender.predict(image, face["bbox"])
print(f" Face {i + 1}: {gender}, {age} years old")
draw_age_gender_label(image, face["bbox"], gender, age)
gender_id, age = age_gender.predict(image, face['bbox'])
gender_str = 'Female' if gender_id == 0 else 'Male'
print(f' Face {i + 1}: {gender_str}, {age} years old')
draw_age_gender_label(image, face['bbox'], gender_id, age)
os.makedirs(save_dir, exist_ok=True)
output_path = os.path.join(save_dir, f"{Path(image_path).stem}_age_gender.jpg")
output_path = os.path.join(save_dir, f'{Path(image_path).stem}_age_gender.jpg')
cv2.imwrite(output_path, image)
print(f"Output saved: {output_path}")
print(f'Output saved: {output_path}')
def run_webcam(detector, age_gender, threshold: float = 0.6):
cap = cv2.VideoCapture(0) # 0 = default webcam
if not cap.isOpened():
print("Cannot open webcam")
print('Cannot open webcam')
return
print("Press 'q' to quit")
@@ -72,27 +74,27 @@ def run_webcam(detector, age_gender, threshold: float = 0.6):
faces = detector.detect(frame)
# unpack face data for visualization
bboxes = [f["bbox"] for f in faces]
scores = [f["confidence"] for f in faces]
landmarks = [f["landmarks"] for f in faces]
bboxes = [f['bbox'] for f in faces]
scores = [f['confidence'] for f in faces]
landmarks = [f['landmarks'] for f in faces]
draw_detections(frame, bboxes, scores, landmarks, vis_threshold=threshold)
for face in faces:
gender, age = age_gender.predict(frame, face["bbox"]) # predict per face
draw_age_gender_label(frame, face["bbox"], gender, age)
gender_id, age = age_gender.predict(frame, face['bbox']) # predict per face
draw_age_gender_label(frame, face['bbox'], gender_id, age)
cv2.putText(
frame,
f"Faces: {len(faces)}",
f'Faces: {len(faces)}',
(10, 30),
cv2.FONT_HERSHEY_SIMPLEX,
1,
(0, 255, 0),
2,
)
cv2.imshow("Age & Gender Detection", frame)
cv2.imshow('Age & Gender Detection', frame)
if cv2.waitKey(1) & 0xFF == ord("q"):
if cv2.waitKey(1) & 0xFF == ord('q'):
break
cap.release()
@@ -100,18 +102,18 @@ def run_webcam(detector, age_gender, threshold: float = 0.6):
def main():
parser = argparse.ArgumentParser(description="Run age and gender detection")
parser.add_argument("--image", type=str, help="Path to input image")
parser.add_argument("--webcam", action="store_true", help="Use webcam")
parser.add_argument("--detector", type=str, default="retinaface", choices=["retinaface", "scrfd"])
parser.add_argument("--threshold", type=float, default=0.6, help="Visualization threshold")
parser.add_argument("--save_dir", type=str, default="outputs")
parser = argparse.ArgumentParser(description='Run age and gender detection')
parser.add_argument('--image', type=str, help='Path to input image')
parser.add_argument('--webcam', action='store_true', help='Use webcam')
parser.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
parser.add_argument('--threshold', type=float, default=0.6, help='Visualization threshold')
parser.add_argument('--save_dir', type=str, default='outputs')
args = parser.parse_args()
if not args.image and not args.webcam:
parser.error("Either --image or --webcam must be specified")
parser.error('Either --image or --webcam must be specified')
detector = RetinaFace() if args.detector == "retinaface" else SCRFD()
detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
age_gender = AgeGender()
if args.webcam:
@@ -120,5 +122,5 @@ def main():
process_image(detector, age_gender, args.image, args.save_dir, args.threshold)
if __name__ == "__main__":
if __name__ == '__main__':
main()

View File: scripts/run_detection.py

@@ -11,7 +11,7 @@ from uniface.detection import SCRFD, RetinaFace
from uniface.visualization import draw_detections
def process_image(detector, image_path: str, threshold: float = 0.6, save_dir: str = "outputs"):
def process_image(detector, image_path: str, threshold: float = 0.6, save_dir: str = 'outputs'):
image = cv2.imread(image_path)
if image is None:
print(f"Error: Failed to load image from '{image_path}'")
@@ -20,21 +20,21 @@ def process_image(detector, image_path: str, threshold: float = 0.6, save_dir: s
faces = detector.detect(image)
if faces:
bboxes = [face["bbox"] for face in faces]
scores = [face["confidence"] for face in faces]
landmarks = [face["landmarks"] for face in faces]
bboxes = [face['bbox'] for face in faces]
scores = [face['confidence'] for face in faces]
landmarks = [face['landmarks'] for face in faces]
draw_detections(image, bboxes, scores, landmarks, vis_threshold=threshold)
os.makedirs(save_dir, exist_ok=True)
output_path = os.path.join(save_dir, f"{os.path.splitext(os.path.basename(image_path))[0]}_out.jpg")
output_path = os.path.join(save_dir, f'{os.path.splitext(os.path.basename(image_path))[0]}_out.jpg')
cv2.imwrite(output_path, image)
print(f"Output saved: {output_path}")
print(f'Output saved: {output_path}')
def run_webcam(detector, threshold: float = 0.6):
cap = cv2.VideoCapture(0) # 0 = default webcam
if not cap.isOpened():
print("Cannot open webcam")
print('Cannot open webcam')
return
print("Press 'q' to quit")
@@ -48,23 +48,23 @@ def run_webcam(detector, threshold: float = 0.6):
faces = detector.detect(frame)
# unpack face data for visualization
bboxes = [f["bbox"] for f in faces]
scores = [f["confidence"] for f in faces]
landmarks = [f["landmarks"] for f in faces]
bboxes = [f['bbox'] for f in faces]
scores = [f['confidence'] for f in faces]
landmarks = [f['landmarks'] for f in faces]
draw_detections(frame, bboxes, scores, landmarks, vis_threshold=threshold)
cv2.putText(
frame,
f"Faces: {len(faces)}",
f'Faces: {len(faces)}',
(10, 30),
cv2.FONT_HERSHEY_SIMPLEX,
1,
(0, 255, 0),
2,
)
cv2.imshow("Face Detection", frame)
cv2.imshow('Face Detection', frame)
if cv2.waitKey(1) & 0xFF == ord("q"):
if cv2.waitKey(1) & 0xFF == ord('q'):
break
cap.release()
@@ -72,18 +72,18 @@ def run_webcam(detector, threshold: float = 0.6):
def main():
parser = argparse.ArgumentParser(description="Run face detection")
parser.add_argument("--image", type=str, help="Path to input image")
parser.add_argument("--webcam", action="store_true", help="Use webcam")
parser.add_argument("--method", type=str, default="retinaface", choices=["retinaface", "scrfd"])
parser.add_argument("--threshold", type=float, default=0.6, help="Visualization threshold")
parser.add_argument("--save_dir", type=str, default="outputs")
parser = argparse.ArgumentParser(description='Run face detection')
parser.add_argument('--image', type=str, help='Path to input image')
parser.add_argument('--webcam', action='store_true', help='Use webcam')
parser.add_argument('--method', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
parser.add_argument('--threshold', type=float, default=0.6, help='Visualization threshold')
parser.add_argument('--save_dir', type=str, default='outputs')
args = parser.parse_args()
if not args.image and not args.webcam:
parser.error("Either --image or --webcam must be specified")
parser.error('Either --image or --webcam must be specified')
detector = RetinaFace() if args.method == "retinaface" else SCRFD()
detector = RetinaFace() if args.method == 'retinaface' else SCRFD()
if args.webcam:
run_webcam(detector, args.threshold)
@@ -91,5 +91,5 @@ def main():
process_image(detector, args.image, args.threshold, args.save_dir)
if __name__ == "__main__":
if __name__ == '__main__':
main()

View File: scripts/run_emotion.py (new file, 124 lines)

@@ -0,0 +1,124 @@
# Emotion detection on detected faces
# Usage: python run_emotion.py --image path/to/image.jpg
# python run_emotion.py --webcam
import argparse
import os
from pathlib import Path
import cv2
from uniface import SCRFD, Emotion, RetinaFace
from uniface.visualization import draw_detections
def draw_emotion_label(image, bbox, emotion: str, confidence: float):
"""Draw emotion label above the bounding box."""
x1, y1 = int(bbox[0]), int(bbox[1])
text = f'{emotion} ({confidence:.2f})'
(tw, th), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
cv2.rectangle(image, (x1, y1 - th - 10), (x1 + tw + 10, y1), (255, 0, 0), -1)
cv2.putText(image, text, (x1 + 5, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
def process_image(
detector,
emotion_predictor,
image_path: str,
save_dir: str = 'outputs',
threshold: float = 0.6,
):
image = cv2.imread(image_path)
if image is None:
print(f"Error: Failed to load image from '{image_path}'")
return
faces = detector.detect(image)
print(f'Detected {len(faces)} face(s)')
if not faces:
return
bboxes = [f['bbox'] for f in faces]
scores = [f['confidence'] for f in faces]
landmarks = [f['landmarks'] for f in faces]
draw_detections(image, bboxes, scores, landmarks, vis_threshold=threshold)
for i, face in enumerate(faces):
emotion, confidence = emotion_predictor.predict(image, face['landmarks'])
print(f' Face {i + 1}: {emotion} (confidence: {confidence:.3f})')
draw_emotion_label(image, face['bbox'], emotion, confidence)
os.makedirs(save_dir, exist_ok=True)
output_path = os.path.join(save_dir, f'{Path(image_path).stem}_emotion.jpg')
cv2.imwrite(output_path, image)
print(f'Output saved: {output_path}')
def run_webcam(detector, emotion_predictor, threshold: float = 0.6):
cap = cv2.VideoCapture(0) # 0 = default webcam
if not cap.isOpened():
print('Cannot open webcam')
return
print("Press 'q' to quit")
while True:
ret, frame = cap.read()
if not ret:
break
frame = cv2.flip(frame, 1) # mirror for natural interaction; flip only after checking ret, since cv2.flip(None, 1) raises on a failed read
faces = detector.detect(frame)
# unpack face data for visualization
bboxes = [f['bbox'] for f in faces]
scores = [f['confidence'] for f in faces]
landmarks = [f['landmarks'] for f in faces]
draw_detections(frame, bboxes, scores, landmarks, vis_threshold=threshold)
for face in faces:
emotion, confidence = emotion_predictor.predict(frame, face['landmarks'])
draw_emotion_label(frame, face['bbox'], emotion, confidence)
cv2.putText(
frame,
f'Faces: {len(faces)}',
(10, 30),
cv2.FONT_HERSHEY_SIMPLEX,
1,
(0, 255, 0),
2,
)
cv2.imshow('Emotion Detection', frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
cap.release()
cv2.destroyAllWindows()
def main():
parser = argparse.ArgumentParser(description='Run emotion detection')
parser.add_argument('--image', type=str, help='Path to input image')
parser.add_argument('--webcam', action='store_true', help='Use webcam')
parser.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
parser.add_argument('--threshold', type=float, default=0.6, help='Visualization threshold')
parser.add_argument('--save_dir', type=str, default='outputs')
args = parser.parse_args()
if not args.image and not args.webcam:
parser.error('Either --image or --webcam must be specified')
detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
emotion_predictor = Emotion()
if args.webcam:
run_webcam(detector, emotion_predictor, args.threshold)
else:
process_image(detector, emotion_predictor, args.image, args.save_dir, args.threshold)
if __name__ == '__main__':
main()

View File: scripts/run_face_analyzer.py (new file)

@@ -0,0 +1,116 @@
# Face analysis using FaceAnalyzer
# Usage: python run_face_analyzer.py --image path/to/image.jpg
import argparse
import os
from pathlib import Path
import cv2
import numpy as np
from uniface import AgeGender, ArcFace, FaceAnalyzer, RetinaFace
from uniface.visualization import draw_detections
def draw_face_info(image, face, face_id):
"""Draw face ID and attributes above bounding box."""
x1, y1, x2, y2 = map(int, face.bbox)
lines = [f'ID: {face_id}', f'Conf: {face.confidence:.2f}']
if face.age is not None and face.gender is not None: # explicit None checks, so a falsy gender id (0 = Female) still gets a label
lines.append(f'{face.gender}, {face.age}y')
for i, line in enumerate(lines):
y_pos = y1 - 10 - (len(lines) - 1 - i) * 25
if y_pos < 20:
y_pos = y2 + 20 + i * 25
(tw, th), _ = cv2.getTextSize(line, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
cv2.rectangle(image, (x1, y_pos - th - 5), (x1 + tw + 10, y_pos + 5), (0, 255, 0), -1)
cv2.putText(image, line, (x1 + 5, y_pos), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 2)
def process_image(analyzer, image_path: str, save_dir: str = 'outputs', show_similarity: bool = True):
image = cv2.imread(image_path)
if image is None:
print(f"Error: Failed to load image from '{image_path}'")
return
faces = analyzer.analyze(image)
print(f'Detected {len(faces)} face(s)')
if not faces:
return
for i, face in enumerate(faces, 1):
info = f' Face {i}: {face.gender}, {face.age}y' if face.age is not None and face.gender is not None else f' Face {i}'
if face.embedding is not None:
info += f' (embedding: {face.embedding.shape})'
print(info)
if show_similarity and len(faces) >= 2:
print('\nSimilarity Matrix:')
n = len(faces)
sim_matrix = np.zeros((n, n))
for i in range(n):
for j in range(i, n):
if i == j:
sim_matrix[i][j] = 1.0
else:
sim = faces[i].compute_similarity(faces[j])
sim_matrix[i][j] = sim
sim_matrix[j][i] = sim
print(' ', end='')
for i in range(n):
print(f' F{i + 1:2d} ', end='')
print('\n ' + '-' * (7 * n))
for i in range(n):
print(f'F{i + 1:2d} | ', end='')
for j in range(n):
print(f'{sim_matrix[i][j]:6.3f} ', end='')
print()
pairs = [(i, j, sim_matrix[i][j]) for i in range(n) for j in range(i + 1, n)]
pairs.sort(key=lambda x: x[2], reverse=True)
print('\nTop matches (>0.4 = same person):')
for i, j, sim in pairs[:3]:
status = 'Same' if sim > 0.4 else 'Different'
print(f' Face {i + 1} ↔ Face {j + 1}: {sim:.3f} ({status})')
bboxes = [f.bbox for f in faces]
scores = [f.confidence for f in faces]
landmarks = [f.landmarks for f in faces]
draw_detections(image, bboxes, scores, landmarks)
for i, face in enumerate(faces, 1):
draw_face_info(image, face, i)
os.makedirs(save_dir, exist_ok=True)
output_path = os.path.join(save_dir, f'{Path(image_path).stem}_analysis.jpg')
cv2.imwrite(output_path, image)
print(f'Output saved: {output_path}')
def main():
parser = argparse.ArgumentParser(description='Face analysis with detection, recognition, and attributes')
parser.add_argument('--image', type=str, required=True, help='Path to input image')
parser.add_argument('--save_dir', type=str, default='outputs', help='Output directory')
parser.add_argument('--no-similarity', action='store_true', help='Skip similarity matrix computation')
args = parser.parse_args()
if not os.path.exists(args.image):
print(f'Error: Image not found: {args.image}')
return
detector = RetinaFace()
recognizer = ArcFace()
age_gender = AgeGender()
analyzer = FaceAnalyzer(detector, recognizer, age_gender)
process_image(analyzer, args.image, args.save_dir, show_similarity=not args.no_similarity)
if __name__ == '__main__':
main()

View File: scripts/run_face_search.py

@@ -12,9 +12,9 @@ from uniface.recognition import ArcFace, MobileFace, SphereFace
def get_recognizer(name: str):
if name == "arcface":
if name == 'arcface':
return ArcFace()
elif name == "mobileface":
elif name == 'mobileface':
return MobileFace()
else:
return SphereFace()
@@ -23,20 +23,20 @@ def get_recognizer(name: str):
def extract_reference_embedding(detector, recognizer, image_path: str) -> np.ndarray:
image = cv2.imread(image_path)
if image is None:
raise RuntimeError(f"Failed to load image: {image_path}")
raise RuntimeError(f'Failed to load image: {image_path}')
faces = detector.detect(image)
if not faces:
raise RuntimeError("No faces found in reference image.")
raise RuntimeError('No faces found in reference image.')
landmarks = np.array(faces[0]["landmarks"])
landmarks = faces[0]['landmarks']
return recognizer.get_normalized_embedding(image, landmarks)
def run_webcam(detector, recognizer, ref_embedding: np.ndarray, threshold: float = 0.4):
cap = cv2.VideoCapture(0) # 0 = default webcam
if not cap.isOpened():
raise RuntimeError("Webcam could not be opened.")
raise RuntimeError('Webcam could not be opened.')
print("Press 'q' to quit")
@@ -49,22 +49,22 @@ def run_webcam(detector, recognizer, ref_embedding: np.ndarray, threshold: float
faces = detector.detect(frame)
for face in faces:
bbox = face["bbox"]
landmarks = np.array(face["landmarks"])
bbox = face['bbox']
landmarks = face['landmarks']
x1, y1, x2, y2 = map(int, bbox)
embedding = recognizer.get_normalized_embedding(frame, landmarks)
sim = compute_similarity(ref_embedding, embedding) # compare with reference
# green = match, red = unknown
label = f"Match ({sim:.2f})" if sim > threshold else f"Unknown ({sim:.2f})"
label = f'Match ({sim:.2f})' if sim > threshold else f'Unknown ({sim:.2f})'
color = (0, 255, 0) if sim > threshold else (0, 0, 255)
cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)
cv2.imshow("Face Recognition", frame)
if cv2.waitKey(1) & 0xFF == ord("q"):
cv2.imshow('Face Recognition', frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
cap.release()
@@ -72,26 +72,26 @@ def run_webcam(detector, recognizer, ref_embedding: np.ndarray, threshold: float
def main():
parser = argparse.ArgumentParser(description="Face search using a reference image")
parser.add_argument("--image", type=str, required=True, help="Reference face image")
parser.add_argument("--threshold", type=float, default=0.4, help="Match threshold")
parser.add_argument("--detector", type=str, default="scrfd", choices=["retinaface", "scrfd"])
parser = argparse.ArgumentParser(description='Face search using a reference image')
parser.add_argument('--image', type=str, required=True, help='Reference face image')
parser.add_argument('--threshold', type=float, default=0.4, help='Match threshold')
parser.add_argument('--detector', type=str, default='scrfd', choices=['retinaface', 'scrfd'])
parser.add_argument(
"--recognizer",
'--recognizer',
type=str,
default="arcface",
choices=["arcface", "mobileface", "sphereface"],
default='arcface',
choices=['arcface', 'mobileface', 'sphereface'],
)
args = parser.parse_args()
detector = RetinaFace() if args.detector == "retinaface" else SCRFD()
detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
recognizer = get_recognizer(args.recognizer)
print(f"Loading reference: {args.image}")
print(f'Loading reference: {args.image}')
ref_embedding = extract_reference_embedding(detector, recognizer, args.image)
run_webcam(detector, recognizer, ref_embedding, args.threshold)
if __name__ == "__main__":
if __name__ == '__main__':
main()

View File: scripts/run_landmarks.py

@@ -11,32 +11,32 @@ import cv2
from uniface import SCRFD, Landmark106, RetinaFace
def process_image(detector, landmarker, image_path: str, save_dir: str = "outputs"):
def process_image(detector, landmarker, image_path: str, save_dir: str = 'outputs'):
image = cv2.imread(image_path)
if image is None:
print(f"Error: Failed to load image from '{image_path}'")
return
faces = detector.detect(image)
print(f"Detected {len(faces)} face(s)")
print(f'Detected {len(faces)} face(s)')
if not faces:
return
for i, face in enumerate(faces):
bbox = face["bbox"]
bbox = face['bbox']
x1, y1, x2, y2 = map(int, bbox)
cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
landmarks = landmarker.get_landmarks(image, bbox)
print(f" Face {i + 1}: {len(landmarks)} landmarks")
print(f' Face {i + 1}: {len(landmarks)} landmarks')
for x, y in landmarks.astype(int):
cv2.circle(image, (x, y), 1, (0, 255, 0), -1)
cv2.putText(
image,
f"Face {i + 1}",
f'Face {i + 1}',
(x1, y1 - 10),
cv2.FONT_HERSHEY_SIMPLEX,
0.5,
@@ -45,15 +45,15 @@ def process_image(detector, landmarker, image_path: str, save_dir: str = "output
)
os.makedirs(save_dir, exist_ok=True)
output_path = os.path.join(save_dir, f"{Path(image_path).stem}_landmarks.jpg")
output_path = os.path.join(save_dir, f'{Path(image_path).stem}_landmarks.jpg')
cv2.imwrite(output_path, image)
print(f"Output saved: {output_path}")
print(f'Output saved: {output_path}')
def run_webcam(detector, landmarker):
cap = cv2.VideoCapture(0) # 0 = default webcam
if not cap.isOpened():
print("Cannot open webcam")
print('Cannot open webcam')
return
print("Press 'q' to quit")
@@ -67,7 +67,7 @@ def run_webcam(detector, landmarker):
faces = detector.detect(frame)
for face in faces:
bbox = face["bbox"]
bbox = face['bbox']
x1, y1, x2, y2 = map(int, bbox)
cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
@@ -77,16 +77,16 @@ def run_webcam(detector, landmarker):
cv2.putText(
frame,
f"Faces: {len(faces)}",
f'Faces: {len(faces)}',
(10, 30),
cv2.FONT_HERSHEY_SIMPLEX,
1,
(0, 255, 0),
2,
)
cv2.imshow("106-Point Landmarks", frame)
cv2.imshow('106-Point Landmarks', frame)
if cv2.waitKey(1) & 0xFF == ord("q"):
if cv2.waitKey(1) & 0xFF == ord('q'):
break
cap.release()
@@ -94,17 +94,17 @@ def run_webcam(detector, landmarker):
def main():
parser = argparse.ArgumentParser(description="Run facial landmark detection")
parser.add_argument("--image", type=str, help="Path to input image")
parser.add_argument("--webcam", action="store_true", help="Use webcam")
parser.add_argument("--detector", type=str, default="retinaface", choices=["retinaface", "scrfd"])
parser.add_argument("--save_dir", type=str, default="outputs")
parser = argparse.ArgumentParser(description='Run facial landmark detection')
parser.add_argument('--image', type=str, help='Path to input image')
parser.add_argument('--webcam', action='store_true', help='Use webcam')
parser.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
parser.add_argument('--save_dir', type=str, default='outputs')
args = parser.parse_args()
if not args.image and not args.webcam:
parser.error("Either --image or --webcam must be specified")
parser.error('Either --image or --webcam must be specified')
detector = RetinaFace() if args.detector == "retinaface" else SCRFD()
detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
landmarker = Landmark106()
if args.webcam:
@@ -113,5 +113,5 @@ def main():
process_image(detector, landmarker, args.image, args.save_dir)
if __name__ == "__main__":
if __name__ == '__main__':
main()

View File: scripts/run_recognition.py

@@ -13,9 +13,9 @@ from uniface.recognition import ArcFace, MobileFace, SphereFace
def get_recognizer(name: str):
if name == "arcface":
if name == 'arcface':
return ArcFace()
elif name == "mobileface":
elif name == 'mobileface':
return MobileFace()
else:
return SphereFace()
@@ -29,18 +29,18 @@ def run_inference(detector, recognizer, image_path: str):
faces = detector.detect(image)
if not faces:
print("No faces detected.")
print('No faces detected.')
return
print(f"Detected {len(faces)} face(s). Extracting embedding for the first face...")
print(f'Detected {len(faces)} face(s). Extracting embedding for the first face...')
landmarks = np.array(faces[0]["landmarks"]) # 5-point landmarks for alignment
landmarks = faces[0]['landmarks'] # 5-point landmarks for alignment (already np.ndarray)
embedding = recognizer.get_embedding(image, landmarks)
norm_embedding = recognizer.get_normalized_embedding(image, landmarks) # L2 normalized
print(f" Embedding shape: {embedding.shape}")
print(f" L2 norm (raw): {np.linalg.norm(embedding):.4f}")
print(f" L2 norm (normalized): {np.linalg.norm(norm_embedding):.4f}")
print(f' Embedding shape: {embedding.shape}')
print(f' L2 norm (raw): {np.linalg.norm(embedding):.4f}')
print(f' L2 norm (normalized): {np.linalg.norm(norm_embedding):.4f}')
def compare_faces(detector, recognizer, image1_path: str, image2_path: str, threshold: float = 0.35):
@@ -48,18 +48,18 @@ def compare_faces(detector, recognizer, image1_path: str, image2_path: str, thre
img2 = cv2.imread(image2_path)
if img1 is None or img2 is None:
print("Error: Failed to load one or both images")
print('Error: Failed to load one or both images')
return
faces1 = detector.detect(img1)
faces2 = detector.detect(img2)
if not faces1 or not faces2:
print("Error: No faces detected in one or both images")
print('Error: No faces detected in one or both images')
return
landmarks1 = np.array(faces1[0]["landmarks"])
landmarks2 = np.array(faces2[0]["landmarks"])
landmarks1 = faces1[0]['landmarks']
landmarks2 = faces2[0]['landmarks']
embedding1 = recognizer.get_normalized_embedding(img1, landmarks1)
embedding2 = recognizer.get_normalized_embedding(img2, landmarks2)
@@ -68,26 +68,26 @@ def compare_faces(detector, recognizer, image1_path: str, image2_path: str, thre
similarity = compute_similarity(embedding1, embedding2, normalized=True)
is_match = similarity > threshold
print(f"Similarity: {similarity:.4f}")
print(f"Result: {'Same person' if is_match else 'Different person'} (threshold: {threshold})")
print(f'Similarity: {similarity:.4f}')
print(f'Result: {"Same person" if is_match else "Different person"} (threshold: {threshold})')
def main():
parser = argparse.ArgumentParser(description="Face recognition and comparison")
parser.add_argument("--image", type=str, help="Single image for embedding extraction")
parser.add_argument("--image1", type=str, help="First image for comparison")
parser.add_argument("--image2", type=str, help="Second image for comparison")
parser.add_argument("--threshold", type=float, default=0.35, help="Similarity threshold")
parser.add_argument("--detector", type=str, default="retinaface", choices=["retinaface", "scrfd"])
parser = argparse.ArgumentParser(description='Face recognition and comparison')
parser.add_argument('--image', type=str, help='Single image for embedding extraction')
parser.add_argument('--image1', type=str, help='First image for comparison')
parser.add_argument('--image2', type=str, help='Second image for comparison')
parser.add_argument('--threshold', type=float, default=0.35, help='Similarity threshold')
parser.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
parser.add_argument(
"--recognizer",
'--recognizer',
type=str,
default="arcface",
choices=["arcface", "mobileface", "sphereface"],
default='arcface',
choices=['arcface', 'mobileface', 'sphereface'],
)
args = parser.parse_args()
detector = RetinaFace() if args.detector == "retinaface" else SCRFD()
detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
recognizer = get_recognizer(args.recognizer)
if args.image1 and args.image2:
@@ -95,9 +95,9 @@ def main():
elif args.image:
run_inference(detector, recognizer, args.image)
else:
print("Error: Provide --image or both --image1 and --image2")
print('Error: Provide --image or both --image1 and --image2')
parser.print_help()
if __name__ == "__main__":
if __name__ == '__main__':
main()

View File: scripts/run_video_detection.py

@@ -29,10 +29,10 @@ def process_video(
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
print(f"Input: {input_path} ({width}x{height}, {fps:.1f} fps, {total_frames} frames)")
print(f"Output: {output_path}")
print(f'Input: {input_path} ({width}x{height}, {fps:.1f} fps, {total_frames} frames)')
print(f'Output: {output_path}')
fourcc = cv2.VideoWriter_fourcc(*"mp4v") # codec for .mp4
fourcc = cv2.VideoWriter_fourcc(*'mp4v') # codec for .mp4
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
if not out.isOpened():
@@ -43,7 +43,7 @@ def process_video(
frame_count = 0
total_faces = 0
for _ in tqdm(range(total_frames), desc="Processing", unit="frames"):
for _ in tqdm(range(total_frames), desc='Processing', unit='frames'):
ret, frame = cap.read()
if not ret:
break
@@ -52,14 +52,14 @@ def process_video(
faces = detector.detect(frame)
total_faces += len(faces)
bboxes = [f["bbox"] for f in faces]
scores = [f["confidence"] for f in faces]
landmarks = [f["landmarks"] for f in faces]
bboxes = [f['bbox'] for f in faces]
scores = [f['confidence'] for f in faces]
landmarks = [f['landmarks'] for f in faces]
draw_detections(frame, bboxes, scores, landmarks, vis_threshold=threshold)
cv2.putText(
frame,
f"Faces: {len(faces)}",
f'Faces: {len(faces)}',
(10, 30),
cv2.FONT_HERSHEY_SIMPLEX,
1,
@@ -70,8 +70,8 @@ def process_video(
if show_preview:
cv2.imshow("Processing - Press 'q' to cancel", frame)
if cv2.waitKey(1) & 0xFF == ord("q"):
print("\nCancelled by user")
if cv2.waitKey(1) & 0xFF == ord('q'):
print('\nCancelled by user')
break
cap.release()
@@ -80,17 +80,17 @@ def process_video(
cv2.destroyAllWindows()
avg_faces = total_faces / frame_count if frame_count > 0 else 0
print(f"\nDone! {frame_count} frames, {total_faces} faces ({avg_faces:.1f} avg/frame)")
print(f"Saved: {output_path}")
print(f'\nDone! {frame_count} frames, {total_faces} faces ({avg_faces:.1f} avg/frame)')
print(f'Saved: {output_path}')
def main():
parser = argparse.ArgumentParser(description="Process video with face detection")
parser.add_argument("--input", type=str, required=True, help="Input video path")
parser.add_argument("--output", type=str, required=True, help="Output video path")
parser.add_argument("--detector", type=str, default="retinaface", choices=["retinaface", "scrfd"])
parser.add_argument("--threshold", type=float, default=0.6, help="Visualization threshold")
parser.add_argument("--preview", action="store_true", help="Show live preview")
parser = argparse.ArgumentParser(description='Process video with face detection')
parser.add_argument('--input', type=str, required=True, help='Input video path')
parser.add_argument('--output', type=str, required=True, help='Output video path')
parser.add_argument('--detector', type=str, default='retinaface', choices=['retinaface', 'scrfd'])
parser.add_argument('--threshold', type=float, default=0.6, help='Visualization threshold')
parser.add_argument('--preview', action='store_true', help='Show live preview')
args = parser.parse_args()
if not Path(args.input).exists():
@@ -99,9 +99,9 @@ def main():
Path(args.output).parent.mkdir(parents=True, exist_ok=True)
detector = RetinaFace() if args.detector == "retinaface" else SCRFD()
detector = RetinaFace() if args.detector == 'retinaface' else SCRFD()
process_video(detector, args.input, args.output, args.threshold, args.preview)
if __name__ == "__main__":
if __name__ == '__main__':
main()

View File

@@ -5,24 +5,24 @@ from pathlib import Path
def compute_sha256(file_path: Path, chunk_size: int = 8192) -> str:
sha256_hash = hashlib.sha256()
with file_path.open("rb") as f:
for chunk in iter(lambda: f.read(chunk_size), b""):
with file_path.open('rb') as f:
for chunk in iter(lambda: f.read(chunk_size), b''):
sha256_hash.update(chunk)
return sha256_hash.hexdigest()
def main():
parser = argparse.ArgumentParser(description="Compute SHA256 hash of a file")
parser.add_argument("file", type=Path, help="Path to file")
parser = argparse.ArgumentParser(description='Compute SHA256 hash of a file')
parser.add_argument('file', type=Path, help='Path to file')
args = parser.parse_args()
if not args.file.exists() or not args.file.is_file():
print(f"File does not exist: {args.file}")
print(f'File does not exist: {args.file}')
return
sha256 = compute_sha256(args.file)
print(f"SHA256 hash for '{args.file.name}':\n{sha256}")
if __name__ == "__main__":
if __name__ == '__main__':
main()