Merge pull request #42 from yakhyo/feat/standardize-outputs

feat: Standardize detection output and several other updates
Yakhyokhuja Valikhujaev · 2025-12-24 00:38:32 +09:00 · committed by GitHub
20 changed files with 188 additions and 192 deletions

View File

@@ -21,11 +21,28 @@ Thank you for considering contributing to UniFace! We welcome contributions of a
### Code Style
This project uses [Ruff](https://docs.astral.sh/ruff/) for linting and formatting.
```bash
# Check for linting errors
ruff check .
# Auto-fix linting errors
ruff check . --fix
# Format code
ruff format .
```
**Guidelines:**
- Follow PEP8 guidelines
- Use type hints (Python 3.10+)
- Write docstrings for public APIs
- Line length: 120 characters
- Keep code simple and readable
All PRs must pass `ruff check .` before merging.
## Development Setup
```bash

View File

@@ -39,9 +39,9 @@ faces = detector.detect(image)
# Print results
for i, face in enumerate(faces):
print(f"Face {i+1}:")
print(f" Confidence: {face['confidence']:.2f}")
print(f" BBox: {face['bbox']}")
print(f" Landmarks: {len(face['landmarks'])} points")
print(f" Confidence: {face.confidence:.2f}")
print(f" BBox: {face.bbox}")
print(f" Landmarks: {len(face.landmarks)} points")
```
**Output:**
@@ -70,9 +70,9 @@ image = cv2.imread("photo.jpg")
faces = detector.detect(image)
# Extract visualization data
-bboxes = [f['bbox'] for f in faces]
-scores = [f['confidence'] for f in faces]
-landmarks = [f['landmarks'] for f in faces]
+bboxes = [f.bbox for f in faces]
+scores = [f.confidence for f in faces]
+landmarks = [f.landmarks for f in faces]
# Draw on image
draw_detections(
@@ -113,8 +113,8 @@ faces2 = detector.detect(image2)
if faces1 and faces2:
# Extract embeddings
-emb1 = recognizer.get_normalized_embedding(image1, faces1[0]['landmarks'])
-emb2 = recognizer.get_normalized_embedding(image2, faces2[0]['landmarks'])
+emb1 = recognizer.get_normalized_embedding(image1, faces1[0].landmarks)
+emb2 = recognizer.get_normalized_embedding(image2, faces2[0].landmarks)
# Compute similarity (cosine similarity)
similarity = np.dot(emb1, emb2.T)[0][0]
@@ -159,9 +159,9 @@ while True:
faces = detector.detect(frame)
# Draw results
-bboxes = [f['bbox'] for f in faces]
-scores = [f['confidence'] for f in faces]
-landmarks = [f['landmarks'] for f in faces]
+bboxes = [f.bbox for f in faces]
+scores = [f.confidence for f in faces]
+landmarks = [f.landmarks for f in faces]
draw_detections(
image=frame,
bboxes=bboxes,
@@ -199,7 +199,7 @@ faces = detector.detect(image)
# Predict attributes
for i, face in enumerate(faces):
-gender, age = age_gender.predict(image, face['bbox'])
+gender, age = age_gender.predict(image, face.bbox)
gender_str = 'Female' if gender == 0 else 'Male'
print(f"Face {i+1}: {gender_str}, {age} years old")
```
@@ -230,7 +230,7 @@ image = cv2.imread("photo.jpg")
faces = detector.detect(image)
if faces:
-landmarks = landmarker.get_landmarks(image, faces[0]['bbox'])
+landmarks = landmarker.get_landmarks(image, faces[0].bbox)
print(f"Detected {len(landmarks)} landmarks")
# Draw landmarks
@@ -262,8 +262,7 @@ faces = detector.detect(image)
# Estimate gaze for each face
for i, face in enumerate(faces):
-bbox = face['bbox']
-x1, y1, x2, y2 = map(int, bbox[:4])
+x1, y1, x2, y2 = map(int, face.bbox[:4])
face_crop = image[y1:y2, x1:x2]
if face_crop.size > 0:
@@ -271,7 +270,7 @@ for i, face in enumerate(faces):
print(f"Face {i+1}: pitch={np.degrees(pitch):.1f}°, yaw={np.degrees(yaw):.1f}°")
# Draw gaze direction
-draw_gaze(image, bbox, pitch, yaw)
+draw_gaze(image, face.bbox, pitch, yaw)
cv2.imwrite("gaze_output.jpg", image)
```
@@ -435,7 +434,7 @@ image = cv2.imread("photo.jpg")
faces = detector.detect(image)
for i, face in enumerate(faces):
-label_idx, score = spoofer.predict(image, face['bbox'])
+label_idx, score = spoofer.predict(image, face.bbox)
# label_idx: 0 = Fake, 1 = Real
label = 'Real' if label_idx == 1 else 'Fake'
print(f"Face {i+1}: {label} ({score:.1%})")

View File

@@ -1,11 +1,15 @@
# UniFace: All-in-One Face Analysis Library
<div align="center">
[![License](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)
[![Python](https://img.shields.io/badge/Python-3.10%2B-blue)](https://www.python.org/)
[![PyPI](https://img.shields.io/pypi/v/uniface.svg)](https://pypi.org/project/uniface/)
+[![CI](https://github.com/yakhyo/uniface/actions/workflows/ci.yml/badge.svg)](https://github.com/yakhyo/uniface/actions)
-[![Downloads](https://pepy.tech/badge/uniface)](https://pepy.tech/project/uniface)
-[![DeepWiki](https://img.shields.io/badge/DeepWiki-yakhyo%2Funiface-blue.svg?logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACwAAAAyCAYAAAAnWDnqAAAAAXNSR0IArs4c6QAAA05JREFUaEPtmUtyEzEQhtWTQyQLHNak2AB7ZnyXZMEjXMGeK/AIi+QuHrMnbChYY7MIh8g01fJoopFb0uhhEqqcbWTp06/uv1saEDv4O3n3dV60RfP947Mm9/SQc0ICFQgzfc4CYZoTPAswgSJCCUJUnAAoRHOAUOcATwbmVLWdGoH//PB8mnKqScAhsD0kYP3j/Yt5LPQe2KvcXmGvRHcDnpxfL2zOYJ1mFwrryWTz0advv1Ut4CJgf5uhDuDj5eUcAUoahrdY/56ebRWeraTjMt/00Sh3UDtjgHtQNHwcRGOC98BJEAEymycmYcWwOprTgcB6VZ5JK5TAJ+fXGLBm3FDAmn6oPPjR4rKCAoJCal2eAiQp2x0vxTPB3ALO2CRkwmDy5WohzBDwSEFKRwPbknEggCPB/imwrycgxX2NzoMCHhPkDwqYMr9tRcP5qNrMZHkVnOjRMWwLCcr8ohBVb1OMjxLwGCvjTikrsBOiA6fNyCrm8V1rP93iVPpwaE+gO0SsWmPiXB+jikdf6SizrT5qKasx5j8ABbHpFTx+vFXp9EnYQmLx02h1QTTrl6eDqxLnGjporxl3NL3agEvXdT0WmEost648sQOYAeJS9Q7bfUVoMGnjo4AZdUMQku50McDcMWcBPvr0SzbTAFDfvJqwLzgxwATnCgnp4wDl6Aa+Ax283gghmj+vj7feE2KBBRMW3FzOpLOADl0Isb5587h/U4gGvkt5v60Z1VLG8BhYjbzRwyQZemwAd6cCR5/XFWLYZRIMpX39AR0tjaGGiGzLVyhse5C9RKC6ai42ppWPKiBagOvaYk8lO7DajerabOZP46Lby5wKjw1HCRx7p9sVMOWGzb/vA1hwiWc6jm3MvQDTogQkiqIhJV0nBQBTU+3okKCFDy9WwferkHjtxib7t3xIUQtHxnIwtx4mpg26/HfwVNVDb4oI9RHmx5WGelRVlrtiw43zboCLaxv46AZeB3IlTkwouebTr1y2NjSpHz68WNFjHvupy3q8TFn3Hos2IAk4Ju5dCo8B3wP7VPr/FGaKiG+T+v+TQqIrOqMTL1VdWV1DdmcbO8KXBz6esmYWYKPwDL5b5FA1a0hwapHiom0r/cKaoqr+27/XcrS5UwSMbQAAAABJRU5ErkJggg==)](https://deepwiki.com/yakhyo/uniface)
+[![Downloads](https://static.pepy.tech/badge/uniface)](https://pepy.tech/project/uniface)
+[![DeepWiki](https://img.shields.io/badge/DeepWiki-AI_Docs-blue.svg?logo=bookstack)](https://deepwiki.com/yakhyo/uniface)
</div>
<div align="center">
<img src=".github/logos/logo_web.webp" width=75%>
@@ -101,9 +105,9 @@ faces = detector.detect(image)
# Process results
for face in faces:
-bbox = face['bbox'] # [x1, y1, x2, y2]
-confidence = face['confidence']
-landmarks = face['landmarks'] # 5-point landmarks
+bbox = face.bbox # np.ndarray [x1, y1, x2, y2]
+confidence = face.confidence
+landmarks = face.landmarks # np.ndarray (5, 2) landmarks
print(f"Face detected with confidence: {confidence:.2f}")
```
@@ -121,8 +125,8 @@ recognizer = ArcFace()
faces1 = detector.detect(image1)
faces2 = detector.detect(image2)
-embedding1 = recognizer.get_normalized_embedding(image1, faces1[0]['landmarks'])
-embedding2 = recognizer.get_normalized_embedding(image2, faces2[0]['landmarks'])
+embedding1 = recognizer.get_normalized_embedding(image1, faces1[0].landmarks)
+embedding2 = recognizer.get_normalized_embedding(image2, faces2[0].landmarks)
# Compare faces
similarity = compute_similarity(embedding1, embedding2)
@@ -138,7 +142,7 @@ detector = RetinaFace()
landmarker = Landmark106()
faces = detector.detect(image)
-landmarks = landmarker.get_landmarks(image, faces[0]['bbox'])
+landmarks = landmarker.get_landmarks(image, faces[0].bbox)
# Returns 106 (x, y) landmark points
```
@@ -151,7 +155,7 @@ detector = RetinaFace()
age_gender = AgeGender()
faces = detector.detect(image)
-gender, age = age_gender.predict(image, faces[0]['bbox'])
+gender, age = age_gender.predict(image, faces[0].bbox)
gender_str = 'Female' if gender == 0 else 'Male'
print(f"{gender_str}, {age} years old")
```
@@ -168,15 +172,14 @@ gaze_estimator = MobileGaze()
faces = detector.detect(image)
for face in faces:
-bbox = face['bbox']
-x1, y1, x2, y2 = map(int, bbox[:4])
+x1, y1, x2, y2 = map(int, face.bbox[:4])
face_crop = image[y1:y2, x1:x2]
pitch, yaw = gaze_estimator.estimate(face_crop)
print(f"Gaze: pitch={np.degrees(pitch):.1f}°, yaw={np.degrees(yaw):.1f}°")
# Visualize
-draw_gaze(image, bbox, pitch, yaw)
+draw_gaze(image, face.bbox, pitch, yaw)
```
### Face Parsing
@@ -213,7 +216,7 @@ spoofer = MiniFASNet() # Uses V2 by default
faces = detector.detect(image)
for face in faces:
-label_idx, score = spoofer.predict(image, face['bbox'])
+label_idx, score = spoofer.predict(image, face.bbox)
# label_idx: 0 = Fake, 1 = Real
label = 'Real' if label_idx == 1 else 'Fake'
print(f"{label}: {score:.1%}")
@@ -458,9 +461,9 @@ while True:
faces = detector.detect(frame)
# Extract data for visualization
-bboxes = [f['bbox'] for f in faces]
-scores = [f['confidence'] for f in faces]
-landmarks = [f['landmarks'] for f in faces]
+bboxes = [f.bbox for f in faces]
+scores = [f.confidence for f in faces]
+landmarks = [f.landmarks for f in faces]
draw_detections(
image=frame,
@@ -494,7 +497,7 @@ for person_id, image_path in person_images.items():
faces = detector.detect(image)
if faces:
embedding = recognizer.get_normalized_embedding(
-image, faces[0]['landmarks']
+image, faces[0].landmarks
)
database[person_id] = embedding
@@ -503,7 +506,7 @@ query_image = cv2.imread("query.jpg")
query_faces = detector.detect(query_image)
if query_faces:
query_embedding = recognizer.get_normalized_embedding(
-query_image, query_faces[0]['landmarks']
+query_image, query_faces[0].landmarks
)
# Find best match
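
The hunk above stops at `# Find best match`. Since `get_normalized_embedding` returns L2-normalized embeddings — shape `(1, 512)` is an assumption, inferred from the `np.dot(emb1, emb2.T)[0][0]` pattern earlier in this README — the lookup can be completed with a plain dot product. A sketch, reusing `database` and `query_embedding` from the example above:

```python
import numpy as np

# Cosine similarity reduces to a dot product for L2-normalized embeddings
best_id, best_score = None, -1.0
for person_id, embedding in database.items():
    score = float(np.dot(query_embedding, embedding.T)[0][0])
    if score > best_score:
        best_id, best_score = person_id, score

print(f"Best match: {best_id} (similarity: {best_score:.2f})")
```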

View File

@@ -48,7 +48,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"1.3.1\n"
"1.6.0\n"
]
}
],
@@ -140,13 +140,13 @@
"\n",
" # Draw detections\n",
" bbox_image = image.copy()\n",
" bboxes = [f['bbox'] for f in faces]\n",
" scores = [f['confidence'] for f in faces]\n",
" landmarks = [f['landmarks'] for f in faces]\n",
" bboxes = [f.bbox for f in faces]\n",
" scores = [f.confidence for f in faces]\n",
" landmarks = [f.landmarks for f in faces]\n",
" draw_detections(image=bbox_image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=0.6, fancy_bbox=True)\n",
"\n",
" # Align first detected face (returns aligned image and inverse transform matrix)\n",
" first_landmarks = faces[0]['landmarks']\n",
" first_landmarks = faces[0].landmarks\n",
" aligned_image, _ = face_alignment(image, first_landmarks, image_size=112)\n",
"\n",
" # Convert BGR to RGB for visualization\n",
@@ -202,7 +202,8 @@
"source": [
"## Notes\n",
"\n",
"- `detect()` returns a list of face dictionaries with `bbox`, `confidence`, `landmarks`\n",
"- `detect()` returns a list of `Face` objects with `bbox`, `confidence`, `landmarks` attributes\n",
"- Access attributes using dot notation: `face.bbox`, `face.landmarks`\n",
"- `face_alignment()` uses 5-point landmarks to align and crop the face\n",
"- Default output size is 112x112 (standard for face recognition models)\n"
]
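
Condensing the notes above into a runnable sketch of the align-then-crop flow — import paths are inferred from this diff (`face_alignment` is re-exported in `uniface/__init__.py`; the detection package path is assumed), and the image path is hypothetical:

```python
import cv2

from uniface import face_alignment
from uniface.detection import RetinaFace  # package path assumed from this diff

detector = RetinaFace()
image = cv2.imread("photo.jpg")  # hypothetical input

faces = detector.detect(image)
if faces:
    # Dot notation replaces the old faces[0]['landmarks'] lookup
    aligned, _ = face_alignment(image, faces[0].landmarks, image_size=112)
    cv2.imwrite("aligned_112x112.jpg", aligned)  # standard size for recognition
```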

View File

@@ -44,7 +44,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"1.3.1\n"
"1.6.0\n"
]
}
],

File diff suppressed because one or more lines are too long

View File

@@ -44,7 +44,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"1.3.1\n"
"1.6.0\n"
]
}
],
@@ -153,14 +153,14 @@
"# Load image\n",
"image = cv2.imread(image_path)\n",
"\n",
"# Detect faces - returns list of face dictionaries\n",
"# Detect faces - returns list of Face objects\n",
"faces = detector.detect(image)\n",
"print(f'Detected {len(faces)} face(s)')\n",
"\n",
"# Unpack face data for visualization\n",
"bboxes = [f['bbox'] for f in faces]\n",
"scores = [f['confidence'] for f in faces]\n",
"landmarks = [f['landmarks'] for f in faces]\n",
"bboxes = [f.bbox for f in faces]\n",
"scores = [f.confidence for f in faces]\n",
"landmarks = [f.landmarks for f in faces]\n",
"\n",
"# Draw detections\n",
"draw_detections(image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=0.6, fancy_bbox=True)\n",
@@ -211,9 +211,9 @@
"faces = detector.detect(image, max_num=2)\n",
"print(f'Detected {len(faces)} face(s)')\n",
"\n",
"bboxes = [f['bbox'] for f in faces]\n",
"scores = [f['confidence'] for f in faces]\n",
"landmarks = [f['landmarks'] for f in faces]\n",
"bboxes = [f.bbox for f in faces]\n",
"scores = [f.confidence for f in faces]\n",
"landmarks = [f.landmarks for f in faces]\n",
"\n",
"draw_detections(image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=0.6, fancy_bbox=True)\n",
"\n",
@@ -258,9 +258,9 @@
"faces = detector.detect(image, max_num=5)\n",
"print(f'Detected {len(faces)} face(s)')\n",
"\n",
"bboxes = [f['bbox'] for f in faces]\n",
"scores = [f['confidence'] for f in faces]\n",
"landmarks = [f['landmarks'] for f in faces]\n",
"bboxes = [f.bbox for f in faces]\n",
"scores = [f.confidence for f in faces]\n",
"landmarks = [f.landmarks for f in faces]\n",
"\n",
"draw_detections(image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=0.6, fancy_bbox=True)\n",
"\n",
@@ -274,7 +274,8 @@
"source": [
"## Notes\n",
"\n",
"- `detect()` returns a list of dictionaries with keys: `bbox`, `confidence`, `landmarks`\n",
"- `detect()` returns a list of `Face` objects with attributes: `bbox`, `confidence`, `landmarks`\n",
"- Access attributes using dot notation: `face.bbox`, `face.confidence`, `face.landmarks`\n",
"- Adjust `conf_thresh` and `nms_thresh` for your use case\n",
"- Use `max_num` to limit detected faces"
]
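
A compact sketch of those notes in code, using `max_num` and dot-notation access exactly as in the cells above (the detection package path is assumed from this diff, and the image path is hypothetical):

```python
import cv2

from uniface.detection import RetinaFace  # package path assumed from this diff

detector = RetinaFace()
image = cv2.imread("group_photo.jpg")  # hypothetical input

# max_num=5 keeps at most the five most prominent faces, as in the cell above
faces = detector.detect(image, max_num=5)
for face in faces:
    print(f"confidence={face.confidence:.2f}, bbox={face.bbox}")
```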

View File

@@ -46,7 +46,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"UniFace version: 1.5.0\n"
"UniFace version: 1.6.0\n"
]
}
],
@@ -365,7 +365,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "base",
"language": "python",
"name": "python3"
},
@@ -379,7 +379,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.0"
"version": "3.13.5"
}
},
"nbformat": 4,

View File

@@ -42,7 +42,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"1.3.1\n"
"1.6.0\n"
]
}
],

View File

@@ -37,7 +37,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"1.3.1\n"
"1.6.0\n"
]
}
],

View File

@@ -44,7 +44,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"UniFace version: 1.4.0\n"
"UniFace version: 1.6.0\n"
]
}
],
@@ -152,8 +152,7 @@
"\n",
" # Estimate gaze for each face\n",
" for i, face in enumerate(faces):\n",
" bbox = face['bbox']\n",
" x1, y1, x2, y2 = map(int, bbox[:4])\n",
" x1, y1, x2, y2 = map(int, face.bbox[:4])\n",
" face_crop = image[y1:y2, x1:x2]\n",
"\n",
" if face_crop.size > 0:\n",
@@ -164,7 +163,7 @@
" print(f' Face {i+1}: pitch={pitch_deg:.1f}°, yaw={yaw_deg:.1f}°')\n",
"\n",
" # Draw gaze without angle text\n",
" draw_gaze(image, bbox, pitch, yaw, draw_angles=False)\n",
" draw_gaze(image, face.bbox, pitch, yaw, draw_angles=False)\n",
"\n",
" # Convert BGR to RGB for display\n",
" original_rgb = cv2.cvtColor(original, cv2.COLOR_BGR2RGB)\n",
@@ -249,7 +248,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "base",
"language": "python",
"name": "python3"
},
@@ -263,7 +262,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.0"
"version": "3.13.5"
}
},
"nbformat": 4,

View File

@@ -1,6 +1,6 @@
[project]
name = "uniface"
version = "1.5.3"
version = "1.6.0"
description = "UniFace: A Comprehensive Library for Face Detection, Recognition, Landmark Analysis, Face Parsing, Gaze Estimation, Age, and Gender Detection"
readme = "README.md"
license = { text = "MIT" }

View File

@@ -205,4 +205,3 @@ Examples:
if __name__ == '__main__':
main()

View File

@@ -13,7 +13,7 @@
__license__ = 'MIT'
__author__ = 'Yakhyokhuja Valikhujaev'
-__version__ = '1.5.3'
+__version__ = '1.6.0'
from uniface.face_utils import compute_similarity, face_alignment

View File

@@ -36,41 +36,24 @@ class FaceAnalyzer:
def analyze(self, image: np.ndarray) -> List[Face]:
"""Analyze faces in an image."""
-detections = self.detector.detect(image)
-Logger.debug(f'Detected {len(detections)} face(s)')
+faces = self.detector.detect(image)
+Logger.debug(f'Detected {len(faces)} face(s)')
-faces = []
-for idx, detection in enumerate(detections):
-bbox = detection['bbox']
-confidence = detection['confidence']
-landmarks = detection['landmarks']
-embedding = None
+for idx, face in enumerate(faces):
if self.recognizer is not None:
try:
-embedding = self.recognizer.get_normalized_embedding(image, landmarks)
-Logger.debug(f' Face {idx + 1}: Extracted embedding with shape {embedding.shape}')
+face.embedding = self.recognizer.get_normalized_embedding(image, face.landmarks)
+Logger.debug(f' Face {idx + 1}: Extracted embedding with shape {face.embedding.shape}')
except Exception as e:
Logger.warning(f' Face {idx + 1}: Failed to extract embedding: {e}')
-age, gender = None, None
if self.age_gender is not None:
try:
-gender, age = self.age_gender.predict(image, bbox)
-Logger.debug(f' Face {idx + 1}: Age={age}, Gender={gender}')
+face.gender, face.age = self.age_gender.predict(image, face.bbox)
+Logger.debug(f' Face {idx + 1}: Age={face.age}, Gender={face.gender}')
except Exception as e:
Logger.warning(f' Face {idx + 1}: Failed to predict age/gender: {e}')
-face = Face(
-bbox=bbox,
-confidence=confidence,
-landmarks=landmarks,
-embedding=embedding,
-age=age,
-gender=gender,
-)
-faces.append(face)
Logger.info(f'Analysis complete: {len(faces)} face(s) processed')
return faces
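
After this refactor, `analyze()` enriches the detector's own `Face` objects in place instead of copying fields into a new instance. A usage sketch — the `FaceAnalyzer` constructor is not shown in this diff, so the import path and no-argument form are assumptions:

```python
import cv2

from uniface import FaceAnalyzer  # import path assumed

analyzer = FaceAnalyzer()  # assumed defaults wiring detector/recognizer/age_gender
image = cv2.imread("photo.jpg")

for face in analyzer.analyze(image):
    print(f"{face.confidence:.2f} @ {face.bbox}")
    if face.embedding is not None:  # set only when a recognizer is configured
        print("embedding shape:", face.embedding.shape)
    if face.age is not None:        # set only when age/gender is configured
        print(f"age={face.age}, gender={face.gender}")
```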

View File

@@ -7,6 +7,8 @@ from typing import Any, Dict, List
import numpy as np
+from uniface.face import Face
from .base import BaseDetector
from .retinaface import RetinaFace
from .scrfd import SCRFD
@@ -16,7 +18,7 @@ from .yolov5 import YOLOv5Face
_detector_cache: Dict[str, BaseDetector] = {}
-def detect_faces(image: np.ndarray, method: str = 'retinaface', **kwargs) -> List[Dict[str, Any]]:
+def detect_faces(image: np.ndarray, method: str = 'retinaface', **kwargs) -> List[Face]:
"""
High-level face detection function.
@@ -26,18 +28,18 @@ def detect_faces(image: np.ndarray, method: str = 'retinaface', **kwargs) -> Lis
**kwargs: Additional arguments passed to the detector.
Returns:
-List[Dict[str, Any]]: A list of dictionaries, where each dictionary represents a detected face and contains:
-- 'bbox' (List[float]): [x1, y1, x2, y2] bounding box coordinates.
-- 'confidence' (float): The confidence score of the detection.
-- 'landmarks' (List[List[float]]): 5-point facial landmarks.
+List[Face]: A list of Face objects, each containing:
+- bbox (np.ndarray): [x1, y1, x2, y2] bounding box coordinates.
+- confidence (float): The confidence score of the detection.
+- landmarks (np.ndarray): 5-point facial landmarks with shape (5, 2).
Example:
>>> from uniface import detect_faces
>>> image = cv2.imread("your_image.jpg")
>>> faces = detect_faces(image, method='retinaface', conf_thresh=0.8)
>>> for face in faces:
... print(f"Found face with confidence: {face['confidence']}")
... print(f"BBox: {face['bbox']}")
... print(f"Found face with confidence: {face.confidence}")
... print(f"BBox: {face.bbox}")
"""
method_name = method.lower()
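
The hunk cuts off right after `method_name = method.lower()`. For context, a sketch of how a module-level cache like `_detector_cache` is typically completed — the actual body of `detect_faces` is outside this diff, and the `_DETECTORS` mapping and helper below are hypothetical (class names per the imports above):

```python
# Hypothetical completion of the factory; not part of this PR's hunk.
_DETECTORS = {'retinaface': RetinaFace, 'scrfd': SCRFD, 'yolov5': YOLOv5Face}


def _get_detector(method_name: str, **kwargs) -> BaseDetector:
    if method_name not in _DETECTORS:
        raise ValueError(f'Unknown detection method: {method_name}')
    if method_name not in _detector_cache:
        # Build once; later calls with the same method reuse the instance
        _detector_cache[method_name] = _DETECTORS[method_name](**kwargs)
    return _detector_cache[method_name]
```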

View File

@@ -7,6 +7,8 @@ from typing import Any, Dict, List
import numpy as np
+from uniface.face import Face
class BaseDetector(ABC):
"""
@@ -21,7 +23,7 @@ class BaseDetector(ABC):
self.config = kwargs
@abstractmethod
-def detect(self, image: np.ndarray, **kwargs) -> List[Dict[str, Any]]:
+def detect(self, image: np.ndarray, **kwargs) -> List[Face]:
"""
Detect faces in an image.
@@ -30,18 +32,17 @@ class BaseDetector(ABC):
**kwargs: Additional detection parameters
Returns:
-List[Dict[str, Any]]: List of detected faces, where each dictionary contains:
-- 'bbox' (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
-- 'confidence' (float): Detection confidence score (0.0 to 1.0)
-- 'landmarks' (np.ndarray): Facial landmarks with shape (5, 2) for 5-point landmarks
-or (68, 2) for 68-point landmarks. Empty array if not supported.
+List[Face]: List of detected Face objects, each containing:
+- bbox (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
+- confidence (float): Detection confidence score (0.0 to 1.0)
+- landmarks (np.ndarray): Facial landmarks with shape (5, 2) for 5-point landmarks
Example:
>>> faces = detector.detect(image)
>>> for face in faces:
-... bbox = face['bbox'] # np.ndarray with shape (4,)
-... confidence = face['confidence'] # float
-... landmarks = face['landmarks'] # np.ndarray with shape (5, 2)
+... bbox = face.bbox # np.ndarray with shape (4,)
+... confidence = face.confidence # float
+... landmarks = face.landmarks # np.ndarray with shape (5, 2)
"""
pass
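
With the ABC now typed against `Face`, a custom backend only has to construct `Face` objects. A minimal subclass sketch — the module path for `BaseDetector` is assumed from the relative imports in this diff, and the single hard-coded detection is a placeholder for real model output:

```python
from typing import List

import numpy as np

from uniface.detection.base import BaseDetector  # path assumed from this diff
from uniface.face import Face


class DummyDetector(BaseDetector):
    def detect(self, image: np.ndarray, **kwargs) -> List[Face]:
        h, w = image.shape[:2]
        # Placeholder: one fixed box over the top-left quadrant
        return [
            Face(
                bbox=np.array([0, 0, w / 2, h / 2], dtype=np.float32),
                confidence=0.99,
                landmarks=np.zeros((5, 2), dtype=np.float32),
            )
        ]
```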

View File

@@ -2,7 +2,7 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
-from typing import Any, Dict, List, Literal, Tuple
+from typing import Any, List, Literal, Tuple
import numpy as np
@@ -14,6 +14,7 @@ from uniface.common import (
resize_image,
)
from uniface.constants import RetinaFaceWeights
+from uniface.face import Face
from uniface.log import Logger
from uniface.model_store import verify_model_weights
from uniface.onnx_utils import create_onnx_session
@@ -154,7 +155,7 @@ class RetinaFace(BaseDetector):
max_num: int = 0,
metric: Literal['default', 'max'] = 'max',
center_weight: float = 2.0,
-) -> List[Dict[str, Any]]:
+) -> List[Face]:
"""
Perform face detection on an input image and return bounding boxes and facial landmarks.
@@ -168,19 +169,19 @@ class RetinaFace(BaseDetector):
when using the "default" metric. Defaults to 2.0.
Returns:
-List[Dict[str, Any]]: List of face detection dictionaries, each containing:
-- 'bbox' (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
-- 'confidence' (float): Detection confidence score (0.0 to 1.0)
-- 'landmarks' (np.ndarray): 5-point facial landmarks with shape (5, 2)
+List[Face]: List of Face objects, each containing:
+- bbox (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
+- confidence (float): Detection confidence score (0.0 to 1.0)
+- landmarks (np.ndarray): 5-point facial landmarks with shape (5, 2)
Example:
>>> faces = detector.detect(image)
>>> for face in faces:
-... bbox = face['bbox'] # np.ndarray with shape (4,)
-... confidence = face['confidence'] # float
-... landmarks = face['landmarks'] # np.ndarray with shape (5, 2)
+... bbox = face.bbox # np.ndarray with shape (4,)
+... confidence = face.confidence # float
+... landmarks = face.landmarks # np.ndarray with shape (5, 2)
... # Can pass landmarks directly to recognition
-... embedding = recognizer.get_normalized_embedding(image, landmarks)
+... embedding = recognizer.get_normalized_embedding(image, face.landmarks)
"""
original_height, original_width = image.shape[:2]
@@ -229,12 +230,12 @@ class RetinaFace(BaseDetector):
faces = []
for i in range(detections.shape[0]):
-face_dict = {
-'bbox': detections[i, :4],
-'confidence': float(detections[i, 4]),
-'landmarks': landmarks[i],
-}
-faces.append(face_dict)
+face = Face(
+bbox=detections[i, :4],
+confidence=float(detections[i, 4]),
+landmarks=landmarks[i],
+)
+faces.append(face)
return faces
@@ -350,19 +351,12 @@ if __name__ == '__main__':
# Process each detected face
for face in faces:
-# Extract bbox and landmarks from dictionary
-bbox = face['bbox'] # [x1, y1, x2, y2]
-landmarks = face['landmarks'] # [[x1, y1], [x2, y2], ...]
-confidence = face['confidence']
+# Extract bbox and landmarks from Face object
+draw_bbox(frame, face.bbox, face.confidence)
-# Pass bbox and confidence separately
-draw_bbox(frame, bbox, confidence)
-# Convert landmarks to numpy array format if needed
-if landmarks is not None and len(landmarks) > 0:
-# Convert list of [x, y] pairs to numpy array
-points = np.array(landmarks, dtype=np.float32) # Shape: (5, 2)
-draw_keypoints(frame, points)
+# Draw landmarks if available
+if face.landmarks is not None and len(face.landmarks) > 0:
+draw_keypoints(frame, face.landmarks)
# Display face count
cv2.putText(

View File

@@ -2,13 +2,14 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
-from typing import Any, Dict, List, Literal, Tuple
+from typing import Any, List, Literal, Tuple
import cv2
import numpy as np
from uniface.common import distance2bbox, distance2kps, non_max_suppression, resize_image
from uniface.constants import SCRFDWeights
+from uniface.face import Face
from uniface.log import Logger
from uniface.model_store import verify_model_weights
from uniface.onnx_utils import create_onnx_session
@@ -193,7 +194,7 @@ class SCRFD(BaseDetector):
max_num: int = 0,
metric: Literal['default', 'max'] = 'max',
center_weight: float = 2.0,
-) -> List[Dict[str, Any]]:
+) -> List[Face]:
"""
Perform face detection on an input image and return bounding boxes and facial landmarks.
@@ -207,19 +208,19 @@ class SCRFD(BaseDetector):
when using the "default" metric. Defaults to 2.0.
Returns:
-List[Dict[str, Any]]: List of face detection dictionaries, each containing:
-- 'bbox' (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
-- 'confidence' (float): Detection confidence score (0.0 to 1.0)
-- 'landmarks' (np.ndarray): 5-point facial landmarks with shape (5, 2)
+List[Face]: List of Face objects, each containing:
+- bbox (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
+- confidence (float): Detection confidence score (0.0 to 1.0)
+- landmarks (np.ndarray): 5-point facial landmarks with shape (5, 2)
Example:
>>> faces = detector.detect(image)
>>> for face in faces:
-... bbox = face['bbox'] # np.ndarray with shape (4,)
-... confidence = face['confidence'] # float
-... landmarks = face['landmarks'] # np.ndarray with shape (5, 2)
+... bbox = face.bbox # np.ndarray with shape (4,)
+... confidence = face.confidence # float
+... landmarks = face.landmarks # np.ndarray with shape (5, 2)
... # Can pass landmarks directly to recognition
-... embedding = recognizer.get_normalized_embedding(image, landmarks)
+... embedding = recognizer.get_normalized_embedding(image, face.landmarks)
"""
original_height, original_width = image.shape[:2]
@@ -280,12 +281,12 @@ class SCRFD(BaseDetector):
faces = []
for i in range(detections.shape[0]):
-face_dict = {
-'bbox': detections[i, :4],
-'confidence': float(detections[i, 4]),
-'landmarks': landmarks[i],
-}
-faces.append(face_dict)
+face = Face(
+bbox=detections[i, :4],
+confidence=float(detections[i, 4]),
+landmarks=landmarks[i],
+)
+faces.append(face)
return faces
@@ -324,19 +325,12 @@ if __name__ == '__main__':
# Process each detected face
for face in faces:
-# Extract bbox and landmarks from dictionary
-bbox = face['bbox'] # [x1, y1, x2, y2]
-landmarks = face['landmarks'] # [[x1, y1], [x2, y2], ...]
-confidence = face['confidence']
+# Extract bbox and landmarks from Face object
+draw_bbox(frame, face.bbox, face.confidence)
-# Pass bbox and confidence separately
-draw_bbox(frame, bbox, confidence)
-# Convert landmarks to numpy array format if needed
-if landmarks is not None and len(landmarks) > 0:
-# Convert list of [x, y] pairs to numpy array
-points = np.array(landmarks, dtype=np.float32) # Shape: (5, 2)
-draw_keypoints(frame, points)
+# Draw landmarks if available
+if face.landmarks is not None and len(face.landmarks) > 0:
+draw_keypoints(frame, face.landmarks)
# Display face count
cv2.putText(

View File

@@ -2,13 +2,14 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
-from typing import Any, Dict, List, Literal, Tuple
+from typing import Any, List, Literal, Tuple
import cv2
import numpy as np
from uniface.common import non_max_suppression
from uniface.constants import YOLOv5FaceWeights
+from uniface.face import Face
from uniface.log import Logger
from uniface.model_store import verify_model_weights
from uniface.onnx_utils import create_onnx_session
@@ -259,7 +260,7 @@ class YOLOv5Face(BaseDetector):
max_num: int = 0,
metric: Literal['default', 'max'] = 'max',
center_weight: float = 2.0,
-) -> List[Dict[str, Any]]:
+) -> List[Face]:
"""
Perform face detection on an input image and return bounding boxes and facial landmarks.
@@ -273,19 +274,19 @@ class YOLOv5Face(BaseDetector):
when using the "default" metric. Defaults to 2.0.
Returns:
-List[Dict[str, Any]]: List of face detection dictionaries, each containing:
-- 'bbox' (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
-- 'confidence' (float): Detection confidence score (0.0 to 1.0)
-- 'landmarks' (np.ndarray): 5-point facial landmarks with shape (5, 2)
+List[Face]: List of Face objects, each containing:
+- bbox (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
+- confidence (float): Detection confidence score (0.0 to 1.0)
+- landmarks (np.ndarray): 5-point facial landmarks with shape (5, 2)
Example:
>>> faces = detector.detect(image)
>>> for face in faces:
-... bbox = face['bbox'] # np.ndarray with shape (4,)
-... confidence = face['confidence'] # float
-... landmarks = face['landmarks'] # np.ndarray with shape (5, 2)
+... bbox = face.bbox # np.ndarray with shape (4,)
+... confidence = face.confidence # float
+... landmarks = face.landmarks # np.ndarray with shape (5, 2)
... # Can pass landmarks directly to recognition
-... embedding = recognizer.get_normalized_embedding(image, landmarks)
+... embedding = recognizer.get_normalized_embedding(image, face.landmarks)
"""
original_height, original_width = image.shape[:2]
@@ -330,11 +331,11 @@ class YOLOv5Face(BaseDetector):
faces = []
for i in range(detections.shape[0]):
-face_dict = {
-'bbox': detections[i, :4],
-'confidence': float(detections[i, 4]),
-'landmarks': landmarks[i],
-}
-faces.append(face_dict)
+face = Face(
+bbox=detections[i, :4],
+confidence=float(detections[i, 4]),
+landmarks=landmarks[i],
+)
+faces.append(face)
return faces
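
A closing migration note: `detect()` now returns `Face` objects, so any downstream `face['bbox']` access raises `TypeError`. This PR ships no compatibility layer; if one were wanted during migration, a shim along these lines (hypothetical, not part of the PR) would keep legacy subscript access alive:

```python
from dataclasses import dataclass

import numpy as np


@dataclass
class Face:
    bbox: np.ndarray
    confidence: float
    landmarks: np.ndarray

    def __getitem__(self, key: str):
        # Legacy dict-style access, e.g. face['bbox'] -> face.bbox
        if key in ('bbox', 'confidence', 'landmarks'):
            return getattr(self, key)
        raise KeyError(key)
```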