Mirror of https://github.com/yakhyo/uniface.git (synced 2025-12-30 09:02:25 +00:00)
feat: Update detection module output to dataclasses
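For orientation before the hunks: this commit replaces the per-face dictionaries (`face['bbox']`, `face['confidence']`, `face['landmarks']`) with attribute access on a `Face` object imported from `uniface.face`. The definition of `Face` itself is not among the hunks shown here, so the sketch below is inferred from the fields used throughout this diff (`bbox`, `confidence`, `landmarks`, plus the optional `embedding`, `age`, and `gender` that `FaceAnalyzer` fills in); it is not a copy of `uniface/face.py`:

```python
# Inferred sketch of the Face container; the real uniface/face.py may differ.
from dataclasses import dataclass
from typing import Optional

import numpy as np


@dataclass
class Face:
    bbox: np.ndarray        # shape (4,), [x1, y1, x2, y2]
    confidence: float       # detection score in [0.0, 1.0]
    landmarks: np.ndarray   # shape (5, 2), 5-point landmarks
    embedding: Optional[np.ndarray] = None  # set by FaceAnalyzer when a recognizer is configured
    age: Optional[int] = None               # set when age/gender prediction is enabled
    gender: Optional[int] = None            # 0 = Female, 1 = Male (per the README examples)
```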
@@ -39,9 +39,9 @@ faces = detector.detect(image)
 # Print results
 for i, face in enumerate(faces):
     print(f"Face {i+1}:")
-    print(f" Confidence: {face['confidence']:.2f}")
-    print(f" BBox: {face['bbox']}")
-    print(f" Landmarks: {len(face['landmarks'])} points")
+    print(f" Confidence: {face.confidence:.2f}")
+    print(f" BBox: {face.bbox}")
+    print(f" Landmarks: {len(face.landmarks)} points")
 ```
 
 **Output:**
@@ -70,9 +70,9 @@ image = cv2.imread("photo.jpg")
 faces = detector.detect(image)
 
 # Extract visualization data
-bboxes = [f['bbox'] for f in faces]
-scores = [f['confidence'] for f in faces]
-landmarks = [f['landmarks'] for f in faces]
+bboxes = [f.bbox for f in faces]
+scores = [f.confidence for f in faces]
+landmarks = [f.landmarks for f in faces]
 
 # Draw on image
 draw_detections(
@@ -113,8 +113,8 @@ faces2 = detector.detect(image2)
 
 if faces1 and faces2:
     # Extract embeddings
-    emb1 = recognizer.get_normalized_embedding(image1, faces1[0]['landmarks'])
-    emb2 = recognizer.get_normalized_embedding(image2, faces2[0]['landmarks'])
+    emb1 = recognizer.get_normalized_embedding(image1, faces1[0].landmarks)
+    emb2 = recognizer.get_normalized_embedding(image2, faces2[0].landmarks)
 
     # Compute similarity (cosine similarity)
     similarity = np.dot(emb1, emb2.T)[0][0]
@@ -159,9 +159,9 @@ while True:
     faces = detector.detect(frame)
 
     # Draw results
-    bboxes = [f['bbox'] for f in faces]
-    scores = [f['confidence'] for f in faces]
-    landmarks = [f['landmarks'] for f in faces]
+    bboxes = [f.bbox for f in faces]
+    scores = [f.confidence for f in faces]
+    landmarks = [f.landmarks for f in faces]
     draw_detections(
         image=frame,
         bboxes=bboxes,
@@ -199,7 +199,7 @@ faces = detector.detect(image)
 
 # Predict attributes
 for i, face in enumerate(faces):
-    gender, age = age_gender.predict(image, face['bbox'])
+    gender, age = age_gender.predict(image, face.bbox)
     gender_str = 'Female' if gender == 0 else 'Male'
     print(f"Face {i+1}: {gender_str}, {age} years old")
 ```
@@ -230,7 +230,7 @@ image = cv2.imread("photo.jpg")
 faces = detector.detect(image)
 
 if faces:
-    landmarks = landmarker.get_landmarks(image, faces[0]['bbox'])
+    landmarks = landmarker.get_landmarks(image, faces[0].bbox)
     print(f"Detected {len(landmarks)} landmarks")
 
     # Draw landmarks
@@ -262,8 +262,7 @@ faces = detector.detect(image)
 
 # Estimate gaze for each face
 for i, face in enumerate(faces):
-    bbox = face['bbox']
-    x1, y1, x2, y2 = map(int, bbox[:4])
+    x1, y1, x2, y2 = map(int, face.bbox[:4])
     face_crop = image[y1:y2, x1:x2]
 
     if face_crop.size > 0:
@@ -271,7 +270,7 @@ for i, face in enumerate(faces):
         print(f"Face {i+1}: pitch={np.degrees(pitch):.1f}°, yaw={np.degrees(yaw):.1f}°")
 
         # Draw gaze direction
-        draw_gaze(image, bbox, pitch, yaw)
+        draw_gaze(image, face.bbox, pitch, yaw)
 
 cv2.imwrite("gaze_output.jpg", image)
 ```
@@ -435,7 +434,7 @@ image = cv2.imread("photo.jpg")
 faces = detector.detect(image)
 
 for i, face in enumerate(faces):
-    label_idx, score = spoofer.predict(image, face['bbox'])
+    label_idx, score = spoofer.predict(image, face.bbox)
     # label_idx: 0 = Fake, 1 = Real
     label = 'Real' if label_idx == 1 else 'Fake'
     print(f"Face {i+1}: {label} ({score:.1%})")
README.md (31 lines changed)
@@ -101,9 +101,9 @@ faces = detector.detect(image)
 
 # Process results
 for face in faces:
-    bbox = face['bbox']  # [x1, y1, x2, y2]
-    confidence = face['confidence']
-    landmarks = face['landmarks']  # 5-point landmarks
+    bbox = face.bbox  # np.ndarray [x1, y1, x2, y2]
+    confidence = face.confidence
+    landmarks = face.landmarks  # np.ndarray (5, 2) landmarks
     print(f"Face detected with confidence: {confidence:.2f}")
 ```
 
@@ -121,8 +121,8 @@ recognizer = ArcFace()
 faces1 = detector.detect(image1)
 faces2 = detector.detect(image2)
 
-embedding1 = recognizer.get_normalized_embedding(image1, faces1[0]['landmarks'])
-embedding2 = recognizer.get_normalized_embedding(image2, faces2[0]['landmarks'])
+embedding1 = recognizer.get_normalized_embedding(image1, faces1[0].landmarks)
+embedding2 = recognizer.get_normalized_embedding(image2, faces2[0].landmarks)
 
 # Compare faces
 similarity = compute_similarity(embedding1, embedding2)
@@ -138,7 +138,7 @@ detector = RetinaFace()
 landmarker = Landmark106()
 
 faces = detector.detect(image)
-landmarks = landmarker.get_landmarks(image, faces[0]['bbox'])
+landmarks = landmarker.get_landmarks(image, faces[0].bbox)
 # Returns 106 (x, y) landmark points
 ```
 
@@ -151,7 +151,7 @@ detector = RetinaFace()
 age_gender = AgeGender()
 
 faces = detector.detect(image)
-gender, age = age_gender.predict(image, faces[0]['bbox'])
+gender, age = age_gender.predict(image, faces[0].bbox)
 gender_str = 'Female' if gender == 0 else 'Male'
 print(f"{gender_str}, {age} years old")
 ```
@@ -168,15 +168,14 @@ gaze_estimator = MobileGaze()
 
 faces = detector.detect(image)
 for face in faces:
-    bbox = face['bbox']
-    x1, y1, x2, y2 = map(int, bbox[:4])
+    x1, y1, x2, y2 = map(int, face.bbox[:4])
     face_crop = image[y1:y2, x1:x2]
 
     pitch, yaw = gaze_estimator.estimate(face_crop)
     print(f"Gaze: pitch={np.degrees(pitch):.1f}°, yaw={np.degrees(yaw):.1f}°")
 
     # Visualize
-    draw_gaze(image, bbox, pitch, yaw)
+    draw_gaze(image, face.bbox, pitch, yaw)
 ```
 
 ### Face Parsing
@@ -213,7 +212,7 @@ spoofer = MiniFASNet() # Uses V2 by default
 
 faces = detector.detect(image)
 for face in faces:
-    label_idx, score = spoofer.predict(image, face['bbox'])
+    label_idx, score = spoofer.predict(image, face.bbox)
     # label_idx: 0 = Fake, 1 = Real
     label = 'Real' if label_idx == 1 else 'Fake'
     print(f"{label}: {score:.1%}")
@@ -458,9 +457,9 @@ while True:
     faces = detector.detect(frame)
 
     # Extract data for visualization
-    bboxes = [f['bbox'] for f in faces]
-    scores = [f['confidence'] for f in faces]
-    landmarks = [f['landmarks'] for f in faces]
+    bboxes = [f.bbox for f in faces]
+    scores = [f.confidence for f in faces]
+    landmarks = [f.landmarks for f in faces]
 
     draw_detections(
         image=frame,
@@ -494,7 +493,7 @@ for person_id, image_path in person_images.items():
     faces = detector.detect(image)
     if faces:
         embedding = recognizer.get_normalized_embedding(
-            image, faces[0]['landmarks']
+            image, faces[0].landmarks
         )
         database[person_id] = embedding
 
@@ -503,7 +502,7 @@ query_image = cv2.imread("query.jpg")
 query_faces = detector.detect(query_image)
 if query_faces:
     query_embedding = recognizer.get_normalized_embedding(
-        query_image, query_faces[0]['landmarks']
+        query_image, query_faces[0].landmarks
     )
 
     # Find best match
@@ -48,7 +48,7 @@
 "name": "stdout",
 "output_type": "stream",
 "text": [
-"1.3.1\n"
+"1.6.0\n"
 ]
 }
 ],
@@ -140,13 +140,13 @@
 "\n",
 " # Draw detections\n",
 " bbox_image = image.copy()\n",
-" bboxes = [f['bbox'] for f in faces]\n",
-" scores = [f['confidence'] for f in faces]\n",
-" landmarks = [f['landmarks'] for f in faces]\n",
+" bboxes = [f.bbox for f in faces]\n",
+" scores = [f.confidence for f in faces]\n",
+" landmarks = [f.landmarks for f in faces]\n",
 " draw_detections(image=bbox_image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=0.6, fancy_bbox=True)\n",
 "\n",
 " # Align first detected face (returns aligned image and inverse transform matrix)\n",
-" first_landmarks = faces[0]['landmarks']\n",
+" first_landmarks = faces[0].landmarks\n",
 " aligned_image, _ = face_alignment(image, first_landmarks, image_size=112)\n",
 "\n",
 " # Convert BGR to RGB for visualization\n",
@@ -202,7 +202,8 @@
 "source": [
 "## Notes\n",
 "\n",
-"- `detect()` returns a list of face dictionaries with `bbox`, `confidence`, `landmarks`\n",
+"- `detect()` returns a list of `Face` objects with `bbox`, `confidence`, `landmarks` attributes\n",
+"- Access attributes using dot notation: `face.bbox`, `face.landmarks`\n",
 "- `face_alignment()` uses 5-point landmarks to align and crop the face\n",
 "- Default output size is 112x112 (standard for face recognition models)\n"
 ]
@@ -44,7 +44,7 @@
 "name": "stdout",
 "output_type": "stream",
 "text": [
-"1.3.1\n"
+"1.6.0\n"
 ]
 }
 ],
File diff suppressed because one or more lines are too long
@@ -44,7 +44,7 @@
 "name": "stdout",
 "output_type": "stream",
 "text": [
-"1.3.1\n"
+"1.6.0\n"
 ]
 }
 ],
@@ -153,14 +153,14 @@
 "# Load image\n",
 "image = cv2.imread(image_path)\n",
 "\n",
-"# Detect faces - returns list of face dictionaries\n",
+"# Detect faces - returns list of Face objects\n",
 "faces = detector.detect(image)\n",
 "print(f'Detected {len(faces)} face(s)')\n",
 "\n",
 "# Unpack face data for visualization\n",
-"bboxes = [f['bbox'] for f in faces]\n",
-"scores = [f['confidence'] for f in faces]\n",
-"landmarks = [f['landmarks'] for f in faces]\n",
+"bboxes = [f.bbox for f in faces]\n",
+"scores = [f.confidence for f in faces]\n",
+"landmarks = [f.landmarks for f in faces]\n",
 "\n",
 "# Draw detections\n",
 "draw_detections(image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=0.6, fancy_bbox=True)\n",
@@ -211,9 +211,9 @@
 "faces = detector.detect(image, max_num=2)\n",
 "print(f'Detected {len(faces)} face(s)')\n",
 "\n",
-"bboxes = [f['bbox'] for f in faces]\n",
-"scores = [f['confidence'] for f in faces]\n",
-"landmarks = [f['landmarks'] for f in faces]\n",
+"bboxes = [f.bbox for f in faces]\n",
+"scores = [f.confidence for f in faces]\n",
+"landmarks = [f.landmarks for f in faces]\n",
 "\n",
 "draw_detections(image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=0.6, fancy_bbox=True)\n",
 "\n",
@@ -258,9 +258,9 @@
 "faces = detector.detect(image, max_num=5)\n",
 "print(f'Detected {len(faces)} face(s)')\n",
 "\n",
-"bboxes = [f['bbox'] for f in faces]\n",
-"scores = [f['confidence'] for f in faces]\n",
-"landmarks = [f['landmarks'] for f in faces]\n",
+"bboxes = [f.bbox for f in faces]\n",
+"scores = [f.confidence for f in faces]\n",
+"landmarks = [f.landmarks for f in faces]\n",
 "\n",
 "draw_detections(image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=0.6, fancy_bbox=True)\n",
 "\n",
@@ -274,7 +274,8 @@
 "source": [
 "## Notes\n",
 "\n",
-"- `detect()` returns a list of dictionaries with keys: `bbox`, `confidence`, `landmarks`\n",
+"- `detect()` returns a list of `Face` objects with attributes: `bbox`, `confidence`, `landmarks`\n",
+"- Access attributes using dot notation: `face.bbox`, `face.confidence`, `face.landmarks`\n",
 "- Adjust `conf_thresh` and `nms_thresh` for your use case\n",
 "- Use `max_num` to limit detected faces"
 ]
@@ -46,7 +46,7 @@
 "name": "stdout",
 "output_type": "stream",
 "text": [
-"UniFace version: 1.5.0\n"
+"UniFace version: 1.6.0\n"
 ]
 }
 ],
@@ -365,7 +365,7 @@
 ],
 "metadata": {
 "kernelspec": {
-"display_name": "Python 3",
+"display_name": "base",
 "language": "python",
 "name": "python3"
 },
@@ -379,7 +379,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.10.0"
+"version": "3.13.5"
 }
 },
 "nbformat": 4,
@@ -42,7 +42,7 @@
 "name": "stdout",
 "output_type": "stream",
 "text": [
-"1.3.1\n"
+"1.6.0\n"
 ]
 }
 ],
@@ -37,7 +37,7 @@
 "name": "stdout",
 "output_type": "stream",
 "text": [
-"1.3.1\n"
+"1.6.0\n"
 ]
 }
 ],
@@ -44,7 +44,7 @@
 "name": "stdout",
 "output_type": "stream",
 "text": [
-"UniFace version: 1.4.0\n"
+"UniFace version: 1.6.0\n"
 ]
 }
 ],
@@ -152,8 +152,7 @@
 "\n",
 " # Estimate gaze for each face\n",
 " for i, face in enumerate(faces):\n",
-" bbox = face['bbox']\n",
-" x1, y1, x2, y2 = map(int, bbox[:4])\n",
+" x1, y1, x2, y2 = map(int, face.bbox[:4])\n",
 " face_crop = image[y1:y2, x1:x2]\n",
 "\n",
 " if face_crop.size > 0:\n",
@@ -164,7 +163,7 @@
 " print(f' Face {i+1}: pitch={pitch_deg:.1f}°, yaw={yaw_deg:.1f}°')\n",
 "\n",
 " # Draw gaze without angle text\n",
-" draw_gaze(image, bbox, pitch, yaw, draw_angles=False)\n",
+" draw_gaze(image, face.bbox, pitch, yaw, draw_angles=False)\n",
 "\n",
 " # Convert BGR to RGB for display\n",
 " original_rgb = cv2.cvtColor(original, cv2.COLOR_BGR2RGB)\n",
@@ -249,7 +248,7 @@
 ],
 "metadata": {
 "kernelspec": {
-"display_name": "Python 3",
+"display_name": "base",
 "language": "python",
 "name": "python3"
 },
@@ -263,7 +262,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.10.0"
+"version": "3.13.5"
 }
 },
 "nbformat": 4,
@@ -1,6 +1,6 @@
 [project]
 name = "uniface"
-version = "1.5.3"
+version = "1.6.0"
 description = "UniFace: A Comprehensive Library for Face Detection, Recognition, Landmark Analysis, Face Parsing, Gaze Estimation, Age, and Gender Detection"
 readme = "README.md"
 license = { text = "MIT" }
@@ -13,7 +13,7 @@
 
 __license__ = 'MIT'
 __author__ = 'Yakhyokhuja Valikhujaev'
-__version__ = '1.5.3'
+__version__ = '1.6.0'
 
 
 from uniface.face_utils import compute_similarity, face_alignment
@@ -36,41 +36,24 @@ class FaceAnalyzer:
 
     def analyze(self, image: np.ndarray) -> List[Face]:
         """Analyze faces in an image."""
-        detections = self.detector.detect(image)
-        Logger.debug(f'Detected {len(detections)} face(s)')
+        faces = self.detector.detect(image)
+        Logger.debug(f'Detected {len(faces)} face(s)')
 
-        faces = []
-        for idx, detection in enumerate(detections):
-            bbox = detection['bbox']
-            confidence = detection['confidence']
-            landmarks = detection['landmarks']
-
-            embedding = None
+        for idx, face in enumerate(faces):
            if self.recognizer is not None:
                 try:
-                    embedding = self.recognizer.get_normalized_embedding(image, landmarks)
-                    Logger.debug(f' Face {idx + 1}: Extracted embedding with shape {embedding.shape}')
+                    face.embedding = self.recognizer.get_normalized_embedding(image, face.landmarks)
+                    Logger.debug(f' Face {idx + 1}: Extracted embedding with shape {face.embedding.shape}')
                 except Exception as e:
                     Logger.warning(f' Face {idx + 1}: Failed to extract embedding: {e}')
 
-            age, gender = None, None
             if self.age_gender is not None:
                 try:
-                    gender, age = self.age_gender.predict(image, bbox)
-                    Logger.debug(f' Face {idx + 1}: Age={age}, Gender={gender}')
+                    face.gender, face.age = self.age_gender.predict(image, face.bbox)
+                    Logger.debug(f' Face {idx + 1}: Age={face.age}, Gender={face.gender}')
                 except Exception as e:
                     Logger.warning(f' Face {idx + 1}: Failed to predict age/gender: {e}')
 
-            face = Face(
-                bbox=bbox,
-                confidence=confidence,
-                landmarks=landmarks,
-                embedding=embedding,
-                age=age,
-                gender=gender,
-            )
-            faces.append(face)
-
         Logger.info(f'Analysis complete: {len(faces)} face(s) processed')
         return faces
 
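The rewritten `analyze()` above no longer builds `Face` objects itself; it annotates the ones returned by `detector.detect()` in place, leaving `embedding`, `age`, and `gender` as `None` when the corresponding model is absent or fails. A minimal usage sketch follows, assuming `FaceAnalyzer` is exported from the package root (an assumption; only the class body appears in this diff):

```python
# Usage sketch; the import path and constructor defaults are assumptions.
import cv2

from uniface import FaceAnalyzer  # assumed export location

analyzer = FaceAnalyzer()  # wires up a detector (plus optional recognizer / age_gender)
image = cv2.imread("photo.jpg")

for face in analyzer.analyze(image):
    # Optional fields stay None when the corresponding model is not configured.
    print(f"confidence={face.confidence:.2f}, age={face.age}, gender={face.gender}")
    if face.embedding is not None:
        print(f"embedding shape: {face.embedding.shape}")
```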
@@ -7,6 +7,8 @@ from typing import Any, Dict, List
 
 import numpy as np
 
+from uniface.face import Face
+
 from .base import BaseDetector
 from .retinaface import RetinaFace
 from .scrfd import SCRFD
@@ -16,7 +18,7 @@ from .yolov5 import YOLOv5Face
 _detector_cache: Dict[str, BaseDetector] = {}
 
 
-def detect_faces(image: np.ndarray, method: str = 'retinaface', **kwargs) -> List[Dict[str, Any]]:
+def detect_faces(image: np.ndarray, method: str = 'retinaface', **kwargs) -> List[Face]:
     """
     High-level face detection function.
 
@@ -26,18 +28,18 @@ def detect_faces(image: np.ndarray, method: str = 'retinaface', **kwargs) -> Lis
         **kwargs: Additional arguments passed to the detector.
 
     Returns:
-        List[Dict[str, Any]]: A list of dictionaries, where each dictionary represents a detected face and contains:
-            - 'bbox' (List[float]): [x1, y1, x2, y2] bounding box coordinates.
-            - 'confidence' (float): The confidence score of the detection.
-            - 'landmarks' (List[List[float]]): 5-point facial landmarks.
+        List[Face]: A list of Face objects, each containing:
+            - bbox (np.ndarray): [x1, y1, x2, y2] bounding box coordinates.
+            - confidence (float): The confidence score of the detection.
+            - landmarks (np.ndarray): 5-point facial landmarks with shape (5, 2).
 
     Example:
         >>> from uniface import detect_faces
         >>> image = cv2.imread("your_image.jpg")
         >>> faces = detect_faces(image, method='retinaface', conf_thresh=0.8)
         >>> for face in faces:
-        ...     print(f"Found face with confidence: {face['confidence']}")
-        ...     print(f"BBox: {face['bbox']}")
+        ...     print(f"Found face with confidence: {face.confidence}")
+        ...     print(f"BBox: {face.bbox}")
     """
     method_name = method.lower()
 
@@ -7,6 +7,8 @@ from typing import Any, Dict, List
 
 import numpy as np
 
+from uniface.face import Face
+
 
 class BaseDetector(ABC):
     """
@@ -21,7 +23,7 @@ class BaseDetector:
         self.config = kwargs
 
     @abstractmethod
-    def detect(self, image: np.ndarray, **kwargs) -> List[Dict[str, Any]]:
+    def detect(self, image: np.ndarray, **kwargs) -> List[Face]:
         """
         Detect faces in an image.
 
@@ -30,18 +32,17 @@ class BaseDetector:
             **kwargs: Additional detection parameters
 
         Returns:
-            List[Dict[str, Any]]: List of detected faces, where each dictionary contains:
-                - 'bbox' (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
-                - 'confidence' (float): Detection confidence score (0.0 to 1.0)
-                - 'landmarks' (np.ndarray): Facial landmarks with shape (5, 2) for 5-point landmarks
-                  or (68, 2) for 68-point landmarks. Empty array if not supported.
+            List[Face]: List of detected Face objects, each containing:
+                - bbox (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
+                - confidence (float): Detection confidence score (0.0 to 1.0)
+                - landmarks (np.ndarray): Facial landmarks with shape (5, 2) for 5-point landmarks
 
         Example:
             >>> faces = detector.detect(image)
            >>> for face in faces:
-            ...     bbox = face['bbox']  # np.ndarray with shape (4,)
-            ...     confidence = face['confidence']  # float
-            ...     landmarks = face['landmarks']  # np.ndarray with shape (5, 2)
+            ...     bbox = face.bbox  # np.ndarray with shape (4,)
+            ...     confidence = face.confidence  # float
+            ...     landmarks = face.landmarks  # np.ndarray with shape (5, 2)
         """
         pass
 
@@ -2,7 +2,7 @@
 # Author: Yakhyokhuja Valikhujaev
 # GitHub: https://github.com/yakhyo
 
-from typing import Any, Dict, List, Literal, Tuple
+from typing import Any, List, Literal, Tuple
 
 import numpy as np
 
@@ -14,6 +14,7 @@ from uniface.common import (
     resize_image,
 )
 from uniface.constants import RetinaFaceWeights
+from uniface.face import Face
 from uniface.log import Logger
 from uniface.model_store import verify_model_weights
 from uniface.onnx_utils import create_onnx_session
@@ -154,7 +155,7 @@ class RetinaFace(BaseDetector):
         max_num: int = 0,
         metric: Literal['default', 'max'] = 'max',
         center_weight: float = 2.0,
-    ) -> List[Dict[str, Any]]:
+    ) -> List[Face]:
         """
         Perform face detection on an input image and return bounding boxes and facial landmarks.
 
@@ -168,19 +169,19 @@ class RetinaFace(BaseDetector):
                 when using the "default" metric. Defaults to 2.0.
 
         Returns:
-            List[Dict[str, Any]]: List of face detection dictionaries, each containing:
-                - 'bbox' (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
-                - 'confidence' (float): Detection confidence score (0.0 to 1.0)
-                - 'landmarks' (np.ndarray): 5-point facial landmarks with shape (5, 2)
+            List[Face]: List of Face objects, each containing:
+                - bbox (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
+                - confidence (float): Detection confidence score (0.0 to 1.0)
+                - landmarks (np.ndarray): 5-point facial landmarks with shape (5, 2)
 
         Example:
             >>> faces = detector.detect(image)
             >>> for face in faces:
-            ...     bbox = face['bbox']  # np.ndarray with shape (4,)
-            ...     confidence = face['confidence']  # float
-            ...     landmarks = face['landmarks']  # np.ndarray with shape (5, 2)
+            ...     bbox = face.bbox  # np.ndarray with shape (4,)
+            ...     confidence = face.confidence  # float
+            ...     landmarks = face.landmarks  # np.ndarray with shape (5, 2)
             ...     # Can pass landmarks directly to recognition
-            ...     embedding = recognizer.get_normalized_embedding(image, landmarks)
+            ...     embedding = recognizer.get_normalized_embedding(image, face.landmarks)
         """
 
         original_height, original_width = image.shape[:2]
@@ -229,12 +230,12 @@ class RetinaFace(BaseDetector):
 
         faces = []
         for i in range(detections.shape[0]):
-            face_dict = {
-                'bbox': detections[i, :4],
-                'confidence': float(detections[i, 4]),
-                'landmarks': landmarks[i],
-            }
-            faces.append(face_dict)
+            face = Face(
+                bbox=detections[i, :4],
+                confidence=float(detections[i, 4]),
+                landmarks=landmarks[i],
+            )
+            faces.append(face)
 
         return faces
 
@@ -350,19 +351,12 @@ if __name__ == '__main__':
 
         # Process each detected face
         for face in faces:
-            # Extract bbox and landmarks from dictionary
-            bbox = face['bbox']  # [x1, y1, x2, y2]
-            landmarks = face['landmarks']  # [[x1, y1], [x2, y2], ...]
-            confidence = face['confidence']
+            # Extract bbox and landmarks from Face object
+            draw_bbox(frame, face.bbox, face.confidence)
 
-            # Pass bbox and confidence separately
-            draw_bbox(frame, bbox, confidence)
-
-            # Convert landmarks to numpy array format if needed
-            if landmarks is not None and len(landmarks) > 0:
-                # Convert list of [x, y] pairs to numpy array
-                points = np.array(landmarks, dtype=np.float32)  # Shape: (5, 2)
-                draw_keypoints(frame, points)
+            # Draw landmarks if available
+            if face.landmarks is not None and len(face.landmarks) > 0:
+                draw_keypoints(frame, face.landmarks)
 
         # Display face count
         cv2.putText(
@@ -2,13 +2,14 @@
 # Author: Yakhyokhuja Valikhujaev
 # GitHub: https://github.com/yakhyo
 
-from typing import Any, Dict, List, Literal, Tuple
+from typing import Any, List, Literal, Tuple
 
 import cv2
 import numpy as np
 
 from uniface.common import distance2bbox, distance2kps, non_max_suppression, resize_image
 from uniface.constants import SCRFDWeights
+from uniface.face import Face
 from uniface.log import Logger
 from uniface.model_store import verify_model_weights
 from uniface.onnx_utils import create_onnx_session
@@ -193,7 +194,7 @@ class SCRFD(BaseDetector):
         max_num: int = 0,
         metric: Literal['default', 'max'] = 'max',
         center_weight: float = 2.0,
-    ) -> List[Dict[str, Any]]:
+    ) -> List[Face]:
         """
         Perform face detection on an input image and return bounding boxes and facial landmarks.
 
@@ -207,19 +208,19 @@ class SCRFD(BaseDetector):
                 when using the "default" metric. Defaults to 2.0.
 
         Returns:
-            List[Dict[str, Any]]: List of face detection dictionaries, each containing:
-                - 'bbox' (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
-                - 'confidence' (float): Detection confidence score (0.0 to 1.0)
-                - 'landmarks' (np.ndarray): 5-point facial landmarks with shape (5, 2)
+            List[Face]: List of Face objects, each containing:
+                - bbox (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
+                - confidence (float): Detection confidence score (0.0 to 1.0)
+                - landmarks (np.ndarray): 5-point facial landmarks with shape (5, 2)
 
         Example:
             >>> faces = detector.detect(image)
             >>> for face in faces:
-            ...     bbox = face['bbox']  # np.ndarray with shape (4,)
-            ...     confidence = face['confidence']  # float
-            ...     landmarks = face['landmarks']  # np.ndarray with shape (5, 2)
+            ...     bbox = face.bbox  # np.ndarray with shape (4,)
+            ...     confidence = face.confidence  # float
+            ...     landmarks = face.landmarks  # np.ndarray with shape (5, 2)
             ...     # Can pass landmarks directly to recognition
-            ...     embedding = recognizer.get_normalized_embedding(image, landmarks)
+            ...     embedding = recognizer.get_normalized_embedding(image, face.landmarks)
         """
 
         original_height, original_width = image.shape[:2]
@@ -280,12 +281,12 @@ class SCRFD(BaseDetector):
 
         faces = []
         for i in range(detections.shape[0]):
-            face_dict = {
-                'bbox': detections[i, :4],
-                'confidence': float(detections[i, 4]),
-                'landmarks': landmarks[i],
-            }
-            faces.append(face_dict)
+            face = Face(
+                bbox=detections[i, :4],
+                confidence=float(detections[i, 4]),
+                landmarks=landmarks[i],
+            )
+            faces.append(face)
 
         return faces
 
@@ -324,19 +325,12 @@ if __name__ == '__main__':
 
         # Process each detected face
         for face in faces:
-            # Extract bbox and landmarks from dictionary
-            bbox = face['bbox']  # [x1, y1, x2, y2]
-            landmarks = face['landmarks']  # [[x1, y1], [x2, y2], ...]
-            confidence = face['confidence']
+            # Extract bbox and landmarks from Face object
+            draw_bbox(frame, face.bbox, face.confidence)
 
-            # Pass bbox and confidence separately
-            draw_bbox(frame, bbox, confidence)
-
-            # Convert landmarks to numpy array format if needed
-            if landmarks is not None and len(landmarks) > 0:
-                # Convert list of [x, y] pairs to numpy array
-                points = np.array(landmarks, dtype=np.float32)  # Shape: (5, 2)
-                draw_keypoints(frame, points)
+            # Draw landmarks if available
+            if face.landmarks is not None and len(face.landmarks) > 0:
+                draw_keypoints(frame, face.landmarks)
 
         # Display face count
         cv2.putText(
@@ -2,13 +2,14 @@
 # Author: Yakhyokhuja Valikhujaev
 # GitHub: https://github.com/yakhyo
 
-from typing import Any, Dict, List, Literal, Tuple
+from typing import Any, List, Literal, Tuple
 
 import cv2
 import numpy as np
 
 from uniface.common import non_max_suppression
 from uniface.constants import YOLOv5FaceWeights
+from uniface.face import Face
 from uniface.log import Logger
 from uniface.model_store import verify_model_weights
 from uniface.onnx_utils import create_onnx_session
@@ -259,7 +260,7 @@ class YOLOv5Face(BaseDetector):
         max_num: int = 0,
         metric: Literal['default', 'max'] = 'max',
         center_weight: float = 2.0,
-    ) -> List[Dict[str, Any]]:
+    ) -> List[Face]:
         """
         Perform face detection on an input image and return bounding boxes and facial landmarks.
 
@@ -273,19 +274,19 @@ class YOLOv5Face(BaseDetector):
                 when using the "default" metric. Defaults to 2.0.
 
         Returns:
-            List[Dict[str, Any]]: List of face detection dictionaries, each containing:
-                - 'bbox' (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
-                - 'confidence' (float): Detection confidence score (0.0 to 1.0)
-                - 'landmarks' (np.ndarray): 5-point facial landmarks with shape (5, 2)
+            List[Face]: List of Face objects, each containing:
+                - bbox (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
+                - confidence (float): Detection confidence score (0.0 to 1.0)
+                - landmarks (np.ndarray): 5-point facial landmarks with shape (5, 2)
 
         Example:
             >>> faces = detector.detect(image)
             >>> for face in faces:
-            ...     bbox = face['bbox']  # np.ndarray with shape (4,)
-            ...     confidence = face['confidence']  # float
-            ...     landmarks = face['landmarks']  # np.ndarray with shape (5, 2)
+            ...     bbox = face.bbox  # np.ndarray with shape (4,)
+            ...     confidence = face.confidence  # float
+            ...     landmarks = face.landmarks  # np.ndarray with shape (5, 2)
             ...     # Can pass landmarks directly to recognition
-            ...     embedding = recognizer.get_normalized_embedding(image, landmarks)
+            ...     embedding = recognizer.get_normalized_embedding(image, face.landmarks)
         """
 
         original_height, original_width = image.shape[:2]
@@ -330,11 +331,11 @@ class YOLOv5Face(BaseDetector):
 
         faces = []
         for i in range(detections.shape[0]):
-            face_dict = {
-                'bbox': detections[i, :4],
-                'confidence': float(detections[i, 4]),
-                'landmarks': landmarks[i],
-            }
-            faces.append(face_dict)
+            face = Face(
+                bbox=detections[i, :4],
+                confidence=float(detections[i, 4]),
+                landmarks=landmarks[i],
+            )
+            faces.append(face)
 
         return faces