Mirror of https://github.com/yakhyo/uniface.git (synced 2025-12-30 00:52:25 +00:00)
feat: Update recognition, landmark modules
@@ -4,16 +4,17 @@ import time
 import argparse
 import numpy as np

-from uniface.detection import RetinaFace, draw_detections, SCRFD
-from uniface.constants import RetinaFaceWeights, SCRFDWeights
+# UPDATED: Use the factory function and import from the new location
+from uniface.detection import create_detector
+from uniface.visualization import draw_detections


-def run_inference(model, image_path, vis_threshold=0.6, save_dir="outputs"):
+def run_inference(detector, image_path: str, vis_threshold: float = 0.6, save_dir: str = "outputs"):
     """
     Run face detection on a single image.

     Args:
-        model (RetinaFace): Initialized RetinaFace model.
+        detector: Initialized face detector.
         image_path (str): Path to input image.
         vis_threshold (float): Threshold for drawing detections.
         save_dir (str): Directory to save output image.
@@ -23,8 +24,18 @@ def run_inference(model, image_path, vis_threshold=0.6, save_dir="outputs"):
         print(f"❌ Error: Failed to load image from '{image_path}'")
         return

-    boxes, landmarks = model.detect(image)
-    draw_detections(image, (boxes, landmarks), vis_threshold)
+    # 1. Get the list of face dictionaries from the detector
+    faces = detector.detect(image)
+
+    if faces:
+        # 2. Unpack the data into separate lists
+        bboxes = [face['bbox'] for face in faces]
+        scores = [face['confidence'] for face in faces]
+        landmarks = [face['landmarks'] for face in faces]
+
+        # 3. Pass the unpacked lists to the drawing function
+        draw_detections(image, bboxes, scores, landmarks, vis_threshold=vis_threshold)

     os.makedirs(save_dir, exist_ok=True)
     output_path = os.path.join(save_dir, f"{os.path.splitext(os.path.basename(image_path))[0]}_out.jpg")
@@ -33,28 +44,38 @@ def run_inference(model, image_path, vis_threshold=0.6, save_dir="outputs"):


 def main():
-    parser = argparse.ArgumentParser(description="Run RetinaFace inference on an image.")
+    parser = argparse.ArgumentParser(description="Run face detection on an image.")
     parser.add_argument("--image", type=str, required=True, help="Path to the input image")
-    parser.add_argument("--model", type=str, default="MNET_V2", choices=[m.name for m in RetinaFaceWeights], help="Model variant to use")
+    parser.add_argument(
+        "--method",
+        type=str,
+        default="retinaface",
+        choices=['retinaface', 'scrfd'],
+        help="Detection method to use."
+    )
     parser.add_argument("--threshold", type=float, default=0.6, help="Visualization confidence threshold")
     parser.add_argument("--iterations", type=int, default=1, help="Number of inference runs for benchmarking")
     parser.add_argument("--save_dir", type=str, default="outputs", help="Directory to save output images")

     args = parser.parse_args()

-    model_name = RetinaFaceWeights[args.model]
-    model = RetinaFace(model_name=model_name)
+    print(f"Initializing detector: {args.method}")
+    detector = create_detector(method=args.method)

     avg_time = 0
     for i in range(args.iterations):
         start = time.time()
-        run_inference(model, args.image, args.threshold, args.save_dir)
+        run_inference(detector, args.image, args.threshold, args.save_dir)
         elapsed = time.time() - start
         print(f"[{i + 1}/{args.iterations}] ⏱️ Inference time: {elapsed:.4f} seconds")
-        avg_time += elapsed
+        if i > 0:  # Skip the first run, which includes model loading time
+            avg_time += elapsed

     if args.iterations > 1:
-        print(f"\n🔥 Average inference time over {args.iterations} runs: {avg_time / args.iterations:.4f} seconds")
+        # Exclude the first run from the average to avoid warm-up overhead
+        effective_iterations = max(1, args.iterations - 1)
+        print(
+            f"\n🔥 Average inference time over {effective_iterations} runs: {avg_time / effective_iterations:.4f} seconds")


 if __name__ == "__main__":
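Taken together, the new API shrinks the whole detection flow to a few lines. A minimal standalone sketch of the dict-based interface used above (the input and output paths are placeholders):

import cv2
from uniface.detection import create_detector
from uniface.visualization import draw_detections

detector = create_detector(method="retinaface")
image = cv2.imread("assets/test.jpg")  # placeholder path

# detect() returns a list of dicts with 'bbox', 'confidence', and 'landmarks' keys
faces = detector.detect(image)
if faces:
    draw_detections(
        image,
        [face['bbox'] for face in faces],
        [face['confidence'] for face in faces],
        [face['landmarks'] for face in faces],
        vis_threshold=0.6,
    )
cv2.imwrite("outputs/test_out.jpg", image)  # placeholder path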
scripts/run_face_search.py (new file, 101 lines)
@@ -0,0 +1,101 @@
+import cv2
+import argparse
+import numpy as np
+
+# Use the new high-level factory functions
+from uniface.detection import create_detector
+from uniface.recognition import create_recognizer
+from uniface.face_utils import compute_similarity
+
+
+def extract_reference_embedding(detector, recognizer, image_path: str) -> np.ndarray:
+    """Extracts a normalized embedding from the first face found in an image."""
+    image = cv2.imread(image_path)
+    if image is None:
+        raise RuntimeError(f"Failed to load image: {image_path}")
+
+    faces = detector.detect(image)
+    if not faces:
+        raise RuntimeError("No faces found in reference image.")
+
+    # Get landmarks from the first detected face dictionary
+    landmarks = np.array(faces[0]['landmarks'])
+
+    # Use normalized embedding for more reliable similarity comparison
+    embedding = recognizer.get_normalized_embedding(image, landmarks)
+    return embedding
+
+
+def run_video(detector, recognizer, ref_embedding: np.ndarray, threshold: float = 0.4):
+    """Run real-time face recognition from a webcam feed."""
+    cap = cv2.VideoCapture(0)
+    if not cap.isOpened():
+        raise RuntimeError("Webcam could not be opened.")
+    print("Webcam started. Press 'q' to quit.")
+
+    while True:
+        ret, frame = cap.read()
+        if not ret:
+            break
+
+        faces = detector.detect(frame)
+
+        # Loop through each detected face
+        for face in faces:
+            # Extract bbox and landmarks from the dictionary
+            bbox = face['bbox']
+            landmarks = np.array(face['landmarks'])
+
+            x1, y1, x2, y2 = map(int, bbox)
+
+            # Get the normalized embedding for the current face
+            embedding = recognizer.get_normalized_embedding(frame, landmarks)
+
+            # Compare with the reference embedding
+            sim = compute_similarity(ref_embedding, embedding)
+
+            # Draw results
+            label = f"Match ({sim:.2f})" if sim > threshold else f"Unknown ({sim:.2f})"
+            color = (0, 255, 0) if sim > threshold else (0, 0, 255)
+            cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
+            cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)
+
+        cv2.imshow("Face Recognition", frame)
+        if cv2.waitKey(1) & 0xFF == ord('q'):
+            break
+
+    cap.release()
+    cv2.destroyAllWindows()
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Face recognition using a reference image.")
+    parser.add_argument("--image", type=str, required=True, help="Path to the reference face image.")
+    parser.add_argument(
+        "--detector",
+        type=str,
+        default="scrfd",
+        choices=['retinaface', 'scrfd'],
+        help="Face detection method."
+    )
+    parser.add_argument(
+        "--recognizer",
+        type=str,
+        default="arcface",
+        choices=['arcface', 'mobileface', 'sphereface'],
+        help="Face recognition method."
+    )
+    args = parser.parse_args()
+
+    print("Initializing models...")
+    detector = create_detector(method=args.detector)
+    recognizer = create_recognizer(method=args.recognizer)
+
+    print("Extracting reference embedding...")
+    ref_embedding = extract_reference_embedding(detector, recognizer, args.image)
+
+    run_video(detector, recognizer, ref_embedding)
+
+
+if __name__ == "__main__":
+    main()
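The same embeddings can be compared offline as well, without the webcam loop. A short sketch under the same API, reusing the extract_reference_embedding helper defined in the script above (the two image paths are placeholders):

from uniface.detection import create_detector
from uniface.recognition import create_recognizer
from uniface.face_utils import compute_similarity

detector = create_detector(method="scrfd")
recognizer = create_recognizer(method="arcface")

# Placeholder images, one face each
emb_a = extract_reference_embedding(detector, recognizer, "person_a.jpg")
emb_b = extract_reference_embedding(detector, recognizer, "person_b.jpg")

# Both embeddings are L2-normalized, so this is effectively cosine similarity;
# the script above treats sim > 0.4 as a match.
sim = compute_similarity(emb_a, emb_b)
print(f"similarity: {sim:.2f}")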
@@ -2,18 +2,21 @@ import cv2
 import argparse
 import numpy as np

-from uniface.detection import RetinaFace
-from uniface.constants import RetinaFaceWeights
-from uniface.recognition import ArcFace
+# Use the new high-level factory functions for consistency
+from uniface.detection import create_detector
+from uniface.recognition import create_recognizer
+
+# Import enums for argument choices
+from uniface.constants import RetinaFaceWeights, ArcFaceWeights, MobileFaceWeights, SphereFaceWeights


-def run_inference(detector, recognizer, image_path):
+def run_inference(detector, recognizer, image_path: str):
     """
     Detect faces and extract embeddings from a single image.

     Args:
-        detector (RetinaFace): Initialized face detector.
-        recognizer (ArcFace): Face recognition model.
+        detector: Initialized face detector.
+        recognizer: Initialized face recognition model.
         image_path (str): Path to the input image.
     """
     image = cv2.imread(image_path)
@@ -21,36 +24,53 @@ def run_inference(detector, recognizer, image_path):
         print(f"Error: Failed to load image from '{image_path}'")
         return

-    boxes, landmarks = detector.detect(image)
+    faces = detector.detect(image)

-    if len(boxes) == 0:
+    if not faces:
         print("No faces detected.")
         return

-    print(f"Detected {len(boxes)} face(s). Extracting embeddings...")
+    print(f"Detected {len(faces)} face(s). Extracting embeddings for the first face...")

-    for i, landmark in enumerate(landmarks[:1]):
-        embedding = recognizer.get_embedding(image, landmark)
-        norm_embedding = recognizer.get_normalized_embedding(image, landmark)
-        print("embedding:", np.sum(embedding))
-        print("norm embedding:",np.sum(norm_embedding))
+    # Process the first detected face
+    first_face = faces[0]
+    landmarks = np.array(first_face['landmarks'])  # Convert landmarks to numpy array
+
+    # Extract embedding using the landmarks from the face dictionary
+    embedding = recognizer.get_embedding(image, landmarks)
+    norm_embedding = recognizer.get_normalized_embedding(image, landmarks)
+
+    # Print some info about the embeddings
+    print(f" - Embedding shape: {embedding.shape}")
+    print(f" - L2 norm of unnormalized embedding: {np.linalg.norm(embedding):.4f}")
+    print(f" - L2 norm of normalized embedding: {np.linalg.norm(norm_embedding):.4f}")


 def main():
     parser = argparse.ArgumentParser(description="Extract face embeddings from a single image.")
     parser.add_argument("--image", type=str, required=True, help="Path to the input image.")
     parser.add_argument(
-        "--model",
+        "--detector",
         type=str,
-        default="MNET_V2",
-        choices=[m.name for m in RetinaFaceWeights],
-        help="RetinaFace model variant to use."
+        default="retinaface",
+        choices=['retinaface', 'scrfd'],
+        help="Face detection method to use."
     )
+    parser.add_argument(
+        "--recognizer",
+        type=str,
+        default="arcface",
+        choices=['arcface', 'mobileface', 'sphereface'],
+        help="Face recognition method to use."
+    )

     args = parser.parse_args()

-    detector = RetinaFace(model_name=RetinaFaceWeights[args.model])
-    recognizer = ArcFace()
+    print(f"Initializing detector: {args.detector}")
+    # Only pass RetinaFace weights when the RetinaFace method is selected
+    detector_kwargs = {'model_name': RetinaFaceWeights.MNET_V2} if args.detector == 'retinaface' else {}
+    detector = create_detector(method=args.detector, **detector_kwargs)
+
+    print(f"Initializing recognizer: {args.recognizer}")
+    recognizer = create_recognizer(method=args.recognizer)

     run_inference(detector, recognizer, args.image)

@@ -1,69 +0,0 @@
-import cv2
-import argparse
-import numpy as np
-
-from uniface.detection import RetinaFace
-from uniface.constants import RetinaFaceWeights
-from uniface.recognition import ArcFace
-from uniface.face_utils import compute_similarity
-
-
-def extract_reference_embedding(detector, recognizer, image_path):
-    image = cv2.imread(image_path)
-    if image is None:
-        raise RuntimeError(f"Failed to load image: {image_path}")
-
-    boxes, landmarks = detector.detect(image)
-    if len(boxes) == 0:
-        raise RuntimeError("No faces found in reference image.")
-
-    embedding = recognizer.get_embedding(image, landmarks[0])
-    return embedding
-
-
-def run_video(detector, recognizer, ref_embedding, threshold=0.30):
-    cap = cv2.VideoCapture(0)
-    if not cap.isOpened():
-        raise RuntimeError("Webcam could not be opened.")
-
-    while True:
-        ret, frame = cap.read()
-        if not ret:
-            break
-
-        boxes, landmarks = detector.detect(frame)
-
-        for box, lm in zip(boxes, landmarks):
-            x1, y1, x2, y2 = map(int, box[:4])
-            embedding = recognizer.get_embedding(frame, lm)
-            sim = compute_similarity(ref_embedding, embedding)
-            label = f"Match ({sim:.2f})" if sim > threshold else f"Unknown ({sim:.2f})"
-            color = (0, 255, 0) if sim > threshold else (0, 0, 255)
-
-            cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
-            cv2.putText(frame, label, (x1, y1 - 10),
-                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)
-
-        cv2.imshow("Face Recognition", frame)
-        if cv2.waitKey(1) & 0xFF == ord('q'):
-            break
-
-    cap.release()
-    cv2.destroyAllWindows()
-
-
-def main():
-    parser = argparse.ArgumentParser(description="Face recognition using a reference image.")
-    parser.add_argument("--image", type=str, required=True, help="Path to the reference face image.")
-    parser.add_argument("--model", type=str, default="MNET_V2",
-                        choices=[m.name for m in RetinaFaceWeights], help="Face detector model.")
-    args = parser.parse_args()
-
-    detector = RetinaFace(model_name=RetinaFaceWeights[args.model])
-    recognizer = ArcFace()
-    ref_embedding = extract_reference_embedding(detector, recognizer, args.image)
-    run_video(detector, recognizer, ref_embedding)
-
-
-if __name__ == "__main__":
-    main()
@@ -15,7 +15,10 @@ __license__ = "MIT"
 __author__ = "Yakhyokhuja Valikhujaev"
 __version__ = "0.1.8"


+from .detection import detect_faces, create_detector, list_available_detectors
+from .recognition import create_recognizer
+from .landmark import create_landmarker
+
 from uniface.face_utils import face_alignment, compute_similarity
 from uniface.model_store import verify_model_weights
@@ -25,22 +28,20 @@ from uniface.log import Logger


 __all__ = [
     # Metadata
-    "__version__",
-    "__author__",
-    "__license__",
+    '__author__',
+    '__license__',
+    '__version__',

     # Core functions
-    'detect_faces',
     'create_detector',
+    'create_landmarker',
+    'create_recognizer',
+    'detect_faces',
     'list_available_detectors',

     # Utility functions
-    "face_alignment",
-    "compute_similarity",
-    "verify_model_weights",
-    "draw_detections",
+    'compute_similarity',
+    'draw_detections',
+    'face_alignment',
+    'verify_model_weights',

     # Classes
-    "Logger",
+    'Logger'
 ]

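With these re-exports in place, the three factories are importable straight from the package root. A minimal sketch, assuming the package is installed:

import uniface

print(uniface.__version__)  # 0.1.8

detector = uniface.create_detector('retinaface')
recognizer = uniface.create_recognizer('arcface')
landmarker = uniface.create_landmarker()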
@@ -1 +1,32 @@
-from .model import Landmark
+# Copyright 2025 Yakhyokhuja Valikhujaev
+# Author: Yakhyokhuja Valikhujaev
+# GitHub: https://github.com/yakhyo
+
+from .models import Landmark106
+from .base import BaseLandmarker
+
+
+def create_landmarker(method: str = 'insightface_106', **kwargs) -> BaseLandmarker:
+    """
+    Factory function to create facial landmark predictors.
+
+    Args:
+        method (str): Landmark prediction method. Options: 'insightface_106'.
+        **kwargs: Model-specific parameters.
+
+    Returns:
+        Initialized landmarker instance.
+    """
+    method = method.lower()
+    if method == 'insightface_106':
+        return Landmark106(**kwargs)
+    else:
+        available = ['insightface_106']
+        raise ValueError(f"Unsupported method: '{method}'. Available: {available}")
+
+
+__all__ = [
+    "create_landmarker",
+    "Landmark106",
+    "BaseLandmarker"
+]
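In practice the factory pairs with a detector: find a face, then predict its 106 keypoints from the bounding box. A minimal sketch (the image path is a placeholder):

import cv2
from uniface.detection import create_detector
from uniface.landmark import create_landmarker

detector = create_detector('retinaface')
landmarker = create_landmarker('insightface_106')

image = cv2.imread("face.jpg")  # placeholder path
faces = detector.detect(image)
if faces:
    # get_landmarks takes the full image plus one face's bounding box
    points = landmarker.get_landmarks(image, faces[0]['bbox'])
    print(points.shape)  # (106, 2)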
uniface/landmark/base.py (new file, 30 lines)
@@ -0,0 +1,30 @@
+# Copyright 2025 Yakhyokhuja Valikhujaev
+# Author: Yakhyokhuja Valikhujaev
+# GitHub: https://github.com/yakhyo
+
+from abc import ABC, abstractmethod
+import numpy as np
+
+
+class BaseLandmarker(ABC):
+    """
+    Abstract Base Class for all facial landmark models.
+    """
+    @abstractmethod
+    def get_landmarks(self, image: np.ndarray, bbox: np.ndarray) -> np.ndarray:
+        """
+        Predicts facial landmarks for a given face bounding box.
+
+        This method defines the standard interface for all landmark predictors.
+        It takes a full image and a bounding box for a single face and returns
+        the predicted keypoints for that face.
+
+        Args:
+            image (np.ndarray): The full source image in BGR format.
+            bbox (np.ndarray): A bounding box of a face [x1, y1, x2, y2].
+
+        Returns:
+            np.ndarray: An array of predicted landmark points with shape (N, 2),
+                        where N is the number of landmarks.
+        """
+        raise NotImplementedError
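Any new landmark backend only has to implement this one method. A hypothetical sketch of a custom subclass (DummyLandmarker and its corner-point output are illustrative only, not part of the library):

import numpy as np
from uniface.landmark.base import BaseLandmarker


class DummyLandmarker(BaseLandmarker):
    """Illustrative stub: returns the four bbox corners as 'landmarks'."""

    def get_landmarks(self, image: np.ndarray, bbox: np.ndarray) -> np.ndarray:
        # A real implementation would run a model on the face crop here.
        x1, y1, x2, y2 = bbox[:4]
        return np.array([[x1, y1], [x2, y1], [x2, y2], [x1, y2]], dtype=np.float32)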
@@ -1,247 +0,0 @@
-# Copyright 2025 Yakhyokhuja Valikhujaev
-# Author: Yakhyokhuja Valikhujaev
-# GitHub: https://github.com/yakhyo
-
-import cv2
-import numpy as np
-import onnxruntime as ort
-
-from typing import Tuple
-
-from uniface.log import Logger
-from uniface.constants import LandmarkWeights
-from uniface.model_store import verify_model_weights
-from uniface.face_utils import bbox_center_alignment, transform_points_2d
-
-__all__ = ['Landmark']
-
-
-class Landmark:
-    """
-    Facial landmark detection model for predicting 106 facial keypoints using ONNX model.
-
-    This class wraps a pretrained facial landmark model to detect 106 key facial points
-    such as eyes, eyebrows, nose, lips, and jawline from a given face bounding box.
-    It handles model verification, input preprocessing, ONNX inference execution,
-    and projection of landmark coordinates back to the original image space.
-
-    Attributes:
-        input_size (Tuple[int, int]): Model's expected input resolution (width, height).
-        input_mean (float): Mean value used for input normalization.
-        input_std (float): Standard deviation used for input normalization.
-        model_path (str): Path to the verified ONNX model file.
-        session (onnxruntime.InferenceSession): ONNX Runtime session for inference.
-        input_names (List[str]): List of input node names.
-        output_names (List[str]): List of output node names.
-        lmk_dim (int): Number of dimensions per landmark point (typically 2 for x, y).
-        lmk_num (int): Total number of landmark points predicted by the model (106).
-
-    Args:
-        model_name (LandmarkWeights): Enum specifying the landmark model to load.
-        input_size (Tuple[int, int]): Resolution for model input; defaults to (192, 192).
-    """
-
-    def __init__(
-        self,
-        model_name: LandmarkWeights = LandmarkWeights.DEFAULT,
-        input_size: Tuple[int, int] = (192, 192)
-    ) -> None:
-        """
-        Initializes the Facial Landmark model for inference.
-
-        Args:
-            model_name: Enum specifying which landmark model weights to use
-            input_size: Input resolution for the model (width, height)
-        """
-        Logger.info(
-            f"Initializing Facial Landmark with model={model_name}, "
-            f"input_size={input_size}"
-        )
-
-        # Initialize configuration
-        self.input_size = input_size
-        self.input_std = 1.0
-        self.input_mean = 0.0
-
-        # Get path to model weights
-        self.model_path = verify_model_weights(model_name)
-        Logger.info(f"Verified model weights located at: {self.model_path}")
-
-        # Initialize model
-        self._initialize_model()
-
-    def _initialize_model(self):
-        """
-        Initialize the ONNX model from the stored model path.
-
-        Raises:
-            RuntimeError: If the model fails to load or initialize.
-        """
-        try:
-            self.session = ort.InferenceSession(
-                self.model_path,
-                providers=["CUDAExecutionProvider", "CPUExecutionProvider"]
-            )
-
-            # Get input configuration
-            input_metadata = self.session.get_inputs()[0]
-            input_shape = input_metadata.shape
-            self.input_size = tuple(input_shape[2:4][::-1])  # Update input size from model
-
-            # Get input/output names
-            self.input_names = [input.name for input in self.session.get_inputs()]
-            self.output_names = [output.name for output in self.session.get_outputs()]
-
-            # Determine landmark dimensions from output shape
-            output_shape = self.session.get_outputs()[0].shape
-            self.lmk_dim = 2  # x,y coordinates
-            self.lmk_num = output_shape[1] // self.lmk_dim  # Number of landmarks
-
-            Logger.info(f"Model initialized with {self.lmk_num} landmarks")
-
-        except Exception as e:
-            Logger.error(f"Failed to load landmark model from '{self.model_path}'", exc_info=True)
-            raise RuntimeError(f"Failed to initialize landmark model: {e}")
-
-    def preprocess(self, image: np.ndarray, bbox: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
-        """
-        Preprocess the input image and bounding box for inference.
-
-        Args:
-            image: Input image in BGR format
-            bbox: Bounding box coordinates [x1, y1, x2, y2]
-
-        Returns:
-            Tuple containing:
-                - Preprocessed image blob ready for inference
-                - Transformation matrix for mapping predictions back to original image
-        """
-        # Calculate face dimensions and center
-        width, height = bbox[2] - bbox[0], bbox[3] - bbox[1]
-        center = (bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2
-
-        # Determine scale to fit face with some margin
-        scale = self.input_size[0] / (max(width, height) * 1.5)
-        rotation = 0.0
-
-        # Align face using center, scale and rotation
-        aligned_face, transform_matrix = bbox_center_alignment(
-            image, center, self.input_size[0], scale, rotation
-        )
-
-        # Convert to blob format for inference
-        face_blob = cv2.dnn.blobFromImage(
-            aligned_face,
-            1.0 / self.input_std,
-            self.input_size,
-            (self.input_mean, self.input_mean, self.input_mean),
-            swapRB=True  # Convert BGR to RGB
-        )
-
-        return face_blob, transform_matrix
-
-    def postprocess(self, predictions: np.ndarray, transform_matrix: np.ndarray) -> np.ndarray:
-        """
-        Convert raw model predictions to image coordinates.
-
-        Args:
-            predictions: Raw landmark coordinates from model output
-            transform_matrix: Affine transformation matrix from preprocessing
-
-        Returns:
-            Landmarks in original image coordinates
-        """
-        # Reshape to pairs of x,y coordinates
-        landmarks = predictions.reshape((-1, 2))
-
-        # Denormalize coordinates to pixel space
-        landmarks[:, 0:2] += 1  # Shift from [-1,1] to [0,2] range
-        landmarks[:, 0:2] *= (self.input_size[0] // 2)  # Scale to pixel coordinates
-
-        # Invert the transformation to map back to original image
-        inverse_matrix = cv2.invertAffineTransform(transform_matrix)
-        landmarks = transform_points_2d(landmarks, inverse_matrix)
-
-        return landmarks
-
-    def predict(self, image: np.ndarray, bbox: np.ndarray) -> np.ndarray:
-        """
-        Predict facial landmarks for the given image and face bounding box.
-
-        Args:
-            image: Input image in BGR format
-            bbox: Face bounding box [x1, y1, x2, y2]
-
-        Returns:
-            Array of facial landmarks in original image coordinates
-        """
-        # Preprocess image
-        face_blob, transform_matrix = self.preprocess(image, bbox)
-
-        # Run inference
-        raw_predictions = self.session.run(
-            self.output_names,
-            {self.input_names[0]: face_blob}
-        )[0][0]
-
-        # Postprocess to get landmarks in original image space
-        landmarks = self.postprocess(raw_predictions, transform_matrix)
-
-        return landmarks
-
-
-# TODO: For testing purposes only, remote later
-
-
-if __name__ == "__main__":
-    from uniface.detection import RetinaFace
-    from uniface.constants import RetinaFaceWeights
-
-    face_detector = RetinaFace(
-        model_name=RetinaFaceWeights.MNET_V2,
-        conf_thresh=0.5,
-        pre_nms_topk=5000,
-        nms_thresh=0.4,
-        post_nms_topk=750,
-        dynamic_size=False,
-        input_size=(640, 640)
-    )
-
-    model = Landmark()
-
-    cap = cv2.VideoCapture(0)
-    if not cap.isOpened():
-        print("Webcam not available.")
-        exit()
-
-    print("Press 'q' to quit.")
-
-    while True:
-        ret, frame = cap.read()
-        if not ret:
-            print("Frame capture failed.")
-            break
-
-        boxes, landmarks = face_detector.detect(frame)
-
-        if boxes is None or len(boxes) == 0:
-            cv2.imshow("Facial Landmark Detection", frame)
-            if cv2.waitKey(1) & 0xFF == ord('q'):
-                break
-            continue
-
-        for box in boxes:
-            x1, y1, x2, y2, score = box.astype(int)
-
-            lmk = model.predict(frame, box[:4])
-
-            for (x, y) in lmk.astype(int):
-                cv2.circle(frame, (x, y), 2, (0, 255, 0), -1)
-
-            cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)
-
-        cv2.imshow("Facial Landmark Detection", frame)
-        if cv2.waitKey(1) & 0xFF == ord('q'):
-            break
-
-    cap.release()
-    cv2.destroyAllWindows()
uniface/landmark/models.py (new file, 217 lines)
@@ -0,0 +1,217 @@
+# Copyright 2025 Yakhyokhuja Valikhujaev
+# Author: Yakhyokhuja Valikhujaev
+# GitHub: https://github.com/yakhyo
+
+import cv2
+import numpy as np
+import onnxruntime as ort
+from typing import Tuple
+
+from uniface.log import Logger
+from uniface.constants import LandmarkWeights
+from uniface.model_store import verify_model_weights
+from uniface.face_utils import bbox_center_alignment, transform_points_2d
+from .base import BaseLandmarker
+
+__all__ = ['Landmark106']
+
+
+class Landmark106(BaseLandmarker):
+    """Facial landmark model for predicting 106 facial keypoints.
+
+    This class implements the BaseLandmarker and provides an end-to-end
+    pipeline for 106-point facial landmark detection. It handles model
+    loading, preprocessing of a face crop based on a bounding box,
+    inference, and post-processing to map landmarks back to the
+    original image coordinates.
+
+    Args:
+        model_name (LandmarkWeights): The enum specifying the landmark model to load.
+            Defaults to `LandmarkWeights.DEFAULT`.
+        input_size (Tuple[int, int]): The resolution (width, height) for the model's
+            input. Defaults to (192, 192).
+
+    Example:
+        >>> # Assume 'image' is a loaded image and 'bbox' is a face bounding box
+        >>> # bbox = [x1, y1, x2, y2]
+        >>>
+        >>> landmarker = Landmark106()
+        >>> landmarks = landmarker.get_landmarks(image, bbox)
+        >>> print(landmarks.shape)
+        (106, 2)
+    """
+
+    def __init__(
+        self,
+        model_name: LandmarkWeights = LandmarkWeights.DEFAULT,
+        input_size: Tuple[int, int] = (192, 192)
+    ) -> None:
+        Logger.info(
+            f"Initializing Facial Landmark with model={model_name}, "
+            f"input_size={input_size}"
+        )
+        self.input_size = input_size
+        self.input_std = 1.0
+        self.input_mean = 0.0
+        self.model_path = verify_model_weights(model_name)
+        self._initialize_model()
+
+    def _initialize_model(self):
+        """
+        Initialize the ONNX model from the stored model path.
+
+        Raises:
+            RuntimeError: If the model fails to load or initialize.
+        """
+        try:
+            self.session = ort.InferenceSession(
+                self.model_path,
+                providers=["CUDAExecutionProvider", "CPUExecutionProvider"]
+            )
+
+            # Get input configuration
+            input_metadata = self.session.get_inputs()[0]
+            input_shape = input_metadata.shape
+            self.input_size = tuple(input_shape[2:4][::-1])  # Update input size from model
+
+            # Get input/output names
+            self.input_names = [input.name for input in self.session.get_inputs()]
+            self.output_names = [output.name for output in self.session.get_outputs()]
+
+            # Determine landmark dimensions from output shape
+            output_shape = self.session.get_outputs()[0].shape
+            self.lmk_dim = 2  # x,y coordinates
+            self.lmk_num = output_shape[1] // self.lmk_dim  # Number of landmarks
+
+            Logger.info(f"Model initialized with {self.lmk_num} landmarks")
+
+        except Exception as e:
+            Logger.error(f"Failed to load landmark model from '{self.model_path}'", exc_info=True)
+            raise RuntimeError(f"Failed to initialize landmark model: {e}")
+
+    def preprocess(self, image: np.ndarray, bbox: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
+        """Prepares a face crop for inference.
+
+        This method takes a face bounding box, performs a center alignment to
+        warp the face into the model's required input size, and then creates
+        a normalized blob ready for the ONNX session.
+
+        Args:
+            image (np.ndarray): The full source image in BGR format.
+            bbox (np.ndarray): The bounding box of the face [x1, y1, x2, y2].
+
+        Returns:
+            Tuple[np.ndarray, np.ndarray]: A tuple containing:
+                - The preprocessed image blob ready for inference.
+                - The affine transformation matrix used for alignment.
+        """
+        width, height = bbox[2] - bbox[0], bbox[3] - bbox[1]
+        center = ((bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2)
+        scale = self.input_size[0] / (max(width, height) * 1.5)
+
+        aligned_face, transform_matrix = bbox_center_alignment(image, center, self.input_size[0], scale, 0.0)
+
+        face_blob = cv2.dnn.blobFromImage(
+            aligned_face, 1.0 / self.input_std, self.input_size,
+            (self.input_mean, self.input_mean, self.input_mean), swapRB=True
+        )
+        return face_blob, transform_matrix
+
+    def postprocess(self, predictions: np.ndarray, transform_matrix: np.ndarray) -> np.ndarray:
+        """Converts raw model predictions back to original image coordinates.
+
+        This method reshapes the model's flat output array into landmark points,
+        denormalizes them to the model's input space, and then applies an
+        inverse affine transformation to map them back to the original image space.
+
+        Args:
+            predictions (np.ndarray): Raw landmark coordinates from the model output.
+            transform_matrix (np.ndarray): The affine transformation matrix from preprocessing.
+
+        Returns:
+            np.ndarray: An array of landmark points in the original image's coordinates.
+        """
+        landmarks = predictions.reshape((-1, 2))
+        landmarks[:, 0:2] += 1
+        landmarks[:, 0:2] *= (self.input_size[0] // 2)
+
+        inverse_matrix = cv2.invertAffineTransform(transform_matrix)
+        landmarks = transform_points_2d(landmarks, inverse_matrix)
+        return landmarks
+
+    def get_landmarks(self, image: np.ndarray, bbox: np.ndarray) -> np.ndarray:
+        """Predicts facial landmarks for the given image and face bounding box.
+
+        This is the main public method that orchestrates the full pipeline of
+        preprocessing, inference, and post-processing.
+
+        Args:
+            image (np.ndarray): The full source image in BGR format.
+            bbox (np.ndarray): A bounding box of a face [x1, y1, x2, y2].
+
+        Returns:
+            np.ndarray: An array of predicted landmark points with shape (106, 2).
+        """
+        face_blob, transform_matrix = self.preprocess(image, bbox)
+        raw_predictions = self.session.run(
+            self.output_names, {self.input_names[0]: face_blob}
+        )[0][0]
+        landmarks = self.postprocess(raw_predictions, transform_matrix)
+        return landmarks
+
+
+# TODO: For testing purposes only, remove later
+if __name__ == "__main__":
+    # UPDATED: Use the high-level factory functions
+    from uniface.detection import create_detector
+    from uniface.landmark import create_landmarker
+
+    # 1. Create the detector and landmarker using the new API
+    face_detector = create_detector('retinaface')
+    landmarker = create_landmarker()  # Uses the default 'insightface_106' method
+
+    cap = cv2.VideoCapture(0)
+    if not cap.isOpened():
+        print("Webcam not available.")
+        exit()
+
+    print("Press 'q' to quit.")
+
+    while True:
+        ret, frame = cap.read()
+        if not ret:
+            print("Frame capture failed.")
+            break
+
+        # 2. The detect method returns a list of dictionaries
+        faces = face_detector.detect(frame)
+
+        if not faces:
+            cv2.imshow("Facial Landmark Detection", frame)
+            if cv2.waitKey(1) & 0xFF == ord('q'):
+                break
+            continue
+
+        # 3. Loop through the list of face dictionaries
+        for face in faces:
+            # Extract the bounding box
+            bbox = face['bbox']
+
+            # 4. Get landmarks for the current face using its bounding box
+            landmarks = landmarker.get_landmarks(frame, bbox)
+
+            # --- Drawing Logic ---
+            # Draw the landmarks
+            for (x, y) in landmarks.astype(int):
+                cv2.circle(frame, (x, y), 2, (0, 255, 0), -1)
+
+            # Draw the bounding box
+            x1, y1, x2, y2 = map(int, bbox)
+            cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)
+
+        cv2.imshow("Facial Landmark Detection", frame)
+        if cv2.waitKey(1) & 0xFF == ord('q'):
+            break
+
+    cap.release()
+    cv2.destroyAllWindows()
@@ -1,2 +1,63 @@
-from .base import PreprocessConfig
-from .models import SphereFace, MobileFace, ArcFace
+# Copyright 2025 Yakhyokhuja Valikhujaev
+# Author: Yakhyokhuja Valikhujaev
+# GitHub: https://github.com/yakhyo
+
+from typing import Dict
+from .models import ArcFace, MobileFace, SphereFace
+from .base import BaseRecognizer
+from uniface.constants import ArcFaceWeights, MobileFaceWeights, SphereFaceWeights
+
+
+def create_recognizer(method: str = 'arcface', **kwargs) -> BaseRecognizer:
+    """
+    Factory function to create face recognizers.
+
+    This function initializes and returns a face recognizer instance based on the
+    specified method. It acts as a high-level interface to the underlying
+    model classes like ArcFace, MobileFace, etc.
+
+    Args:
+        method (str): The recognition method to use.
+            Options: 'arcface' (default), 'mobileface', 'sphereface'.
+        **kwargs: Model-specific parameters passed to the recognizer's constructor.
+            For example, `model_name` can be used to select a specific
+            pre-trained weight from the available enums (e.g., `ArcFaceWeights.MNET`).
+
+    Returns:
+        BaseRecognizer: An initialized recognizer instance ready for use.
+
+    Raises:
+        ValueError: If the specified `method` is not supported.
+
+    Examples:
+        >>> # Create the default ArcFace recognizer
+        >>> recognizer = create_recognizer()
+
+        >>> # Create a specific MobileFace recognizer
+        >>> from uniface.constants import MobileFaceWeights
+        >>> recognizer = create_recognizer(
+        ...     'mobileface',
+        ...     model_name=MobileFaceWeights.MNET_V2
+        ... )
+
+        >>> # Create a SphereFace recognizer
+        >>> recognizer = create_recognizer('sphereface')
+    """
+    method = method.lower()
+
+    if method == 'arcface':
+        return ArcFace(**kwargs)
+    elif method == 'mobileface':
+        return MobileFace(**kwargs)
+    elif method == 'sphereface':
+        return SphereFace(**kwargs)
+    else:
+        available = ['arcface', 'mobileface', 'sphereface']
+        raise ValueError(f"Unsupported method: '{method}'. Available: {available}")
+
+
+__all__ = [
+    "create_recognizer",
+    "ArcFace",
+    "MobileFace",
+    "SphereFace",
+    "BaseRecognizer",
+]
@@ -2,20 +2,15 @@
 # Author: Yakhyokhuja Valikhujaev
 # GitHub: https://github.com/yakhyo

+from abc import ABC, abstractmethod
 import cv2
 import numpy as np
 import onnxruntime as ort
 from dataclasses import dataclass
-
 from typing import Tuple, Union, List

 from uniface.log import Logger
-from uniface.model_store import verify_model_weights
 from uniface.face_utils import face_alignment
-from uniface.constants import SphereFaceWeights, MobileFaceWeights, ArcFaceWeights
-
-
-__all__ = ["BaseModel", "PreprocessConfig"]


 @dataclass
@@ -28,38 +23,25 @@ class PreprocessConfig:
     input_size: Tuple[int, int] = (112, 112)


-class BaseModel:
+class BaseRecognizer(ABC):
     """
-    Unified Face Encoder supporting multiple model families (e.g., SphereFace, MobileFace).
+    Abstract Base Class for all face recognition models.
+    It provides the core functionality for preprocessing, inference, and embedding extraction.
     """

-    def __init__(
-        self,
-        model_name: Union[SphereFaceWeights, MobileFaceWeights, ArcFaceWeights] = MobileFaceWeights.MNET_V2,
-        preprocessing: PreprocessConfig = PreprocessConfig(),
-    ) -> None:
+    @abstractmethod
+    def __init__(self, model_path: str, preprocessing: PreprocessConfig) -> None:
         """
-        Initializes the FaceEncoder model for inference.
+        Initializes the model. Subclasses must call this.

         Args:
-            model_name: Selected model weight enum.
-            preprocessing: Configuration for input normalization and resizing.
+            model_path (str): The direct path to the verified ONNX model.
+            preprocessing (PreprocessConfig): The configuration for preprocessing.
         """
         # Store preprocessing parameters
         self.input_mean = preprocessing.input_mean
         self.input_std = preprocessing.input_std
         self.input_size = preprocessing.input_size

-        Logger.info(
-            f"Initializing Face Recognition with model={model_name}, "
-            f"input_mean={self.input_mean}, input_std={self.input_std}, "
-            f"input_size={self.input_size}"
-        )
-
-        # Get path to model weights and initialize model
-        self.model_path = verify_model_weights(model_name)
-        Logger.info(f"Verified model weights located at: {self.model_path}")
-
+        self.model_path = model_path
         self._initialize_model()

     def _initialize_model(self) -> None:
@@ -152,14 +134,15 @@ class BaseModel:

     def get_normalized_embedding(self, image: np.ndarray, landmarks: np.ndarray) -> np.ndarray:
         """
-        Extracts l2 normalized face embedding vector from an image
+        Extracts an L2-normalized face embedding vector from an image.

         Args:
             image: Input face image (BGR format).
             landmarks: Facial landmarks (5 points for alignment).

         Returns:
-            Normalied face embedding vector (typically 512-dimensional).
+            Normalized face embedding vector (typically 512-dimensional).
         """
         embedding = self.get_embedding(image, landmarks)
-        return embedding / np.linalg.norm(embedding)
+        norm = np.linalg.norm(embedding)
+        return embedding / norm if norm > 0 else embedding
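The added zero-norm guard matters because a degenerate (all-zero) embedding would otherwise divide by zero and return NaNs. A quick standalone sketch of the two behaviors:

import numpy as np

embedding = np.zeros(512, dtype=np.float32)
norm = np.linalg.norm(embedding)  # 0.0

# Old behavior: embedding / norm -> array of NaNs (plus a RuntimeWarning)
# New behavior: the vector is returned unchanged when the norm is zero
safe = embedding / norm if norm > 0 else embedding
print(np.isnan(safe).any())  # False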
@@ -4,27 +4,35 @@

 from typing import Optional

-from uniface.constants import SphereFaceWeights, MobileFaceWeights, ArcFaceWeights
-from .base import BaseModel, PreprocessConfig
+from uniface.constants import ArcFaceWeights, MobileFaceWeights, SphereFaceWeights
+from uniface.model_store import verify_model_weights
+from .base import BaseRecognizer, PreprocessConfig

+__all__ = ["ArcFace", "MobileFace", "SphereFace"]

-__all__ = ["SphereFace", "MobileFace", "ArcFace"]

+class ArcFace(BaseRecognizer):
+    """ArcFace model for robust face recognition.

-class SphereFace(BaseModel):
-    """
-    SphereFace face encoder class.
-
-    This class loads a SphereFace model for face embedding extraction.
-    It supports configurable preprocessing, with a default mean/std and input size of 112x112.
+    This class provides a concrete implementation of the BaseRecognizer,
+    pre-configured for ArcFace models. It handles the loading of specific
+    ArcFace weights and sets up the appropriate default preprocessing.

     Args:
-        model_name (SphereFaceWeights): Enum value representing the model to load. Defaults to SphereFaceWeights.SPHERE20.
-        preprocessing (Optional[PreprocessConfig]): Preprocessing config (mean, std, size). Defaults to standard 112x112 with normalization.
+        model_name (ArcFaceWeights): The specific ArcFace model variant to use.
+            Defaults to `ArcFaceWeights.MNET`.
+        preprocessing (Optional[PreprocessConfig]): An optional custom preprocessing
+            configuration. If None, a default config for ArcFace is used.
+
+    Example:
+        >>> from uniface.recognition import ArcFace
+        >>> recognizer = ArcFace()
+        >>> # embedding = recognizer.get_normalized_embedding(image, landmarks)
     """

     def __init__(
-        self, model_name: SphereFaceWeights = SphereFaceWeights.SPHERE20,
+        self,
+        model_name: ArcFaceWeights = ArcFaceWeights.MNET,
         preprocessing: Optional[PreprocessConfig] = None
     ) -> None:
         if preprocessing is None:
@@ -33,23 +41,32 @@ class SphereFace(BaseModel):
             input_std=127.5,
             input_size=(112, 112)
         )
-        super().__init__(model_name=model_name, preprocessing=preprocessing)
+        model_path = verify_model_weights(model_name)
+        super().__init__(model_path=model_path, preprocessing=preprocessing)


-class MobileFace(BaseModel):
-    """
-    MobileFace face encoder class.
+class MobileFace(BaseRecognizer):
+    """Lightweight MobileFaceNet model for fast face recognition.

-    Loads a lightweight MobileFaceNet model for fast face embedding extraction.
-    Default input normalization and resizing applied if preprocessing is not provided.
+    This class provides a concrete implementation of the BaseRecognizer,
+    pre-configured for MobileFaceNet models. It is optimized for speed,
+    making it suitable for edge devices.

     Args:
-        model_name (MobileFaceWeights): Enum value specifying the MobileFace model. Defaults to MobileFaceWeights.MNET_V2.
-        preprocessing (Optional[PreprocessConfig]): Preprocessing config. If None, uses standard normalization and 112x112 input size.
+        model_name (MobileFaceWeights): The specific MobileFaceNet model variant to use.
+            Defaults to `MobileFaceWeights.MNET_V2`.
+        preprocessing (Optional[PreprocessConfig]): An optional custom preprocessing
+            configuration. If None, a default config for MobileFaceNet is used.
+
+    Example:
+        >>> from uniface.recognition import MobileFace
+        >>> recognizer = MobileFace()
+        >>> # embedding = recognizer.get_normalized_embedding(image, landmarks)
     """

     def __init__(
-        self, model_name: MobileFaceWeights = MobileFaceWeights.MNET_V2,
+        self,
+        model_name: MobileFaceWeights = MobileFaceWeights.MNET_V2,
         preprocessing: Optional[PreprocessConfig] = None
     ) -> None:
         if preprocessing is None:
@@ -58,23 +75,32 @@ class MobileFace(BaseModel):
             input_std=127.5,
             input_size=(112, 112)
         )
-        super().__init__(model_name=model_name)
+        model_path = verify_model_weights(model_name)
+        super().__init__(model_path=model_path, preprocessing=preprocessing)


-class ArcFace(BaseModel):
-    """
-    ArcFace face encoder class.
+class SphereFace(BaseRecognizer):
+    """SphereFace model using angular margin for face recognition.

-    Loads an ArcFace model (e.g., ResNet-based) for robust face recognition embedding generation.
-    Applies standard preprocessing unless overridden.
+    This class provides a concrete implementation of the BaseRecognizer,
+    pre-configured for SphereFace models, which were among the first to
+    introduce angular margin loss functions.

     Args:
-        model_name (ArcFaceWeights): Enum for the ArcFace model variant. Defaults to ArcFaceWeights.MNET.
-        preprocessing (Optional[PreprocessConfig]): Preprocessing settings. Defaults to standard normalization and resizing if not specified.
+        model_name (SphereFaceWeights): The specific SphereFace model variant to use.
+            Defaults to `SphereFaceWeights.SPHERE20`.
+        preprocessing (Optional[PreprocessConfig]): An optional custom preprocessing
+            configuration. If None, a default config for SphereFace is used.
+
+    Example:
+        >>> from uniface.recognition import SphereFace
+        >>> recognizer = SphereFace()
+        >>> # embedding = recognizer.get_normalized_embedding(image, landmarks)
     """

     def __init__(
-        self, model_name: ArcFaceWeights = ArcFaceWeights.MNET,
+        self,
+        model_name: SphereFaceWeights = SphereFaceWeights.SPHERE20,
         preprocessing: Optional[PreprocessConfig] = None
     ) -> None:
         if preprocessing is None:
@@ -83,4 +109,6 @@ class ArcFace(BaseModel):
             input_std=127.5,
             input_size=(112, 112)
         )
-        super().__init__(model_name=model_name)
+
+        model_path = verify_model_weights(model_name)
+        super().__init__(model_path=model_path, preprocessing=preprocessing)
@@ -4,42 +4,47 @@

 import cv2
 import numpy as np
+from typing import List, Union


-def draw_detections(image, detections, vis_threshold: float = 0.6):
+def draw_detections(
+    image: np.ndarray,
+    bboxes: Union[np.ndarray, List[List[float]]],
+    scores: Union[np.ndarray, List[float]],
+    landmarks: Union[np.ndarray, List[List[List[float]]]],
+    vis_threshold: float = 0.6
+):
     """
-    Draw bounding boxes and landmarks on the image with thickness scaled by bbox size.
+    Draws bounding boxes, scores, and landmarks from separate lists onto an image.

     Args:
-        image (ndarray): Image to draw detections on.
-        detections (tuple): (bounding boxes, landmarks) as NumPy arrays.
-        vis_threshold (float): Confidence threshold for filtering detections.
+        image (np.ndarray): The image to draw on.
+        bboxes (list or np.ndarray): A list of bounding boxes, e.g., [[x1,y1,x2,y2], ...].
+        scores (list or np.ndarray): A list of confidence scores.
+        landmarks (list or np.ndarray): A list of landmark sets, e.g., [[[x,y],...],...].
+        vis_threshold (float): Confidence threshold for filtering which detections to draw.
     """

     _colors = [(0, 0, 255), (0, 255, 255), (255, 0, 255), (0, 255, 0), (255, 0, 0)]

-    # Unpack detections
-    boxes, landmarks = detections
-    scores = boxes[:, 4]
+    # Filter detections by score
+    keep_indices = [i for i, score in enumerate(scores) if score >= vis_threshold]

-    # Filter detections by confidence threshold
-    filtered = scores >= vis_threshold
-    boxes = boxes[filtered, :4].astype(np.int32)
-    landmarks = landmarks[filtered]
-    scores = scores[filtered]
+    # Draw the filtered detections
+    for i in keep_indices:
+        bbox = np.array(bboxes[i], dtype=np.int32)
+        score = scores[i]
+        landmark_set = np.array(landmarks[i], dtype=np.int32)

-    # Draw bounding boxes, scores, and landmarks
-    for box, score, landmark in zip(boxes, scores, landmarks):
-        # Calculate thickness proportional to the bbox size
-        thickness = max(1, int(min(box[2] - box[0], box[3] - box[1]) / 100))
+        # Calculate adaptive thickness
+        thickness = max(1, int(min(bbox[2] - bbox[0], bbox[3] - bbox[1]) / 100))

-        # Draw rectangle
-        cv2.rectangle(image, tuple(box[:2]), tuple(box[2:]), (0, 0, 255), thickness)
+        # Draw bounding box
+        cv2.rectangle(image, tuple(bbox[:2]), tuple(bbox[2:]), (0, 0, 255), thickness)

         # Draw score
-        cv2.putText(image, f"{score:.2f}", (box[0], box[1] + 12),
+        cv2.putText(image, f"{score:.2f}", (bbox[0], bbox[1] - 10),
                     cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), thickness)

         # Draw landmarks
-        for point, color in zip(landmark, _colors):
-            cv2.circle(image, tuple(point), thickness, color, -1)
+        for j, point in enumerate(landmark_set):
+            cv2.circle(image, tuple(point), thickness + 1, _colors[j], -1)
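The new signature takes parallel lists rather than a packed (boxes, landmarks) tuple. A short standalone sketch with hand-made values, no detector required:

import numpy as np
from uniface.visualization import draw_detections

image = np.zeros((480, 640, 3), dtype=np.uint8)  # blank canvas
bboxes = [[100, 100, 300, 340]]
scores = [0.92]
# One set of five landmark points per face, matching the five-color palette
landmarks = [[[150, 180], [250, 180], [200, 230], [160, 280], [240, 280]]]

draw_detections(image, bboxes, scores, landmarks, vis_threshold=0.6)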