Mirror of https://github.com/yakhyo/uniface.git (synced 2025-12-30 09:02:25 +00:00)

Commit: Update some modules and remove redundant parts
@@ -18,7 +18,6 @@ def extract_reference_embedding(detector, recognizer, image_path):
raise RuntimeError("No faces found in reference image.")

embedding = recognizer.get_embedding(image, landmarks[0])
print(f"Reference embedding extracted (L2 norm = {np.linalg.norm(embedding):.4f})")
return embedding
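A minimal sketch (not part of this commit) of how a reference embedding like the one extracted above could be compared against a probe embedding; the cosine_similarity helper below is illustrative only:

# Illustrative only -- assumes two embeddings produced by
# recognizer.get_embedding(), as in extract_reference_embedding() above.
import numpy as np

def cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
    # Flatten in case the embeddings carry a leading batch dimension.
    a, b = a.ravel(), b.ravel()
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))

# similarity = cosine_similarity(reference_embedding, probe_embedding)
# print(f"Cosine similarity: {similarity:.4f}")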
@@ -4,12 +4,10 @@ import onnxruntime as ort
from typing import Tuple

from uniface.log import Logger
from uniface.constants import AgeGenderWeights
from uniface.face_utils import bbox_center_alignment
from uniface.model_store import verify_model_weights
from uniface.constants import AgeGenderWeights

from uniface.detection import RetinaFace
from uniface.constants import RetinaFaceWeights

__all__ = ["AgeGender"]
@@ -17,109 +15,156 @@ __all__ = ["AgeGender"]
class AgeGender:
"""
Age and Gender Prediction Model.

This model predicts both a person's gender (male/female) and age from a facial image.
Gender is returned as an integer (0: female, 1: male) and age as years.
"""

def __init__(self, model_name: AgeGenderWeights = AgeGenderWeights.DEFAULT, input_size: Tuple[int, int] = (112, 112)) -> None:
def __init__(
self,
model_name: AgeGenderWeights = AgeGenderWeights.DEFAULT,
input_size: Tuple[int, int] = (112, 112)
) -> None:
"""
Initializes the Attribute model for inference.
Initializes the Age and Gender prediction model.

Args:
model_path (str): Path to the ONNX file.
model_name: Model weights enum to use
input_size: Input resolution for the model (width, height)
"""

Logger.info(
f"Initializing AgeGender with model={model_name}, "
f"input_size={input_size}"
)

# Model configuration
self.input_size = input_size
self.input_std = 1.0
self.input_mean = 0.0

# Get path to model weights
self._model_path = verify_model_weights(model_name)
Logger.info(f"Verfied model weights located at: {self._model_path}")
self.model_path = verify_model_weights(model_name)
Logger.info(f"Verified model weights located at: {self.model_path}")

# Initialize model
self._initialize_model(model_path=self._model_path)
self._initialize_model()

def _initialize_model(self, model_path: str):
"""Initialize the model from the given path.
def _initialize_model(self):
"""
Initialize the ONNX model for inference.

Args:
model_path (str): Path to .onnx model.
Raises:
RuntimeError: If the model fails to load or initialize.
"""
try:
# Initialize session with available providers
self.session = ort.InferenceSession(
model_path,
self.model_path,
providers=["CUDAExecutionProvider", "CPUExecutionProvider"]
)

# Get model info
metadata = self.session.get_inputs()[0]
input_shape = metadata.shape
self.input_size = tuple(input_shape[2:4][::-1])
# Extract model metadata
input_metadata = self.session.get_inputs()[0]
input_shape = input_metadata.shape
self.input_size = tuple(input_shape[2:4][::-1])  # Update from model (width, height)

self.input_names = [x.name for x in self.session.get_inputs()]
self.output_names = [x.name for x in self.session.get_outputs()]
# Get input/output names
self.input_names = [input.name for input in self.session.get_inputs()]
self.output_names = [output.name for output in self.session.get_outputs()]

Logger.info(f"Successfully initialized AgeGender model")

except Exception as e:
print(f"Failed to load the model: {e}")
raise
Logger.error(f"Failed to load AgeGender model from '{self.model_path}'", exc_info=True)
raise RuntimeError(f"Failed to initialize AgeGender model: {e}")

def preprocess(self, image: np.ndarray, bbox: np.ndarray):
"""Preprocessing
def preprocess(self, image: np.ndarray, bbox: np.ndarray) -> np.ndarray:
"""
Preprocess the input image and face bounding box for inference.

Args:
image (np.ndarray): Numpy image
bbox (np.ndarray): Bounding box coordinates: [x1, y1, x2, y2]
image: Input image in BGR format
bbox: Face bounding box coordinates [x1, y1, x2, y2]

Returns:
np.ndarray: Transformed image
Preprocessed image blob ready for inference
"""
# Calculate face dimensions and center
width, height = bbox[2] - bbox[0], bbox[3] - bbox[1]
center = (bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2

# Determine scale to fit face with margin
scale = self.input_size[0] / (max(width, height) * 1.5)
rotation = 0.0

transformed_image, M = bbox_center_alignment(image, center, self.input_size[0], scale, rotation)

input_size = tuple(transformed_image.shape[0:2][::-1])

blob = cv2.dnn.blobFromImage(
transformed_image,
1.0/self.input_std,
input_size,
(self.input_mean, self.input_mean, self.input_mean),
swapRB=True
# Align face based on bounding box
aligned_face, _ = bbox_center_alignment(
image, center, self.input_size[0], scale, rotation
)
return blob

def postprocess(self, predictions: np.ndarray) -> Tuple[np.int64, int]:
"""Postprocessing
# Convert to blob format for network input
face_blob = cv2.dnn.blobFromImage(
aligned_face,
1.0 / self.input_std,
self.input_size,
(self.input_mean, self.input_mean, self.input_mean),
swapRB=True  # Convert BGR to RGB
)

return face_blob

def postprocess(self, predictions: np.ndarray) -> Tuple[int, int]:
"""
Process model predictions to extract gender and age.

Args:
predictions (np.ndarray): Model predictions, shape: [1, 3]
predictions: Raw model output, shape [1, 3] where:
- First two elements represent gender logits
- Third element represents normalized age

Returns:
Tuple[np.int64, int]: Gender and Age values
Tuple containing:
- Gender (0: female, 1: male)
- Age in years
"""
gender = np.argmax(predictions[:2])
# First two values are gender logits (female/male)
gender = int(np.argmax(predictions[:2]))

# Third value is normalized age that needs scaling
age = int(np.round(predictions[2] * 100))
return gender, age

def predict(self, image: np.ndarray, bbox: np.ndarray) -> Tuple[np.int64, int]:
blob = self.preprocess(image, bbox)
predictions = self.session.run(self.output_names, {self.input_names[0]: blob})[0][0]
gender, age = self.postprocess(predictions)

return gender, age

def predict(self, image: np.ndarray, bbox: np.ndarray) -> Tuple[int, int]:
"""
Predict age and gender for a face in the image.

Args:
image: Input image in BGR format
bbox: Face bounding box [x1, y1, x2, y2]

Returns:
- 'gender_id': Gender as integer (0: female, 1: male)
- 'age': Age in years
"""
# Preprocess and run inference
face_blob = self.preprocess(image, bbox)
predictions = self.session.run(
self.output_names,
{self.input_names[0]: face_blob}
)[0][0]

# Extract gender and age from predictions
gender_id, age = self.postprocess(predictions)

return gender_id, age

# TODO: For testing purposes only, remove later

def main():
from uniface.detection import RetinaFace
from uniface.constants import RetinaFaceWeights

face_detector = RetinaFace(
model_name=RetinaFaceWeights.MNET_V2,
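A minimal usage sketch (not part of this commit) for the updated AgeGender API above; the AgeGender import path and the detector's detect() call are assumptions, not confirmed by this diff:

# Illustrative only -- import path for AgeGender and detect() signature assumed.
import cv2

from uniface.detection import RetinaFace
from uniface.constants import RetinaFaceWeights, AgeGenderWeights
from uniface.attribute import AgeGender  # hypothetical import path

detector = RetinaFace(model_name=RetinaFaceWeights.MNET_V2)
age_gender = AgeGender(model_name=AgeGenderWeights.DEFAULT)

image = cv2.imread("face.jpg")
boxes, landmarks = detector.detect(image)  # assumed detector API

for box in boxes:
    # predict() returns (gender_id, age) as shown in the diff above.
    gender_id, age = age_gender.predict(image, box[:4])
    print("male" if gender_id == 1 else "female", age)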
@@ -10,10 +10,9 @@ from PIL import Image
from typing import Tuple, Union

from uniface.log import Logger
from uniface import RetinaFace
from uniface.constants import DDAMFNWeights
from uniface.face_utils import face_alignment
from uniface.model_store import verify_model_weights
from uniface.constants import RetinaFaceWeights, DDAMFNWeights


class Emotion:
@@ -21,10 +20,11 @@ class Emotion:
Emotion recognition using a TorchScript model.

Args:
model_name (DDAMFNWeights): Pretrained model enum. Defaults to AFFECNET7.
model_weights (DDAMFNWeights): Pretrained model weights enum. Defaults to AFFECNET7.
input_size (Tuple[int, int]): Size of input images. Defaults to (112, 112).

Attributes:
emotions (List[str]): Emotion label list.
emotion_labels (List[str]): List of emotion labels the model can predict.
device (torch.device): Inference device (CPU or CUDA).
model (torch.jit.ScriptModule): Loaded TorchScript model.
@@ -33,122 +33,133 @@ class Emotion:
RuntimeError: If model loading fails.
"""

def __init__(self, model_name: DDAMFNWeights = DDAMFNWeights.AFFECNET7, input_size: Tuple[int, int] = (112, 112)) -> None:
def __init__(
self,
model_weights: DDAMFNWeights = DDAMFNWeights.AFFECNET7,
input_size: Tuple[int, int] = (112, 112)
) -> None:
"""
Initialize the emotion detector with a TorchScript model
"""

self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

self.emotions = [
self.emotion_labels = [
"Neutral", "Happy", "Sad", "Surprise", "Fear", "Disgust", "Angry"
]
if model_name == DDAMFNWeights.AFFECNET8:
self.emotions.append("Contempt")

# Add contempt for AFFECNET8 model
if model_weights == DDAMFNWeights.AFFECNET8:
self.emotion_labels.append("Contempt")

# Initialize image preprocessing parameters
self.input_size = input_size
self.input_std = [0.229, 0.224, 0.225]
self.input_mean = [0.485, 0.456, 0.406]
self.normalization_std = [0.229, 0.224, 0.225]
self.normalization_mean = [0.485, 0.456, 0.406]

Logger.info(
f"Initialized Emotion class with model={model_name.name}, "
f"Initialized Emotion class with model={model_weights.name}, "
f"device={'cuda' if torch.cuda.is_available() else 'cpu'}, "
f"num_classes={len(self.emotions)}, input_size={self.input_size}"
f"num_classes={len(self.emotion_labels)}, input_size={self.input_size}"
)

# Get path to model weights
self._model_path = verify_model_weights(model_name)
Logger.info(f"Verified model weights located at: {self._model_path}")
# Get path to model weights and initialize model
self.model_path = verify_model_weights(model_weights)
Logger.info(f"Verified model weights located at: {self.model_path}")
self._load_model()

# Initialize model
self._initialize_model(model_path=self._model_path)

def _initialize_model(self, model_path: str) -> None:
def _load_model(self) -> None:
"""
Initializes a TorchScript model for emotion inference.
Loads and initializes a TorchScript model for emotion inference.

Args:
model_path (str): Path to the TorchScript (.pt) model.
Raises:
RuntimeError: If loading the model fails.
"""
try:
self.model = torch.jit.load(model_path, map_location=self.device)
self.model = torch.jit.load(self.model_path, map_location=self.device)
self.model.eval()
Logger.info(f"TorchScript model successfully loaded from: {model_path}")
Logger.info(f"TorchScript model successfully loaded from: {self.model_path}")

# Warm-up
dummy = torch.randn(1, 3, 112, 112).to(self.device)
# Warm-up with dummy input
dummy_input = torch.randn(1, 3, *self.input_size).to(self.device)
with torch.no_grad():
_ = self.model(dummy)
_ = self.model(dummy_input)
Logger.info("Emotion model warmed up with dummy input.")

except Exception as e:
Logger.error(f"Failed to load TorchScript model from {model_path}: {e}")
raise
Logger.error(f"Failed to load TorchScript model from {self.model_path}: {e}")
raise RuntimeError(f"Model loading failed: {str(e)}")

def preprocess(self, image: np.ndarray) -> torch.Tensor:
"""
Resize, normalize and convert image to tensor manually without torchvision.
Preprocess image for model inference: resize, normalize and convert to tensor.

Args:
image (np.ndarray): BGR image (H, W, 3)
Returns:
torch.Tensor: Preprocessed image tensor of shape (1, 3, 112, 112)
"""
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # BGR -> RGB

# Resize to (112, 112)
image = cv2.resize(image, self.input_size).astype(np.float32) / 255.0
Returns:
torch.Tensor: Preprocessed image tensor of shape (1, 3, H, W)
"""
# Convert BGR to RGB
rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# Resize to target input size
resized_image = cv2.resize(rgb_image, self.input_size).astype(np.float32) / 255.0

# Normalize with mean and std
mean = np.array(self.input_mean, dtype=np.float32)
std = np.array(self.input_std, dtype=np.float32)
image_normalized = (image - mean) / std
mean_array = np.array(self.normalization_mean, dtype=np.float32)
std_array = np.array(self.normalization_std, dtype=np.float32)
normalized_image = (resized_image - mean_array) / std_array

# HWC to CHW
image_transposed = image_normalized.transpose((2, 0, 1))
# Convert from HWC to CHW format
transposed_image = normalized_image.transpose((2, 0, 1))

# Convert to torch tensor and add batch dimension
tensor = torch.from_numpy(image_transposed).unsqueeze(0).to(self.device)

tensor = torch.from_numpy(transposed_image).unsqueeze(0).to(self.device)
return tensor

def predict(self, image: np.ndarray, landmark: np.ndarray) -> Tuple[Union[str, None], Union[float, None]]:
"""
Predict the emotion from an BGR face image.
Predict the emotion from a face image.

Args:
image (np.ndarray): Input face image in RGB format.
image (np.ndarray): Input face image in BGR format.
landmark (np.ndarray): Facial five point landmark.

Returns:
Tuple[str, float]: (Predicted emotion label, Confidence score)
Returns (None, None) if prediction fails.

Raises:
RuntimeError: If the input is invalid or inference fails internally.
ValueError: If the input is not a valid BGR image.
"""
# Validate input
if not isinstance(image, np.ndarray):
Logger.error("Input must be a NumPy ndarray.")
raise ValueError("Input must be a NumPy ndarray (RGB image).")
raise ValueError("Input must be a NumPy ndarray (BGR image).")

if image.ndim != 3 or image.shape[2] != 3:
Logger.error(f"Invalid image shape: {image.shape}. Expected HxWx3 RGB image.")
raise ValueError("Input image must be in RGB format with shape (H, W, 3).")
Logger.error(f"Invalid image shape: {image.shape}. Expected HxWx3 image.")
raise ValueError("Input image must have shape (H, W, 3).")

try:
image, _ = face_alignment(image, landmark)
tensor = self.preprocess(image)
# Align face using landmarks
aligned_image, _ = face_alignment(image, landmark)

# Preprocess and run inference
input_tensor = self.preprocess(aligned_image)

with torch.no_grad():
output = self.model(tensor)
output = self.model(input_tensor)

# Handle case where model returns a tuple
if isinstance(output, tuple):
output = output[0]

probs = torch.nn.functional.softmax(output, dim=1).squeeze(0).cpu().numpy()
pred_idx = int(np.argmax(probs))
confidence = round(float(probs[pred_idx]), 2)
# Get probabilities and prediction
probabilities = torch.nn.functional.softmax(output, dim=1).squeeze(0).cpu().numpy()
predicted_index = int(np.argmax(probabilities))
confidence_score = round(float(probabilities[predicted_index]), 2)

return self.emotions[pred_idx], confidence
return self.emotion_labels[predicted_index], confidence_score

except Exception as e:
Logger.error(f"Emotion inference failed: {e}")
@@ -158,6 +169,8 @@ class Emotion:
# TODO: For testing purposes only, remove later

def main():
from uniface import RetinaFace
from uniface.constants import RetinaFaceWeights

face_detector = RetinaFace(
model_name=RetinaFaceWeights.MNET_V2,
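A minimal usage sketch (not part of this commit) for the updated Emotion API above; the Emotion import path and the detector's detect() call are assumptions:

# Illustrative only -- import path for Emotion and detect() signature assumed.
import cv2

from uniface import RetinaFace
from uniface.constants import RetinaFaceWeights, DDAMFNWeights
from uniface.emotion import Emotion  # hypothetical import path

detector = RetinaFace(model_name=RetinaFaceWeights.MNET_V2)
emotion = Emotion(model_weights=DDAMFNWeights.AFFECNET7)

image = cv2.imread("face.jpg")
boxes, landmarks = detector.detect(image)  # assumed detector API

if len(boxes) > 0:
    # predict() expects the BGR image and a 5-point landmark, per the diff above.
    label, confidence = emotion.predict(image, landmarks[0])
    print(label, confidence)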
@@ -11,8 +11,9 @@ import onnxruntime as ort
from typing import Tuple, List, Literal

from uniface.log import Logger
from uniface.model_store import verify_model_weights
from uniface.constants import SCRFDWeights
from uniface.model_store import verify_model_weights

from .utils import non_max_supression, distance2bbox, distance2kps, resize_image

__all__ = ['SCRFD']
@@ -249,12 +250,11 @@ class SCRFD:
det = det[sorted_indices]
landmarks = landmarks[sorted_indices]

return det, landmarks

# TODO: below is only for testing, remove it later

def draw_bbox(frame, bbox, color=(0, 255, 0), thickness=2):
x1, y1, x2, y2 = bbox[:4].astype(np.int32)
cv2.rectangle(frame, (x1, y1), (x2, y2), color, thickness)
@@ -267,6 +267,8 @@ def draw_keypoints(frame, points, color=(0, 0, 255), radius=2):
cv2.circle(frame, (x, y), radius, color, -1)

# TODO: Remove late, just for testing

if __name__ == "__main__":
detector = SCRFD(model_name=SCRFDWeights.SCRFD_500M_KPS)
cap = cv2.VideoCapture(0)
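A sketch (not part of this commit) of how the webcam block above could continue, using the draw helpers from the same file; the detector.detect() call is an assumption about the SCRFD API:

# Illustrative continuation of the __main__ block above; detect() is assumed.
while True:
    ret, frame = cap.read()
    if not ret:
        break
    boxes, keypoints = detector.detect(frame)  # assumed SCRFD API
    for box, points in zip(boxes, keypoints):
        draw_bbox(frame, box)
        draw_keypoints(frame, points)
    cv2.imshow("SCRFD", frame)
    if cv2.waitKey(1) & 0xFF == ord("q"):
        break
cap.release()
cv2.destroyAllWindows()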
@@ -6,131 +6,163 @@ import numpy as np
from typing import Tuple

from uniface.log import Logger
from uniface.face_utils import bbox_center_alignment, transform_points_2d
from uniface.constants import LandmarkWeights
from uniface.model_store import verify_model_weights

from uniface.detection import RetinaFace
from uniface.constants import RetinaFaceWeights, LandmarkWeights
from uniface.face_utils import bbox_center_alignment, transform_points_2d

__all__ = ['Landmark']


class Landmark:
def __init__(self, model_name: LandmarkWeights = LandmarkWeights.DEFAULT, input_size: Tuple[int, int] = (192, 192)) -> None:
"""
Facial landmark detection model for predicting facial keypoints.
"""

def __init__(
self,
model_name: LandmarkWeights = LandmarkWeights.DEFAULT,
input_size: Tuple[int, int] = (192, 192)
) -> None:
"""
Initializes the Facial Landmark model for inference.

Args:
model_path (str): Path to the ONNX file.
model_name: Enum specifying which landmark model weights to use
input_size: Input resolution for the model (width, height)
"""

Logger.info(
f"Initializing Facial Landmark with model={model_name}, "
f"input_size={input_size}"
)

# Initialize configuration
self.input_size = input_size
self.input_std = 1.0
self.input_mean = 0.0

# Get path to model weights
self._model_path = verify_model_weights(model_name)
Logger.info(f"Verfied model weights located at: {self._model_path}")
self.model_path = verify_model_weights(model_name)
Logger.info(f"Verified model weights located at: {self.model_path}")

# Initialize model
self._initialize_model(model_path=self._model_path)
self._initialize_model()

def _initialize_model(self, model_path: str):
""" Initialize the model from the given path.
Args:
model_path (str): Path to .onnx model.
def _initialize_model(self):
"""
Initialize the ONNX model from the stored model path.

Raises:
RuntimeError: If the model fails to load or initialize.
"""
try:
self.session = ort.InferenceSession(
model_path,
self.model_path,
providers=["CUDAExecutionProvider", "CPUExecutionProvider"]
)

metadata = self.session.get_inputs()[0]
input_shape = metadata.shape
self.input_size = tuple(input_shape[2:4][::-1])
# Get input configuration
input_metadata = self.session.get_inputs()[0]
input_shape = input_metadata.shape
self.input_size = tuple(input_shape[2:4][::-1])  # Update input size from model

self.input_names = [x.name for x in self.session.get_inputs()]
self.output_names = [x.name for x in self.session.get_outputs()]
# Get input/output names
self.input_names = [input.name for input in self.session.get_inputs()]
self.output_names = [output.name for output in self.session.get_outputs()]

outputs = self.session.get_outputs()
output_shape = outputs[0].shape
self.lmk_dim = 2
self.lmk_num = output_shape[1] // self.lmk_dim
# Determine landmark dimensions from output shape
output_shape = self.session.get_outputs()[0].shape
self.lmk_dim = 2  # x,y coordinates
self.lmk_num = output_shape[1] // self.lmk_dim  # Number of landmarks

Logger.info(f"Model initialized with {self.lmk_num} landmarks")

except Exception as e:
print(f"Failed to load the model: {e}")
raise
Logger.error(f"Failed to load landmark model from '{self.model_path}'", exc_info=True)
raise RuntimeError(f"Failed to initialize landmark model: {e}")

def preprocess(self, image: np.ndarray, bbox: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
"""
Preprocess the input image and bbox for inference.
Preprocess the input image and bounding box for inference.

Args:
image (np.ndarray): Input image.
bbox (np.ndarray): Bounding box [x1, y1, x2, y2].
image: Input image in BGR format
bbox: Bounding box coordinates [x1, y1, x2, y2]

Returns:
Tuple[np.ndarray, np.ndarray]: Preprocessed blob and transformation matrix.
Tuple containing:
- Preprocessed image blob ready for inference
- Transformation matrix for mapping predictions back to original image
"""
# Calculate face dimensions and center
width, height = bbox[2] - bbox[0], bbox[3] - bbox[1]
center = (bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2

# Determine scale to fit face with some margin
scale = self.input_size[0] / (max(width, height) * 1.5)
rotation = 0.0

transformed_image, M = bbox_center_alignment(image, center, self.input_size[0], scale, rotation)
input_size = tuple(transformed_image.shape[0:2][::-1])

blob = cv2.dnn.blobFromImage(
transformed_image,
1.0/self.input_std,
input_size,
(self.input_mean, self.input_mean, self.input_mean),
swapRB=True
# Align face using center, scale and rotation
aligned_face, transform_matrix = bbox_center_alignment(
image, center, self.input_size[0], scale, rotation
)
return blob, M

def postprocess(self, predictions: np.ndarray, M: np.ndarray) -> np.ndarray:
# Convert to blob format for inference
face_blob = cv2.dnn.blobFromImage(
aligned_face,
1.0 / self.input_std,
self.input_size,
(self.input_mean, self.input_mean, self.input_mean),
swapRB=True  # Convert BGR to RGB
)

return face_blob, transform_matrix

def postprocess(self, predictions: np.ndarray, transform_matrix: np.ndarray) -> np.ndarray:
"""
Postprocess model outputs to get landmarks.
Convert raw model predictions to image coordinates.

Args:
predictions (np.ndarray): Raw model predictions.
M (np.ndarray): Affine transformation matrix.
predictions: Raw landmark coordinates from model output
transform_matrix: Affine transformation matrix from preprocessing

Returns:
np.ndarray: Transformed landmarks.
Landmarks in original image coordinates
"""
# Reshape to pairs of x,y coordinates
landmarks = predictions.reshape((-1, 2))

predictions = predictions.reshape((-1, 2))
# Denormalize coordinates to pixel space
landmarks[:, 0:2] += 1  # Shift from [-1,1] to [0,2] range
landmarks[:, 0:2] *= (self.input_size[0] // 2)  # Scale to pixel coordinates

predictions[:, 0:2] += 1
predictions[:, 0:2] *= (self.input_size[0] // 2)
# Invert the transformation to map back to original image
inverse_matrix = cv2.invertAffineTransform(transform_matrix)
landmarks = transform_points_2d(landmarks, inverse_matrix)

IM = cv2.invertAffineTransform(M)
predictions = transform_points_2d(predictions, IM)

return predictions
return landmarks

def predict(self, image: np.ndarray, bbox: np.ndarray) -> np.ndarray:
"""
Predict facial landmarks for the given image and bounding box.
Predict facial landmarks for the given image and face bounding box.

Args:
image (np.ndarray): Input image.
bbox (np.ndarray): Bounding box [x1, y1, x2, y2].
image: Input image in BGR format
bbox: Face bounding box [x1, y1, x2, y2]

Returns:
np.ndarray: Predicted landmarks.
Array of facial landmarks in original image coordinates
"""
blob, M = self.preprocess(image, bbox)
preds = self.session.run(self.output_names, {self.input_names[0]: blob})[0][0]
landmarks = self.postprocess(preds, M)
# Preprocess image
face_blob, transform_matrix = self.preprocess(image, bbox)

# Run inference
raw_predictions = self.session.run(
self.output_names,
{self.input_names[0]: face_blob}
)[0][0]

# Postprocess to get landmarks in original image space
landmarks = self.postprocess(raw_predictions, transform_matrix)

return landmarks
@@ -138,6 +170,8 @@ class Landmark:


if __name__ == "__main__":
from uniface.detection import RetinaFace
from uniface.constants import RetinaFaceWeights

face_detector = RetinaFace(
model_name=RetinaFaceWeights.MNET_V2,
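A minimal usage sketch (not part of this commit) for the updated Landmark API above; the Landmark import path and the detector's detect() call are assumptions:

# Illustrative only -- import path for Landmark and detect() signature assumed.
import cv2

from uniface.detection import RetinaFace
from uniface.constants import RetinaFaceWeights, LandmarkWeights
from uniface.landmark import Landmark  # hypothetical import path

detector = RetinaFace(model_name=RetinaFaceWeights.MNET_V2)
landmarker = Landmark(model_name=LandmarkWeights.DEFAULT)

image = cv2.imread("face.jpg")
boxes, _ = detector.detect(image)  # assumed detector API

if len(boxes) > 0:
    # predict() returns landmarks mapped back to original image coordinates.
    points = landmarker.predict(image, boxes[0][:4])
    print(points.shape)  # (num_landmarks, 2)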
@@ -7,7 +7,6 @@ import os
import cv2
import numpy as np
import onnxruntime as ort

from typing import Tuple, Optional, Union, List
from dataclasses import dataclass
@@ -37,100 +36,99 @@ class BaseFaceEncoder:

def __init__(
self,
model_name: SphereFaceWeights | MobileFaceWeights | ArcFaceWeights = MobileFaceWeights.MNET_V2,
model_name: Union[SphereFaceWeights, MobileFaceWeights, ArcFaceWeights] = MobileFaceWeights.MNET_V2,
preprocessing: PreprocessConfig = PreprocessConfig(),
) -> None:
"""
Initializes the FaceEncoder model for inference.

Args:
model_name (SphereFaceWeights | MobileFaceWeights | ArcFaceWeights): Selected model weight enum.
preprocessing (PreprocessConfig): Configuration for input normalization and resizing.
model_name: Selected model weight enum.
preprocessing: Configuration for input normalization and resizing.
"""
# Store preprocessing parameters
self.input_mean = preprocessing.input_mean
self.input_std = preprocessing.input_std
self.input_size = preprocessing.input_size

Logger.info(
f"Initializing Face Recognition with model={model_name}, "
f"input_mean={self.input_mean}, input_std={self.input_std}, input_size={self.input_size}"
f"input_mean={self.input_mean}, input_std={self.input_std}, "
f"input_size={self.input_size}"
)

# Get path to model weights
self._model_path = verify_model_weights(model_name)
Logger.info(f"Verfied model weights located at: {self._model_path}")
# Get path to model weights and initialize model
self.model_path = verify_model_weights(model_name)
Logger.info(f"Verified model weights located at: {self.model_path}")

# Initialize model
self._initialize_model(self._model_path)
self._initialize_model()

def _initialize_model(self, model_path: str) -> None:
def _initialize_model(self) -> None:
"""
Loads the ONNX model and prepares it for inference.

Args:
model_path (str): Path to the ONNX model file.

Raises:
RuntimeError: If the model fails to load or initialize.
"""
try:
# Initialize model session with available providers
self.session = ort.InferenceSession(
model_path,
self.model_path,
providers=["CUDAExecutionProvider", "CPUExecutionProvider"]
)
self._setup_model()
Logger.info(f"Successfully initialized face encoder from {model_path}")
except Exception as e:
Logger.error(f"Failed to load face encoder model from '{model_path}'", exc_info=True)
raise RuntimeError(f"Failed to initialize model session for '{model_path}'") from e

def _setup_model(self) -> None:
"""
Extracts input/output configuration from the ONNX model session.
"""
# Extract input configuration
input_cfg = self.session.get_inputs()[0]
self.input_name = input_cfg.name

# Verify input dimensions match our configuration
input_shape = input_cfg.shape
model_input_size = tuple(input_shape[2:4][::-1])  # (width, height)

if model_input_size != self.input_size:
Logger.warning(f"Model input size {model_input_size} differs from configured size {self.input_size}")

self.input_name = input_cfg.name
# Extract output configuration
self.output_names = [output.name for output in self.session.get_outputs()]
self.output_shape = self.session.get_outputs()[0].shape

assert len(self.output_names) == 1, "Expected only one output node."
Logger.info(f"Successfully initialized face encoder from {self.model_path}")

def preprocess(self, image: np.ndarray) -> np.ndarray:
except Exception as e:
Logger.error(f"Failed to load face encoder model from '{self.model_path}'", exc_info=True)
raise RuntimeError(f"Failed to initialize model session for '{self.model_path}'") from e

def preprocess(self, face_img: np.ndarray) -> np.ndarray:
"""
Preprocess the image: resize, normalize, and convert it to a blob.

Args:
image (np.ndarray): Input image in BGR format.
face_img: Input image in BGR format.

Returns:
np.ndarray: Preprocessed image as a NumPy array ready for inference.
Preprocessed image as a NumPy array ready for inference.
"""
image = cv2.resize(image, self.input_size)  # Resize to (112, 112)
if isinstance(self.input_std, (list, tuple)):
# if self.input_std is a list, we assume it's per-channel std
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
resized_img = cv2.resize(face_img, self.input_size)

image -= np.array(self.input_mean, dtype=np.float32)
image /= np.array(self.input_std, dtype=np.float32)
if isinstance(self.input_std, (list, tuple)):
# Per-channel normalization
rgb_img = cv2.cvtColor(resized_img, cv2.COLOR_BGR2RGB).astype(np.float32)
normalized_img = (rgb_img - np.array(self.input_mean, dtype=np.float32)) / \
np.array(self.input_std, dtype=np.float32)

# Change to NCHW (batch, channels, height, width)
blob = np.transpose(image, (2, 0, 1))  # CHW
blob = np.transpose(normalized_img, (2, 0, 1))  # CHW
blob = np.expand_dims(blob, axis=0)  # NCHW
else:
# cv2.dnn.blobFromImage does not support per-channel std so we use a single value here
# Single-value normalization
blob = cv2.dnn.blobFromImage(
image,
resized_img,
scalefactor=1.0 / self.input_std,
size=self.input_size,
mean=(self.input_mean, self.input_mean, self.input_mean),
swapRB=True  # Convert BGR to RGB
)

return blob

def get_embedding(self, image: np.ndarray, landmarks: np.ndarray) -> np.ndarray:
@@ -138,13 +136,17 @@
Extracts face embedding from an aligned image.

Args:
image (np.ndarray): Input face image (BGR format).
landmarks (np.ndarray): Facial landmarks (5 points for alignment).
image: Input face image (BGR format).
landmarks: Facial landmarks (5 points for alignment).

Returns:
np.ndarray: 512-dimensional face embedding.
Face embedding vector (typically 512-dimensional).
"""
aligned_face, _ = face_alignment(image, landmarks)  # Use your function for alignment
blob = self.preprocess(aligned_face)  # Convert to blob
embedding = self.session.run(self.output_names, {self.input_name: blob})[0]
return embedding  # Return the 512-D feature vector
# Align face using landmarks
aligned_face, _ = face_alignment(image, landmarks)

# Generate embedding from aligned face
face_blob = self.preprocess(aligned_face)
embedding = self.session.run(self.output_names, {self.input_name: face_blob})[0]

return embedding
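A minimal usage sketch (not part of this commit) for the updated BaseFaceEncoder API above; the encoder import path and the detector's detect() call are assumptions:

# Illustrative only -- import path for BaseFaceEncoder and detect() signature assumed.
import cv2
import numpy as np

from uniface import RetinaFace
from uniface.constants import RetinaFaceWeights, MobileFaceWeights
from uniface.recognition import BaseFaceEncoder  # hypothetical import path

detector = RetinaFace(model_name=RetinaFaceWeights.MNET_V2)
encoder = BaseFaceEncoder(model_name=MobileFaceWeights.MNET_V2)

image = cv2.imread("face.jpg")
boxes, landmarks = detector.detect(image)  # assumed detector API

if len(boxes) > 0:
    # get_embedding() aligns the face with the 5-point landmarks, then runs the encoder.
    embedding = encoder.get_embedding(image, landmarks[0])
    print(np.linalg.norm(embedding))  # L2 norm of the embedding vector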