ref: Update some modules and remove redundant parts

yakhyo
2025-05-08 17:11:13 +09:00
parent b35b1a3f7c
commit fb29a919b1
6 changed files with 323 additions and 228 deletions

View File

@@ -18,7 +18,6 @@ def extract_reference_embedding(detector, recognizer, image_path):
        raise RuntimeError("No faces found in reference image.")
    embedding = recognizer.get_embedding(image, landmarks[0])
-    print(f"Reference embedding extracted (L2 norm = {np.linalg.norm(embedding):.4f})")
    return embedding

View File

@@ -4,12 +4,10 @@ import onnxruntime as ort
from typing import Tuple
from uniface.log import Logger
+from uniface.constants import AgeGenderWeights
from uniface.face_utils import bbox_center_alignment
from uniface.model_store import verify_model_weights
-from uniface.constants import AgeGenderWeights
-from uniface.detection import RetinaFace
-from uniface.constants import RetinaFaceWeights

__all__ = ["AgeGender"]
@@ -17,109 +15,156 @@ __all__ = ["AgeGender"]
class AgeGender:
    """
    Age and Gender Prediction Model.
+    This model predicts both a person's gender (male/female) and age from a facial image.
+    Gender is returned as an integer (0: female, 1: male) and age as years.
    """

-    def __init__(self, model_name: AgeGenderWeights = AgeGenderWeights.DEFAULT, input_size: Tuple[int, int] = (112, 112)) -> None:
+    def __init__(
+        self,
+        model_name: AgeGenderWeights = AgeGenderWeights.DEFAULT,
+        input_size: Tuple[int, int] = (112, 112)
+    ) -> None:
        """
-        Initializes the Attribute model for inference.
+        Initializes the Age and Gender prediction model.
        Args:
-            model_path (str): Path to the ONNX file.
+            model_name: Model weights enum to use
+            input_size: Input resolution for the model (width, height)
        """
        Logger.info(
            f"Initializing AgeGender with model={model_name}, "
            f"input_size={input_size}"
        )
+        # Model configuration
        self.input_size = input_size
        self.input_std = 1.0
        self.input_mean = 0.0

        # Get path to model weights
-        self._model_path = verify_model_weights(model_name)
-        Logger.info(f"Verfied model weights located at: {self._model_path}")
+        self.model_path = verify_model_weights(model_name)
+        Logger.info(f"Verified model weights located at: {self.model_path}")

        # Initialize model
-        self._initialize_model(model_path=self._model_path)
+        self._initialize_model()

-    def _initialize_model(self, model_path: str):
-        """Initialize the model from the given path.
-        Args:
-            model_path (str): Path to .onnx model.
+    def _initialize_model(self):
+        """
+        Initialize the ONNX model for inference.
+        Raises:
+            RuntimeError: If the model fails to load or initialize.
        """
        try:
+            # Initialize session with available providers
            self.session = ort.InferenceSession(
-                model_path,
+                self.model_path,
                providers=["CUDAExecutionProvider", "CPUExecutionProvider"]
            )
-            # Get model info
-            metadata = self.session.get_inputs()[0]
-            input_shape = metadata.shape
-            self.input_size = tuple(input_shape[2:4][::-1])
-            self.input_names = [x.name for x in self.session.get_inputs()]
-            self.output_names = [x.name for x in self.session.get_outputs()]
+            # Extract model metadata
+            input_metadata = self.session.get_inputs()[0]
+            input_shape = input_metadata.shape
+            self.input_size = tuple(input_shape[2:4][::-1])  # Update from model (width, height)
+            # Get input/output names
+            self.input_names = [input.name for input in self.session.get_inputs()]
+            self.output_names = [output.name for output in self.session.get_outputs()]
+            Logger.info(f"Successfully initialized AgeGender model")
        except Exception as e:
-            print(f"Failed to load the model: {e}")
-            raise
+            Logger.error(f"Failed to load AgeGender model from '{self.model_path}'", exc_info=True)
+            raise RuntimeError(f"Failed to initialize AgeGender model: {e}")

-    def preprocess(self, image: np.ndarray, bbox: np.ndarray):
-        """Preprocessing
+    def preprocess(self, image: np.ndarray, bbox: np.ndarray) -> np.ndarray:
+        """
+        Preprocess the input image and face bounding box for inference.
        Args:
-            image (np.ndarray): Numpy image
-            bbox (np.ndarray): Bounding box coordinates: [x1, y1, x2, y2]
+            image: Input image in BGR format
+            bbox: Face bounding box coordinates [x1, y1, x2, y2]
        Returns:
-            np.ndarray: Transformed image
+            Preprocessed image blob ready for inference
        """
+        # Calculate face dimensions and center
        width, height = bbox[2] - bbox[0], bbox[3] - bbox[1]
        center = (bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2
+        # Determine scale to fit face with margin
        scale = self.input_size[0] / (max(width, height) * 1.5)
        rotation = 0.0

-        transformed_image, M = bbox_center_alignment(image, center, self.input_size[0], scale, rotation)
-        input_size = tuple(transformed_image.shape[0:2][::-1])
-        blob = cv2.dnn.blobFromImage(
-            transformed_image,
-            1.0/self.input_std,
-            input_size,
-            (self.input_mean, self.input_mean, self.input_mean),
-            swapRB=True
-        )
-        return blob
+        # Align face based on bounding box
+        aligned_face, _ = bbox_center_alignment(
+            image, center, self.input_size[0], scale, rotation
+        )
+        # Convert to blob format for network input
+        face_blob = cv2.dnn.blobFromImage(
+            aligned_face,
+            1.0 / self.input_std,
+            self.input_size,
+            (self.input_mean, self.input_mean, self.input_mean),
+            swapRB=True  # Convert BGR to RGB
+        )
+        return face_blob

-    def postprocess(self, predictions: np.ndarray) -> Tuple[np.int64, int]:
-        """Postprocessing
+    def postprocess(self, predictions: np.ndarray) -> Tuple[int, int]:
+        """
+        Process model predictions to extract gender and age.
        Args:
-            predictions (np.ndarray): Model predictions, shape: [1, 3]
+            predictions: Raw model output, shape [1, 3] where:
+                - First two elements represent gender logits
+                - Third element represents normalized age
        Returns:
-            Tuple[np.int64, int]: Gender and Age values
+            Tuple containing:
+                - Gender (0: female, 1: male)
+                - Age in years
        """
-        gender = np.argmax(predictions[:2])
-        age = int(np.round(predictions[2]*100))
+        # First two values are gender logits (female/male)
+        gender = int(np.argmax(predictions[:2]))
+        # Third value is normalized age that needs scaling
+        age = int(np.round(predictions[2] * 100))
        return gender, age

-    def predict(self, image: np.ndarray, bbox: np.ndarray) -> Tuple[np.int64, int]:
-        blob = self.preprocess(image, bbox)
-        predictions = self.session.run(self.output_names, {self.input_names[0]: blob})[0][0]
-        gender, age = self.postprocess(predictions)
-        return gender, age
+    def predict(self, image: np.ndarray, bbox: np.ndarray) -> Tuple[int, int]:
+        """
+        Predict age and gender for a face in the image.
+        Args:
+            image: Input image in BGR format
+            bbox: Face bounding box [x1, y1, x2, y2]
+        Returns:
+            - 'gender_id': Gender as integer (0: female, 1: male)
+            - 'age': Age in years
+        """
+        # Preprocess and run inference
+        face_blob = self.preprocess(image, bbox)
+        predictions = self.session.run(
+            self.output_names,
+            {self.input_names[0]: face_blob}
+        )[0][0]
+        # Extract gender and age from predictions
+        gender_id, age = self.postprocess(predictions)
+        return gender_id, age

# TODO: For testing purposes only, remove later
def main():
+    from uniface.detection import RetinaFace
+    from uniface.constants import RetinaFaceWeights

    face_detector = RetinaFace(
        model_name=RetinaFaceWeights.MNET_V2,

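A minimal usage sketch for the updated AgeGender API. The AgeGender import path and the detector's detect() call (returning bounding boxes and five-point landmarks) are assumptions not shown in this diff:

import cv2
from uniface.detection import RetinaFace
from uniface.constants import RetinaFaceWeights, AgeGenderWeights
from uniface.age_gender import AgeGender  # module path assumed

detector = RetinaFace(model_name=RetinaFaceWeights.MNET_V2)
age_gender = AgeGender(model_name=AgeGenderWeights.DEFAULT)

image = cv2.imread("face.jpg")            # BGR image
boxes, landmarks = detector.detect(image)  # assumed detection signature
if len(boxes) > 0:
    gender_id, age = age_gender.predict(image, boxes[0])
    print("female" if gender_id == 0 else "male", age)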
View File

@@ -10,10 +10,9 @@ from PIL import Image
from typing import Tuple, Union
from uniface.log import Logger
-from uniface import RetinaFace
+from uniface.constants import DDAMFNWeights
from uniface.face_utils import face_alignment
from uniface.model_store import verify_model_weights
-from uniface.constants import RetinaFaceWeights, DDAMFNWeights

class Emotion:
@@ -21,10 +20,11 @@ class Emotion:
    Emotion recognition using a TorchScript model.

    Args:
-        model_name (DDAMFNWeights): Pretrained model enum. Defaults to AFFECNET7.
+        model_weights (DDAMFNWeights): Pretrained model weights enum. Defaults to AFFECNET7.
+        input_size (Tuple[int, int]): Size of input images. Defaults to (112, 112).

    Attributes:
-        emotions (List[str]): Emotion label list.
+        emotion_labels (List[str]): List of emotion labels the model can predict.
        device (torch.device): Inference device (CPU or CUDA).
        model (torch.jit.ScriptModule): Loaded TorchScript model.
@@ -33,122 +33,133 @@ class Emotion:
        RuntimeError: If model loading fails.
    """

-    def __init__(self, model_name: DDAMFNWeights = DDAMFNWeights.AFFECNET7, input_size: Tuple[int, int] = (112, 112)) -> None:
+    def __init__(
+        self,
+        model_weights: DDAMFNWeights = DDAMFNWeights.AFFECNET7,
+        input_size: Tuple[int, int] = (112, 112)
+    ) -> None:
        """
        Initialize the emotion detector with a TorchScript model
        """
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        self.emotions = [
+        self.emotion_labels = [
            "Neutral", "Happy", "Sad", "Surprise", "Fear", "Disgust", "Angry"
        ]
-        if model_name == DDAMFNWeights.AFFECNET8:
-            self.emotions.append("Contempt")
+        # Add contempt for AFFECNET8 model
+        if model_weights == DDAMFNWeights.AFFECNET8:
+            self.emotion_labels.append("Contempt")

+        # Initialize image preprocessing parameters
        self.input_size = input_size
-        self.input_std = [0.229, 0.224, 0.225]
-        self.input_mean = [0.485, 0.456, 0.406]
+        self.normalization_std = [0.229, 0.224, 0.225]
+        self.normalization_mean = [0.485, 0.456, 0.406]

        Logger.info(
-            f"Initialized Emotion class with model={model_name.name}, "
+            f"Initialized Emotion class with model={model_weights.name}, "
            f"device={'cuda' if torch.cuda.is_available() else 'cpu'}, "
-            f"num_classes={len(self.emotions)}, input_size={self.input_size}"
+            f"num_classes={len(self.emotion_labels)}, input_size={self.input_size}"
        )

-        # Get path to model weights
-        self._model_path = verify_model_weights(model_name)
-        Logger.info(f"Verified model weights located at: {self._model_path}")
+        # Get path to model weights and initialize model
+        self.model_path = verify_model_weights(model_weights)
+        Logger.info(f"Verified model weights located at: {self.model_path}")
+        self._load_model()

-        # Initialize model
-        self._initialize_model(model_path=self._model_path)

-    def _initialize_model(self, model_path: str) -> None:
+    def _load_model(self) -> None:
        """
-        Initializes a TorchScript model for emotion inference.
-        Args:
-            model_path (str): Path to the TorchScript (.pt) model.
+        Loads and initializes a TorchScript model for emotion inference.
+        Raises:
+            RuntimeError: If loading the model fails.
        """
        try:
-            self.model = torch.jit.load(model_path, map_location=self.device)
+            self.model = torch.jit.load(self.model_path, map_location=self.device)
            self.model.eval()
-            Logger.info(f"TorchScript model successfully loaded from: {model_path}")
+            Logger.info(f"TorchScript model successfully loaded from: {self.model_path}")

-            # Warm-up
-            dummy = torch.randn(1, 3, 112, 112).to(self.device)
+            # Warm-up with dummy input
+            dummy_input = torch.randn(1, 3, *self.input_size).to(self.device)
            with torch.no_grad():
-                _ = self.model(dummy)
+                _ = self.model(dummy_input)
            Logger.info("Emotion model warmed up with dummy input.")
        except Exception as e:
-            Logger.error(f"Failed to load TorchScript model from {model_path}: {e}")
-            raise
+            Logger.error(f"Failed to load TorchScript model from {self.model_path}: {e}")
+            raise RuntimeError(f"Model loading failed: {str(e)}")

    def preprocess(self, image: np.ndarray) -> torch.Tensor:
        """
-        Resize, normalize and convert image to tensor manually without torchvision.
+        Preprocess image for model inference: resize, normalize and convert to tensor.
        Args:
            image (np.ndarray): BGR image (H, W, 3)
        Returns:
-            torch.Tensor: Preprocessed image tensor of shape (1, 3, 112, 112)
+            torch.Tensor: Preprocessed image tensor of shape (1, 3, H, W)
        """
-        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # BGR -> RGB
-        # Resize to (112, 112)
-        image = cv2.resize(image, self.input_size).astype(np.float32) / 255.0
+        # Convert BGR to RGB
+        rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+        # Resize to target input size
+        resized_image = cv2.resize(rgb_image, self.input_size).astype(np.float32) / 255.0

        # Normalize with mean and std
-        mean = np.array(self.input_mean, dtype=np.float32)
-        std = np.array(self.input_std, dtype=np.float32)
-        image_normalized = (image - mean) / std
+        mean_array = np.array(self.normalization_mean, dtype=np.float32)
+        std_array = np.array(self.normalization_std, dtype=np.float32)
+        normalized_image = (resized_image - mean_array) / std_array

-        # HWC to CHW
-        image_transposed = image_normalized.transpose((2, 0, 1))
+        # Convert from HWC to CHW format
+        transposed_image = normalized_image.transpose((2, 0, 1))

        # Convert to torch tensor and add batch dimension
-        tensor = torch.from_numpy(image_transposed).unsqueeze(0).to(self.device)
+        tensor = torch.from_numpy(transposed_image).unsqueeze(0).to(self.device)
        return tensor

    def predict(self, image: np.ndarray, landmark: np.ndarray) -> Tuple[Union[str, None], Union[float, None]]:
        """
-        Predict the emotion from an BGR face image.
+        Predict the emotion from a face image.
        Args:
-            image (np.ndarray): Input face image in RGB format.
+            image (np.ndarray): Input face image in BGR format.
            landmark (np.ndarray): Facial five point landmark.
        Returns:
            Tuple[str, float]: (Predicted emotion label, Confidence score)
+            Returns (None, None) if prediction fails.
        Raises:
-            RuntimeError: If the input is invalid or inference fails internally.
+            ValueError: If the input is not a valid BGR image.
        """
+        # Validate input
        if not isinstance(image, np.ndarray):
            Logger.error("Input must be a NumPy ndarray.")
-            raise ValueError("Input must be a NumPy ndarray (RGB image).")
+            raise ValueError("Input must be a NumPy ndarray (BGR image).")
        if image.ndim != 3 or image.shape[2] != 3:
-            Logger.error(f"Invalid image shape: {image.shape}. Expected HxWx3 RGB image.")
-            raise ValueError("Input image must be in RGB format with shape (H, W, 3).")
+            Logger.error(f"Invalid image shape: {image.shape}. Expected HxWx3 image.")
+            raise ValueError("Input image must have shape (H, W, 3).")

        try:
-            image, _ = face_alignment(image, landmark)
-            tensor = self.preprocess(image)
+            # Align face using landmarks
+            aligned_image, _ = face_alignment(image, landmark)
+            # Preprocess and run inference
+            input_tensor = self.preprocess(aligned_image)
            with torch.no_grad():
-                output = self.model(tensor)
+                output = self.model(input_tensor)
+            # Handle case where model returns a tuple
            if isinstance(output, tuple):
                output = output[0]

-            probs = torch.nn.functional.softmax(output, dim=1).squeeze(0).cpu().numpy()
-            pred_idx = int(np.argmax(probs))
-            confidence = round(float(probs[pred_idx]), 2)
+            # Get probabilities and prediction
+            probabilities = torch.nn.functional.softmax(output, dim=1).squeeze(0).cpu().numpy()
+            predicted_index = int(np.argmax(probabilities))
+            confidence_score = round(float(probabilities[predicted_index]), 2)

-            return self.emotions[pred_idx], confidence
+            return self.emotion_labels[predicted_index], confidence_score
        except Exception as e:
            Logger.error(f"Emotion inference failed: {e}")
@@ -158,6 +169,8 @@ class Emotion:
# TODO: For testing purposes only, remove later
def main():
+    from uniface import RetinaFace
+    from uniface.constants import RetinaFaceWeights

    face_detector = RetinaFace(
        model_name=RetinaFaceWeights.MNET_V2,

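A short usage sketch for the reworked Emotion class. The Emotion import path and the detector call returning five-point landmarks are assumptions, not part of this diff:

import cv2
from uniface import RetinaFace
from uniface.constants import RetinaFaceWeights, DDAMFNWeights
from uniface.emotion import Emotion  # module path assumed

detector = RetinaFace(model_name=RetinaFaceWeights.MNET_V2)
emotion_model = Emotion(model_weights=DDAMFNWeights.AFFECNET7)

frame = cv2.imread("face.jpg")             # BGR image
boxes, landmarks = detector.detect(frame)  # assumed detection signature
if len(landmarks) > 0:
    label, score = emotion_model.predict(frame, landmarks[0])
    print(label, score)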
View File

@@ -11,8 +11,9 @@ import onnxruntime as ort
from typing import Tuple, List, Literal
from uniface.log import Logger
-from uniface.model_store import verify_model_weights
from uniface.constants import SCRFDWeights
+from uniface.model_store import verify_model_weights
from .utils import non_max_supression, distance2bbox, distance2kps, resize_image

__all__ = ['SCRFD']
@@ -248,13 +249,12 @@
        sorted_indices = np.argsort(values)[::-1][:max_num]
        det = det[sorted_indices]
        landmarks = landmarks[sorted_indices]

        return det, landmarks

# TODO: below is only for testing, remove it later
def draw_bbox(frame, bbox, color=(0, 255, 0), thickness=2):
    x1, y1, x2, y2 = bbox[:4].astype(np.int32)
    cv2.rectangle(frame, (x1, y1), (x2, y2), color, thickness)
@@ -267,6 +267,8 @@ def draw_keypoints(frame, points, color=(0, 0, 255), radius=2):
        cv2.circle(frame, (x, y), radius, color, -1)

+# TODO: Remove late, just for testing
if __name__ == "__main__":
    detector = SCRFD(model_name=SCRFDWeights.SCRFD_500M_KPS)
    cap = cv2.VideoCapture(0)

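A hedged sketch of how the __main__ webcam loop hinted at above might continue, using the SCRFD class and the draw_bbox/draw_keypoints helpers from this file; the detector's inference method name and its return format are assumptions:

import cv2
from uniface.constants import SCRFDWeights

detector = SCRFD(model_name=SCRFDWeights.SCRFD_500M_KPS)
cap = cv2.VideoCapture(0)
while True:
    ret, frame = cap.read()
    if not ret:
        break
    detections, keypoints = detector.detect(frame)  # assumed method name and return values
    for det, kps in zip(detections, keypoints):
        draw_bbox(frame, det)
        draw_keypoints(frame, kps)
    cv2.imshow("SCRFD", frame)
    if cv2.waitKey(1) & 0xFF == ord("q"):
        break
cap.release()
cv2.destroyAllWindows()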
View File

@@ -6,131 +6,163 @@ import numpy as np
from typing import Tuple
from uniface.log import Logger
-from uniface.face_utils import bbox_center_alignment, transform_points_2d
+from uniface.constants import LandmarkWeights
from uniface.model_store import verify_model_weights
-from uniface.detection import RetinaFace
-from uniface.constants import RetinaFaceWeights, LandmarkWeights
+from uniface.face_utils import bbox_center_alignment, transform_points_2d

__all__ = ['Landmark']

class Landmark:
-    def __init__(self, model_name: LandmarkWeights = LandmarkWeights.DEFAULT, input_size: Tuple[int, int] = (192, 192)) -> None:
+    """
+    Facial landmark detection model for predicting facial keypoints.
+    """
+
+    def __init__(
+        self,
+        model_name: LandmarkWeights = LandmarkWeights.DEFAULT,
+        input_size: Tuple[int, int] = (192, 192)
+    ) -> None:
        """
        Initializes the Facial Landmark model for inference.
        Args:
-            model_path (str): Path to the ONNX file.
+            model_name: Enum specifying which landmark model weights to use
+            input_size: Input resolution for the model (width, height)
        """
        Logger.info(
            f"Initializing Facial Landmark with model={model_name}, "
            f"input_size={input_size}"
        )
+        # Initialize configuration
        self.input_size = input_size
        self.input_std = 1.0
        self.input_mean = 0.0

        # Get path to model weights
-        self._model_path = verify_model_weights(model_name)
-        Logger.info(f"Verfied model weights located at: {self._model_path}")
+        self.model_path = verify_model_weights(model_name)
+        Logger.info(f"Verified model weights located at: {self.model_path}")

        # Initialize model
-        self._initialize_model(model_path=self._model_path)
+        self._initialize_model()

-    def _initialize_model(self, model_path: str):
-        """ Initialize the model from the given path.
-        Args:
-            model_path (str): Path to .onnx model.
+    def _initialize_model(self):
+        """
+        Initialize the ONNX model from the stored model path.
+        Raises:
+            RuntimeError: If the model fails to load or initialize.
        """
        try:
            self.session = ort.InferenceSession(
-                model_path,
+                self.model_path,
                providers=["CUDAExecutionProvider", "CPUExecutionProvider"]
            )
-            metadata = self.session.get_inputs()[0]
-            input_shape = metadata.shape
-            self.input_size = tuple(input_shape[2:4][::-1])
-            self.input_names = [x.name for x in self.session.get_inputs()]
-            self.output_names = [x.name for x in self.session.get_outputs()]
-            outputs = self.session.get_outputs()
-            output_shape = outputs[0].shape
-            self.lmk_dim = 2
-            self.lmk_num = output_shape[1] // self.lmk_dim
+            # Get input configuration
+            input_metadata = self.session.get_inputs()[0]
+            input_shape = input_metadata.shape
+            self.input_size = tuple(input_shape[2:4][::-1])  # Update input size from model
+            # Get input/output names
+            self.input_names = [input.name for input in self.session.get_inputs()]
+            self.output_names = [output.name for output in self.session.get_outputs()]
+            # Determine landmark dimensions from output shape
+            output_shape = self.session.get_outputs()[0].shape
+            self.lmk_dim = 2  # x,y coordinates
+            self.lmk_num = output_shape[1] // self.lmk_dim  # Number of landmarks
+            Logger.info(f"Model initialized with {self.lmk_num} landmarks")
        except Exception as e:
-            print(f"Failed to load the model: {e}")
-            raise
+            Logger.error(f"Failed to load landmark model from '{self.model_path}'", exc_info=True)
+            raise RuntimeError(f"Failed to initialize landmark model: {e}")

    def preprocess(self, image: np.ndarray, bbox: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """
-        Preprocess the input image and bbox for inference.
+        Preprocess the input image and bounding box for inference.
        Args:
-            image (np.ndarray): Input image.
-            bbox (np.ndarray): Bounding box [x1, y1, x2, y2].
+            image: Input image in BGR format
+            bbox: Bounding box coordinates [x1, y1, x2, y2]
        Returns:
-            Tuple[np.ndarray, np.ndarray]: Preprocessed blob and transformation matrix.
+            Tuple containing:
+                - Preprocessed image blob ready for inference
+                - Transformation matrix for mapping predictions back to original image
        """
+        # Calculate face dimensions and center
        width, height = bbox[2] - bbox[0], bbox[3] - bbox[1]
        center = (bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2
+        # Determine scale to fit face with some margin
        scale = self.input_size[0] / (max(width, height) * 1.5)
        rotation = 0.0

-        transformed_image, M = bbox_center_alignment(image, center, self.input_size[0], scale, rotation)
-        input_size = tuple(transformed_image.shape[0:2][::-1])
-        blob = cv2.dnn.blobFromImage(
-            transformed_image,
-            1.0/self.input_std,
-            input_size,
-            (self.input_mean, self.input_mean, self.input_mean),
-            swapRB=True
-        )
-        return blob, M
+        # Align face using center, scale and rotation
+        aligned_face, transform_matrix = bbox_center_alignment(
+            image, center, self.input_size[0], scale, rotation
+        )
+        # Convert to blob format for inference
+        face_blob = cv2.dnn.blobFromImage(
+            aligned_face,
+            1.0 / self.input_std,
+            self.input_size,
+            (self.input_mean, self.input_mean, self.input_mean),
+            swapRB=True  # Convert BGR to RGB
+        )
+        return face_blob, transform_matrix

-    def postprocess(self, predictions: np.ndarray, M: np.ndarray) -> np.ndarray:
+    def postprocess(self, predictions: np.ndarray, transform_matrix: np.ndarray) -> np.ndarray:
        """
-        Postprocess model outputs to get landmarks.
+        Convert raw model predictions to image coordinates.
        Args:
-            predictions (np.ndarray): Raw model predictions.
-            M (np.ndarray): Affine transformation matrix.
+            predictions: Raw landmark coordinates from model output
+            transform_matrix: Affine transformation matrix from preprocessing
        Returns:
-            np.ndarray: Transformed landmarks.
+            Landmarks in original image coordinates
        """
-        predictions = predictions.reshape((-1, 2))
-        predictions[:, 0:2] += 1
-        predictions[:, 0:2] *= (self.input_size[0] // 2)
-        IM = cv2.invertAffineTransform(M)
-        predictions = transform_points_2d(predictions, IM)
-        return predictions
+        # Reshape to pairs of x,y coordinates
+        landmarks = predictions.reshape((-1, 2))
+        # Denormalize coordinates to pixel space
+        landmarks[:, 0:2] += 1  # Shift from [-1,1] to [0,2] range
+        landmarks[:, 0:2] *= (self.input_size[0] // 2)  # Scale to pixel coordinates
+        # Invert the transformation to map back to original image
+        inverse_matrix = cv2.invertAffineTransform(transform_matrix)
+        landmarks = transform_points_2d(landmarks, inverse_matrix)
+        return landmarks

    def predict(self, image: np.ndarray, bbox: np.ndarray) -> np.ndarray:
        """
-        Predict facial landmarks for the given image and bounding box.
+        Predict facial landmarks for the given image and face bounding box.
        Args:
-            image (np.ndarray): Input image.
-            bbox (np.ndarray): Bounding box [x1, y1, x2, y2].
+            image: Input image in BGR format
+            bbox: Face bounding box [x1, y1, x2, y2]
        Returns:
-            np.ndarray: Predicted landmarks.
+            Array of facial landmarks in original image coordinates
        """
-        blob, M = self.preprocess(image, bbox)
-        preds = self.session.run(self.output_names, {self.input_names[0]: blob})[0][0]
-        landmarks = self.postprocess(preds, M)
+        # Preprocess image
+        face_blob, transform_matrix = self.preprocess(image, bbox)
+        # Run inference
+        raw_predictions = self.session.run(
+            self.output_names,
+            {self.input_names[0]: face_blob}
+        )[0][0]
+        # Postprocess to get landmarks in original image space
+        landmarks = self.postprocess(raw_predictions, transform_matrix)
        return landmarks
@@ -138,7 +170,9 @@ class Landmark:
if __name__ == "__main__":
+    from uniface.detection import RetinaFace
+    from uniface.constants import RetinaFaceWeights

    face_detector = RetinaFace(
        model_name=RetinaFaceWeights.MNET_V2,
        conf_thresh=0.5,

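A minimal sketch of the updated Landmark flow. The Landmark import path and the detector call returning bounding boxes are assumptions, not part of this diff:

import cv2
from uniface.detection import RetinaFace
from uniface.constants import RetinaFaceWeights, LandmarkWeights
from uniface.landmark import Landmark  # module path assumed

detector = RetinaFace(model_name=RetinaFaceWeights.MNET_V2, conf_thresh=0.5)
landmark_model = Landmark(model_name=LandmarkWeights.DEFAULT)

image = cv2.imread("face.jpg")     # BGR image
boxes, _ = detector.detect(image)  # assumed detection signature
for box in boxes:
    points = landmark_model.predict(image, box)  # (N, 2) points in original image coordinates
    for x, y in points.astype(int):
        cv2.circle(image, (x, y), 1, (0, 255, 0), -1)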
View File

@@ -7,7 +7,6 @@ import os
import cv2
import numpy as np
import onnxruntime as ort

from typing import Tuple, Optional, Union, List
from dataclasses import dataclass
@@ -37,100 +36,99 @@ class BaseFaceEncoder:
    def __init__(
        self,
-        model_name: SphereFaceWeights | MobileFaceWeights | ArcFaceWeights = MobileFaceWeights.MNET_V2,
+        model_name: Union[SphereFaceWeights, MobileFaceWeights, ArcFaceWeights] = MobileFaceWeights.MNET_V2,
        preprocessing: PreprocessConfig = PreprocessConfig(),
    ) -> None:
        """
        Initializes the FaceEncoder model for inference.

        Args:
-            model_name (SphereFaceWeights | MobileFaceWeights | ArcFaceWeights): Selected model weight enum.
-            preprocessing (PreprocessConfig): Configuration for input normalization and resizing.
+            model_name: Selected model weight enum.
+            preprocessing: Configuration for input normalization and resizing.
        """
+        # Store preprocessing parameters
        self.input_mean = preprocessing.input_mean
        self.input_std = preprocessing.input_std
        self.input_size = preprocessing.input_size

        Logger.info(
            f"Initializing Face Recognition with model={model_name}, "
-            f"input_mean={self.input_mean}, input_std={self.input_std}, input_size={self.input_size}"
+            f"input_mean={self.input_mean}, input_std={self.input_std}, "
+            f"input_size={self.input_size}"
        )

-        # Get path to model weights
-        self._model_path = verify_model_weights(model_name)
-        Logger.info(f"Verfied model weights located at: {self._model_path}")
+        # Get path to model weights and initialize model
+        self.model_path = verify_model_weights(model_name)
+        Logger.info(f"Verified model weights located at: {self.model_path}")
+        self._initialize_model()

-        # Initialize model
-        self._initialize_model(self._model_path)

-    def _initialize_model(self, model_path: str) -> None:
+    def _initialize_model(self) -> None:
        """
        Loads the ONNX model and prepares it for inference.
-        Args:
-            model_path (str): Path to the ONNX model file.
        Raises:
            RuntimeError: If the model fails to load or initialize.
        """
        try:
+            # Initialize model session with available providers
            self.session = ort.InferenceSession(
-                model_path,
+                self.model_path,
                providers=["CUDAExecutionProvider", "CPUExecutionProvider"]
            )
-            self._setup_model()
-            Logger.info(f"Successfully initialized face encoder from {model_path}")
+            # Extract input configuration
+            input_cfg = self.session.get_inputs()[0]
+            self.input_name = input_cfg.name
+            # Verify input dimensions match our configuration
+            input_shape = input_cfg.shape
+            model_input_size = tuple(input_shape[2:4][::-1])  # (width, height)
+            if model_input_size != self.input_size:
+                Logger.warning(f"Model input size {model_input_size} differs from configured size {self.input_size}")
+            # Extract output configuration
+            self.output_names = [output.name for output in self.session.get_outputs()]
+            self.output_shape = self.session.get_outputs()[0].shape
+            assert len(self.output_names) == 1, "Expected only one output node."
+            Logger.info(f"Successfully initialized face encoder from {self.model_path}")
        except Exception as e:
-            Logger.error(f"Failed to load face encoder model from '{model_path}'", exc_info=True)
-            raise RuntimeError(f"Failed to initialize model session for '{model_path}'") from e
+            Logger.error(f"Failed to load face encoder model from '{self.model_path}'", exc_info=True)
+            raise RuntimeError(f"Failed to initialize model session for '{self.model_path}'") from e

-    def _setup_model(self) -> None:
-        """
-        Extracts input/output configuration from the ONNX model session.
-        """
-        input_cfg = self.session.get_inputs()[0]
-        input_shape = input_cfg.shape
-        model_input_size = tuple(input_shape[2:4][::-1])  # (width, height)
-        if model_input_size != self.input_size:
-            Logger.warning(f"Model input size {model_input_size} differs from configured size {self.input_size}")
-        self.input_name = input_cfg.name
-        self.output_names = [output.name for output in self.session.get_outputs()]
-        self.output_shape = self.session.get_outputs()[0].shape
-        assert len(self.output_names) == 1, "Expected only one output node."

-    def preprocess(self, image: np.ndarray) -> np.ndarray:
+    def preprocess(self, face_img: np.ndarray) -> np.ndarray:
        """
        Preprocess the image: resize, normalize, and convert it to a blob.
        Args:
-            image (np.ndarray): Input image in BGR format.
+            face_img: Input image in BGR format.
        Returns:
-            np.ndarray: Preprocessed image as a NumPy array ready for inference.
+            Preprocessed image as a NumPy array ready for inference.
        """
-        image = cv2.resize(image, self.input_size)  # Resize to (112, 112)
+        resized_img = cv2.resize(face_img, self.input_size)

        if isinstance(self.input_std, (list, tuple)):
-            # if self.input_std is a list, we assume it's per-channel std
-            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
-            image -= np.array(self.input_mean, dtype=np.float32)
-            image /= np.array(self.input_std, dtype=np.float32)
+            # Per-channel normalization
+            rgb_img = cv2.cvtColor(resized_img, cv2.COLOR_BGR2RGB).astype(np.float32)
+            normalized_img = (rgb_img - np.array(self.input_mean, dtype=np.float32)) / \
+                np.array(self.input_std, dtype=np.float32)
            # Change to NCHW (batch, channels, height, width)
-            blob = np.transpose(image, (2, 0, 1))  # CHW
+            blob = np.transpose(normalized_img, (2, 0, 1))  # CHW
            blob = np.expand_dims(blob, axis=0)  # NCHW
        else:
-            # cv2.dnn.blobFromImage does not support per-channel std so we use a single value here
+            # Single-value normalization
            blob = cv2.dnn.blobFromImage(
-                image,
+                resized_img,
                scalefactor=1.0 / self.input_std,
                size=self.input_size,
                mean=(self.input_mean, self.input_mean, self.input_mean),
                swapRB=True  # Convert BGR to RGB
            )
        return blob

    def get_embedding(self, image: np.ndarray, landmarks: np.ndarray) -> np.ndarray:
@@ -138,13 +136,17 @@ class BaseFaceEncoder:
        Extracts face embedding from an aligned image.
        Args:
-            image (np.ndarray): Input face image (BGR format).
-            landmarks (np.ndarray): Facial landmarks (5 points for alignment).
+            image: Input face image (BGR format).
+            landmarks: Facial landmarks (5 points for alignment).
        Returns:
-            np.ndarray: 512-dimensional face embedding.
+            Face embedding vector (typically 512-dimensional).
        """
-        aligned_face, _ = face_alignment(image, landmarks)  # Use your function for alignment
-        blob = self.preprocess(aligned_face)  # Convert to blob
-        embedding = self.session.run(self.output_names, {self.input_name: blob})[0]
-        return embedding  # Return the 512-D feature vector
+        # Align face using landmarks
+        aligned_face, _ = face_alignment(image, landmarks)

+        # Generate embedding from aligned face
+        face_blob = self.preprocess(aligned_face)
+        embedding = self.session.run(self.output_names, {self.input_name: face_blob})[0]
+        return embedding
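
Embeddings returned by get_embedding can be compared with cosine similarity; a small sketch assuming two aligned BGR face crops and their five-point landmarks are already available as placeholders, with the encoder import path assumed:

import numpy as np
from uniface.constants import MobileFaceWeights
from uniface.recognition import BaseFaceEncoder  # module path assumed

def cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
    # Flatten and compare the two embedding vectors
    a, b = a.ravel(), b.ravel()
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))

encoder = BaseFaceEncoder(model_name=MobileFaceWeights.MNET_V2)
# image_a/image_b are BGR face images, lmk_a/lmk_b their five-point landmarks (placeholders)
emb_a = encoder.get_embedding(image_a, lmk_a)
emb_b = encoder.get_embedding(image_b, lmk_b)
print(f"cosine similarity = {cosine_similarity(emb_a, emb_b):.4f}")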