diff --git a/scripts/run_detection.py b/scripts/run_detection.py
index 8904c66..8729395 100644
--- a/scripts/run_detection.py
+++ b/scripts/run_detection.py
@@ -4,16 +4,17 @@
 import time
 import argparse
 import numpy as np
 
-from uniface.detection import RetinaFace, draw_detections, SCRFD
-from uniface.constants import RetinaFaceWeights, SCRFDWeights
+# UPDATED: Use the factory function and import from the new location
+from uniface.detection import create_detector
+from uniface.visualization import draw_detections
 
 
-def run_inference(model, image_path, vis_threshold=0.6, save_dir="outputs"):
+def run_inference(detector, image_path: str, vis_threshold: float = 0.6, save_dir: str = "outputs"):
     """
     Run face detection on a single image.
 
     Args:
-        model (RetinaFace): Initialized RetinaFace model.
+        detector: Initialized face detector.
         image_path (str): Path to input image.
         vis_threshold (float): Threshold for drawing detections.
         save_dir (str): Directory to save output image.
@@ -23,8 +24,18 @@ def run_inference(model, image_path, vis_threshold=0.6, save_dir="outputs"):
         print(f"❌ Error: Failed to load image from '{image_path}'")
         return
 
-    boxes, landmarks = model.detect(image)
-    draw_detections(image, (boxes, landmarks), vis_threshold)
+    # 1. Get the list of face dictionaries from the detector
+    faces = detector.detect(image)
+
+    if faces:
+        # 2. Unpack the data into separate lists
+        bboxes = [face['bbox'] for face in faces]
+        scores = [face['confidence'] for face in faces]
+        landmarks = [face['landmarks'] for face in faces]
+
+        # 3. Pass the unpacked lists to the drawing function
+        draw_detections(image, bboxes, scores, landmarks, vis_threshold=vis_threshold)
+
     os.makedirs(save_dir, exist_ok=True)
     output_path = os.path.join(save_dir, f"{os.path.splitext(os.path.basename(image_path))[0]}_out.jpg")
@@ -33,28 +44,38 @@ def main():
-    parser = argparse.ArgumentParser(description="Run RetinaFace inference on an image.")
+    parser = argparse.ArgumentParser(description="Run face detection on an image.")
     parser.add_argument("--image", type=str, required=True, help="Path to the input image")
-    parser.add_argument("--model", type=str, default="MNET_V2", choices=[m.name for m in RetinaFaceWeights], help="Model variant to use")
+    parser.add_argument(
+        "--method",
+        type=str,
+        default="retinaface",
+        choices=['retinaface', 'scrfd'],
+        help="Detection method to use."
+    )
     parser.add_argument("--threshold", type=float, default=0.6, help="Visualization confidence threshold")
     parser.add_argument("--iterations", type=int, default=1, help="Number of inference runs for benchmarking")
     parser.add_argument("--save_dir", type=str, default="outputs", help="Directory to save output images")
     args = parser.parse_args()
 
-    model_name = RetinaFaceWeights[args.model]
-    model = RetinaFace(model_name=model_name)
+    print(f"Initializing detector: {args.method}")
+    detector = create_detector(method=args.method)
 
     avg_time = 0
     for i in range(args.iterations):
         start = time.time()
-        run_inference(model, args.image, args.threshold, args.save_dir)
+        run_inference(detector, args.image, args.threshold, args.save_dir)
         elapsed = time.time() - start
         print(f"[{i + 1}/{args.iterations}] ⏱️ Inference time: {elapsed:.4f} seconds")
-        avg_time += elapsed
+        if i > 0:  # Skip the first (warm-up) run, which may include model loading time
+            avg_time += elapsed
 
     if args.iterations > 1:
+        # Exclude the warm-up run from the average
+        effective_iterations = max(1, args.iterations - 1)
-        print(f"\n🔥 Average inference time over {args.iterations} runs: {avg_time / args.iterations:.4f} seconds")
+        print(
+            f"\n🔥 Average inference time over the last {effective_iterations} runs: {avg_time / effective_iterations:.4f} seconds")
 
 
 if __name__ == "__main__":
diff --git a/scripts/run_face_search.py b/scripts/run_face_search.py
new file mode 100644
index 0000000..e67c8bc
--- /dev/null
+++ b/scripts/run_face_search.py
@@ -0,0 +1,101 @@
+import cv2
+import argparse
+import numpy as np
+
+# Use the new high-level factory functions
+from uniface.detection import create_detector
+from uniface.recognition import create_recognizer
+from uniface.face_utils import compute_similarity
+
+
+def extract_reference_embedding(detector, recognizer, image_path: str) -> np.ndarray:
+    """Extracts a normalized embedding from the first face found in an image."""
+    image = cv2.imread(image_path)
+    if image is None:
+        raise RuntimeError(f"Failed to load image: {image_path}")
+
+    faces = detector.detect(image)
+    if not faces:
+        raise RuntimeError("No faces found in reference image.")
+
+    # Get landmarks from the first detected face dictionary
+    landmarks = np.array(faces[0]['landmarks'])
+
+    # Use normalized embedding for more reliable similarity comparison
+    embedding = recognizer.get_normalized_embedding(image, landmarks)
+    return embedding
+
+
+def run_video(detector, recognizer, ref_embedding: np.ndarray, threshold: float = 0.4):
+    """Run real-time face recognition from a webcam feed."""
+    cap = cv2.VideoCapture(0)
+    if not cap.isOpened():
+        raise RuntimeError("Webcam could not be opened.")
+    print("Webcam started. Press 'q' to quit.")
+
+    while True:
+        ret, frame = cap.read()
+        if not ret:
+            break
+
+        faces = detector.detect(frame)
+
+        # Loop through each detected face
+        for face in faces:
+            # Extract bbox and landmarks from the dictionary
+            bbox = face['bbox']
+            landmarks = np.array(face['landmarks'])
+
+            x1, y1, x2, y2 = map(int, bbox)
+
+            # Get the normalized embedding for the current face
+            embedding = recognizer.get_normalized_embedding(frame, landmarks)
+
+            # Compare with the reference embedding
+            sim = compute_similarity(ref_embedding, embedding)
+
+            # Draw results
+            label = f"Match ({sim:.2f})" if sim > threshold else f"Unknown ({sim:.2f})"
+            color = (0, 255, 0) if sim > threshold else (0, 0, 255)
+            cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
+            cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)
+
+        cv2.imshow("Face Recognition", frame)
+        if cv2.waitKey(1) & 0xFF == ord('q'):
+            break
+
+    cap.release()
+    cv2.destroyAllWindows()
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Face recognition using a reference image.")
+    parser.add_argument("--image", type=str, required=True, help="Path to the reference face image.")
+    parser.add_argument(
+        "--detector",
+        type=str,
+        default="scrfd",
+        choices=['retinaface', 'scrfd'],
+        help="Face detection method."
+    )
+    parser.add_argument(
+        "--recognizer",
+        type=str,
+        default="arcface",
+        choices=['arcface', 'mobileface', 'sphereface'],
+        help="Face recognition method."
+    )
+    args = parser.parse_args()
+
+    print("Initializing models...")
+    detector = create_detector(method=args.detector)
+    recognizer = create_recognizer(method=args.recognizer)
+
+    print("Extracting reference embedding...")
+    ref_embedding = extract_reference_embedding(detector, recognizer, args.image)
+
+    run_video(detector, recognizer, ref_embedding)
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/scripts/run_recognition.py b/scripts/run_recognition.py
index 183aead..adba469 100644
--- a/scripts/run_recognition.py
+++ b/scripts/run_recognition.py
@@ -2,18 +2,18 @@
 import cv2
 import argparse
 import numpy as np
 
-from uniface.detection import RetinaFace
-from uniface.constants import RetinaFaceWeights
-from uniface.recognition import ArcFace
+# Use the new high-level factory functions for consistency
+from uniface.detection import create_detector
+from uniface.recognition import create_recognizer
 
 
-def run_inference(detector, recognizer, image_path):
+def run_inference(detector, recognizer, image_path: str):
     """
     Detect faces and extract embeddings from a single image.
 
     Args:
-        detector (RetinaFace): Initialized face detector.
-        recognizer (ArcFace): Face recognition model.
+        detector: Initialized face detector.
+        recognizer: Initialized face recognition model.
         image_path (str): Path to the input image.
     """
     image = cv2.imread(image_path)
@@ -21,36 +24,53 @@
         print(f"Error: Failed to load image from '{image_path}'")
         return
 
-    boxes, landmarks = detector.detect(image)
+    faces = detector.detect(image)
 
-    if len(boxes) == 0:
+    if not faces:
         print("No faces detected.")
         return
 
-    print(f"Detected {len(boxes)} face(s). Extracting embeddings...")
+    print(f"Detected {len(faces)} face(s). Extracting embeddings for the first face...")
 
-    for i, landmark in enumerate(landmarks[:1]):
-        embedding = recognizer.get_embedding(image, landmark)
-        norm_embedding = recognizer.get_normalized_embedding(image, landmark)
-        print("embedding:", np.sum(embedding))
-        print("norm embedding:",np.sum(norm_embedding))
+    # Process the first detected face
+    first_face = faces[0]
+    landmarks = np.array(first_face['landmarks'])  # Convert landmarks to numpy array
+
+    # Extract embedding using the landmarks from the face dictionary
+    embedding = recognizer.get_embedding(image, landmarks)
+    norm_embedding = recognizer.get_normalized_embedding(image, landmarks)
+
+    # Print some info about the embeddings
+    print(f" - Embedding shape: {embedding.shape}")
+    print(f" - L2 norm of unnormalized embedding: {np.linalg.norm(embedding):.4f}")
+    print(f" - L2 norm of normalized embedding: {np.linalg.norm(norm_embedding):.4f}")
 
 
 def main():
     parser = argparse.ArgumentParser(description="Extract face embeddings from a single image.")
     parser.add_argument("--image", type=str, required=True, help="Path to the input image.")
     parser.add_argument(
-        "--model",
+        "--detector",
         type=str,
-        default="MNET_V2",
-        choices=[m.name for m in RetinaFaceWeights],
-        help="RetinaFace model variant to use."
+        default="retinaface",
+        choices=['retinaface', 'scrfd'],
+        help="Face detection method to use."
+    )
+    parser.add_argument(
+        "--recognizer",
+        type=str,
+        default="arcface",
+        choices=['arcface', 'mobileface', 'sphereface'],
+        help="Face recognition method to use."
     )
     args = parser.parse_args()
 
-    detector = RetinaFace(model_name=RetinaFaceWeights[args.model])
-    recognizer = ArcFace()
+    print(f"Initializing detector: {args.detector}")
+    detector = create_detector(method=args.detector)
+
+    print(f"Initializing recognizer: {args.recognizer}")
+    recognizer = create_recognizer(method=args.recognizer)
 
     run_inference(detector, recognizer, args.image)
diff --git a/scripts/search_face.py b/scripts/search_face.py
deleted file mode 100644
index e183baa..0000000
--- a/scripts/search_face.py
+++ /dev/null
@@ -1,69 +0,0 @@
-import cv2
-import argparse
-import numpy as np
-
-from uniface.detection import RetinaFace
-from uniface.constants import RetinaFaceWeights
-from uniface.recognition import ArcFace
-from uniface.face_utils import compute_similarity
-
-
-def extract_reference_embedding(detector, recognizer, image_path):
-    image = cv2.imread(image_path)
-    if image is None:
-        raise RuntimeError(f"Failed to load image: {image_path}")
-
-    boxes, landmarks = detector.detect(image)
-    if len(boxes) == 0:
-        raise RuntimeError("No faces found in reference image.")
-
-    embedding = recognizer.get_embedding(image, landmarks[0])
-    return embedding
-
-
-def run_video(detector, recognizer, ref_embedding, threshold=0.30):
-    cap = cv2.VideoCapture(0)
-    if not cap.isOpened():
-        raise RuntimeError("Webcam could not be opened.")
-
-    while True:
-        ret, frame = cap.read()
-        if not ret:
-            break
-
-        boxes, landmarks = detector.detect(frame)
-
-        for box, lm in zip(boxes, landmarks):
-            x1, y1, x2, y2 = map(int, box[:4])
-            embedding = recognizer.get_embedding(frame, lm)
-            sim = compute_similarity(ref_embedding, embedding)
-            label = f"Match ({sim:.2f})" if sim > threshold else f"Unknown ({sim:.2f})"
-            color = (0, 255, 0) if sim > threshold else (0, 0, 255)
-
-            cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
-            cv2.putText(frame, label, (x1, y1 - 10),
-                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)
-
-        cv2.imshow("Face Recognition", frame)
-        if cv2.waitKey(1) & 0xFF == ord('q'):
-            break
-
-    cap.release()
-    cv2.destroyAllWindows()
-
-
-def main():
-    parser = argparse.ArgumentParser(description="Face recognition using a reference image.")
-    parser.add_argument("--image", type=str, required=True, help="Path to the reference face image.")
-    parser.add_argument("--model", type=str, default="MNET_V2",
-                        choices=[m.name for m in RetinaFaceWeights], help="Face detector model.")
-    args = parser.parse_args()
-
-    detector = RetinaFace(model_name=RetinaFaceWeights[args.model])
-    recognizer = ArcFace()
-    ref_embedding = extract_reference_embedding(detector, recognizer, args.image)
-    run_video(detector, recognizer, ref_embedding)
-
-
-if __name__ == "__main__":
-    main()
diff --git a/uniface/__init__.py b/uniface/__init__.py
index 094f77a..be0399e 100644
--- a/uniface/__init__.py
+++ b/uniface/__init__.py
@@ -15,7 +15,10 @@
 __license__ = "MIT"
 __author__ = "Yakhyokhuja Valikhujaev"
 __version__ = "0.1.8"
 
+
 from .detection import detect_faces, create_detector, list_available_detectors
+from .recognition import create_recognizer
+from .landmark import create_landmarker
 from uniface.face_utils import face_alignment, compute_similarity
 from uniface.model_store import verify_model_weights
@@ -25,22 +28,20 @@
 from uniface.log import Logger
 
 
 __all__ = [
-    # Metadata
-    "__version__",
-    "__author__",
-    "__license__",
+    '__author__',
+    '__license__',
+    '__version__',
 
-    # Core functions
-    'detect_faces',
     'create_detector',
+    'create_landmarker',
+    'create_recognizer',
+    'detect_faces',
     'list_available_detectors',
 
-    # Utility functions
-    "face_alignment",
-    "compute_similarity",
-    "verify_model_weights",
-    "draw_detections",
+    'compute_similarity',
+    'draw_detections',
+    'face_alignment',
+    'verify_model_weights',
 
-    # Classes
-    "Logger",
+    'Logger'
 ]
diff --git a/uniface/landmark/__init__.py b/uniface/landmark/__init__.py
index 2271f13..cacd764 100644
--- a/uniface/landmark/__init__.py
+++ b/uniface/landmark/__init__.py
@@ -1 +1,32 @@
-from .model import Landmark
+# Copyright 2025 Yakhyokhuja Valikhujaev
+# Author: Yakhyokhuja Valikhujaev
+# GitHub: https://github.com/yakhyo
+
+from .models import Landmark106
+from .base import BaseLandmarker
+
+
+def create_landmarker(method: str = 'insightface_106', **kwargs) -> BaseLandmarker:
+    """
+    Factory function to create facial landmark predictors.
+
+    Args:
+        method (str): Landmark prediction method. Options: 'insightface_106'.
+        **kwargs: Model-specific parameters.
+
+    Returns:
+        Initialized landmarker instance.
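+
+    Example:
+        >>> # Illustrative usage; assumes a loaded BGR image and a face bbox [x1, y1, x2, y2]
+        >>> landmarker = create_landmarker('insightface_106')
+        >>> landmarks = landmarker.get_landmarks(image, bbox)
+        >>> landmarks.shape
+        (106, 2)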
+    """
+    method = method.lower()
+    if method == 'insightface_106':
+        return Landmark106(**kwargs)
+    else:
+        available = ['insightface_106']
+        raise ValueError(f"Unsupported method: '{method}'. Available: {available}")
+
+
+__all__ = [
+    "create_landmarker",
+    "Landmark106",
+    "BaseLandmarker"
+]
diff --git a/uniface/landmark/base.py b/uniface/landmark/base.py
new file mode 100644
index 0000000..ade7710
--- /dev/null
+++ b/uniface/landmark/base.py
@@ -0,0 +1,30 @@
+# Copyright 2025 Yakhyokhuja Valikhujaev
+# Author: Yakhyokhuja Valikhujaev
+# GitHub: https://github.com/yakhyo
+
+from abc import ABC, abstractmethod
+import numpy as np
+
+
+class BaseLandmarker(ABC):
+    """
+    Abstract Base Class for all facial landmark models.
+    """
+    @abstractmethod
+    def get_landmarks(self, image: np.ndarray, bbox: np.ndarray) -> np.ndarray:
+        """
+        Predicts facial landmarks for a given face bounding box.
+
+        This method defines the standard interface for all landmark predictors.
+        It takes a full image and a bounding box for a single face and returns
+        the predicted keypoints for that face.
+
+        Args:
+            image (np.ndarray): The full source image in BGR format.
+            bbox (np.ndarray): A bounding box of a face [x1, y1, x2, y2].
+
+        Returns:
+            np.ndarray: An array of predicted landmark points with shape (N, 2),
+                where N is the number of landmarks.
+        """
+        raise NotImplementedError
diff --git a/uniface/landmark/model.py b/uniface/landmark/model.py
deleted file mode 100644
index 39630f8..0000000
--- a/uniface/landmark/model.py
+++ /dev/null
@@ -1,247 +0,0 @@
-# Copyright 2025 Yakhyokhuja Valikhujaev
-# Author: Yakhyokhuja Valikhujaev
-# GitHub: https://github.com/yakhyo
-
-import cv2
-import numpy as np
-import onnxruntime as ort
-
-from typing import Tuple
-
-from uniface.log import Logger
-from uniface.constants import LandmarkWeights
-from uniface.model_store import verify_model_weights
-from uniface.face_utils import bbox_center_alignment, transform_points_2d
-
-__all__ = ['Landmark']
-
-
-class Landmark:
-    """
-    Facial landmark detection model for predicting 106 facial keypoints using ONNX model.
-
-    This class wraps a pretrained facial landmark model to detect 106 key facial points
-    such as eyes, eyebrows, nose, lips, and jawline from a given face bounding box.
-    It handles model verification, input preprocessing, ONNX inference execution,
-    and projection of landmark coordinates back to the original image space.
-
-    Attributes:
-        input_size (Tuple[int, int]): Model's expected input resolution (width, height).
-        input_mean (float): Mean value used for input normalization.
-        input_std (float): Standard deviation used for input normalization.
-        model_path (str): Path to the verified ONNX model file.
-        session (onnxruntime.InferenceSession): ONNX Runtime session for inference.
-        input_names (List[str]): List of input node names.
-        output_names (List[str]): List of output node names.
-        lmk_dim (int): Number of dimensions per landmark point (typically 2 for x, y).
-        lmk_num (int): Total number of landmark points predicted by the model (106).
-
-    Args:
-        model_name (LandmarkWeights): Enum specifying the landmark model to load.
-        input_size (Tuple[int, int]): Resolution for model input; defaults to (192, 192).
-    """
-
-    def __init__(
-        self,
-        model_name: LandmarkWeights = LandmarkWeights.DEFAULT,
-        input_size: Tuple[int, int] = (192, 192)
-    ) -> None:
-        """
-        Initializes the Facial Landmark model for inference.
-
-        Args:
-            model_name: Enum specifying which landmark model weights to use
-            input_size: Input resolution for the model (width, height)
-        """
-        Logger.info(
-            f"Initializing Facial Landmark with model={model_name}, "
-            f"input_size={input_size}"
-        )
-
-        # Initialize configuration
-        self.input_size = input_size
-        self.input_std = 1.0
-        self.input_mean = 0.0
-
-        # Get path to model weights
-        self.model_path = verify_model_weights(model_name)
-        Logger.info(f"Verified model weights located at: {self.model_path}")
-
-        # Initialize model
-        self._initialize_model()
-
-    def _initialize_model(self):
-        """
-        Initialize the ONNX model from the stored model path.
-
-        Raises:
-            RuntimeError: If the model fails to load or initialize.
-        """
-        try:
-            self.session = ort.InferenceSession(
-                self.model_path,
-                providers=["CUDAExecutionProvider", "CPUExecutionProvider"]
-            )
-
-            # Get input configuration
-            input_metadata = self.session.get_inputs()[0]
-            input_shape = input_metadata.shape
-            self.input_size = tuple(input_shape[2:4][::-1])  # Update input size from model
-
-            # Get input/output names
-            self.input_names = [input.name for input in self.session.get_inputs()]
-            self.output_names = [output.name for output in self.session.get_outputs()]
-
-            # Determine landmark dimensions from output shape
-            output_shape = self.session.get_outputs()[0].shape
-            self.lmk_dim = 2  # x,y coordinates
-            self.lmk_num = output_shape[1] // self.lmk_dim  # Number of landmarks
-
-            Logger.info(f"Model initialized with {self.lmk_num} landmarks")
-
-        except Exception as e:
-            Logger.error(f"Failed to load landmark model from '{self.model_path}'", exc_info=True)
-            raise RuntimeError(f"Failed to initialize landmark model: {e}")
-
-    def preprocess(self, image: np.ndarray, bbox: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
-        """
-        Preprocess the input image and bounding box for inference.
-
-        Args:
-            image: Input image in BGR format
-            bbox: Bounding box coordinates [x1, y1, x2, y2]
-
-        Returns:
-            Tuple containing:
-                - Preprocessed image blob ready for inference
-                - Transformation matrix for mapping predictions back to original image
-        """
-        # Calculate face dimensions and center
-        width, height = bbox[2] - bbox[0], bbox[3] - bbox[1]
-        center = (bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2
-
-        # Determine scale to fit face with some margin
-        scale = self.input_size[0] / (max(width, height) * 1.5)
-        rotation = 0.0
-
-        # Align face using center, scale and rotation
-        aligned_face, transform_matrix = bbox_center_alignment(
-            image, center, self.input_size[0], scale, rotation
-        )
-
-        # Convert to blob format for inference
-        face_blob = cv2.dnn.blobFromImage(
-            aligned_face,
-            1.0 / self.input_std,
-            self.input_size,
-            (self.input_mean, self.input_mean, self.input_mean),
-            swapRB=True  # Convert BGR to RGB
-        )
-
-        return face_blob, transform_matrix
-
-    def postprocess(self, predictions: np.ndarray, transform_matrix: np.ndarray) -> np.ndarray:
-        """
-        Convert raw model predictions to image coordinates.
-
-        Args:
-            predictions: Raw landmark coordinates from model output
-            transform_matrix: Affine transformation matrix from preprocessing
-
-        Returns:
-            Landmarks in original image coordinates
-        """
-        # Reshape to pairs of x,y coordinates
-        landmarks = predictions.reshape((-1, 2))
-
-        # Denormalize coordinates to pixel space
-        landmarks[:, 0:2] += 1  # Shift from [-1,1] to [0,2] range
-        landmarks[:, 0:2] *= (self.input_size[0] // 2)  # Scale to pixel coordinates
-
-        # Invert the transformation to map back to original image
-        inverse_matrix = cv2.invertAffineTransform(transform_matrix)
-        landmarks = transform_points_2d(landmarks, inverse_matrix)
-
-        return landmarks
-
-    def predict(self, image: np.ndarray, bbox: np.ndarray) -> np.ndarray:
-        """
-        Predict facial landmarks for the given image and face bounding box.
-
-        Args:
-            image: Input image in BGR format
-            bbox: Face bounding box [x1, y1, x2, y2]
-
-        Returns:
-            Array of facial landmarks in original image coordinates
-        """
-        # Preprocess image
-        face_blob, transform_matrix = self.preprocess(image, bbox)
-
-        # Run inference
-        raw_predictions = self.session.run(
-            self.output_names,
-            {self.input_names[0]: face_blob}
-        )[0][0]
-
-        # Postprocess to get landmarks in original image space
-        landmarks = self.postprocess(raw_predictions, transform_matrix)
-
-        return landmarks
-
-# TODO: For testing purposes only, remote later
-
-
-if __name__ == "__main__":
-    from uniface.detection import RetinaFace
-    from uniface.constants import RetinaFaceWeights
-
-    face_detector = RetinaFace(
-        model_name=RetinaFaceWeights.MNET_V2,
-        conf_thresh=0.5,
-        pre_nms_topk=5000,
-        nms_thresh=0.4,
-        post_nms_topk=750,
-        dynamic_size=False,
-        input_size=(640, 640)
-    )
-
-    model = Landmark()
-
-    cap = cv2.VideoCapture(0)
-    if not cap.isOpened():
-        print("Webcam not available.")
-        exit()
-
-    print("Press 'q' to quit.")
-
-    while True:
-        ret, frame = cap.read()
-        if not ret:
-            print("Frame capture failed.")
-            break
-
-        boxes, landmarks = face_detector.detect(frame)
-
-        if boxes is None or len(boxes) == 0:
-            cv2.imshow("Facial Landmark Detection", frame)
-            if cv2.waitKey(1) & 0xFF == ord('q'):
-                break
-            continue
-
-        for box in boxes:
-            x1, y1, x2, y2, score = box.astype(int)
-
-            lmk = model.predict(frame, box[:4])
-
-            for (x, y) in lmk.astype(int):
-                cv2.circle(frame, (x, y), 2, (0, 255, 0), -1)
-
-            cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)
-
-        cv2.imshow("Facial Landmark Detection", frame)
-        if cv2.waitKey(1) & 0xFF == ord('q'):
-            break
-
-    cap.release()
-    cv2.destroyAllWindows()
diff --git a/uniface/landmark/models.py b/uniface/landmark/models.py
new file mode 100644
index 0000000..3ba7b8e
--- /dev/null
+++ b/uniface/landmark/models.py
@@ -0,0 +1,217 @@
+# Copyright 2025 Yakhyokhuja Valikhujaev
+# Author: Yakhyokhuja Valikhujaev
+# GitHub: https://github.com/yakhyo
+
+import cv2
+import numpy as np
+import onnxruntime as ort
+from typing import Tuple
+
+from uniface.log import Logger
+from uniface.constants import LandmarkWeights
+from uniface.model_store import verify_model_weights
+from uniface.face_utils import bbox_center_alignment, transform_points_2d
+from .base import BaseLandmarker
+
+__all__ = ['Landmark106']
+
+
+class Landmark106(BaseLandmarker):
+    """Facial landmark model for predicting 106 facial keypoints.
+
+    This class implements the BaseLandmarker and provides an end-to-end
+    pipeline for 106-point facial landmark detection. It handles model
+    loading, preprocessing of a face crop based on a bounding box,
+    inference, and post-processing to map landmarks back to the
+    original image coordinates.
+
+    Args:
+        model_name (LandmarkWeights): The enum specifying the landmark model to load.
+            Defaults to `LandmarkWeights.DEFAULT`.
+        input_size (Tuple[int, int]): The resolution (width, height) for the model's
+            input. Defaults to (192, 192).
+
+    Example:
+        >>> # Assume 'image' is a loaded image and 'bbox' is a face bounding box
+        >>> # bbox = [x1, y1, x2, y2]
+        >>>
+        >>> landmarker = Landmark106()
+        >>> landmarks = landmarker.get_landmarks(image, bbox)
+        >>> print(landmarks.shape)
+        (106, 2)
+    """
+    def __init__(
+        self,
+        model_name: LandmarkWeights = LandmarkWeights.DEFAULT,
+        input_size: Tuple[int, int] = (192, 192)
+    ) -> None:
+        Logger.info(
+            f"Initializing Facial Landmark with model={model_name}, "
+            f"input_size={input_size}"
+        )
+        self.input_size = input_size
+        self.input_std = 1.0
+        self.input_mean = 0.0
+        self.model_path = verify_model_weights(model_name)
+        self._initialize_model()
+
+    def _initialize_model(self):
+        """
+        Initialize the ONNX model from the stored model path.
+
+        Raises:
+            RuntimeError: If the model fails to load or initialize.
+        """
+        try:
+            self.session = ort.InferenceSession(
+                self.model_path,
+                providers=["CUDAExecutionProvider", "CPUExecutionProvider"]
+            )
+
+            # Get input configuration
+            input_metadata = self.session.get_inputs()[0]
+            input_shape = input_metadata.shape
+            self.input_size = tuple(input_shape[2:4][::-1])  # Update input size from model
+
+            # Get input/output names
+            self.input_names = [input.name for input in self.session.get_inputs()]
+            self.output_names = [output.name for output in self.session.get_outputs()]
+
+            # Determine landmark dimensions from output shape
+            output_shape = self.session.get_outputs()[0].shape
+            self.lmk_dim = 2  # x,y coordinates
+            self.lmk_num = output_shape[1] // self.lmk_dim  # Number of landmarks
+
+            Logger.info(f"Model initialized with {self.lmk_num} landmarks")
+
+        except Exception as e:
+            Logger.error(f"Failed to load landmark model from '{self.model_path}'", exc_info=True)
+            raise RuntimeError(f"Failed to initialize landmark model: {e}")
+
+    def preprocess(self, image: np.ndarray, bbox: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
+        """Prepares a face crop for inference.
+
+        This method takes a face bounding box, performs a center alignment to
+        warp the face into the model's required input size, and then creates
+        a normalized blob ready for the ONNX session.
+
+        Args:
+            image (np.ndarray): The full source image in BGR format.
+            bbox (np.ndarray): The bounding box of the face [x1, y1, x2, y2].
+
+        Returns:
+            Tuple[np.ndarray, np.ndarray]: A tuple containing:
+                - The preprocessed image blob ready for inference.
+                - The affine transformation matrix used for alignment.
+        """
+        width, height = bbox[2] - bbox[0], bbox[3] - bbox[1]
+        center = ((bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2)
+        scale = self.input_size[0] / (max(width, height) * 1.5)
+
+        aligned_face, transform_matrix = bbox_center_alignment(image, center, self.input_size[0], scale, 0.0)
+
+        face_blob = cv2.dnn.blobFromImage(
+            aligned_face, 1.0 / self.input_std, self.input_size,
+            (self.input_mean, self.input_mean, self.input_mean), swapRB=True
+        )
+        return face_blob, transform_matrix
+
+    def postprocess(self, predictions: np.ndarray, transform_matrix: np.ndarray) -> np.ndarray:
+        """Converts raw model predictions back to original image coordinates.
+
+        This method reshapes the model's flat output array into landmark points,
+        denormalizes them to the model's input space, and then applies an
+        inverse affine transformation to map them back to the original image space.
+
+        Args:
+            predictions (np.ndarray): Raw landmark coordinates from the model output.
+            transform_matrix (np.ndarray): The affine transformation matrix from preprocessing.
+
+        Returns:
+            np.ndarray: An array of landmark points in the original image's coordinates.
+        """
+        landmarks = predictions.reshape((-1, 2))
+        landmarks[:, 0:2] += 1
+        landmarks[:, 0:2] *= (self.input_size[0] // 2)
+
+        inverse_matrix = cv2.invertAffineTransform(transform_matrix)
+        landmarks = transform_points_2d(landmarks, inverse_matrix)
+        return landmarks
+
+    def get_landmarks(self, image: np.ndarray, bbox: np.ndarray) -> np.ndarray:
+        """Predicts facial landmarks for the given image and face bounding box.
+
+        This is the main public method that orchestrates the full pipeline of
+        preprocessing, inference, and post-processing.
+
+        Args:
+            image (np.ndarray): The full source image in BGR format.
+            bbox (np.ndarray): A bounding box of a face [x1, y1, x2, y2].
+
+        Returns:
+            np.ndarray: An array of predicted landmark points with shape (106, 2).
+        """
+        face_blob, transform_matrix = self.preprocess(image, bbox)
+        raw_predictions = self.session.run(
+            self.output_names, {self.input_names[0]: face_blob}
+        )[0][0]
+        landmarks = self.postprocess(raw_predictions, transform_matrix)
+        return landmarks
+
+
+# TODO: For testing purposes only, remove later
+if __name__ == "__main__":
+    # UPDATED: Use the high-level factory functions
+    from uniface.detection import create_detector
+    from uniface.landmark import create_landmarker
+
+    # 1. Create the detector and landmarker using the new API
+    face_detector = create_detector('retinaface')
+    landmarker = create_landmarker()  # Uses the default 'insightface_106' method
+
+    cap = cv2.VideoCapture(0)
+    if not cap.isOpened():
+        print("Webcam not available.")
+        exit()
+
+    print("Press 'q' to quit.")
+
+    while True:
+        ret, frame = cap.read()
+        if not ret:
+            print("Frame capture failed.")
+            break
+
+        # 2. The detect method returns a list of dictionaries
+        faces = face_detector.detect(frame)
+
+        if not faces:
+            cv2.imshow("Facial Landmark Detection", frame)
+            if cv2.waitKey(1) & 0xFF == ord('q'):
+                break
+            continue
+
+        # 3. Loop through the list of face dictionaries
+        for face in faces:
+            # Extract the bounding box
+            bbox = face['bbox']
+
+            # 4. Get landmarks for the current face using its bounding box
+            landmarks = landmarker.get_landmarks(frame, bbox)
+
+            # --- Drawing Logic ---
+            # Draw the landmarks
+            for (x, y) in landmarks.astype(int):
+                cv2.circle(frame, (x, y), 2, (0, 255, 0), -1)
+
+            # Draw the bounding box
+            x1, y1, x2, y2 = map(int, bbox)
+            cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)
+
+        cv2.imshow("Facial Landmark Detection", frame)
+        if cv2.waitKey(1) & 0xFF == ord('q'):
+            break
+
+    cap.release()
+    cv2.destroyAllWindows()
diff --git a/uniface/recognition/__init__.py b/uniface/recognition/__init__.py
index 83825c1..9f13e32 100644
--- a/uniface/recognition/__init__.py
+++ b/uniface/recognition/__init__.py
@@ -1,2 +1,63 @@
-from .base import PreprocessConfig
-from .models import SphereFace, MobileFace, ArcFace
+# Copyright 2025 Yakhyokhuja Valikhujaev
+# Author: Yakhyokhuja Valikhujaev
+# GitHub: https://github.com/yakhyo
+
+from .models import ArcFace, MobileFace, SphereFace
+from .base import BaseRecognizer
+from uniface.constants import ArcFaceWeights, MobileFaceWeights, SphereFaceWeights
+
+
+def create_recognizer(method: str = 'arcface', **kwargs) -> BaseRecognizer:
+    """
+    Factory function to create face recognizers.
+
+    This function initializes and returns a face recognizer instance based on the
+    specified method. It acts as a high-level interface to the underlying
+    model classes like ArcFace, MobileFace, etc.
+
+    Args:
+        method (str): The recognition method to use.
+            Options: 'arcface' (default), 'mobileface', 'sphereface'.
+        **kwargs: Model-specific parameters passed to the recognizer's constructor.
+            For example, `model_name` can be used to select a specific
+            pre-trained weight from the available enums (e.g., `ArcFaceWeights.MNET`).
+
+    Returns:
+        BaseRecognizer: An initialized recognizer instance ready for use.
+
+    Raises:
+        ValueError: If the specified `method` is not supported.
+
+    Examples:
+        >>> # Create the default ArcFace recognizer
+        >>> recognizer = create_recognizer()
+
+        >>> # Create a specific MobileFace recognizer
+        >>> from uniface.constants import MobileFaceWeights
+        >>> recognizer = create_recognizer(
+        ...     'mobileface',
+        ...     model_name=MobileFaceWeights.MNET_V2
+        ... )
+
+        >>> # Create a SphereFace recognizer
+        >>> recognizer = create_recognizer('sphereface')
+    """
+    method = method.lower()
+
+    if method == 'arcface':
+        return ArcFace(**kwargs)
+    elif method == 'mobileface':
+        return MobileFace(**kwargs)
+    elif method == 'sphereface':
+        return SphereFace(**kwargs)
+    else:
+        available = ['arcface', 'mobileface', 'sphereface']
+        raise ValueError(f"Unsupported method: '{method}'. Available: {available}")
+
+
+__all__ = [
+    "create_recognizer",
+    "ArcFace",
+    "MobileFace",
+    "SphereFace",
+    "BaseRecognizer",
+]
\ No newline at end of file
diff --git a/uniface/recognition/base.py b/uniface/recognition/base.py
index 3f98a2c..b617e46 100644
--- a/uniface/recognition/base.py
+++ b/uniface/recognition/base.py
@@ -2,20 +2,15 @@
 # Author: Yakhyokhuja Valikhujaev
 # GitHub: https://github.com/yakhyo
 
+from abc import ABC, abstractmethod
 import cv2
 import numpy as np
 import onnxruntime as ort
 from dataclasses import dataclass
-
 from typing import Tuple, Union, List
 
 from uniface.log import Logger
-from uniface.model_store import verify_model_weights
 from uniface.face_utils import face_alignment
-from uniface.constants import SphereFaceWeights, MobileFaceWeights, ArcFaceWeights
-
-
-__all__ = ["BaseModel", "PreprocessConfig"]
 
 
 @dataclass
@@ -28,38 +23,25 @@ class PreprocessConfig:
     input_size: Tuple[int, int] = (112, 112)
 
 
-class BaseModel:
+class BaseRecognizer(ABC):
     """
-    Unified Face Encoder supporting multiple model families (e.g., SphereFace, MobileFace).
+    Abstract Base Class for all face recognition models.
+    It provides the core functionality for preprocessing, inference, and embedding extraction.
     """
-
-    def __init__(
-        self,
-        model_name: Union[SphereFaceWeights, MobileFaceWeights, ArcFaceWeights] = MobileFaceWeights.MNET_V2,
-        preprocessing: PreprocessConfig = PreprocessConfig(),
-    ) -> None:
+    @abstractmethod
+    def __init__(self, model_path: str, preprocessing: PreprocessConfig) -> None:
         """
-        Initializes the FaceEncoder model for inference.
+        Initializes the model. Subclasses must call this.
 
         Args:
-            model_name: Selected model weight enum.
-            preprocessing: Configuration for input normalization and resizing.
+            model_path (str): The direct path to the verified ONNX model.
+            preprocessing (PreprocessConfig): The configuration for preprocessing.
""" - # Store preprocessing parameters self.input_mean = preprocessing.input_mean self.input_std = preprocessing.input_std self.input_size = preprocessing.input_size - Logger.info( - f"Initializing Face Recognition with model={model_name}, " - f"input_mean={self.input_mean}, input_std={self.input_std}, " - f"input_size={self.input_size}" - ) - - # Get path to model weights and initialize model - self.model_path = verify_model_weights(model_name) - Logger.info(f"Verified model weights located at: {self.model_path}") - + self.model_path = model_path self._initialize_model() def _initialize_model(self) -> None: @@ -152,14 +134,15 @@ class BaseModel: def get_normalized_embedding(self, image: np.ndarray, landmarks: np.ndarray) -> np.ndarray: """ - Extracts l2 normalized face embedding vector from an image + Extracts a l2 normalized face embedding vector from an image. Args: image: Input face image (BGR format). landmarks: Facial landmarks (5 points for alignment). Returns: - Normalied face embedding vector (typically 512-dimensional). + Normalized face embedding vector (typically 512-dimensional). """ embedding = self.get_embedding(image, landmarks) - return embedding / np.linalg.norm(embedding) + norm = np.linalg.norm(embedding) + return embedding / norm if norm > 0 else embedding diff --git a/uniface/recognition/models.py b/uniface/recognition/models.py index 593f18f..e1e0e3c 100644 --- a/uniface/recognition/models.py +++ b/uniface/recognition/models.py @@ -4,27 +4,35 @@ from typing import Optional -from uniface.constants import SphereFaceWeights, MobileFaceWeights, ArcFaceWeights -from .base import BaseModel, PreprocessConfig +from uniface.constants import ArcFaceWeights, MobileFaceWeights, SphereFaceWeights +from uniface.model_store import verify_model_weights +from .base import BaseRecognizer, PreprocessConfig + +__all__ = ["ArcFace", "MobileFace", "SphereFace"] -__all__ = ["SphereFace", "MobileFace", "ArcFace"] +class ArcFace(BaseRecognizer): + """ArcFace model for robust face recognition. - -class SphereFace(BaseModel): - """ - SphereFace face encoder class. - - This class loads a SphereFace model for face embedding extraction. - It supports configurable preprocessing, with a default mean/std and input size of 112x112. + This class provides a concrete implementation of the BaseRecognizer, + pre-configured for ArcFace models. It handles the loading of specific + ArcFace weights and sets up the appropriate default preprocessing. Args: - model_name (SphereFaceWeights): Enum value representing the model to load. Defaults to SphereFaceWeights.SPHERE20. - preprocessing (Optional[PreprocessConfig]): Preprocessing config (mean, std, size). Defaults to standard 112x112 with normalization. + model_name (ArcFaceWeights): The specific ArcFace model variant to use. + Defaults to `ArcFaceWeights.MNET`. + preprocessing (Optional[PreprocessConfig]): An optional custom preprocessing + configuration. If None, a default config for ArcFace is used. 
         """
         embedding = self.get_embedding(image, landmarks)
-        return embedding / np.linalg.norm(embedding)
+        norm = np.linalg.norm(embedding)
+        return embedding / norm if norm > 0 else embedding
diff --git a/uniface/recognition/models.py b/uniface/recognition/models.py
index 593f18f..e1e0e3c 100644
--- a/uniface/recognition/models.py
+++ b/uniface/recognition/models.py
@@ -4,27 +4,35 @@
 
 from typing import Optional
 
-from uniface.constants import SphereFaceWeights, MobileFaceWeights, ArcFaceWeights
-from .base import BaseModel, PreprocessConfig
+from uniface.constants import ArcFaceWeights, MobileFaceWeights, SphereFaceWeights
+from uniface.model_store import verify_model_weights
+from .base import BaseRecognizer, PreprocessConfig
+
+__all__ = ["ArcFace", "MobileFace", "SphereFace"]
 
-__all__ = ["SphereFace", "MobileFace", "ArcFace"]
 
+class ArcFace(BaseRecognizer):
+    """ArcFace model for robust face recognition.
 
-
-class SphereFace(BaseModel):
-    """
-    SphereFace face encoder class.
-
-    This class loads a SphereFace model for face embedding extraction.
-    It supports configurable preprocessing, with a default mean/std and input size of 112x112.
+    This class provides a concrete implementation of the BaseRecognizer,
+    pre-configured for ArcFace models. It handles the loading of specific
+    ArcFace weights and sets up the appropriate default preprocessing.
 
     Args:
-        model_name (SphereFaceWeights): Enum value representing the model to load. Defaults to SphereFaceWeights.SPHERE20.
-        preprocessing (Optional[PreprocessConfig]): Preprocessing config (mean, std, size). Defaults to standard 112x112 with normalization.
+        model_name (ArcFaceWeights): The specific ArcFace model variant to use.
+            Defaults to `ArcFaceWeights.MNET`.
+        preprocessing (Optional[PreprocessConfig]): An optional custom preprocessing
+            configuration. If None, a default config for ArcFace is used.
+
+    Example:
+        >>> from uniface.recognition import ArcFace
+        >>> recognizer = ArcFace()
+        >>> # embedding = recognizer.get_normalized_embedding(image, landmarks)
     """
 
     def __init__(
-        self, model_name: SphereFaceWeights = SphereFaceWeights.SPHERE20,
+        self,
+        model_name: ArcFaceWeights = ArcFaceWeights.MNET,
         preprocessing: Optional[PreprocessConfig] = None
     ) -> None:
         if preprocessing is None:
@@ -33,23 +41,32 @@
             input_std=127.5,
             input_size=(112, 112)
         )
-        super().__init__(model_name=model_name, preprocessing=preprocessing)
+        model_path = verify_model_weights(model_name)
+        super().__init__(model_path=model_path, preprocessing=preprocessing)
 
 
-class MobileFace(BaseModel):
-    """
-    MobileFace face encoder class.
+class MobileFace(BaseRecognizer):
+    """Lightweight MobileFaceNet model for fast face recognition.
 
-    Loads a lightweight MobileFaceNet model for fast face embedding extraction.
-    Default input normalization and resizing applied if preprocessing is not provided.
+    This class provides a concrete implementation of the BaseRecognizer,
+    pre-configured for MobileFaceNet models. It is optimized for speed,
+    making it suitable for edge devices.
 
     Args:
-        model_name (MobileFaceWeights): Enum value specifying the MobileFace model. Defaults to MobileFaceWeights.MNET_V2.
-        preprocessing (Optional[PreprocessConfig]): Preprocessing config. If None, uses standard normalization and 112x112 input size.
+        model_name (MobileFaceWeights): The specific MobileFaceNet model variant to use.
+            Defaults to `MobileFaceWeights.MNET_V2`.
+        preprocessing (Optional[PreprocessConfig]): An optional custom preprocessing
+            configuration. If None, a default config for MobileFaceNet is used.
+
+    Example:
+        >>> from uniface.recognition import MobileFace
+        >>> recognizer = MobileFace()
+        >>> # embedding = recognizer.get_normalized_embedding(image, landmarks)
     """
 
     def __init__(
-        self, model_name: MobileFaceWeights = MobileFaceWeights.MNET_V2,
+        self,
+        model_name: MobileFaceWeights = MobileFaceWeights.MNET_V2,
         preprocessing: Optional[PreprocessConfig] = None
     ) -> None:
         if preprocessing is None:
@@ -58,23 +75,32 @@
             input_std=127.5,
             input_size=(112, 112)
         )
-        super().__init__(model_name=model_name)
+        model_path = verify_model_weights(model_name)
+        super().__init__(model_path=model_path, preprocessing=preprocessing)
 
 
-class ArcFace(BaseModel):
-    """
-    ArcFace face encoder class.
+class SphereFace(BaseRecognizer):
+    """SphereFace model using angular margin for face recognition.
 
-    Loads an ArcFace model (e.g., ResNet-based) for robust face recognition embedding generation.
-    Applies standard preprocessing unless overridden.
+    This class provides a concrete implementation of the BaseRecognizer,
+    pre-configured for SphereFace models, which were among the first to
+    introduce angular margin loss functions.
 
     Args:
-        model_name (ArcFaceWeights): Enum for the ArcFace model variant. Defaults to ArcFaceWeights.MNET.
-        preprocessing (Optional[PreprocessConfig]): Preprocessing settings. Defaults to standard normalization and resizing if not specified.
+        model_name (SphereFaceWeights): The specific SphereFace model variant to use.
+            Defaults to `SphereFaceWeights.SPHERE20`.
+        preprocessing (Optional[PreprocessConfig]): An optional custom preprocessing
+            configuration. If None, a default config for SphereFace is used.
+
+    Example:
+        >>> from uniface.recognition import SphereFace
+        >>> recognizer = SphereFace()
+        >>> # embedding = recognizer.get_normalized_embedding(image, landmarks)
     """
 
    def __init__(
-        self, model_name: ArcFaceWeights = ArcFaceWeights.MNET,
+        self,
+        model_name: SphereFaceWeights = SphereFaceWeights.SPHERE20,
         preprocessing: Optional[PreprocessConfig] = None
     ) -> None:
         if preprocessing is None:
@@ -83,4 +109,6 @@
             input_std=127.5,
             input_size=(112, 112)
         )
-        super().__init__(model_name=model_name)
+
+        model_path = verify_model_weights(model_name)
+        super().__init__(model_path=model_path, preprocessing=preprocessing)
diff --git a/uniface/visualization.py b/uniface/visualization.py
index f085ebc..d7a8068 100644
--- a/uniface/visualization.py
+++ b/uniface/visualization.py
@@ -4,42 +4,47 @@
 
 import cv2
 import numpy as np
+from typing import List, Union
 
 
-def draw_detections(image, detections, vis_threshold: float = 0.6):
+def draw_detections(
+    image: np.ndarray,
+    bboxes: Union[np.ndarray, List[List[float]]],
+    scores: Union[np.ndarray, List[float]],
+    landmarks: Union[np.ndarray, List[List[List[float]]]],
+    vis_threshold: float = 0.6
+):
     """
-    Draw bounding boxes and landmarks on the image with thickness scaled by bbox size.
+    Draws bounding boxes, scores, and landmarks from separate lists onto an image.
 
     Args:
-        image (ndarray): Image to draw detections on.
-        detections (tuple): (bounding boxes, landmarks) as NumPy arrays.
-        vis_threshold (float): Confidence threshold for filtering detections.
+        image (np.ndarray): The image to draw on.
+        bboxes (list or np.ndarray): A list of bounding boxes, e.g., [[x1,y1,x2,y2], ...].
+        scores (list or np.ndarray): A list of confidence scores.
+        landmarks (list or np.ndarray): A list of landmark sets, e.g., [[[x,y],...],...].
+        vis_threshold (float): Confidence threshold for filtering which detections to draw.
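+
+    Example:
+        >>> # Illustrative usage with detector output from create_detector(); assumes a BGR image
+        >>> faces = detector.detect(image)
+        >>> draw_detections(
+        ...     image,
+        ...     [f['bbox'] for f in faces],
+        ...     [f['confidence'] for f in faces],
+        ...     [f['landmarks'] for f in faces],
+        ...     vis_threshold=0.6,
+        ... )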
     """
-    _colors = [(0, 0, 255), (0, 255, 255), (255, 0, 255), (0, 255, 0), (255, 0, 0)]
-    # Unpack detections
-    boxes, landmarks = detections
-    scores = boxes[:, 4]
+    # Keep the landmark colors from the previous implementation (BGR)
+    _colors = [(0, 0, 255), (0, 255, 255), (255, 0, 255), (0, 255, 0), (255, 0, 0)]
 
-    # Filter detections by confidence threshold
-    filtered = scores >= vis_threshold
-    boxes = boxes[filtered, :4].astype(np.int32)
-    landmarks = landmarks[filtered]
-    scores = scores[filtered]
+    # Filter detections by score
+    keep_indices = [i for i, score in enumerate(scores) if score >= vis_threshold]
 
-    # Draw bounding boxes, scores, and landmarks
-    for box, score, landmark in zip(boxes, scores, landmarks):
-        # Calculate thickness proportional to the bbox size
-        thickness = max(1, int(min(box[2] - box[0], box[3] - box[1]) / 100))
+    # Draw the filtered detections
+    for i in keep_indices:
+        bbox = np.array(bboxes[i], dtype=np.int32)
+        score = scores[i]
+        landmark_set = np.array(landmarks[i], dtype=np.int32)
 
-        # Draw rectangle
-        cv2.rectangle(image, tuple(box[:2]), tuple(box[2:]), (0, 0, 255), thickness)
+        # Calculate adaptive thickness
+        thickness = max(1, int(min(bbox[2] - bbox[0], bbox[3] - bbox[1]) / 100))
+
+        # Draw bounding box
+        cv2.rectangle(image, tuple(bbox[:2]), tuple(bbox[2:]), (0, 0, 255), thickness)
 
         # Draw score
-        cv2.putText(image, f"{score:.2f}", (box[0], box[1] + 12),
+        cv2.putText(image, f"{score:.2f}", (bbox[0], bbox[1] - 10),
                     cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), thickness)
 
         # Draw landmarks
-        for point, color in zip(landmark, _colors):
-            cv2.circle(image, tuple(point), thickness, color, -1)
+        for j, point in enumerate(landmark_set):
+            cv2.circle(image, tuple(point), thickness + 1, _colors[j % len(_colors)], -1)