feat: Update recognition, landmark modules

yakhyo
2025-07-02 18:52:28 +09:00
parent b15504dfc5
commit 0417f7531f
13 changed files with 633 additions and 451 deletions

View File

@@ -4,16 +4,17 @@ import time
import argparse
import numpy as np
from uniface.detection import RetinaFace, draw_detections, SCRFD
from uniface.constants import RetinaFaceWeights, SCRFDWeights
# UPDATED: Use the factory function and import from the new location
from uniface.detection import create_detector
from uniface.visualization import draw_detections
def run_inference(model, image_path, vis_threshold=0.6, save_dir="outputs"):
def run_inference(detector, image_path: str, vis_threshold: float = 0.6, save_dir: str = "outputs"):
"""
Run face detection on a single image.
Args:
model (RetinaFace): Initialized RetinaFace model.
detector: Initialized face detector.
image_path (str): Path to input image.
vis_threshold (float): Threshold for drawing detections.
save_dir (str): Directory to save output image.
@@ -23,8 +24,18 @@ def run_inference(model, image_path, vis_threshold=0.6, save_dir="outputs"):
print(f"❌ Error: Failed to load image from '{image_path}'")
return
boxes, landmarks = model.detect(image)
draw_detections(image, (boxes, landmarks), vis_threshold)
# 1. Get the list of face dictionaries from the detector
faces = detector.detect(image)
if faces:
# 2. Unpack the data into separate lists
bboxes = [face['bbox'] for face in faces]
scores = [face['confidence'] for face in faces]
landmarks = [face['landmarks'] for face in faces]
# 3. Pass the unpacked lists to the drawing function
draw_detections(image, bboxes, scores, landmarks, vis_threshold=vis_threshold)
os.makedirs(save_dir, exist_ok=True)
output_path = os.path.join(save_dir, f"{os.path.splitext(os.path.basename(image_path))[0]}_out.jpg")
@@ -33,28 +44,38 @@ def run_inference(model, image_path, vis_threshold=0.6, save_dir="outputs"):
def main():
parser = argparse.ArgumentParser(description="Run RetinaFace inference on an image.")
parser = argparse.ArgumentParser(description="Run face detection on an image.")
parser.add_argument("--image", type=str, required=True, help="Path to the input image")
parser.add_argument("--model", type=str, default="MNET_V2", choices=[m.name for m in RetinaFaceWeights], help="Model variant to use")
parser.add_argument(
"--method",
type=str,
default="retinaface",
choices=['retinaface', 'scrfd'],
help="Detection method to use."
)
parser.add_argument("--threshold", type=float, default=0.6, help="Visualization confidence threshold")
parser.add_argument("--iterations", type=int, default=1, help="Number of inference runs for benchmarking")
parser.add_argument("--save_dir", type=str, default="outputs", help="Directory to save output images")
args = parser.parse_args()
model_name = RetinaFaceWeights[args.model]
model = RetinaFace(model_name=model_name)
print(f"Initializing detector: {args.method}")
detector = create_detector(method=args.method)
avg_time = 0
for i in range(args.iterations):
start = time.time()
run_inference(model, args.image, args.threshold, args.save_dir)
run_inference(detector, args.image, args.threshold, args.save_dir)
elapsed = time.time() - start
print(f"[{i + 1}/{args.iterations}] ⏱️ Inference time: {elapsed:.4f} seconds")
avg_time += elapsed  # accumulate timing for every run (the first run may include model-loading overhead)
if args.iterations > 1:
print(f"\n🔥 Average inference time over {args.iterations} runs: {avg_time / args.iterations:.4f} seconds")
# Note: the average includes every run; the first run may carry model-loading overhead
effective_iterations = max(1, args.iterations)
print(
f"\n🔥 Average inference time over {effective_iterations} runs: {avg_time / effective_iterations:.4f} seconds")
if __name__ == "__main__":
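
Note: a minimal end-to-end sketch of the dictionary-based detection API introduced here, assuming the create_detector / draw_detections interfaces shown in this diff; the image path is illustrative.

import os
import cv2
from uniface.detection import create_detector
from uniface.visualization import draw_detections

detector = create_detector(method="retinaface")      # or "scrfd"
image = cv2.imread("assets/sample.jpg")              # illustrative path
faces = detector.detect(image)                       # one dict per detected face

if faces:
    bboxes = [f["bbox"] for f in faces]              # [x1, y1, x2, y2]
    scores = [f["confidence"] for f in faces]
    landmarks = [f["landmarks"] for f in faces]      # five (x, y) points per face
    draw_detections(image, bboxes, scores, landmarks, vis_threshold=0.6)
    os.makedirs("outputs", exist_ok=True)
    cv2.imwrite("outputs/sample_out.jpg", image)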

scripts/run_face_search.py (new file, 101 lines)
View File

@@ -0,0 +1,101 @@
import cv2
import argparse
import numpy as np
# Use the new high-level factory functions
from uniface.detection import create_detector
from uniface.recognition import create_recognizer
from uniface.face_utils import compute_similarity
def extract_reference_embedding(detector, recognizer, image_path: str) -> np.ndarray:
"""Extracts a normalized embedding from the first face found in an image."""
image = cv2.imread(image_path)
if image is None:
raise RuntimeError(f"Failed to load image: {image_path}")
faces = detector.detect(image)
if not faces:
raise RuntimeError("No faces found in reference image.")
# Get landmarks from the first detected face dictionary
landmarks = np.array(faces[0]['landmarks'])
# Use normalized embedding for more reliable similarity comparison
embedding = recognizer.get_normalized_embedding(image, landmarks)
return embedding
def run_video(detector, recognizer, ref_embedding: np.ndarray, threshold: float = 0.4):
"""Run real-time face recognition from a webcam feed."""
cap = cv2.VideoCapture(0)
if not cap.isOpened():
raise RuntimeError("Webcam could not be opened.")
print("Webcam started. Press 'q' to quit.")
while True:
ret, frame = cap.read()
if not ret:
break
faces = detector.detect(frame)
# Loop through each detected face
for face in faces:
# Extract bbox and landmarks from the dictionary
bbox = face['bbox']
landmarks = np.array(face['landmarks'])
x1, y1, x2, y2 = map(int, bbox)
# Get the normalized embedding for the current face
embedding = recognizer.get_normalized_embedding(frame, landmarks)
# Compare with the reference embedding
sim = compute_similarity(ref_embedding, embedding)
# Draw results
label = f"Match ({sim:.2f})" if sim > threshold else f"Unknown ({sim:.2f})"
color = (0, 255, 0) if sim > threshold else (0, 0, 255)
cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)
cv2.imshow("Face Recognition", frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
cap.release()
cv2.destroyAllWindows()
def main():
parser = argparse.ArgumentParser(description="Face recognition using a reference image.")
parser.add_argument("--image", type=str, required=True, help="Path to the reference face image.")
parser.add_argument(
"--detector",
type=str,
default="scrfd",
choices=['retinaface', 'scrfd'],
help="Face detection method."
)
parser.add_argument(
"--recognizer",
type=str,
default="arcface",
choices=['arcface', 'mobileface', 'sphereface'],
help="Face recognition method."
)
args = parser.parse_args()
print("Initializing models...")
detector = create_detector(method=args.detector)
recognizer = create_recognizer(method=args.recognizer)
print("Extracting reference embedding...")
ref_embedding = extract_reference_embedding(detector, recognizer, args.image)
run_video(detector, recognizer, ref_embedding)
if __name__ == "__main__":
main()
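
As a complement to the webcam loop above, a hedged sketch of a one-shot, image-to-image comparison using the same factories; the file names and the 0.4 threshold are illustrative.

import cv2
import numpy as np
from uniface.detection import create_detector
from uniface.recognition import create_recognizer
from uniface.face_utils import compute_similarity

detector = create_detector("scrfd")
recognizer = create_recognizer("arcface")

def embed_first_face(path: str) -> np.ndarray:
    image = cv2.imread(path)
    if image is None:
        raise RuntimeError(f"Failed to load image: {path}")
    faces = detector.detect(image)
    if not faces:
        raise RuntimeError(f"No face found in {path}")
    landmarks = np.array(faces[0]["landmarks"])
    return recognizer.get_normalized_embedding(image, landmarks)

sim = compute_similarity(embed_first_face("person_a.jpg"), embed_first_face("person_b.jpg"))
print("match" if sim > 0.4 else "no match", f"(similarity={sim:.2f})")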

View File

@@ -2,18 +2,21 @@ import cv2
import argparse
import numpy as np
from uniface.detection import RetinaFace
from uniface.constants import RetinaFaceWeights
from uniface.recognition import ArcFace
# Use the new high-level factory functions for consistency
from uniface.detection import create_detector
from uniface.recognition import create_recognizer
# Import enums for argument choices
from uniface.constants import RetinaFaceWeights, ArcFaceWeights, MobileFaceWeights, SphereFaceWeights
def run_inference(detector, recognizer, image_path):
def run_inference(detector, recognizer, image_path: str):
"""
Detect faces and extract embeddings from a single image.
Args:
detector (RetinaFace): Initialized face detector.
recognizer (ArcFace): Face recognition model.
detector: Initialized face detector.
recognizer: Initialized face recognition model.
image_path (str): Path to the input image.
"""
image = cv2.imread(image_path)
@@ -21,36 +24,53 @@ def run_inference(detector, recognizer, image_path):
print(f"Error: Failed to load image from '{image_path}'")
return
boxes, landmarks = detector.detect(image)
faces = detector.detect(image)
if len(boxes) == 0:
if not faces:
print("No faces detected.")
return
print(f"Detected {len(boxes)} face(s). Extracting embeddings...")
print(f"Detected {len(faces)} face(s). Extracting embeddings for the first face...")
for i, landmark in enumerate(landmarks[:1]):
embedding = recognizer.get_embedding(image, landmark)
norm_embedding = recognizer.get_normalized_embedding(image, landmark)
print("embedding:", np.sum(embedding))
print("norm embedding:",np.sum(norm_embedding))
# Process the first detected face
first_face = faces[0]
landmarks = np.array(first_face['landmarks']) # Convert landmarks to numpy array
# Extract embedding using the landmarks from the face dictionary
embedding = recognizer.get_embedding(image, landmarks)
norm_embedding = recognizer.get_normalized_embedding(image, landmarks)
# Print some info about the embeddings
print(f" - Embedding shape: {embedding.shape}")
print(f" - L2 norm of unnormalized embedding: {np.linalg.norm(embedding):.4f}")
print(f" - L2 norm of normalized embedding: {np.linalg.norm(norm_embedding):.4f}")
def main():
parser = argparse.ArgumentParser(description="Extract face embeddings from a single image.")
parser.add_argument("--image", type=str, required=True, help="Path to the input image.")
parser.add_argument(
"--model",
"--detector",
type=str,
default="MNET_V2",
choices=[m.name for m in RetinaFaceWeights],
help="RetinaFace model variant to use."
default="retinaface",
choices=['retinaface', 'scrfd'],
help="Face detection method to use."
)
parser.add_argument(
"--recognizer",
type=str,
default="arcface",
choices=['arcface', 'mobileface', 'sphereface'],
help="Face recognition method to use."
)
args = parser.parse_args()
detector = RetinaFace(model_name=RetinaFaceWeights[args.model])
recognizer = ArcFace()
print(f"Initializing detector: {args.detector}")
detector = create_detector(method=args.detector, model_name=RetinaFaceWeights.MNET_V2)
print(f"Initializing recognizer: {args.recognizer}")
recognizer = create_recognizer(method=args.recognizer)
run_inference(detector, recognizer, args.image)

View File

@@ -1,69 +0,0 @@
import cv2
import argparse
import numpy as np
from uniface.detection import RetinaFace
from uniface.constants import RetinaFaceWeights
from uniface.recognition import ArcFace
from uniface.face_utils import compute_similarity
def extract_reference_embedding(detector, recognizer, image_path):
image = cv2.imread(image_path)
if image is None:
raise RuntimeError(f"Failed to load image: {image_path}")
boxes, landmarks = detector.detect(image)
if len(boxes) == 0:
raise RuntimeError("No faces found in reference image.")
embedding = recognizer.get_embedding(image, landmarks[0])
return embedding
def run_video(detector, recognizer, ref_embedding, threshold=0.30):
cap = cv2.VideoCapture(0)
if not cap.isOpened():
raise RuntimeError("Webcam could not be opened.")
while True:
ret, frame = cap.read()
if not ret:
break
boxes, landmarks = detector.detect(frame)
for box, lm in zip(boxes, landmarks):
x1, y1, x2, y2 = map(int, box[:4])
embedding = recognizer.get_embedding(frame, lm)
sim = compute_similarity(ref_embedding, embedding)
label = f"Match ({sim:.2f})" if sim > threshold else f"Unknown ({sim:.2f})"
color = (0, 255, 0) if sim > threshold else (0, 0, 255)
cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
cv2.putText(frame, label, (x1, y1 - 10),
cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)
cv2.imshow("Face Recognition", frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
cap.release()
cv2.destroyAllWindows()
def main():
parser = argparse.ArgumentParser(description="Face recognition using a reference image.")
parser.add_argument("--image", type=str, required=True, help="Path to the reference face image.")
parser.add_argument("--model", type=str, default="MNET_V2",
choices=[m.name for m in RetinaFaceWeights], help="Face detector model.")
args = parser.parse_args()
detector = RetinaFace(model_name=RetinaFaceWeights[args.model])
recognizer = ArcFace()
ref_embedding = extract_reference_embedding(detector, recognizer, args.image)
run_video(detector, recognizer, ref_embedding)
if __name__ == "__main__":
main()

View File

@@ -15,7 +15,10 @@ __license__ = "MIT"
__author__ = "Yakhyokhuja Valikhujaev"
__version__ = "0.1.8"
from .detection import detect_faces, create_detector, list_available_detectors
from .recognition import create_recognizer
from .landmark import create_landmarker
from uniface.face_utils import face_alignment, compute_similarity
from uniface.model_store import verify_model_weights
@@ -25,22 +28,20 @@ from uniface.log import Logger
__all__ = [
# Metadata
"__version__",
"__author__",
"__license__",
'__author__',
'__license__',
'__version__',
# Core functions
'detect_faces',
'create_detector',
'create_landmarker',
'create_recognizer',
'detect_faces',
'list_available_detectors',
# Utility functions
"face_alignment",
"compute_similarity",
"verify_model_weights",
"draw_detections",
'compute_similarity',
'draw_detections',
'face_alignment',
'verify_model_weights',
# Classes
"Logger",
'Logger'
]
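
With this updated __init__.py, the factories and utilities are importable from the package root; a short sketch under that assumption:

from uniface import (
    create_detector,
    create_landmarker,
    create_recognizer,
    compute_similarity,
)

detector = create_detector("retinaface")
recognizer = create_recognizer("arcface")
landmarker = create_landmarker("106")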

View File

@@ -1 +1,32 @@
from .model import Landmark
# Copyright 2025 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
from .models import Landmark106
from .base import BaseLandmarker
def create_landmarker(method: str = '106', **kwargs) -> BaseLandmarker:
"""
Factory function to create facial landmark predictors.
Args:
method (str): Landmark prediction method. Options: '106'.
**kwargs: Model-specific parameters.
Returns:
Initialized landmarker instance.
"""
method = method.lower()
if method == '106':
return Landmark106(**kwargs)
else:
available = ['106']
raise ValueError(f"Unsupported method: '{method}'. Available: {available}")
__all__ = [
"create_landmarker",
"Landmark106",
"BaseLandmarker"
]
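
A short usage sketch for the landmark factory, assuming the detector interface from this commit; the image path is illustrative.

import cv2
import numpy as np
from uniface.detection import create_detector
from uniface.landmark import create_landmarker

detector = create_detector("retinaface")
landmarker = create_landmarker("106")          # the default method

image = cv2.imread("assets/face.jpg")          # illustrative path
faces = detector.detect(image)
if faces:
    bbox = np.array(faces[0]["bbox"])
    points = landmarker.get_landmarks(image, bbox)
    print(points.shape)                        # expected: (106, 2)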

uniface/landmark/base.py (new file, 30 lines)
View File

@@ -0,0 +1,30 @@
# Copyright 2025 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
from abc import ABC, abstractmethod
import numpy as np
class BaseLandmarker(ABC):
"""
Abstract Base Class for all facial landmark models.
"""
@abstractmethod
def get_landmarks(self, image: np.ndarray, bbox: np.ndarray) -> np.ndarray:
"""
Predicts facial landmarks for a given face bounding box.
This method defines the standard interface for all landmark predictors.
It takes a full image and a bounding box for a single face and returns
the predicted keypoints for that face.
Args:
image (np.ndarray): The full source image in BGR format.
bbox (np.ndarray): A bounding box of a face [x1, y1, x2, y2].
Returns:
np.ndarray: An array of predicted landmark points with shape (N, 2),
where N is the number of landmarks.
"""
raise NotImplementedError
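
For reference, a hedged sketch of how a new backend would satisfy this interface; Dummy68Landmarker is hypothetical and exists only to illustrate the contract.

import numpy as np
from uniface.landmark import BaseLandmarker

class Dummy68Landmarker(BaseLandmarker):
    def get_landmarks(self, image: np.ndarray, bbox: np.ndarray) -> np.ndarray:
        # Placeholder logic: spread 68 points evenly across the face box.
        x1, y1, x2, y2 = bbox[:4]
        xs = np.linspace(x1, x2, 68)
        ys = np.linspace(y1, y2, 68)
        return np.stack([xs, ys], axis=1)      # shape (68, 2)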

View File

@@ -1,247 +0,0 @@
# Copyright 2025 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
import cv2
import numpy as np
import onnxruntime as ort
from typing import Tuple
from uniface.log import Logger
from uniface.constants import LandmarkWeights
from uniface.model_store import verify_model_weights
from uniface.face_utils import bbox_center_alignment, transform_points_2d
__all__ = ['Landmark']
class Landmark:
"""
Facial landmark detection model for predicting 106 facial keypoints using ONNX model.
This class wraps a pretrained facial landmark model to detect 106 key facial points
such as eyes, eyebrows, nose, lips, and jawline from a given face bounding box.
It handles model verification, input preprocessing, ONNX inference execution,
and projection of landmark coordinates back to the original image space.
Attributes:
input_size (Tuple[int, int]): Model's expected input resolution (width, height).
input_mean (float): Mean value used for input normalization.
input_std (float): Standard deviation used for input normalization.
model_path (str): Path to the verified ONNX model file.
session (onnxruntime.InferenceSession): ONNX Runtime session for inference.
input_names (List[str]): List of input node names.
output_names (List[str]): List of output node names.
lmk_dim (int): Number of dimensions per landmark point (typically 2 for x, y).
lmk_num (int): Total number of landmark points predicted by the model (106).
Args:
model_name (LandmarkWeights): Enum specifying the landmark model to load.
input_size (Tuple[int, int]): Resolution for model input; defaults to (192, 192).
"""
def __init__(
self,
model_name: LandmarkWeights = LandmarkWeights.DEFAULT,
input_size: Tuple[int, int] = (192, 192)
) -> None:
"""
Initializes the Facial Landmark model for inference.
Args:
model_name: Enum specifying which landmark model weights to use
input_size: Input resolution for the model (width, height)
"""
Logger.info(
f"Initializing Facial Landmark with model={model_name}, "
f"input_size={input_size}"
)
# Initialize configuration
self.input_size = input_size
self.input_std = 1.0
self.input_mean = 0.0
# Get path to model weights
self.model_path = verify_model_weights(model_name)
Logger.info(f"Verified model weights located at: {self.model_path}")
# Initialize model
self._initialize_model()
def _initialize_model(self):
"""
Initialize the ONNX model from the stored model path.
Raises:
RuntimeError: If the model fails to load or initialize.
"""
try:
self.session = ort.InferenceSession(
self.model_path,
providers=["CUDAExecutionProvider", "CPUExecutionProvider"]
)
# Get input configuration
input_metadata = self.session.get_inputs()[0]
input_shape = input_metadata.shape
self.input_size = tuple(input_shape[2:4][::-1]) # Update input size from model
# Get input/output names
self.input_names = [input.name for input in self.session.get_inputs()]
self.output_names = [output.name for output in self.session.get_outputs()]
# Determine landmark dimensions from output shape
output_shape = self.session.get_outputs()[0].shape
self.lmk_dim = 2 # x,y coordinates
self.lmk_num = output_shape[1] // self.lmk_dim # Number of landmarks
Logger.info(f"Model initialized with {self.lmk_num} landmarks")
except Exception as e:
Logger.error(f"Failed to load landmark model from '{self.model_path}'", exc_info=True)
raise RuntimeError(f"Failed to initialize landmark model: {e}")
def preprocess(self, image: np.ndarray, bbox: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
"""
Preprocess the input image and bounding box for inference.
Args:
image: Input image in BGR format
bbox: Bounding box coordinates [x1, y1, x2, y2]
Returns:
Tuple containing:
- Preprocessed image blob ready for inference
- Transformation matrix for mapping predictions back to original image
"""
# Calculate face dimensions and center
width, height = bbox[2] - bbox[0], bbox[3] - bbox[1]
center = (bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2
# Determine scale to fit face with some margin
scale = self.input_size[0] / (max(width, height) * 1.5)
rotation = 0.0
# Align face using center, scale and rotation
aligned_face, transform_matrix = bbox_center_alignment(
image, center, self.input_size[0], scale, rotation
)
# Convert to blob format for inference
face_blob = cv2.dnn.blobFromImage(
aligned_face,
1.0 / self.input_std,
self.input_size,
(self.input_mean, self.input_mean, self.input_mean),
swapRB=True # Convert BGR to RGB
)
return face_blob, transform_matrix
def postprocess(self, predictions: np.ndarray, transform_matrix: np.ndarray) -> np.ndarray:
"""
Convert raw model predictions to image coordinates.
Args:
predictions: Raw landmark coordinates from model output
transform_matrix: Affine transformation matrix from preprocessing
Returns:
Landmarks in original image coordinates
"""
# Reshape to pairs of x,y coordinates
landmarks = predictions.reshape((-1, 2))
# Denormalize coordinates to pixel space
landmarks[:, 0:2] += 1 # Shift from [-1,1] to [0,2] range
landmarks[:, 0:2] *= (self.input_size[0] // 2) # Scale to pixel coordinates
# Invert the transformation to map back to original image
inverse_matrix = cv2.invertAffineTransform(transform_matrix)
landmarks = transform_points_2d(landmarks, inverse_matrix)
return landmarks
def predict(self, image: np.ndarray, bbox: np.ndarray) -> np.ndarray:
"""
Predict facial landmarks for the given image and face bounding box.
Args:
image: Input image in BGR format
bbox: Face bounding box [x1, y1, x2, y2]
Returns:
Array of facial landmarks in original image coordinates
"""
# Preprocess image
face_blob, transform_matrix = self.preprocess(image, bbox)
# Run inference
raw_predictions = self.session.run(
self.output_names,
{self.input_names[0]: face_blob}
)[0][0]
# Postprocess to get landmarks in original image space
landmarks = self.postprocess(raw_predictions, transform_matrix)
return landmarks
# TODO: For testing purposes only, remote later
if __name__ == "__main__":
from uniface.detection import RetinaFace
from uniface.constants import RetinaFaceWeights
face_detector = RetinaFace(
model_name=RetinaFaceWeights.MNET_V2,
conf_thresh=0.5,
pre_nms_topk=5000,
nms_thresh=0.4,
post_nms_topk=750,
dynamic_size=False,
input_size=(640, 640)
)
model = Landmark()
cap = cv2.VideoCapture(0)
if not cap.isOpened():
print("Webcam not available.")
exit()
print("Press 'q' to quit.")
while True:
ret, frame = cap.read()
if not ret:
print("Frame capture failed.")
break
boxes, landmarks = face_detector.detect(frame)
if boxes is None or len(boxes) == 0:
cv2.imshow("Facial Landmark Detection", frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
continue
for box in boxes:
x1, y1, x2, y2, score = box.astype(int)
lmk = model.predict(frame, box[:4])
for (x, y) in lmk.astype(int):
cv2.circle(frame, (x, y), 2, (0, 255, 0), -1)
cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)
cv2.imshow("Facial Landmark Detection", frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
cap.release()
cv2.destroyAllWindows()

uniface/landmark/models.py (new file, 217 lines)
View File

@@ -0,0 +1,217 @@
# Copyright 2025 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
import cv2
import numpy as np
import onnxruntime as ort
from typing import Tuple
from uniface.log import Logger
from uniface.constants import LandmarkWeights
from uniface.model_store import verify_model_weights
from uniface.face_utils import bbox_center_alignment, transform_points_2d
from .base import BaseLandmarker
__all__ = ['Landmark106']
class Landmark106(BaseLandmarker):
"""Facial landmark model for predicting 106 facial keypoints.
This class implements the BaseLandmarker and provides an end-to-end
pipeline for 106-point facial landmark detection. It handles model
loading, preprocessing of a face crop based on a bounding box,
inference, and post-processing to map landmarks back to the
original image coordinates.
Args:
model_name (LandmarkWeights): The enum specifying the landmark model to load.
Defaults to `LandmarkWeights.DEFAULT`.
input_size (Tuple[int, int]): The resolution (width, height) for the model's
input. Defaults to (192, 192).
Example:
>>> # Assume 'image' is a loaded image and 'bbox' is a face bounding box
>>> # bbox = [x1, y1, x2, y2]
>>>
>>> landmarker = Landmark106()
>>> landmarks = landmarker.get_landmarks(image, bbox)
>>> print(landmarks.shape)
(106, 2)
"""
def __init__(
self,
model_name: LandmarkWeights = LandmarkWeights.DEFAULT,
input_size: Tuple[int, int] = (192, 192)
) -> None:
Logger.info(
f"Initializing Facial Landmark with model={model_name}, "
f"input_size={input_size}"
)
self.input_size = input_size
self.input_std = 1.0
self.input_mean = 0.0
self.model_path = verify_model_weights(model_name)
self._initialize_model()
def _initialize_model(self):
"""
Initialize the ONNX model from the stored model path.
Raises:
RuntimeError: If the model fails to load or initialize.
"""
try:
self.session = ort.InferenceSession(
self.model_path,
providers=["CUDAExecutionProvider", "CPUExecutionProvider"]
)
# Get input configuration
input_metadata = self.session.get_inputs()[0]
input_shape = input_metadata.shape
self.input_size = tuple(input_shape[2:4][::-1]) # Update input size from model
# Get input/output names
self.input_names = [input.name for input in self.session.get_inputs()]
self.output_names = [output.name for output in self.session.get_outputs()]
# Determine landmark dimensions from output shape
output_shape = self.session.get_outputs()[0].shape
self.lmk_dim = 2 # x,y coordinates
self.lmk_num = output_shape[1] // self.lmk_dim # Number of landmarks
Logger.info(f"Model initialized with {self.lmk_num} landmarks")
except Exception as e:
Logger.error(f"Failed to load landmark model from '{self.model_path}'", exc_info=True)
raise RuntimeError(f"Failed to initialize landmark model: {e}")
def preprocess(self, image: np.ndarray, bbox: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
"""Prepares a face crop for inference.
This method takes a face bounding box, performs a center alignment to
warp the face into the model's required input size, and then creates
a normalized blob ready for the ONNX session.
Args:
image (np.ndarray): The full source image in BGR format.
bbox (np.ndarray): The bounding box of the face [x1, y1, x2, y2].
Returns:
Tuple[np.ndarray, np.ndarray]: A tuple containing:
- The preprocessed image blob ready for inference.
- The affine transformation matrix used for alignment.
"""
width, height = bbox[2] - bbox[0], bbox[3] - bbox[1]
center = ((bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2)
scale = self.input_size[0] / (max(width, height) * 1.5)
aligned_face, transform_matrix = bbox_center_alignment(image, center, self.input_size[0], scale, 0.0)
face_blob = cv2.dnn.blobFromImage(
aligned_face, 1.0 / self.input_std, self.input_size,
(self.input_mean, self.input_mean, self.input_mean), swapRB=True
)
return face_blob, transform_matrix
def postprocess(self, predictions: np.ndarray, transform_matrix: np.ndarray) -> np.ndarray:
"""Converts raw model predictions back to original image coordinates.
This method reshapes the model's flat output array into landmark points,
denormalizes them to the model's input space, and then applies an
inverse affine transformation to map them back to the original image space.
Args:
predictions (np.ndarray): Raw landmark coordinates from the model output.
transform_matrix (np.ndarray): The affine transformation matrix from preprocessing.
Returns:
np.ndarray: An array of landmark points in the original image's coordinates.
"""
landmarks = predictions.reshape((-1, 2))
landmarks[:, 0:2] += 1
landmarks[:, 0:2] *= (self.input_size[0] // 2)
inverse_matrix = cv2.invertAffineTransform(transform_matrix)
landmarks = transform_points_2d(landmarks, inverse_matrix)
return landmarks
def get_landmarks(self, image: np.ndarray, bbox: np.ndarray) -> np.ndarray:
"""Predicts facial landmarks for the given image and face bounding box.
This is the main public method that orchestrates the full pipeline of
preprocessing, inference, and post-processing.
Args:
image (np.ndarray): The full source image in BGR format.
bbox (np.ndarray): A bounding box of a face [x1, y1, x2, y2].
Returns:
np.ndarray: An array of predicted landmark points with shape (106, 2).
"""
face_blob, transform_matrix = self.preprocess(image, bbox)
raw_predictions = self.session.run(
self.output_names, {self.input_names[0]: face_blob}
)[0][0]
landmarks = self.postprocess(raw_predictions, transform_matrix)
return landmarks
# TODO: For testing purposes only, remove later
if __name__ == "__main__":
# UPDATED: Use the high-level factory functions
from uniface.detection import create_detector
from uniface.landmark import create_landmarker
# 1. Create the detector and landmarker using the new API
face_detector = create_detector('retinaface')
landmarker = create_landmarker() # Uses the default '106' method
cap = cv2.VideoCapture(0)
if not cap.isOpened():
print("Webcam not available.")
exit()
print("Press 'q' to quit.")
while True:
ret, frame = cap.read()
if not ret:
print("Frame capture failed.")
break
# 2. The detect method returns a list of dictionaries
faces = face_detector.detect(frame)
if not faces:
cv2.imshow("Facial Landmark Detection", frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
continue
# 3. Loop through the list of face dictionaries
for face in faces:
# Extract the bounding box
bbox = face['bbox']
# 4. Get landmarks for the current face using its bounding box
landmarks = landmarker.get_landmarks(frame, bbox)
# --- Drawing Logic ---
# Draw the landmarks
for (x, y) in landmarks.astype(int):
cv2.circle(frame, (x, y), 2, (0, 255, 0), -1)
# Draw the bounding box
x1, y1, x2, y2 = map(int, bbox)
cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)
cv2.imshow("Facial Landmark Detection", frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
cap.release()
cv2.destroyAllWindows()

View File

@@ -1,2 +1,63 @@
from .base import PreprocessConfig
from .models import SphereFace, MobileFace, ArcFace
# Copyright 2025 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
from typing import Dict
from .models import ArcFace, MobileFace, SphereFace
from .base import BaseRecognizer
from uniface.constants import ArcFaceWeights, MobileFaceWeights, SphereFaceWeights
def create_recognizer(method: str = 'arcface', **kwargs) -> BaseRecognizer:
"""
Factory function to create face recognizers.
This function initializes and returns a face recognizer instance based on the
specified method. It acts as a high-level interface to the underlying
model classes like ArcFace, MobileFace, etc.
Args:
method (str): The recognition method to use.
Options: 'arcface' (default), 'mobileface', 'sphereface'.
**kwargs: Model-specific parameters passed to the recognizer's constructor.
For example, `model_name` can be used to select a specific
pre-trained weight from the available enums (e.g., `ArcFaceWeights.MNET`).
Returns:
BaseRecognizer: An initialized recognizer instance ready for use.
Raises:
ValueError: If the specified `method` is not supported.
Examples:
>>> # Create the default ArcFace recognizer
>>> recognizer = create_recognizer()
>>> # Create a specific MobileFace recognizer
>>> from uniface.constants import MobileFaceWeights
>>> recognizer = create_recognizer(
... 'mobileface',
... model_name=MobileFaceWeights.MNET_V2
... )
>>> # Create a SphereFace recognizer
>>> recognizer = create_recognizer('sphereface')
"""
method = method.lower()
if method == 'arcface':
return ArcFace(**kwargs)
elif method == 'mobileface':
return MobileFace(**kwargs)
elif method == 'sphereface':
return SphereFace(**kwargs)
else:
available = ['arcface', 'mobileface', 'sphereface']
raise ValueError(f"Unsupported method: '{method}'. Available: {available}")
__all__ = [
"create_recognizer",
"ArcFace",
"MobileFace",
"SphereFace",
"BaseRecognizer",
]

View File

@@ -2,20 +2,15 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
from abc import ABC, abstractmethod
import cv2
import numpy as np
import onnxruntime as ort
from dataclasses import dataclass
from typing import Tuple, Union, List
from uniface.log import Logger
from uniface.model_store import verify_model_weights
from uniface.face_utils import face_alignment
from uniface.constants import SphereFaceWeights, MobileFaceWeights, ArcFaceWeights
__all__ = ["BaseModel", "PreprocessConfig"]
@dataclass
@@ -28,38 +23,25 @@ class PreprocessConfig:
input_size: Tuple[int, int] = (112, 112)
class BaseModel:
class BaseRecognizer(ABC):
"""
Unified Face Encoder supporting multiple model families (e.g., SphereFace, MobileFace).
Abstract Base Class for all face recognition models.
It provides the core functionality for preprocessing, inference, and embedding extraction.
"""
def __init__(
self,
model_name: Union[SphereFaceWeights, MobileFaceWeights, ArcFaceWeights] = MobileFaceWeights.MNET_V2,
preprocessing: PreprocessConfig = PreprocessConfig(),
) -> None:
@abstractmethod
def __init__(self, model_path: str, preprocessing: PreprocessConfig) -> None:
"""
Initializes the FaceEncoder model for inference.
Initializes the model. Subclasses must call this.
Args:
model_name: Selected model weight enum.
preprocessing: Configuration for input normalization and resizing.
model_path (str): The direct path to the verified ONNX model.
preprocessing (PreprocessConfig): The configuration for preprocessing.
"""
# Store preprocessing parameters
self.input_mean = preprocessing.input_mean
self.input_std = preprocessing.input_std
self.input_size = preprocessing.input_size
Logger.info(
f"Initializing Face Recognition with model={model_name}, "
f"input_mean={self.input_mean}, input_std={self.input_std}, "
f"input_size={self.input_size}"
)
# Get path to model weights and initialize model
self.model_path = verify_model_weights(model_name)
Logger.info(f"Verified model weights located at: {self.model_path}")
self.model_path = model_path
self._initialize_model()
def _initialize_model(self) -> None:
@@ -152,14 +134,15 @@ class BaseModel:
def get_normalized_embedding(self, image: np.ndarray, landmarks: np.ndarray) -> np.ndarray:
"""
Extracts l2 normalized face embedding vector from an image
Extracts an L2-normalized face embedding vector from an image.
Args:
image: Input face image (BGR format).
landmarks: Facial landmarks (5 points for alignment).
Returns:
Normalied face embedding vector (typically 512-dimensional).
Normalized face embedding vector (typically 512-dimensional).
"""
embedding = self.get_embedding(image, landmarks)
return embedding / np.linalg.norm(embedding)
norm = np.linalg.norm(embedding)
return embedding / norm if norm > 0 else embedding
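
Since get_normalized_embedding returns unit-length vectors, cosine similarity between two embeddings reduces to a dot product; a small illustration with synthetic vectors (not real embeddings):

import numpy as np

a = np.random.randn(512).astype(np.float32)
b = np.random.randn(512).astype(np.float32)
a /= np.linalg.norm(a)
b /= np.linalg.norm(b)

cosine = float(np.dot(a, b))   # equals cos(angle) because both vectors have unit norm
print(f"{cosine:.4f}")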

View File

@@ -4,27 +4,35 @@
from typing import Optional
from uniface.constants import SphereFaceWeights, MobileFaceWeights, ArcFaceWeights
from .base import BaseModel, PreprocessConfig
from uniface.constants import ArcFaceWeights, MobileFaceWeights, SphereFaceWeights
from uniface.model_store import verify_model_weights
from .base import BaseRecognizer, PreprocessConfig
__all__ = ["ArcFace", "MobileFace", "SphereFace"]
__all__ = ["SphereFace", "MobileFace", "ArcFace"]
class ArcFace(BaseRecognizer):
"""ArcFace model for robust face recognition.
class SphereFace(BaseModel):
"""
SphereFace face encoder class.
This class loads a SphereFace model for face embedding extraction.
It supports configurable preprocessing, with a default mean/std and input size of 112x112.
This class provides a concrete implementation of the BaseRecognizer,
pre-configured for ArcFace models. It handles the loading of specific
ArcFace weights and sets up the appropriate default preprocessing.
Args:
model_name (SphereFaceWeights): Enum value representing the model to load. Defaults to SphereFaceWeights.SPHERE20.
preprocessing (Optional[PreprocessConfig]): Preprocessing config (mean, std, size). Defaults to standard 112x112 with normalization.
model_name (ArcFaceWeights): The specific ArcFace model variant to use.
Defaults to `ArcFaceWeights.MNET`.
preprocessing (Optional[PreprocessConfig]): An optional custom preprocessing
configuration. If None, a default config for ArcFace is used.
Example:
>>> from uniface.recognition import ArcFace
>>> recognizer = ArcFace()
>>> # embedding = recognizer.get_normalized_embedding(image, landmarks)
"""
def __init__(
self, model_name: SphereFaceWeights = SphereFaceWeights.SPHERE20,
self,
model_name: ArcFaceWeights = ArcFaceWeights.MNET,
preprocessing: Optional[PreprocessConfig] = None
) -> None:
if preprocessing is None:
@@ -33,23 +41,32 @@ class SphereFace(BaseModel):
input_std=127.5,
input_size=(112, 112)
)
super().__init__(model_name=model_name, preprocessing=preprocessing)
model_path = verify_model_weights(model_name)
super().__init__(model_path=model_path, preprocessing=preprocessing)
class MobileFace(BaseModel):
"""
MobileFace face encoder class.
class MobileFace(BaseRecognizer):
"""Lightweight MobileFaceNet model for fast face recognition.
Loads a lightweight MobileFaceNet model for fast face embedding extraction.
Default input normalization and resizing applied if preprocessing is not provided.
This class provides a concrete implementation of the BaseRecognizer,
pre-configured for MobileFaceNet models. It is optimized for speed,
making it suitable for edge devices.
Args:
model_name (MobileFaceWeights): Enum value specifying the MobileFace model. Defaults to MobileFaceWeights.MNET_V2.
preprocessing (Optional[PreprocessConfig]): Preprocessing config. If None, uses standard normalization and 112x112 input size.
model_name (MobileFaceWeights): The specific MobileFaceNet model variant to use.
Defaults to `MobileFaceWeights.MNET_V2`.
preprocessing (Optional[PreprocessConfig]): An optional custom preprocessing
configuration. If None, a default config for MobileFaceNet is used.
Example:
>>> from uniface.recognition import MobileFace
>>> recognizer = MobileFace()
>>> # embedding = recognizer.get_normalized_embedding(image, landmarks)
"""
def __init__(
self, model_name: MobileFaceWeights = MobileFaceWeights.MNET_V2,
self,
model_name: MobileFaceWeights = MobileFaceWeights.MNET_V2,
preprocessing: Optional[PreprocessConfig] = None
) -> None:
if preprocessing is None:
@@ -58,23 +75,32 @@ class MobileFace(BaseModel):
input_std=127.5,
input_size=(112, 112)
)
super().__init__(model_name=model_name)
model_path = verify_model_weights(model_name)
super().__init__(model_path=model_path, preprocessing=preprocessing)
class ArcFace(BaseModel):
"""
ArcFace face encoder class.
class SphereFace(BaseRecognizer):
"""SphereFace model using angular margin for face recognition.
Loads an ArcFace model (e.g., ResNet-based) for robust face recognition embedding generation.
Applies standard preprocessing unless overridden.
This class provides a concrete implementation of the BaseRecognizer,
pre-configured for SphereFace models, which were among the first to
introduce angular margin loss functions.
Args:
model_name (ArcFaceWeights): Enum for the ArcFace model variant. Defaults to ArcFaceWeights.MNET.
preprocessing (Optional[PreprocessConfig]): Preprocessing settings. Defaults to standard normalization and resizing if not specified.
model_name (SphereFaceWeights): The specific SphereFace model variant to use.
Defaults to `SphereFaceWeights.SPHERE20`.
preprocessing (Optional[PreprocessConfig]): An optional custom preprocessing
configuration. If None, a default config for SphereFace is used.
Example:
>>> from uniface.recognition import SphereFace
>>> recognizer = SphereFace()
>>> # embedding = recognizer.get_normalized_embedding(image, landmarks)
"""
def __init__(
self, model_name: ArcFaceWeights = ArcFaceWeights.MNET,
self,
model_name: SphereFaceWeights = SphereFaceWeights.SPHERE20,
preprocessing: Optional[PreprocessConfig] = None
) -> None:
if preprocessing is None:
@@ -83,4 +109,6 @@ class ArcFace(BaseModel):
input_std=127.5,
input_size=(112, 112)
)
super().__init__(model_name=model_name)
model_path = verify_model_weights(model_name)
super().__init__(model_path=model_path, preprocessing=preprocessing)

View File

@@ -4,42 +4,47 @@
import cv2
import numpy as np
from typing import List, Union
def draw_detections(image, detections, vis_threshold: float = 0.6):
def draw_detections(
image: np.ndarray,
bboxes: Union[np.ndarray, List[List[float]]],
scores: Union[np.ndarray, List[float]],
landmarks: Union[np.ndarray, List[List[List[float]]]],
vis_threshold: float = 0.6
):
"""
Draw bounding boxes and landmarks on the image with thickness scaled by bbox size.
Draws bounding boxes, scores, and landmarks from separate lists onto an image.
Args:
image (ndarray): Image to draw detections on.
detections (tuple): (bounding boxes, landmarks) as NumPy arrays.
vis_threshold (float): Confidence threshold for filtering detections.
image (np.ndarray): The image to draw on.
bboxes (list or np.ndarray): A list of bounding boxes, e.g., [[x1,y1,x2,y2], ...].
scores (list or np.ndarray): A list of confidence scores.
landmarks (list or np.ndarray): A list of landmark sets, e.g., [[[x,y],...],...].
vis_threshold (float): Confidence threshold for filtering which detections to draw.
"""
_colors = [(0, 0, 255), (0, 255, 255), (255, 0, 255), (0, 255, 0), (255, 0, 0)]
# Unpack detections
boxes, landmarks = detections
scores = boxes[:, 4]
# Filter detections by score
keep_indices = [i for i, score in enumerate(scores) if score >= vis_threshold]
# Filter detections by confidence threshold
filtered = scores >= vis_threshold
boxes = boxes[filtered, :4].astype(np.int32)
landmarks = landmarks[filtered]
scores = scores[filtered]
# Draw the filtered detections
for i in keep_indices:
bbox = np.array(bboxes[i], dtype=np.int32)
score = scores[i]
landmark_set = np.array(landmarks[i], dtype=np.int32)
# Draw bounding boxes, scores, and landmarks
for box, score, landmark in zip(boxes, scores, landmarks):
# Calculate thickness proportional to the bbox size
thickness = max(1, int(min(box[2] - box[0], box[3] - box[1]) / 100))
# Calculate adaptive thickness
thickness = max(1, int(min(bbox[2] - bbox[0], bbox[3] - bbox[1]) / 100))
# Draw rectangle
cv2.rectangle(image, tuple(box[:2]), tuple(box[2:]), (0, 0, 255), thickness)
# Draw bounding box
cv2.rectangle(image, tuple(bbox[:2]), tuple(bbox[2:]), (0, 0, 255), thickness)
# Draw score
cv2.putText(image, f"{score:.2f}", (box[0], box[1] + 12),
cv2.putText(image, f"{score:.2f}", (bbox[0], bbox[1] - 10),
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), thickness)
# Draw landmarks
for point, color in zip(landmark, _colors):
cv2.circle(image, tuple(point), thickness, color, -1)
for j, point in enumerate(landmark_set):
cv2.circle(image, tuple(point), thickness + 1, _colors[j % len(_colors)], -1)