# Copyright 2025 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import List, Tuple, Union

import cv2
import numpy as np

from uniface.face_utils import face_alignment
from uniface.log import Logger
from uniface.onnx_utils import create_onnx_session


@dataclass
class PreprocessConfig:
    """
    Configuration for preprocessing images before feeding them into the model.
    """

    input_mean: Union[float, List[float]] = 127.5
    input_std: Union[float, List[float]] = 127.5
    input_size: Tuple[int, int] = (112, 112)
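
# A hedged example (values illustrative, not canonical for any specific model):
# the defaults match common 112x112 ArcFace-style preprocessing, while a
# list-valued std routes BaseRecognizer.preprocess() below through the
# per-channel normalization branch instead of cv2.dnn.blobFromImage.
#
#   scalar_cfg = PreprocessConfig()  # mean=127.5, std=127.5, size=(112, 112)
#   per_channel_cfg = PreprocessConfig(
#       input_mean=[127.5, 127.5, 127.5],
#       input_std=[128.0, 128.0, 128.0],
#       input_size=(112, 112),
#   )
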
class BaseRecognizer(ABC):
    """
    Abstract Base Class for all face recognition models.
    It provides the core functionality for preprocessing, inference, and embedding extraction.
    """

    @abstractmethod
    def __init__(self, model_path: str, preprocessing: PreprocessConfig) -> None:
        """
        Initializes the model. Subclasses must call this.

        Args:
            model_path (str): The direct path to the verified ONNX model.
            preprocessing (PreprocessConfig): The configuration for preprocessing.
        """
        self.input_mean = preprocessing.input_mean
        self.input_std = preprocessing.input_std
        self.input_size = preprocessing.input_size

        self.model_path = model_path
        self._initialize_model()

    def _initialize_model(self) -> None:
        """
        Loads the ONNX model and prepares it for inference.

        Raises:
            RuntimeError: If the model fails to load or initialize.
        """
        try:
            # Initialize model session with available providers
            self.session = create_onnx_session(self.model_path)

            # Extract input configuration
            input_cfg = self.session.get_inputs()[0]
            self.input_name = input_cfg.name

            # Verify input dimensions match our configuration
            input_shape = input_cfg.shape
            model_input_size = tuple(input_shape[2:4][::-1])  # (width, height)
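            # NOTE (assumption, illustrative): this slicing assumes the ONNX
            # graph reports a 4-D NCHW input, e.g. [1, 3, 112, 112], so
            # shape[2:4] is (height, width) and reversing gives (width, height).
            # onnxruntime reports symbolic dims as strings or None, in which
            # case the comparison below simply logs the warning.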
            if model_input_size != self.input_size:
                Logger.warning(f"Model input size {model_input_size} differs from configured size {self.input_size}")

            # Extract output configuration
            self.output_names = [output.name for output in self.session.get_outputs()]
            self.output_shape = self.session.get_outputs()[0].shape

            assert len(self.output_names) == 1, "Expected only one output node."
            Logger.info(f"Successfully initialized face encoder from {self.model_path}")

        except Exception as e:
            Logger.error(f"Failed to load face encoder model from '{self.model_path}'", exc_info=True)
            raise RuntimeError(f"Failed to initialize model session for '{self.model_path}'") from e

    def preprocess(self, face_img: np.ndarray) -> np.ndarray:
        """
        Preprocess the image: resize, normalize, and convert it to a blob.

        Args:
            face_img: Input image in BGR format.

        Returns:
            Preprocessed image as a NumPy array ready for inference.
        """
        resized_img = cv2.resize(face_img, self.input_size)

        if isinstance(self.input_std, (list, tuple)):
            # Per-channel normalization
            rgb_img = cv2.cvtColor(resized_img, cv2.COLOR_BGR2RGB).astype(np.float32)
            normalized_img = (rgb_img - np.array(self.input_mean, dtype=np.float32)) / \
                np.array(self.input_std, dtype=np.float32)

            # Change to NCHW (batch, channels, height, width)
            blob = np.transpose(normalized_img, (2, 0, 1))  # CHW
            blob = np.expand_dims(blob, axis=0)  # NCHW
        else:
            # Single-value normalization
            blob = cv2.dnn.blobFromImage(
                resized_img,
                scalefactor=1.0 / self.input_std,
                size=self.input_size,
                mean=(self.input_mean, self.input_mean, self.input_mean),
                swapRB=True,  # Convert BGR to RGB
            )

        return blob
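
    # A minimal numeric sketch of the scalar path above (illustrative): with
    # input_mean = input_std = 127.5, pixel 255 maps to (255 - 127.5) / 127.5
    # = 1.0 and pixel 0 maps to -1.0, so the blob lies in [-1, 1] and has
    # shape (1, 3, H, W) after the NCHW reordering.
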
    def get_embedding(self, image: np.ndarray, landmarks: np.ndarray) -> np.ndarray:
        """
        Extracts a face embedding from an image.

        Args:
            image: Input face image (BGR format).
            landmarks: Facial landmarks (5 points for alignment).

        Returns:
            Face embedding vector (typically 512-dimensional).
        """
        # Align face using landmarks
        aligned_face, _ = face_alignment(image, landmarks)

        # Generate embedding from the aligned face
        face_blob = self.preprocess(aligned_face)
        embedding = self.session.run(self.output_names, {self.input_name: face_blob})[0]

        return embedding
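
    # Note: get_embedding() returns the raw model output, which is not
    # length-normalized; for cosine-similarity comparisons prefer
    # get_normalized_embedding() below, where the dot product of two unit
    # vectors is their cosine similarity.
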
    def get_normalized_embedding(self, image: np.ndarray, landmarks: np.ndarray) -> np.ndarray:
        """
        Extracts an L2-normalized face embedding vector from an image.

        Args:
            image: Input face image (BGR format).
            landmarks: Facial landmarks (5 points for alignment).

        Returns:
            Normalized face embedding vector (typically 512-dimensional).
        """
        embedding = self.get_embedding(image, landmarks)
        norm = np.linalg.norm(embedding)
        return embedding / norm if norm > 0 else embedding
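
# ---------------------------------------------------------------------------
# Hedged usage sketch (illustrative, not part of this module): a minimal
# concrete subclass plus a cosine-similarity comparison. The class name,
# model path, images, and landmark arrays below are hypothetical placeholders.
#
#   class ArcFaceRecognizer(BaseRecognizer):
#       def __init__(self, model_path: str) -> None:
#           super().__init__(model_path, PreprocessConfig())
#
#   recognizer = ArcFaceRecognizer("models/arcface.onnx")
#   emb_a = recognizer.get_normalized_embedding(image_a, landmarks_a)
#   emb_b = recognizer.get_normalized_embedding(image_b, landmarks_b)
#   similarity = float(np.dot(emb_a.ravel(), emb_b.ravel()))  # cosine similarity
# ---------------------------------------------------------------------------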