From 67bb13c082a91b617fee63a02149fa0e9645a76f Mon Sep 17 00:00:00 2001
From: yakhyo
Date: Fri, 4 Apr 2025 16:26:38 +0900
Subject: [PATCH] feat: Add support for different mean/std

Introduce PreprocessConfig (input_mean, input_std, input_size) and let
every face encoder accept it, so models that were trained with
per-channel normalisation can be served next to the 127.5/127.5 ones.
Scalar mean/std keep using cv2.dnn.blobFromImage; per-channel values are
applied with NumPy in RGB order.

---
Review notes for this revision of the patch:
 - BaseFaceEncoder.__init__ read `input_size.input_size` (undefined name,
   NameError on construction); it now reads `preprocessing.input_size`.
 - MobileFace and ArcFace now forward `preprocessing` to the base class,
   as SphereFace already did.
 - The `PreprocessConfig()` default argument (one instance shared by every
   encoder) is replaced by `None` and resolved per call.
 - preprocess() takes the per-channel path when either the mean or the
   std is a sequence, not only the std.
 - Blob hashes on the `index` lines are carried over unchanged from the
   original patch.

 uniface/landmark/__init__.py    |  0
 uniface/recognition/__init__.py |  1 +
 uniface/recognition/base.py     | 75 ++++++++++++++++++++++++---------
 uniface/recognition/models.py   | 41 +++++++++++++++++---
 4 files changed, 93 insertions(+), 24 deletions(-)
 create mode 100644 uniface/landmark/__init__.py

diff --git a/uniface/landmark/__init__.py b/uniface/landmark/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/uniface/recognition/__init__.py b/uniface/recognition/__init__.py
index 011662b..83825c1 100644
--- a/uniface/recognition/__init__.py
+++ b/uniface/recognition/__init__.py
@@ -1 +1,2 @@
+from .base import PreprocessConfig
 from .models import SphereFace, MobileFace, ArcFace
diff --git a/uniface/recognition/base.py b/uniface/recognition/base.py
index 5be708e..c793569 100644
--- a/uniface/recognition/base.py
+++ b/uniface/recognition/base.py
@@ -7,7 +7,8 @@
 import cv2
 import numpy as np
 import onnxruntime as ort
-from typing import Tuple, Optional
+from typing import Tuple, Optional, Union, List
+from dataclasses import dataclass
 
 from uniface.face_utils import compute_similarity, face_alignment
 
@@ -16,7 +17,22 @@
 from uniface.constants import SphereFaceWeights, MobileFaceWeights
 from uniface.logger import Logger
 
-__all__ = ["BaseFaceEncoder"]
+__all__ = ["BaseFaceEncoder", "PreprocessConfig"]
+
+
+@dataclass
+class PreprocessConfig:
+    """
+    Configuration for preprocessing images before feeding them into the model.
+
+    Attributes:
+        input_mean: Scalar or per-channel (RGB order) mean subtracted from the image.
+        input_std: Scalar or per-channel (RGB order) std the image is divided by.
+        input_size: Target (width, height) passed to cv2.resize.
+    """
+    input_mean: Union[float, List[float]] = 127.5
+    input_std: Union[float, List[float]] = 127.5
+    input_size: Tuple[int, int] = (112, 112)
 
 
 class BaseFaceEncoder:
@@ -26,7 +42,8 @@ class BaseFaceEncoder:
 
     def __init__(
         self,
-        model_path: Optional[SphereFaceWeights | MobileFaceWeights] = MobileFaceWeights.MNET_V2
+        model_path: Optional[SphereFaceWeights | MobileFaceWeights] = MobileFaceWeights.MNET_V2,
+        preprocessing: Optional[PreprocessConfig] = None,
     ) -> None:
         """
         Initializes the FaceEncoder model for inference.
@@ -34,8 +51,15 @@ class BaseFaceEncoder:
         Args:
             model_path (Optional[SphereFaceWeights | MobileFaceWeights]): Path to the ONNX model file.
+            preprocessing (Optional[PreprocessConfig]): Mean/std/input-size settings.
+                Defaults to PreprocessConfig() when None.
         """
-        self.input_mean = 127.5
-        self.input_std = 127.5
+        # Build the default per call instead of sharing one instance via a default argument
+        if preprocessing is None:
+            preprocessing = PreprocessConfig()
+
+        self.input_mean = preprocessing.input_mean
+        self.input_std = preprocessing.input_std
+        self.input_size = tuple(preprocessing.input_size)
 
         # Get path to model weights
         self._model_path = verify_model_weights(model_path)
@@ -70,15 +94,16 @@ class BaseFaceEncoder:
         """
         input_cfg = self.session.get_inputs()[0]
         input_shape = input_cfg.shape
+        model_input_size = tuple(input_shape[2:4][::-1])  # (width, height)
+
+        if model_input_size != self.input_size:
+            Logger.warning(f"Model input size {model_input_size} differs from configured size {self.input_size}")
 
         self.input_name = input_cfg.name
-        self.input_size = tuple(input_shape[2:4][::-1])  # (width, height)
-
-        outputs = self.session.get_outputs()
-        self.output_names = [output.name for output in outputs]
+        self.output_names = [output.name for output in self.session.get_outputs()]
+        self.output_shape = self.session.get_outputs()[0].shape
 
         assert len(self.output_names) == 1, "Expected only one output node."
-        self.output_shape = outputs[0].shape
 
     def preprocess(self, image: np.ndarray) -> np.ndarray:
         """
@@ -91,13 +116,29 @@ class BaseFaceEncoder:
             np.ndarray: Preprocessed image as a NumPy array ready for inference.
         """
         image = cv2.resize(image, self.input_size)  # Resize to (112, 112)
-        blob = cv2.dnn.blobFromImage(
-            image,
-            scalefactor=1.0 / self.input_std,
-            size=self.input_size,
-            mean=(self.input_mean, self.input_mean, self.input_mean),
-            swapRB=True  # Convert BGR to RGB
-        )
+        per_channel = any(
+            isinstance(value, (list, tuple, np.ndarray)) for value in (self.input_mean, self.input_std)
+        )
+        if per_channel:
+            # cv2.dnn.blobFromImage only takes a scalar scale factor, so per-channel
+            # mean/std (given in RGB order) are applied with NumPy instead
+            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
+
+            image -= np.array(self.input_mean, dtype=np.float32)
+            image /= np.array(self.input_std, dtype=np.float32)
+
+            # Change to NCHW (batch, channels, height, width)
+            blob = np.transpose(image, (2, 0, 1))  # CHW
+            blob = np.expand_dims(blob, axis=0)  # NCHW
+        else:
+            # Scalar mean/std: let OpenCV normalize, swap channels and build the NCHW blob
+            blob = cv2.dnn.blobFromImage(
+                image,
+                scalefactor=1.0 / self.input_std,
+                size=self.input_size,
+                mean=(self.input_mean, self.input_mean, self.input_mean),
+                swapRB=True  # Convert BGR to RGB
+            )
         return blob
 
     def get_embedding(self, image: np.ndarray, landmarks: np.ndarray) -> np.ndarray:
diff --git a/uniface/recognition/models.py b/uniface/recognition/models.py
index ebaf9ee..597d1b2 100644
--- a/uniface/recognition/models.py
+++ b/uniface/recognition/models.py
@@ -4,7 +4,7 @@
 
 from typing import Optional
 
-from .base import BaseFaceEncoder
+from .base import BaseFaceEncoder, PreprocessConfig
 from uniface.constants import SphereFaceWeights, MobileFaceWeights, ArcFaceWeights
 
 
@@ -12,15 +12,42 @@ __all__ = ["SphereFace", "MobileFace", "ArcFace"]
 
 
 class SphereFace(BaseFaceEncoder):
-    def __init__(self, model_path: Optional[SphereFaceWeights] = SphereFaceWeights.SPHERE20) -> None:
-        super().__init__(model_path=model_path)
+    def __init__(
+        self, model_path: Optional[SphereFaceWeights] = SphereFaceWeights.SPHERE20,
+        preprocessing: Optional[PreprocessConfig] = None
+    ) -> None:
+        if preprocessing is None:
+            preprocessing = PreprocessConfig(
+                input_mean=127.5,
+                input_std=127.5,
+                input_size=(112, 112)
+            )
+        super().__init__(model_path=model_path, preprocessing=preprocessing)
 
 
 class MobileFace(BaseFaceEncoder):
-    def __init__(self, model_path: Optional[MobileFaceWeights] = MobileFaceWeights.MNET_V2) -> None:
-        super().__init__(model_path=model_path)
+    def __init__(
+        self, model_path: Optional[MobileFaceWeights] = MobileFaceWeights.MNET_V2,
+        preprocessing: Optional[PreprocessConfig] = None
+    ) -> None:
+        if preprocessing is None:
+            preprocessing = PreprocessConfig(
+                input_mean=127.5,
+                input_std=127.5,
+                input_size=(112, 112)
+            )
+        super().__init__(model_path=model_path, preprocessing=preprocessing)
 
 
 class ArcFace(BaseFaceEncoder):
-    def __init__(self, model_path: Optional[ArcFaceWeights] = ArcFaceWeights.MNET) -> None:
-        super().__init__(model_path=model_path)
+    def __init__(
+        self, model_path: Optional[ArcFaceWeights] = ArcFaceWeights.MNET,
+        preprocessing: Optional[PreprocessConfig] = None
+    ) -> None:
+        if preprocessing is None:
+            preprocessing = PreprocessConfig(
+                input_mean=127.5,
+                input_std=127.5,
+                input_size=(112, 112)
+            )
+        super().__init__(model_path=model_path, preprocessing=preprocessing)