feat: Add support for different mean/std

yakhyo
2025-04-04 16:26:38 +09:00
parent f9b4ea492b
commit 67bb13c082
4 changed files with 76 additions and 22 deletions


@@ -1 +1,2 @@
+from .base import PreprocessConfig
 from .models import SphereFace, MobileFace, ArcFace


@@ -7,7 +7,8 @@ import cv2
 import numpy as np
 import onnxruntime as ort
-from typing import Tuple, Optional
+from typing import Tuple, Optional, Union, List
+from dataclasses import dataclass
 from uniface.face_utils import compute_similarity, face_alignment
@@ -16,7 +17,17 @@ from uniface.constants import SphereFaceWeights, MobileFaceWeights
 from uniface.logger import Logger

-__all__ = ["BaseFaceEncoder"]
+__all__ = ["BaseFaceEncoder", "PreprocessConfig"]
+
+
+@dataclass
+class PreprocessConfig:
+    """
+    Configuration for preprocessing images before feeding them into the model.
+    """
+    input_mean: Union[float, List[float]] = 127.5
+    input_std: Union[float, List[float]] = 127.5
+    input_size: Tuple[int, int] = (112, 112)


 class BaseFaceEncoder:
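
For illustration, the new dataclass can be used like this (a sketch using only the fields defined above; the per-channel values are made up):

    # Defaults reproduce the previous hard-coded behaviour: (x - 127.5) / 127.5
    cfg_default = PreprocessConfig()

    # List values switch preprocess() to the manual per-channel path
    cfg_rgb = PreprocessConfig(
        input_mean=[123.675, 116.28, 103.53],  # illustrative RGB means
        input_std=[58.395, 57.12, 57.375],     # illustrative RGB stds
        input_size=(112, 112),
    )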
@@ -26,7 +37,8 @@ class BaseFaceEncoder:
     def __init__(
         self,
-        model_path: Optional[SphereFaceWeights | MobileFaceWeights] = MobileFaceWeights.MNET_V2
+        model_path: Optional[SphereFaceWeights | MobileFaceWeights] = MobileFaceWeights.MNET_V2,
+        preprocessing: PreprocessConfig = PreprocessConfig(),
     ) -> None:
         """
         Initializes the FaceEncoder model for inference.
@@ -34,8 +46,9 @@ class BaseFaceEncoder:
         Args:
             model_path (Optional[SphereFaceWeights | MobileFaceWeights]): Path to the ONNX model file.
         """
-        self.input_mean = 127.5
-        self.input_std = 127.5
+        self.input_mean = preprocessing.input_mean
+        self.input_std = preprocessing.input_std
+        self.input_size = preprocessing.input_size

         # Get path to model weights
         self._model_path = verify_model_weights(model_path)
@@ -70,15 +83,16 @@
         """
         input_cfg = self.session.get_inputs()[0]
         input_shape = input_cfg.shape
+        model_input_size = tuple(input_shape[2:4][::-1])  # (width, height)
+        if model_input_size != self.input_size:
+            Logger.warning(f"Model input size {model_input_size} differs from configured size {self.input_size}")
         self.input_name = input_cfg.name
-        self.input_size = tuple(input_shape[2:4][::-1])  # (width, height)
-        outputs = self.session.get_outputs()
-        self.output_names = [output.name for output in outputs]
+        self.output_names = [output.name for output in self.session.get_outputs()]
+        self.output_shape = self.session.get_outputs()[0].shape
         assert len(self.output_names) == 1, "Expected only one output node."
-        self.output_shape = outputs[0].shape

     def preprocess(self, image: np.ndarray) -> np.ndarray:
         """
@@ -91,13 +105,25 @@
             np.ndarray: Preprocessed image as a NumPy array ready for inference.
         """
         image = cv2.resize(image, self.input_size)  # Resize to the configured input size
-        blob = cv2.dnn.blobFromImage(
-            image,
-            scalefactor=1.0 / self.input_std,
-            size=self.input_size,
-            mean=(self.input_mean, self.input_mean, self.input_mean),
-            swapRB=True  # Convert BGR to RGB
-        )
+        if isinstance(self.input_std, (list, tuple)):
+            # If self.input_std is a list, we assume it holds per-channel stds
+            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
+            image -= np.array(self.input_mean, dtype=np.float32)
+            image /= np.array(self.input_std, dtype=np.float32)
+            # Change to NCHW (batch, channels, height, width)
+            blob = np.transpose(image, (2, 0, 1))  # HWC -> CHW
+            blob = np.expand_dims(blob, axis=0)    # CHW -> NCHW
+        else:
+            # cv2.dnn.blobFromImage does not support per-channel std, so we use a single value here
+            blob = cv2.dnn.blobFromImage(
+                image,
+                scalefactor=1.0 / self.input_std,
+                size=self.input_size,
+                mean=(self.input_mean, self.input_mean, self.input_mean),
+                swapRB=True  # Convert BGR to RGB
+            )
         return blob

     def get_embedding(self, image: np.ndarray, landmarks: np.ndarray) -> np.ndarray:

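As a quick self-check of the two preprocess() branches above (a sketch assuming only numpy and opencv-python; not part of the committed diff): with scalar statistics, the manual per-channel path yields the same NCHW blob as cv2.dnn.blobFromImage.

    import cv2
    import numpy as np

    image = np.random.randint(0, 256, (112, 112, 3), dtype=np.uint8)  # dummy BGR frame
    mean = std = 127.5

    # Scalar branch: OpenCV computes (image - mean) * scalefactor, swapping BGR -> RGB
    blob_cv = cv2.dnn.blobFromImage(
        image, scalefactor=1.0 / std, size=(112, 112),
        mean=(mean, mean, mean), swapRB=True,
    )

    # Per-channel branch, here with equal values for every channel
    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
    rgb -= np.array([mean] * 3, dtype=np.float32)
    rgb /= np.array([std] * 3, dtype=np.float32)
    blob_np = np.expand_dims(np.transpose(rgb, (2, 0, 1)), axis=0)  # HWC -> NCHW

    assert np.allclose(blob_cv, blob_np, atol=1e-4)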

@@ -4,7 +4,7 @@
 from typing import Optional

-from .base import BaseFaceEncoder
+from .base import BaseFaceEncoder, PreprocessConfig
 from uniface.constants import SphereFaceWeights, MobileFaceWeights, ArcFaceWeights
@@ -12,15 +12,42 @@ __all__ = ["SphereFace", "MobileFace", "ArcFace"]
 class SphereFace(BaseFaceEncoder):
-    def __init__(self, model_path: Optional[SphereFaceWeights] = SphereFaceWeights.SPHERE20) -> None:
-        super().__init__(model_path=model_path)
+    def __init__(
+        self, model_path: Optional[SphereFaceWeights] = SphereFaceWeights.SPHERE20,
+        preprocessing: Optional[PreprocessConfig] = None
+    ) -> None:
+        if preprocessing is None:
+            preprocessing = PreprocessConfig(
+                input_mean=127.5,
+                input_std=127.5,
+                input_size=(112, 112)
+            )
+        super().__init__(model_path=model_path, preprocessing=preprocessing)


 class MobileFace(BaseFaceEncoder):
-    def __init__(self, model_path: Optional[MobileFaceWeights] = MobileFaceWeights.MNET_V2) -> None:
-        super().__init__(model_path=model_path)
+    def __init__(
+        self, model_path: Optional[MobileFaceWeights] = MobileFaceWeights.MNET_V2,
+        preprocessing: Optional[PreprocessConfig] = None
+    ) -> None:
+        if preprocessing is None:
+            preprocessing = PreprocessConfig(
+                input_mean=127.5,
+                input_std=127.5,
+                input_size=(112, 112)
+            )
+        super().__init__(model_path=model_path, preprocessing=preprocessing)


 class ArcFace(BaseFaceEncoder):
-    def __init__(self, model_path: Optional[ArcFaceWeights] = ArcFaceWeights.MNET) -> None:
-        super().__init__(model_path=model_path)
+    def __init__(
+        self, model_path: Optional[ArcFaceWeights] = ArcFaceWeights.MNET,
+        preprocessing: Optional[PreprocessConfig] = None
+    ) -> None:
+        if preprocessing is None:
+            preprocessing = PreprocessConfig(
+                input_mean=127.5,
+                input_std=127.5,
+                input_size=(112, 112)
+            )
+        super().__init__(model_path=model_path, preprocessing=preprocessing)
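
Taken together, callers can now override the statistics per model. A usage sketch (it assumes the classes are re-exported on the top-level uniface package, per the __init__ diff above; the per-channel values are purely illustrative):

    from uniface import ArcFace, PreprocessConfig  # assumed re-export path
    from uniface.constants import ArcFaceWeights

    encoder = ArcFace()  # default behaviour is unchanged

    config = PreprocessConfig(
        input_mean=[127.5, 127.5, 127.5],
        input_std=[128.0, 128.0, 128.0],
        input_size=(112, 112),
    )
    encoder = ArcFace(model_path=ArcFaceWeights.MNET, preprocessing=config)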