mirror of
https://github.com/yakhyo/uniface.git
synced 2025-12-30 09:02:25 +00:00
feat: Add support for different mean/std
This commit is contained in:
0
uniface/landmark/__init__.py
Normal file
0
uniface/landmark/__init__.py
Normal file
@@ -1 +1,2 @@
|
||||
from .base import PreprocessConfig
|
||||
from .models import SphereFace, MobileFace, ArcFace
|
||||
|
||||
@@ -7,7 +7,8 @@ import cv2
|
||||
import numpy as np
|
||||
import onnxruntime as ort
|
||||
|
||||
from typing import Tuple, Optional
|
||||
from typing import Tuple, Optional, Union, List
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
from uniface.face_utils import compute_similarity, face_alignment
|
||||
@@ -16,7 +17,17 @@ from uniface.constants import SphereFaceWeights, MobileFaceWeights
|
||||
from uniface.logger import Logger
|
||||
|
||||
|
||||
__all__ = ["BaseFaceEncoder"]
|
||||
__all__ = ["BaseFaceEncoder", "PreprocessConfig"]
|
||||
|
||||
|
||||
@dataclass
class PreprocessConfig:
    """
    Configuration for preprocessing images before feeding them into the model.

    Attributes:
        input_mean: Value subtracted from the image. Either a single scalar
            applied to every channel, or a list/tuple of per-channel values.
        input_std: Value the mean-subtracted image is divided by. Either a
            single scalar or a list/tuple of per-channel values.
        input_size: Size the image is resized to before inference.
            # NOTE(review): treated as (width, height) by the encoder — confirm.
    """

    # Tuples are accepted alongside lists because the encoder's preprocessing
    # checks for ``(list, tuple)`` when deciding on per-channel normalization.
    input_mean: Union[float, List[float], Tuple[float, ...]] = 127.5
    input_std: Union[float, List[float], Tuple[float, ...]] = 127.5
    input_size: Tuple[int, int] = (112, 112)
|
||||
|
||||
|
||||
class BaseFaceEncoder:
|
||||
@@ -26,7 +37,8 @@ class BaseFaceEncoder:
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
model_path: Optional[SphereFaceWeights | MobileFaceWeights] = MobileFaceWeights.MNET_V2
|
||||
model_path: Optional[SphereFaceWeights | MobileFaceWeights] = MobileFaceWeights.MNET_V2,
|
||||
preprocessing: PreprocessConfig = PreprocessConfig(),
|
||||
) -> None:
|
||||
"""
|
||||
Initializes the FaceEncoder model for inference.
|
||||
@@ -34,8 +46,9 @@ class BaseFaceEncoder:
|
||||
Args:
|
||||
model_path (Optional[SphereFaceWeights | MobileFaceWeights]): Path to the ONNX model file.
|
||||
"""
|
||||
self.input_mean = 127.5
|
||||
self.input_std = 127.5
|
||||
self.input_mean = preprocessing.input_mean
|
||||
self.input_std = preprocessing.input_std
|
||||
self.input_size = input_size.input_size
|
||||
|
||||
# Get path to model weights
|
||||
self._model_path = verify_model_weights(model_path)
|
||||
@@ -70,15 +83,16 @@ class BaseFaceEncoder:
|
||||
"""
|
||||
input_cfg = self.session.get_inputs()[0]
|
||||
input_shape = input_cfg.shape
|
||||
model_input_size = tuple(input_shape[2:4][::-1]) # (width, height)
|
||||
|
||||
if model_input_size != self.input_size:
|
||||
Logger.warning(f"Model input size {model_input_size} differs from configured size {self.input_size}")
|
||||
|
||||
self.input_name = input_cfg.name
|
||||
self.input_size = tuple(input_shape[2:4][::-1]) # (width, height)
|
||||
|
||||
outputs = self.session.get_outputs()
|
||||
self.output_names = [output.name for output in outputs]
|
||||
self.output_names = [output.name for output in self.session.get_outputs()]
|
||||
self.output_shape = self.session.get_outputs()[0].shape
|
||||
|
||||
assert len(self.output_names) == 1, "Expected only one output node."
|
||||
self.output_shape = outputs[0].shape
|
||||
|
||||
def preprocess(self, image: np.ndarray) -> np.ndarray:
    """
    Resize and normalize a face image into an NCHW blob for inference.

    The image is resized to ``self.input_size``, converted from BGR to RGB,
    mean-subtracted and divided by the standard deviation. Scalar mean/std
    go through ``cv2.dnn.blobFromImage``; per-channel mean and/or std are
    handled with NumPy because ``blobFromImage`` only takes a single scale
    factor.

    Args:
        image (np.ndarray): Input image in BGR channel order.

    Returns:
        np.ndarray: Preprocessed image as a NumPy array ready for inference.
    """
    image = cv2.resize(image, self.input_size)

    # Take the per-channel path when EITHER value is non-scalar. Checking only
    # input_std would send a per-channel mean with a scalar std into
    # blobFromImage as a tuple of lists, which it cannot interpret.
    per_channel = np.ndim(self.input_mean) > 0 or np.ndim(self.input_std) > 0

    if per_channel:
        rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)

        # Values broadcast over the last (channel) axis; since the image is
        # already RGB here, per-channel values are applied in RGB order.
        mean = np.asarray(self.input_mean, dtype=np.float32)
        std = np.asarray(self.input_std, dtype=np.float32)
        rgb = (rgb - mean) / std

        # HWC -> CHW, then add the batch axis to get NCHW.
        blob = np.expand_dims(np.transpose(rgb, (2, 0, 1)), axis=0)
    else:
        # cv2.dnn.blobFromImage does not support per-channel std, so it is
        # only used when both mean and std are single values.
        blob = cv2.dnn.blobFromImage(
            image,
            scalefactor=1.0 / self.input_std,
            size=self.input_size,
            mean=(self.input_mean, self.input_mean, self.input_mean),
            swapRB=True,  # Convert BGR to RGB
        )

    return blob
|
||||
|
||||
def get_embedding(self, image: np.ndarray, landmarks: np.ndarray) -> np.ndarray:
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
|
||||
from typing import Optional
|
||||
|
||||
from .base import BaseFaceEncoder
|
||||
from .base import BaseFaceEncoder, PreprocessConfig
|
||||
from uniface.constants import SphereFaceWeights, MobileFaceWeights, ArcFaceWeights
|
||||
|
||||
|
||||
@@ -12,15 +12,42 @@ __all__ = ["SphereFace", "MobileFace", "ArcFace"]
|
||||
|
||||
|
||||
class SphereFace(BaseFaceEncoder):
    """Face encoder that defaults to the ``SphereFaceWeights.SPHERE20`` weights."""

    def __init__(
        self,
        model_path: Optional[SphereFaceWeights] = SphereFaceWeights.SPHERE20,
        preprocessing: Optional[PreprocessConfig] = None,
    ) -> None:
        """
        Build the encoder, falling back to the default preprocessing settings.

        Args:
            model_path (Optional[SphereFaceWeights]): Weights to load.
            preprocessing (Optional[PreprocessConfig]): Preprocessing settings.
                When omitted, mean 127.5, std 127.5 and size (112, 112) are used.
        """
        config = (
            preprocessing
            if preprocessing is not None
            else PreprocessConfig(input_mean=127.5, input_std=127.5, input_size=(112, 112))
        )
        super().__init__(model_path=model_path, preprocessing=config)
|
||||
|
||||
|
||||
class MobileFace(BaseFaceEncoder):
    """Face encoder that defaults to the ``MobileFaceWeights.MNET_V2`` weights."""

    def __init__(
        self,
        model_path: Optional[MobileFaceWeights] = MobileFaceWeights.MNET_V2,
        preprocessing: Optional[PreprocessConfig] = None,
    ) -> None:
        """
        Build the encoder, falling back to the default preprocessing settings.

        Args:
            model_path (Optional[MobileFaceWeights]): Weights to load.
            preprocessing (Optional[PreprocessConfig]): Preprocessing settings.
                When omitted, mean 127.5, std 127.5 and size (112, 112) are used.
        """
        if preprocessing is None:
            preprocessing = PreprocessConfig(
                input_mean=127.5,
                input_std=127.5,
                input_size=(112, 112),
            )
        # Forward the config to the base class. Previously it was built here
        # but never passed on, so a caller-supplied config was silently ignored.
        super().__init__(model_path=model_path, preprocessing=preprocessing)
|
||||
|
||||
|
||||
class ArcFace(BaseFaceEncoder):
    """Face encoder that defaults to the ``ArcFaceWeights.MNET`` weights."""

    def __init__(
        self,
        model_path: Optional[ArcFaceWeights] = ArcFaceWeights.MNET,
        preprocessing: Optional[PreprocessConfig] = None,
    ) -> None:
        """
        Build the encoder, falling back to the default preprocessing settings.

        Args:
            model_path (Optional[ArcFaceWeights]): Weights to load.
            preprocessing (Optional[PreprocessConfig]): Preprocessing settings.
                When omitted, mean 127.5, std 127.5 and size (112, 112) are used.
        """
        if preprocessing is None:
            preprocessing = PreprocessConfig(
                input_mean=127.5,
                input_std=127.5,
                input_size=(112, 112),
            )
        # Forward the config to the base class. Previously it was built here
        # but never passed on, so a caller-supplied config was silently ignored.
        super().__init__(model_path=model_path, preprocessing=preprocessing)
|
||||
|
||||
Reference in New Issue
Block a user