feat: Add face attribute detection model

yakhyo
2025-04-02 16:31:54 +09:00
parent d1830c7058
commit 85cf413cb8
4 changed files with 146 additions and 1 deletion

uniface/attribute/models.py Normal file

@@ -0,0 +1,100 @@
import cv2
import numpy as np
import onnxruntime

from typing import Tuple

from uniface.face_utils import bbox_center_alignment

__all__ = ["Attribute"]


class Attribute:
    """
    Age and Gender Prediction Model.
    """

    def __init__(self, model_path: str) -> None:
        """
        Initializes the Attribute model for inference.

        Args:
            model_path (str): Path to the ONNX model file.
        """
        self.model_path = model_path
        self.input_std = 1.0
        self.input_mean = 0.0

        self._initialize_model(model_path=model_path)

    def _initialize_model(self, model_path: str) -> None:
        """Initializes an ONNX Runtime session from the given path.

        Args:
            model_path (str): Path to the .onnx model file.
        """
        try:
            self.session = onnxruntime.InferenceSession(
                model_path,
                providers=["CUDAExecutionProvider", "CPUExecutionProvider"]
            )
            # Read the expected input resolution and I/O names from the model.
            metadata = self.session.get_inputs()[0]
            input_shape = metadata.shape  # [N, C, H, W]
            self.input_size = tuple(input_shape[2:4][::-1])  # (width, height)
            self.input_names = [x.name for x in self.session.get_inputs()]
            self.output_names = [x.name for x in self.session.get_outputs()]
        except Exception as e:
            print(f"Failed to load the model: {e}")
            raise

    def preprocess(self, image: np.ndarray, bbox: np.ndarray) -> np.ndarray:
        """Crops and aligns the face region and converts it into an input blob.

        Args:
            image (np.ndarray): Input image (BGR, as read by OpenCV).
            bbox (np.ndarray): Bounding box coordinates: [x1, y1, x2, y2].

        Returns:
            np.ndarray: Preprocessed blob of shape [1, 3, H, W].
        """
        width, height = bbox[2] - bbox[0], bbox[3] - bbox[1]
        center = (bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2
        # Scale so the face, with a 1.5x margin, fills the model input.
        scale = self.input_size[0] / (max(width, height) * 1.5)
        rotation = 0.0

        transformed_image, M = bbox_center_alignment(image, center, self.input_size[0], scale, rotation)

        input_size = tuple(transformed_image.shape[0:2][::-1])
        blob = cv2.dnn.blobFromImage(
            transformed_image,
            1.0 / self.input_std,
            input_size,
            (self.input_mean, self.input_mean, self.input_mean),
            swapRB=True
        )
        return blob

    def postprocess(self, predictions: np.ndarray) -> Tuple[np.int64, int]:
        """Decodes raw model outputs into gender and age values.

        Args:
            predictions (np.ndarray): Model predictions of shape [3]:
                two gender scores followed by a normalized age.

        Returns:
            Tuple[np.int64, int]: Gender class index and age in years.
        """
        gender = np.argmax(predictions[:2])  # index of the higher gender score
        age = int(np.round(predictions[2] * 100))  # age is predicted as age / 100
        return gender, age

    def get(self, image: np.ndarray, bbox: np.ndarray) -> Tuple[np.int64, int]:
        """Runs the full pipeline: preprocess, inference, postprocess."""
        blob = self.preprocess(image, bbox)
        predictions = self.session.run(self.output_names, {self.input_names[0]: blob})[0][0]
        gender, age = self.postprocess(predictions)
        return gender, age
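
For reference, a minimal usage sketch of the new class (the weight path, image file, and bounding box below are placeholders for illustration, not files shipped with this commit):

import cv2
import numpy as np

from uniface.attribute.models import Attribute

model = Attribute(model_path="weights/genderage.onnx")  # hypothetical weight file
image = cv2.imread("face.jpg")                          # BGR image, as OpenCV loads it
bbox = np.array([100, 120, 260, 300])                   # [x1, y1, x2, y2] from a face detector

gender, age = model.get(image, bbox)
print(f"gender class: {gender}, age: {age}")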

uniface/face_utils.py

@@ -96,3 +96,48 @@ def compute_similarity(feat1: np.ndarray, feat2: np.ndarray) -> np.float32:
    feat2 = feat2.ravel()
    similarity = np.dot(feat1, feat2) / (np.linalg.norm(feat1) * np.linalg.norm(feat2))
    return similarity


def bbox_center_alignment(image, center, output_size, scale, rotation):
    """
    Apply center-based alignment, scaling, and rotation to an image.

    Args:
        image (np.ndarray): Input image.
        center (Tuple[float, float]): Center point (e.g., face center from bbox).
        output_size (int): Desired output image size (square).
        scale (float): Scaling factor to zoom in/out.
        rotation (float): Rotation angle in degrees (clockwise).

    Returns:
        cropped (np.ndarray): Aligned and cropped image.
        M (np.ndarray): 2x3 affine transform matrix used.
    """
    # Convert rotation from degrees to radians.
    rot = float(rotation) * np.pi / 180.0

    # Scale the image. ("trans" is assumed to be skimage.transform,
    # imported at the top of this module, outside this hunk.)
    t1 = trans.SimilarityTransform(scale=scale)

    # Translate the scaled center point to the origin.
    cx = center[0] * scale
    cy = center[1] * scale
    t2 = trans.SimilarityTransform(translation=(-1 * cx, -1 * cy))

    # Rotate around the origin (the face center).
    t3 = trans.SimilarityTransform(rotation=rot)

    # Translate the origin to the center of the output image.
    t4 = trans.SimilarityTransform(translation=(output_size / 2, output_size / 2))

    # Compose all transformations in order: scale -> center shift -> rotate -> recenter.
    t = t1 + t2 + t3 + t4

    # Extract the 2x3 affine matrix and warp the image with OpenCV.
    M = t.params[0:2]
    cropped = cv2.warpAffine(image, M, (output_size, output_size), borderValue=0.0)
    return cropped, M
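
A quick sanity check of the composition (assuming trans is skimage.transform, matching the alias used above): chaining the four transforms maps the requested face center onto the middle of the output crop.

import numpy as np
from skimage import transform as trans  # assumed import, matching the alias above

# Recreate the composition for a 96px output, 0.5x scale, no rotation.
center, output_size, scale = (200.0, 150.0), 96, 0.5
t = (trans.SimilarityTransform(scale=scale)
     + trans.SimilarityTransform(translation=(-center[0] * scale, -center[1] * scale))
     + trans.SimilarityTransform(rotation=0.0)
     + trans.SimilarityTransform(translation=(output_size / 2, output_size / 2)))

# The face center lands exactly in the middle of the output crop.
print(t(np.array([center])))  # -> [[48. 48.]]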

@@ -32,7 +32,7 @@ class BaseFaceEncoder:
         Initializes the FaceEncoder model for inference.

         Args:
-            model_path (str): Path to the ONNX model file.
+            model_path (Optional[SphereFaceWeights | MobileFaceWeights]): Path to the ONNX model file.
         """
         self.input_mean = 127.5
         self.input_std = 127.5