chore: Code style formatting changes

Author: yakhyo
Date: 2025-11-25 23:45:50 +09:00
parent 15947eb605
commit 0771a7959a
20 changed files with 288 additions and 243 deletions

View File

@@ -27,7 +27,13 @@ try:
from .attribute import Emotion
except ImportError:
Emotion = None # PyTorch not installed
from .detection import SCRFD, RetinaFace, create_detector, detect_faces, list_available_detectors
from .detection import (
SCRFD,
RetinaFace,
create_detector,
detect_faces,
list_available_detectors,
)
from .landmark import Landmark106, create_landmarker
from .recognition import ArcFace, MobileFace, SphereFace, create_recognizer
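
The relative imports above suggest this hunk is the package-level uniface/__init__.py, so the rewrapped import block keeps the same public names available at the top level. A hedged sketch of what that exposes (only names visible in this diff are used):

from uniface import SCRFD, RetinaFace, create_detector, detect_faces, list_available_detectors
from uniface import Landmark106, create_landmarker
from uniface import ArcFace, MobileFace, SphereFace, create_recognizer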

View File

@@ -13,18 +13,14 @@ from uniface.constants import AgeGenderWeights, DDAMFNWeights
# Emotion requires PyTorch - make it optional
try:
from uniface.attribute.emotion import Emotion
_EMOTION_AVAILABLE = True
except ImportError:
Emotion = None
_EMOTION_AVAILABLE = False
# Public API for the attribute module
__all__ = [
"AgeGender",
"Emotion",
"create_attribute_predictor",
"predict_attributes"
]
__all__ = ["AgeGender", "Emotion", "create_attribute_predictor", "predict_attributes"]
# A mapping from model enums to their corresponding attribute classes
_ATTRIBUTE_MODELS = {
@@ -36,10 +32,7 @@ if _EMOTION_AVAILABLE:
_ATTRIBUTE_MODELS.update({model: Emotion for model in DDAMFNWeights})
def create_attribute_predictor(
model_name: Union[AgeGenderWeights, DDAMFNWeights],
**kwargs: Any
) -> Attribute:
def create_attribute_predictor(model_name: Union[AgeGenderWeights, DDAMFNWeights], **kwargs: Any) -> Attribute:
"""
Factory function to create an attribute predictor instance.
@@ -60,17 +53,16 @@ def create_attribute_predictor(
model_class = _ATTRIBUTE_MODELS.get(model_name)
if model_class is None:
raise ValueError(f"Unsupported attribute model: {model_name}. "
f"Please choose from AgeGenderWeights or DDAMFNWeights.")
raise ValueError(
f"Unsupported attribute model: {model_name}. Please choose from AgeGenderWeights or DDAMFNWeights."
)
# Pass model_name to the constructor, as some classes might need it
return model_class(model_name=model_name, **kwargs)
def predict_attributes(
image: np.ndarray,
detections: List[Dict[str, np.ndarray]],
predictor: Attribute
image: np.ndarray, detections: List[Dict[str, np.ndarray]], predictor: Attribute
) -> List[Dict[str, Any]]:
"""
High-level API to predict attributes for multiple detected faces.
@@ -92,16 +84,16 @@ def predict_attributes(
"""
for face in detections:
# Initialize attributes dict if it doesn't exist
if 'attributes' not in face:
face['attributes'] = {}
if "attributes" not in face:
face["attributes"] = {}
if isinstance(predictor, AgeGender):
gender, age = predictor(image, face['bbox'])
face['attributes']['gender'] = gender
face['attributes']['age'] = age
gender, age = predictor(image, face["bbox"])
face["attributes"]["gender"] = gender
face["attributes"]["age"] = age
elif isinstance(predictor, Emotion):
emotion, confidence = predictor(image, face['landmark'])
face['attributes']['emotion'] = emotion
face['attributes']['confidence'] = confidence
emotion, confidence = predictor(image, face["landmark"])
face["attributes"]["emotion"] = emotion
face["attributes"]["confidence"] = confidence
return detections
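
To see the reflowed factory and high-level API together, here is a minimal usage sketch. It assumes uniface is installed and that "photo.jpg" is a hypothetical readable image; the weight enum and detection call are the ones shown elsewhere in this commit.

import cv2

from uniface.attribute import create_attribute_predictor, predict_attributes
from uniface.constants import AgeGenderWeights
from uniface.detection import detect_faces

image = cv2.imread("photo.jpg")                        # hypothetical input path
detections = detect_faces(image, method="retinaface")  # list of dicts with 'bbox', 'landmarks', ...

predictor = create_attribute_predictor(AgeGenderWeights.DEFAULT)
detections = predict_attributes(image, detections, predictor)
for face in detections:
    print(face["attributes"])  # e.g. {'gender': ..., 'age': ...}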

View File

@@ -51,8 +51,11 @@ class AgeGender(Attribute):
self.output_names = [output.name for output in self.session.get_outputs()]
Logger.info(f"Successfully initialized AgeGender model with input size {self.input_size}")
except Exception as e:
Logger.error(f"Failed to load AgeGender model from '{self.model_path}'", exc_info=True)
raise RuntimeError(f"Failed to initialize AgeGender model: {e}")
Logger.error(
f"Failed to load AgeGender model from '{self.model_path}'",
exc_info=True,
)
raise RuntimeError(f"Failed to initialize AgeGender model: {e}") from e
def preprocess(self, image: np.ndarray, bbox: Union[List, np.ndarray]) -> np.ndarray:
"""
@@ -76,7 +79,11 @@ class AgeGender(Attribute):
aligned_face, _ = bbox_center_alignment(image, center, self.input_size[1], scale, rotation)
blob = cv2.dnn.blobFromImage(
aligned_face, scalefactor=1.0, size=self.input_size[::-1], mean=(0.0, 0.0, 0.0), swapRB=True
aligned_face,
scalefactor=1.0,
size=self.input_size[::-1],
mean=(0.0, 0.0, 0.0),
swapRB=True,
)
return blob
@@ -157,7 +164,15 @@ if __name__ == "__main__":
# Prepare text and draw on the frame
label = f"{gender}, {age}"
cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
cv2.putText(
frame,
label,
(x1, y1 - 10),
cv2.FONT_HERSHEY_SIMPLEX,
0.8,
(0, 255, 0),
2,
)
# Display the resulting frame
cv2.imshow("Age and Gender Inference (Press 'q' to quit)", frame)

View File

@@ -44,7 +44,15 @@ class Emotion(Attribute):
self.model_path = verify_model_weights(model_weights)
# Define emotion labels based on the selected model
self.emotion_labels = ["Neutral", "Happy", "Sad", "Surprise", "Fear", "Disgust", "Angry"]
self.emotion_labels = [
"Neutral",
"Happy",
"Sad",
"Surprise",
"Fear",
"Disgust",
"Angry",
]
if model_weights == DDAMFNWeights.AFFECNET8:
self.emotion_labels.append("Contempt")
@@ -64,7 +72,7 @@ class Emotion(Attribute):
Logger.info(f"Successfully initialized Emotion model on {self.device}")
except Exception as e:
Logger.error(f"Failed to load Emotion model from '{self.model_path}'", exc_info=True)
raise RuntimeError(f"Failed to initialize Emotion model: {e}")
raise RuntimeError(f"Failed to initialize Emotion model: {e}") from e
def preprocess(self, image: np.ndarray, landmark: Union[List, np.ndarray]) -> torch.Tensor:
"""
@@ -116,8 +124,8 @@ class Emotion(Attribute):
# TODO: below is only for testing, remove it later
if __name__ == "__main__":
from uniface.detection import create_detector
from uniface.constants import RetinaFaceWeights
from uniface.detection import create_detector
print("Initializing models for live inference...")
# 1. Initialize the face detector
@@ -146,8 +154,8 @@ if __name__ == "__main__":
# For each detected face, predict the emotion
for detection in detections:
box = detection['bbox']
landmark = detection['landmarks']
box = detection["bbox"]
landmark = detection["landmarks"]
x1, y1, x2, y2 = map(int, box)
# Predict attributes using the landmark
@@ -156,13 +164,21 @@ if __name__ == "__main__":
# Prepare text and draw on the frame
label = f"{emotion} ({confidence:.2f})"
cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)
cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 0, 0), 2)
cv2.putText(
frame,
label,
(x1, y1 - 10),
cv2.FONT_HERSHEY_SIMPLEX,
0.8,
(255, 0, 0),
2,
)
# Display the resulting frame
cv2.imshow("Emotion Inference (Press 'q' to quit)", frame)
# Break the loop if 'q' is pressed
if cv2.waitKey(1) & 0xFF == ord('q'):
if cv2.waitKey(1) & 0xFF == ord("q"):
break
# Release resources

View File

@@ -59,12 +59,7 @@ def generate_anchors(image_size: Tuple[int, int] = (640, 640)) -> np.ndarray:
min_sizes = [[16, 32], [64, 128], [256, 512]]
anchors = []
feature_maps = [
[
math.ceil(image_size[0] / step),
math.ceil(image_size[1] / step)
] for step in steps
]
feature_maps = [[math.ceil(image_size[0] / step), math.ceil(image_size[1] / step)] for step in steps]
for k, (map_height, map_width) in enumerate(feature_maps):
step = steps[k]
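
As a sanity check on the collapsed comprehension: assuming the usual RetinaFace strides of steps = [8, 16, 32] (the strides are not visible in this hunk), a 640x640 input yields 80x80, 40x40, and 20x20 feature maps, so the two min_sizes per level give 16,800 anchors in total.

import math

steps = [8, 16, 32]     # assumption: typical RetinaFace strides, not shown in this hunk
image_size = (640, 640)
feature_maps = [[math.ceil(image_size[0] / step), math.ceil(image_size[1] / step)] for step in steps]
print(feature_maps)                             # [[80, 80], [40, 40], [20, 20]]
print(sum(2 * h * w for h, w in feature_maps))  # 16800 anchors (2 min_sizes per level)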

View File

@@ -83,83 +83,65 @@ class LandmarkWeights(str, Enum):
MODEL_URLS: Dict[Enum, str] = {
# RetinaFace
RetinaFaceWeights.MNET_025: 'https://github.com/yakhyo/uniface/releases/download/weights/retinaface_mv1_0.25.onnx',
RetinaFaceWeights.MNET_050: 'https://github.com/yakhyo/uniface/releases/download/weights/retinaface_mv1_0.50.onnx',
RetinaFaceWeights.MNET_V1: 'https://github.com/yakhyo/uniface/releases/download/weights/retinaface_mv1.onnx',
RetinaFaceWeights.MNET_V2: 'https://github.com/yakhyo/uniface/releases/download/weights/retinaface_mv2.onnx',
RetinaFaceWeights.RESNET18: 'https://github.com/yakhyo/uniface/releases/download/weights/retinaface_r18.onnx',
RetinaFaceWeights.RESNET34: 'https://github.com/yakhyo/uniface/releases/download/weights/retinaface_r34.onnx',
RetinaFaceWeights.MNET_025: "https://github.com/yakhyo/uniface/releases/download/weights/retinaface_mv1_0.25.onnx",
RetinaFaceWeights.MNET_050: "https://github.com/yakhyo/uniface/releases/download/weights/retinaface_mv1_0.50.onnx",
RetinaFaceWeights.MNET_V1: "https://github.com/yakhyo/uniface/releases/download/weights/retinaface_mv1.onnx",
RetinaFaceWeights.MNET_V2: "https://github.com/yakhyo/uniface/releases/download/weights/retinaface_mv2.onnx",
RetinaFaceWeights.RESNET18: "https://github.com/yakhyo/uniface/releases/download/weights/retinaface_r18.onnx",
RetinaFaceWeights.RESNET34: "https://github.com/yakhyo/uniface/releases/download/weights/retinaface_r34.onnx",
# MobileFace
MobileFaceWeights.MNET_025: 'https://github.com/yakhyo/uniface/releases/download/weights/mobilenetv1_0.25.onnx',
MobileFaceWeights.MNET_V2: 'https://github.com/yakhyo/uniface/releases/download/weights/mobilenetv2.onnx',
MobileFaceWeights.MNET_V3_SMALL: 'https://github.com/yakhyo/uniface/releases/download/weights/mobilenetv3_small.onnx',
MobileFaceWeights.MNET_V3_LARGE: 'https://github.com/yakhyo/uniface/releases/download/weights/mobilenetv3_large.onnx',
MobileFaceWeights.MNET_025: "https://github.com/yakhyo/uniface/releases/download/weights/mobilenetv1_0.25.onnx",
MobileFaceWeights.MNET_V2: "https://github.com/yakhyo/uniface/releases/download/weights/mobilenetv2.onnx",
MobileFaceWeights.MNET_V3_SMALL: "https://github.com/yakhyo/uniface/releases/download/weights/mobilenetv3_small.onnx",
MobileFaceWeights.MNET_V3_LARGE: "https://github.com/yakhyo/uniface/releases/download/weights/mobilenetv3_large.onnx",
# SphereFace
SphereFaceWeights.SPHERE20: 'https://github.com/yakhyo/uniface/releases/download/weights/sphere20.onnx',
SphereFaceWeights.SPHERE36: 'https://github.com/yakhyo/uniface/releases/download/weights/sphere36.onnx',
SphereFaceWeights.SPHERE20: "https://github.com/yakhyo/uniface/releases/download/weights/sphere20.onnx",
SphereFaceWeights.SPHERE36: "https://github.com/yakhyo/uniface/releases/download/weights/sphere36.onnx",
# ArcFace
ArcFaceWeights.MNET: 'https://github.com/yakhyo/uniface/releases/download/weights/w600k_mbf.onnx',
ArcFaceWeights.RESNET: 'https://github.com/yakhyo/uniface/releases/download/weights/w600k_r50.onnx',
ArcFaceWeights.MNET: "https://github.com/yakhyo/uniface/releases/download/weights/w600k_mbf.onnx",
ArcFaceWeights.RESNET: "https://github.com/yakhyo/uniface/releases/download/weights/w600k_r50.onnx",
# SCRFD
SCRFDWeights.SCRFD_10G_KPS: 'https://github.com/yakhyo/uniface/releases/download/weights/scrfd_10g_kps.onnx',
SCRFDWeights.SCRFD_500M_KPS: 'https://github.com/yakhyo/uniface/releases/download/weights/scrfd_500m_kps.onnx',
SCRFDWeights.SCRFD_10G_KPS: "https://github.com/yakhyo/uniface/releases/download/weights/scrfd_10g_kps.onnx",
SCRFDWeights.SCRFD_500M_KPS: "https://github.com/yakhyo/uniface/releases/download/weights/scrfd_500m_kps.onnx",
# DDAFM
DDAMFNWeights.AFFECNET7: 'https://github.com/yakhyo/uniface/releases/download/weights/affecnet7.script',
DDAMFNWeights.AFFECNET8: 'https://github.com/yakhyo/uniface/releases/download/weights/affecnet8.script',
DDAMFNWeights.AFFECNET7: "https://github.com/yakhyo/uniface/releases/download/weights/affecnet7.script",
DDAMFNWeights.AFFECNET8: "https://github.com/yakhyo/uniface/releases/download/weights/affecnet8.script",
# AgeGender
AgeGenderWeights.DEFAULT: 'https://github.com/yakhyo/uniface/releases/download/weights/genderage.onnx',
AgeGenderWeights.DEFAULT: "https://github.com/yakhyo/uniface/releases/download/weights/genderage.onnx",
# Landmarks
LandmarkWeights.DEFAULT: 'https://github.com/yakhyo/uniface/releases/download/weights/2d106det.onnx',
LandmarkWeights.DEFAULT: "https://github.com/yakhyo/uniface/releases/download/weights/2d106det.onnx",
}
MODEL_SHA256: Dict[Enum, str] = {
# RetinaFace
RetinaFaceWeights.MNET_025: 'b7a7acab55e104dce6f32cdfff929bd83946da5cd869b9e2e9bdffafd1b7e4a5',
RetinaFaceWeights.MNET_050: 'd8977186f6037999af5b4113d42ba77a84a6ab0c996b17c713cc3d53b88bfc37',
RetinaFaceWeights.MNET_V1: '75c961aaf0aff03d13c074e9ec656e5510e174454dd4964a161aab4fe5f04153',
RetinaFaceWeights.MNET_V2: '3ca44c045651cabeed1193a1fae8946ad1f3a55da8fa74b341feab5a8319f757',
RetinaFaceWeights.RESNET18: 'e8b5ddd7d2c3c8f7c942f9f10cec09d8e319f78f09725d3f709631de34fb649d',
RetinaFaceWeights.RESNET34: 'bd0263dc2a465d32859555cb1741f2d98991eb0053696e8ee33fec583d30e630',
RetinaFaceWeights.MNET_025: "b7a7acab55e104dce6f32cdfff929bd83946da5cd869b9e2e9bdffafd1b7e4a5",
RetinaFaceWeights.MNET_050: "d8977186f6037999af5b4113d42ba77a84a6ab0c996b17c713cc3d53b88bfc37",
RetinaFaceWeights.MNET_V1: "75c961aaf0aff03d13c074e9ec656e5510e174454dd4964a161aab4fe5f04153",
RetinaFaceWeights.MNET_V2: "3ca44c045651cabeed1193a1fae8946ad1f3a55da8fa74b341feab5a8319f757",
RetinaFaceWeights.RESNET18: "e8b5ddd7d2c3c8f7c942f9f10cec09d8e319f78f09725d3f709631de34fb649d",
RetinaFaceWeights.RESNET34: "bd0263dc2a465d32859555cb1741f2d98991eb0053696e8ee33fec583d30e630",
# MobileFace
MobileFaceWeights.MNET_025: 'eeda7d23d9c2b40cf77fa8da8e895b5697465192648852216074679657f8ee8b',
MobileFaceWeights.MNET_V2: '38b148284dd48cc898d5d4453104252fbdcbacc105fe3f0b80e78954d9d20d89',
MobileFaceWeights.MNET_V3_SMALL: 'd4acafa1039a82957aa8a9a1dac278a401c353a749c39df43de0e29cc1c127c3',
MobileFaceWeights.MNET_V3_LARGE: '0e48f8e11f070211716d03e5c65a3db35a5e917cfb5bc30552358629775a142a',
MobileFaceWeights.MNET_025: "eeda7d23d9c2b40cf77fa8da8e895b5697465192648852216074679657f8ee8b",
MobileFaceWeights.MNET_V2: "38b148284dd48cc898d5d4453104252fbdcbacc105fe3f0b80e78954d9d20d89",
MobileFaceWeights.MNET_V3_SMALL: "d4acafa1039a82957aa8a9a1dac278a401c353a749c39df43de0e29cc1c127c3",
MobileFaceWeights.MNET_V3_LARGE: "0e48f8e11f070211716d03e5c65a3db35a5e917cfb5bc30552358629775a142a",
# SphereFace
SphereFaceWeights.SPHERE20: 'c02878cf658eb1861f580b7e7144b0d27cc29c440bcaa6a99d466d2854f14c9d',
SphereFaceWeights.SPHERE36: '13b3890cd5d7dec2b63f7c36fd7ce07403e5a0bbb701d9647c0289e6cbe7bb20',
SphereFaceWeights.SPHERE20: "c02878cf658eb1861f580b7e7144b0d27cc29c440bcaa6a99d466d2854f14c9d",
SphereFaceWeights.SPHERE36: "13b3890cd5d7dec2b63f7c36fd7ce07403e5a0bbb701d9647c0289e6cbe7bb20",
# ArcFace
ArcFaceWeights.MNET: '9cc6e4a75f0e2bf0b1aed94578f144d15175f357bdc05e815e5c4a02b319eb4f',
ArcFaceWeights.RESNET: '4c06341c33c2ca1f86781dab0e829f88ad5b64be9fba56e56bc9ebdefc619e43',
ArcFaceWeights.MNET: "9cc6e4a75f0e2bf0b1aed94578f144d15175f357bdc05e815e5c4a02b319eb4f",
ArcFaceWeights.RESNET: "4c06341c33c2ca1f86781dab0e829f88ad5b64be9fba56e56bc9ebdefc619e43",
# SCRFD
SCRFDWeights.SCRFD_10G_KPS: '5838f7fe053675b1c7a08b633df49e7af5495cee0493c7dcf6697200b85b5b91',
SCRFDWeights.SCRFD_500M_KPS: '5e4447f50245bbd7966bd6c0fa52938c61474a04ec7def48753668a9d8b4ea3a',
SCRFDWeights.SCRFD_10G_KPS: "5838f7fe053675b1c7a08b633df49e7af5495cee0493c7dcf6697200b85b5b91",
SCRFDWeights.SCRFD_500M_KPS: "5e4447f50245bbd7966bd6c0fa52938c61474a04ec7def48753668a9d8b4ea3a",
# DDAFM
DDAMFNWeights.AFFECNET7: '10535bf8b6afe8e9d6ae26cea6c3add9a93036e9addb6adebfd4a972171d015d',
DDAMFNWeights.AFFECNET8: '8c66963bc71db42796a14dfcbfcd181b268b65a3fc16e87147d6a3a3d7e0f487',
DDAMFNWeights.AFFECNET7: "10535bf8b6afe8e9d6ae26cea6c3add9a93036e9addb6adebfd4a972171d015d",
DDAMFNWeights.AFFECNET8: "8c66963bc71db42796a14dfcbfcd181b268b65a3fc16e87147d6a3a3d7e0f487",
# AgeGender
AgeGenderWeights.DEFAULT: '4fde69b1c810857b88c64a335084f1c3fe8f01246c9a191b48c7bb756d6652fb',
AgeGenderWeights.DEFAULT: "4fde69b1c810857b88c64a335084f1c3fe8f01246c9a191b48c7bb756d6652fb",
# Landmark
LandmarkWeights.DEFAULT: 'f001b856447c413801ef5c42091ed0cd516fcd21f2d6b79635b1e733a7109dbf',
LandmarkWeights.DEFAULT: "f001b856447c413801ef5c42091ed0cd516fcd21f2d6b79635b1e733a7109dbf",
}
CHUNK_SIZE = 8192
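
Both tables above are plain enum-keyed dictionaries, so a weight's download URL and expected checksum can be looked up directly, which is what the model store below relies on:

from uniface.constants import MODEL_SHA256, MODEL_URLS, RetinaFaceWeights

key = RetinaFaceWeights.MNET_V2
print(MODEL_URLS[key])    # .../retinaface_mv2.onnx
print(MODEL_SHA256[key])  # 3ca44c04...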

View File

@@ -15,7 +15,7 @@ from .scrfd import SCRFD
_detector_cache: Dict[str, BaseDetector] = {}
def detect_faces(image: np.ndarray, method: str = 'retinaface', **kwargs) -> List[Dict[str, Any]]:
def detect_faces(image: np.ndarray, method: str = "retinaface", **kwargs) -> List[Dict[str, Any]]:
"""
High-level face detection function.
@@ -51,7 +51,7 @@ def detect_faces(image: np.ndarray, method: str = 'retinaface', **kwargs) -> Lis
return detector.detect(image)
def create_detector(method: str = 'retinaface', **kwargs) -> BaseDetector:
def create_detector(method: str = "retinaface", **kwargs) -> BaseDetector:
"""
Factory function to create face detectors.
@@ -89,18 +89,15 @@ def create_detector(method: str = 'retinaface', **kwargs) -> BaseDetector:
"""
method = method.lower()
if method == 'retinaface':
if method == "retinaface":
return RetinaFace(**kwargs)
elif method == 'scrfd':
elif method == "scrfd":
return SCRFD(**kwargs)
else:
available_methods = ['retinaface', 'scrfd']
raise ValueError(
f"Unsupported detection method: '{method}'. "
f"Available methods: {available_methods}"
)
available_methods = ["retinaface", "scrfd"]
raise ValueError(f"Unsupported detection method: '{method}'. Available methods: {available_methods}")
def list_available_detectors() -> Dict[str, Dict[str, Any]]:
@@ -111,36 +108,36 @@ def list_available_detectors() -> Dict[str, Dict[str, Any]]:
Dict[str, Dict[str, Any]]: Dictionary of detector information
"""
return {
'retinaface': {
'description': 'RetinaFace detector with high accuracy',
'supports_landmarks': True,
'paper': 'https://arxiv.org/abs/1905.00641',
'default_params': {
'model_name': 'mnet_v2',
'conf_thresh': 0.5,
'nms_thresh': 0.4,
'input_size': (640, 640)
}
"retinaface": {
"description": "RetinaFace detector with high accuracy",
"supports_landmarks": True,
"paper": "https://arxiv.org/abs/1905.00641",
"default_params": {
"model_name": "mnet_v2",
"conf_thresh": 0.5,
"nms_thresh": 0.4,
"input_size": (640, 640),
},
},
"scrfd": {
"description": "SCRFD detector - fast and accurate with efficient architecture",
"supports_landmarks": True,
"paper": "https://arxiv.org/abs/2105.04714",
"default_params": {
"model_name": "scrfd_10g_kps",
"conf_thresh": 0.5,
"nms_thresh": 0.4,
"input_size": (640, 640),
},
},
'scrfd': {
'description': 'SCRFD detector - fast and accurate with efficient architecture',
'supports_landmarks': True,
'paper': 'https://arxiv.org/abs/2105.04714',
'default_params': {
'model_name': 'scrfd_10g_kps',
'conf_thresh': 0.5,
'nms_thresh': 0.4,
'input_size': (640, 640)
}
}
}
__all__ = [
'detect_faces',
'create_detector',
'list_available_detectors',
'SCRFD',
'RetinaFace',
'BaseDetector',
"detect_faces",
"create_detector",
"list_available_detectors",
"SCRFD",
"RetinaFace",
"BaseDetector",
]
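
A short usage sketch of the factory API reformatted above (assumes uniface is installed; "photo.jpg" is a hypothetical path):

import cv2

from uniface.detection import create_detector, detect_faces, list_available_detectors

print(list(list_available_detectors()))    # ['retinaface', 'scrfd']

image = cv2.imread("photo.jpg")            # hypothetical input path

# One-shot call that reuses a cached default detector
faces = detect_faces(image, method="scrfd")

# Or build a detector explicitly and keep it around
detector = create_detector("retinaface", conf_thresh=0.6)
for face in detector.detect(image):
    print(face["bbox"], face["confidence"])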

View File

@@ -85,7 +85,7 @@ class BaseDetector(ABC):
Returns:
bool: True if landmarks are supported, False otherwise
"""
return hasattr(self, '_supports_landmarks') and self._supports_landmarks
return hasattr(self, "_supports_landmarks") and self._supports_landmarks
def get_info(self) -> Dict[str, Any]:
"""
@@ -95,7 +95,7 @@ class BaseDetector(ABC):
Dict[str, Any]: Detector information
"""
return {
'name': self.__class__.__name__,
'supports_landmarks': self._supports_landmarks,
'config': self.config
"name": self.__class__.__name__,
"supports_landmarks": self._supports_landmarks,
"config": self.config,
}

View File

@@ -12,7 +12,13 @@ from uniface.model_store import verify_model_weights
from uniface.onnx_utils import create_onnx_session
from .base import BaseDetector
from .utils import decode_boxes, decode_landmarks, generate_anchors, non_max_supression, resize_image
from .utils import (
decode_boxes,
decode_landmarks,
generate_anchors,
non_max_supression,
resize_image,
)
class RetinaFace(BaseDetector):
@@ -53,13 +59,13 @@ class RetinaFace(BaseDetector):
super().__init__(**kwargs)
self._supports_landmarks = True # RetinaFace supports landmarks
self.model_name = kwargs.get('model_name', RetinaFaceWeights.MNET_V2)
self.conf_thresh = kwargs.get('conf_thresh', 0.5)
self.nms_thresh = kwargs.get('nms_thresh', 0.4)
self.pre_nms_topk = kwargs.get('pre_nms_topk', 5000)
self.post_nms_topk = kwargs.get('post_nms_topk', 750)
self.dynamic_size = kwargs.get('dynamic_size', False)
self.input_size = kwargs.get('input_size', (640, 640))
self.model_name = kwargs.get("model_name", RetinaFaceWeights.MNET_V2)
self.conf_thresh = kwargs.get("conf_thresh", 0.5)
self.nms_thresh = kwargs.get("nms_thresh", 0.4)
self.pre_nms_topk = kwargs.get("pre_nms_topk", 5000)
self.post_nms_topk = kwargs.get("post_nms_topk", 750)
self.dynamic_size = kwargs.get("dynamic_size", False)
self.input_size = kwargs.get("input_size", (640, 640))
Logger.info(
f"Initializing RetinaFace with model={self.model_name}, conf_thresh={self.conf_thresh}, nms_thresh={self.nms_thresh}, "
@@ -127,7 +133,7 @@ class RetinaFace(BaseDetector):
image: np.ndarray,
max_num: int = 0,
metric: Literal["default", "max"] = "max",
center_weight: float = 2.0
center_weight: float = 2.0,
) -> List[Dict[str, Any]]:
"""
Perform face detection on an input image and return bounding boxes and facial landmarks.
@@ -172,14 +178,16 @@ class RetinaFace(BaseDetector):
# Calculate offsets from image center
center = (original_height // 2, original_width // 2)
offsets = np.vstack([
(detections[:, 0] + detections[:, 2]) / 2 - center[1],
(detections[:, 1] + detections[:, 3]) / 2 - center[0]
])
offsets = np.vstack(
[
(detections[:, 0] + detections[:, 2]) / 2 - center[1],
(detections[:, 1] + detections[:, 3]) / 2 - center[0],
]
)
offset_dist_squared = np.sum(np.power(offsets, 2.0), axis=0)
# Calculate scores based on the chosen metric
if metric == 'max':
if metric == "max":
scores = areas
else:
scores = areas - offset_dist_squared * center_weight
@@ -193,15 +201,17 @@ class RetinaFace(BaseDetector):
faces = []
for i in range(detections.shape[0]):
face_dict = {
'bbox': detections[i, :4].astype(float).tolist(),
'confidence': detections[i, 4].item(),
'landmarks': landmarks[i].astype(float).tolist()
"bbox": detections[i, :4].astype(float).tolist(),
"confidence": detections[i, 4].item(),
"landmarks": landmarks[i].astype(float).tolist(),
}
faces.append(face_dict)
return faces
def postprocess(self, outputs: List[np.ndarray], resize_factor: float, shape: Tuple[int, int]) -> Tuple[np.ndarray, np.ndarray]:
def postprocess(
self, outputs: List[np.ndarray], resize_factor: float, shape: Tuple[int, int]
) -> Tuple[np.ndarray, np.ndarray]:
"""
Process the model outputs into final detection results.
@@ -220,7 +230,11 @@ class RetinaFace(BaseDetector):
- landmarks (np.ndarray): Array of detected facial landmarks.
Shape: (num_detections, 5, 2), where each row contains 5 landmark points (x, y).
"""
loc, conf, landmarks = outputs[0].squeeze(0), outputs[1].squeeze(0), outputs[2].squeeze(0)
loc, conf, landmarks = (
outputs[0].squeeze(0),
outputs[1].squeeze(0),
outputs[2].squeeze(0),
)
# Decode boxes and landmarks
boxes = decode_boxes(loc, self._priors)
@@ -236,7 +250,7 @@ class RetinaFace(BaseDetector):
boxes, landmarks, scores = boxes[mask], landmarks[mask], scores[mask]
# Sort by scores
order = scores.argsort()[::-1][:self.pre_nms_topk]
order = scores.argsort()[::-1][: self.pre_nms_topk]
boxes, landmarks, scores = boxes[order], landmarks[order], scores[order]
# Apply NMS
@@ -245,13 +259,22 @@ class RetinaFace(BaseDetector):
detections, landmarks = detections[keep], landmarks[keep]
# Keep top-k detections
detections, landmarks = detections[:self.post_nms_topk], landmarks[:self.post_nms_topk]
detections, landmarks = (
detections[: self.post_nms_topk],
landmarks[: self.post_nms_topk],
)
landmarks = landmarks.reshape(-1, 5, 2).astype(np.int32)
return detections, landmarks
def _scale_detections(self, boxes: np.ndarray, landmarks: np.ndarray, resize_factor: float, shape: Tuple[int, int]) -> Tuple[np.ndarray, np.ndarray]:
def _scale_detections(
self,
boxes: np.ndarray,
landmarks: np.ndarray,
resize_factor: float,
shape: Tuple[int, int],
) -> Tuple[np.ndarray, np.ndarray]:
# Scale bounding boxes and landmarks to the original image size.
bbox_scale = np.array([shape[0], shape[1]] * 2)
boxes = boxes * bbox_scale / resize_factor
@@ -270,12 +293,13 @@ def draw_bbox(frame, bbox, score, color=(0, 255, 0), thickness=2):
def draw_keypoints(frame, points, color=(0, 0, 255), radius=2):
for (x, y) in points.astype(np.int32):
for x, y in points.astype(np.int32):
cv2.circle(frame, (int(x), int(y)), radius, color, -1)
if __name__ == "__main__":
import cv2
detector = RetinaFace(model_name=RetinaFaceWeights.MNET_050)
print(detector.get_info())
cap = cv2.VideoCapture(0)
@@ -298,9 +322,9 @@ if __name__ == "__main__":
# Process each detected face
for face in faces:
# Extract bbox and landmarks from dictionary
bbox = face['bbox'] # [x1, y1, x2, y2]
landmarks = face['landmarks'] # [[x1, y1], [x2, y2], ...]
confidence = face['confidence']
bbox = face["bbox"] # [x1, y1, x2, y2]
landmarks = face["landmarks"] # [[x1, y1], [x2, y2], ...]
confidence = face["confidence"]
# Pass bbox and confidence separately
draw_bbox(frame, bbox, confidence)
@@ -312,8 +336,15 @@ if __name__ == "__main__":
draw_keypoints(frame, points)
# Display face count
cv2.putText(frame, f"Faces: {len(faces)}", (10, 30),
cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
cv2.putText(
frame,
f"Faces: {len(faces)}",
(10, 30),
cv2.FONT_HERSHEY_SIMPLEX,
0.7,
(255, 255, 255),
2,
)
cv2.imshow("FaceDetection", frame)
if cv2.waitKey(1) & 0xFF == ord("q"):
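
For completeness, a hedged sketch of calling the rewrapped detect signature directly, including the max_num/metric arguments it documents (model weights download on first use; "photo.jpg" is a placeholder):

import cv2

from uniface.constants import RetinaFaceWeights
from uniface.detection import RetinaFace

detector = RetinaFace(model_name=RetinaFaceWeights.MNET_050, conf_thresh=0.5, nms_thresh=0.4)
image = cv2.imread("photo.jpg")   # hypothetical input path

# Keep at most two faces, ranked by area minus a center-distance penalty ("default" metric)
faces = detector.detect(image, max_num=2, metric="default", center_weight=2.0)
for face in faces:
    print(face["bbox"], face["confidence"], len(face["landmarks"]))  # 5 landmark points each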

View File

@@ -173,7 +173,11 @@ class SCRFD(BaseDetector):
return scores_list, bboxes_list, kpss_list
def detect(
self, image: np.ndarray, max_num: int = 0, metric: Literal["default", "max"] = "max", center_weight: float = 2
self,
image: np.ndarray,
max_num: int = 0,
metric: Literal["default", "max"] = "max",
center_weight: float = 2,
) -> List[Dict[str, Any]]:
"""
Perform face detection on an input image and return bounding boxes and facial landmarks.
@@ -311,7 +315,15 @@ if __name__ == "__main__":
draw_keypoints(frame, points)
# Display face count
cv2.putText(frame, f"Faces: {len(faces)}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
cv2.putText(
frame,
f"Faces: {len(faces)}",
(10, 30),
cv2.FONT_HERSHEY_SIMPLEX,
0.7,
(255, 255, 255),
2,
)
cv2.imshow("FaceDetection", frame)
if cv2.waitKey(1) & 0xFF == ord("q"):

View File

@@ -59,12 +59,7 @@ def generate_anchors(image_size: Tuple[int, int] = (640, 640)) -> np.ndarray:
min_sizes = [[16, 32], [64, 128], [256, 512]]
anchors = []
feature_maps = [
[
math.ceil(image_size[0] / step),
math.ceil(image_size[1] / step)
] for step in steps
]
feature_maps = [[math.ceil(image_size[0] / step), math.ceil(image_size[1] / step)] for step in steps]
for k, (map_height, map_width) in enumerate(feature_maps):
step = steps[k]

View File

@@ -8,7 +8,12 @@ import cv2
import numpy as np
from skimage.transform import SimilarityTransform
__all__ = ["face_alignment", "compute_similarity", "bbox_center_alignment", "transform_points_2d"]
__all__ = [
"face_alignment",
"compute_similarity",
"bbox_center_alignment",
"transform_points_2d",
]
# Reference alignment for facial landmarks (ArcFace)
@@ -18,9 +23,9 @@ reference_alignment: np.ndarray = np.array(
[73.5318, 51.5014],
[56.0252, 71.7366],
[41.5493, 92.3655],
[70.7299, 92.2041]
[70.7299, 92.2041],
],
dtype=np.float32
dtype=np.float32,
)
@@ -72,7 +77,11 @@ def estimate_norm(landmark: np.ndarray, image_size: Union[int, Tuple[int, int]]
return matrix, inverse_matrix
def face_alignment(image: np.ndarray, landmark: np.ndarray, image_size: Union[int, Tuple[int, int]] = 112) -> Tuple[np.ndarray, np.ndarray]:
def face_alignment(
image: np.ndarray,
landmark: np.ndarray,
image_size: Union[int, Tuple[int, int]] = 112,
) -> Tuple[np.ndarray, np.ndarray]:
"""
Align the face in the input image based on the given facial landmarks.
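
A minimal sketch tying a detector's five-point landmarks to face_alignment above. This hunk does not show the module's file path, so the import below is an assumption, and the second return value is inferred from the Tuple annotation (most likely the transform matrix):

import cv2
import numpy as np

from uniface.detection import detect_faces
# Assumption: hypothetical import path for the alignment helpers shown above
from uniface.face_alignment import face_alignment

image = cv2.imread("photo.jpg")                               # hypothetical input path
face = detect_faces(image)[0]
five_points = np.array(face["landmarks"], dtype=np.float32)   # (5, 2) points

aligned, matrix = face_alignment(image, five_points, image_size=112)
print(aligned.shape)   # expected (112, 112, 3): crop warped to the ArcFace reference template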

View File

@@ -6,7 +6,7 @@ from .base import BaseLandmarker
from .models import Landmark106
def create_landmarker(method: str = '2d106det', **kwargs) -> BaseLandmarker:
def create_landmarker(method: str = "2d106det", **kwargs) -> BaseLandmarker:
"""
Factory function to create facial landmark predictors.
@@ -18,15 +18,11 @@ def create_landmarker(method: str = '2d106det', **kwargs) -> BaseLandmarker:
Initialized landmarker instance.
"""
method = method.lower()
if method == '2d106det':
if method == "2d106det":
return Landmark106(**kwargs)
else:
available = ['2d106det']
available = ["2d106det"]
raise ValueError(f"Unsupported method: '{method}'. Available: {available}")
__all__ = [
"create_landmarker",
"Landmark106",
"BaseLandmarker"
]
__all__ = ["create_landmarker", "Landmark106", "BaseLandmarker"]

View File

@@ -11,6 +11,7 @@ class BaseLandmarker(ABC):
"""
Abstract Base Class for all facial landmark models.
"""
@abstractmethod
def get_landmarks(self, image: np.ndarray, bbox: np.ndarray) -> np.ndarray:
"""

View File

@@ -15,7 +15,7 @@ from uniface.onnx_utils import create_onnx_session
from .base import BaseLandmarker
__all__ = ['Landmark']
__all__ = ["Landmark"]
class Landmark106(BaseLandmarker):
@@ -42,15 +42,13 @@ class Landmark106(BaseLandmarker):
>>> print(landmarks.shape)
(106, 2)
"""
def __init__(
self,
model_name: LandmarkWeights = LandmarkWeights.DEFAULT,
input_size: Tuple[int, int] = (192, 192)
input_size: Tuple[int, int] = (192, 192),
) -> None:
Logger.info(
f"Initializing Facial Landmark with model={model_name}, "
f"input_size={input_size}"
)
Logger.info(f"Initializing Facial Landmark with model={model_name}, input_size={input_size}")
self.input_size = input_size
self.input_std = 1.0
self.input_mean = 0.0
@@ -85,7 +83,7 @@ class Landmark106(BaseLandmarker):
except Exception as e:
Logger.error(f"Failed to load landmark model from '{self.model_path}'", exc_info=True)
raise RuntimeError(f"Failed to initialize landmark model: {e}")
raise RuntimeError(f"Failed to initialize landmark model: {e}") from e
def preprocess(self, image: np.ndarray, bbox: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
"""Prepares a face crop for inference.
@@ -110,8 +108,11 @@ class Landmark106(BaseLandmarker):
aligned_face, transform_matrix = bbox_center_alignment(image, center, self.input_size[0], scale, 0.0)
face_blob = cv2.dnn.blobFromImage(
aligned_face, 1.0 / self.input_std, self.input_size,
(self.input_mean, self.input_mean, self.input_mean), swapRB=True
aligned_face,
1.0 / self.input_std,
self.input_size,
(self.input_mean, self.input_mean, self.input_mean),
swapRB=True,
)
return face_blob, transform_matrix
@@ -131,7 +132,7 @@ class Landmark106(BaseLandmarker):
"""
landmarks = predictions.reshape((-1, 2))
landmarks[:, 0:2] += 1
landmarks[:, 0:2] *= (self.input_size[0] // 2)
landmarks[:, 0:2] *= self.input_size[0] // 2
inverse_matrix = cv2.invertAffineTransform(transform_matrix)
landmarks = transform_points_2d(landmarks, inverse_matrix)
@@ -151,14 +152,11 @@ class Landmark106(BaseLandmarker):
np.ndarray: An array of predicted landmark points with shape (106, 2).
"""
face_blob, transform_matrix = self.preprocess(image, bbox)
raw_predictions = self.session.run(
self.output_names, {self.input_names[0]: face_blob}
)[0][0]
raw_predictions = self.session.run(self.output_names, {self.input_names[0]: face_blob})[0][0]
landmarks = self.postprocess(raw_predictions, transform_matrix)
return landmarks
# Testing code
if __name__ == "__main__":
from uniface.detection import RetinaFace
@@ -185,21 +183,21 @@ if __name__ == "__main__":
if not faces:
cv2.imshow("Facial Landmark Detection", frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
if cv2.waitKey(1) & 0xFF == ord("q"):
break
continue
# 3. Loop through the list of face dictionaries
for face in faces:
# Extract the bounding box
bbox = face['bbox']
bbox = face["bbox"]
# 4. Get landmarks for the current face using its bounding box
landmarks = landmarker.get_landmarks(frame, bbox)
# --- Drawing Logic ---
# Draw the landmarks
for (x, y) in landmarks.astype(int):
for x, y in landmarks.astype(int):
cv2.circle(frame, (x, y), 2, (0, 255, 0), -1)
# Draw the bounding box
@@ -207,7 +205,7 @@ if __name__ == "__main__":
cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)
cv2.imshow("Facial Landmark Detection", frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
if cv2.waitKey(1) & 0xFF == ord("q"):
break
cap.release()
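
The reindented postprocess arithmetic is easier to see with numbers: the shift by +1 and scale by input_size[0] // 2 maps raw outputs in roughly [-1, 1] onto the 192x192 aligned crop, after which the inverse affine transform carries the points back to the original frame. A tiny numeric sketch:

import numpy as np

raw = np.array([[-1.0, -1.0], [0.0, 0.0], [1.0, 1.0]])   # illustrative raw outputs
crop_coords = (raw + 1) * (192 // 2)                      # same shift/scale as postprocess
print(crop_coords)   # [[0. 0.] [96. 96.] [192. 192.]]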

View File

@@ -19,10 +19,7 @@ def enable_logging(level=logging.INFO):
"""
Logger.handlers.clear()
handler = logging.StreamHandler()
handler.setFormatter(logging.Formatter(
"%(asctime)s - %(levelname)s - %(message)s",
datefmt="%Y-%m-%d %H:%M:%S"
))
handler.setFormatter(logging.Formatter("%(asctime)s - %(levelname)s - %(message)s", datefmt="%Y-%m-%d %H:%M:%S"))
Logger.addHandler(handler)
Logger.setLevel(level)
Logger.propagate = False
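
The condensed formatter call keeps enable_logging a one-call opt-in, e.g.:

import logging

from uniface.log import enable_logging

enable_logging(logging.DEBUG)   # timestamped "LEVEL - message" lines on the stream handler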

View File

@@ -11,10 +11,10 @@ from tqdm import tqdm
import uniface.constants as const
from uniface.log import Logger
__all__ = ['verify_model_weights']
__all__ = ["verify_model_weights"]
def verify_model_weights(model_name: str, root: str = '~/.uniface/models') -> str:
def verify_model_weights(model_name: str, root: str = "~/.uniface/models") -> str:
"""
Ensure model weights are present, downloading and verifying them using SHA-256 if necessary.
@@ -53,7 +53,7 @@ def verify_model_weights(model_name: str, root: str = '~/.uniface/models') -> st
raise ValueError(f"No URL found for model '{model_name}'")
file_ext = os.path.splitext(url)[1]
model_path = os.path.normpath(os.path.join(root, f'{model_name.value}{file_ext}'))
model_path = os.path.normpath(os.path.join(root, f"{model_name.value}{file_ext}"))
if not os.path.exists(model_path):
Logger.info(f"Downloading model '{model_name}' from {url}")
@@ -62,7 +62,7 @@ def verify_model_weights(model_name: str, root: str = '~/.uniface/models') -> st
Logger.info(f"Successfully downloaded '{model_name}' to {model_path}")
except Exception as e:
Logger.error(f"Failed to download model '{model_name}': {e}")
raise ConnectionError(f"Download failed for '{model_name}'")
raise ConnectionError(f"Download failed for '{model_name}'") from e
expected_hash = const.MODEL_SHA256.get(model_name)
if expected_hash and not verify_file_hash(model_path, expected_hash):
@@ -78,18 +78,21 @@ def download_file(url: str, dest_path: str) -> None:
try:
response = requests.get(url, stream=True)
response.raise_for_status()
with open(dest_path, "wb") as file, tqdm(
desc=f"Downloading {dest_path}",
unit='B',
unit_scale=True,
unit_divisor=1024
) as progress:
with (
open(dest_path, "wb") as file,
tqdm(
desc=f"Downloading {dest_path}",
unit="B",
unit_scale=True,
unit_divisor=1024,
) as progress,
):
for chunk in response.iter_content(chunk_size=const.CHUNK_SIZE):
if chunk:
file.write(chunk)
progress.update(len(chunk))
except requests.RequestException as e:
raise ConnectionError(f"Failed to download file from {url}. Error: {e}")
raise ConnectionError(f"Failed to download file from {url}. Error: {e}") from e
def verify_file_hash(file_path: str, expected_hash: str) -> bool:
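
The with (...) block above is the parenthesized multi-context-manager form (supported since Python 3.10); the download loop itself is unchanged. For reference, a hedged sketch of the streaming SHA-256 check that verify_model_weights relies on, mirroring verify_file_hash (whose body is not shown in this hunk):

import hashlib

CHUNK_SIZE = 8192  # matches the constant in the constants.py hunk above

def sha256_of(file_path: str) -> str:
    # Stream the file in chunks so large .onnx weights never load fully into memory
    digest = hashlib.sha256()
    with open(file_path, "rb") as f:
        for chunk in iter(lambda: f.read(CHUNK_SIZE), b""):
            digest.update(chunk)
    return digest.hexdigest()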

View File

@@ -19,6 +19,7 @@ class PreprocessConfig:
"""
Configuration for preprocessing images before feeding them into the model.
"""
input_mean: Union[float, List[float]] = 127.5
input_std: Union[float, List[float]] = 127.5
input_size: Tuple[int, int] = (112, 112)
@@ -29,6 +30,7 @@ class BaseRecognizer(ABC):
Abstract Base Class for all face recognition models.
It provides the core functionality for preprocessing, inference, and embedding extraction.
"""
@abstractmethod
def __init__(self, model_path: str, preprocessing: PreprocessConfig) -> None:
"""
@@ -74,7 +76,10 @@ class BaseRecognizer(ABC):
Logger.info(f"Successfully initialized face encoder from {self.model_path}")
except Exception as e:
Logger.error(f"Failed to load face encoder model from '{self.model_path}'", exc_info=True)
Logger.error(
f"Failed to load face encoder model from '{self.model_path}'",
exc_info=True,
)
raise RuntimeError(f"Failed to initialize model session for '{self.model_path}'") from e
def preprocess(self, face_img: np.ndarray) -> np.ndarray:
@@ -92,8 +97,9 @@ class BaseRecognizer(ABC):
if isinstance(self.input_std, (list, tuple)):
# Per-channel normalization
rgb_img = cv2.cvtColor(resized_img, cv2.COLOR_BGR2RGB).astype(np.float32)
normalized_img = (rgb_img - np.array(self.input_mean, dtype=np.float32)) / \
np.array(self.input_std, dtype=np.float32)
normalized_img = (rgb_img - np.array(self.input_mean, dtype=np.float32)) / np.array(
self.input_std, dtype=np.float32
)
# Change to NCHW (batch, channels, height, width)
blob = np.transpose(normalized_img, (2, 0, 1)) # CHW
@@ -105,7 +111,7 @@ class BaseRecognizer(ABC):
scalefactor=1.0 / self.input_std,
size=self.input_size,
mean=(self.input_mean, self.input_mean, self.input_mean),
swapRB=True # Convert BGR to RGB
swapRB=True, # Convert BGR to RGB
)
return blob
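
The rewrapped normalization covers two paths; a self-contained sketch of what each produces, using the default PreprocessConfig values (127.5 mean/std, 112x112 input) and a random stand-in face crop:

import cv2
import numpy as np

face = np.random.randint(0, 256, (160, 140, 3), dtype=np.uint8)  # stand-in BGR crop
resized = cv2.resize(face, (112, 112))

# Scalar mean/std: the cv2.dnn.blobFromImage path (RGB, NCHW, normalized)
blob = cv2.dnn.blobFromImage(
    resized, scalefactor=1.0 / 127.5, size=(112, 112), mean=(127.5, 127.5, 127.5), swapRB=True
)
print(blob.shape)   # (1, 3, 112, 112)

# Per-channel mean/std: the manual path shown above
rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB).astype(np.float32)
normalized = (rgb - np.array([127.5] * 3, dtype=np.float32)) / np.array([127.5] * 3, dtype=np.float32)
chw = np.transpose(normalized, (2, 0, 1))   # CHW, as in BaseRecognizer.preprocess
print(chw.shape)    # (3, 112, 112)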

View File

@@ -34,14 +34,10 @@ class ArcFace(BaseRecognizer):
def __init__(
self,
model_name: ArcFaceWeights = ArcFaceWeights.MNET,
preprocessing: Optional[PreprocessConfig] = None
preprocessing: Optional[PreprocessConfig] = None,
) -> None:
if preprocessing is None:
preprocessing = PreprocessConfig(
input_mean=127.5,
input_std=127.5,
input_size=(112, 112)
)
preprocessing = PreprocessConfig(input_mean=127.5, input_std=127.5, input_size=(112, 112))
model_path = verify_model_weights(model_name)
super().__init__(model_path=model_path, preprocessing=preprocessing)
@@ -68,14 +64,10 @@ class MobileFace(BaseRecognizer):
def __init__(
self,
model_name: MobileFaceWeights = MobileFaceWeights.MNET_V2,
preprocessing: Optional[PreprocessConfig] = None
preprocessing: Optional[PreprocessConfig] = None,
) -> None:
if preprocessing is None:
preprocessing = PreprocessConfig(
input_mean=127.5,
input_std=127.5,
input_size=(112, 112)
)
preprocessing = PreprocessConfig(input_mean=127.5, input_std=127.5, input_size=(112, 112))
model_path = verify_model_weights(model_name)
super().__init__(model_path=model_path, preprocessing=preprocessing)
@@ -102,14 +94,10 @@ class SphereFace(BaseRecognizer):
def __init__(
self,
model_name: SphereFaceWeights = SphereFaceWeights.SPHERE20,
preprocessing: Optional[PreprocessConfig] = None
preprocessing: Optional[PreprocessConfig] = None,
) -> None:
if preprocessing is None:
preprocessing = PreprocessConfig(
input_mean=127.5,
input_std=127.5,
input_size=(112, 112)
)
preprocessing = PreprocessConfig(input_mean=127.5, input_std=127.5, input_size=(112, 112))
model_path = verify_model_weights(model_name)
super().__init__(model_path=model_path, preprocessing=preprocessing)

View File

@@ -44,7 +44,13 @@ def draw_detections(
# Draw score
cv2.putText(
image, f"{score:.2f}", (bbox[0], bbox[1] - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), thickness
image,
f"{score:.2f}",
(bbox[0], bbox[1] - 10),
cv2.FONT_HERSHEY_SIMPLEX,
0.5,
(255, 255, 255),
thickness,
)
# Draw landmarks