mirror of https://github.com/yakhyo/uniface.git
synced 2025-12-30 09:02:25 +00:00
chore: Code style formatting changes

@@ -27,7 +27,13 @@ try:
     from .attribute import Emotion
 except ImportError:
     Emotion = None  # PyTorch not installed
-from .detection import SCRFD, RetinaFace, create_detector, detect_faces, list_available_detectors
+from .detection import (
+    SCRFD,
+    RetinaFace,
+    create_detector,
+    detect_faces,
+    list_available_detectors,
+)
 from .landmark import Landmark106, create_landmarker
 from .recognition import ArcFace, MobileFace, SphereFace, create_recognizer
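
The try/except above makes Emotion an optional export: it is None whenever PyTorch is missing. A minimal caller-side guard, assuming only the names this hunk exports:

# Sketch: guard the optional Emotion export before instantiating it.
from uniface import Emotion

if Emotion is None:
    raise SystemExit("PyTorch is required for emotion prediction")
predictor = Emotion()  # constructor arguments are not shown in this diff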

@@ -13,18 +13,14 @@ from uniface.constants import AgeGenderWeights, DDAMFNWeights

 # Emotion requires PyTorch - make it optional
 try:
     from uniface.attribute.emotion import Emotion

     _EMOTION_AVAILABLE = True
 except ImportError:
     Emotion = None
     _EMOTION_AVAILABLE = False

 # Public API for the attribute module
-__all__ = [
-    "AgeGender",
-    "Emotion",
-    "create_attribute_predictor",
-    "predict_attributes"
-]
+__all__ = ["AgeGender", "Emotion", "create_attribute_predictor", "predict_attributes"]

 # A mapping from model enums to their corresponding attribute classes
 _ATTRIBUTE_MODELS = {

@@ -36,10 +32,7 @@ if _EMOTION_AVAILABLE:
     _ATTRIBUTE_MODELS.update({model: Emotion for model in DDAMFNWeights})


-def create_attribute_predictor(
-    model_name: Union[AgeGenderWeights, DDAMFNWeights],
-    **kwargs: Any
-) -> Attribute:
+def create_attribute_predictor(model_name: Union[AgeGenderWeights, DDAMFNWeights], **kwargs: Any) -> Attribute:
     """
     Factory function to create an attribute predictor instance.

@@ -60,17 +53,16 @@ def create_attribute_predictor(
     model_class = _ATTRIBUTE_MODELS.get(model_name)

     if model_class is None:
-        raise ValueError(f"Unsupported attribute model: {model_name}. "
-                         f"Please choose from AgeGenderWeights or DDAMFNWeights.")
+        raise ValueError(
+            f"Unsupported attribute model: {model_name}. Please choose from AgeGenderWeights or DDAMFNWeights."
+        )

     # Pass model_name to the constructor, as some classes might need it
     return model_class(model_name=model_name, **kwargs)


 def predict_attributes(
-    image: np.ndarray,
-    detections: List[Dict[str, np.ndarray]],
-    predictor: Attribute
+    image: np.ndarray, detections: List[Dict[str, np.ndarray]], predictor: Attribute
 ) -> List[Dict[str, Any]]:
     """
     High-level API to predict attributes for multiple detected faces.

@@ -92,16 +84,16 @@ def predict_attributes(
     """
     for face in detections:
         # Initialize attributes dict if it doesn't exist
-        if 'attributes' not in face:
-            face['attributes'] = {}
+        if "attributes" not in face:
+            face["attributes"] = {}

         if isinstance(predictor, AgeGender):
-            gender, age = predictor(image, face['bbox'])
-            face['attributes']['gender'] = gender
-            face['attributes']['age'] = age
+            gender, age = predictor(image, face["bbox"])
+            face["attributes"]["gender"] = gender
+            face["attributes"]["age"] = age
         elif isinstance(predictor, Emotion):
-            emotion, confidence = predictor(image, face['landmark'])
-            face['attributes']['emotion'] = emotion
-            face['attributes']['confidence'] = confidence
+            emotion, confidence = predictor(image, face["landmark"])
+            face["attributes"]["emotion"] = emotion
+            face["attributes"]["confidence"] = confidence

     return detections
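
Taken together, the factory and the high-level helper compose like this. A hedged end-to-end sketch (the image path and keyword defaults are assumptions, not part of this diff):

# Detect faces, then enrich each detection dict in place with attributes.
import cv2

from uniface import create_detector
from uniface.attribute import create_attribute_predictor, predict_attributes
from uniface.constants import AgeGenderWeights

image = cv2.imread("face.jpg")  # assumed sample image
detector = create_detector("retinaface")
faces = detector.detect(image)  # dicts with "bbox", "confidence", "landmarks"
predictor = create_attribute_predictor(AgeGenderWeights.DEFAULT)
faces = predict_attributes(image, faces, predictor)
print(faces[0]["attributes"])  # e.g. {"gender": ..., "age": ...}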

@@ -51,8 +51,11 @@ class AgeGender(Attribute):
             self.output_names = [output.name for output in self.session.get_outputs()]
             Logger.info(f"Successfully initialized AgeGender model with input size {self.input_size}")
         except Exception as e:
-            Logger.error(f"Failed to load AgeGender model from '{self.model_path}'", exc_info=True)
-            raise RuntimeError(f"Failed to initialize AgeGender model: {e}")
+            Logger.error(
+                f"Failed to load AgeGender model from '{self.model_path}'",
+                exc_info=True,
+            )
+            raise RuntimeError(f"Failed to initialize AgeGender model: {e}") from e

     def preprocess(self, image: np.ndarray, bbox: Union[List, np.ndarray]) -> np.ndarray:
         """
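
The new "raise ... from e" preserves the original failure as __cause__, so the low-level loading error is not lost. A self-contained sketch of the semantics:

# "from e" chains the low-level exception onto the high-level one.
try:
    try:
        raise FileNotFoundError("model.onnx")
    except Exception as e:
        raise RuntimeError("Failed to initialize AgeGender model") from e
except RuntimeError as err:
    assert isinstance(err.__cause__, FileNotFoundError)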

@@ -76,7 +79,11 @@ class AgeGender(Attribute):
         aligned_face, _ = bbox_center_alignment(image, center, self.input_size[1], scale, rotation)

         blob = cv2.dnn.blobFromImage(
-            aligned_face, scalefactor=1.0, size=self.input_size[::-1], mean=(0.0, 0.0, 0.0), swapRB=True
+            aligned_face,
+            scalefactor=1.0,
+            size=self.input_size[::-1],
+            mean=(0.0, 0.0, 0.0),
+            swapRB=True,
         )
         return blob
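
One detail worth noting in the reflowed call: cv2.dnn.blobFromImage takes size as (width, height), which is why the (height, width) input_size is reversed with [::-1]. A quick standalone check:

# blobFromImage returns an NCHW float32 blob; size is (width, height).
import cv2
import numpy as np

img = np.zeros((100, 80, 3), dtype=np.uint8)  # H=100, W=80 dummy image
blob = cv2.dnn.blobFromImage(img, scalefactor=1.0, size=(64, 64), swapRB=True)
print(blob.shape)  # (1, 3, 64, 64)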

@@ -157,7 +164,15 @@ if __name__ == "__main__":
             # Prepare text and draw on the frame
             label = f"{gender}, {age}"
             cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
-            cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
+            cv2.putText(
+                frame,
+                label,
+                (x1, y1 - 10),
+                cv2.FONT_HERSHEY_SIMPLEX,
+                0.8,
+                (0, 255, 0),
+                2,
+            )

         # Display the resulting frame
         cv2.imshow("Age and Gender Inference (Press 'q' to quit)", frame)

@@ -44,7 +44,15 @@ class Emotion(Attribute):
         self.model_path = verify_model_weights(model_weights)

         # Define emotion labels based on the selected model
-        self.emotion_labels = ["Neutral", "Happy", "Sad", "Surprise", "Fear", "Disgust", "Angry"]
+        self.emotion_labels = [
+            "Neutral",
+            "Happy",
+            "Sad",
+            "Surprise",
+            "Fear",
+            "Disgust",
+            "Angry",
+        ]
         if model_weights == DDAMFNWeights.AFFECNET8:
             self.emotion_labels.append("Contempt")

@@ -64,7 +72,7 @@ class Emotion(Attribute):
             Logger.info(f"Successfully initialized Emotion model on {self.device}")
         except Exception as e:
             Logger.error(f"Failed to load Emotion model from '{self.model_path}'", exc_info=True)
-            raise RuntimeError(f"Failed to initialize Emotion model: {e}")
+            raise RuntimeError(f"Failed to initialize Emotion model: {e}") from e

     def preprocess(self, image: np.ndarray, landmark: Union[List, np.ndarray]) -> torch.Tensor:
         """

@@ -116,8 +124,8 @@ class Emotion(Attribute):

 # TODO: below is only for testing, remove it later
 if __name__ == "__main__":
-    from uniface.detection import create_detector
     from uniface.constants import RetinaFaceWeights
+    from uniface.detection import create_detector

     print("Initializing models for live inference...")
     # 1. Initialize the face detector

@@ -146,8 +154,8 @@ if __name__ == "__main__":

         # For each detected face, predict the emotion
         for detection in detections:
-            box = detection['bbox']
-            landmark = detection['landmarks']
+            box = detection["bbox"]
+            landmark = detection["landmarks"]
             x1, y1, x2, y2 = map(int, box)

             # Predict attributes using the landmark

@@ -156,13 +164,21 @@ if __name__ == "__main__":
             # Prepare text and draw on the frame
             label = f"{emotion} ({confidence:.2f})"
             cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)
-            cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 0, 0), 2)
+            cv2.putText(
+                frame,
+                label,
+                (x1, y1 - 10),
+                cv2.FONT_HERSHEY_SIMPLEX,
+                0.8,
+                (255, 0, 0),
+                2,
+            )

         # Display the resulting frame
         cv2.imshow("Emotion Inference (Press 'q' to quit)", frame)

         # Break the loop if 'q' is pressed
-        if cv2.waitKey(1) & 0xFF == ord('q'):
+        if cv2.waitKey(1) & 0xFF == ord("q"):
             break

     # Release resources

@@ -83,83 +83,65 @@ class LandmarkWeights(str, Enum):


 MODEL_URLS: Dict[Enum, str] = {
     # RetinaFace
-    RetinaFaceWeights.MNET_025: 'https://github.com/yakhyo/uniface/releases/download/weights/retinaface_mv1_0.25.onnx',
-    RetinaFaceWeights.MNET_050: 'https://github.com/yakhyo/uniface/releases/download/weights/retinaface_mv1_0.50.onnx',
-    RetinaFaceWeights.MNET_V1: 'https://github.com/yakhyo/uniface/releases/download/weights/retinaface_mv1.onnx',
-    RetinaFaceWeights.MNET_V2: 'https://github.com/yakhyo/uniface/releases/download/weights/retinaface_mv2.onnx',
-    RetinaFaceWeights.RESNET18: 'https://github.com/yakhyo/uniface/releases/download/weights/retinaface_r18.onnx',
-    RetinaFaceWeights.RESNET34: 'https://github.com/yakhyo/uniface/releases/download/weights/retinaface_r34.onnx',
+    RetinaFaceWeights.MNET_025: "https://github.com/yakhyo/uniface/releases/download/weights/retinaface_mv1_0.25.onnx",
+    RetinaFaceWeights.MNET_050: "https://github.com/yakhyo/uniface/releases/download/weights/retinaface_mv1_0.50.onnx",
+    RetinaFaceWeights.MNET_V1: "https://github.com/yakhyo/uniface/releases/download/weights/retinaface_mv1.onnx",
+    RetinaFaceWeights.MNET_V2: "https://github.com/yakhyo/uniface/releases/download/weights/retinaface_mv2.onnx",
+    RetinaFaceWeights.RESNET18: "https://github.com/yakhyo/uniface/releases/download/weights/retinaface_r18.onnx",
+    RetinaFaceWeights.RESNET34: "https://github.com/yakhyo/uniface/releases/download/weights/retinaface_r34.onnx",
     # MobileFace
-    MobileFaceWeights.MNET_025: 'https://github.com/yakhyo/uniface/releases/download/weights/mobilenetv1_0.25.onnx',
-    MobileFaceWeights.MNET_V2: 'https://github.com/yakhyo/uniface/releases/download/weights/mobilenetv2.onnx',
-    MobileFaceWeights.MNET_V3_SMALL: 'https://github.com/yakhyo/uniface/releases/download/weights/mobilenetv3_small.onnx',
-    MobileFaceWeights.MNET_V3_LARGE: 'https://github.com/yakhyo/uniface/releases/download/weights/mobilenetv3_large.onnx',
+    MobileFaceWeights.MNET_025: "https://github.com/yakhyo/uniface/releases/download/weights/mobilenetv1_0.25.onnx",
+    MobileFaceWeights.MNET_V2: "https://github.com/yakhyo/uniface/releases/download/weights/mobilenetv2.onnx",
+    MobileFaceWeights.MNET_V3_SMALL: "https://github.com/yakhyo/uniface/releases/download/weights/mobilenetv3_small.onnx",
+    MobileFaceWeights.MNET_V3_LARGE: "https://github.com/yakhyo/uniface/releases/download/weights/mobilenetv3_large.onnx",
     # SphereFace
-    SphereFaceWeights.SPHERE20: 'https://github.com/yakhyo/uniface/releases/download/weights/sphere20.onnx',
-    SphereFaceWeights.SPHERE36: 'https://github.com/yakhyo/uniface/releases/download/weights/sphere36.onnx',
+    SphereFaceWeights.SPHERE20: "https://github.com/yakhyo/uniface/releases/download/weights/sphere20.onnx",
+    SphereFaceWeights.SPHERE36: "https://github.com/yakhyo/uniface/releases/download/weights/sphere36.onnx",
     # ArcFace
-    ArcFaceWeights.MNET: 'https://github.com/yakhyo/uniface/releases/download/weights/w600k_mbf.onnx',
-    ArcFaceWeights.RESNET: 'https://github.com/yakhyo/uniface/releases/download/weights/w600k_r50.onnx',
+    ArcFaceWeights.MNET: "https://github.com/yakhyo/uniface/releases/download/weights/w600k_mbf.onnx",
+    ArcFaceWeights.RESNET: "https://github.com/yakhyo/uniface/releases/download/weights/w600k_r50.onnx",
     # SCRFD
-    SCRFDWeights.SCRFD_10G_KPS: 'https://github.com/yakhyo/uniface/releases/download/weights/scrfd_10g_kps.onnx',
-    SCRFDWeights.SCRFD_500M_KPS: 'https://github.com/yakhyo/uniface/releases/download/weights/scrfd_500m_kps.onnx',
+    SCRFDWeights.SCRFD_10G_KPS: "https://github.com/yakhyo/uniface/releases/download/weights/scrfd_10g_kps.onnx",
+    SCRFDWeights.SCRFD_500M_KPS: "https://github.com/yakhyo/uniface/releases/download/weights/scrfd_500m_kps.onnx",
     # DDAFM
-    DDAMFNWeights.AFFECNET7: 'https://github.com/yakhyo/uniface/releases/download/weights/affecnet7.script',
-    DDAMFNWeights.AFFECNET8: 'https://github.com/yakhyo/uniface/releases/download/weights/affecnet8.script',
+    DDAMFNWeights.AFFECNET7: "https://github.com/yakhyo/uniface/releases/download/weights/affecnet7.script",
+    DDAMFNWeights.AFFECNET8: "https://github.com/yakhyo/uniface/releases/download/weights/affecnet8.script",
     # AgeGender
-    AgeGenderWeights.DEFAULT: 'https://github.com/yakhyo/uniface/releases/download/weights/genderage.onnx',
+    AgeGenderWeights.DEFAULT: "https://github.com/yakhyo/uniface/releases/download/weights/genderage.onnx",
     # Landmarks
-    LandmarkWeights.DEFAULT: 'https://github.com/yakhyo/uniface/releases/download/weights/2d106det.onnx',
+    LandmarkWeights.DEFAULT: "https://github.com/yakhyo/uniface/releases/download/weights/2d106det.onnx",
 }

 MODEL_SHA256: Dict[Enum, str] = {
     # RetinaFace
-    RetinaFaceWeights.MNET_025: 'b7a7acab55e104dce6f32cdfff929bd83946da5cd869b9e2e9bdffafd1b7e4a5',
-    RetinaFaceWeights.MNET_050: 'd8977186f6037999af5b4113d42ba77a84a6ab0c996b17c713cc3d53b88bfc37',
-    RetinaFaceWeights.MNET_V1: '75c961aaf0aff03d13c074e9ec656e5510e174454dd4964a161aab4fe5f04153',
-    RetinaFaceWeights.MNET_V2: '3ca44c045651cabeed1193a1fae8946ad1f3a55da8fa74b341feab5a8319f757',
-    RetinaFaceWeights.RESNET18: 'e8b5ddd7d2c3c8f7c942f9f10cec09d8e319f78f09725d3f709631de34fb649d',
-    RetinaFaceWeights.RESNET34: 'bd0263dc2a465d32859555cb1741f2d98991eb0053696e8ee33fec583d30e630',
+    RetinaFaceWeights.MNET_025: "b7a7acab55e104dce6f32cdfff929bd83946da5cd869b9e2e9bdffafd1b7e4a5",
+    RetinaFaceWeights.MNET_050: "d8977186f6037999af5b4113d42ba77a84a6ab0c996b17c713cc3d53b88bfc37",
+    RetinaFaceWeights.MNET_V1: "75c961aaf0aff03d13c074e9ec656e5510e174454dd4964a161aab4fe5f04153",
+    RetinaFaceWeights.MNET_V2: "3ca44c045651cabeed1193a1fae8946ad1f3a55da8fa74b341feab5a8319f757",
+    RetinaFaceWeights.RESNET18: "e8b5ddd7d2c3c8f7c942f9f10cec09d8e319f78f09725d3f709631de34fb649d",
+    RetinaFaceWeights.RESNET34: "bd0263dc2a465d32859555cb1741f2d98991eb0053696e8ee33fec583d30e630",
     # MobileFace
-    MobileFaceWeights.MNET_025: 'eeda7d23d9c2b40cf77fa8da8e895b5697465192648852216074679657f8ee8b',
-    MobileFaceWeights.MNET_V2: '38b148284dd48cc898d5d4453104252fbdcbacc105fe3f0b80e78954d9d20d89',
-    MobileFaceWeights.MNET_V3_SMALL: 'd4acafa1039a82957aa8a9a1dac278a401c353a749c39df43de0e29cc1c127c3',
-    MobileFaceWeights.MNET_V3_LARGE: '0e48f8e11f070211716d03e5c65a3db35a5e917cfb5bc30552358629775a142a',
+    MobileFaceWeights.MNET_025: "eeda7d23d9c2b40cf77fa8da8e895b5697465192648852216074679657f8ee8b",
+    MobileFaceWeights.MNET_V2: "38b148284dd48cc898d5d4453104252fbdcbacc105fe3f0b80e78954d9d20d89",
+    MobileFaceWeights.MNET_V3_SMALL: "d4acafa1039a82957aa8a9a1dac278a401c353a749c39df43de0e29cc1c127c3",
+    MobileFaceWeights.MNET_V3_LARGE: "0e48f8e11f070211716d03e5c65a3db35a5e917cfb5bc30552358629775a142a",
     # SphereFace
-    SphereFaceWeights.SPHERE20: 'c02878cf658eb1861f580b7e7144b0d27cc29c440bcaa6a99d466d2854f14c9d',
-    SphereFaceWeights.SPHERE36: '13b3890cd5d7dec2b63f7c36fd7ce07403e5a0bbb701d9647c0289e6cbe7bb20',
+    SphereFaceWeights.SPHERE20: "c02878cf658eb1861f580b7e7144b0d27cc29c440bcaa6a99d466d2854f14c9d",
+    SphereFaceWeights.SPHERE36: "13b3890cd5d7dec2b63f7c36fd7ce07403e5a0bbb701d9647c0289e6cbe7bb20",
     # ArcFace
-    ArcFaceWeights.MNET: '9cc6e4a75f0e2bf0b1aed94578f144d15175f357bdc05e815e5c4a02b319eb4f',
-    ArcFaceWeights.RESNET: '4c06341c33c2ca1f86781dab0e829f88ad5b64be9fba56e56bc9ebdefc619e43',
+    ArcFaceWeights.MNET: "9cc6e4a75f0e2bf0b1aed94578f144d15175f357bdc05e815e5c4a02b319eb4f",
+    ArcFaceWeights.RESNET: "4c06341c33c2ca1f86781dab0e829f88ad5b64be9fba56e56bc9ebdefc619e43",
     # SCRFD
-    SCRFDWeights.SCRFD_10G_KPS: '5838f7fe053675b1c7a08b633df49e7af5495cee0493c7dcf6697200b85b5b91',
-    SCRFDWeights.SCRFD_500M_KPS: '5e4447f50245bbd7966bd6c0fa52938c61474a04ec7def48753668a9d8b4ea3a',
+    SCRFDWeights.SCRFD_10G_KPS: "5838f7fe053675b1c7a08b633df49e7af5495cee0493c7dcf6697200b85b5b91",
+    SCRFDWeights.SCRFD_500M_KPS: "5e4447f50245bbd7966bd6c0fa52938c61474a04ec7def48753668a9d8b4ea3a",
     # DDAFM
-    DDAMFNWeights.AFFECNET7: '10535bf8b6afe8e9d6ae26cea6c3add9a93036e9addb6adebfd4a972171d015d',
-    DDAMFNWeights.AFFECNET8: '8c66963bc71db42796a14dfcbfcd181b268b65a3fc16e87147d6a3a3d7e0f487',
+    DDAMFNWeights.AFFECNET7: "10535bf8b6afe8e9d6ae26cea6c3add9a93036e9addb6adebfd4a972171d015d",
+    DDAMFNWeights.AFFECNET8: "8c66963bc71db42796a14dfcbfcd181b268b65a3fc16e87147d6a3a3d7e0f487",
     # AgeGender
-    AgeGenderWeights.DEFAULT: '4fde69b1c810857b88c64a335084f1c3fe8f01246c9a191b48c7bb756d6652fb',
+    AgeGenderWeights.DEFAULT: "4fde69b1c810857b88c64a335084f1c3fe8f01246c9a191b48c7bb756d6652fb",
     # Landmark
-    LandmarkWeights.DEFAULT: 'f001b856447c413801ef5c42091ed0cd516fcd21f2d6b79635b1e733a7109dbf',
+    LandmarkWeights.DEFAULT: "f001b856447c413801ef5c42091ed0cd516fcd21f2d6b79635b1e733a7109dbf",
 }

 CHUNK_SIZE = 8192
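
The pins above let the loader reject corrupted or tampered downloads. A minimal standalone sketch of how such a pin is checked, reading in CHUNK_SIZE pieces (verify_file_hash in uniface.model_store does the real work; this version only illustrates the idea):

import hashlib

def sha256_of(path: str, chunk_size: int = 8192) -> str:
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()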

@@ -15,7 +15,7 @@ from .scrfd import SCRFD
 _detector_cache: Dict[str, BaseDetector] = {}


-def detect_faces(image: np.ndarray, method: str = 'retinaface', **kwargs) -> List[Dict[str, Any]]:
+def detect_faces(image: np.ndarray, method: str = "retinaface", **kwargs) -> List[Dict[str, Any]]:
     """
     High-level face detection function.

@@ -51,7 +51,7 @@ def detect_faces(image: np.ndarray, method: str = 'retinaface', **kwargs) -> Lis
     return detector.detect(image)


-def create_detector(method: str = 'retinaface', **kwargs) -> BaseDetector:
+def create_detector(method: str = "retinaface", **kwargs) -> BaseDetector:
     """
     Factory function to create face detectors.

@@ -89,18 +89,15 @@ def create_detector(method: str = 'retinaface', **kwargs) -> BaseDetector:
     """
     method = method.lower()

-    if method == 'retinaface':
+    if method == "retinaface":
         return RetinaFace(**kwargs)

-    elif method == 'scrfd':
+    elif method == "scrfd":
         return SCRFD(**kwargs)

     else:
-        available_methods = ['retinaface', 'scrfd']
-        raise ValueError(
-            f"Unsupported detection method: '{method}'. "
-            f"Available methods: {available_methods}"
-        )
+        available_methods = ["retinaface", "scrfd"]
+        raise ValueError(f"Unsupported detection method: '{method}'. Available methods: {available_methods}")


 def list_available_detectors() -> Dict[str, Dict[str, Any]]:

@@ -111,36 +108,36 @@ def list_available_detectors() -> Dict[str, Dict[str, Any]]:
         Dict[str, Dict[str, Any]]: Dictionary of detector information
     """
     return {
-        'retinaface': {
-            'description': 'RetinaFace detector with high accuracy',
-            'supports_landmarks': True,
-            'paper': 'https://arxiv.org/abs/1905.00641',
-            'default_params': {
-                'model_name': 'mnet_v2',
-                'conf_thresh': 0.5,
-                'nms_thresh': 0.4,
-                'input_size': (640, 640)
-            }
-        },
-        'scrfd': {
-            'description': 'SCRFD detector - fast and accurate with efficient architecture',
-            'supports_landmarks': True,
-            'paper': 'https://arxiv.org/abs/2105.04714',
-            'default_params': {
-                'model_name': 'scrfd_10g_kps',
-                'conf_thresh': 0.5,
-                'nms_thresh': 0.4,
-                'input_size': (640, 640)
-            }
-        }
+        "retinaface": {
+            "description": "RetinaFace detector with high accuracy",
+            "supports_landmarks": True,
+            "paper": "https://arxiv.org/abs/1905.00641",
+            "default_params": {
+                "model_name": "mnet_v2",
+                "conf_thresh": 0.5,
+                "nms_thresh": 0.4,
+                "input_size": (640, 640),
+            },
+        },
+        "scrfd": {
+            "description": "SCRFD detector - fast and accurate with efficient architecture",
+            "supports_landmarks": True,
+            "paper": "https://arxiv.org/abs/2105.04714",
+            "default_params": {
+                "model_name": "scrfd_10g_kps",
+                "conf_thresh": 0.5,
+                "nms_thresh": 0.4,
+                "input_size": (640, 640),
+            },
+        },
     }


 __all__ = [
-    'detect_faces',
-    'create_detector',
-    'list_available_detectors',
-    'SCRFD',
-    'RetinaFace',
-    'BaseDetector',
+    "detect_faces",
+    "create_detector",
+    "list_available_detectors",
+    "SCRFD",
+    "RetinaFace",
+    "BaseDetector",
 ]
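
For reference, the module-level API above can be exercised in a few lines. A hedged quick-start (the image path is an assumption; conf_thresh is forwarded to the detector as a keyword argument):

import cv2

from uniface import detect_faces

image = cv2.imread("group_photo.jpg")  # assumed sample image
faces = detect_faces(image, method="scrfd", conf_thresh=0.5)
for face in faces:
    print(face["bbox"], face["confidence"])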

@@ -85,7 +85,7 @@ class BaseDetector(ABC):
         Returns:
             bool: True if landmarks are supported, False otherwise
         """
-        return hasattr(self, '_supports_landmarks') and self._supports_landmarks
+        return hasattr(self, "_supports_landmarks") and self._supports_landmarks

     def get_info(self) -> Dict[str, Any]:
         """

@@ -95,7 +95,7 @@ class BaseDetector(ABC):
             Dict[str, Any]: Detector information
         """
         return {
-            'name': self.__class__.__name__,
-            'supports_landmarks': self._supports_landmarks,
-            'config': self.config
+            "name": self.__class__.__name__,
+            "supports_landmarks": self._supports_landmarks,
+            "config": self.config,
         }

@@ -12,7 +12,13 @@ from uniface.model_store import verify_model_weights
 from uniface.onnx_utils import create_onnx_session

 from .base import BaseDetector
-from .utils import decode_boxes, decode_landmarks, generate_anchors, non_max_supression, resize_image
+from .utils import (
+    decode_boxes,
+    decode_landmarks,
+    generate_anchors,
+    non_max_supression,
+    resize_image,
+)


 class RetinaFace(BaseDetector):

@@ -53,13 +59,13 @@ class RetinaFace(BaseDetector):
         super().__init__(**kwargs)
         self._supports_landmarks = True  # RetinaFace supports landmarks

-        self.model_name = kwargs.get('model_name', RetinaFaceWeights.MNET_V2)
-        self.conf_thresh = kwargs.get('conf_thresh', 0.5)
-        self.nms_thresh = kwargs.get('nms_thresh', 0.4)
-        self.pre_nms_topk = kwargs.get('pre_nms_topk', 5000)
-        self.post_nms_topk = kwargs.get('post_nms_topk', 750)
-        self.dynamic_size = kwargs.get('dynamic_size', False)
-        self.input_size = kwargs.get('input_size', (640, 640))
+        self.model_name = kwargs.get("model_name", RetinaFaceWeights.MNET_V2)
+        self.conf_thresh = kwargs.get("conf_thresh", 0.5)
+        self.nms_thresh = kwargs.get("nms_thresh", 0.4)
+        self.pre_nms_topk = kwargs.get("pre_nms_topk", 5000)
+        self.post_nms_topk = kwargs.get("post_nms_topk", 750)
+        self.dynamic_size = kwargs.get("dynamic_size", False)
+        self.input_size = kwargs.get("input_size", (640, 640))

         Logger.info(
             f"Initializing RetinaFace with model={self.model_name}, conf_thresh={self.conf_thresh}, nms_thresh={self.nms_thresh}, "

@@ -127,7 +133,7 @@ class RetinaFace(BaseDetector):
         image: np.ndarray,
         max_num: int = 0,
         metric: Literal["default", "max"] = "max",
-        center_weight: float = 2.0
+        center_weight: float = 2.0,
     ) -> List[Dict[str, Any]]:
         """
         Perform face detection on an input image and return bounding boxes and facial landmarks.

@@ -172,14 +178,16 @@ class RetinaFace(BaseDetector):

             # Calculate offsets from image center
             center = (original_height // 2, original_width // 2)
-            offsets = np.vstack([
-                (detections[:, 0] + detections[:, 2]) / 2 - center[1],
-                (detections[:, 1] + detections[:, 3]) / 2 - center[0]
-            ])
+            offsets = np.vstack(
+                [
+                    (detections[:, 0] + detections[:, 2]) / 2 - center[1],
+                    (detections[:, 1] + detections[:, 3]) / 2 - center[0],
+                ]
+            )
             offset_dist_squared = np.sum(np.power(offsets, 2.0), axis=0)

             # Calculate scores based on the chosen metric
-            if metric == 'max':
+            if metric == "max":
                 scores = areas
             else:
                 scores = areas - offset_dist_squared * center_weight
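
A small worked example of the "default" metric above: box area is rewarded, distance from the image center is penalized by center_weight, so a smaller but centered face can outrank a larger off-center one:

import numpy as np

areas = np.array([9000.0, 12000.0])  # two candidate boxes
offset_dist_squared = np.array([100.0, 4000.0])  # squared center offsets
scores = areas - offset_dist_squared * 2.0  # center_weight = 2.0
print(scores)  # [8800. 4000.] -> the smaller, centered box ranks first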

@@ -193,15 +201,17 @@ class RetinaFace(BaseDetector):
         faces = []
         for i in range(detections.shape[0]):
             face_dict = {
-                'bbox': detections[i, :4].astype(float).tolist(),
-                'confidence': detections[i, 4].item(),
-                'landmarks': landmarks[i].astype(float).tolist()
+                "bbox": detections[i, :4].astype(float).tolist(),
+                "confidence": detections[i, 4].item(),
+                "landmarks": landmarks[i].astype(float).tolist(),
             }
             faces.append(face_dict)

         return faces

-    def postprocess(self, outputs: List[np.ndarray], resize_factor: float, shape: Tuple[int, int]) -> Tuple[np.ndarray, np.ndarray]:
+    def postprocess(
+        self, outputs: List[np.ndarray], resize_factor: float, shape: Tuple[int, int]
+    ) -> Tuple[np.ndarray, np.ndarray]:
         """
         Process the model outputs into final detection results.

@@ -220,7 +230,11 @@ class RetinaFace(BaseDetector):
             - landmarks (np.ndarray): Array of detected facial landmarks.
               Shape: (num_detections, 5, 2), where each row contains 5 landmark points (x, y).
         """
-        loc, conf, landmarks = outputs[0].squeeze(0), outputs[1].squeeze(0), outputs[2].squeeze(0)
+        loc, conf, landmarks = (
+            outputs[0].squeeze(0),
+            outputs[1].squeeze(0),
+            outputs[2].squeeze(0),
+        )

         # Decode boxes and landmarks
         boxes = decode_boxes(loc, self._priors)

@@ -236,7 +250,7 @@ class RetinaFace(BaseDetector):
         boxes, landmarks, scores = boxes[mask], landmarks[mask], scores[mask]

         # Sort by scores
-        order = scores.argsort()[::-1][:self.pre_nms_topk]
+        order = scores.argsort()[::-1][: self.pre_nms_topk]
         boxes, landmarks, scores = boxes[order], landmarks[order], scores[order]

         # Apply NMS

@@ -245,13 +259,22 @@ class RetinaFace(BaseDetector):
         detections, landmarks = detections[keep], landmarks[keep]

         # Keep top-k detections
-        detections, landmarks = detections[:self.post_nms_topk], landmarks[:self.post_nms_topk]
+        detections, landmarks = (
+            detections[: self.post_nms_topk],
+            landmarks[: self.post_nms_topk],
+        )

         landmarks = landmarks.reshape(-1, 5, 2).astype(np.int32)

         return detections, landmarks

-    def _scale_detections(self, boxes: np.ndarray, landmarks: np.ndarray, resize_factor: float, shape: Tuple[int, int]) -> Tuple[np.ndarray, np.ndarray]:
+    def _scale_detections(
+        self,
+        boxes: np.ndarray,
+        landmarks: np.ndarray,
+        resize_factor: float,
+        shape: Tuple[int, int],
+    ) -> Tuple[np.ndarray, np.ndarray]:
         # Scale bounding boxes and landmarks to the original image size.
         bbox_scale = np.array([shape[0], shape[1]] * 2)
         boxes = boxes * bbox_scale / resize_factor

@@ -270,12 +293,13 @@ def draw_bbox(frame, bbox, score, color=(0, 255, 0), thickness=2):


 def draw_keypoints(frame, points, color=(0, 0, 255), radius=2):
-    for (x, y) in points.astype(np.int32):
+    for x, y in points.astype(np.int32):
         cv2.circle(frame, (int(x), int(y)), radius, color, -1)


 if __name__ == "__main__":
     import cv2

     detector = RetinaFace(model_name=RetinaFaceWeights.MNET_050)
     print(detector.get_info())
     cap = cv2.VideoCapture(0)

@@ -298,9 +322,9 @@ if __name__ == "__main__":
         # Process each detected face
         for face in faces:
             # Extract bbox and landmarks from dictionary
-            bbox = face['bbox']  # [x1, y1, x2, y2]
-            landmarks = face['landmarks']  # [[x1, y1], [x2, y2], ...]
-            confidence = face['confidence']
+            bbox = face["bbox"]  # [x1, y1, x2, y2]
+            landmarks = face["landmarks"]  # [[x1, y1], [x2, y2], ...]
+            confidence = face["confidence"]

             # Pass bbox and confidence separately
             draw_bbox(frame, bbox, confidence)

@@ -312,8 +336,15 @@ if __name__ == "__main__":
             draw_keypoints(frame, points)

         # Display face count
-        cv2.putText(frame, f"Faces: {len(faces)}", (10, 30),
-                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
+        cv2.putText(
+            frame,
+            f"Faces: {len(faces)}",
+            (10, 30),
+            cv2.FONT_HERSHEY_SIMPLEX,
+            0.7,
+            (255, 255, 255),
+            2,
+        )

         cv2.imshow("FaceDetection", frame)
         if cv2.waitKey(1) & 0xFF == ord("q"):

@@ -173,7 +173,11 @@ class SCRFD(BaseDetector):
         return scores_list, bboxes_list, kpss_list

     def detect(
-        self, image: np.ndarray, max_num: int = 0, metric: Literal["default", "max"] = "max", center_weight: float = 2
+        self,
+        image: np.ndarray,
+        max_num: int = 0,
+        metric: Literal["default", "max"] = "max",
+        center_weight: float = 2,
     ) -> List[Dict[str, Any]]:
         """
         Perform face detection on an input image and return bounding boxes and facial landmarks.

@@ -311,7 +315,15 @@ if __name__ == "__main__":
             draw_keypoints(frame, points)

         # Display face count
-        cv2.putText(frame, f"Faces: {len(faces)}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
+        cv2.putText(
+            frame,
+            f"Faces: {len(faces)}",
+            (10, 30),
+            cv2.FONT_HERSHEY_SIMPLEX,
+            0.7,
+            (255, 255, 255),
+            2,
+        )

         cv2.imshow("FaceDetection", frame)
         if cv2.waitKey(1) & 0xFF == ord("q"):

@@ -59,12 +59,7 @@ def generate_anchors(image_size: Tuple[int, int] = (640, 640)) -> np.ndarray:
     min_sizes = [[16, 32], [64, 128], [256, 512]]

     anchors = []
-    feature_maps = [
-        [
-            math.ceil(image_size[0] / step),
-            math.ceil(image_size[1] / step)
-        ] for step in steps
-    ]
+    feature_maps = [[math.ceil(image_size[0] / step), math.ceil(image_size[1] / step)] for step in steps]

     for k, (map_height, map_width) in enumerate(feature_maps):
         step = steps[k]
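
A worked example of the comprehension above, assuming steps = [8, 16, 32] (the strides used elsewhere in this function but not shown in this hunk):

import math

steps = [8, 16, 32]  # assumed strides
image_size = (640, 640)
feature_maps = [[math.ceil(image_size[0] / s), math.ceil(image_size[1] / s)] for s in steps]
print(feature_maps)  # [[80, 80], [40, 40], [20, 20]]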

@@ -8,7 +8,12 @@ import cv2
 import numpy as np
 from skimage.transform import SimilarityTransform

-__all__ = ["face_alignment", "compute_similarity", "bbox_center_alignment", "transform_points_2d"]
+__all__ = [
+    "face_alignment",
+    "compute_similarity",
+    "bbox_center_alignment",
+    "transform_points_2d",
+]


 # Reference alignment for facial landmarks (ArcFace)

@@ -18,9 +23,9 @@ reference_alignment: np.ndarray = np.array(
         [73.5318, 51.5014],
         [56.0252, 71.7366],
         [41.5493, 92.3655],
-        [70.7299, 92.2041]
+        [70.7299, 92.2041],
     ],
-    dtype=np.float32
+    dtype=np.float32,
 )

@@ -72,7 +77,11 @@ def estimate_norm(landmark: np.ndarray, image_size: Union[int, Tuple[int, int]]
     return matrix, inverse_matrix


-def face_alignment(image: np.ndarray, landmark: np.ndarray, image_size: Union[int, Tuple[int, int]] = 112) -> Tuple[np.ndarray, np.ndarray]:
+def face_alignment(
+    image: np.ndarray,
+    landmark: np.ndarray,
+    image_size: Union[int, Tuple[int, int]] = 112,
+) -> Tuple[np.ndarray, np.ndarray]:
     """
     Align the face in the input image based on the given facial landmarks.
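
face_alignment builds on the reference_alignment template above: skimage's SimilarityTransform solves for the scale, rotation, and translation mapping the detected 5-point landmarks onto that template. A sketch of the estimation step (dummy landmarks; estimate_norm wraps something equivalent, so treat this as an illustration rather than the library's exact code):

import numpy as np
from skimage.transform import SimilarityTransform

landmark = np.random.rand(5, 2).astype(np.float32) * 112  # dummy 5-point input
tform = SimilarityTransform()
tform.estimate(landmark, reference_alignment)  # template defined above
matrix = tform.params[0:2, :]  # 2x3 matrix usable with cv2.warpAffine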

@@ -6,7 +6,7 @@ from .base import BaseLandmarker
 from .models import Landmark106


-def create_landmarker(method: str = '2d106det', **kwargs) -> BaseLandmarker:
+def create_landmarker(method: str = "2d106det", **kwargs) -> BaseLandmarker:
     """
     Factory function to create facial landmark predictors.

@@ -18,15 +18,11 @@ def create_landmarker(method: str = '2d106det', **kwargs) -> BaseLandmarker:
         Initialized landmarker instance.
     """
     method = method.lower()
-    if method == '2d106det':
+    if method == "2d106det":
         return Landmark106(**kwargs)
     else:
-        available = ['2d106det']
+        available = ["2d106det"]
         raise ValueError(f"Unsupported method: '{method}'. Available: {available}")


-__all__ = [
-    "create_landmarker",
-    "Landmark106",
-    "BaseLandmarker"
-]
+__all__ = ["create_landmarker", "Landmark106", "BaseLandmarker"]

@@ -11,6 +11,7 @@ class BaseLandmarker(ABC):
     """
     Abstract Base Class for all facial landmark models.
     """
+
     @abstractmethod
     def get_landmarks(self, image: np.ndarray, bbox: np.ndarray) -> np.ndarray:
         """

@@ -15,7 +15,7 @@ from uniface.onnx_utils import create_onnx_session

 from .base import BaseLandmarker

-__all__ = ['Landmark']
+__all__ = ["Landmark"]


 class Landmark106(BaseLandmarker):

@@ -42,15 +42,13 @@ class Landmark106(BaseLandmarker):
     >>> print(landmarks.shape)
     (106, 2)
     """

     def __init__(
         self,
         model_name: LandmarkWeights = LandmarkWeights.DEFAULT,
-        input_size: Tuple[int, int] = (192, 192)
+        input_size: Tuple[int, int] = (192, 192),
     ) -> None:
-        Logger.info(
-            f"Initializing Facial Landmark with model={model_name}, "
-            f"input_size={input_size}"
-        )
+        Logger.info(f"Initializing Facial Landmark with model={model_name}, input_size={input_size}")
         self.input_size = input_size
         self.input_std = 1.0
         self.input_mean = 0.0

@@ -85,7 +83,7 @@ class Landmark106(BaseLandmarker):

         except Exception as e:
             Logger.error(f"Failed to load landmark model from '{self.model_path}'", exc_info=True)
-            raise RuntimeError(f"Failed to initialize landmark model: {e}")
+            raise RuntimeError(f"Failed to initialize landmark model: {e}") from e

     def preprocess(self, image: np.ndarray, bbox: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
         """Prepares a face crop for inference.

@@ -110,8 +108,11 @@ class Landmark106(BaseLandmarker):
         aligned_face, transform_matrix = bbox_center_alignment(image, center, self.input_size[0], scale, 0.0)

         face_blob = cv2.dnn.blobFromImage(
-            aligned_face, 1.0 / self.input_std, self.input_size,
-            (self.input_mean, self.input_mean, self.input_mean), swapRB=True
+            aligned_face,
+            1.0 / self.input_std,
+            self.input_size,
+            (self.input_mean, self.input_mean, self.input_mean),
+            swapRB=True,
         )
         return face_blob, transform_matrix

@@ -131,7 +132,7 @@ class Landmark106(BaseLandmarker):
         """
         landmarks = predictions.reshape((-1, 2))
         landmarks[:, 0:2] += 1
-        landmarks[:, 0:2] *= (self.input_size[0] // 2)
+        landmarks[:, 0:2] *= self.input_size[0] // 2

         inverse_matrix = cv2.invertAffineTransform(transform_matrix)
         landmarks = transform_points_2d(landmarks, inverse_matrix)
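
A worked example of the decode above: raw predictions lie in [-1, 1], so adding 1 and scaling by input_size // 2 maps them onto the aligned crop's pixel grid before the inverse affine transform:

import numpy as np

input_size = 192
pred = np.array([[-1.0, -1.0], [0.0, 0.0], [1.0, 1.0]])
pixels = (pred + 1) * (input_size // 2)
print(pixels)  # [[0. 0.] [96. 96.] [192. 192.]]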

@@ -151,14 +152,11 @@ class Landmark106(BaseLandmarker):
             np.ndarray: An array of predicted landmark points with shape (106, 2).
         """
         face_blob, transform_matrix = self.preprocess(image, bbox)
-        raw_predictions = self.session.run(
-            self.output_names, {self.input_names[0]: face_blob}
-        )[0][0]
+        raw_predictions = self.session.run(self.output_names, {self.input_names[0]: face_blob})[0][0]
         landmarks = self.postprocess(raw_predictions, transform_matrix)
         return landmarks


 # Testing code
 if __name__ == "__main__":
     from uniface.detection import RetinaFace

@@ -185,21 +183,21 @@ if __name__ == "__main__":

         if not faces:
             cv2.imshow("Facial Landmark Detection", frame)
-            if cv2.waitKey(1) & 0xFF == ord('q'):
+            if cv2.waitKey(1) & 0xFF == ord("q"):
                 break
             continue

         # 3. Loop through the list of face dictionaries
         for face in faces:
             # Extract the bounding box
-            bbox = face['bbox']
+            bbox = face["bbox"]

             # 4. Get landmarks for the current face using its bounding box
             landmarks = landmarker.get_landmarks(frame, bbox)

             # --- Drawing Logic ---
             # Draw the landmarks
-            for (x, y) in landmarks.astype(int):
+            for x, y in landmarks.astype(int):
                 cv2.circle(frame, (x, y), 2, (0, 255, 0), -1)

             # Draw the bounding box

@@ -207,7 +205,7 @@ if __name__ == "__main__":
             cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)

         cv2.imshow("Facial Landmark Detection", frame)
-        if cv2.waitKey(1) & 0xFF == ord('q'):
+        if cv2.waitKey(1) & 0xFF == ord("q"):
             break

     cap.release()

@@ -19,10 +19,7 @@ def enable_logging(level=logging.INFO):
     """
     Logger.handlers.clear()
     handler = logging.StreamHandler()
-    handler.setFormatter(logging.Formatter(
-        "%(asctime)s - %(levelname)s - %(message)s",
-        datefmt="%Y-%m-%d %H:%M:%S"
-    ))
+    handler.setFormatter(logging.Formatter("%(asctime)s - %(levelname)s - %(message)s", datefmt="%Y-%m-%d %H:%M:%S"))
     Logger.addHandler(handler)
     Logger.setLevel(level)
     Logger.propagate = False

@@ -11,10 +11,10 @@ from tqdm import tqdm
 import uniface.constants as const
 from uniface.log import Logger

-__all__ = ['verify_model_weights']
+__all__ = ["verify_model_weights"]


-def verify_model_weights(model_name: str, root: str = '~/.uniface/models') -> str:
+def verify_model_weights(model_name: str, root: str = "~/.uniface/models") -> str:
     """
     Ensure model weights are present, downloading and verifying them using SHA-256 if necessary.

@@ -53,7 +53,7 @@ def verify_model_weights(model_name: str, root: str = '~/.uniface/models') -> str:
         raise ValueError(f"No URL found for model '{model_name}'")

     file_ext = os.path.splitext(url)[1]
-    model_path = os.path.normpath(os.path.join(root, f'{model_name.value}{file_ext}'))
+    model_path = os.path.normpath(os.path.join(root, f"{model_name.value}{file_ext}"))

     if not os.path.exists(model_path):
         Logger.info(f"Downloading model '{model_name}' from {url}")

@@ -62,7 +62,7 @@ def verify_model_weights(model_name: str, root: str = '~/.uniface/models') -> str:
             Logger.info(f"Successfully downloaded '{model_name}' to {model_path}")
         except Exception as e:
             Logger.error(f"Failed to download model '{model_name}': {e}")
-            raise ConnectionError(f"Download failed for '{model_name}'")
+            raise ConnectionError(f"Download failed for '{model_name}'") from e

     expected_hash = const.MODEL_SHA256.get(model_name)
     if expected_hash and not verify_file_hash(model_path, expected_hash):

@@ -78,18 +78,21 @@ def download_file(url: str, dest_path: str) -> None:
     try:
         response = requests.get(url, stream=True)
         response.raise_for_status()
-        with open(dest_path, "wb") as file, tqdm(
-            desc=f"Downloading {dest_path}",
-            unit='B',
-            unit_scale=True,
-            unit_divisor=1024
-        ) as progress:
+        with (
+            open(dest_path, "wb") as file,
+            tqdm(
+                desc=f"Downloading {dest_path}",
+                unit="B",
+                unit_scale=True,
+                unit_divisor=1024,
+            ) as progress,
+        ):
             for chunk in response.iter_content(chunk_size=const.CHUNK_SIZE):
                 if chunk:
                     file.write(chunk)
                     progress.update(len(chunk))
     except requests.RequestException as e:
-        raise ConnectionError(f"Failed to download file from {url}. Error: {e}")
+        raise ConnectionError(f"Failed to download file from {url}. Error: {e}") from e


 def verify_file_hash(file_path: str, expected_hash: str) -> bool:
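
Note that the parenthesized grouping of multiple context managers introduced in download_file is only officially supported from Python 3.10; on older interpreters, explicit nesting (or the pre-change form) is required:

# Pre-3.10 equivalent of the parenthesized form above.
with open("weights.onnx", "wb") as file:  # assumed destination path
    with tqdm(desc="Downloading weights.onnx", unit="B", unit_scale=True, unit_divisor=1024) as progress:
        ...  # stream chunks as in download_file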

@@ -19,6 +19,7 @@ class PreprocessConfig:
     """
     Configuration for preprocessing images before feeding them into the model.
     """
+
     input_mean: Union[float, List[float]] = 127.5
     input_std: Union[float, List[float]] = 127.5
     input_size: Tuple[int, int] = (112, 112)

@@ -29,6 +30,7 @@ class BaseRecognizer(ABC):
     Abstract Base Class for all face recognition models.
     It provides the core functionality for preprocessing, inference, and embedding extraction.
     """
+
     @abstractmethod
     def __init__(self, model_path: str, preprocessing: PreprocessConfig) -> None:
         """

@@ -74,7 +76,10 @@ class BaseRecognizer(ABC):
             Logger.info(f"Successfully initialized face encoder from {self.model_path}")

         except Exception as e:
-            Logger.error(f"Failed to load face encoder model from '{self.model_path}'", exc_info=True)
+            Logger.error(
+                f"Failed to load face encoder model from '{self.model_path}'",
+                exc_info=True,
+            )
             raise RuntimeError(f"Failed to initialize model session for '{self.model_path}'") from e

     def preprocess(self, face_img: np.ndarray) -> np.ndarray:

@@ -92,8 +97,9 @@ class BaseRecognizer(ABC):
         if isinstance(self.input_std, (list, tuple)):
             # Per-channel normalization
             rgb_img = cv2.cvtColor(resized_img, cv2.COLOR_BGR2RGB).astype(np.float32)
-            normalized_img = (rgb_img - np.array(self.input_mean, dtype=np.float32)) / \
-                np.array(self.input_std, dtype=np.float32)
+            normalized_img = (rgb_img - np.array(self.input_mean, dtype=np.float32)) / np.array(
+                self.input_std, dtype=np.float32
+            )

             # Change to NCHW (batch, channels, height, width)
             blob = np.transpose(normalized_img, (2, 0, 1))  # CHW

@@ -105,7 +111,7 @@ class BaseRecognizer(ABC):
             scalefactor=1.0 / self.input_std,
             size=self.input_size,
             mean=(self.input_mean, self.input_mean, self.input_mean),
-            swapRB=True  # Convert BGR to RGB
+            swapRB=True,  # Convert BGR to RGB
         )

         return blob
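
A small check of the per-channel branch above: list-valued mean/std become 3-vectors that broadcast over the trailing channel axis of an HxWx3 image:

import numpy as np

rgb = np.full((2, 2, 3), 127.5, dtype=np.float32)
mean = np.array([127.5, 127.5, 127.5], dtype=np.float32)
std = np.array([128.0, 128.0, 128.0], dtype=np.float32)
print(((rgb - mean) / std)[0, 0])  # [0. 0. 0.]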

@@ -34,14 +34,10 @@ class ArcFace(BaseRecognizer):
     def __init__(
         self,
         model_name: ArcFaceWeights = ArcFaceWeights.MNET,
-        preprocessing: Optional[PreprocessConfig] = None
+        preprocessing: Optional[PreprocessConfig] = None,
     ) -> None:
         if preprocessing is None:
-            preprocessing = PreprocessConfig(
-                input_mean=127.5,
-                input_std=127.5,
-                input_size=(112, 112)
-            )
+            preprocessing = PreprocessConfig(input_mean=127.5, input_std=127.5, input_size=(112, 112))
         model_path = verify_model_weights(model_name)
         super().__init__(model_path=model_path, preprocessing=preprocessing)

@@ -68,14 +64,10 @@ class MobileFace(BaseRecognizer):
     def __init__(
         self,
         model_name: MobileFaceWeights = MobileFaceWeights.MNET_V2,
-        preprocessing: Optional[PreprocessConfig] = None
+        preprocessing: Optional[PreprocessConfig] = None,
     ) -> None:
         if preprocessing is None:
-            preprocessing = PreprocessConfig(
-                input_mean=127.5,
-                input_std=127.5,
-                input_size=(112, 112)
-            )
+            preprocessing = PreprocessConfig(input_mean=127.5, input_std=127.5, input_size=(112, 112))
         model_path = verify_model_weights(model_name)
         super().__init__(model_path=model_path, preprocessing=preprocessing)

@@ -102,14 +94,10 @@ class SphereFace(BaseRecognizer):
     def __init__(
         self,
         model_name: SphereFaceWeights = SphereFaceWeights.SPHERE20,
-        preprocessing: Optional[PreprocessConfig] = None
+        preprocessing: Optional[PreprocessConfig] = None,
     ) -> None:
         if preprocessing is None:
-            preprocessing = PreprocessConfig(
-                input_mean=127.5,
-                input_std=127.5,
-                input_size=(112, 112)
-            )
+            preprocessing = PreprocessConfig(input_mean=127.5, input_std=127.5, input_size=(112, 112))

         model_path = verify_model_weights(model_name)
         super().__init__(model_path=model_path, preprocessing=preprocessing)

@@ -44,7 +44,13 @@ def draw_detections(

     # Draw score
     cv2.putText(
-        image, f"{score:.2f}", (bbox[0], bbox[1] - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), thickness
+        image,
+        f"{score:.2f}",
+        (bbox[0], bbox[1] - 10),
+        cv2.FONT_HERSHEY_SIMPLEX,
+        0.5,
+        (255, 255, 255),
+        thickness,
     )

     # Draw landmarks