ref: Update attribute and landmark modules

yakhyo
2025-07-04 16:30:41 +09:00
parent 30a177981d
commit 98f8acc51b
7 changed files with 378 additions and 299 deletions

.gitignore vendored

@@ -1,3 +1,5 @@
tmp_*
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]

uniface/attribute/__init__.py

@@ -1,2 +1,96 @@
from .age_gender import AgeGender
from .emotion import Emotion
# Copyright 2025 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
from typing import Dict, Any, List, Union
import numpy as np
from uniface.attribute.age_gender import AgeGender
from uniface.attribute.emotion import Emotion
from uniface.attribute.base import Attribute
from uniface.constants import AgeGenderWeights, DDAMFNWeights
# Public API for the attribute module
__all__ = [
"AgeGender",
"Emotion",
"create_attribute_predictor",
"predict_attributes"
]
# A mapping from model enums to their corresponding attribute classes
_ATTRIBUTE_MODELS = {
**{model: AgeGender for model in AgeGenderWeights},
**{model: Emotion for model in DDAMFNWeights}
}
def create_attribute_predictor(
model_name: Union[AgeGenderWeights, DDAMFNWeights],
**kwargs: Any
) -> Attribute:
"""
Factory function to create an attribute predictor instance.
This high-level API simplifies the creation of attribute models by
dynamically selecting the correct class based on the provided model enum.
Args:
model_name: The enum corresponding to the desired attribute model
(e.g., AgeGenderWeights.DEFAULT or DDAMFNWeights.AFFECNET7).
**kwargs: Additional keyword arguments to pass to the model's constructor.
Returns:
An initialized instance of an Attribute predictor class (e.g., AgeGender).
Raises:
ValueError: If the provided model_name is not a supported enum.
"""
model_class = _ATTRIBUTE_MODELS.get(model_name)
if model_class is None:
raise ValueError(f"Unsupported attribute model: {model_name}. "
f"Please choose from AgeGenderWeights or DDAMFNWeights.")
# Pass model_name to the constructor, as some classes might need it
return model_class(model_name=model_name, **kwargs)
def predict_attributes(
image: np.ndarray,
detections: List[Dict[str, np.ndarray]],
predictor: Attribute
) -> List[Dict[str, Any]]:
"""
High-level API to predict attributes for multiple detected faces.
This function iterates through a list of face detections, runs the
specified attribute predictor on each one, and appends the results back
into the detection dictionary.
Args:
image (np.ndarray): The full input image in BGR format.
detections (List[Dict]): A list of detection results, where each dict
must contain a 'bbox' and optionally 'landmark'.
predictor (Attribute): An initialized attribute predictor instance,
created by `create_attribute_predictor`.
Returns:
The list of detections, where each dictionary is updated with a new
'attributes' key containing the prediction result.
"""
for face in detections:
# Initialize attributes dict if it doesn't exist
if 'attributes' not in face:
face['attributes'] = {}
if isinstance(predictor, AgeGender):
gender, age = predictor(image, face['bbox'])
face['attributes']['gender'] = gender
face['attributes']['age'] = age
elif isinstance(predictor, Emotion):
emotion, confidence = predictor(image, face['landmark'])
face['attributes']['emotion'] = emotion
face['attributes']['confidence'] = confidence
return detections
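
A minimal usage sketch of the new high-level attribute API, assuming the detector factory used in the test blocks further down (create_detector with RetinaFaceWeights) and that its detections carry the 'bbox' key that predict_attributes reads:

import cv2

from uniface.attribute import create_attribute_predictor, predict_attributes
from uniface.constants import AgeGenderWeights, RetinaFaceWeights
from uniface.detection import create_detector  # assumed, as in the test blocks below

image = cv2.imread("face.jpg")  # hypothetical input image (BGR)

detector = create_detector(model_name=RetinaFaceWeights.MNET_V2)
detections = detector.detect(image)  # list of dicts, each with a 'bbox' (and landmarks)

predictor = create_attribute_predictor(AgeGenderWeights.DEFAULT)
detections = predict_attributes(image, detections, predictor)

for face in detections:
    print(face['attributes'])  # e.g. {'gender': 'Male', 'age': 31}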

uniface/attribute/age_gender.py

@@ -5,232 +5,176 @@
import cv2
import numpy as np
import onnxruntime as ort
from typing import List, Tuple, Union
from uniface.attribute.base import Attribute
from uniface.log import Logger
from uniface.constants import AgeGenderWeights
from uniface.face_utils import bbox_center_alignment
from uniface.model_store import verify_model_weights
__all__ = ["AgeGender"]
class AgeGender:
class AgeGender(Attribute):
"""
Age and gender prediction model using ONNX Runtime.
Loads a pretrained ONNX model to predict both age (in years) and gender
(0: female, 1: male) from a detected face region. Handles model loading,
preprocessing, inference, and output interpretation.
Attributes:
input_size (Tuple[int, int]): Model's expected input resolution (width, height).
input_mean (float): Mean value used for input normalization.
input_std (float): Standard deviation used for input normalization.
model_path (str): Path to the verified ONNX model file.
session (onnxruntime.InferenceSession): ONNX Runtime session for inference.
input_names (List[str]): List of input node names.
output_names (List[str]): List of output node names.
Args:
model_name (AgeGenderWeights): Enum specifying the age-gender model to load.
input_size (Tuple[int, int]): Resolution for model input; defaults to (112, 112).
This class inherits from the base `Attribute` class and implements the
functionality for predicting age (in years) and gender (0 for female,
1 for male) from a face image. It requires a bounding box to locate the face.
"""
def __init__(
self,
model_name: AgeGenderWeights = AgeGenderWeights.DEFAULT,
input_size: Tuple[int, int] = (112, 112)
) -> None:
def __init__(self, model_name: AgeGenderWeights = AgeGenderWeights.DEFAULT) -> None:
"""
Initializes the Age and Gender prediction model.
Initializes the AgeGender prediction model.
Args:
model_name: Model weights enum to use
input_size: Input resolution for the model (width, height)
model_name (AgeGenderWeights): The enum specifying the model weights
to load.
"""
Logger.info(
f"Initializing AgeGender with model={model_name}, "
f"input_size={input_size}"
)
# Model configuration
self.input_size = input_size
self.input_std = 1.0
self.input_mean = 0.0
# Get path to model weights
Logger.info(f"Initializing AgeGender with model={model_name.name}")
self.model_path = verify_model_weights(model_name)
Logger.info(f"Verified model weights located at: {self.model_path}")
# Initialize model
self._initialize_model()
def _initialize_model(self):
def _initialize_model(self) -> None:
"""
Initialize the ONNX model for inference.
Raises:
RuntimeError: If the model fails to load or initialize.
Initializes the ONNX model and creates an inference session.
"""
try:
# Initialize session with available providers
self.session = ort.InferenceSession(
self.model_path,
providers=["CUDAExecutionProvider", "CPUExecutionProvider"]
)
# Extract model metadata
input_metadata = self.session.get_inputs()[0]
input_shape = input_metadata.shape
self.input_size = tuple(input_shape[2:4][::-1]) # Update from model (width, height)
# Get input/output names
self.input_names = [input.name for input in self.session.get_inputs()]
# Get model input details from the loaded model
input_meta = self.session.get_inputs()[0]
self.input_name = input_meta.name
self.input_size = tuple(input_meta.shape[2:4]) # (height, width)
self.output_names = [output.name for output in self.session.get_outputs()]
Logger.info(f"Successfully initialized AgeGender model")
Logger.info(f"Successfully initialized AgeGender model with input size {self.input_size}")
except Exception as e:
Logger.error(f"Failed to load AgeGender model from '{self.model_path}'", exc_info=True)
raise RuntimeError(f"Failed to initialize AgeGender model: {e}")
def preprocess(self, image: np.ndarray, bbox: np.ndarray) -> np.ndarray:
def preprocess(self, image: np.ndarray, bbox: Union[List, np.ndarray]) -> np.ndarray:
"""
Preprocess the input image and face bounding box for inference.
Aligns the face based on the bounding box and preprocesses it for inference.
Args:
image: Input image in BGR format
bbox: Face bounding box coordinates [x1, y1, x2, y2]
image (np.ndarray): The full input image in BGR format.
bbox (Union[List, np.ndarray]): The face bounding box coordinates [x1, y1, x2, y2].
Returns:
Preprocessed image blob ready for inference
np.ndarray: The preprocessed image blob ready for inference.
"""
# Calculate face dimensions and center
bbox = np.asarray(bbox)
width, height = bbox[2] - bbox[0], bbox[3] - bbox[1]
center = (bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2
center = ((bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2)
scale = self.input_size[1] / (max(width, height) * 1.5)
# Determine scale to fit face with margin
scale = self.input_size[0] / (max(width, height) * 1.5)
# Rotation parameter (kept at 0.0, i.e. no rotation applied)
rotation = 0.0
# Align face based on bounding box
aligned_face, _ = bbox_center_alignment(
image, center, self.input_size[0], scale, rotation
image, center, self.input_size[1], scale, rotation
)
# Convert to blob format for network input
face_blob = cv2.dnn.blobFromImage(
blob = cv2.dnn.blobFromImage(
aligned_face,
1.0 / self.input_std,
self.input_size,
(self.input_mean, self.input_mean, self.input_mean),
swapRB=True # Convert BGR to RGB
scalefactor=1.0,
size=self.input_size[::-1],
mean=(0.0, 0.0, 0.0),
swapRB=True
)
return blob
return face_blob
def postprocess(self, predictions: np.ndarray) -> Tuple[int, int]:
def postprocess(self, prediction: np.ndarray) -> Tuple[str, int]:
"""
Process model predictions to extract gender and age.
Processes the raw model output to extract gender and age.
Args:
predictions: Raw model output, shape [1, 3] where:
- First two elements represent gender logits
- Third element represents normalized age
prediction (np.ndarray): The raw output from the model inference.
Returns:
Tuple containing:
- Gender (0: female, 1: male)
- Age in years
Tuple[str, int]: A tuple containing the predicted gender label ("Female" or "Male")
and age (in years).
"""
# First two values are gender logits (female/male)
gender = int(np.argmax(predictions[:2]))
# Third value is normalized age that needs scaling
age = int(np.round(predictions[2] * 100))
# First two values are gender logits
gender_id = int(np.argmax(prediction[:2]))
gender = "Female" if gender_id == 0 else "Male"
# Third value is normalized age, scaled by 100
age = int(np.round(prediction[2] * 100))
return gender, age
def predict(self, image: np.ndarray, bbox: np.ndarray) -> Tuple[int, int]:
def predict(self, image: np.ndarray, bbox: Union[List, np.ndarray]) -> Tuple[str, int]:
"""
Predict age and gender for a face in the image.
Predicts age and gender for a single face specified by a bounding box.
Args:
image: Input image in BGR format
bbox: Face bounding box [x1, y1, x2, y2]
image (np.ndarray): The full input image in BGR format.
bbox (Union[List, np.ndarray]): The face bounding box coordinates [x1, y1, x2, y2].
Returns:
- 'gender_id': Gender as integer (0: female, 1: male)
- 'age': Age in years
Tuple[str, int]: A tuple containing the predicted gender label and age.
"""
# Preprocess and run inference
face_blob = self.preprocess(image, bbox)
predictions = self.session.run(
self.output_names,
{self.input_names[0]: face_blob}
)[0][0]
# Extract gender and age from predictions
gender_id, age = self.postprocess(predictions)
return gender_id, age
prediction = self.session.run(self.output_names, {self.input_name: face_blob})[0][0]
gender, age = self.postprocess(prediction)
return gender, age
# TODO: For testing purposes only, remove later
def main():
from uniface.detection import RetinaFace
# TODO: below is only for testing, remove it later
if __name__ == "__main__":
# To run this script, you need to have uniface.detection installed
# or available in your path.
from uniface.detection import create_detector
from uniface.constants import RetinaFaceWeights
face_detector = RetinaFace(
model_name=RetinaFaceWeights.MNET_V2,
conf_thresh=0.5,
pre_nms_topk=5000,
nms_thresh=0.4,
post_nms_topk=750,
dynamic_size=False,
input_size=(640, 640)
)
age_detector = AgeGender()
print("Initializing models for live inference...")
# 1. Initialize the face detector
# Using a smaller model for faster real-time performance
detector = create_detector(model_name=RetinaFaceWeights.MNET_V2)
# 2. Initialize the attribute predictor
age_gender_predictor = AgeGender()
# 3. Start webcam capture
cap = cv2.VideoCapture(0)
if not cap.isOpened():
print("Webcam not available.")
return
print("Error: Could not open webcam.")
exit()
print("Press 'q' to quit.")
print("Starting webcam feed. Press 'q' to quit.")
while True:
ret, frame = cap.read()
if not ret:
print("Frame capture failed.")
print("Error: Failed to capture frame.")
break
boxes, landmarks = face_detector.detect(frame)
# Detect faces in the current frame
detections = detector.detect(frame)
for box, landmark in zip(boxes, landmarks):
x1, y1, x2, y2, score = box.astype(int)
face_crop = frame[y1:y2, x1:x2]
# For each detected face, predict age and gender
for detection in detections:
box = detection['bbox']
x1, y1, x2, y2 = map(int, box)
if face_crop.size == 0:
continue
# Predict attributes
gender, age = age_gender_predictor.predict(frame, box)
gender, age = age_detector.predict(frame, box[:4])
txt = f"{gender} ({age:.2f})"
# Prepare text and draw on the frame
label = f"{gender}, {age}"
cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
cv2.putText(frame, txt, (x1, y1 - 10),
cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
cv2.imshow("Face + Emotion Detection", frame)
# Display the resulting frame
cv2.imshow("Age and Gender Inference (Press 'q' to quit)", frame)
# Break the loop if 'q' is pressed
if cv2.waitKey(1) & 0xFF == ord('q'):
break
# Release resources
cap.release()
cv2.destroyAllWindows()
if __name__ == "__main__":
main()
print("Inference stopped.")

uniface/attribute/base.py (new file)

@@ -0,0 +1,91 @@
# Copyright 2025 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
from abc import ABC, abstractmethod
from typing import Any
import numpy as np
class Attribute(ABC):
"""
Abstract base class for face attribute models.
This class defines the common interface that all attribute models
(e.g., age-gender, emotion) must implement. It ensures a consistent API
across different attribute prediction modules in the library, making them
interchangeable and easy to use.
"""
@abstractmethod
def _initialize_model(self) -> None:
"""
Initializes the underlying model for inference.
This method should handle loading model weights, creating the
inference session (e.g., ONNX Runtime, PyTorch), and any necessary
warm-up procedures to prepare the model for prediction.
"""
raise NotImplementedError("Subclasses must implement the _initialize_model method.")
@abstractmethod
def preprocess(self, image: np.ndarray, *args: Any) -> Any:
"""
Preprocesses the input data for the model.
This method should take a raw image and any other necessary data
(like bounding boxes or landmarks) and convert it into the format
expected by the model's inference engine (e.g., a blob or tensor).
Args:
image (np.ndarray): The input image containing the face, typically
in BGR format.
*args: Additional arguments required for preprocessing, such as
bounding boxes or facial landmarks.
Returns:
The preprocessed data ready for model inference.
"""
raise NotImplementedError("Subclasses must implement the preprocess method.")
@abstractmethod
def postprocess(self, prediction: Any) -> Any:
"""
Postprocesses the raw model output into a human-readable format.
This method takes the raw output from the model's inference and
converts it into a meaningful result, such as an age value, a gender
label, or an emotion category.
Args:
prediction (Any): The raw output from the model's inference.
Returns:
The final, processed attributes.
"""
raise NotImplementedError("Subclasses must implement the postprocess method.")
@abstractmethod
def predict(self, image: np.ndarray, *args: Any) -> Any:
"""
Performs end-to-end attribute prediction on a given image.
This method orchestrates the full pipeline: it calls the preprocess,
inference, and postprocess steps to return the final, user-friendly
attribute prediction.
Args:
image (np.ndarray): The input image containing the face.
*args: Additional data required for prediction, such as a bounding
box or landmarks.
Returns:
The final predicted attributes.
"""
raise NotImplementedError("Subclasses must implement the predict method.")
def __call__(self, *args, **kwargs) -> Any:
"""
Provides a convenient, callable shortcut for the `predict` method.
"""
return self.predict(*args, **kwargs)
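
To illustrate the contract, a toy subclass satisfying all four abstract methods (the "model" here is a placeholder heuristic, not a real predictor):

import numpy as np

from uniface.attribute.base import Attribute


class Brightness(Attribute):
    """Hypothetical example attribute: labels a face crop as 'bright' or 'dark'."""

    def __init__(self) -> None:
        self._initialize_model()

    def _initialize_model(self) -> None:
        # A real implementation would load ONNX/TorchScript weights here.
        self.threshold = 0.5

    def preprocess(self, image: np.ndarray, bbox) -> np.ndarray:
        x1, y1, x2, y2 = map(int, bbox)
        return image[y1:y2, x1:x2]  # crop the face region

    def postprocess(self, prediction: float) -> str:
        return "bright" if prediction > self.threshold else "dark"

    def predict(self, image: np.ndarray, bbox) -> str:
        crop = self.preprocess(image, bbox)
        score = float(crop.mean()) / 255.0  # placeholder "inference"
        return self.postprocess(score)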

uniface/attribute/emotion.py

@@ -5,218 +5,166 @@
import cv2
import torch
import numpy as np
from typing import List, Tuple, Union
from uniface.attribute.base import Attribute
from uniface.log import Logger
from uniface.constants import DDAMFNWeights
from uniface.face_utils import face_alignment
from uniface.model_store import verify_model_weights
__all__ = ["Emotion"]
class Emotion:
class Emotion(Attribute):
"""
Emotion recognition using a TorchScript model.
Emotion recognition model using a TorchScript model.
Args:
model_weights (DDAMFNWeights): Pretrained model weights enum. Defaults to AFFECNET7.
input_size (Tuple[int, int]): Size of input images. Defaults to (112, 112).
Attributes:
emotion_labels (List[str]): List of emotion labels the model can predict.
device (torch.device): Inference device (CPU or CUDA).
model (torch.jit.ScriptModule): Loaded TorchScript model.
Raises:
ValueError: If model weights are invalid or not found.
RuntimeError: If model loading fails.
This class inherits from the base `Attribute` class and implements the
functionality for predicting one of several emotion categories from a face
image. It requires 5-point facial landmarks for alignment.
"""
def __init__(
self,
model_weights: DDAMFNWeights = DDAMFNWeights.AFFECNET7,
input_size: Tuple[int, int] = (112, 112)
self,
model_weights: DDAMFNWeights = DDAMFNWeights.AFFECNET7,
input_size: Tuple[int, int] = (112, 112),
) -> None:
"""
Initialize the emotion detector with a TorchScript model
"""
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
self.emotion_labels = [
"Neutral", "Happy", "Sad", "Surprise", "Fear", "Disgust", "Angry"
]
Initializes the emotion recognition model.
# Add contempt for AFFECNET8 model
Args:
model_weights (DDAMFNWeights): The enum for the model weights to load.
input_size (Tuple[int, int]): The expected input size for the model.
"""
Logger.info(f"Initializing Emotion with model={model_weights.name}")
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
self.input_size = input_size
self.model_path = verify_model_weights(model_weights)
# Define emotion labels based on the selected model
self.emotion_labels = ["Neutral", "Happy", "Sad", "Surprise", "Fear", "Disgust", "Angry"]
if model_weights == DDAMFNWeights.AFFECNET8:
self.emotion_labels.append("Contempt")
# Initialize image preprocessing parameters
self.input_size = input_size
self.normalization_std = [0.229, 0.224, 0.225]
self.normalization_mean = [0.485, 0.456, 0.406]
self._initialize_model()
Logger.info(
f"Initialized Emotion class with model={model_weights.name}, "
f"device={'cuda' if torch.cuda.is_available() else 'cpu'}, "
f"num_classes={len(self.emotion_labels)}, input_size={self.input_size}"
)
# Get path to model weights and initialize model
self.model_path = verify_model_weights(model_weights)
Logger.info(f"Verified model weights located at: {self.model_path}")
self._load_model()
def _load_model(self) -> None:
def _initialize_model(self) -> None:
"""
Loads and initializes a TorchScript model for emotion inference.
Raises:
RuntimeError: If loading the model fails.
Loads and initializes the TorchScript model for inference.
"""
try:
self.model = torch.jit.load(self.model_path, map_location=self.device)
self.model.eval()
Logger.info(f"TorchScript model successfully loaded from: {self.model_path}")
# Warm-up with dummy input
# Warm-up with a dummy input for faster first inference
dummy_input = torch.randn(1, 3, *self.input_size).to(self.device)
with torch.no_grad():
_ = self.model(dummy_input)
Logger.info("Emotion model warmed up with dummy input.")
self.model(dummy_input)
Logger.info(f"Successfully initialized Emotion model on {self.device}")
except Exception as e:
Logger.error(f"Failed to load TorchScript model from {self.model_path}: {e}")
raise RuntimeError(f"Model loading failed: {str(e)}")
Logger.error(f"Failed to load Emotion model from '{self.model_path}'", exc_info=True)
raise RuntimeError(f"Failed to initialize Emotion model: {e}")
def preprocess(self, image: np.ndarray) -> torch.Tensor:
def preprocess(self, image: np.ndarray, landmark: Union[List, np.ndarray]) -> torch.Tensor:
"""
Preprocess image for model inference: resize, normalize and convert to tensor.
Aligns the face using landmarks and preprocesses it into a tensor.
Args:
image (np.ndarray): BGR image (H, W, 3)
image (np.ndarray): The full input image in BGR format.
landmark (Union[List, np.ndarray]): The 5-point facial landmarks.
Returns:
torch.Tensor: Preprocessed image tensor of shape (1, 3, H, W)
torch.Tensor: The preprocessed image tensor ready for inference.
"""
# Convert BGR to RGB
rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
landmark = np.asarray(landmark)
# Resize to target input size
aligned_image, _ = face_alignment(image, landmark)
# Convert BGR to RGB, resize, normalize, and convert to a CHW tensor
rgb_image = cv2.cvtColor(aligned_image, cv2.COLOR_BGR2RGB)
resized_image = cv2.resize(rgb_image, self.input_size).astype(np.float32) / 255.0
# Normalize with mean and std
mean_array = np.array(self.normalization_mean, dtype=np.float32)
std_array = np.array(self.normalization_std, dtype=np.float32)
normalized_image = (resized_image - mean_array) / std_array
# Convert from HWC to CHW format
mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
std = np.array([0.229, 0.224, 0.225], dtype=np.float32)
normalized_image = (resized_image - mean) / std
transposed_image = normalized_image.transpose((2, 0, 1))
# Convert to torch tensor and add batch dimension
tensor = torch.from_numpy(transposed_image).unsqueeze(0).to(self.device)
return tensor
return torch.from_numpy(transposed_image).unsqueeze(0).to(self.device)
def predict(self, image: np.ndarray, landmark: np.ndarray) -> Tuple[Union[str, None], Union[float, None]]:
def postprocess(self, prediction: torch.Tensor) -> Tuple[str, float]:
"""
Predict the emotion from a face image.
Args:
image (np.ndarray): Input face image in BGR format.
landmark (np.ndarray): Facial five point landmark.
Returns:
Tuple[str, float]: (Predicted emotion label, Confidence score)
Returns (None, None) if prediction fails.
Raises:
ValueError: If the input is not a valid BGR image.
Processes the raw model output to get the emotion label and confidence score.
"""
# Validate input
if not isinstance(image, np.ndarray):
Logger.error("Input must be a NumPy ndarray.")
raise ValueError("Input must be a NumPy ndarray (BGR image).")
probabilities = torch.nn.functional.softmax(prediction, dim=1).squeeze().cpu().numpy()
pred_index = np.argmax(probabilities)
emotion_label = self.emotion_labels[pred_index]
confidence = float(probabilities[pred_index])
return emotion_label, confidence
if image.ndim != 3 or image.shape[2] != 3:
Logger.error(f"Invalid image shape: {image.shape}. Expected HxWx3 image.")
raise ValueError("Input image must have shape (H, W, 3).")
def predict(self, image: np.ndarray, landmark: Union[List, np.ndarray]) -> Tuple[str, float]:
"""
Predicts the emotion from a single face specified by its landmarks.
"""
input_tensor = self.preprocess(image, landmark)
with torch.no_grad():
output = self.model(input_tensor)
if isinstance(output, tuple):
output = output[0]
try:
# Align face using landmarks
aligned_image, _ = face_alignment(image, landmark)
# Preprocess and run inference
input_tensor = self.preprocess(aligned_image)
with torch.no_grad():
output = self.model(input_tensor)
# Handle case where model returns a tuple
if isinstance(output, tuple):
output = output[0]
# Get probabilities and prediction
probabilities = torch.nn.functional.softmax(output, dim=1).squeeze(0).cpu().numpy()
predicted_index = int(np.argmax(probabilities))
confidence_score = round(float(probabilities[predicted_index]), 2)
return self.emotion_labels[predicted_index], confidence_score
except Exception as e:
Logger.error(f"Emotion inference failed: {e}")
return None, None
return self.postprocess(output)
# TODO: For testing purposes only, remove later
def main():
from uniface import RetinaFace
# TODO: below is only for testing, remove it later
if __name__ == "__main__":
from uniface.detection import create_detector
from uniface.constants import RetinaFaceWeights
face_detector = RetinaFace(
model_name=RetinaFaceWeights.MNET_V2,
conf_thresh=0.5,
pre_nms_topk=5000,
nms_thresh=0.4,
post_nms_topk=750,
dynamic_size=False,
input_size=(640, 640)
)
emotion_detector = Emotion()
print("Initializing models for live inference...")
# 1. Initialize the face detector
# Using a smaller model for faster real-time performance
detector = create_detector(model_name=RetinaFaceWeights.MNET_V2)
# 2. Initialize the attribute predictor
emotion_predictor = Emotion()
# 3. Start webcam capture
cap = cv2.VideoCapture(0)
if not cap.isOpened():
print("Webcam not available.")
return
print("Error: Could not open webcam.")
exit()
print("Press 'q' to quit.")
print("Starting webcam feed. Press 'q' to quit.")
while True:
ret, frame = cap.read()
if not ret:
print("Frame capture failed.")
print("Error: Failed to capture frame.")
break
boxes, landmarks = face_detector.detect(frame)
# Detect faces in the current frame.
# This method returns a list of dictionaries for each detected face.
detections = detector.detect(frame)
for box, landmark in zip(boxes, landmarks):
x1, y1, x2, y2, score = box.astype(int)
face_crop = frame[y1:y2, x1:x2]
# For each detected face, predict the emotion
for detection in detections:
box = detection['bbox']
landmark = detection['landmarks']
x1, y1, x2, y2 = map(int, box)
if face_crop.size == 0:
continue
# Predict attributes using the landmark
emotion, confidence = emotion_predictor.predict(frame, landmark)
emotion, preds = emotion_detector.predict(frame, landmark)
# Prepare text and draw on the frame
label = f"{emotion} ({confidence:.2f})"
cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)
cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 0, 0), 2)
txt = f"{emotion} ({preds:.2f})"
cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
cv2.putText(frame, txt, (x1, y1 - 10),
cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
# Display the resulting frame
cv2.imshow("Emotion Inference (Press 'q' to quit)", frame)
cv2.imshow("Face + Emotion Detection", frame)
# Break the loop if 'q' is pressed
if cv2.waitKey(1) & 0xFF == ord('q'):
break
# Release resources
cap.release()
cv2.destroyAllWindows()
if __name__ == "__main__":
main()
print("Inference stopped.")


@@ -6,7 +6,7 @@ from .models import Landmark106
from .base import BaseLandmarker
def create_landmarker(method: str = '2d106', **kwargs) -> BaseLandmarker:
def create_landmarker(method: str = '2d106det', **kwargs) -> BaseLandmarker:
"""
Factory function to create facial landmark predictors.
@@ -18,10 +18,10 @@ def create_landmarker(method: str = '2d106', **kwargs) -> BaseLandmarker:
Initialized landmarker instance.
"""
method = method.lower()
if method == 'insightface_106':
if method == '2d106det':
return Landmark106(**kwargs)
else:
available = ['insightface_106']
available = ['2d106det']
raise ValueError(f"Unsupported method: '{method}'. Available: {available}")
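
A quick sketch of the renamed method key (import path assumed from the demo below):

from uniface.landmark import create_landmarker  # assumed module path

landmarker = create_landmarker()             # default is now '2d106det'
landmarker = create_landmarker('2D106DET')   # strings are lower-cased before matching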


@@ -168,7 +168,7 @@ if __name__ == "__main__":
# 1. Create the detector and landmarker using the new API
face_detector = create_detector('retinaface')
landmarker = create_landmarker() # Uses the default '106' method
landmarker = create_landmarker() # Uses the default '2d106det' method
cap = cv2.VideoCapture(0)
if not cap.isOpened():