Mirror of https://github.com/yakhyo/uniface.git (synced 2025-12-30 00:52:25 +00:00)
feat: Update recognition, landmark modules
@@ -4,16 +4,17 @@ import time
 import argparse
 import numpy as np

-from uniface.detection import RetinaFace, draw_detections, SCRFD
-from uniface.constants import RetinaFaceWeights, SCRFDWeights
+# UPDATED: Use the factory function and import from the new location
+from uniface.detection import create_detector
+from uniface.visualization import draw_detections


-def run_inference(model, image_path, vis_threshold=0.6, save_dir="outputs"):
+def run_inference(detector, image_path: str, vis_threshold: float = 0.6, save_dir: str = "outputs"):
     """
     Run face detection on a single image.

     Args:
-        model (RetinaFace): Initialized RetinaFace model.
+        detector: Initialized face detector.
         image_path (str): Path to input image.
         vis_threshold (float): Threshold for drawing detections.
         save_dir (str): Directory to save output image.
@@ -23,8 +24,18 @@ def run_inference(model, image_path, vis_threshold=0.6, save_dir="outputs"):
         print(f"❌ Error: Failed to load image from '{image_path}'")
         return

-    boxes, landmarks = model.detect(image)
-    draw_detections(image, (boxes, landmarks), vis_threshold)
+    # 1. Get the list of face dictionaries from the detector
+    faces = detector.detect(image)
+
+    if faces:
+        # 2. Unpack the data into separate lists
+        bboxes = [face['bbox'] for face in faces]
+        scores = [face['confidence'] for face in faces]
+        landmarks = [face['landmarks'] for face in faces]
+
+        # 3. Pass the unpacked lists to the drawing function
+        draw_detections(image, bboxes, scores, landmarks, vis_threshold=vis_threshold)

     os.makedirs(save_dir, exist_ok=True)
     output_path = os.path.join(save_dir, f"{os.path.splitext(os.path.basename(image_path))[0]}_out.jpg")
@@ -33,28 +44,38 @@ def run_inference(model, image_path, vis_threshold=0.6, save_dir="outputs"):


 def main():
-    parser = argparse.ArgumentParser(description="Run RetinaFace inference on an image.")
+    parser = argparse.ArgumentParser(description="Run face detection on an image.")
     parser.add_argument("--image", type=str, required=True, help="Path to the input image")
-    parser.add_argument("--model", type=str, default="MNET_V2", choices=[m.name for m in RetinaFaceWeights], help="Model variant to use")
+    parser.add_argument(
+        "--method",
+        type=str,
+        default="retinaface",
+        choices=['retinaface', 'scrfd'],
+        help="Detection method to use."
+    )
     parser.add_argument("--threshold", type=float, default=0.6, help="Visualization confidence threshold")
     parser.add_argument("--iterations", type=int, default=1, help="Number of inference runs for benchmarking")
     parser.add_argument("--save_dir", type=str, default="outputs", help="Directory to save output images")

     args = parser.parse_args()

-    model_name = RetinaFaceWeights[args.model]
-    model = RetinaFace(model_name=model_name)
+    print(f"Initializing detector: {args.method}")
+    detector = create_detector(method=args.method)

     avg_time = 0
     for i in range(args.iterations):
         start = time.time()
-        run_inference(model, args.image, args.threshold, args.save_dir)
+        run_inference(detector, args.image, args.threshold, args.save_dir)
         elapsed = time.time() - start
         print(f"[{i + 1}/{args.iterations}] ⏱️ Inference time: {elapsed:.4f} seconds")
-        avg_time += elapsed
+        if i > 0:  # Skip the first run, which includes model loading time
+            avg_time += elapsed

     if args.iterations > 1:
-        print(f"\n🔥 Average inference time over {args.iterations} runs: {avg_time / args.iterations:.4f} seconds")
+        # Exclude the first run from the average to avoid warm-up overhead
+        effective_iterations = max(1, args.iterations - 1)
+        print(
+            f"\n🔥 Average inference time over {effective_iterations} runs: {avg_time / effective_iterations:.4f} seconds")


 if __name__ == "__main__":
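Taken together, the new API shrinks the whole detection flow to a few lines. A minimal standalone sketch of the dict-based interface used above (the input and output paths are placeholders):

import cv2
from uniface.detection import create_detector
from uniface.visualization import draw_detections

detector = create_detector(method="retinaface")
image = cv2.imread("assets/test.jpg")  # placeholder path

# detect() returns a list of dicts with 'bbox', 'confidence', and 'landmarks' keys
faces = detector.detect(image)
if faces:
    draw_detections(
        image,
        [face['bbox'] for face in faces],
        [face['confidence'] for face in faces],
        [face['landmarks'] for face in faces],
        vis_threshold=0.6,
    )
cv2.imwrite("outputs/test_out.jpg", image)  # placeholder path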
scripts/run_face_search.py (new file, 101 lines)
@@ -0,0 +1,101 @@
+import cv2
+import argparse
+import numpy as np
+
+# Use the new high-level factory functions
+from uniface.detection import create_detector
+from uniface.recognition import create_recognizer
+from uniface.face_utils import compute_similarity
+
+
+def extract_reference_embedding(detector, recognizer, image_path: str) -> np.ndarray:
+    """Extracts a normalized embedding from the first face found in an image."""
+    image = cv2.imread(image_path)
+    if image is None:
+        raise RuntimeError(f"Failed to load image: {image_path}")
+
+    faces = detector.detect(image)
+    if not faces:
+        raise RuntimeError("No faces found in reference image.")
+
+    # Get landmarks from the first detected face dictionary
+    landmarks = np.array(faces[0]['landmarks'])
+
+    # Use normalized embedding for more reliable similarity comparison
+    embedding = recognizer.get_normalized_embedding(image, landmarks)
+    return embedding
+
+
+def run_video(detector, recognizer, ref_embedding: np.ndarray, threshold: float = 0.4):
+    """Run real-time face recognition from a webcam feed."""
+    cap = cv2.VideoCapture(0)
+    if not cap.isOpened():
+        raise RuntimeError("Webcam could not be opened.")
+    print("Webcam started. Press 'q' to quit.")
+
+    while True:
+        ret, frame = cap.read()
+        if not ret:
+            break
+
+        faces = detector.detect(frame)
+
+        # Loop through each detected face
+        for face in faces:
+            # Extract bbox and landmarks from the dictionary
+            bbox = face['bbox']
+            landmarks = np.array(face['landmarks'])
+
+            x1, y1, x2, y2 = map(int, bbox)
+
+            # Get the normalized embedding for the current face
+            embedding = recognizer.get_normalized_embedding(frame, landmarks)
+
+            # Compare with the reference embedding
+            sim = compute_similarity(ref_embedding, embedding)
+
+            # Draw results
+            label = f"Match ({sim:.2f})" if sim > threshold else f"Unknown ({sim:.2f})"
+            color = (0, 255, 0) if sim > threshold else (0, 0, 255)
+            cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
+            cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)
+
+        cv2.imshow("Face Recognition", frame)
+        if cv2.waitKey(1) & 0xFF == ord('q'):
+            break
+
+    cap.release()
+    cv2.destroyAllWindows()
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Face recognition using a reference image.")
+    parser.add_argument("--image", type=str, required=True, help="Path to the reference face image.")
+    parser.add_argument(
+        "--detector",
+        type=str,
+        default="scrfd",
+        choices=['retinaface', 'scrfd'],
+        help="Face detection method."
+    )
+    parser.add_argument(
+        "--recognizer",
+        type=str,
+        default="arcface",
+        choices=['arcface', 'mobileface', 'sphereface'],
+        help="Face recognition method."
+    )
+    args = parser.parse_args()
+
+    print("Initializing models...")
+    detector = create_detector(method=args.detector)
+    recognizer = create_recognizer(method=args.recognizer)
+
+    print("Extracting reference embedding...")
+    ref_embedding = extract_reference_embedding(detector, recognizer, args.image)
+
+    run_video(detector, recognizer, ref_embedding)
+
+
+if __name__ == "__main__":
+    main()
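The same embeddings can be compared offline as well, without the webcam loop. A short sketch under the same API, reusing the extract_reference_embedding helper defined in the script above (the two image paths are placeholders):

from uniface.detection import create_detector
from uniface.recognition import create_recognizer
from uniface.face_utils import compute_similarity

detector = create_detector(method="scrfd")
recognizer = create_recognizer(method="arcface")

# Placeholder images, one face each
emb_a = extract_reference_embedding(detector, recognizer, "person_a.jpg")
emb_b = extract_reference_embedding(detector, recognizer, "person_b.jpg")

# Both embeddings are L2-normalized, so this is effectively cosine similarity;
# the script above treats sim > 0.4 as a match.
sim = compute_similarity(emb_a, emb_b)
print(f"similarity: {sim:.2f}")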
@@ -2,18 +2,21 @@ import cv2
 import argparse
 import numpy as np

-from uniface.detection import RetinaFace
-from uniface.constants import RetinaFaceWeights
-from uniface.recognition import ArcFace
+# Use the new high-level factory functions for consistency
+from uniface.detection import create_detector
+from uniface.recognition import create_recognizer
+
+# Import enums for argument choices
+from uniface.constants import RetinaFaceWeights, ArcFaceWeights, MobileFaceWeights, SphereFaceWeights


-def run_inference(detector, recognizer, image_path):
+def run_inference(detector, recognizer, image_path: str):
     """
     Detect faces and extract embeddings from a single image.

     Args:
-        detector (RetinaFace): Initialized face detector.
-        recognizer (ArcFace): Face recognition model.
+        detector: Initialized face detector.
+        recognizer: Initialized face recognition model.
         image_path (str): Path to the input image.
     """
     image = cv2.imread(image_path)
@@ -21,36 +24,53 @@ def run_inference(detector, recognizer, image_path):
         print(f"Error: Failed to load image from '{image_path}'")
         return

-    boxes, landmarks = detector.detect(image)
+    faces = detector.detect(image)

-    if len(boxes) == 0:
+    if not faces:
         print("No faces detected.")
         return

-    print(f"Detected {len(boxes)} face(s). Extracting embeddings...")
+    print(f"Detected {len(faces)} face(s). Extracting embeddings for the first face...")

-    for i, landmark in enumerate(landmarks[:1]):
-        embedding = recognizer.get_embedding(image, landmark)
-        norm_embedding = recognizer.get_normalized_embedding(image, landmark)
-        print("embedding:", np.sum(embedding))
-        print("norm embedding:",np.sum(norm_embedding))
+    # Process the first detected face
+    first_face = faces[0]
+    landmarks = np.array(first_face['landmarks'])  # Convert landmarks to numpy array
+
+    # Extract embedding using the landmarks from the face dictionary
+    embedding = recognizer.get_embedding(image, landmarks)
+    norm_embedding = recognizer.get_normalized_embedding(image, landmarks)
+
+    # Print some info about the embeddings
+    print(f" - Embedding shape: {embedding.shape}")
+    print(f" - L2 norm of unnormalized embedding: {np.linalg.norm(embedding):.4f}")
+    print(f" - L2 norm of normalized embedding: {np.linalg.norm(norm_embedding):.4f}")


 def main():
     parser = argparse.ArgumentParser(description="Extract face embeddings from a single image.")
     parser.add_argument("--image", type=str, required=True, help="Path to the input image.")
     parser.add_argument(
-        "--model",
+        "--detector",
         type=str,
-        default="MNET_V2",
-        choices=[m.name for m in RetinaFaceWeights],
-        help="RetinaFace model variant to use."
+        default="retinaface",
+        choices=['retinaface', 'scrfd'],
+        help="Face detection method to use."
     )
+    parser.add_argument(
+        "--recognizer",
+        type=str,
+        default="arcface",
+        choices=['arcface', 'mobileface', 'sphereface'],
+        help="Face recognition method to use."
+    )

     args = parser.parse_args()

-    detector = RetinaFace(model_name=RetinaFaceWeights[args.model])
-    recognizer = ArcFace()
+    print(f"Initializing detector: {args.detector}")
+    # Only pass RetinaFace weights when the RetinaFace method is selected
+    detector_kwargs = {'model_name': RetinaFaceWeights.MNET_V2} if args.detector == 'retinaface' else {}
+    detector = create_detector(method=args.detector, **detector_kwargs)
+
+    print(f"Initializing recognizer: {args.recognizer}")
+    recognizer = create_recognizer(method=args.recognizer)

     run_inference(detector, recognizer, args.image)

@@ -1,69 +0,0 @@
-import cv2
-import argparse
-import numpy as np
-
-from uniface.detection import RetinaFace
-from uniface.constants import RetinaFaceWeights
-from uniface.recognition import ArcFace
-from uniface.face_utils import compute_similarity
-
-
-def extract_reference_embedding(detector, recognizer, image_path):
-    image = cv2.imread(image_path)
-    if image is None:
-        raise RuntimeError(f"Failed to load image: {image_path}")
-
-    boxes, landmarks = detector.detect(image)
-    if len(boxes) == 0:
-        raise RuntimeError("No faces found in reference image.")
-
-    embedding = recognizer.get_embedding(image, landmarks[0])
-    return embedding
-
-
-def run_video(detector, recognizer, ref_embedding, threshold=0.30):
-    cap = cv2.VideoCapture(0)
-    if not cap.isOpened():
-        raise RuntimeError("Webcam could not be opened.")
-
-    while True:
-        ret, frame = cap.read()
-        if not ret:
-            break
-
-        boxes, landmarks = detector.detect(frame)
-
-        for box, lm in zip(boxes, landmarks):
-            x1, y1, x2, y2 = map(int, box[:4])
-            embedding = recognizer.get_embedding(frame, lm)
-            sim = compute_similarity(ref_embedding, embedding)
-            label = f"Match ({sim:.2f})" if sim > threshold else f"Unknown ({sim:.2f})"
-            color = (0, 255, 0) if sim > threshold else (0, 0, 255)
-
-            cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
-            cv2.putText(frame, label, (x1, y1 - 10),
-                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)
-
-        cv2.imshow("Face Recognition", frame)
-        if cv2.waitKey(1) & 0xFF == ord('q'):
-            break
-
-    cap.release()
-    cv2.destroyAllWindows()
-
-
-def main():
-    parser = argparse.ArgumentParser(description="Face recognition using a reference image.")
-    parser.add_argument("--image", type=str, required=True, help="Path to the reference face image.")
-    parser.add_argument("--model", type=str, default="MNET_V2",
-                        choices=[m.name for m in RetinaFaceWeights], help="Face detector model.")
-    args = parser.parse_args()
-
-    detector = RetinaFace(model_name=RetinaFaceWeights[args.model])
-    recognizer = ArcFace()
-    ref_embedding = extract_reference_embedding(detector, recognizer, args.image)
-    run_video(detector, recognizer, ref_embedding)
-
-
-if __name__ == "__main__":
-    main()
@@ -15,7 +15,10 @@ __license__ = "MIT"
 __author__ = "Yakhyokhuja Valikhujaev"
 __version__ = "0.1.8"


+from .detection import detect_faces, create_detector, list_available_detectors
+from .recognition import create_recognizer
+from .landmark import create_landmarker
+
 from uniface.face_utils import face_alignment, compute_similarity
 from uniface.model_store import verify_model_weights
@@ -25,22 +28,20 @@ from uniface.log import Logger


 __all__ = [
     # Metadata
-    "__version__",
-    "__author__",
-    "__license__",
+    '__author__',
+    '__license__',
+    '__version__',

     # Core functions
-    'detect_faces',
     'create_detector',
+    'create_landmarker',
+    'create_recognizer',
+    'detect_faces',
     'list_available_detectors',

     # Utility functions
-    "face_alignment",
-    "compute_similarity",
-    "verify_model_weights",
-    "draw_detections",
+    'compute_similarity',
+    'draw_detections',
+    'face_alignment',
+    'verify_model_weights',

     # Classes
-    "Logger",
+    'Logger'
 ]

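With these re-exports in place, the three factories are importable straight from the package root. A minimal sketch, assuming the package is installed:

import uniface

print(uniface.__version__)  # 0.1.8

detector = uniface.create_detector('retinaface')
recognizer = uniface.create_recognizer('arcface')
landmarker = uniface.create_landmarker()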
@@ -1 +1,32 @@
-from .model import Landmark
+# Copyright 2025 Yakhyokhuja Valikhujaev
+# Author: Yakhyokhuja Valikhujaev
+# GitHub: https://github.com/yakhyo
+
+from .models import Landmark106
+from .base import BaseLandmarker
+
+
+def create_landmarker(method: str = 'insightface_106', **kwargs) -> BaseLandmarker:
+    """
+    Factory function to create facial landmark predictors.
+
+    Args:
+        method (str): Landmark prediction method. Options: 'insightface_106'.
+        **kwargs: Model-specific parameters.
+
+    Returns:
+        Initialized landmarker instance.
+    """
+    method = method.lower()
+    if method == 'insightface_106':
+        return Landmark106(**kwargs)
+    else:
+        available = ['insightface_106']
+        raise ValueError(f"Unsupported method: '{method}'. Available: {available}")
+
+
+__all__ = [
+    "create_landmarker",
+    "Landmark106",
+    "BaseLandmarker"
+]
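In practice the factory pairs with a detector: find a face, then predict its 106 keypoints from the bounding box. A minimal sketch (the image path is a placeholder):

import cv2
from uniface.detection import create_detector
from uniface.landmark import create_landmarker

detector = create_detector('retinaface')
landmarker = create_landmarker('insightface_106')

image = cv2.imread("face.jpg")  # placeholder path
faces = detector.detect(image)
if faces:
    # get_landmarks takes the full image plus one face's bounding box
    points = landmarker.get_landmarks(image, faces[0]['bbox'])
    print(points.shape)  # (106, 2)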
uniface/landmark/base.py (new file, 30 lines)
@@ -0,0 +1,30 @@
+# Copyright 2025 Yakhyokhuja Valikhujaev
+# Author: Yakhyokhuja Valikhujaev
+# GitHub: https://github.com/yakhyo
+
+from abc import ABC, abstractmethod
+import numpy as np
+
+
+class BaseLandmarker(ABC):
+    """
+    Abstract Base Class for all facial landmark models.
+    """
+    @abstractmethod
+    def get_landmarks(self, image: np.ndarray, bbox: np.ndarray) -> np.ndarray:
+        """
+        Predicts facial landmarks for a given face bounding box.
+
+        This method defines the standard interface for all landmark predictors.
+        It takes a full image and a bounding box for a single face and returns
+        the predicted keypoints for that face.
+
+        Args:
+            image (np.ndarray): The full source image in BGR format.
+            bbox (np.ndarray): A bounding box of a face [x1, y1, x2, y2].
+
+        Returns:
+            np.ndarray: An array of predicted landmark points with shape (N, 2),
+                        where N is the number of landmarks.
+        """
+        raise NotImplementedError
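Any new landmark backend only has to implement this one method. A hypothetical sketch of a custom subclass (DummyLandmarker and its corner-point output are illustrative only, not part of the library):

import numpy as np
from uniface.landmark.base import BaseLandmarker


class DummyLandmarker(BaseLandmarker):
    """Illustrative stub: returns the four bbox corners as 'landmarks'."""

    def get_landmarks(self, image: np.ndarray, bbox: np.ndarray) -> np.ndarray:
        # A real implementation would run a model on the face crop here.
        x1, y1, x2, y2 = bbox[:4]
        return np.array([[x1, y1], [x2, y1], [x2, y2], [x1, y2]], dtype=np.float32)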
@@ -1,247 +0,0 @@
-# Copyright 2025 Yakhyokhuja Valikhujaev
-# Author: Yakhyokhuja Valikhujaev
-# GitHub: https://github.com/yakhyo
-
-import cv2
-import numpy as np
-import onnxruntime as ort
-
-from typing import Tuple
-
-from uniface.log import Logger
-from uniface.constants import LandmarkWeights
-from uniface.model_store import verify_model_weights
-from uniface.face_utils import bbox_center_alignment, transform_points_2d
-
-__all__ = ['Landmark']
-
-
-class Landmark:
-    """
-    Facial landmark detection model for predicting 106 facial keypoints using ONNX model.
-
-    This class wraps a pretrained facial landmark model to detect 106 key facial points
-    such as eyes, eyebrows, nose, lips, and jawline from a given face bounding box.
-    It handles model verification, input preprocessing, ONNX inference execution,
-    and projection of landmark coordinates back to the original image space.
-
-    Attributes:
-        input_size (Tuple[int, int]): Model's expected input resolution (width, height).
-        input_mean (float): Mean value used for input normalization.
-        input_std (float): Standard deviation used for input normalization.
-        model_path (str): Path to the verified ONNX model file.
-        session (onnxruntime.InferenceSession): ONNX Runtime session for inference.
-        input_names (List[str]): List of input node names.
-        output_names (List[str]): List of output node names.
-        lmk_dim (int): Number of dimensions per landmark point (typically 2 for x, y).
-        lmk_num (int): Total number of landmark points predicted by the model (106).
-
-    Args:
-        model_name (LandmarkWeights): Enum specifying the landmark model to load.
-        input_size (Tuple[int, int]): Resolution for model input; defaults to (192, 192).
-    """
-
-    def __init__(
-        self,
-        model_name: LandmarkWeights = LandmarkWeights.DEFAULT,
-        input_size: Tuple[int, int] = (192, 192)
-    ) -> None:
-        """
-        Initializes the Facial Landmark model for inference.
-
-        Args:
-            model_name: Enum specifying which landmark model weights to use
-            input_size: Input resolution for the model (width, height)
-        """
-        Logger.info(
-            f"Initializing Facial Landmark with model={model_name}, "
-            f"input_size={input_size}"
-        )
-
-        # Initialize configuration
-        self.input_size = input_size
-        self.input_std = 1.0
-        self.input_mean = 0.0
-
-        # Get path to model weights
-        self.model_path = verify_model_weights(model_name)
-        Logger.info(f"Verified model weights located at: {self.model_path}")
-
-        # Initialize model
-        self._initialize_model()
-
-    def _initialize_model(self):
-        """
-        Initialize the ONNX model from the stored model path.
-
-        Raises:
-            RuntimeError: If the model fails to load or initialize.
-        """
-        try:
-            self.session = ort.InferenceSession(
-                self.model_path,
-                providers=["CUDAExecutionProvider", "CPUExecutionProvider"]
-            )
-
-            # Get input configuration
-            input_metadata = self.session.get_inputs()[0]
-            input_shape = input_metadata.shape
-            self.input_size = tuple(input_shape[2:4][::-1])  # Update input size from model
-
-            # Get input/output names
-            self.input_names = [input.name for input in self.session.get_inputs()]
-            self.output_names = [output.name for output in self.session.get_outputs()]
-
-            # Determine landmark dimensions from output shape
-            output_shape = self.session.get_outputs()[0].shape
-            self.lmk_dim = 2  # x,y coordinates
-            self.lmk_num = output_shape[1] // self.lmk_dim  # Number of landmarks
-
-            Logger.info(f"Model initialized with {self.lmk_num} landmarks")
-
-        except Exception as e:
-            Logger.error(f"Failed to load landmark model from '{self.model_path}'", exc_info=True)
-            raise RuntimeError(f"Failed to initialize landmark model: {e}")
-
-    def preprocess(self, image: np.ndarray, bbox: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
-        """
-        Preprocess the input image and bounding box for inference.
-
-        Args:
-            image: Input image in BGR format
-            bbox: Bounding box coordinates [x1, y1, x2, y2]
-
-        Returns:
-            Tuple containing:
-                - Preprocessed image blob ready for inference
-                - Transformation matrix for mapping predictions back to original image
-        """
-        # Calculate face dimensions and center
-        width, height = bbox[2] - bbox[0], bbox[3] - bbox[1]
-        center = (bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2
-
-        # Determine scale to fit face with some margin
-        scale = self.input_size[0] / (max(width, height) * 1.5)
-        rotation = 0.0
-
-        # Align face using center, scale and rotation
-        aligned_face, transform_matrix = bbox_center_alignment(
-            image, center, self.input_size[0], scale, rotation
-        )
-
-        # Convert to blob format for inference
-        face_blob = cv2.dnn.blobFromImage(
-            aligned_face,
-            1.0 / self.input_std,
-            self.input_size,
-            (self.input_mean, self.input_mean, self.input_mean),
-            swapRB=True  # Convert BGR to RGB
-        )
-
-        return face_blob, transform_matrix
-
-    def postprocess(self, predictions: np.ndarray, transform_matrix: np.ndarray) -> np.ndarray:
-        """
-        Convert raw model predictions to image coordinates.
-
-        Args:
-            predictions: Raw landmark coordinates from model output
-            transform_matrix: Affine transformation matrix from preprocessing
-
-        Returns:
-            Landmarks in original image coordinates
-        """
-        # Reshape to pairs of x,y coordinates
-        landmarks = predictions.reshape((-1, 2))
-
-        # Denormalize coordinates to pixel space
-        landmarks[:, 0:2] += 1  # Shift from [-1,1] to [0,2] range
-        landmarks[:, 0:2] *= (self.input_size[0] // 2)  # Scale to pixel coordinates
-
-        # Invert the transformation to map back to original image
-        inverse_matrix = cv2.invertAffineTransform(transform_matrix)
-        landmarks = transform_points_2d(landmarks, inverse_matrix)
-
-        return landmarks
-
-    def predict(self, image: np.ndarray, bbox: np.ndarray) -> np.ndarray:
-        """
-        Predict facial landmarks for the given image and face bounding box.
-
-        Args:
-            image: Input image in BGR format
-            bbox: Face bounding box [x1, y1, x2, y2]
-
-        Returns:
-            Array of facial landmarks in original image coordinates
-        """
-        # Preprocess image
-        face_blob, transform_matrix = self.preprocess(image, bbox)
-
-        # Run inference
-        raw_predictions = self.session.run(
-            self.output_names,
-            {self.input_names[0]: face_blob}
-        )[0][0]
-
-        # Postprocess to get landmarks in original image space
-        landmarks = self.postprocess(raw_predictions, transform_matrix)
-
-        return landmarks
-
-
-# TODO: For testing purposes only, remote later
-
-
-if __name__ == "__main__":
-    from uniface.detection import RetinaFace
-    from uniface.constants import RetinaFaceWeights
-
-    face_detector = RetinaFace(
-        model_name=RetinaFaceWeights.MNET_V2,
-        conf_thresh=0.5,
-        pre_nms_topk=5000,
-        nms_thresh=0.4,
-        post_nms_topk=750,
-        dynamic_size=False,
-        input_size=(640, 640)
-    )
-
-    model = Landmark()
-
-    cap = cv2.VideoCapture(0)
-    if not cap.isOpened():
-        print("Webcam not available.")
-        exit()
-
-    print("Press 'q' to quit.")
-
-    while True:
-        ret, frame = cap.read()
-        if not ret:
-            print("Frame capture failed.")
-            break
-
-        boxes, landmarks = face_detector.detect(frame)
-
-        if boxes is None or len(boxes) == 0:
-            cv2.imshow("Facial Landmark Detection", frame)
-            if cv2.waitKey(1) & 0xFF == ord('q'):
-                break
-            continue
-
-        for box in boxes:
-            x1, y1, x2, y2, score = box.astype(int)
-
-            lmk = model.predict(frame, box[:4])
-
-            for (x, y) in lmk.astype(int):
-                cv2.circle(frame, (x, y), 2, (0, 255, 0), -1)
-
-            cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)
-
-        cv2.imshow("Facial Landmark Detection", frame)
-        if cv2.waitKey(1) & 0xFF == ord('q'):
-            break
-
-    cap.release()
-    cv2.destroyAllWindows()
uniface/landmark/models.py (new file, 217 lines)
@@ -0,0 +1,217 @@
+# Copyright 2025 Yakhyokhuja Valikhujaev
+# Author: Yakhyokhuja Valikhujaev
+# GitHub: https://github.com/yakhyo
+
+import cv2
+import numpy as np
+import onnxruntime as ort
+from typing import Tuple
+
+from uniface.log import Logger
+from uniface.constants import LandmarkWeights
+from uniface.model_store import verify_model_weights
+from uniface.face_utils import bbox_center_alignment, transform_points_2d
+from .base import BaseLandmarker
+
+__all__ = ['Landmark106']
+
+
+class Landmark106(BaseLandmarker):
+    """Facial landmark model for predicting 106 facial keypoints.
+
+    This class implements the BaseLandmarker and provides an end-to-end
+    pipeline for 106-point facial landmark detection. It handles model
+    loading, preprocessing of a face crop based on a bounding box,
+    inference, and post-processing to map landmarks back to the
+    original image coordinates.
+
+    Args:
+        model_name (LandmarkWeights): The enum specifying the landmark model to load.
+            Defaults to `LandmarkWeights.DEFAULT`.
+        input_size (Tuple[int, int]): The resolution (width, height) for the model's
+            input. Defaults to (192, 192).
+
+    Example:
+        >>> # Assume 'image' is a loaded image and 'bbox' is a face bounding box
+        >>> # bbox = [x1, y1, x2, y2]
+        >>>
+        >>> landmarker = Landmark106()
+        >>> landmarks = landmarker.get_landmarks(image, bbox)
+        >>> print(landmarks.shape)
+        (106, 2)
+    """
+
+    def __init__(
+        self,
+        model_name: LandmarkWeights = LandmarkWeights.DEFAULT,
+        input_size: Tuple[int, int] = (192, 192)
+    ) -> None:
+        Logger.info(
+            f"Initializing Facial Landmark with model={model_name}, "
+            f"input_size={input_size}"
+        )
+        self.input_size = input_size
+        self.input_std = 1.0
+        self.input_mean = 0.0
+        self.model_path = verify_model_weights(model_name)
+        self._initialize_model()
+
+    def _initialize_model(self):
+        """
+        Initialize the ONNX model from the stored model path.
+
+        Raises:
+            RuntimeError: If the model fails to load or initialize.
+        """
+        try:
+            self.session = ort.InferenceSession(
+                self.model_path,
+                providers=["CUDAExecutionProvider", "CPUExecutionProvider"]
+            )
+
+            # Get input configuration
+            input_metadata = self.session.get_inputs()[0]
+            input_shape = input_metadata.shape
+            self.input_size = tuple(input_shape[2:4][::-1])  # Update input size from model
+
+            # Get input/output names
+            self.input_names = [input.name for input in self.session.get_inputs()]
+            self.output_names = [output.name for output in self.session.get_outputs()]
+
+            # Determine landmark dimensions from output shape
+            output_shape = self.session.get_outputs()[0].shape
+            self.lmk_dim = 2  # x,y coordinates
+            self.lmk_num = output_shape[1] // self.lmk_dim  # Number of landmarks
+
+            Logger.info(f"Model initialized with {self.lmk_num} landmarks")
+
+        except Exception as e:
+            Logger.error(f"Failed to load landmark model from '{self.model_path}'", exc_info=True)
+            raise RuntimeError(f"Failed to initialize landmark model: {e}")
+
+    def preprocess(self, image: np.ndarray, bbox: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
+        """Prepares a face crop for inference.
+
+        This method takes a face bounding box, performs a center alignment to
+        warp the face into the model's required input size, and then creates
+        a normalized blob ready for the ONNX session.
+
+        Args:
+            image (np.ndarray): The full source image in BGR format.
+            bbox (np.ndarray): The bounding box of the face [x1, y1, x2, y2].
+
+        Returns:
+            Tuple[np.ndarray, np.ndarray]: A tuple containing:
+                - The preprocessed image blob ready for inference.
+                - The affine transformation matrix used for alignment.
+        """
+        width, height = bbox[2] - bbox[0], bbox[3] - bbox[1]
+        center = ((bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2)
+        scale = self.input_size[0] / (max(width, height) * 1.5)
+
+        aligned_face, transform_matrix = bbox_center_alignment(image, center, self.input_size[0], scale, 0.0)
+
+        face_blob = cv2.dnn.blobFromImage(
+            aligned_face, 1.0 / self.input_std, self.input_size,
+            (self.input_mean, self.input_mean, self.input_mean), swapRB=True
+        )
+        return face_blob, transform_matrix
+
+    def postprocess(self, predictions: np.ndarray, transform_matrix: np.ndarray) -> np.ndarray:
+        """Converts raw model predictions back to original image coordinates.
+
+        This method reshapes the model's flat output array into landmark points,
+        denormalizes them to the model's input space, and then applies an
+        inverse affine transformation to map them back to the original image space.
+
+        Args:
+            predictions (np.ndarray): Raw landmark coordinates from the model output.
+            transform_matrix (np.ndarray): The affine transformation matrix from preprocessing.
+
+        Returns:
+            np.ndarray: An array of landmark points in the original image's coordinates.
+        """
+        landmarks = predictions.reshape((-1, 2))
+        landmarks[:, 0:2] += 1
+        landmarks[:, 0:2] *= (self.input_size[0] // 2)
+
+        inverse_matrix = cv2.invertAffineTransform(transform_matrix)
+        landmarks = transform_points_2d(landmarks, inverse_matrix)
+        return landmarks
+
+    def get_landmarks(self, image: np.ndarray, bbox: np.ndarray) -> np.ndarray:
+        """Predicts facial landmarks for the given image and face bounding box.
+
+        This is the main public method that orchestrates the full pipeline of
+        preprocessing, inference, and post-processing.
+
+        Args:
+            image (np.ndarray): The full source image in BGR format.
+            bbox (np.ndarray): A bounding box of a face [x1, y1, x2, y2].
+
+        Returns:
+            np.ndarray: An array of predicted landmark points with shape (106, 2).
+        """
+        face_blob, transform_matrix = self.preprocess(image, bbox)
+        raw_predictions = self.session.run(
+            self.output_names, {self.input_names[0]: face_blob}
+        )[0][0]
+        landmarks = self.postprocess(raw_predictions, transform_matrix)
+        return landmarks
+
+
+# TODO: For testing purposes only, remove later
+if __name__ == "__main__":
+    # UPDATED: Use the high-level factory functions
+    from uniface.detection import create_detector
+    from uniface.landmark import create_landmarker
+
+    # 1. Create the detector and landmarker using the new API
+    face_detector = create_detector('retinaface')
+    landmarker = create_landmarker()  # Uses the default 'insightface_106' method
+
+    cap = cv2.VideoCapture(0)
+    if not cap.isOpened():
+        print("Webcam not available.")
+        exit()
+
+    print("Press 'q' to quit.")
+
+    while True:
+        ret, frame = cap.read()
+        if not ret:
+            print("Frame capture failed.")
+            break
+
+        # 2. The detect method returns a list of dictionaries
+        faces = face_detector.detect(frame)
+
+        if not faces:
+            cv2.imshow("Facial Landmark Detection", frame)
+            if cv2.waitKey(1) & 0xFF == ord('q'):
+                break
+            continue
+
+        # 3. Loop through the list of face dictionaries
+        for face in faces:
+            # Extract the bounding box
+            bbox = face['bbox']
+
+            # 4. Get landmarks for the current face using its bounding box
+            landmarks = landmarker.get_landmarks(frame, bbox)
+
+            # --- Drawing Logic ---
+            # Draw the landmarks
+            for (x, y) in landmarks.astype(int):
+                cv2.circle(frame, (x, y), 2, (0, 255, 0), -1)
+
+            # Draw the bounding box
+            x1, y1, x2, y2 = map(int, bbox)
+            cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)
+
+        cv2.imshow("Facial Landmark Detection", frame)
+        if cv2.waitKey(1) & 0xFF == ord('q'):
+            break
+
+    cap.release()
+    cv2.destroyAllWindows()
@@ -1,2 +1,63 @@
-from .base import PreprocessConfig
-from .models import SphereFace, MobileFace, ArcFace
+# Copyright 2025 Yakhyokhuja Valikhujaev
+# Author: Yakhyokhuja Valikhujaev
+# GitHub: https://github.com/yakhyo
+
+from typing import Dict
+from .models import ArcFace, MobileFace, SphereFace
+from .base import BaseRecognizer
+from uniface.constants import ArcFaceWeights, MobileFaceWeights, SphereFaceWeights
+
+
+def create_recognizer(method: str = 'arcface', **kwargs) -> BaseRecognizer:
+    """
+    Factory function to create face recognizers.
+
+    This function initializes and returns a face recognizer instance based on the
+    specified method. It acts as a high-level interface to the underlying
+    model classes like ArcFace, MobileFace, etc.
+
+    Args:
+        method (str): The recognition method to use.
+            Options: 'arcface' (default), 'mobileface', 'sphereface'.
+        **kwargs: Model-specific parameters passed to the recognizer's constructor.
+            For example, `model_name` can be used to select a specific
+            pre-trained weight from the available enums (e.g., `ArcFaceWeights.MNET`).
+
+    Returns:
+        BaseRecognizer: An initialized recognizer instance ready for use.
+
+    Raises:
+        ValueError: If the specified `method` is not supported.
+
+    Examples:
+        >>> # Create the default ArcFace recognizer
+        >>> recognizer = create_recognizer()
+
+        >>> # Create a specific MobileFace recognizer
+        >>> from uniface.constants import MobileFaceWeights
+        >>> recognizer = create_recognizer(
+        ...     'mobileface',
+        ...     model_name=MobileFaceWeights.MNET_V2
+        ... )
+
+        >>> # Create a SphereFace recognizer
+        >>> recognizer = create_recognizer('sphereface')
+    """
+    method = method.lower()
+
+    if method == 'arcface':
+        return ArcFace(**kwargs)
+    elif method == 'mobileface':
+        return MobileFace(**kwargs)
+    elif method == 'sphereface':
+        return SphereFace(**kwargs)
+    else:
+        available = ['arcface', 'mobileface', 'sphereface']
+        raise ValueError(f"Unsupported method: '{method}'. Available: {available}")
+
+
+__all__ = [
+    "create_recognizer",
+    "ArcFace",
+    "MobileFace",
+    "SphereFace",
+    "BaseRecognizer",
+]
@@ -2,20 +2,15 @@
 # Author: Yakhyokhuja Valikhujaev
 # GitHub: https://github.com/yakhyo

+from abc import ABC, abstractmethod
 import cv2
 import numpy as np
 import onnxruntime as ort
 from dataclasses import dataclass
-
 from typing import Tuple, Union, List

 from uniface.log import Logger
-from uniface.model_store import verify_model_weights
 from uniface.face_utils import face_alignment
-from uniface.constants import SphereFaceWeights, MobileFaceWeights, ArcFaceWeights
-
-
-__all__ = ["BaseModel", "PreprocessConfig"]


 @dataclass
@@ -28,38 +23,25 @@ class PreprocessConfig:
     input_size: Tuple[int, int] = (112, 112)


-class BaseModel:
+class BaseRecognizer(ABC):
     """
-    Unified Face Encoder supporting multiple model families (e.g., SphereFace, MobileFace).
+    Abstract Base Class for all face recognition models.
+    It provides the core functionality for preprocessing, inference, and embedding extraction.
     """

-    def __init__(
-        self,
-        model_name: Union[SphereFaceWeights, MobileFaceWeights, ArcFaceWeights] = MobileFaceWeights.MNET_V2,
-        preprocessing: PreprocessConfig = PreprocessConfig(),
-    ) -> None:
+    @abstractmethod
+    def __init__(self, model_path: str, preprocessing: PreprocessConfig) -> None:
         """
-        Initializes the FaceEncoder model for inference.
+        Initializes the model. Subclasses must call this.

         Args:
-            model_name: Selected model weight enum.
-            preprocessing: Configuration for input normalization and resizing.
+            model_path (str): The direct path to the verified ONNX model.
+            preprocessing (PreprocessConfig): The configuration for preprocessing.
         """
         # Store preprocessing parameters
         self.input_mean = preprocessing.input_mean
         self.input_std = preprocessing.input_std
         self.input_size = preprocessing.input_size

-        Logger.info(
-            f"Initializing Face Recognition with model={model_name}, "
-            f"input_mean={self.input_mean}, input_std={self.input_std}, "
-            f"input_size={self.input_size}"
-        )
-
-        # Get path to model weights and initialize model
-        self.model_path = verify_model_weights(model_name)
-        Logger.info(f"Verified model weights located at: {self.model_path}")
-
+        self.model_path = model_path
         self._initialize_model()

     def _initialize_model(self) -> None:
@@ -152,14 +134,15 @@ class BaseModel:

     def get_normalized_embedding(self, image: np.ndarray, landmarks: np.ndarray) -> np.ndarray:
         """
-        Extracts l2 normalized face embedding vector from an image
+        Extracts an L2-normalized face embedding vector from an image.

         Args:
             image: Input face image (BGR format).
             landmarks: Facial landmarks (5 points for alignment).

         Returns:
-            Normalied face embedding vector (typically 512-dimensional).
+            Normalized face embedding vector (typically 512-dimensional).
         """
         embedding = self.get_embedding(image, landmarks)
-        return embedding / np.linalg.norm(embedding)
+        norm = np.linalg.norm(embedding)
+        return embedding / norm if norm > 0 else embedding
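The added zero-norm guard matters because a degenerate (all-zero) embedding would otherwise divide by zero and return NaNs. A quick standalone sketch of the two behaviors:

import numpy as np

embedding = np.zeros(512, dtype=np.float32)
norm = np.linalg.norm(embedding)  # 0.0

# Old behavior: embedding / norm -> array of NaNs (plus a RuntimeWarning)
# New behavior: the vector is returned unchanged when the norm is zero
safe = embedding / norm if norm > 0 else embedding
print(np.isnan(safe).any())  # False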
@@ -4,27 +4,35 @@

 from typing import Optional

-from uniface.constants import SphereFaceWeights, MobileFaceWeights, ArcFaceWeights
-from .base import BaseModel, PreprocessConfig
+from uniface.constants import ArcFaceWeights, MobileFaceWeights, SphereFaceWeights
+from uniface.model_store import verify_model_weights
+from .base import BaseRecognizer, PreprocessConfig

+__all__ = ["ArcFace", "MobileFace", "SphereFace"]

-__all__ = ["SphereFace", "MobileFace", "ArcFace"]

+class ArcFace(BaseRecognizer):
+    """ArcFace model for robust face recognition.

-class SphereFace(BaseModel):
-    """
-    SphereFace face encoder class.
-
-    This class loads a SphereFace model for face embedding extraction.
-    It supports configurable preprocessing, with a default mean/std and input size of 112x112.
+    This class provides a concrete implementation of the BaseRecognizer,
+    pre-configured for ArcFace models. It handles the loading of specific
+    ArcFace weights and sets up the appropriate default preprocessing.

     Args:
-        model_name (SphereFaceWeights): Enum value representing the model to load. Defaults to SphereFaceWeights.SPHERE20.
-        preprocessing (Optional[PreprocessConfig]): Preprocessing config (mean, std, size). Defaults to standard 112x112 with normalization.
+        model_name (ArcFaceWeights): The specific ArcFace model variant to use.
+            Defaults to `ArcFaceWeights.MNET`.
+        preprocessing (Optional[PreprocessConfig]): An optional custom preprocessing
+            configuration. If None, a default config for ArcFace is used.
+
+    Example:
+        >>> from uniface.recognition import ArcFace
+        >>> recognizer = ArcFace()
+        >>> # embedding = recognizer.get_normalized_embedding(image, landmarks)
     """

     def __init__(
-        self, model_name: SphereFaceWeights = SphereFaceWeights.SPHERE20,
+        self,
+        model_name: ArcFaceWeights = ArcFaceWeights.MNET,
         preprocessing: Optional[PreprocessConfig] = None
     ) -> None:
         if preprocessing is None:
@@ -33,23 +41,32 @@ class SphereFace(BaseModel):
             input_std=127.5,
             input_size=(112, 112)
         )
-        super().__init__(model_name=model_name, preprocessing=preprocessing)
+        model_path = verify_model_weights(model_name)
+        super().__init__(model_path=model_path, preprocessing=preprocessing)


-class MobileFace(BaseModel):
-    """
-    MobileFace face encoder class.
+class MobileFace(BaseRecognizer):
+    """Lightweight MobileFaceNet model for fast face recognition.

-    Loads a lightweight MobileFaceNet model for fast face embedding extraction.
-    Default input normalization and resizing applied if preprocessing is not provided.
+    This class provides a concrete implementation of the BaseRecognizer,
+    pre-configured for MobileFaceNet models. It is optimized for speed,
+    making it suitable for edge devices.

     Args:
-        model_name (MobileFaceWeights): Enum value specifying the MobileFace model. Defaults to MobileFaceWeights.MNET_V2.
-        preprocessing (Optional[PreprocessConfig]): Preprocessing config. If None, uses standard normalization and 112x112 input size.
+        model_name (MobileFaceWeights): The specific MobileFaceNet model variant to use.
+            Defaults to `MobileFaceWeights.MNET_V2`.
+        preprocessing (Optional[PreprocessConfig]): An optional custom preprocessing
+            configuration. If None, a default config for MobileFaceNet is used.
+
+    Example:
+        >>> from uniface.recognition import MobileFace
+        >>> recognizer = MobileFace()
+        >>> # embedding = recognizer.get_normalized_embedding(image, landmarks)
     """

     def __init__(
-        self, model_name: MobileFaceWeights = MobileFaceWeights.MNET_V2,
+        self,
+        model_name: MobileFaceWeights = MobileFaceWeights.MNET_V2,
         preprocessing: Optional[PreprocessConfig] = None
     ) -> None:
         if preprocessing is None:
@@ -58,23 +75,32 @@ class MobileFace(BaseModel):
             input_std=127.5,
             input_size=(112, 112)
         )
-        super().__init__(model_name=model_name)
+        model_path = verify_model_weights(model_name)
+        super().__init__(model_path=model_path, preprocessing=preprocessing)


-class ArcFace(BaseModel):
-    """
-    ArcFace face encoder class.
+class SphereFace(BaseRecognizer):
+    """SphereFace model using angular margin for face recognition.

-    Loads an ArcFace model (e.g., ResNet-based) for robust face recognition embedding generation.
-    Applies standard preprocessing unless overridden.
+    This class provides a concrete implementation of the BaseRecognizer,
+    pre-configured for SphereFace models, which were among the first to
+    introduce angular margin loss functions.

     Args:
-        model_name (ArcFaceWeights): Enum for the ArcFace model variant. Defaults to ArcFaceWeights.MNET.
-        preprocessing (Optional[PreprocessConfig]): Preprocessing settings. Defaults to standard normalization and resizing if not specified.
+        model_name (SphereFaceWeights): The specific SphereFace model variant to use.
+            Defaults to `SphereFaceWeights.SPHERE20`.
+        preprocessing (Optional[PreprocessConfig]): An optional custom preprocessing
+            configuration. If None, a default config for SphereFace is used.
+
+    Example:
+        >>> from uniface.recognition import SphereFace
+        >>> recognizer = SphereFace()
+        >>> # embedding = recognizer.get_normalized_embedding(image, landmarks)
     """

     def __init__(
-        self, model_name: ArcFaceWeights = ArcFaceWeights.MNET,
+        self,
+        model_name: SphereFaceWeights = SphereFaceWeights.SPHERE20,
         preprocessing: Optional[PreprocessConfig] = None
     ) -> None:
         if preprocessing is None:
@@ -83,4 +109,6 @@ class ArcFace(BaseModel):
             input_std=127.5,
             input_size=(112, 112)
         )
-        super().__init__(model_name=model_name)
+
+        model_path = verify_model_weights(model_name)
+        super().__init__(model_path=model_path, preprocessing=preprocessing)
@@ -4,42 +4,47 @@

 import cv2
 import numpy as np
+from typing import List, Union


-def draw_detections(image, detections, vis_threshold: float = 0.6):
+def draw_detections(
+    image: np.ndarray,
+    bboxes: Union[np.ndarray, List[List[float]]],
+    scores: Union[np.ndarray, List[float]],
+    landmarks: Union[np.ndarray, List[List[List[float]]]],
+    vis_threshold: float = 0.6
+):
     """
-    Draw bounding boxes and landmarks on the image with thickness scaled by bbox size.
+    Draws bounding boxes, scores, and landmarks from separate lists onto an image.

     Args:
-        image (ndarray): Image to draw detections on.
-        detections (tuple): (bounding boxes, landmarks) as NumPy arrays.
-        vis_threshold (float): Confidence threshold for filtering detections.
+        image (np.ndarray): The image to draw on.
+        bboxes (list or np.ndarray): A list of bounding boxes, e.g., [[x1,y1,x2,y2], ...].
+        scores (list or np.ndarray): A list of confidence scores.
+        landmarks (list or np.ndarray): A list of landmark sets, e.g., [[[x,y],...],...].
+        vis_threshold (float): Confidence threshold for filtering which detections to draw.
     """

     _colors = [(0, 0, 255), (0, 255, 255), (255, 0, 255), (0, 255, 0), (255, 0, 0)]

-    # Unpack detections
-    boxes, landmarks = detections
-    scores = boxes[:, 4]
+    # Filter detections by score
+    keep_indices = [i for i, score in enumerate(scores) if score >= vis_threshold]

-    # Filter detections by confidence threshold
-    filtered = scores >= vis_threshold
-    boxes = boxes[filtered, :4].astype(np.int32)
-    landmarks = landmarks[filtered]
-    scores = scores[filtered]
+    # Draw the filtered detections
+    for i in keep_indices:
+        bbox = np.array(bboxes[i], dtype=np.int32)
+        score = scores[i]
+        landmark_set = np.array(landmarks[i], dtype=np.int32)

-    # Draw bounding boxes, scores, and landmarks
-    for box, score, landmark in zip(boxes, scores, landmarks):
-        # Calculate thickness proportional to the bbox size
-        thickness = max(1, int(min(box[2] - box[0], box[3] - box[1]) / 100))
+        # Calculate adaptive thickness
+        thickness = max(1, int(min(bbox[2] - bbox[0], bbox[3] - bbox[1]) / 100))

-        # Draw rectangle
-        cv2.rectangle(image, tuple(box[:2]), tuple(box[2:]), (0, 0, 255), thickness)
+        # Draw bounding box
+        cv2.rectangle(image, tuple(bbox[:2]), tuple(bbox[2:]), (0, 0, 255), thickness)

         # Draw score
-        cv2.putText(image, f"{score:.2f}", (box[0], box[1] + 12),
+        cv2.putText(image, f"{score:.2f}", (bbox[0], bbox[1] - 10),
                     cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), thickness)

         # Draw landmarks
-        for point, color in zip(landmark, _colors):
-            cv2.circle(image, tuple(point), thickness, color, -1)
+        for j, point in enumerate(landmark_set):
+            cv2.circle(image, tuple(point), thickness + 1, _colors[j], -1)
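The new signature takes parallel lists rather than a packed (boxes, landmarks) tuple. A short standalone sketch with hand-made values, no detector required:

import numpy as np
from uniface.visualization import draw_detections

image = np.zeros((480, 640, 3), dtype=np.uint8)  # blank canvas
bboxes = [[100, 100, 300, 340]]
scores = [0.92]
# One set of five landmark points per face, matching the five-color palette
landmarks = [[[150, 180], [250, 180], [200, 230], [160, 280], [240, 280]]]

draw_detections(image, bboxes, scores, landmarks, vis_threshold=0.6)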