Merge pull request #42 from yakhyo/feat/standardize-outputs

feat: Standardize detection output and several other updates
Yakhyokhuja Valikhujaev · 2025-12-24 00:38:32 +09:00 · committed by GitHub
20 changed files with 188 additions and 192 deletions

View File

@@ -21,11 +21,28 @@ Thank you for considering contributing to UniFace! We welcome contributions of a
### Code Style
This project uses [Ruff](https://docs.astral.sh/ruff/) for linting and formatting.
```bash
# Check for linting errors
ruff check .
# Auto-fix linting errors
ruff check . --fix
# Format code
ruff format .
```
**Guidelines:**
- Follow PEP8 guidelines
- Use type hints (Python 3.10+)
- Write docstrings for public APIs
- Line length: 120 characters
- Keep code simple and readable
All PRs must pass `ruff check .` before merging.
## Development Setup
```bash

View File

@@ -39,9 +39,9 @@ faces = detector.detect(image)
# Print results
for i, face in enumerate(faces):
print(f"Face {i+1}:")
print(f" Confidence: {face['confidence']:.2f}")
print(f" BBox: {face['bbox']}")
print(f" Landmarks: {len(face['landmarks'])} points")
print(f" Confidence: {face.confidence:.2f}")
print(f" BBox: {face.bbox}")
print(f" Landmarks: {len(face.landmarks)} points")
```
**Output:**
@@ -70,9 +70,9 @@ image = cv2.imread("photo.jpg")
faces = detector.detect(image)
# Extract visualization data
-bboxes = [f['bbox'] for f in faces]
-scores = [f['confidence'] for f in faces]
-landmarks = [f['landmarks'] for f in faces]
+bboxes = [f.bbox for f in faces]
+scores = [f.confidence for f in faces]
+landmarks = [f.landmarks for f in faces]
# Draw on image
draw_detections(
@@ -113,8 +113,8 @@ faces2 = detector.detect(image2)
if faces1 and faces2:
# Extract embeddings
-emb1 = recognizer.get_normalized_embedding(image1, faces1[0]['landmarks'])
-emb2 = recognizer.get_normalized_embedding(image2, faces2[0]['landmarks'])
+emb1 = recognizer.get_normalized_embedding(image1, faces1[0].landmarks)
+emb2 = recognizer.get_normalized_embedding(image2, faces2[0].landmarks)
# Compute similarity (cosine similarity)
similarity = np.dot(emb1, emb2.T)[0][0]
@@ -159,9 +159,9 @@ while True:
faces = detector.detect(frame)
# Draw results
-bboxes = [f['bbox'] for f in faces]
-scores = [f['confidence'] for f in faces]
-landmarks = [f['landmarks'] for f in faces]
+bboxes = [f.bbox for f in faces]
+scores = [f.confidence for f in faces]
+landmarks = [f.landmarks for f in faces]
draw_detections(
image=frame,
bboxes=bboxes,
@@ -199,7 +199,7 @@ faces = detector.detect(image)
# Predict attributes
for i, face in enumerate(faces):
-gender, age = age_gender.predict(image, face['bbox'])
+gender, age = age_gender.predict(image, face.bbox)
gender_str = 'Female' if gender == 0 else 'Male'
print(f"Face {i+1}: {gender_str}, {age} years old")
```
@@ -230,7 +230,7 @@ image = cv2.imread("photo.jpg")
faces = detector.detect(image)
if faces:
-landmarks = landmarker.get_landmarks(image, faces[0]['bbox'])
+landmarks = landmarker.get_landmarks(image, faces[0].bbox)
print(f"Detected {len(landmarks)} landmarks")
# Draw landmarks
@@ -262,8 +262,7 @@ faces = detector.detect(image)
# Estimate gaze for each face
for i, face in enumerate(faces):
-bbox = face['bbox']
-x1, y1, x2, y2 = map(int, bbox[:4])
+x1, y1, x2, y2 = map(int, face.bbox[:4])
face_crop = image[y1:y2, x1:x2]
if face_crop.size > 0:
@@ -271,7 +270,7 @@ for i, face in enumerate(faces):
print(f"Face {i+1}: pitch={np.degrees(pitch):.1f}°, yaw={np.degrees(yaw):.1f}°")
# Draw gaze direction
-draw_gaze(image, bbox, pitch, yaw)
+draw_gaze(image, face.bbox, pitch, yaw)
cv2.imwrite("gaze_output.jpg", image)
```
@@ -435,7 +434,7 @@ image = cv2.imread("photo.jpg")
faces = detector.detect(image)
for i, face in enumerate(faces):
-label_idx, score = spoofer.predict(image, face['bbox'])
+label_idx, score = spoofer.predict(image, face.bbox)
# label_idx: 0 = Fake, 1 = Real
label = 'Real' if label_idx == 1 else 'Fake'
print(f"Face {i+1}: {label} ({score:.1%})")

View File

@@ -1,11 +1,15 @@
# UniFace: All-in-One Face Analysis Library
<div align="center">
[![License](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)
[![Python](https://img.shields.io/badge/Python-3.10%2B-blue)](https://www.python.org/)
[![PyPI](https://img.shields.io/pypi/v/uniface.svg)](https://pypi.org/project/uniface/)
+[![CI](https://github.com/yakhyo/uniface/actions/workflows/ci.yml/badge.svg)](https://github.com/yakhyo/uniface/actions)
-[![Downloads](https://pepy.tech/badge/uniface)](https://pepy.tech/project/uniface)
-[![DeepWiki](https://img.shields.io/badge/DeepWiki-yakhyo%2Funiface-blue.svg?logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACwAAAAyCAYAAAAnWDnqAAAAAXNSR0IArs4c6QAAA05JREFUaEPtmUtyEzEQhtWTQyQLHNak2AB7ZnyXZMEjXMGeK/AIi+QuHrMnbChYY7MIh8g01fJoopFb0uhhEqqcbWTp06/uv1saEDv4O3n3dV60RfP947Mm9/SQc0ICFQgzfc4CYZoTPAswgSJCCUJUnAAoRHOAUOcATwbmVLWdGoH//PB8mnKqScAhsD0kYP3j/Yt5LPQe2KvcXmGvRHcDnpxfL2zOYJ1mFwrryWTz0advv1Ut4CJgf5uhDuDj5eUcAUoahrdY/56ebRWeraTjMt/00Sh3UDtjgHtQNHwcRGOC98BJEAEymycmYcWwOprTgcB6VZ5JK5TAJ+fXGLBm3FDAmn6oPPjR4rKCAoJCal2eAiQp2x0vxTPB3ALO2CRkwmDy5WohzBDwSEFKRwPbknEggCPB/imwrycgxX2NzoMCHhPkDwqYMr9tRcP5qNrMZHkVnOjRMWwLCcr8ohBVb1OMjxLwGCvjTikrsBOiA6fNyCrm8V1rP93iVPpwaE+gO0SsWmPiXB+jikdf6SizrT5qKasx5j8ABbHpFTx+vFXp9EnYQmLx02h1QTTrl6eDqxLnGjporxl3NL3agEvXdT0WmEost648sQOYAeJS9Q7bfUVoMGnjo4AZdUMQku50McDcMWcBPvr0SzbTAFDfvJqwLzgxwATnCgnp4wDl6Aa+Ax283gghmj+vj7feE2KBBRMW3FzOpLOADl0Isb5587h/U4gGvkt5v60Z1VLG8BhYjbzRwyQZemwAd6cCR5/XFWLYZRIMpX39AR0tjaGGiGzLVyhse5C9RKC6ai42ppWPKiBagOvaYk8lO7DajerabOZP46Lby5wKjw1HCRx7p9sVMOWGzb/vA1hwiWc6jm3MvQDTogQkiqIhJV0nBQBTU+3okKCFDy9WwferkHjtxib7t3xIUQtHxnIwtx4mpg26/HfwVNVDb4oI9RHmx5WGelRVlrtiw43zboCLaxv46AZeB3IlTkwouebTr1y2NjSpHz68WNFjHvupy3q8TFn3Hos2IAk4Ju5dCo8B3wP7VPr/FGaKiG+T+v+TQqIrOqMTL1VdWV1DdmcbO8KXBz6esmYWYKPwDL5b5FA1a0hwapHiom0r/cKaoqr+27/XcrS5UwSMbQAAAABJRU5ErkJggg==)](https://deepwiki.com/yakhyo/uniface)
+[![Downloads](https://static.pepy.tech/badge/uniface)](https://pepy.tech/project/uniface)
+[![DeepWiki](https://img.shields.io/badge/DeepWiki-AI_Docs-blue.svg?logo=bookstack)](https://deepwiki.com/yakhyo/uniface)
</div>
<div align="center">
<img src=".github/logos/logo_web.webp" width=75%>
@@ -101,9 +105,9 @@ faces = detector.detect(image)
# Process results
for face in faces:
-bbox = face['bbox'] # [x1, y1, x2, y2]
-confidence = face['confidence']
-landmarks = face['landmarks'] # 5-point landmarks
+bbox = face.bbox # np.ndarray [x1, y1, x2, y2]
+confidence = face.confidence
+landmarks = face.landmarks # np.ndarray (5, 2) landmarks
print(f"Face detected with confidence: {confidence:.2f}")
```
@@ -121,8 +125,8 @@ recognizer = ArcFace()
faces1 = detector.detect(image1)
faces2 = detector.detect(image2)
-embedding1 = recognizer.get_normalized_embedding(image1, faces1[0]['landmarks'])
-embedding2 = recognizer.get_normalized_embedding(image2, faces2[0]['landmarks'])
+embedding1 = recognizer.get_normalized_embedding(image1, faces1[0].landmarks)
+embedding2 = recognizer.get_normalized_embedding(image2, faces2[0].landmarks)
# Compare faces
similarity = compute_similarity(embedding1, embedding2)
@@ -138,7 +142,7 @@ detector = RetinaFace()
landmarker = Landmark106()
faces = detector.detect(image)
-landmarks = landmarker.get_landmarks(image, faces[0]['bbox'])
+landmarks = landmarker.get_landmarks(image, faces[0].bbox)
# Returns 106 (x, y) landmark points
```
@@ -151,7 +155,7 @@ detector = RetinaFace()
age_gender = AgeGender()
faces = detector.detect(image)
-gender, age = age_gender.predict(image, faces[0]['bbox'])
+gender, age = age_gender.predict(image, faces[0].bbox)
gender_str = 'Female' if gender == 0 else 'Male'
print(f"{gender_str}, {age} years old")
```
@@ -168,15 +172,14 @@ gaze_estimator = MobileGaze()
faces = detector.detect(image)
for face in faces:
-bbox = face['bbox']
-x1, y1, x2, y2 = map(int, bbox[:4])
+x1, y1, x2, y2 = map(int, face.bbox[:4])
face_crop = image[y1:y2, x1:x2]
pitch, yaw = gaze_estimator.estimate(face_crop)
print(f"Gaze: pitch={np.degrees(pitch):.1f}°, yaw={np.degrees(yaw):.1f}°")
# Visualize
-draw_gaze(image, bbox, pitch, yaw)
+draw_gaze(image, face.bbox, pitch, yaw)
```
### Face Parsing
@@ -213,7 +216,7 @@ spoofer = MiniFASNet() # Uses V2 by default
faces = detector.detect(image)
for face in faces:
-label_idx, score = spoofer.predict(image, face['bbox'])
+label_idx, score = spoofer.predict(image, face.bbox)
# label_idx: 0 = Fake, 1 = Real
label = 'Real' if label_idx == 1 else 'Fake'
print(f"{label}: {score:.1%}")
@@ -458,9 +461,9 @@ while True:
faces = detector.detect(frame)
# Extract data for visualization
-bboxes = [f['bbox'] for f in faces]
-scores = [f['confidence'] for f in faces]
-landmarks = [f['landmarks'] for f in faces]
+bboxes = [f.bbox for f in faces]
+scores = [f.confidence for f in faces]
+landmarks = [f.landmarks for f in faces]
draw_detections(
image=frame,
@@ -494,7 +497,7 @@ for person_id, image_path in person_images.items():
faces = detector.detect(image)
if faces:
embedding = recognizer.get_normalized_embedding(
-image, faces[0]['landmarks']
+image, faces[0].landmarks
)
database[person_id] = embedding
@@ -503,7 +506,7 @@ query_image = cv2.imread("query.jpg")
query_faces = detector.detect(query_image)
if query_faces:
query_embedding = recognizer.get_normalized_embedding(
-query_image, query_faces[0]['landmarks']
+query_image, query_faces[0].landmarks
)
# Find best match
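
The hunk above stops at `# Find best match`. Since `get_normalized_embedding` returns L2-normalized embeddings — shape `(1, 512)` is an assumption, inferred from the `np.dot(emb1, emb2.T)[0][0]` pattern earlier in this README — the lookup can be completed with a plain dot product. A sketch, reusing `database` and `query_embedding` from the example above:

```python
import numpy as np

# Cosine similarity reduces to a dot product for L2-normalized embeddings
best_id, best_score = None, -1.0
for person_id, embedding in database.items():
    score = float(np.dot(query_embedding, embedding.T)[0][0])
    if score > best_score:
        best_id, best_score = person_id, score

print(f"Best match: {best_id} (similarity: {best_score:.2f})")
```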

View File

@@ -48,7 +48,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"1.3.1\n"
"1.6.0\n"
]
}
],
@@ -140,13 +140,13 @@
"\n",
" # Draw detections\n",
" bbox_image = image.copy()\n",
" bboxes = [f['bbox'] for f in faces]\n",
" scores = [f['confidence'] for f in faces]\n",
" landmarks = [f['landmarks'] for f in faces]\n",
" bboxes = [f.bbox for f in faces]\n",
" scores = [f.confidence for f in faces]\n",
" landmarks = [f.landmarks for f in faces]\n",
" draw_detections(image=bbox_image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=0.6, fancy_bbox=True)\n",
"\n",
" # Align first detected face (returns aligned image and inverse transform matrix)\n",
" first_landmarks = faces[0]['landmarks']\n",
" first_landmarks = faces[0].landmarks\n",
" aligned_image, _ = face_alignment(image, first_landmarks, image_size=112)\n",
"\n",
" # Convert BGR to RGB for visualization\n",
@@ -202,7 +202,8 @@
"source": [
"## Notes\n",
"\n",
"- `detect()` returns a list of face dictionaries with `bbox`, `confidence`, `landmarks`\n",
"- `detect()` returns a list of `Face` objects with `bbox`, `confidence`, `landmarks` attributes\n",
"- Access attributes using dot notation: `face.bbox`, `face.landmarks`\n",
"- `face_alignment()` uses 5-point landmarks to align and crop the face\n",
"- Default output size is 112x112 (standard for face recognition models)\n"
]
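
Condensing the notes above into a runnable sketch of the align-then-crop flow — import paths are inferred from this diff (`face_alignment` is re-exported in `uniface/__init__.py`; the detection package path is assumed), and the image path is hypothetical:

```python
import cv2

from uniface import face_alignment
from uniface.detection import RetinaFace  # package path assumed from this diff

detector = RetinaFace()
image = cv2.imread("photo.jpg")  # hypothetical input

faces = detector.detect(image)
if faces:
    # Dot notation replaces the old faces[0]['landmarks'] lookup
    aligned, _ = face_alignment(image, faces[0].landmarks, image_size=112)
    cv2.imwrite("aligned_112x112.jpg", aligned)  # standard size for recognition
```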

View File

@@ -44,7 +44,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"1.3.1\n"
"1.6.0\n"
]
}
],

File diff suppressed because one or more lines are too long

View File

@@ -44,7 +44,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"1.3.1\n"
"1.6.0\n"
]
}
],
@@ -153,14 +153,14 @@
"# Load image\n",
"image = cv2.imread(image_path)\n",
"\n",
"# Detect faces - returns list of face dictionaries\n",
"# Detect faces - returns list of Face objects\n",
"faces = detector.detect(image)\n",
"print(f'Detected {len(faces)} face(s)')\n",
"\n",
"# Unpack face data for visualization\n",
"bboxes = [f['bbox'] for f in faces]\n",
"scores = [f['confidence'] for f in faces]\n",
"landmarks = [f['landmarks'] for f in faces]\n",
"bboxes = [f.bbox for f in faces]\n",
"scores = [f.confidence for f in faces]\n",
"landmarks = [f.landmarks for f in faces]\n",
"\n",
"# Draw detections\n",
"draw_detections(image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=0.6, fancy_bbox=True)\n",
@@ -211,9 +211,9 @@
"faces = detector.detect(image, max_num=2)\n",
"print(f'Detected {len(faces)} face(s)')\n",
"\n",
"bboxes = [f['bbox'] for f in faces]\n",
"scores = [f['confidence'] for f in faces]\n",
"landmarks = [f['landmarks'] for f in faces]\n",
"bboxes = [f.bbox for f in faces]\n",
"scores = [f.confidence for f in faces]\n",
"landmarks = [f.landmarks for f in faces]\n",
"\n",
"draw_detections(image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=0.6, fancy_bbox=True)\n",
"\n",
@@ -258,9 +258,9 @@
"faces = detector.detect(image, max_num=5)\n",
"print(f'Detected {len(faces)} face(s)')\n",
"\n",
"bboxes = [f['bbox'] for f in faces]\n",
"scores = [f['confidence'] for f in faces]\n",
"landmarks = [f['landmarks'] for f in faces]\n",
"bboxes = [f.bbox for f in faces]\n",
"scores = [f.confidence for f in faces]\n",
"landmarks = [f.landmarks for f in faces]\n",
"\n",
"draw_detections(image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=0.6, fancy_bbox=True)\n",
"\n",
@@ -274,7 +274,8 @@
"source": [
"## Notes\n",
"\n",
"- `detect()` returns a list of dictionaries with keys: `bbox`, `confidence`, `landmarks`\n",
"- `detect()` returns a list of `Face` objects with attributes: `bbox`, `confidence`, `landmarks`\n",
"- Access attributes using dot notation: `face.bbox`, `face.confidence`, `face.landmarks`\n",
"- Adjust `conf_thresh` and `nms_thresh` for your use case\n",
"- Use `max_num` to limit detected faces"
]
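
A compact sketch of those notes in code, using `max_num` and dot-notation access exactly as in the cells above (the detection package path is assumed from this diff, and the image path is hypothetical):

```python
import cv2

from uniface.detection import RetinaFace  # package path assumed from this diff

detector = RetinaFace()
image = cv2.imread("group_photo.jpg")  # hypothetical input

# max_num=5 keeps at most the five most prominent faces, as in the cell above
faces = detector.detect(image, max_num=5)
for face in faces:
    print(f"confidence={face.confidence:.2f}, bbox={face.bbox}")
```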

View File

@@ -46,7 +46,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"UniFace version: 1.5.0\n"
"UniFace version: 1.6.0\n"
]
}
],
@@ -365,7 +365,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "base",
"language": "python",
"name": "python3"
},
@@ -379,7 +379,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.0"
"version": "3.13.5"
}
},
"nbformat": 4,

View File

@@ -42,7 +42,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"1.3.1\n"
"1.6.0\n"
]
}
],

View File

@@ -37,7 +37,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"1.3.1\n"
"1.6.0\n"
]
}
],

View File

@@ -44,7 +44,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"UniFace version: 1.4.0\n"
"UniFace version: 1.6.0\n"
]
}
],
@@ -152,8 +152,7 @@
"\n",
" # Estimate gaze for each face\n",
" for i, face in enumerate(faces):\n",
" bbox = face['bbox']\n",
" x1, y1, x2, y2 = map(int, bbox[:4])\n",
" x1, y1, x2, y2 = map(int, face.bbox[:4])\n",
" face_crop = image[y1:y2, x1:x2]\n",
"\n",
" if face_crop.size > 0:\n",
@@ -164,7 +163,7 @@
" print(f' Face {i+1}: pitch={pitch_deg:.1f}°, yaw={yaw_deg:.1f}°')\n",
"\n",
" # Draw gaze without angle text\n",
" draw_gaze(image, bbox, pitch, yaw, draw_angles=False)\n",
" draw_gaze(image, face.bbox, pitch, yaw, draw_angles=False)\n",
"\n",
" # Convert BGR to RGB for display\n",
" original_rgb = cv2.cvtColor(original, cv2.COLOR_BGR2RGB)\n",
@@ -249,7 +248,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "base",
"language": "python",
"name": "python3"
},
@@ -263,7 +262,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.0"
"version": "3.13.5"
}
},
"nbformat": 4,

View File

@@ -1,6 +1,6 @@
[project]
name = "uniface"
version = "1.5.3"
version = "1.6.0"
description = "UniFace: A Comprehensive Library for Face Detection, Recognition, Landmark Analysis, Face Parsing, Gaze Estimation, Age, and Gender Detection"
readme = "README.md"
license = { text = "MIT" }

View File

@@ -205,4 +205,3 @@ Examples:
if __name__ == '__main__':
main()

View File

@@ -13,7 +13,7 @@
__license__ = 'MIT'
__author__ = 'Yakhyokhuja Valikhujaev'
-__version__ = '1.5.3'
+__version__ = '1.6.0'
from uniface.face_utils import compute_similarity, face_alignment

View File

@@ -36,41 +36,24 @@ class FaceAnalyzer:
def analyze(self, image: np.ndarray) -> List[Face]:
"""Analyze faces in an image."""
-detections = self.detector.detect(image)
-Logger.debug(f'Detected {len(detections)} face(s)')
+faces = self.detector.detect(image)
+Logger.debug(f'Detected {len(faces)} face(s)')
-faces = []
-for idx, detection in enumerate(detections):
-bbox = detection['bbox']
-confidence = detection['confidence']
-landmarks = detection['landmarks']
-embedding = None
+for idx, face in enumerate(faces):
if self.recognizer is not None:
try:
-embedding = self.recognizer.get_normalized_embedding(image, landmarks)
-Logger.debug(f' Face {idx + 1}: Extracted embedding with shape {embedding.shape}')
+face.embedding = self.recognizer.get_normalized_embedding(image, face.landmarks)
+Logger.debug(f' Face {idx + 1}: Extracted embedding with shape {face.embedding.shape}')
except Exception as e:
Logger.warning(f' Face {idx + 1}: Failed to extract embedding: {e}')
-age, gender = None, None
if self.age_gender is not None:
try:
-gender, age = self.age_gender.predict(image, bbox)
-Logger.debug(f' Face {idx + 1}: Age={age}, Gender={gender}')
+face.gender, face.age = self.age_gender.predict(image, face.bbox)
+Logger.debug(f' Face {idx + 1}: Age={face.age}, Gender={face.gender}')
except Exception as e:
Logger.warning(f' Face {idx + 1}: Failed to predict age/gender: {e}')
-face = Face(
-bbox=bbox,
-confidence=confidence,
-landmarks=landmarks,
-embedding=embedding,
-age=age,
-gender=gender,
-)
-faces.append(face)
Logger.info(f'Analysis complete: {len(faces)} face(s) processed')
return faces
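
After this refactor, `analyze()` enriches the detector's own `Face` objects in place instead of copying fields into a new instance. A usage sketch — the `FaceAnalyzer` constructor is not shown in this diff, so the import path and no-argument form are assumptions:

```python
import cv2

from uniface import FaceAnalyzer  # import path assumed

analyzer = FaceAnalyzer()  # assumed defaults wiring detector/recognizer/age_gender
image = cv2.imread("photo.jpg")

for face in analyzer.analyze(image):
    print(f"{face.confidence:.2f} @ {face.bbox}")
    if face.embedding is not None:  # set only when a recognizer is configured
        print("embedding shape:", face.embedding.shape)
    if face.age is not None:        # set only when age/gender is configured
        print(f"age={face.age}, gender={face.gender}")
```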

View File

@@ -7,6 +7,8 @@ from typing import Any, Dict, List
import numpy as np
+from uniface.face import Face
from .base import BaseDetector
from .retinaface import RetinaFace
from .scrfd import SCRFD
@@ -16,7 +18,7 @@ from .yolov5 import YOLOv5Face
_detector_cache: Dict[str, BaseDetector] = {}
-def detect_faces(image: np.ndarray, method: str = 'retinaface', **kwargs) -> List[Dict[str, Any]]:
+def detect_faces(image: np.ndarray, method: str = 'retinaface', **kwargs) -> List[Face]:
"""
High-level face detection function.
@@ -26,18 +28,18 @@ def detect_faces(image: np.ndarray, method: str = 'retinaface', **kwargs) -> Lis
**kwargs: Additional arguments passed to the detector.
Returns:
-List[Dict[str, Any]]: A list of dictionaries, where each dictionary represents a detected face and contains:
-- 'bbox' (List[float]): [x1, y1, x2, y2] bounding box coordinates.
-- 'confidence' (float): The confidence score of the detection.
-- 'landmarks' (List[List[float]]): 5-point facial landmarks.
+List[Face]: A list of Face objects, each containing:
+- bbox (np.ndarray): [x1, y1, x2, y2] bounding box coordinates.
+- confidence (float): The confidence score of the detection.
+- landmarks (np.ndarray): 5-point facial landmarks with shape (5, 2).
Example:
>>> from uniface import detect_faces
>>> image = cv2.imread("your_image.jpg")
>>> faces = detect_faces(image, method='retinaface', conf_thresh=0.8)
>>> for face in faces:
... print(f"Found face with confidence: {face['confidence']}")
... print(f"BBox: {face['bbox']}")
... print(f"Found face with confidence: {face.confidence}")
... print(f"BBox: {face.bbox}")
"""
method_name = method.lower()
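
The hunk cuts off right after `method_name = method.lower()`. For context, a sketch of how a module-level cache like `_detector_cache` is typically completed — the actual body of `detect_faces` is outside this diff, and the `_DETECTORS` mapping and helper below are hypothetical (class names per the imports above):

```python
# Hypothetical completion of the factory; not part of this PR's hunk.
_DETECTORS = {'retinaface': RetinaFace, 'scrfd': SCRFD, 'yolov5': YOLOv5Face}


def _get_detector(method_name: str, **kwargs) -> BaseDetector:
    if method_name not in _DETECTORS:
        raise ValueError(f'Unknown detection method: {method_name}')
    if method_name not in _detector_cache:
        # Build once; later calls with the same method reuse the instance
        _detector_cache[method_name] = _DETECTORS[method_name](**kwargs)
    return _detector_cache[method_name]
```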

View File

@@ -7,6 +7,8 @@ from typing import Any, Dict, List
import numpy as np
+from uniface.face import Face
class BaseDetector(ABC):
"""
@@ -21,7 +23,7 @@ class BaseDetector(ABC):
self.config = kwargs
@abstractmethod
-def detect(self, image: np.ndarray, **kwargs) -> List[Dict[str, Any]]:
+def detect(self, image: np.ndarray, **kwargs) -> List[Face]:
"""
Detect faces in an image.
@@ -30,18 +32,17 @@ class BaseDetector(ABC):
**kwargs: Additional detection parameters
Returns:
-List[Dict[str, Any]]: List of detected faces, where each dictionary contains:
-- 'bbox' (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
-- 'confidence' (float): Detection confidence score (0.0 to 1.0)
-- 'landmarks' (np.ndarray): Facial landmarks with shape (5, 2) for 5-point landmarks
-or (68, 2) for 68-point landmarks. Empty array if not supported.
+List[Face]: List of detected Face objects, each containing:
+- bbox (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
+- confidence (float): Detection confidence score (0.0 to 1.0)
+- landmarks (np.ndarray): Facial landmarks with shape (5, 2) for 5-point landmarks
Example:
>>> faces = detector.detect(image)
>>> for face in faces:
-... bbox = face['bbox'] # np.ndarray with shape (4,)
-... confidence = face['confidence'] # float
-... landmarks = face['landmarks'] # np.ndarray with shape (5, 2)
+... bbox = face.bbox # np.ndarray with shape (4,)
+... confidence = face.confidence # float
+... landmarks = face.landmarks # np.ndarray with shape (5, 2)
"""
pass
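
With the ABC now typed against `Face`, a custom backend only has to construct `Face` objects. A minimal subclass sketch — the module path for `BaseDetector` is assumed from the relative imports in this diff, and the single hard-coded detection is a placeholder for real model output:

```python
from typing import List

import numpy as np

from uniface.detection.base import BaseDetector  # path assumed from this diff
from uniface.face import Face


class DummyDetector(BaseDetector):
    def detect(self, image: np.ndarray, **kwargs) -> List[Face]:
        h, w = image.shape[:2]
        # Placeholder: one fixed box over the top-left quadrant
        return [
            Face(
                bbox=np.array([0, 0, w / 2, h / 2], dtype=np.float32),
                confidence=0.99,
                landmarks=np.zeros((5, 2), dtype=np.float32),
            )
        ]
```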

View File

@@ -2,7 +2,7 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
-from typing import Any, Dict, List, Literal, Tuple
+from typing import Any, List, Literal, Tuple
import numpy as np
@@ -14,6 +14,7 @@ from uniface.common import (
resize_image,
)
from uniface.constants import RetinaFaceWeights
+from uniface.face import Face
from uniface.log import Logger
from uniface.model_store import verify_model_weights
from uniface.onnx_utils import create_onnx_session
@@ -154,7 +155,7 @@ class RetinaFace(BaseDetector):
max_num: int = 0,
metric: Literal['default', 'max'] = 'max',
center_weight: float = 2.0,
-) -> List[Dict[str, Any]]:
+) -> List[Face]:
"""
Perform face detection on an input image and return bounding boxes and facial landmarks.
@@ -168,19 +169,19 @@ class RetinaFace(BaseDetector):
when using the "default" metric. Defaults to 2.0.
Returns:
-List[Dict[str, Any]]: List of face detection dictionaries, each containing:
-- 'bbox' (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
-- 'confidence' (float): Detection confidence score (0.0 to 1.0)
-- 'landmarks' (np.ndarray): 5-point facial landmarks with shape (5, 2)
+List[Face]: List of Face objects, each containing:
+- bbox (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
+- confidence (float): Detection confidence score (0.0 to 1.0)
+- landmarks (np.ndarray): 5-point facial landmarks with shape (5, 2)
Example:
>>> faces = detector.detect(image)
>>> for face in faces:
-... bbox = face['bbox'] # np.ndarray with shape (4,)
-... confidence = face['confidence'] # float
-... landmarks = face['landmarks'] # np.ndarray with shape (5, 2)
+... bbox = face.bbox # np.ndarray with shape (4,)
+... confidence = face.confidence # float
+... landmarks = face.landmarks # np.ndarray with shape (5, 2)
... # Can pass landmarks directly to recognition
-... embedding = recognizer.get_normalized_embedding(image, landmarks)
+... embedding = recognizer.get_normalized_embedding(image, face.landmarks)
"""
original_height, original_width = image.shape[:2]
@@ -229,12 +230,12 @@ class RetinaFace(BaseDetector):
faces = []
for i in range(detections.shape[0]):
-face_dict = {
-'bbox': detections[i, :4],
-'confidence': float(detections[i, 4]),
-'landmarks': landmarks[i],
-}
-faces.append(face_dict)
+face = Face(
+bbox=detections[i, :4],
+confidence=float(detections[i, 4]),
+landmarks=landmarks[i],
+)
+faces.append(face)
return faces
@@ -350,19 +351,12 @@ if __name__ == '__main__':
# Process each detected face
for face in faces:
-# Extract bbox and landmarks from dictionary
-bbox = face['bbox'] # [x1, y1, x2, y2]
-landmarks = face['landmarks'] # [[x1, y1], [x2, y2], ...]
-confidence = face['confidence']
+# Extract bbox and landmarks from Face object
+draw_bbox(frame, face.bbox, face.confidence)
-# Pass bbox and confidence separately
-draw_bbox(frame, bbox, confidence)
-# Convert landmarks to numpy array format if needed
-if landmarks is not None and len(landmarks) > 0:
-# Convert list of [x, y] pairs to numpy array
-points = np.array(landmarks, dtype=np.float32) # Shape: (5, 2)
-draw_keypoints(frame, points)
+# Draw landmarks if available
+if face.landmarks is not None and len(face.landmarks) > 0:
+draw_keypoints(frame, face.landmarks)
# Display face count
cv2.putText(

View File

@@ -2,13 +2,14 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
-from typing import Any, Dict, List, Literal, Tuple
+from typing import Any, List, Literal, Tuple
import cv2
import numpy as np
from uniface.common import distance2bbox, distance2kps, non_max_suppression, resize_image
from uniface.constants import SCRFDWeights
+from uniface.face import Face
from uniface.log import Logger
from uniface.model_store import verify_model_weights
from uniface.onnx_utils import create_onnx_session
@@ -193,7 +194,7 @@ class SCRFD(BaseDetector):
max_num: int = 0,
metric: Literal['default', 'max'] = 'max',
center_weight: float = 2.0,
-) -> List[Dict[str, Any]]:
+) -> List[Face]:
"""
Perform face detection on an input image and return bounding boxes and facial landmarks.
@@ -207,19 +208,19 @@ class SCRFD(BaseDetector):
when using the "default" metric. Defaults to 2.0.
Returns:
-List[Dict[str, Any]]: List of face detection dictionaries, each containing:
-- 'bbox' (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
-- 'confidence' (float): Detection confidence score (0.0 to 1.0)
-- 'landmarks' (np.ndarray): 5-point facial landmarks with shape (5, 2)
+List[Face]: List of Face objects, each containing:
+- bbox (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
+- confidence (float): Detection confidence score (0.0 to 1.0)
+- landmarks (np.ndarray): 5-point facial landmarks with shape (5, 2)
Example:
>>> faces = detector.detect(image)
>>> for face in faces:
-... bbox = face['bbox'] # np.ndarray with shape (4,)
-... confidence = face['confidence'] # float
-... landmarks = face['landmarks'] # np.ndarray with shape (5, 2)
+... bbox = face.bbox # np.ndarray with shape (4,)
+... confidence = face.confidence # float
+... landmarks = face.landmarks # np.ndarray with shape (5, 2)
... # Can pass landmarks directly to recognition
-... embedding = recognizer.get_normalized_embedding(image, landmarks)
+... embedding = recognizer.get_normalized_embedding(image, face.landmarks)
"""
original_height, original_width = image.shape[:2]
@@ -280,12 +281,12 @@ class SCRFD(BaseDetector):
faces = []
for i in range(detections.shape[0]):
-face_dict = {
-'bbox': detections[i, :4],
-'confidence': float(detections[i, 4]),
-'landmarks': landmarks[i],
-}
-faces.append(face_dict)
+face = Face(
+bbox=detections[i, :4],
+confidence=float(detections[i, 4]),
+landmarks=landmarks[i],
+)
+faces.append(face)
return faces
@@ -324,19 +325,12 @@ if __name__ == '__main__':
# Process each detected face
for face in faces:
-# Extract bbox and landmarks from dictionary
-bbox = face['bbox'] # [x1, y1, x2, y2]
-landmarks = face['landmarks'] # [[x1, y1], [x2, y2], ...]
-confidence = face['confidence']
+# Extract bbox and landmarks from Face object
+draw_bbox(frame, face.bbox, face.confidence)
-# Pass bbox and confidence separately
-draw_bbox(frame, bbox, confidence)
-# Convert landmarks to numpy array format if needed
-if landmarks is not None and len(landmarks) > 0:
-# Convert list of [x, y] pairs to numpy array
-points = np.array(landmarks, dtype=np.float32) # Shape: (5, 2)
-draw_keypoints(frame, points)
+# Draw landmarks if available
+if face.landmarks is not None and len(face.landmarks) > 0:
+draw_keypoints(frame, face.landmarks)
# Display face count
cv2.putText(

View File

@@ -2,13 +2,14 @@
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo
-from typing import Any, Dict, List, Literal, Tuple
+from typing import Any, List, Literal, Tuple
import cv2
import numpy as np
from uniface.common import non_max_suppression
from uniface.constants import YOLOv5FaceWeights
+from uniface.face import Face
from uniface.log import Logger
from uniface.model_store import verify_model_weights
from uniface.onnx_utils import create_onnx_session
@@ -259,7 +260,7 @@ class YOLOv5Face(BaseDetector):
max_num: int = 0,
metric: Literal['default', 'max'] = 'max',
center_weight: float = 2.0,
-) -> List[Dict[str, Any]]:
+) -> List[Face]:
"""
Perform face detection on an input image and return bounding boxes and facial landmarks.
@@ -273,19 +274,19 @@ class YOLOv5Face(BaseDetector):
when using the "default" metric. Defaults to 2.0.
Returns:
-List[Dict[str, Any]]: List of face detection dictionaries, each containing:
-- 'bbox' (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
-- 'confidence' (float): Detection confidence score (0.0 to 1.0)
-- 'landmarks' (np.ndarray): 5-point facial landmarks with shape (5, 2)
+List[Face]: List of Face objects, each containing:
+- bbox (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
+- confidence (float): Detection confidence score (0.0 to 1.0)
+- landmarks (np.ndarray): 5-point facial landmarks with shape (5, 2)
Example:
>>> faces = detector.detect(image)
>>> for face in faces:
-... bbox = face['bbox'] # np.ndarray with shape (4,)
-... confidence = face['confidence'] # float
-... landmarks = face['landmarks'] # np.ndarray with shape (5, 2)
+... bbox = face.bbox # np.ndarray with shape (4,)
+... confidence = face.confidence # float
+... landmarks = face.landmarks # np.ndarray with shape (5, 2)
... # Can pass landmarks directly to recognition
-... embedding = recognizer.get_normalized_embedding(image, landmarks)
+... embedding = recognizer.get_normalized_embedding(image, face.landmarks)
"""
original_height, original_width = image.shape[:2]
@@ -330,11 +331,11 @@ class YOLOv5Face(BaseDetector):
faces = []
for i in range(detections.shape[0]):
-face_dict = {
-'bbox': detections[i, :4],
-'confidence': float(detections[i, 4]),
-'landmarks': landmarks[i],
-}
-faces.append(face_dict)
+face = Face(
+bbox=detections[i, :4],
+confidence=float(detections[i, 4]),
+landmarks=landmarks[i],
+)
+faces.append(face)
return faces
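
A closing migration note: `detect()` now returns `Face` objects, so any downstream `face['bbox']` access raises `TypeError`. This PR ships no compatibility layer; if one were wanted during migration, a shim along these lines (hypothetical, not part of the PR) would keep legacy subscript access alive:

```python
from dataclasses import dataclass

import numpy as np


@dataclass
class Face:
    bbox: np.ndarray
    confidence: float
    landmarks: np.ndarray

    def __getitem__(self, key: str):
        # Legacy dict-style access, e.g. face['bbox'] -> face.bbox
        if key in ('bbox', 'confidence', 'landmarks'):
            return getattr(self, key)
        raise KeyError(key)
```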