Merge pull request #42 from yakhyo/feat/standardize-outputs

feat: Standardize detection output and several other updates
Yakhyokhuja Valikhujaev
2025-12-24 00:38:32 +09:00
committed by GitHub
20 changed files with 188 additions and 192 deletions

View File

@@ -21,11 +21,28 @@ Thank you for considering contributing to UniFace! We welcome contributions of a
 ### Code Style
+
+This project uses [Ruff](https://docs.astral.sh/ruff/) for linting and formatting.
+
+```bash
+# Check for linting errors
+ruff check .
+
+# Auto-fix linting errors
+ruff check . --fix
+
+# Format code
+ruff format .
+```
+
+**Guidelines:**
+
 - Follow PEP8 guidelines
 - Use type hints (Python 3.10+)
 - Write docstrings for public APIs
+- Line length: 120 characters
 - Keep code simple and readable
+
+All PRs must pass `ruff check .` before merging.
 ## Development Setup
 ```bash
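
To make those guidelines concrete, a helper written to this style might look like the following (a hypothetical example for illustration, not code from this PR):

```python
import numpy as np


def clip_bbox(bbox: np.ndarray, width: int, height: int) -> np.ndarray:
    """Clip an [x1, y1, x2, y2] bounding box to the image bounds."""
    x1, y1, x2, y2 = bbox[:4]
    return np.array([max(x1, 0.0), max(y1, 0.0), min(x2, width), min(y2, height)])
```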

View File

@@ -39,9 +39,9 @@ faces = detector.detect(image)
 # Print results
 for i, face in enumerate(faces):
     print(f"Face {i+1}:")
-    print(f"  Confidence: {face['confidence']:.2f}")
-    print(f"  BBox: {face['bbox']}")
-    print(f"  Landmarks: {len(face['landmarks'])} points")
+    print(f"  Confidence: {face.confidence:.2f}")
+    print(f"  BBox: {face.bbox}")
+    print(f"  Landmarks: {len(face.landmarks)} points")
 ```
 **Output:**
@@ -70,9 +70,9 @@ image = cv2.imread("photo.jpg")
 faces = detector.detect(image)
 # Extract visualization data
-bboxes = [f['bbox'] for f in faces]
-scores = [f['confidence'] for f in faces]
-landmarks = [f['landmarks'] for f in faces]
+bboxes = [f.bbox for f in faces]
+scores = [f.confidence for f in faces]
+landmarks = [f.landmarks for f in faces]
 # Draw on image
 draw_detections(
@@ -113,8 +113,8 @@ faces2 = detector.detect(image2)
 if faces1 and faces2:
     # Extract embeddings
-    emb1 = recognizer.get_normalized_embedding(image1, faces1[0]['landmarks'])
-    emb2 = recognizer.get_normalized_embedding(image2, faces2[0]['landmarks'])
+    emb1 = recognizer.get_normalized_embedding(image1, faces1[0].landmarks)
+    emb2 = recognizer.get_normalized_embedding(image2, faces2[0].landmarks)
     # Compute similarity (cosine similarity)
     similarity = np.dot(emb1, emb2.T)[0][0]
@@ -159,9 +159,9 @@ while True:
     faces = detector.detect(frame)
     # Draw results
-    bboxes = [f['bbox'] for f in faces]
-    scores = [f['confidence'] for f in faces]
-    landmarks = [f['landmarks'] for f in faces]
+    bboxes = [f.bbox for f in faces]
+    scores = [f.confidence for f in faces]
+    landmarks = [f.landmarks for f in faces]
     draw_detections(
         image=frame,
         bboxes=bboxes,
@@ -199,7 +199,7 @@ faces = detector.detect(image)
 # Predict attributes
 for i, face in enumerate(faces):
-    gender, age = age_gender.predict(image, face['bbox'])
+    gender, age = age_gender.predict(image, face.bbox)
     gender_str = 'Female' if gender == 0 else 'Male'
     print(f"Face {i+1}: {gender_str}, {age} years old")
 ```
@@ -230,7 +230,7 @@ image = cv2.imread("photo.jpg")
 faces = detector.detect(image)
 if faces:
-    landmarks = landmarker.get_landmarks(image, faces[0]['bbox'])
+    landmarks = landmarker.get_landmarks(image, faces[0].bbox)
     print(f"Detected {len(landmarks)} landmarks")
     # Draw landmarks
@@ -262,8 +262,7 @@ faces = detector.detect(image)
 # Estimate gaze for each face
 for i, face in enumerate(faces):
-    bbox = face['bbox']
-    x1, y1, x2, y2 = map(int, bbox[:4])
+    x1, y1, x2, y2 = map(int, face.bbox[:4])
     face_crop = image[y1:y2, x1:x2]
     if face_crop.size > 0:
@@ -271,7 +270,7 @@ for i, face in enumerate(faces):
         print(f"Face {i+1}: pitch={np.degrees(pitch):.1f}°, yaw={np.degrees(yaw):.1f}°")
         # Draw gaze direction
-        draw_gaze(image, bbox, pitch, yaw)
+        draw_gaze(image, face.bbox, pitch, yaw)
 cv2.imwrite("gaze_output.jpg", image)
 ```
@@ -435,7 +434,7 @@ image = cv2.imread("photo.jpg")
 faces = detector.detect(image)
 for i, face in enumerate(faces):
-    label_idx, score = spoofer.predict(image, face['bbox'])
+    label_idx, score = spoofer.predict(image, face.bbox)
     # label_idx: 0 = Fake, 1 = Real
     label = 'Real' if label_idx == 1 else 'Fake'
     print(f"Face {i+1}: {label} ({score:.1%})")

View File

@@ -1,11 +1,15 @@
 # UniFace: All-in-One Face Analysis Library
+<div align="center">
+
 [![License](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)
 [![Python](https://img.shields.io/badge/Python-3.10%2B-blue)](https://www.python.org/)
 [![PyPI](https://img.shields.io/pypi/v/uniface.svg)](https://pypi.org/project/uniface/)
 [![CI](https://github.com/yakhyo/uniface/actions/workflows/ci.yml/badge.svg)](https://github.com/yakhyo/uniface/actions)
-[![Downloads](https://pepy.tech/badge/uniface)](https://pepy.tech/project/uniface)
-[![DeepWiki](https://img.shields.io/badge/DeepWiki-yakhyo%2Funiface-blue.svg?logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACwAAAAyCAYAAAAnWDnqAAAAAXNSR0IArs4c6QAAA05JREFUaEPtmUtyEzEQhtWTQyQLHNak2AB7ZnyXZMEjXMGeK/AIi+QuHrMnbChYY7MIh8g01fJoopFb0uhhEqqcbWTp06/uv1saEDv4O3n3dV60RfP947Mm9/SQc0ICFQgzfc4CYZoTPAswgSJCCUJUnAAoRHOAUOcATwbmVLWdGoH//PB8mnKqScAhsD0kYP3j/Yt5LPQe2KvcXmGvRHcDnpxfL2zOYJ1mFwrryWTz0advv1Ut4CJgf5uhDuDj5eUcAUoahrdY/56ebRWeraTjMt/00Sh3UDtjgHtQNHwcRGOC98BJEAEymycmYcWwOprTgcB6VZ5JK5TAJ+fXGLBm3FDAmn6oPPjR4rKCAoJCal2eAiQp2x0vxTPB3ALO2CRkwmDy5WohzBDwSEFKRwPbknEggCPB/imwrycgxX2NzoMCHhPkDwqYMr9tRcP5qNrMZHkVnOjRMWwLCcr8ohBVb1OMjxLwGCvjTikrsBOiA6fNyCrm8V1rP93iVPpwaE+gO0SsWmPiXB+jikdf6SizrT5qKasx5j8ABbHpFTx+vFXp9EnYQmLx02h1QTTrl6eDqxLnGjporxl3NL3agEvXdT0WmEost648sQOYAeJS9Q7bfUVoMGnjo4AZdUMQku50McDcMWcBPvr0SzbTAFDfvJqwLzgxwATnCgnp4wDl6Aa+Ax283gghmj+vj7feE2KBBRMW3FzOpLOADl0Isb5587h/U4gGvkt5v60Z1VLG8BhYjbzRwyQZemwAd6cCR5/XFWLYZRIMpX39AR0tjaGGiGzLVyhse5C9RKC6ai42ppWPKiBagOvaYk8lO7DajerabOZP46Lby5wKjw1HCRx7p9sVMOWGzb/vA1hwiWc6jm3MvQDTogQkiqIhJV0nBQBTU+3okKCFDy9WwferkHjtxib7t3xIUQtHxnIwtx4mpg26/HfwVNVDb4oI9RHmx5WGelRVlrtiw43zboCLaxv46AZeB3IlTkwouebTr1y2NjSpHz68WNFjHvupy3q8TFn3Hos2IAk4Ju5dCo8B3wP7VPr/FGaKiG+T+v+TQqIrOqMTL1VdWV1DdmcbO8KXBz6esmYWYKPwDL5b5FA1a0hwapHiom0r/cKaoqr+27/XcrS5UwSMbQAAAABJRU5ErkJggg==)](https://deepwiki.com/yakhyo/uniface)
+[![Downloads](https://static.pepy.tech/badge/uniface)](https://pepy.tech/project/uniface)
+[![DeepWiki](https://img.shields.io/badge/DeepWiki-AI_Docs-blue.svg?logo=bookstack)](https://deepwiki.com/yakhyo/uniface)
+</div>
 <div align="center">
   <img src=".github/logos/logo_web.webp" width=75%>
@@ -101,9 +105,9 @@ faces = detector.detect(image)
 # Process results
 for face in faces:
-    bbox = face['bbox']  # [x1, y1, x2, y2]
-    confidence = face['confidence']
-    landmarks = face['landmarks']  # 5-point landmarks
+    bbox = face.bbox  # np.ndarray [x1, y1, x2, y2]
+    confidence = face.confidence
+    landmarks = face.landmarks  # np.ndarray (5, 2) landmarks
     print(f"Face detected with confidence: {confidence:.2f}")
 ```
@@ -121,8 +125,8 @@ recognizer = ArcFace()
 faces1 = detector.detect(image1)
 faces2 = detector.detect(image2)
-embedding1 = recognizer.get_normalized_embedding(image1, faces1[0]['landmarks'])
-embedding2 = recognizer.get_normalized_embedding(image2, faces2[0]['landmarks'])
+embedding1 = recognizer.get_normalized_embedding(image1, faces1[0].landmarks)
+embedding2 = recognizer.get_normalized_embedding(image2, faces2[0].landmarks)
 # Compare faces
 similarity = compute_similarity(embedding1, embedding2)
@@ -138,7 +142,7 @@ detector = RetinaFace()
 landmarker = Landmark106()
 faces = detector.detect(image)
-landmarks = landmarker.get_landmarks(image, faces[0]['bbox'])
+landmarks = landmarker.get_landmarks(image, faces[0].bbox)
 # Returns 106 (x, y) landmark points
 ```
@@ -151,7 +155,7 @@ detector = RetinaFace()
 age_gender = AgeGender()
 faces = detector.detect(image)
-gender, age = age_gender.predict(image, faces[0]['bbox'])
+gender, age = age_gender.predict(image, faces[0].bbox)
 gender_str = 'Female' if gender == 0 else 'Male'
 print(f"{gender_str}, {age} years old")
 ```
@@ -168,15 +172,14 @@ gaze_estimator = MobileGaze()
 faces = detector.detect(image)
 for face in faces:
-    bbox = face['bbox']
-    x1, y1, x2, y2 = map(int, bbox[:4])
+    x1, y1, x2, y2 = map(int, face.bbox[:4])
     face_crop = image[y1:y2, x1:x2]
     pitch, yaw = gaze_estimator.estimate(face_crop)
     print(f"Gaze: pitch={np.degrees(pitch):.1f}°, yaw={np.degrees(yaw):.1f}°")
     # Visualize
-    draw_gaze(image, bbox, pitch, yaw)
+    draw_gaze(image, face.bbox, pitch, yaw)
 ```
 ### Face Parsing
@@ -213,7 +216,7 @@ spoofer = MiniFASNet()  # Uses V2 by default
 faces = detector.detect(image)
 for face in faces:
-    label_idx, score = spoofer.predict(image, face['bbox'])
+    label_idx, score = spoofer.predict(image, face.bbox)
     # label_idx: 0 = Fake, 1 = Real
     label = 'Real' if label_idx == 1 else 'Fake'
     print(f"{label}: {score:.1%}")
@@ -458,9 +461,9 @@ while True:
     faces = detector.detect(frame)
     # Extract data for visualization
-    bboxes = [f['bbox'] for f in faces]
-    scores = [f['confidence'] for f in faces]
-    landmarks = [f['landmarks'] for f in faces]
+    bboxes = [f.bbox for f in faces]
+    scores = [f.confidence for f in faces]
+    landmarks = [f.landmarks for f in faces]
     draw_detections(
         image=frame,
@@ -494,7 +497,7 @@ for person_id, image_path in person_images.items():
     faces = detector.detect(image)
     if faces:
         embedding = recognizer.get_normalized_embedding(
-            image, faces[0]['landmarks']
+            image, faces[0].landmarks
         )
         database[person_id] = embedding
@@ -503,7 +506,7 @@ query_image = cv2.imread("query.jpg")
 query_faces = detector.detect(query_image)
 if query_faces:
     query_embedding = recognizer.get_normalized_embedding(
-        query_image, query_faces[0]['landmarks']
+        query_image, query_faces[0].landmarks
     )
     # Find best match
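
The "find best match" step itself is cut off by the hunk boundary. A minimal sketch of how it could continue, assuming `database` maps person IDs to normalized embeddings and that `compute_similarity` (from `uniface.face_utils`, re-exported by the package) returns a scalar similarity:

```python
# Hypothetical continuation of the identification example above.
best_id, best_score = None, -1.0
for person_id, embedding in database.items():
    score = compute_similarity(query_embedding, embedding)
    if score > best_score:
        best_id, best_score = person_id, score

THRESHOLD = 0.4  # hypothetical acceptance threshold; tune on your own data
if best_id is not None and best_score >= THRESHOLD:
    print(f"Matched {best_id} (similarity {best_score:.2f})")
else:
    print("No match found")
```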

View File

@@ -48,7 +48,7 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-     "1.3.1\n"
+     "1.6.0\n"
     ]
    }
   ],
@@ -140,13 +140,13 @@
    "\n",
    "    # Draw detections\n",
    "    bbox_image = image.copy()\n",
-    "    bboxes = [f['bbox'] for f in faces]\n",
-    "    scores = [f['confidence'] for f in faces]\n",
-    "    landmarks = [f['landmarks'] for f in faces]\n",
+    "    bboxes = [f.bbox for f in faces]\n",
+    "    scores = [f.confidence for f in faces]\n",
+    "    landmarks = [f.landmarks for f in faces]\n",
    "    draw_detections(image=bbox_image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=0.6, fancy_bbox=True)\n",
    "\n",
    "    # Align first detected face (returns aligned image and inverse transform matrix)\n",
-    "    first_landmarks = faces[0]['landmarks']\n",
+    "    first_landmarks = faces[0].landmarks\n",
    "    aligned_image, _ = face_alignment(image, first_landmarks, image_size=112)\n",
    "\n",
    "    # Convert BGR to RGB for visualization\n",
@@ -202,7 +202,8 @@
   "source": [
    "## Notes\n",
    "\n",
-    "- `detect()` returns a list of face dictionaries with `bbox`, `confidence`, `landmarks`\n",
+    "- `detect()` returns a list of `Face` objects with `bbox`, `confidence`, `landmarks` attributes\n",
+    "- Access attributes using dot notation: `face.bbox`, `face.landmarks`\n",
    "- `face_alignment()` uses 5-point landmarks to align and crop the face\n",
    "- Default output size is 112x112 (standard for face recognition models)\n"
   ]

View File

@@ -44,7 +44,7 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-     "1.3.1\n"
+     "1.6.0\n"
     ]
    }
   ],

File diff suppressed because one or more lines are too long

View File

@@ -44,7 +44,7 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-     "1.3.1\n"
+     "1.6.0\n"
     ]
    }
   ],
@@ -153,14 +153,14 @@
    "# Load image\n",
    "image = cv2.imread(image_path)\n",
    "\n",
-    "# Detect faces - returns list of face dictionaries\n",
+    "# Detect faces - returns list of Face objects\n",
    "faces = detector.detect(image)\n",
    "print(f'Detected {len(faces)} face(s)')\n",
    "\n",
    "# Unpack face data for visualization\n",
-    "bboxes = [f['bbox'] for f in faces]\n",
-    "scores = [f['confidence'] for f in faces]\n",
-    "landmarks = [f['landmarks'] for f in faces]\n",
+    "bboxes = [f.bbox for f in faces]\n",
+    "scores = [f.confidence for f in faces]\n",
+    "landmarks = [f.landmarks for f in faces]\n",
    "\n",
    "# Draw detections\n",
    "draw_detections(image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=0.6, fancy_bbox=True)\n",
@@ -211,9 +211,9 @@
    "faces = detector.detect(image, max_num=2)\n",
    "print(f'Detected {len(faces)} face(s)')\n",
    "\n",
-    "bboxes = [f['bbox'] for f in faces]\n",
-    "scores = [f['confidence'] for f in faces]\n",
-    "landmarks = [f['landmarks'] for f in faces]\n",
+    "bboxes = [f.bbox for f in faces]\n",
+    "scores = [f.confidence for f in faces]\n",
+    "landmarks = [f.landmarks for f in faces]\n",
    "\n",
    "draw_detections(image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=0.6, fancy_bbox=True)\n",
    "\n",
@@ -258,9 +258,9 @@
    "faces = detector.detect(image, max_num=5)\n",
    "print(f'Detected {len(faces)} face(s)')\n",
    "\n",
-    "bboxes = [f['bbox'] for f in faces]\n",
-    "scores = [f['confidence'] for f in faces]\n",
-    "landmarks = [f['landmarks'] for f in faces]\n",
+    "bboxes = [f.bbox for f in faces]\n",
+    "scores = [f.confidence for f in faces]\n",
+    "landmarks = [f.landmarks for f in faces]\n",
    "\n",
    "draw_detections(image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=0.6, fancy_bbox=True)\n",
    "\n",
@@ -274,7 +274,8 @@
   "source": [
    "## Notes\n",
    "\n",
-    "- `detect()` returns a list of dictionaries with keys: `bbox`, `confidence`, `landmarks`\n",
+    "- `detect()` returns a list of `Face` objects with attributes: `bbox`, `confidence`, `landmarks`\n",
+    "- Access attributes using dot notation: `face.bbox`, `face.confidence`, `face.landmarks`\n",
    "- Adjust `conf_thresh` and `nms_thresh` for your use case\n",
    "- Use `max_num` to limit detected faces"
   ]
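
For downstream code still written against the old dict-style results, a small adapter gives an incremental migration path (a hypothetical helper, not part of this PR):

```python
def face_to_dict(face) -> dict:
    """Adapt a 1.6.0 Face object back to the pre-1.6.0 dict format."""
    return {
        'bbox': face.bbox,
        'confidence': face.confidence,
        'landmarks': face.landmarks,
    }


# Usage: legacy_faces = [face_to_dict(f) for f in detector.detect(image)]
```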

View File

@@ -46,7 +46,7 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-     "UniFace version: 1.5.0\n"
+     "UniFace version: 1.6.0\n"
     ]
    }
   ],
@@ -365,7 +365,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "base",
    "language": "python",
    "name": "python3"
   },
@@ -379,7 +379,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.10.0"
+   "version": "3.13.5"
  }
 },
 "nbformat": 4,

View File

@@ -42,7 +42,7 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-     "1.3.1\n"
+     "1.6.0\n"
     ]
    }
   ],

View File

@@ -37,7 +37,7 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-     "1.3.1\n"
+     "1.6.0\n"
     ]
    }
   ],

View File

@@ -44,7 +44,7 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-     "UniFace version: 1.4.0\n"
+     "UniFace version: 1.6.0\n"
     ]
    }
   ],
@@ -152,8 +152,7 @@
    "\n",
    "    # Estimate gaze for each face\n",
    "    for i, face in enumerate(faces):\n",
-    "        bbox = face['bbox']\n",
-    "        x1, y1, x2, y2 = map(int, bbox[:4])\n",
+    "        x1, y1, x2, y2 = map(int, face.bbox[:4])\n",
    "        face_crop = image[y1:y2, x1:x2]\n",
    "\n",
    "        if face_crop.size > 0:\n",
@@ -164,7 +163,7 @@
    "            print(f'  Face {i+1}: pitch={pitch_deg:.1f}°, yaw={yaw_deg:.1f}°')\n",
    "\n",
    "            # Draw gaze without angle text\n",
-    "            draw_gaze(image, bbox, pitch, yaw, draw_angles=False)\n",
+    "            draw_gaze(image, face.bbox, pitch, yaw, draw_angles=False)\n",
    "\n",
    "    # Convert BGR to RGB for display\n",
    "    original_rgb = cv2.cvtColor(original, cv2.COLOR_BGR2RGB)\n",
@@ -249,7 +248,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "base",
    "language": "python",
    "name": "python3"
   },
@@ -263,7 +262,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.10.0"
+   "version": "3.13.5"
  }
 },
 "nbformat": 4,

View File

@@ -1,6 +1,6 @@
 [project]
 name = "uniface"
-version = "1.5.3"
+version = "1.6.0"
 description = "UniFace: A Comprehensive Library for Face Detection, Recognition, Landmark Analysis, Face Parsing, Gaze Estimation, Age, and Gender Detection"
 readme = "README.md"
 license = { text = "MIT" }

View File

@@ -205,4 +205,3 @@ Examples:
 if __name__ == '__main__':
     main()

View File

@@ -13,7 +13,7 @@
 __license__ = 'MIT'
 __author__ = 'Yakhyokhuja Valikhujaev'
-__version__ = '1.5.3'
+__version__ = '1.6.0'
 from uniface.face_utils import compute_similarity, face_alignment

View File

@@ -36,41 +36,24 @@ class FaceAnalyzer:
     def analyze(self, image: np.ndarray) -> List[Face]:
         """Analyze faces in an image."""
-        detections = self.detector.detect(image)
-        Logger.debug(f'Detected {len(detections)} face(s)')
-        faces = []
-        for idx, detection in enumerate(detections):
-            bbox = detection['bbox']
-            confidence = detection['confidence']
-            landmarks = detection['landmarks']
-            embedding = None
+        faces = self.detector.detect(image)
+        Logger.debug(f'Detected {len(faces)} face(s)')
+        for idx, face in enumerate(faces):
             if self.recognizer is not None:
                 try:
-                    embedding = self.recognizer.get_normalized_embedding(image, landmarks)
-                    Logger.debug(f'  Face {idx + 1}: Extracted embedding with shape {embedding.shape}')
+                    face.embedding = self.recognizer.get_normalized_embedding(image, face.landmarks)
+                    Logger.debug(f'  Face {idx + 1}: Extracted embedding with shape {face.embedding.shape}')
                 except Exception as e:
                     Logger.warning(f'  Face {idx + 1}: Failed to extract embedding: {e}')
-            age, gender = None, None
             if self.age_gender is not None:
                 try:
-                    gender, age = self.age_gender.predict(image, bbox)
-                    Logger.debug(f'  Face {idx + 1}: Age={age}, Gender={gender}')
+                    face.gender, face.age = self.age_gender.predict(image, face.bbox)
+                    Logger.debug(f'  Face {idx + 1}: Age={face.age}, Gender={face.gender}')
                 except Exception as e:
                     Logger.warning(f'  Face {idx + 1}: Failed to predict age/gender: {e}')
-            face = Face(
-                bbox=bbox,
-                confidence=confidence,
-                landmarks=landmarks,
-                embedding=embedding,
-                age=age,
-                gender=gender,
-            )
-            faces.append(face)
         Logger.info(f'Analysis complete: {len(faces)} face(s) processed')
         return faces
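
The `Face` class itself (`uniface/face.py`) is not shown in this diff, but its usage pins down the shape: detectors construct it with `bbox`, `confidence`, and `landmarks`, and the analyzer assigns `embedding`, `age`, and `gender` afterwards. A sketch inferred from that usage; the actual definition may differ:

```python
from dataclasses import dataclass
from typing import Optional

import numpy as np


@dataclass
class Face:
    """Inferred sketch of the detection result container used in this PR."""

    bbox: np.ndarray                        # shape (4,), [x1, y1, x2, y2]
    confidence: float                       # detection score in [0.0, 1.0]
    landmarks: np.ndarray                   # shape (5, 2), 5-point landmarks
    embedding: Optional[np.ndarray] = None  # set by FaceAnalyzer when a recognizer is given
    age: Optional[int] = None               # set by FaceAnalyzer when age/gender is enabled
    gender: Optional[int] = None            # 0 = Female, 1 = Male (per the examples)
```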

View File

@@ -7,6 +7,8 @@ from typing import Any, Dict, List
 import numpy as np
+from uniface.face import Face
+
 from .base import BaseDetector
 from .retinaface import RetinaFace
 from .scrfd import SCRFD
@@ -16,7 +18,7 @@ from .yolov5 import YOLOv5Face
 _detector_cache: Dict[str, BaseDetector] = {}
-def detect_faces(image: np.ndarray, method: str = 'retinaface', **kwargs) -> List[Dict[str, Any]]:
+def detect_faces(image: np.ndarray, method: str = 'retinaface', **kwargs) -> List[Face]:
     """
     High-level face detection function.
@@ -26,18 +28,18 @@ def detect_faces(image: np.ndarray, method: str = 'retinaface', **kwargs) -> Lis
         **kwargs: Additional arguments passed to the detector.
     Returns:
-        List[Dict[str, Any]]: A list of dictionaries, where each dictionary represents a detected face and contains:
-            - 'bbox' (List[float]): [x1, y1, x2, y2] bounding box coordinates.
-            - 'confidence' (float): The confidence score of the detection.
-            - 'landmarks' (List[List[float]]): 5-point facial landmarks.
+        List[Face]: A list of Face objects, each containing:
+            - bbox (np.ndarray): [x1, y1, x2, y2] bounding box coordinates.
+            - confidence (float): The confidence score of the detection.
+            - landmarks (np.ndarray): 5-point facial landmarks with shape (5, 2).
     Example:
         >>> from uniface import detect_faces
         >>> image = cv2.imread("your_image.jpg")
         >>> faces = detect_faces(image, method='retinaface', conf_thresh=0.8)
         >>> for face in faces:
-        ...     print(f"Found face with confidence: {face['confidence']}")
-        ...     print(f"BBox: {face['bbox']}")
+        ...     print(f"Found face with confidence: {face.confidence}")
+        ...     print(f"BBox: {face.bbox}")
     """
     method_name = method.lower()
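
Putting the updated factory together, a usage sketch (the `conf_thresh` value is taken from the docstring example; repeated calls with the same `method` reuse the instance stored in `_detector_cache`):

```python
import cv2

from uniface import detect_faces

image = cv2.imread("your_image.jpg")

# First call constructs and caches the detector; subsequent calls with
# method='retinaface' reuse the cached instance.
faces = detect_faces(image, method='retinaface', conf_thresh=0.8)
for face in faces:
    print(f"conf={face.confidence:.2f}, bbox={face.bbox}")
```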

View File

@@ -7,6 +7,8 @@ from typing import Any, Dict, List
 import numpy as np
+from uniface.face import Face
+
 class BaseDetector(ABC):
     """
@@ -21,7 +23,7 @@ class BaseDetector(ABC):
         self.config = kwargs
     @abstractmethod
-    def detect(self, image: np.ndarray, **kwargs) -> List[Dict[str, Any]]:
+    def detect(self, image: np.ndarray, **kwargs) -> List[Face]:
         """
         Detect faces in an image.
@@ -30,18 +32,17 @@ class BaseDetector(ABC):
             **kwargs: Additional detection parameters
         Returns:
-            List[Dict[str, Any]]: List of detected faces, where each dictionary contains:
-                - 'bbox' (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
-                - 'confidence' (float): Detection confidence score (0.0 to 1.0)
-                - 'landmarks' (np.ndarray): Facial landmarks with shape (5, 2) for 5-point landmarks
-                  or (68, 2) for 68-point landmarks. Empty array if not supported.
+            List[Face]: List of detected Face objects, each containing:
+                - bbox (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
+                - confidence (float): Detection confidence score (0.0 to 1.0)
+                - landmarks (np.ndarray): Facial landmarks with shape (5, 2) for 5-point landmarks
         Example:
             >>> faces = detector.detect(image)
             >>> for face in faces:
-            ...     bbox = face['bbox']  # np.ndarray with shape (4,)
-            ...     confidence = face['confidence']  # float
-            ...     landmarks = face['landmarks']  # np.ndarray with shape (5, 2)
+            ...     bbox = face.bbox  # np.ndarray with shape (4,)
+            ...     confidence = face.confidence  # float
+            ...     landmarks = face.landmarks  # np.ndarray with shape (5, 2)
         """
         pass
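
For a third-party detector, the updated contract looks like the following toy subclass. Two assumptions: `BaseDetector` can be constructed with keyword arguments only (the diff shows it storing them in `self.config`), and the module path `uniface.detection.base` — the diff only shows the relative import `from .base import BaseDetector`:

```python
from typing import List

import numpy as np

from uniface.face import Face
from uniface.detection.base import BaseDetector  # module path assumed


class FullFrameDetector(BaseDetector):
    """Toy detector that reports the entire image as a single face."""

    def detect(self, image: np.ndarray, **kwargs) -> List[Face]:
        h, w = image.shape[:2]
        return [
            Face(
                bbox=np.array([0.0, 0.0, float(w), float(h)], dtype=np.float32),
                confidence=1.0,
                landmarks=np.empty((0, 2), dtype=np.float32),
            )
        ]
```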

View File

@@ -2,7 +2,7 @@
 # Author: Yakhyokhuja Valikhujaev
 # GitHub: https://github.com/yakhyo
-from typing import Any, Dict, List, Literal, Tuple
+from typing import Any, List, Literal, Tuple
 import numpy as np
@@ -14,6 +14,7 @@ from uniface.common import (
     resize_image,
 )
 from uniface.constants import RetinaFaceWeights
+from uniface.face import Face
 from uniface.log import Logger
 from uniface.model_store import verify_model_weights
 from uniface.onnx_utils import create_onnx_session
@@ -154,7 +155,7 @@ class RetinaFace(BaseDetector):
         max_num: int = 0,
         metric: Literal['default', 'max'] = 'max',
         center_weight: float = 2.0,
-    ) -> List[Dict[str, Any]]:
+    ) -> List[Face]:
         """
         Perform face detection on an input image and return bounding boxes and facial landmarks.
@@ -168,19 +169,19 @@ class RetinaFace(BaseDetector):
                 when using the "default" metric. Defaults to 2.0.
         Returns:
-            List[Dict[str, Any]]: List of face detection dictionaries, each containing:
-                - 'bbox' (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
-                - 'confidence' (float): Detection confidence score (0.0 to 1.0)
-                - 'landmarks' (np.ndarray): 5-point facial landmarks with shape (5, 2)
+            List[Face]: List of Face objects, each containing:
+                - bbox (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
+                - confidence (float): Detection confidence score (0.0 to 1.0)
+                - landmarks (np.ndarray): 5-point facial landmarks with shape (5, 2)
         Example:
             >>> faces = detector.detect(image)
             >>> for face in faces:
-            ...     bbox = face['bbox']  # np.ndarray with shape (4,)
-            ...     confidence = face['confidence']  # float
-            ...     landmarks = face['landmarks']  # np.ndarray with shape (5, 2)
+            ...     bbox = face.bbox  # np.ndarray with shape (4,)
+            ...     confidence = face.confidence  # float
+            ...     landmarks = face.landmarks  # np.ndarray with shape (5, 2)
             ...     # Can pass landmarks directly to recognition
-            ...     embedding = recognizer.get_normalized_embedding(image, landmarks)
+            ...     embedding = recognizer.get_normalized_embedding(image, face.landmarks)
         """
         original_height, original_width = image.shape[:2]
@@ -229,12 +230,12 @@ class RetinaFace(BaseDetector):
         faces = []
         for i in range(detections.shape[0]):
-            face_dict = {
-                'bbox': detections[i, :4],
-                'confidence': float(detections[i, 4]),
-                'landmarks': landmarks[i],
-            }
-            faces.append(face_dict)
+            face = Face(
+                bbox=detections[i, :4],
+                confidence=float(detections[i, 4]),
+                landmarks=landmarks[i],
+            )
+            faces.append(face)
         return faces
@@ -350,19 +351,12 @@ if __name__ == '__main__':
         # Process each detected face
         for face in faces:
-            # Extract bbox and landmarks from dictionary
-            bbox = face['bbox']  # [x1, y1, x2, y2]
-            landmarks = face['landmarks']  # [[x1, y1], [x2, y2], ...]
-            confidence = face['confidence']
-
-            # Pass bbox and confidence separately
-            draw_bbox(frame, bbox, confidence)
-
-            # Convert landmarks to numpy array format if needed
-            if landmarks is not None and len(landmarks) > 0:
-                # Convert list of [x, y] pairs to numpy array
-                points = np.array(landmarks, dtype=np.float32)  # Shape: (5, 2)
-                draw_keypoints(frame, points)
+            # Extract bbox and landmarks from Face object
+            draw_bbox(frame, face.bbox, face.confidence)
+
+            # Draw landmarks if available
+            if face.landmarks is not None and len(face.landmarks) > 0:
+                draw_keypoints(frame, face.landmarks)
         # Display face count
         cv2.putText(

View File

@@ -2,13 +2,14 @@
 # Author: Yakhyokhuja Valikhujaev
 # GitHub: https://github.com/yakhyo
-from typing import Any, Dict, List, Literal, Tuple
+from typing import Any, List, Literal, Tuple
 import cv2
 import numpy as np
 from uniface.common import distance2bbox, distance2kps, non_max_suppression, resize_image
 from uniface.constants import SCRFDWeights
+from uniface.face import Face
 from uniface.log import Logger
 from uniface.model_store import verify_model_weights
 from uniface.onnx_utils import create_onnx_session
@@ -193,7 +194,7 @@ class SCRFD(BaseDetector):
         max_num: int = 0,
         metric: Literal['default', 'max'] = 'max',
         center_weight: float = 2.0,
-    ) -> List[Dict[str, Any]]:
+    ) -> List[Face]:
         """
         Perform face detection on an input image and return bounding boxes and facial landmarks.
@@ -207,19 +208,19 @@ class SCRFD(BaseDetector):
                 when using the "default" metric. Defaults to 2.0.
         Returns:
-            List[Dict[str, Any]]: List of face detection dictionaries, each containing:
-                - 'bbox' (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
-                - 'confidence' (float): Detection confidence score (0.0 to 1.0)
-                - 'landmarks' (np.ndarray): 5-point facial landmarks with shape (5, 2)
+            List[Face]: List of Face objects, each containing:
+                - bbox (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
+                - confidence (float): Detection confidence score (0.0 to 1.0)
+                - landmarks (np.ndarray): 5-point facial landmarks with shape (5, 2)
         Example:
            >>> faces = detector.detect(image)
            >>> for face in faces:
-           ...     bbox = face['bbox']  # np.ndarray with shape (4,)
-           ...     confidence = face['confidence']  # float
-           ...     landmarks = face['landmarks']  # np.ndarray with shape (5, 2)
+           ...     bbox = face.bbox  # np.ndarray with shape (4,)
+           ...     confidence = face.confidence  # float
+           ...     landmarks = face.landmarks  # np.ndarray with shape (5, 2)
            ...     # Can pass landmarks directly to recognition
-           ...     embedding = recognizer.get_normalized_embedding(image, landmarks)
+           ...     embedding = recognizer.get_normalized_embedding(image, face.landmarks)
         """
         original_height, original_width = image.shape[:2]
@@ -280,12 +281,12 @@ class SCRFD(BaseDetector):
         faces = []
         for i in range(detections.shape[0]):
-            face_dict = {
-                'bbox': detections[i, :4],
-                'confidence': float(detections[i, 4]),
-                'landmarks': landmarks[i],
-            }
-            faces.append(face_dict)
+            face = Face(
+                bbox=detections[i, :4],
+                confidence=float(detections[i, 4]),
+                landmarks=landmarks[i],
+            )
+            faces.append(face)
         return faces
@@ -324,19 +325,12 @@ if __name__ == '__main__':
         # Process each detected face
         for face in faces:
-            # Extract bbox and landmarks from dictionary
-            bbox = face['bbox']  # [x1, y1, x2, y2]
-            landmarks = face['landmarks']  # [[x1, y1], [x2, y2], ...]
-            confidence = face['confidence']
-
-            # Pass bbox and confidence separately
-            draw_bbox(frame, bbox, confidence)
-
-            # Convert landmarks to numpy array format if needed
-            if landmarks is not None and len(landmarks) > 0:
-                # Convert list of [x, y] pairs to numpy array
-                points = np.array(landmarks, dtype=np.float32)  # Shape: (5, 2)
-                draw_keypoints(frame, points)
+            # Extract bbox and landmarks from Face object
+            draw_bbox(frame, face.bbox, face.confidence)
+
+            # Draw landmarks if available
+            if face.landmarks is not None and len(face.landmarks) > 0:
+                draw_keypoints(frame, face.landmarks)
         # Display face count
         cv2.putText(

View File

@@ -2,13 +2,14 @@
 # Author: Yakhyokhuja Valikhujaev
 # GitHub: https://github.com/yakhyo
-from typing import Any, Dict, List, Literal, Tuple
+from typing import Any, List, Literal, Tuple
 import cv2
 import numpy as np
 from uniface.common import non_max_suppression
 from uniface.constants import YOLOv5FaceWeights
+from uniface.face import Face
 from uniface.log import Logger
 from uniface.model_store import verify_model_weights
 from uniface.onnx_utils import create_onnx_session
@@ -259,7 +260,7 @@ class YOLOv5Face(BaseDetector):
         max_num: int = 0,
         metric: Literal['default', 'max'] = 'max',
         center_weight: float = 2.0,
-    ) -> List[Dict[str, Any]]:
+    ) -> List[Face]:
         """
         Perform face detection on an input image and return bounding boxes and facial landmarks.
@@ -273,19 +274,19 @@ class YOLOv5Face(BaseDetector):
                 when using the "default" metric. Defaults to 2.0.
         Returns:
-            List[Dict[str, Any]]: List of face detection dictionaries, each containing:
-                - 'bbox' (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
-                - 'confidence' (float): Detection confidence score (0.0 to 1.0)
-                - 'landmarks' (np.ndarray): 5-point facial landmarks with shape (5, 2)
+            List[Face]: List of Face objects, each containing:
+                - bbox (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
+                - confidence (float): Detection confidence score (0.0 to 1.0)
+                - landmarks (np.ndarray): 5-point facial landmarks with shape (5, 2)
         Example:
            >>> faces = detector.detect(image)
           >>> for face in faces:
-           ...     bbox = face['bbox']  # np.ndarray with shape (4,)
-           ...     confidence = face['confidence']  # float
-           ...     landmarks = face['landmarks']  # np.ndarray with shape (5, 2)
+           ...     bbox = face.bbox  # np.ndarray with shape (4,)
+           ...     confidence = face.confidence  # float
+           ...     landmarks = face.landmarks  # np.ndarray with shape (5, 2)
           ...     # Can pass landmarks directly to recognition
-           ...     embedding = recognizer.get_normalized_embedding(image, landmarks)
+           ...     embedding = recognizer.get_normalized_embedding(image, face.landmarks)
         """
         original_height, original_width = image.shape[:2]
@@ -330,11 +331,11 @@ class YOLOv5Face(BaseDetector):
         faces = []
         for i in range(detections.shape[0]):
-            face_dict = {
-                'bbox': detections[i, :4],
-                'confidence': float(detections[i, 4]),
-                'landmarks': landmarks[i],
-            }
-            faces.append(face_dict)
+            face = Face(
+                bbox=detections[i, :4],
+                confidence=float(detections[i, 4]),
+                landmarks=landmarks[i],
+            )
+            faces.append(face)