mirror of https://github.com/yakhyo/uniface.git (synced 2025-12-30 09:02:25 +00:00)
Merge pull request #42 from yakhyo/feat/standardize-outputs
feat: Standardize detection output and several other updates
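The core change: `detect()` now returns `Face` objects accessed by attribute (`face.bbox`) instead of dictionaries accessed by key (`face['bbox']`). The `Face` class itself lives in `uniface.face`, whose definition this page does not show; a minimal sketch consistent with how the diff uses it (constructed with `bbox`/`confidence`/`landmarks`, with `embedding`, `age`, and `gender` filled in later by the analyzer) might look like:

```python
# Hypothetical reconstruction -- the real uniface.face.Face is not shown in
# this diff. Fields, types, and defaults are inferred from the hunks below.
from dataclasses import dataclass
from typing import Optional

import numpy as np


@dataclass
class Face:
    bbox: np.ndarray                        # shape (4,): [x1, y1, x2, y2]
    confidence: float                       # detection score in [0.0, 1.0]
    landmarks: np.ndarray                   # shape (5, 2): 5-point landmarks
    embedding: Optional[np.ndarray] = None  # filled in by a recognizer
    age: Optional[int] = None               # filled in by an age/gender model
    gender: Optional[int] = None            # 0 = Female, 1 = Male (per the examples)
```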
CONTRIBUTING.md
@@ -21,11 +21,28 @@ Thank you for considering contributing to UniFace! We welcome contributions of a
+### Code Style
+
+This project uses [Ruff](https://docs.astral.sh/ruff/) for linting and formatting.
+
+```bash
+# Check for linting errors
+ruff check .
+
+# Auto-fix linting errors
+ruff check . --fix
+
+# Format code
+ruff format .
+```
+
 **Guidelines:**
 
 - Follow PEP8 guidelines
 - Use type hints (Python 3.10+)
 - Write docstrings for public APIs
 - Line length: 120 characters
 - Keep code simple and readable
 
+All PRs must pass `ruff check .` before merging.
+
 ## Development Setup
 
 ```bash
@@ -39,9 +39,9 @@ faces = detector.detect(image)
 
 # Print results
 for i, face in enumerate(faces):
     print(f"Face {i+1}:")
-    print(f"  Confidence: {face['confidence']:.2f}")
-    print(f"  BBox: {face['bbox']}")
-    print(f"  Landmarks: {len(face['landmarks'])} points")
+    print(f"  Confidence: {face.confidence:.2f}")
+    print(f"  BBox: {face.bbox}")
+    print(f"  Landmarks: {len(face.landmarks)} points")
 ```
 
 **Output:**
@@ -70,9 +70,9 @@ image = cv2.imread("photo.jpg")
 faces = detector.detect(image)
 
 # Extract visualization data
-bboxes = [f['bbox'] for f in faces]
-scores = [f['confidence'] for f in faces]
-landmarks = [f['landmarks'] for f in faces]
+bboxes = [f.bbox for f in faces]
+scores = [f.confidence for f in faces]
+landmarks = [f.landmarks for f in faces]
 
 # Draw on image
 draw_detections(
@@ -113,8 +113,8 @@ faces2 = detector.detect(image2)
 
 if faces1 and faces2:
     # Extract embeddings
-    emb1 = recognizer.get_normalized_embedding(image1, faces1[0]['landmarks'])
-    emb2 = recognizer.get_normalized_embedding(image2, faces2[0]['landmarks'])
+    emb1 = recognizer.get_normalized_embedding(image1, faces1[0].landmarks)
+    emb2 = recognizer.get_normalized_embedding(image2, faces2[0].landmarks)
 
     # Compute similarity (cosine similarity)
     similarity = np.dot(emb1, emb2.T)[0][0]
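Since `get_normalized_embedding` returns L2-normalized vectors, the plain dot product above is exactly the cosine similarity. A standalone check of that identity (the `(1, 512)` shape is an assumption about the embedding size; the `[0][0]` indexing follows the snippet above):

```python
# Verify that a dot product of unit row vectors equals cosine similarity.
import numpy as np

rng = np.random.default_rng(0)
emb1 = rng.standard_normal((1, 512)).astype(np.float32)
emb2 = rng.standard_normal((1, 512)).astype(np.float32)
emb1 /= np.linalg.norm(emb1)  # mimic "normalized" embeddings
emb2 /= np.linalg.norm(emb2)

similarity = np.dot(emb1, emb2.T)[0][0]  # scalar in [-1, 1]
print(f"cosine similarity: {similarity:.4f}")
```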
@@ -159,9 +159,9 @@ while True:
     faces = detector.detect(frame)
 
     # Draw results
-    bboxes = [f['bbox'] for f in faces]
-    scores = [f['confidence'] for f in faces]
-    landmarks = [f['landmarks'] for f in faces]
+    bboxes = [f.bbox for f in faces]
+    scores = [f.confidence for f in faces]
+    landmarks = [f.landmarks for f in faces]
     draw_detections(
         image=frame,
         bboxes=bboxes,
@@ -199,7 +199,7 @@ faces = detector.detect(image)
 
 # Predict attributes
 for i, face in enumerate(faces):
-    gender, age = age_gender.predict(image, face['bbox'])
+    gender, age = age_gender.predict(image, face.bbox)
     gender_str = 'Female' if gender == 0 else 'Male'
     print(f"Face {i+1}: {gender_str}, {age} years old")
 ```
@@ -230,7 +230,7 @@ image = cv2.imread("photo.jpg")
 faces = detector.detect(image)
 
 if faces:
-    landmarks = landmarker.get_landmarks(image, faces[0]['bbox'])
+    landmarks = landmarker.get_landmarks(image, faces[0].bbox)
     print(f"Detected {len(landmarks)} landmarks")
 
     # Draw landmarks
@@ -262,8 +262,7 @@ faces = detector.detect(image)
 
 # Estimate gaze for each face
 for i, face in enumerate(faces):
-    bbox = face['bbox']
-    x1, y1, x2, y2 = map(int, bbox[:4])
+    x1, y1, x2, y2 = map(int, face.bbox[:4])
     face_crop = image[y1:y2, x1:x2]
 
     if face_crop.size > 0:
@@ -271,7 +270,7 @@ for i, face in enumerate(faces):
         print(f"Face {i+1}: pitch={np.degrees(pitch):.1f}°, yaw={np.degrees(yaw):.1f}°")
 
         # Draw gaze direction
-        draw_gaze(image, bbox, pitch, yaw)
+        draw_gaze(image, face.bbox, pitch, yaw)
 
 cv2.imwrite("gaze_output.jpg", image)
 ```
@@ -435,7 +434,7 @@ image = cv2.imread("photo.jpg")
 faces = detector.detect(image)
 
 for i, face in enumerate(faces):
-    label_idx, score = spoofer.predict(image, face['bbox'])
+    label_idx, score = spoofer.predict(image, face.bbox)
     # label_idx: 0 = Fake, 1 = Real
     label = 'Real' if label_idx == 1 else 'Fake'
    print(f"Face {i+1}: {label} ({score:.1%})")
README.md
@@ -1,11 +1,15 @@
 # UniFace: All-in-One Face Analysis Library
 
 <div align="center">
 
 [](https://opensource.org/licenses/MIT)
 [](https://www.python.org/)
 [](https://pypi.org/project/uniface/)
 [](https://github.com/yakhyo/uniface/actions)
-[](https://pepy.tech/project/uniface)
-[](https://deepwiki.com/yakhyo/uniface)
+[](https://pepy.tech/project/uniface)
+[](https://deepwiki.com/yakhyo/uniface)
 
 </div>
 
 <div align="center">
   <img src=".github/logos/logo_web.webp" width=75%>
@@ -101,9 +105,9 @@ faces = detector.detect(image)
 
 # Process results
 for face in faces:
-    bbox = face['bbox']  # [x1, y1, x2, y2]
-    confidence = face['confidence']
-    landmarks = face['landmarks']  # 5-point landmarks
+    bbox = face.bbox  # np.ndarray [x1, y1, x2, y2]
+    confidence = face.confidence
+    landmarks = face.landmarks  # np.ndarray (5, 2) landmarks
     print(f"Face detected with confidence: {confidence:.2f}")
 ```
@@ -121,8 +125,8 @@ recognizer = ArcFace()
 faces1 = detector.detect(image1)
 faces2 = detector.detect(image2)
 
-embedding1 = recognizer.get_normalized_embedding(image1, faces1[0]['landmarks'])
-embedding2 = recognizer.get_normalized_embedding(image2, faces2[0]['landmarks'])
+embedding1 = recognizer.get_normalized_embedding(image1, faces1[0].landmarks)
+embedding2 = recognizer.get_normalized_embedding(image2, faces2[0].landmarks)
 
 # Compare faces
 similarity = compute_similarity(embedding1, embedding2)
@@ -138,7 +142,7 @@ detector = RetinaFace()
 landmarker = Landmark106()
 
 faces = detector.detect(image)
-landmarks = landmarker.get_landmarks(image, faces[0]['bbox'])
+landmarks = landmarker.get_landmarks(image, faces[0].bbox)
 # Returns 106 (x, y) landmark points
 ```
@@ -151,7 +155,7 @@ detector = RetinaFace()
 age_gender = AgeGender()
 
 faces = detector.detect(image)
-gender, age = age_gender.predict(image, faces[0]['bbox'])
+gender, age = age_gender.predict(image, faces[0].bbox)
 gender_str = 'Female' if gender == 0 else 'Male'
 print(f"{gender_str}, {age} years old")
 ```
@@ -168,15 +172,14 @@ gaze_estimator = MobileGaze()
 
 faces = detector.detect(image)
 for face in faces:
-    bbox = face['bbox']
-    x1, y1, x2, y2 = map(int, bbox[:4])
+    x1, y1, x2, y2 = map(int, face.bbox[:4])
     face_crop = image[y1:y2, x1:x2]
 
     pitch, yaw = gaze_estimator.estimate(face_crop)
     print(f"Gaze: pitch={np.degrees(pitch):.1f}°, yaw={np.degrees(yaw):.1f}°")
 
     # Visualize
-    draw_gaze(image, bbox, pitch, yaw)
+    draw_gaze(image, face.bbox, pitch, yaw)
 ```
 
 ### Face Parsing
@@ -213,7 +216,7 @@ spoofer = MiniFASNet()  # Uses V2 by default
 
 faces = detector.detect(image)
 for face in faces:
-    label_idx, score = spoofer.predict(image, face['bbox'])
+    label_idx, score = spoofer.predict(image, face.bbox)
     # label_idx: 0 = Fake, 1 = Real
     label = 'Real' if label_idx == 1 else 'Fake'
     print(f"{label}: {score:.1%}")
@@ -458,9 +461,9 @@ while True:
     faces = detector.detect(frame)
 
     # Extract data for visualization
-    bboxes = [f['bbox'] for f in faces]
-    scores = [f['confidence'] for f in faces]
-    landmarks = [f['landmarks'] for f in faces]
+    bboxes = [f.bbox for f in faces]
+    scores = [f.confidence for f in faces]
+    landmarks = [f.landmarks for f in faces]
 
     draw_detections(
         image=frame,
@@ -494,7 +497,7 @@ for person_id, image_path in person_images.items():
     faces = detector.detect(image)
     if faces:
         embedding = recognizer.get_normalized_embedding(
-            image, faces[0]['landmarks']
+            image, faces[0].landmarks
         )
         database[person_id] = embedding
@@ -503,7 +506,7 @@ query_image = cv2.imread("query.jpg")
 query_faces = detector.detect(query_image)
 if query_faces:
     query_embedding = recognizer.get_normalized_embedding(
-        query_image, query_faces[0]['landmarks']
+        query_image, query_faces[0].landmarks
     )
 
     # Find best match
@@ -48,7 +48,7 @@
 "name": "stdout",
 "output_type": "stream",
 "text": [
-"1.3.1\n"
+"1.6.0\n"
 ]
 }
 ],
@@ -140,13 +140,13 @@
 "\n",
 " # Draw detections\n",
 " bbox_image = image.copy()\n",
-" bboxes = [f['bbox'] for f in faces]\n",
-" scores = [f['confidence'] for f in faces]\n",
-" landmarks = [f['landmarks'] for f in faces]\n",
+" bboxes = [f.bbox for f in faces]\n",
+" scores = [f.confidence for f in faces]\n",
+" landmarks = [f.landmarks for f in faces]\n",
 " draw_detections(image=bbox_image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=0.6, fancy_bbox=True)\n",
 "\n",
 " # Align first detected face (returns aligned image and inverse transform matrix)\n",
-" first_landmarks = faces[0]['landmarks']\n",
+" first_landmarks = faces[0].landmarks\n",
 " aligned_image, _ = face_alignment(image, first_landmarks, image_size=112)\n",
 "\n",
 " # Convert BGR to RGB for visualization\n",
@@ -202,7 +202,8 @@
 "source": [
 "## Notes\n",
 "\n",
-"- `detect()` returns a list of face dictionaries with `bbox`, `confidence`, `landmarks`\n",
+"- `detect()` returns a list of `Face` objects with `bbox`, `confidence`, `landmarks` attributes\n",
+"- Access attributes using dot notation: `face.bbox`, `face.landmarks`\n",
 "- `face_alignment()` uses 5-point landmarks to align and crop the face\n",
 "- Default output size is 112x112 (standard for face recognition models)\n"
 ]
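The alignment step these notes describe can be exercised on its own. A sketch, assuming `RetinaFace` is importable from the package root (the `face_alignment` import and its `(aligned image, inverse transform)` return pair are shown elsewhere in this diff):

```python
# Sketch of the alignment step; face_alignment(image, landmarks, image_size)
# follows the notebook cell above. The uniface import paths are assumptions.
import cv2
from uniface import RetinaFace
from uniface.face_utils import face_alignment

image = cv2.imread("photo.jpg")
faces = RetinaFace().detect(image)
if faces:
    aligned, inverse_matrix = face_alignment(image, faces[0].landmarks, image_size=112)
    print(aligned.shape)  # expect (112, 112, 3) for the default output size
```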
@@ -44,7 +44,7 @@
 "name": "stdout",
 "output_type": "stream",
 "text": [
-"1.3.1\n"
+"1.6.0\n"
 ]
 }
 ],
File diff suppressed because one or more lines are too long
@@ -44,7 +44,7 @@
 "name": "stdout",
 "output_type": "stream",
 "text": [
-"1.3.1\n"
+"1.6.0\n"
 ]
 }
 ],
@@ -153,14 +153,14 @@
 "# Load image\n",
 "image = cv2.imread(image_path)\n",
 "\n",
-"# Detect faces - returns list of face dictionaries\n",
+"# Detect faces - returns list of Face objects\n",
 "faces = detector.detect(image)\n",
 "print(f'Detected {len(faces)} face(s)')\n",
 "\n",
 "# Unpack face data for visualization\n",
-"bboxes = [f['bbox'] for f in faces]\n",
-"scores = [f['confidence'] for f in faces]\n",
-"landmarks = [f['landmarks'] for f in faces]\n",
+"bboxes = [f.bbox for f in faces]\n",
+"scores = [f.confidence for f in faces]\n",
+"landmarks = [f.landmarks for f in faces]\n",
 "\n",
 "# Draw detections\n",
 "draw_detections(image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=0.6, fancy_bbox=True)\n",
@@ -211,9 +211,9 @@
 "faces = detector.detect(image, max_num=2)\n",
 "print(f'Detected {len(faces)} face(s)')\n",
 "\n",
-"bboxes = [f['bbox'] for f in faces]\n",
-"scores = [f['confidence'] for f in faces]\n",
-"landmarks = [f['landmarks'] for f in faces]\n",
+"bboxes = [f.bbox for f in faces]\n",
+"scores = [f.confidence for f in faces]\n",
+"landmarks = [f.landmarks for f in faces]\n",
 "\n",
 "draw_detections(image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=0.6, fancy_bbox=True)\n",
 "\n",
@@ -258,9 +258,9 @@
 "faces = detector.detect(image, max_num=5)\n",
 "print(f'Detected {len(faces)} face(s)')\n",
 "\n",
-"bboxes = [f['bbox'] for f in faces]\n",
-"scores = [f['confidence'] for f in faces]\n",
-"landmarks = [f['landmarks'] for f in faces]\n",
+"bboxes = [f.bbox for f in faces]\n",
+"scores = [f.confidence for f in faces]\n",
+"landmarks = [f.landmarks for f in faces]\n",
 "\n",
 "draw_detections(image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=0.6, fancy_bbox=True)\n",
 "\n",
@@ -274,7 +274,8 @@
 "source": [
 "## Notes\n",
 "\n",
-"- `detect()` returns a list of dictionaries with keys: `bbox`, `confidence`, `landmarks`\n",
+"- `detect()` returns a list of `Face` objects with attributes: `bbox`, `confidence`, `landmarks`\n",
+"- Access attributes using dot notation: `face.bbox`, `face.confidence`, `face.landmarks`\n",
 "- Adjust `conf_thresh` and `nms_thresh` for your use case\n",
 "- Use `max_num` to limit detected faces"
 ]
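As a worked version of these notes, a hedged sketch using the `detect_faces` helper documented later in this diff (its docstring shows `conf_thresh` being forwarded to the detector via `**kwargs`; `max_num` caps the number of detections, as in the cells above):

```python
# Tuning sketch based on the notes above; exact parameter plumbing may differ.
import cv2

from uniface import detect_faces

image = cv2.imread("your_image.jpg")

# A stricter confidence threshold yields fewer, higher-quality detections.
faces = detect_faces(image, method='retinaface', conf_thresh=0.8)
for face in faces:
    print(f"confidence={face.confidence:.2f}, bbox={face.bbox}")
```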
@@ -46,7 +46,7 @@
 "name": "stdout",
 "output_type": "stream",
 "text": [
-"UniFace version: 1.5.0\n"
+"UniFace version: 1.6.0\n"
 ]
 }
 ],
@@ -365,7 +365,7 @@
 ],
 "metadata": {
 "kernelspec": {
-"display_name": "Python 3",
+"display_name": "base",
 "language": "python",
 "name": "python3"
 },
@@ -379,7 +379,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.10.0"
+"version": "3.13.5"
 }
 },
 "nbformat": 4,
@@ -42,7 +42,7 @@
 "name": "stdout",
 "output_type": "stream",
 "text": [
-"1.3.1\n"
+"1.6.0\n"
 ]
 }
 ],
@@ -37,7 +37,7 @@
 "name": "stdout",
 "output_type": "stream",
 "text": [
-"1.3.1\n"
+"1.6.0\n"
 ]
 }
 ],
@@ -44,7 +44,7 @@
 "name": "stdout",
 "output_type": "stream",
 "text": [
-"UniFace version: 1.4.0\n"
+"UniFace version: 1.6.0\n"
 ]
 }
 ],
@@ -152,8 +152,7 @@
 "\n",
 " # Estimate gaze for each face\n",
 " for i, face in enumerate(faces):\n",
-" bbox = face['bbox']\n",
-" x1, y1, x2, y2 = map(int, bbox[:4])\n",
+" x1, y1, x2, y2 = map(int, face.bbox[:4])\n",
 " face_crop = image[y1:y2, x1:x2]\n",
 "\n",
 " if face_crop.size > 0:\n",
@@ -164,7 +163,7 @@
 " print(f' Face {i+1}: pitch={pitch_deg:.1f}°, yaw={yaw_deg:.1f}°')\n",
 "\n",
 " # Draw gaze without angle text\n",
-" draw_gaze(image, bbox, pitch, yaw, draw_angles=False)\n",
+" draw_gaze(image, face.bbox, pitch, yaw, draw_angles=False)\n",
 "\n",
 " # Convert BGR to RGB for display\n",
 " original_rgb = cv2.cvtColor(original, cv2.COLOR_BGR2RGB)\n",
@@ -249,7 +248,7 @@
 ],
 "metadata": {
 "kernelspec": {
-"display_name": "Python 3",
+"display_name": "base",
 "language": "python",
 "name": "python3"
 },
@@ -263,7 +262,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.10.0"
+"version": "3.13.5"
 }
 },
 "nbformat": 4,
pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "uniface"
-version = "1.5.3"
+version = "1.6.0"
 description = "UniFace: A Comprehensive Library for Face Detection, Recognition, Landmark Analysis, Face Parsing, Gaze Estimation, Age, and Gender Detection"
 readme = "README.md"
 license = { text = "MIT" }
@@ -205,4 +205,3 @@ Examples:
 
 if __name__ == '__main__':
     main()
-
uniface/__init__.py
@@ -13,7 +13,7 @@
 
 __license__ = 'MIT'
 __author__ = 'Yakhyokhuja Valikhujaev'
-__version__ = '1.5.3'
+__version__ = '1.6.0'
 
 
 from uniface.face_utils import compute_similarity, face_alignment
@@ -36,41 +36,24 @@ class FaceAnalyzer:
 
     def analyze(self, image: np.ndarray) -> List[Face]:
         """Analyze faces in an image."""
-        detections = self.detector.detect(image)
-        Logger.debug(f'Detected {len(detections)} face(s)')
+        faces = self.detector.detect(image)
+        Logger.debug(f'Detected {len(faces)} face(s)')
 
-        faces = []
-        for idx, detection in enumerate(detections):
-            bbox = detection['bbox']
-            confidence = detection['confidence']
-            landmarks = detection['landmarks']
-
-            embedding = None
+        for idx, face in enumerate(faces):
             if self.recognizer is not None:
                 try:
-                    embedding = self.recognizer.get_normalized_embedding(image, landmarks)
-                    Logger.debug(f'  Face {idx + 1}: Extracted embedding with shape {embedding.shape}')
+                    face.embedding = self.recognizer.get_normalized_embedding(image, face.landmarks)
+                    Logger.debug(f'  Face {idx + 1}: Extracted embedding with shape {face.embedding.shape}')
                 except Exception as e:
                     Logger.warning(f'  Face {idx + 1}: Failed to extract embedding: {e}')
 
-            age, gender = None, None
             if self.age_gender is not None:
                 try:
-                    gender, age = self.age_gender.predict(image, bbox)
-                    Logger.debug(f'  Face {idx + 1}: Age={age}, Gender={gender}')
+                    face.gender, face.age = self.age_gender.predict(image, face.bbox)
+                    Logger.debug(f'  Face {idx + 1}: Age={face.age}, Gender={face.gender}')
                 except Exception as e:
                     Logger.warning(f'  Face {idx + 1}: Failed to predict age/gender: {e}')
 
-            face = Face(
-                bbox=bbox,
-                confidence=confidence,
-                landmarks=landmarks,
-                embedding=embedding,
-                age=age,
-                gender=gender,
-            )
-            faces.append(face)
-
         Logger.info(f'Analysis complete: {len(faces)} face(s) processed')
         return faces
uniface/detection/__init__.py
@@ -7,6 +7,8 @@ from typing import Any, Dict, List
 
 import numpy as np
 
+from uniface.face import Face
+
 from .base import BaseDetector
 from .retinaface import RetinaFace
 from .scrfd import SCRFD
@@ -16,7 +18,7 @@ from .yolov5 import YOLOv5Face
 _detector_cache: Dict[str, BaseDetector] = {}
 
 
-def detect_faces(image: np.ndarray, method: str = 'retinaface', **kwargs) -> List[Dict[str, Any]]:
+def detect_faces(image: np.ndarray, method: str = 'retinaface', **kwargs) -> List[Face]:
     """
     High-level face detection function.
@@ -26,18 +28,18 @@ def detect_faces(image: np.ndarray, method: str = 'retinaface', **kwargs) -> Lis
         **kwargs: Additional arguments passed to the detector.
 
     Returns:
-        List[Dict[str, Any]]: A list of dictionaries, where each dictionary represents a detected face and contains:
-            - 'bbox' (List[float]): [x1, y1, x2, y2] bounding box coordinates.
-            - 'confidence' (float): The confidence score of the detection.
-            - 'landmarks' (List[List[float]]): 5-point facial landmarks.
+        List[Face]: A list of Face objects, each containing:
+            - bbox (np.ndarray): [x1, y1, x2, y2] bounding box coordinates.
+            - confidence (float): The confidence score of the detection.
+            - landmarks (np.ndarray): 5-point facial landmarks with shape (5, 2).
 
     Example:
         >>> from uniface import detect_faces
        >>> image = cv2.imread("your_image.jpg")
         >>> faces = detect_faces(image, method='retinaface', conf_thresh=0.8)
         >>> for face in faces:
-        ...     print(f"Found face with confidence: {face['confidence']}")
-        ...     print(f"BBox: {face['bbox']}")
+        ...     print(f"Found face with confidence: {face.confidence}")
+        ...     print(f"BBox: {face.bbox}")
     """
     method_name = method.lower()
uniface/detection/base.py
@@ -7,6 +7,8 @@ from typing import Any, Dict, List
 
 import numpy as np
 
+from uniface.face import Face
+
 
 class BaseDetector(ABC):
     """
@@ -21,7 +23,7 @@ class BaseDetector(ABC):
         self.config = kwargs
 
     @abstractmethod
-    def detect(self, image: np.ndarray, **kwargs) -> List[Dict[str, Any]]:
+    def detect(self, image: np.ndarray, **kwargs) -> List[Face]:
         """
         Detect faces in an image.
@@ -30,18 +32,17 @@ class BaseDetector(ABC):
             **kwargs: Additional detection parameters
 
         Returns:
-            List[Dict[str, Any]]: List of detected faces, where each dictionary contains:
-                - 'bbox' (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
-                - 'confidence' (float): Detection confidence score (0.0 to 1.0)
-                - 'landmarks' (np.ndarray): Facial landmarks with shape (5, 2) for 5-point landmarks
-                  or (68, 2) for 68-point landmarks. Empty array if not supported.
+            List[Face]: List of detected Face objects, each containing:
+                - bbox (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
+                - confidence (float): Detection confidence score (0.0 to 1.0)
+                - landmarks (np.ndarray): Facial landmarks with shape (5, 2) for 5-point landmarks
 
         Example:
             >>> faces = detector.detect(image)
             >>> for face in faces:
-            ...     bbox = face['bbox']  # np.ndarray with shape (4,)
-            ...     confidence = face['confidence']  # float
-            ...     landmarks = face['landmarks']  # np.ndarray with shape (5, 2)
+            ...     bbox = face.bbox  # np.ndarray with shape (4,)
+            ...     confidence = face.confidence  # float
+            ...     landmarks = face.landmarks  # np.ndarray with shape (5, 2)
         """
         pass
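Because every detector now shares the `List[Face]` contract, a third-party detector only has to return `Face` objects from `detect()`. A minimal sketch (the `uniface.detection.base` import path is an assumption inferred from `from .base import BaseDetector` above; `Face`'s constructor fields are taken from the hunks in this diff):

```python
# Toy detector conforming to the new interface, e.g. for pipeline tests.
from typing import List

import numpy as np

from uniface.face import Face
from uniface.detection.base import BaseDetector  # import path assumed


class FullFrameDetector(BaseDetector):
    """Reports the whole frame as a single 'face' with dummy landmarks."""

    def detect(self, image: np.ndarray, **kwargs) -> List[Face]:
        h, w = image.shape[:2]
        return [
            Face(
                bbox=np.array([0, 0, w, h], dtype=np.float32),
                confidence=1.0,
                landmarks=np.zeros((5, 2), dtype=np.float32),
            )
        ]
```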
uniface/detection/retinaface.py
@@ -2,7 +2,7 @@
 # Author: Yakhyokhuja Valikhujaev
 # GitHub: https://github.com/yakhyo
 
-from typing import Any, Dict, List, Literal, Tuple
+from typing import Any, List, Literal, Tuple
 
 import numpy as np
@@ -14,6 +14,7 @@ from uniface.common import (
     resize_image,
 )
 from uniface.constants import RetinaFaceWeights
+from uniface.face import Face
 from uniface.log import Logger
 from uniface.model_store import verify_model_weights
 from uniface.onnx_utils import create_onnx_session
@@ -154,7 +155,7 @@ class RetinaFace(BaseDetector):
         max_num: int = 0,
         metric: Literal['default', 'max'] = 'max',
         center_weight: float = 2.0,
-    ) -> List[Dict[str, Any]]:
+    ) -> List[Face]:
         """
         Perform face detection on an input image and return bounding boxes and facial landmarks.
@@ -168,19 +169,19 @@ class RetinaFace(BaseDetector):
                 when using the "default" metric. Defaults to 2.0.
 
         Returns:
-            List[Dict[str, Any]]: List of face detection dictionaries, each containing:
-                - 'bbox' (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
-                - 'confidence' (float): Detection confidence score (0.0 to 1.0)
-                - 'landmarks' (np.ndarray): 5-point facial landmarks with shape (5, 2)
+            List[Face]: List of Face objects, each containing:
+                - bbox (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
+                - confidence (float): Detection confidence score (0.0 to 1.0)
+                - landmarks (np.ndarray): 5-point facial landmarks with shape (5, 2)
 
         Example:
             >>> faces = detector.detect(image)
             >>> for face in faces:
-            ...     bbox = face['bbox']  # np.ndarray with shape (4,)
-            ...     confidence = face['confidence']  # float
-            ...     landmarks = face['landmarks']  # np.ndarray with shape (5, 2)
+            ...     bbox = face.bbox  # np.ndarray with shape (4,)
+            ...     confidence = face.confidence  # float
+            ...     landmarks = face.landmarks  # np.ndarray with shape (5, 2)
             ...     # Can pass landmarks directly to recognition
-            ...     embedding = recognizer.get_normalized_embedding(image, landmarks)
+            ...     embedding = recognizer.get_normalized_embedding(image, face.landmarks)
         """
 
         original_height, original_width = image.shape[:2]
@@ -229,12 +230,12 @@ class RetinaFace(BaseDetector):
 
         faces = []
         for i in range(detections.shape[0]):
-            face_dict = {
-                'bbox': detections[i, :4],
-                'confidence': float(detections[i, 4]),
-                'landmarks': landmarks[i],
-            }
-            faces.append(face_dict)
+            face = Face(
+                bbox=detections[i, :4],
+                confidence=float(detections[i, 4]),
+                landmarks=landmarks[i],
+            )
+            faces.append(face)
 
         return faces
@@ -350,19 +351,12 @@ if __name__ == '__main__':
 
         # Process each detected face
         for face in faces:
-            # Extract bbox and landmarks from dictionary
-            bbox = face['bbox']  # [x1, y1, x2, y2]
-            landmarks = face['landmarks']  # [[x1, y1], [x2, y2], ...]
-            confidence = face['confidence']
+            # Extract bbox and landmarks from Face object
+            draw_bbox(frame, face.bbox, face.confidence)
 
-            # Pass bbox and confidence separately
-            draw_bbox(frame, bbox, confidence)
-
-            # Convert landmarks to numpy array format if needed
-            if landmarks is not None and len(landmarks) > 0:
-                # Convert list of [x, y] pairs to numpy array
-                points = np.array(landmarks, dtype=np.float32)  # Shape: (5, 2)
-                draw_keypoints(frame, points)
+            # Draw landmarks if available
+            if face.landmarks is not None and len(face.landmarks) > 0:
+                draw_keypoints(frame, face.landmarks)
 
         # Display face count
         cv2.putText(
uniface/detection/scrfd.py
@@ -2,13 +2,14 @@
 # Author: Yakhyokhuja Valikhujaev
 # GitHub: https://github.com/yakhyo
 
-from typing import Any, Dict, List, Literal, Tuple
+from typing import Any, List, Literal, Tuple
 
 import cv2
 import numpy as np
 
 from uniface.common import distance2bbox, distance2kps, non_max_suppression, resize_image
 from uniface.constants import SCRFDWeights
+from uniface.face import Face
 from uniface.log import Logger
 from uniface.model_store import verify_model_weights
 from uniface.onnx_utils import create_onnx_session
@@ -193,7 +194,7 @@ class SCRFD(BaseDetector):
         max_num: int = 0,
         metric: Literal['default', 'max'] = 'max',
         center_weight: float = 2.0,
-    ) -> List[Dict[str, Any]]:
+    ) -> List[Face]:
         """
         Perform face detection on an input image and return bounding boxes and facial landmarks.
@@ -207,19 +208,19 @@ class SCRFD(BaseDetector):
                 when using the "default" metric. Defaults to 2.0.
 
         Returns:
-            List[Dict[str, Any]]: List of face detection dictionaries, each containing:
-                - 'bbox' (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
-                - 'confidence' (float): Detection confidence score (0.0 to 1.0)
-                - 'landmarks' (np.ndarray): 5-point facial landmarks with shape (5, 2)
+            List[Face]: List of Face objects, each containing:
+                - bbox (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
+                - confidence (float): Detection confidence score (0.0 to 1.0)
+                - landmarks (np.ndarray): 5-point facial landmarks with shape (5, 2)
 
         Example:
             >>> faces = detector.detect(image)
             >>> for face in faces:
-            ...     bbox = face['bbox']  # np.ndarray with shape (4,)
-            ...     confidence = face['confidence']  # float
-            ...     landmarks = face['landmarks']  # np.ndarray with shape (5, 2)
+            ...     bbox = face.bbox  # np.ndarray with shape (4,)
+            ...     confidence = face.confidence  # float
+            ...     landmarks = face.landmarks  # np.ndarray with shape (5, 2)
             ...     # Can pass landmarks directly to recognition
-            ...     embedding = recognizer.get_normalized_embedding(image, landmarks)
+            ...     embedding = recognizer.get_normalized_embedding(image, face.landmarks)
         """
 
         original_height, original_width = image.shape[:2]
@@ -280,12 +281,12 @@ class SCRFD(BaseDetector):
 
         faces = []
         for i in range(detections.shape[0]):
-            face_dict = {
-                'bbox': detections[i, :4],
-                'confidence': float(detections[i, 4]),
-                'landmarks': landmarks[i],
-            }
-            faces.append(face_dict)
+            face = Face(
+                bbox=detections[i, :4],
+                confidence=float(detections[i, 4]),
+                landmarks=landmarks[i],
+            )
+            faces.append(face)
 
         return faces
@@ -324,19 +325,12 @@ if __name__ == '__main__':
 
         # Process each detected face
         for face in faces:
-            # Extract bbox and landmarks from dictionary
-            bbox = face['bbox']  # [x1, y1, x2, y2]
-            landmarks = face['landmarks']  # [[x1, y1], [x2, y2], ...]
-            confidence = face['confidence']
+            # Extract bbox and landmarks from Face object
+            draw_bbox(frame, face.bbox, face.confidence)
 
-            # Pass bbox and confidence separately
-            draw_bbox(frame, bbox, confidence)
-
-            # Convert landmarks to numpy array format if needed
-            if landmarks is not None and len(landmarks) > 0:
-                # Convert list of [x, y] pairs to numpy array
-                points = np.array(landmarks, dtype=np.float32)  # Shape: (5, 2)
-                draw_keypoints(frame, points)
+            # Draw landmarks if available
+            if face.landmarks is not None and len(face.landmarks) > 0:
+                draw_keypoints(frame, face.landmarks)
 
         # Display face count
         cv2.putText(
uniface/detection/yolov5.py
@@ -2,13 +2,14 @@
 # Author: Yakhyokhuja Valikhujaev
 # GitHub: https://github.com/yakhyo
 
-from typing import Any, Dict, List, Literal, Tuple
+from typing import Any, List, Literal, Tuple
 
 import cv2
 import numpy as np
 
 from uniface.common import non_max_suppression
 from uniface.constants import YOLOv5FaceWeights
+from uniface.face import Face
 from uniface.log import Logger
 from uniface.model_store import verify_model_weights
 from uniface.onnx_utils import create_onnx_session
@@ -259,7 +260,7 @@ class YOLOv5Face(BaseDetector):
         max_num: int = 0,
         metric: Literal['default', 'max'] = 'max',
         center_weight: float = 2.0,
-    ) -> List[Dict[str, Any]]:
+    ) -> List[Face]:
         """
         Perform face detection on an input image and return bounding boxes and facial landmarks.
@@ -273,19 +274,19 @@ class YOLOv5Face(BaseDetector):
                 when using the "default" metric. Defaults to 2.0.
 
         Returns:
-            List[Dict[str, Any]]: List of face detection dictionaries, each containing:
-                - 'bbox' (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
-                - 'confidence' (float): Detection confidence score (0.0 to 1.0)
-                - 'landmarks' (np.ndarray): 5-point facial landmarks with shape (5, 2)
+            List[Face]: List of Face objects, each containing:
+                - bbox (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
+                - confidence (float): Detection confidence score (0.0 to 1.0)
+                - landmarks (np.ndarray): 5-point facial landmarks with shape (5, 2)
 
         Example:
             >>> faces = detector.detect(image)
             >>> for face in faces:
-            ...     bbox = face['bbox']  # np.ndarray with shape (4,)
-            ...     confidence = face['confidence']  # float
-            ...     landmarks = face['landmarks']  # np.ndarray with shape (5, 2)
+            ...     bbox = face.bbox  # np.ndarray with shape (4,)
+            ...     confidence = face.confidence  # float
+            ...     landmarks = face.landmarks  # np.ndarray with shape (5, 2)
             ...     # Can pass landmarks directly to recognition
-            ...     embedding = recognizer.get_normalized_embedding(image, landmarks)
+            ...     embedding = recognizer.get_normalized_embedding(image, face.landmarks)
         """
 
         original_height, original_width = image.shape[:2]
@@ -330,11 +331,11 @@ class YOLOv5Face(BaseDetector):
 
         faces = []
         for i in range(detections.shape[0]):
-            face_dict = {
-                'bbox': detections[i, :4],
-                'confidence': float(detections[i, 4]),
-                'landmarks': landmarks[i],
-            }
-            faces.append(face_dict)
+            face = Face(
+                bbox=detections[i, :4],
+                confidence=float(detections[i, 4]),
+                landmarks=landmarks[i],
+            )
+            faces.append(face)
 
         return faces