Mirror of https://github.com/yakhyo/uniface.git (synced 2025-12-30 09:02:25 +00:00)
feat: Update detection module output to dataclasses
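For orientation before the hunks: this commit replaces the per-face dictionaries (`face['bbox']`, `face['confidence']`, `face['landmarks']`) with attribute access on a `Face` object imported from `uniface.face`. The definition of `Face` itself is not among the hunks shown here, so the sketch below is inferred from the fields used throughout this diff (`bbox`, `confidence`, `landmarks`, plus the optional `embedding`, `age`, and `gender` that `FaceAnalyzer` fills in); it is not a copy of `uniface/face.py`:

```python
# Inferred sketch of the Face container; the real uniface/face.py may differ.
from dataclasses import dataclass
from typing import Optional

import numpy as np


@dataclass
class Face:
    bbox: np.ndarray        # shape (4,), [x1, y1, x2, y2]
    confidence: float       # detection score in [0.0, 1.0]
    landmarks: np.ndarray   # shape (5, 2), 5-point landmarks
    embedding: Optional[np.ndarray] = None  # set by FaceAnalyzer when a recognizer is configured
    age: Optional[int] = None               # set when age/gender prediction is enabled
    gender: Optional[int] = None            # 0 = Female, 1 = Male (per the README examples)
```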
@@ -39,9 +39,9 @@ faces = detector.detect(image)
 # Print results
 for i, face in enumerate(faces):
     print(f"Face {i+1}:")
-    print(f" Confidence: {face['confidence']:.2f}")
-    print(f" BBox: {face['bbox']}")
-    print(f" Landmarks: {len(face['landmarks'])} points")
+    print(f" Confidence: {face.confidence:.2f}")
+    print(f" BBox: {face.bbox}")
+    print(f" Landmarks: {len(face.landmarks)} points")
 ```
 
 **Output:**
@@ -70,9 +70,9 @@ image = cv2.imread("photo.jpg")
 faces = detector.detect(image)
 
 # Extract visualization data
-bboxes = [f['bbox'] for f in faces]
-scores = [f['confidence'] for f in faces]
-landmarks = [f['landmarks'] for f in faces]
+bboxes = [f.bbox for f in faces]
+scores = [f.confidence for f in faces]
+landmarks = [f.landmarks for f in faces]
 
 # Draw on image
 draw_detections(
@@ -113,8 +113,8 @@ faces2 = detector.detect(image2)
 
 if faces1 and faces2:
     # Extract embeddings
-    emb1 = recognizer.get_normalized_embedding(image1, faces1[0]['landmarks'])
-    emb2 = recognizer.get_normalized_embedding(image2, faces2[0]['landmarks'])
+    emb1 = recognizer.get_normalized_embedding(image1, faces1[0].landmarks)
+    emb2 = recognizer.get_normalized_embedding(image2, faces2[0].landmarks)
 
     # Compute similarity (cosine similarity)
     similarity = np.dot(emb1, emb2.T)[0][0]
@@ -159,9 +159,9 @@ while True:
     faces = detector.detect(frame)
 
     # Draw results
-    bboxes = [f['bbox'] for f in faces]
-    scores = [f['confidence'] for f in faces]
-    landmarks = [f['landmarks'] for f in faces]
+    bboxes = [f.bbox for f in faces]
+    scores = [f.confidence for f in faces]
+    landmarks = [f.landmarks for f in faces]
     draw_detections(
         image=frame,
         bboxes=bboxes,
@@ -199,7 +199,7 @@ faces = detector.detect(image)
 
 # Predict attributes
 for i, face in enumerate(faces):
-    gender, age = age_gender.predict(image, face['bbox'])
+    gender, age = age_gender.predict(image, face.bbox)
     gender_str = 'Female' if gender == 0 else 'Male'
     print(f"Face {i+1}: {gender_str}, {age} years old")
 ```
@@ -230,7 +230,7 @@ image = cv2.imread("photo.jpg")
 faces = detector.detect(image)
 
 if faces:
-    landmarks = landmarker.get_landmarks(image, faces[0]['bbox'])
+    landmarks = landmarker.get_landmarks(image, faces[0].bbox)
     print(f"Detected {len(landmarks)} landmarks")
 
     # Draw landmarks
@@ -262,8 +262,7 @@ faces = detector.detect(image)
 
 # Estimate gaze for each face
 for i, face in enumerate(faces):
-    bbox = face['bbox']
-    x1, y1, x2, y2 = map(int, bbox[:4])
+    x1, y1, x2, y2 = map(int, face.bbox[:4])
     face_crop = image[y1:y2, x1:x2]
 
     if face_crop.size > 0:
@@ -271,7 +270,7 @@ for i, face in enumerate(faces):
         print(f"Face {i+1}: pitch={np.degrees(pitch):.1f}°, yaw={np.degrees(yaw):.1f}°")
 
         # Draw gaze direction
-        draw_gaze(image, bbox, pitch, yaw)
+        draw_gaze(image, face.bbox, pitch, yaw)
 
 cv2.imwrite("gaze_output.jpg", image)
 ```
@@ -435,7 +434,7 @@ image = cv2.imread("photo.jpg")
 faces = detector.detect(image)
 
 for i, face in enumerate(faces):
-    label_idx, score = spoofer.predict(image, face['bbox'])
+    label_idx, score = spoofer.predict(image, face.bbox)
     # label_idx: 0 = Fake, 1 = Real
     label = 'Real' if label_idx == 1 else 'Fake'
     print(f"Face {i+1}: {label} ({score:.1%})")
README.md (31 lines changed)
@@ -101,9 +101,9 @@ faces = detector.detect(image)
 
 # Process results
 for face in faces:
-    bbox = face['bbox']  # [x1, y1, x2, y2]
-    confidence = face['confidence']
-    landmarks = face['landmarks']  # 5-point landmarks
+    bbox = face.bbox  # np.ndarray [x1, y1, x2, y2]
+    confidence = face.confidence
+    landmarks = face.landmarks  # np.ndarray (5, 2) landmarks
     print(f"Face detected with confidence: {confidence:.2f}")
 ```
 
@@ -121,8 +121,8 @@ recognizer = ArcFace()
 faces1 = detector.detect(image1)
 faces2 = detector.detect(image2)
 
-embedding1 = recognizer.get_normalized_embedding(image1, faces1[0]['landmarks'])
-embedding2 = recognizer.get_normalized_embedding(image2, faces2[0]['landmarks'])
+embedding1 = recognizer.get_normalized_embedding(image1, faces1[0].landmarks)
+embedding2 = recognizer.get_normalized_embedding(image2, faces2[0].landmarks)
 
 # Compare faces
 similarity = compute_similarity(embedding1, embedding2)
@@ -138,7 +138,7 @@ detector = RetinaFace()
 landmarker = Landmark106()
 
 faces = detector.detect(image)
-landmarks = landmarker.get_landmarks(image, faces[0]['bbox'])
+landmarks = landmarker.get_landmarks(image, faces[0].bbox)
 # Returns 106 (x, y) landmark points
 ```
 
@@ -151,7 +151,7 @@ detector = RetinaFace()
 age_gender = AgeGender()
 
 faces = detector.detect(image)
-gender, age = age_gender.predict(image, faces[0]['bbox'])
+gender, age = age_gender.predict(image, faces[0].bbox)
 gender_str = 'Female' if gender == 0 else 'Male'
 print(f"{gender_str}, {age} years old")
 ```
@@ -168,15 +168,14 @@ gaze_estimator = MobileGaze()
 
 faces = detector.detect(image)
 for face in faces:
-    bbox = face['bbox']
-    x1, y1, x2, y2 = map(int, bbox[:4])
+    x1, y1, x2, y2 = map(int, face.bbox[:4])
     face_crop = image[y1:y2, x1:x2]
 
     pitch, yaw = gaze_estimator.estimate(face_crop)
     print(f"Gaze: pitch={np.degrees(pitch):.1f}°, yaw={np.degrees(yaw):.1f}°")
 
     # Visualize
-    draw_gaze(image, bbox, pitch, yaw)
+    draw_gaze(image, face.bbox, pitch, yaw)
 ```
 
 ### Face Parsing
@@ -213,7 +212,7 @@ spoofer = MiniFASNet() # Uses V2 by default
 
 faces = detector.detect(image)
 for face in faces:
-    label_idx, score = spoofer.predict(image, face['bbox'])
+    label_idx, score = spoofer.predict(image, face.bbox)
     # label_idx: 0 = Fake, 1 = Real
     label = 'Real' if label_idx == 1 else 'Fake'
     print(f"{label}: {score:.1%}")
@@ -458,9 +457,9 @@ while True:
     faces = detector.detect(frame)
 
     # Extract data for visualization
-    bboxes = [f['bbox'] for f in faces]
-    scores = [f['confidence'] for f in faces]
-    landmarks = [f['landmarks'] for f in faces]
+    bboxes = [f.bbox for f in faces]
+    scores = [f.confidence for f in faces]
+    landmarks = [f.landmarks for f in faces]
 
     draw_detections(
         image=frame,
@@ -494,7 +493,7 @@ for person_id, image_path in person_images.items():
     faces = detector.detect(image)
     if faces:
         embedding = recognizer.get_normalized_embedding(
-            image, faces[0]['landmarks']
+            image, faces[0].landmarks
         )
         database[person_id] = embedding
 
@@ -503,7 +502,7 @@ query_image = cv2.imread("query.jpg")
 query_faces = detector.detect(query_image)
 if query_faces:
     query_embedding = recognizer.get_normalized_embedding(
-        query_image, query_faces[0]['landmarks']
+        query_image, query_faces[0].landmarks
     )
 
     # Find best match
@@ -48,7 +48,7 @@
 "name": "stdout",
 "output_type": "stream",
 "text": [
-"1.3.1\n"
+"1.6.0\n"
 ]
 }
 ],
@@ -140,13 +140,13 @@
 "\n",
 " # Draw detections\n",
 " bbox_image = image.copy()\n",
-" bboxes = [f['bbox'] for f in faces]\n",
-" scores = [f['confidence'] for f in faces]\n",
-" landmarks = [f['landmarks'] for f in faces]\n",
+" bboxes = [f.bbox for f in faces]\n",
+" scores = [f.confidence for f in faces]\n",
+" landmarks = [f.landmarks for f in faces]\n",
 " draw_detections(image=bbox_image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=0.6, fancy_bbox=True)\n",
 "\n",
 " # Align first detected face (returns aligned image and inverse transform matrix)\n",
-" first_landmarks = faces[0]['landmarks']\n",
+" first_landmarks = faces[0].landmarks\n",
 " aligned_image, _ = face_alignment(image, first_landmarks, image_size=112)\n",
 "\n",
 " # Convert BGR to RGB for visualization\n",
@@ -202,7 +202,8 @@
 "source": [
 "## Notes\n",
 "\n",
-"- `detect()` returns a list of face dictionaries with `bbox`, `confidence`, `landmarks`\n",
+"- `detect()` returns a list of `Face` objects with `bbox`, `confidence`, `landmarks` attributes\n",
+"- Access attributes using dot notation: `face.bbox`, `face.landmarks`\n",
 "- `face_alignment()` uses 5-point landmarks to align and crop the face\n",
 "- Default output size is 112x112 (standard for face recognition models)\n"
 ]
@@ -44,7 +44,7 @@
 "name": "stdout",
 "output_type": "stream",
 "text": [
-"1.3.1\n"
+"1.6.0\n"
 ]
 }
 ],
File diff suppressed because one or more lines are too long
@@ -44,7 +44,7 @@
 "name": "stdout",
 "output_type": "stream",
 "text": [
-"1.3.1\n"
+"1.6.0\n"
 ]
 }
 ],
@@ -153,14 +153,14 @@
 "# Load image\n",
 "image = cv2.imread(image_path)\n",
 "\n",
-"# Detect faces - returns list of face dictionaries\n",
+"# Detect faces - returns list of Face objects\n",
 "faces = detector.detect(image)\n",
 "print(f'Detected {len(faces)} face(s)')\n",
 "\n",
 "# Unpack face data for visualization\n",
-"bboxes = [f['bbox'] for f in faces]\n",
-"scores = [f['confidence'] for f in faces]\n",
-"landmarks = [f['landmarks'] for f in faces]\n",
+"bboxes = [f.bbox for f in faces]\n",
+"scores = [f.confidence for f in faces]\n",
+"landmarks = [f.landmarks for f in faces]\n",
 "\n",
 "# Draw detections\n",
 "draw_detections(image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=0.6, fancy_bbox=True)\n",
@@ -211,9 +211,9 @@
 "faces = detector.detect(image, max_num=2)\n",
 "print(f'Detected {len(faces)} face(s)')\n",
 "\n",
-"bboxes = [f['bbox'] for f in faces]\n",
-"scores = [f['confidence'] for f in faces]\n",
-"landmarks = [f['landmarks'] for f in faces]\n",
+"bboxes = [f.bbox for f in faces]\n",
+"scores = [f.confidence for f in faces]\n",
+"landmarks = [f.landmarks for f in faces]\n",
 "\n",
 "draw_detections(image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=0.6, fancy_bbox=True)\n",
 "\n",
@@ -258,9 +258,9 @@
 "faces = detector.detect(image, max_num=5)\n",
 "print(f'Detected {len(faces)} face(s)')\n",
 "\n",
-"bboxes = [f['bbox'] for f in faces]\n",
-"scores = [f['confidence'] for f in faces]\n",
-"landmarks = [f['landmarks'] for f in faces]\n",
+"bboxes = [f.bbox for f in faces]\n",
+"scores = [f.confidence for f in faces]\n",
+"landmarks = [f.landmarks for f in faces]\n",
 "\n",
 "draw_detections(image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=0.6, fancy_bbox=True)\n",
 "\n",
@@ -274,7 +274,8 @@
 "source": [
 "## Notes\n",
 "\n",
-"- `detect()` returns a list of dictionaries with keys: `bbox`, `confidence`, `landmarks`\n",
+"- `detect()` returns a list of `Face` objects with attributes: `bbox`, `confidence`, `landmarks`\n",
+"- Access attributes using dot notation: `face.bbox`, `face.confidence`, `face.landmarks`\n",
 "- Adjust `conf_thresh` and `nms_thresh` for your use case\n",
 "- Use `max_num` to limit detected faces"
 ]
@@ -46,7 +46,7 @@
 "name": "stdout",
 "output_type": "stream",
 "text": [
-"UniFace version: 1.5.0\n"
+"UniFace version: 1.6.0\n"
 ]
 }
 ],
@@ -365,7 +365,7 @@
 ],
 "metadata": {
 "kernelspec": {
-"display_name": "Python 3",
+"display_name": "base",
 "language": "python",
 "name": "python3"
 },
@@ -379,7 +379,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.10.0"
+"version": "3.13.5"
 }
 },
 "nbformat": 4,
@@ -42,7 +42,7 @@
 "name": "stdout",
 "output_type": "stream",
 "text": [
-"1.3.1\n"
+"1.6.0\n"
 ]
 }
 ],
@@ -37,7 +37,7 @@
 "name": "stdout",
 "output_type": "stream",
 "text": [
-"1.3.1\n"
+"1.6.0\n"
 ]
 }
 ],
@@ -44,7 +44,7 @@
 "name": "stdout",
 "output_type": "stream",
 "text": [
-"UniFace version: 1.4.0\n"
+"UniFace version: 1.6.0\n"
 ]
 }
 ],
@@ -152,8 +152,7 @@
 "\n",
 " # Estimate gaze for each face\n",
 " for i, face in enumerate(faces):\n",
-" bbox = face['bbox']\n",
-" x1, y1, x2, y2 = map(int, bbox[:4])\n",
+" x1, y1, x2, y2 = map(int, face.bbox[:4])\n",
 " face_crop = image[y1:y2, x1:x2]\n",
 "\n",
 " if face_crop.size > 0:\n",
@@ -164,7 +163,7 @@
 " print(f' Face {i+1}: pitch={pitch_deg:.1f}°, yaw={yaw_deg:.1f}°')\n",
 "\n",
 " # Draw gaze without angle text\n",
-" draw_gaze(image, bbox, pitch, yaw, draw_angles=False)\n",
+" draw_gaze(image, face.bbox, pitch, yaw, draw_angles=False)\n",
 "\n",
 " # Convert BGR to RGB for display\n",
 " original_rgb = cv2.cvtColor(original, cv2.COLOR_BGR2RGB)\n",
@@ -249,7 +248,7 @@
 ],
 "metadata": {
 "kernelspec": {
-"display_name": "Python 3",
+"display_name": "base",
 "language": "python",
 "name": "python3"
 },
@@ -263,7 +262,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.10.0"
+"version": "3.13.5"
 }
 },
 "nbformat": 4,
@@ -1,6 +1,6 @@
 [project]
 name = "uniface"
-version = "1.5.3"
+version = "1.6.0"
 description = "UniFace: A Comprehensive Library for Face Detection, Recognition, Landmark Analysis, Face Parsing, Gaze Estimation, Age, and Gender Detection"
 readme = "README.md"
 license = { text = "MIT" }
@@ -13,7 +13,7 @@
 
 __license__ = 'MIT'
 __author__ = 'Yakhyokhuja Valikhujaev'
-__version__ = '1.5.3'
+__version__ = '1.6.0'
 
 
 from uniface.face_utils import compute_similarity, face_alignment
@@ -36,41 +36,24 @@ class FaceAnalyzer:
 
     def analyze(self, image: np.ndarray) -> List[Face]:
         """Analyze faces in an image."""
-        detections = self.detector.detect(image)
-        Logger.debug(f'Detected {len(detections)} face(s)')
+        faces = self.detector.detect(image)
+        Logger.debug(f'Detected {len(faces)} face(s)')
 
-        faces = []
-        for idx, detection in enumerate(detections):
-            bbox = detection['bbox']
-            confidence = detection['confidence']
-            landmarks = detection['landmarks']
-
-            embedding = None
+        for idx, face in enumerate(faces):
            if self.recognizer is not None:
                 try:
-                    embedding = self.recognizer.get_normalized_embedding(image, landmarks)
-                    Logger.debug(f' Face {idx + 1}: Extracted embedding with shape {embedding.shape}')
+                    face.embedding = self.recognizer.get_normalized_embedding(image, face.landmarks)
+                    Logger.debug(f' Face {idx + 1}: Extracted embedding with shape {face.embedding.shape}')
                 except Exception as e:
                     Logger.warning(f' Face {idx + 1}: Failed to extract embedding: {e}')
 
-            age, gender = None, None
             if self.age_gender is not None:
                 try:
-                    gender, age = self.age_gender.predict(image, bbox)
-                    Logger.debug(f' Face {idx + 1}: Age={age}, Gender={gender}')
+                    face.gender, face.age = self.age_gender.predict(image, face.bbox)
+                    Logger.debug(f' Face {idx + 1}: Age={face.age}, Gender={face.gender}')
                 except Exception as e:
                     Logger.warning(f' Face {idx + 1}: Failed to predict age/gender: {e}')
 
-            face = Face(
-                bbox=bbox,
-                confidence=confidence,
-                landmarks=landmarks,
-                embedding=embedding,
-                age=age,
-                gender=gender,
-            )
-            faces.append(face)
-
         Logger.info(f'Analysis complete: {len(faces)} face(s) processed')
         return faces
 
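The rewritten `analyze()` above no longer builds `Face` objects itself; it annotates the ones returned by `detector.detect()` in place, leaving `embedding`, `age`, and `gender` as `None` when the corresponding model is absent or fails. A minimal usage sketch follows, assuming `FaceAnalyzer` is exported from the package root (an assumption; only the class body appears in this diff):

```python
# Usage sketch; the import path and constructor defaults are assumptions.
import cv2

from uniface import FaceAnalyzer  # assumed export location

analyzer = FaceAnalyzer()  # wires up a detector (plus optional recognizer / age_gender)
image = cv2.imread("photo.jpg")

for face in analyzer.analyze(image):
    # Optional fields stay None when the corresponding model is not configured.
    print(f"confidence={face.confidence:.2f}, age={face.age}, gender={face.gender}")
    if face.embedding is not None:
        print(f"embedding shape: {face.embedding.shape}")
```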
@@ -7,6 +7,8 @@ from typing import Any, Dict, List
 
 import numpy as np
 
+from uniface.face import Face
+
 from .base import BaseDetector
 from .retinaface import RetinaFace
 from .scrfd import SCRFD
@@ -16,7 +18,7 @@ from .yolov5 import YOLOv5Face
 _detector_cache: Dict[str, BaseDetector] = {}
 
 
-def detect_faces(image: np.ndarray, method: str = 'retinaface', **kwargs) -> List[Dict[str, Any]]:
+def detect_faces(image: np.ndarray, method: str = 'retinaface', **kwargs) -> List[Face]:
     """
     High-level face detection function.
 
@@ -26,18 +28,18 @@ def detect_faces(image: np.ndarray, method: str = 'retinaface', **kwargs) -> Lis
         **kwargs: Additional arguments passed to the detector.
 
     Returns:
-        List[Dict[str, Any]]: A list of dictionaries, where each dictionary represents a detected face and contains:
-            - 'bbox' (List[float]): [x1, y1, x2, y2] bounding box coordinates.
-            - 'confidence' (float): The confidence score of the detection.
-            - 'landmarks' (List[List[float]]): 5-point facial landmarks.
+        List[Face]: A list of Face objects, each containing:
+            - bbox (np.ndarray): [x1, y1, x2, y2] bounding box coordinates.
+            - confidence (float): The confidence score of the detection.
+            - landmarks (np.ndarray): 5-point facial landmarks with shape (5, 2).
 
     Example:
         >>> from uniface import detect_faces
         >>> image = cv2.imread("your_image.jpg")
         >>> faces = detect_faces(image, method='retinaface', conf_thresh=0.8)
         >>> for face in faces:
-        ...     print(f"Found face with confidence: {face['confidence']}")
-        ...     print(f"BBox: {face['bbox']}")
+        ...     print(f"Found face with confidence: {face.confidence}")
+        ...     print(f"BBox: {face.bbox}")
     """
     method_name = method.lower()
 
@@ -7,6 +7,8 @@ from typing import Any, Dict, List
 
 import numpy as np
 
+from uniface.face import Face
+
 
 class BaseDetector(ABC):
     """
@@ -21,7 +23,7 @@ class BaseDetector:
         self.config = kwargs
 
     @abstractmethod
-    def detect(self, image: np.ndarray, **kwargs) -> List[Dict[str, Any]]:
+    def detect(self, image: np.ndarray, **kwargs) -> List[Face]:
         """
         Detect faces in an image.
 
@@ -30,18 +32,17 @@ class BaseDetector:
             **kwargs: Additional detection parameters
 
         Returns:
-            List[Dict[str, Any]]: List of detected faces, where each dictionary contains:
-                - 'bbox' (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
-                - 'confidence' (float): Detection confidence score (0.0 to 1.0)
-                - 'landmarks' (np.ndarray): Facial landmarks with shape (5, 2) for 5-point landmarks
-                  or (68, 2) for 68-point landmarks. Empty array if not supported.
+            List[Face]: List of detected Face objects, each containing:
+                - bbox (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
+                - confidence (float): Detection confidence score (0.0 to 1.0)
+                - landmarks (np.ndarray): Facial landmarks with shape (5, 2) for 5-point landmarks
 
         Example:
             >>> faces = detector.detect(image)
            >>> for face in faces:
-            ...     bbox = face['bbox']  # np.ndarray with shape (4,)
-            ...     confidence = face['confidence']  # float
-            ...     landmarks = face['landmarks']  # np.ndarray with shape (5, 2)
+            ...     bbox = face.bbox  # np.ndarray with shape (4,)
+            ...     confidence = face.confidence  # float
+            ...     landmarks = face.landmarks  # np.ndarray with shape (5, 2)
         """
         pass
 
@@ -2,7 +2,7 @@
 # Author: Yakhyokhuja Valikhujaev
 # GitHub: https://github.com/yakhyo
 
-from typing import Any, Dict, List, Literal, Tuple
+from typing import Any, List, Literal, Tuple
 
 import numpy as np
 
@@ -14,6 +14,7 @@ from uniface.common import (
     resize_image,
 )
 from uniface.constants import RetinaFaceWeights
+from uniface.face import Face
 from uniface.log import Logger
 from uniface.model_store import verify_model_weights
 from uniface.onnx_utils import create_onnx_session
@@ -154,7 +155,7 @@ class RetinaFace(BaseDetector):
         max_num: int = 0,
         metric: Literal['default', 'max'] = 'max',
         center_weight: float = 2.0,
-    ) -> List[Dict[str, Any]]:
+    ) -> List[Face]:
         """
         Perform face detection on an input image and return bounding boxes and facial landmarks.
 
@@ -168,19 +169,19 @@ class RetinaFace(BaseDetector):
                 when using the "default" metric. Defaults to 2.0.
 
         Returns:
-            List[Dict[str, Any]]: List of face detection dictionaries, each containing:
-                - 'bbox' (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
-                - 'confidence' (float): Detection confidence score (0.0 to 1.0)
-                - 'landmarks' (np.ndarray): 5-point facial landmarks with shape (5, 2)
+            List[Face]: List of Face objects, each containing:
+                - bbox (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
+                - confidence (float): Detection confidence score (0.0 to 1.0)
+                - landmarks (np.ndarray): 5-point facial landmarks with shape (5, 2)
 
         Example:
             >>> faces = detector.detect(image)
             >>> for face in faces:
-            ...     bbox = face['bbox']  # np.ndarray with shape (4,)
-            ...     confidence = face['confidence']  # float
-            ...     landmarks = face['landmarks']  # np.ndarray with shape (5, 2)
+            ...     bbox = face.bbox  # np.ndarray with shape (4,)
+            ...     confidence = face.confidence  # float
+            ...     landmarks = face.landmarks  # np.ndarray with shape (5, 2)
             ...     # Can pass landmarks directly to recognition
-            ...     embedding = recognizer.get_normalized_embedding(image, landmarks)
+            ...     embedding = recognizer.get_normalized_embedding(image, face.landmarks)
         """
 
         original_height, original_width = image.shape[:2]
@@ -229,12 +230,12 @@ class RetinaFace(BaseDetector):
 
         faces = []
         for i in range(detections.shape[0]):
-            face_dict = {
-                'bbox': detections[i, :4],
-                'confidence': float(detections[i, 4]),
-                'landmarks': landmarks[i],
-            }
-            faces.append(face_dict)
+            face = Face(
+                bbox=detections[i, :4],
+                confidence=float(detections[i, 4]),
+                landmarks=landmarks[i],
+            )
+            faces.append(face)
 
         return faces
 
@@ -350,19 +351,12 @@ if __name__ == '__main__':
 
         # Process each detected face
         for face in faces:
-            # Extract bbox and landmarks from dictionary
-            bbox = face['bbox']  # [x1, y1, x2, y2]
-            landmarks = face['landmarks']  # [[x1, y1], [x2, y2], ...]
-            confidence = face['confidence']
+            # Extract bbox and landmarks from Face object
+            draw_bbox(frame, face.bbox, face.confidence)
 
-            # Pass bbox and confidence separately
-            draw_bbox(frame, bbox, confidence)
-
-            # Convert landmarks to numpy array format if needed
-            if landmarks is not None and len(landmarks) > 0:
-                # Convert list of [x, y] pairs to numpy array
-                points = np.array(landmarks, dtype=np.float32)  # Shape: (5, 2)
-                draw_keypoints(frame, points)
+            # Draw landmarks if available
+            if face.landmarks is not None and len(face.landmarks) > 0:
+                draw_keypoints(frame, face.landmarks)
 
         # Display face count
         cv2.putText(
@@ -2,13 +2,14 @@
 # Author: Yakhyokhuja Valikhujaev
 # GitHub: https://github.com/yakhyo
 
-from typing import Any, Dict, List, Literal, Tuple
+from typing import Any, List, Literal, Tuple
 
 import cv2
 import numpy as np
 
 from uniface.common import distance2bbox, distance2kps, non_max_suppression, resize_image
 from uniface.constants import SCRFDWeights
+from uniface.face import Face
 from uniface.log import Logger
 from uniface.model_store import verify_model_weights
 from uniface.onnx_utils import create_onnx_session
@@ -193,7 +194,7 @@ class SCRFD(BaseDetector):
         max_num: int = 0,
         metric: Literal['default', 'max'] = 'max',
         center_weight: float = 2.0,
-    ) -> List[Dict[str, Any]]:
+    ) -> List[Face]:
         """
         Perform face detection on an input image and return bounding boxes and facial landmarks.
 
@@ -207,19 +208,19 @@ class SCRFD(BaseDetector):
                 when using the "default" metric. Defaults to 2.0.
 
         Returns:
-            List[Dict[str, Any]]: List of face detection dictionaries, each containing:
-                - 'bbox' (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
-                - 'confidence' (float): Detection confidence score (0.0 to 1.0)
-                - 'landmarks' (np.ndarray): 5-point facial landmarks with shape (5, 2)
+            List[Face]: List of Face objects, each containing:
+                - bbox (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
+                - confidence (float): Detection confidence score (0.0 to 1.0)
+                - landmarks (np.ndarray): 5-point facial landmarks with shape (5, 2)
 
         Example:
             >>> faces = detector.detect(image)
             >>> for face in faces:
-            ...     bbox = face['bbox']  # np.ndarray with shape (4,)
-            ...     confidence = face['confidence']  # float
-            ...     landmarks = face['landmarks']  # np.ndarray with shape (5, 2)
+            ...     bbox = face.bbox  # np.ndarray with shape (4,)
+            ...     confidence = face.confidence  # float
+            ...     landmarks = face.landmarks  # np.ndarray with shape (5, 2)
             ...     # Can pass landmarks directly to recognition
-            ...     embedding = recognizer.get_normalized_embedding(image, landmarks)
+            ...     embedding = recognizer.get_normalized_embedding(image, face.landmarks)
         """
 
         original_height, original_width = image.shape[:2]
@@ -280,12 +281,12 @@ class SCRFD(BaseDetector):
 
         faces = []
         for i in range(detections.shape[0]):
-            face_dict = {
-                'bbox': detections[i, :4],
-                'confidence': float(detections[i, 4]),
-                'landmarks': landmarks[i],
-            }
-            faces.append(face_dict)
+            face = Face(
+                bbox=detections[i, :4],
+                confidence=float(detections[i, 4]),
+                landmarks=landmarks[i],
+            )
+            faces.append(face)
 
         return faces
 
@@ -324,19 +325,12 @@ if __name__ == '__main__':
 
         # Process each detected face
         for face in faces:
-            # Extract bbox and landmarks from dictionary
-            bbox = face['bbox']  # [x1, y1, x2, y2]
-            landmarks = face['landmarks']  # [[x1, y1], [x2, y2], ...]
-            confidence = face['confidence']
+            # Extract bbox and landmarks from Face object
+            draw_bbox(frame, face.bbox, face.confidence)
 
-            # Pass bbox and confidence separately
-            draw_bbox(frame, bbox, confidence)
-
-            # Convert landmarks to numpy array format if needed
-            if landmarks is not None and len(landmarks) > 0:
-                # Convert list of [x, y] pairs to numpy array
-                points = np.array(landmarks, dtype=np.float32)  # Shape: (5, 2)
-                draw_keypoints(frame, points)
+            # Draw landmarks if available
+            if face.landmarks is not None and len(face.landmarks) > 0:
+                draw_keypoints(frame, face.landmarks)
 
         # Display face count
         cv2.putText(
@@ -2,13 +2,14 @@
 # Author: Yakhyokhuja Valikhujaev
 # GitHub: https://github.com/yakhyo
 
-from typing import Any, Dict, List, Literal, Tuple
+from typing import Any, List, Literal, Tuple
 
 import cv2
 import numpy as np
 
 from uniface.common import non_max_suppression
 from uniface.constants import YOLOv5FaceWeights
+from uniface.face import Face
 from uniface.log import Logger
 from uniface.model_store import verify_model_weights
 from uniface.onnx_utils import create_onnx_session
@@ -259,7 +260,7 @@ class YOLOv5Face(BaseDetector):
         max_num: int = 0,
         metric: Literal['default', 'max'] = 'max',
         center_weight: float = 2.0,
-    ) -> List[Dict[str, Any]]:
+    ) -> List[Face]:
         """
         Perform face detection on an input image and return bounding boxes and facial landmarks.
 
@@ -273,19 +274,19 @@ class YOLOv5Face(BaseDetector):
                 when using the "default" metric. Defaults to 2.0.
 
         Returns:
-            List[Dict[str, Any]]: List of face detection dictionaries, each containing:
-                - 'bbox' (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
-                - 'confidence' (float): Detection confidence score (0.0 to 1.0)
-                - 'landmarks' (np.ndarray): 5-point facial landmarks with shape (5, 2)
+            List[Face]: List of Face objects, each containing:
+                - bbox (np.ndarray): Bounding box coordinates with shape (4,) as [x1, y1, x2, y2]
+                - confidence (float): Detection confidence score (0.0 to 1.0)
+                - landmarks (np.ndarray): 5-point facial landmarks with shape (5, 2)
 
         Example:
             >>> faces = detector.detect(image)
             >>> for face in faces:
-            ...     bbox = face['bbox']  # np.ndarray with shape (4,)
-            ...     confidence = face['confidence']  # float
-            ...     landmarks = face['landmarks']  # np.ndarray with shape (5, 2)
+            ...     bbox = face.bbox  # np.ndarray with shape (4,)
+            ...     confidence = face.confidence  # float
+            ...     landmarks = face.landmarks  # np.ndarray with shape (5, 2)
             ...     # Can pass landmarks directly to recognition
-            ...     embedding = recognizer.get_normalized_embedding(image, landmarks)
+            ...     embedding = recognizer.get_normalized_embedding(image, face.landmarks)
         """
 
         original_height, original_width = image.shape[:2]
@@ -330,11 +331,11 @@ class YOLOv5Face(BaseDetector):
 
         faces = []
         for i in range(detections.shape[0]):
-            face_dict = {
-                'bbox': detections[i, :4],
-                'confidence': float(detections[i, 4]),
-                'landmarks': landmarks[i],
-            }
-            faces.append(face_dict)
+            face = Face(
+                bbox=detections[i, :4],
+                confidence=float(detections[i, 4]),
+                landmarks=landmarks[i],
+            )
+            faces.append(face)
 
         return faces