feat: Update detection module output to dataclasses

This commit is contained in:
yakhyo
2025-12-22 19:25:38 +09:00
parent 96306a0910
commit 9bcbfa65c2
18 changed files with 165 additions and 189 deletions

View File

@@ -48,7 +48,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"1.3.1\n"
"1.6.0\n"
]
}
],
@@ -140,13 +140,13 @@
"\n",
" # Draw detections\n",
" bbox_image = image.copy()\n",
" bboxes = [f['bbox'] for f in faces]\n",
" scores = [f['confidence'] for f in faces]\n",
" landmarks = [f['landmarks'] for f in faces]\n",
" bboxes = [f.bbox for f in faces]\n",
" scores = [f.confidence for f in faces]\n",
" landmarks = [f.landmarks for f in faces]\n",
" draw_detections(image=bbox_image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=0.6, fancy_bbox=True)\n",
"\n",
" # Align first detected face (returns aligned image and inverse transform matrix)\n",
" first_landmarks = faces[0]['landmarks']\n",
" first_landmarks = faces[0].landmarks\n",
" aligned_image, _ = face_alignment(image, first_landmarks, image_size=112)\n",
"\n",
" # Convert BGR to RGB for visualization\n",
@@ -202,7 +202,8 @@
"source": [
"## Notes\n",
"\n",
"- `detect()` returns a list of face dictionaries with `bbox`, `confidence`, `landmarks`\n",
"- `detect()` returns a list of `Face` objects with `bbox`, `confidence`, `landmarks` attributes\n",
"- Access attributes using dot notation: `face.bbox`, `face.landmarks`\n",
"- `face_alignment()` uses 5-point landmarks to align and crop the face\n",
"- Default output size is 112x112 (standard for face recognition models)\n"
]

View File

@@ -44,7 +44,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"1.3.1\n"
"1.6.0\n"
]
}
],

File diff suppressed because one or more lines are too long

View File

@@ -44,7 +44,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"1.3.1\n"
"1.6.0\n"
]
}
],
@@ -153,14 +153,14 @@
"# Load image\n",
"image = cv2.imread(image_path)\n",
"\n",
"# Detect faces - returns list of face dictionaries\n",
"# Detect faces - returns list of Face objects\n",
"faces = detector.detect(image)\n",
"print(f'Detected {len(faces)} face(s)')\n",
"\n",
"# Unpack face data for visualization\n",
"bboxes = [f['bbox'] for f in faces]\n",
"scores = [f['confidence'] for f in faces]\n",
"landmarks = [f['landmarks'] for f in faces]\n",
"bboxes = [f.bbox for f in faces]\n",
"scores = [f.confidence for f in faces]\n",
"landmarks = [f.landmarks for f in faces]\n",
"\n",
"# Draw detections\n",
"draw_detections(image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=0.6, fancy_bbox=True)\n",
@@ -211,9 +211,9 @@
"faces = detector.detect(image, max_num=2)\n",
"print(f'Detected {len(faces)} face(s)')\n",
"\n",
"bboxes = [f['bbox'] for f in faces]\n",
"scores = [f['confidence'] for f in faces]\n",
"landmarks = [f['landmarks'] for f in faces]\n",
"bboxes = [f.bbox for f in faces]\n",
"scores = [f.confidence for f in faces]\n",
"landmarks = [f.landmarks for f in faces]\n",
"\n",
"draw_detections(image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=0.6, fancy_bbox=True)\n",
"\n",
@@ -258,9 +258,9 @@
"faces = detector.detect(image, max_num=5)\n",
"print(f'Detected {len(faces)} face(s)')\n",
"\n",
"bboxes = [f['bbox'] for f in faces]\n",
"scores = [f['confidence'] for f in faces]\n",
"landmarks = [f['landmarks'] for f in faces]\n",
"bboxes = [f.bbox for f in faces]\n",
"scores = [f.confidence for f in faces]\n",
"landmarks = [f.landmarks for f in faces]\n",
"\n",
"draw_detections(image=image, bboxes=bboxes, scores=scores, landmarks=landmarks, vis_threshold=0.6, fancy_bbox=True)\n",
"\n",
@@ -274,7 +274,8 @@
"source": [
"## Notes\n",
"\n",
"- `detect()` returns a list of dictionaries with keys: `bbox`, `confidence`, `landmarks`\n",
"- `detect()` returns a list of `Face` objects with attributes: `bbox`, `confidence`, `landmarks`\n",
"- Access attributes using dot notation: `face.bbox`, `face.confidence`, `face.landmarks`\n",
"- Adjust `conf_thresh` and `nms_thresh` for your use case\n",
"- Use `max_num` to limit detected faces"
]

View File

@@ -46,7 +46,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"UniFace version: 1.5.0\n"
"UniFace version: 1.6.0\n"
]
}
],
@@ -365,7 +365,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "base",
"language": "python",
"name": "python3"
},
@@ -379,7 +379,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.0"
"version": "3.13.5"
}
},
"nbformat": 4,

View File

@@ -42,7 +42,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"1.3.1\n"
"1.6.0\n"
]
}
],

View File

@@ -37,7 +37,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"1.3.1\n"
"1.6.0\n"
]
}
],

View File

@@ -44,7 +44,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"UniFace version: 1.4.0\n"
"UniFace version: 1.6.0\n"
]
}
],
@@ -152,8 +152,7 @@
"\n",
" # Estimate gaze for each face\n",
" for i, face in enumerate(faces):\n",
" bbox = face['bbox']\n",
" x1, y1, x2, y2 = map(int, bbox[:4])\n",
" x1, y1, x2, y2 = map(int, face.bbox[:4])\n",
" face_crop = image[y1:y2, x1:x2]\n",
"\n",
" if face_crop.size > 0:\n",
@@ -164,7 +163,7 @@
" print(f' Face {i+1}: pitch={pitch_deg:.1f}°, yaw={yaw_deg:.1f}°')\n",
"\n",
" # Draw gaze without angle text\n",
" draw_gaze(image, bbox, pitch, yaw, draw_angles=False)\n",
" draw_gaze(image, face.bbox, pitch, yaw, draw_angles=False)\n",
"\n",
" # Convert BGR to RGB for display\n",
" original_rgb = cv2.cvtColor(original, cv2.COLOR_BGR2RGB)\n",
@@ -249,7 +248,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "base",
"language": "python",
"name": "python3"
},
@@ -263,7 +262,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.0"
"version": "3.13.5"
}
},
"nbformat": 4,