# Copyright 2025 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

"""Visualization utilities for UniFace.

This module provides functions for drawing detection results, gaze directions,
and face parsing segmentation maps on images.
"""

from __future__ import annotations

import cv2
import numpy as np

__all__ = [
    'FACE_PARSING_COLORS',
    'FACE_PARSING_LABELS',
    'draw_detections',
    'draw_fancy_bbox',
    'draw_gaze',
    'vis_parsing_maps',
]

# Face parsing component names (19 classes)
FACE_PARSING_LABELS = [
    'background',
    'skin',
    'l_brow',
    'r_brow',
    'l_eye',
    'r_eye',
    'eye_g',
    'l_ear',
    'r_ear',
    'ear_r',
    'nose',
    'mouth',
    'u_lip',
    'l_lip',
    'neck',
    'neck_l',
    'cloth',
    'hair',
    'hat',
]

# Color palette for face parsing visualization
FACE_PARSING_COLORS = [
    [0, 0, 0],
    [255, 85, 0],
    [255, 170, 0],
    [255, 0, 85],
    [255, 0, 170],
    [0, 255, 0],
    [85, 255, 0],
    [170, 255, 0],
    [0, 255, 85],
    [0, 255, 170],
    [0, 0, 255],
    [85, 0, 255],
    [170, 0, 255],
    [0, 85, 255],
    [0, 170, 255],
    [255, 255, 0],
    [255, 255, 85],
    [255, 255, 170],
    [255, 0, 255],
]

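# The two lists above are parallel: a pixel with class id `i` in a parsing
# mask carries the name FACE_PARSING_LABELS[i] and is rendered with
# FACE_PARSING_COLORS[i]. Illustrative lookup (values from the tables above):
#   >>> FACE_PARSING_LABELS[17], FACE_PARSING_COLORS[17]
#   ('hair', [255, 255, 170])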


def draw_detections(
    *,
    image: np.ndarray,
    bboxes: list[np.ndarray] | list[list[float]],
    scores: np.ndarray | list[float],
    landmarks: list[np.ndarray] | list[list[list[float]]],
    vis_threshold: float = 0.6,
    draw_score: bool = False,
    fancy_bbox: bool = True,
) -> None:
    """Draw bounding boxes, landmarks, and optional scores on an image.

    Modifies the image in-place.

    Args:
        image: Input image to draw on (modified in-place).
        bboxes: List of bounding boxes as [x1, y1, x2, y2].
        scores: List of confidence scores.
        landmarks: List of landmark sets with shape (5, 2).
        vis_threshold: Confidence threshold for filtering. Defaults to 0.6.
        draw_score: Whether to draw confidence scores. Defaults to False.
        fancy_bbox: Use corner-style bounding boxes. Defaults to True.

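    Example:
        A minimal sketch with hand-made inputs; any detector that produces
        [x1, y1, x2, y2] boxes, confidence scores, and 5-point landmarks
        can feed this function:

        >>> import numpy as np
        >>> canvas = np.zeros((480, 640, 3), dtype=np.uint8)
        >>> draw_detections(
        ...     image=canvas,
        ...     bboxes=[[100, 100, 300, 350]],
        ...     scores=[0.98],
        ...     landmarks=[[[150, 200], [250, 200], [200, 250], [160, 300], [240, 300]]],
        ...     draw_score=True,
        ... )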
    """
    colors = [(0, 0, 255), (0, 255, 255), (255, 0, 255), (0, 255, 0), (255, 0, 0)]

    # Calculate line thickness based on image size
    line_thickness = max(round(sum(image.shape[:2]) / 2 * 0.003), 2)

    # Filter detections by confidence threshold
    keep_indices = [i for i, score in enumerate(scores) if score >= vis_threshold]

    for i in keep_indices:
        bbox = np.array(bboxes[i], dtype=np.int32)
        score = scores[i]
        landmark_set = np.array(landmarks[i], dtype=np.int32)

        # Calculate dynamic font scale based on bbox height
        bbox_h = bbox[3] - bbox[1]
        font_scale = max(0.4, min(0.7, bbox_h / 200))
        font_thickness = 2

        # Draw bounding box
        if fancy_bbox:
            draw_fancy_bbox(image, bbox, color=(0, 255, 0), thickness=line_thickness, proportion=0.2)
        else:
            cv2.rectangle(image, tuple(bbox[:2]), tuple(bbox[2:]), (0, 255, 0), line_thickness)

        # Draw confidence score with background
        if draw_score:
            text = f'{score:.2f}'
            (text_width, text_height), baseline = cv2.getTextSize(
                text, cv2.FONT_HERSHEY_SIMPLEX, font_scale, font_thickness
            )

            # Draw background rectangle
            cv2.rectangle(
                image,
                (bbox[0], bbox[1] - text_height - baseline - 10),
                (bbox[0] + text_width + 10, bbox[1]),
                (0, 255, 0),
                -1,
            )

            # Draw text
            cv2.putText(
                image,
                text,
                (bbox[0] + 5, bbox[1] - 5),
                cv2.FONT_HERSHEY_SIMPLEX,
                font_scale,
                (0, 0, 0),
                font_thickness,
            )

        # Draw landmarks
        for j, point in enumerate(landmark_set):
            cv2.circle(image, tuple(point), line_thickness + 1, colors[j], -1)


def draw_fancy_bbox(
    image: np.ndarray,
    bbox: np.ndarray,
    color: tuple[int, int, int] = (0, 255, 0),
    thickness: int = 3,
    proportion: float = 0.2,
) -> None:
    """Draw a bounding box with fancy corners on an image.

    Args:
        image: Input image to draw on (modified in-place).
        bbox: Bounding box coordinates [x1, y1, x2, y2].
        color: Color of the bounding box in BGR. Defaults to green.
        thickness: Thickness of the corner lines. Defaults to 3.
        proportion: Proportion of corner length to box dimensions. Defaults to 0.2.

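    Example:
        A minimal sketch on a blank canvas:

        >>> import numpy as np
        >>> canvas = np.zeros((240, 320, 3), dtype=np.uint8)
        >>> draw_fancy_bbox(canvas, np.array([40, 40, 200, 180]), thickness=2)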
    """
    x1, y1, x2, y2 = map(int, bbox)
    width = x2 - x1
    height = y2 - y1

    corner_length = int(proportion * min(width, height))

    # Draw the rectangle
    cv2.rectangle(image, (x1, y1), (x2, y2), color, 1)

    # Top-left corner
    cv2.line(image, (x1, y1), (x1 + corner_length, y1), color, thickness)
    cv2.line(image, (x1, y1), (x1, y1 + corner_length), color, thickness)

    # Top-right corner
    cv2.line(image, (x2, y1), (x2 - corner_length, y1), color, thickness)
    cv2.line(image, (x2, y1), (x2, y1 + corner_length), color, thickness)

    # Bottom-left corner
    cv2.line(image, (x1, y2), (x1, y2 - corner_length), color, thickness)
    cv2.line(image, (x1, y2), (x1 + corner_length, y2), color, thickness)

    # Bottom-right corner
    cv2.line(image, (x2, y2), (x2, y2 - corner_length), color, thickness)
    cv2.line(image, (x2, y2), (x2 - corner_length, y2), color, thickness)


def draw_gaze(
    image: np.ndarray,
    bbox: np.ndarray,
    pitch: np.ndarray | float,
    yaw: np.ndarray | float,
    *,
    draw_bbox: bool = True,
    fancy_bbox: bool = True,
    draw_angles: bool = True,
) -> None:
    """Draw gaze direction with optional bounding box on an image.

    Args:
        image: Input image to draw on (modified in-place).
        bbox: Face bounding box [x1, y1, x2, y2].
        pitch: Vertical gaze angle in radians.
        yaw: Horizontal gaze angle in radians.
        draw_bbox: Whether to draw the bounding box. Defaults to True.
        fancy_bbox: Use fancy corner-style bbox. Defaults to True.
        draw_angles: Whether to display pitch/yaw values as text. Defaults to True.

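    Example:
        A minimal sketch with a dummy face box and angles in radians:

        >>> import numpy as np
        >>> frame = np.zeros((480, 640, 3), dtype=np.uint8)
        >>> draw_gaze(frame, np.array([200, 120, 440, 400]), pitch=0.1, yaw=-0.2)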
    """
    x_min, y_min, x_max, y_max = map(int, bbox[:4])

    # Calculate dynamic line thickness based on image size (same as draw_detections)
    line_thickness = max(round(sum(image.shape[:2]) / 2 * 0.003), 2)

    # Calculate dynamic font scale based on bbox height (same as draw_detections)
    bbox_h = y_max - y_min
    font_scale = max(0.4, min(0.7, bbox_h / 200))
    font_thickness = 2

    # Draw bounding box if requested
    if draw_bbox:
        if fancy_bbox:
            draw_fancy_bbox(image, bbox, color=(0, 255, 0), thickness=line_thickness)
        else:
            cv2.rectangle(image, (x_min, y_min), (x_max, y_max), (0, 255, 0), line_thickness)

    # Calculate center of the bounding box
    x_center = (x_min + x_max) // 2
    y_center = (y_min + y_max) // 2

    # Calculate the direction of the gaze
    length = x_max - x_min
    dx = int(-length * np.sin(pitch) * np.cos(yaw))
    dy = int(-length * np.sin(yaw))

    point1 = (x_center, y_center)
    point2 = (x_center + dx, y_center + dy)

    # Calculate dynamic center point radius based on line thickness
    center_radius = max(line_thickness + 1, 4)

    # Draw gaze direction
    cv2.circle(image, (x_center, y_center), radius=center_radius, color=(0, 0, 255), thickness=-1)
    cv2.arrowedLine(
        image,
        point1,
        point2,
        color=(0, 0, 255),
        thickness=line_thickness,
        line_type=cv2.LINE_AA,
        tipLength=0.25,
    )

    # Draw angle values
    if draw_angles:
        text = f'P:{np.degrees(pitch):.0f}deg Y:{np.degrees(yaw):.0f}deg'
        (text_width, text_height), baseline = cv2.getTextSize(
            text, cv2.FONT_HERSHEY_SIMPLEX, font_scale, font_thickness
        )

        # Draw background rectangle for text
        cv2.rectangle(
            image,
            (x_min, y_min - text_height - baseline - 10),
            (x_min + text_width + 10, y_min),
            (0, 0, 255),
            -1,
        )

        # Draw text
        cv2.putText(
            image,
            text,
            (x_min + 5, y_min - 5),
            cv2.FONT_HERSHEY_SIMPLEX,
            font_scale,
            (255, 255, 255),
            font_thickness,
        )


def vis_parsing_maps(
    image: np.ndarray,
    segmentation_mask: np.ndarray,
    *,
    save_image: bool = False,
    save_path: str = 'result.png',
) -> np.ndarray:
    """Visualize face parsing segmentation mask by overlaying colored regions.

    Args:
        image: Input face image in RGB format with shape (H, W, 3).
        segmentation_mask: Segmentation mask with shape (H, W) where each pixel
            value represents a facial component class (0-18).
        save_image: Whether to save the visualization to disk. Defaults to False.
        save_path: Path to save the visualization if save_image is True.

    Returns:
        Blended image with segmentation overlay in BGR format.

    Example:
        >>> import cv2
        >>> from uniface.parsing import BiSeNet
        >>> from uniface.visualization import vis_parsing_maps
        >>> parser = BiSeNet()
        >>> face_image = cv2.imread('face.jpg')
        >>> mask = parser.parse(face_image)
        >>> face_rgb = cv2.cvtColor(face_image, cv2.COLOR_BGR2RGB)
        >>> result = vis_parsing_maps(face_rgb, mask)
        >>> cv2.imwrite('parsed_face.jpg', result)
    """
    # Copy inputs and cast to uint8 so drawing never mutates the caller's arrays
    image = np.array(image).copy().astype(np.uint8)
    segmentation_mask = segmentation_mask.copy().astype(np.uint8)

    # Create a color mask
    segmentation_mask_color = np.zeros((segmentation_mask.shape[0], segmentation_mask.shape[1], 3))

    num_classes = np.max(segmentation_mask)

    for class_index in range(1, num_classes + 1):
        class_pixels = np.where(segmentation_mask == class_index)
        segmentation_mask_color[class_pixels[0], class_pixels[1], :] = FACE_PARSING_COLORS[class_index]

    segmentation_mask_color = segmentation_mask_color.astype(np.uint8)

    # Convert image to BGR format for blending
    bgr_image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

    # Blend the image with the segmentation mask
    blended_image = cv2.addWeighted(bgr_image, 0.6, segmentation_mask_color, 0.4, 0)

    # Save the result if required
    if save_image:
        cv2.imwrite(save_path, blended_image, [int(cv2.IMWRITE_JPEG_QUALITY), 100])

    return blended_image