Files
uniface/scripts/run_face_parsing.py
Yakhyokhuja Valikhujaev 50226041c9 refactor: Standardize naming conventions (#47)
* refactor: Standardize naming conventions

* chore: Update the version and re-run experiments

* chore: Improve code quality tooling and documentation

- Add pre-commit job to CI workflow for automated linting on PRs
- Update uniface/__init__.py with copyright header, module docstring,
  and logically grouped exports
- Revise CONTRIBUTING.md to reflect pre-commit handles all formatting
- Remove redundant ruff check from CI (now handled by pre-commit)
- Update build job Python version to 3.11 (matches requires-python)
2025-12-30 00:20:34 +09:00

176 lines
5.6 KiB
Python

# Face parsing on detected faces
# Usage: python run_face_parsing.py --image path/to/image.jpg
# python run_face_parsing.py --webcam
import argparse
import os
from pathlib import Path
import cv2
import numpy as np
from uniface import RetinaFace
from uniface.constants import ParsingWeights
from uniface.parsing import BiSeNet
from uniface.visualization import vis_parsing_maps
def expand_bbox(
bbox: np.ndarray,
image_shape: tuple[int, int],
expand_ratio: float = 0.2,
expand_top_ratio: float = 0.4,
) -> tuple[int, int, int, int]:
"""
Expand bounding box to include full head region for face parsing.
Face detection typically returns tight face boxes, but face parsing
requires the full head including hair, ears, and neck.
Args:
bbox: Original bounding box [x1, y1, x2, y2].
image_shape: Image dimensions as (height, width).
expand_ratio: Expansion ratio for left, right, and bottom (default: 0.2 = 20%).
expand_top_ratio: Expansion ratio for top to capture hair/forehead (default: 0.4 = 40%).
Returns:
Tuple[int, int, int, int]: Expanded bbox (x1, y1, x2, y2) clamped to image bounds.
"""
x1, y1, x2, y2 = map(int, bbox[:4])
height, width = image_shape[:2]
# Calculate face dimensions
face_width = x2 - x1
face_height = y2 - y1
# Calculate expansion amounts
expand_x = int(face_width * expand_ratio)
expand_y_bottom = int(face_height * expand_ratio)
expand_y_top = int(face_height * expand_top_ratio)
# Expand and clamp to image boundaries
new_x1 = max(0, x1 - expand_x)
new_y1 = max(0, y1 - expand_y_top)
new_x2 = min(width, x2 + expand_x)
new_y2 = min(height, y2 + expand_y_bottom)
return new_x1, new_y1, new_x2, new_y2
def process_image(detector, parser, image_path: str, save_dir: str = 'outputs', expand_ratio: float = 0.2):
image = cv2.imread(image_path)
if image is None:
print(f"Error: Failed to load image from '{image_path}'")
return
faces = detector.detect(image)
print(f'Detected {len(faces)} face(s)')
result_image = image.copy()
for i, face in enumerate(faces):
# Expand bbox to include full head for parsing
x1, y1, x2, y2 = expand_bbox(face.bbox, image.shape, expand_ratio=expand_ratio)
face_crop = image[y1:y2, x1:x2]
if face_crop.size == 0:
continue
# Parse the face
mask = parser.parse(face_crop)
print(f' Face {i + 1}: parsed with {len(set(mask.flatten()))} unique classes')
# Visualize the parsing result
face_crop_rgb = cv2.cvtColor(face_crop, cv2.COLOR_BGR2RGB)
vis_result = vis_parsing_maps(face_crop_rgb, mask, save_image=False)
# Place the visualization back on the original image
result_image[y1:y2, x1:x2] = vis_result
# Draw expanded bounding box
cv2.rectangle(result_image, (x1, y1), (x2, y2), (0, 255, 0), 2)
os.makedirs(save_dir, exist_ok=True)
output_path = os.path.join(save_dir, f'{Path(image_path).stem}_parsing.jpg')
cv2.imwrite(output_path, result_image)
print(f'Output saved: {output_path}')
def run_webcam(detector, parser, expand_ratio: float = 0.2):
cap = cv2.VideoCapture(0)
if not cap.isOpened():
print('Cannot open webcam')
return
print("Press 'q' to quit")
while True:
ret, frame = cap.read()
if not ret:
break
frame = cv2.flip(frame, 1)
faces = detector.detect(frame)
for face in faces:
# Expand bbox to include full head for parsing
x1, y1, x2, y2 = expand_bbox(face.bbox, frame.shape, expand_ratio=expand_ratio)
face_crop = frame[y1:y2, x1:x2]
if face_crop.size == 0:
continue
# Parse the face
mask = parser.parse(face_crop)
# Visualize the parsing result
face_crop_rgb = cv2.cvtColor(face_crop, cv2.COLOR_BGR2RGB)
vis_result = vis_parsing_maps(face_crop_rgb, mask, save_image=False)
# Place the visualization back on the frame
frame[y1:y2, x1:x2] = vis_result
# Draw expanded bounding box
cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
cv2.putText(frame, f'Faces: {len(faces)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
cv2.imshow('Face Parsing', frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
cap.release()
cv2.destroyAllWindows()
def main():
parser_arg = argparse.ArgumentParser(description='Run face parsing')
parser_arg.add_argument('--image', type=str, help='Path to input image')
parser_arg.add_argument('--webcam', action='store_true', help='Use webcam')
parser_arg.add_argument('--save_dir', type=str, default='outputs')
parser_arg.add_argument(
'--model', type=str, default=ParsingWeights.RESNET18, choices=[ParsingWeights.RESNET18, ParsingWeights.RESNET34]
)
parser_arg.add_argument(
'--expand-ratio',
type=float,
default=0.2,
help='Bbox expansion ratio for full head coverage (default: 0.2 = 20%%)',
)
args = parser_arg.parse_args()
if not args.image and not args.webcam:
parser_arg.error('Either --image or --webcam must be specified')
detector = RetinaFace()
parser = BiSeNet(model_name=ParsingWeights.RESNET34)
if args.webcam:
run_webcam(detector, parser, expand_ratio=args.expand_ratio)
else:
process_image(detector, parser, args.image, args.save_dir, expand_ratio=args.expand_ratio)
if __name__ == '__main__':
main()