mirror of
https://gitcode.com/gh_mirrors/eas/EasyFace.git
synced 2026-05-20 06:07:58 +00:00
731 lines
25 KiB
Python
731 lines
25 KiB
Python
# Copyright (c) Alibaba, Inc. and its affiliates.
|
|
|
|
import os
|
|
|
|
import cv2
|
|
import matplotlib
|
|
import matplotlib.cm as cm
|
|
import matplotlib.pyplot as plt
|
|
import numpy as np
|
|
from PIL import Image
|
|
|
|
from modelscope.outputs import OutputKeys
|
|
from modelscope.preprocessors.image import load_image
|
|
from modelscope.utils import logger as logging
|
|
|
|
logger = logging.get_logger()
|
|
|
|
|
|
def voc_ap(rec, prec):
    """Return the area under a precision/recall curve (VOC-style AP).

    Args:
        rec: sequence of recall values.
        prec: sequence of precision values, one per entry of ``rec``.

    Returns:
        Sum over every recall step of (recall increase) * (envelope
        precision at the end of that step).
    """
    # Sentinels: recall is padded with 0 and 1, precision with 0 on both ends.
    recall = np.concatenate(([0.], rec, [1.]))
    precision = np.concatenate(([0.], prec, [0.]))

    # Precision envelope: every entry becomes the maximum of itself and all
    # entries to its right (running maximum taken from the end).
    precision = np.maximum.accumulate(precision[::-1])[::-1]

    # Only positions where recall changes value contribute to the area.
    steps = np.where(recall[1:] != recall[:-1])[0]
    widths = recall[steps + 1] - recall[steps]
    return np.sum(widths * precision[steps + 1])
|
|
|
|
def image_eval(pred, gt, iou_thresh):
    """Evaluate the detections of a single image against its ground truth.

    Args:
        pred: (N, 5) array of detections. The first four columns are used
            as box corners ``x1, y1, x2, y2``; the fifth is not read here.
        gt: (M, 4) array of ground-truth boxes given as ``x, y, w, h``;
            they are converted to corner form on a copy.
        iou_thresh: minimum overlap for a detection to claim a
            ground-truth box.

    Returns:
        ``(pred_recall, proposal_list)``: ``pred_recall[h]`` is the number
        of distinct ground-truth boxes matched by detections ``0..h``;
        ``proposal_list`` is all ones (every detection counts as a
        proposal). Both have length N.
    """
    _pred = pred.copy()
    _gt = gt.copy()
    pred_recall = np.zeros(_pred.shape[0])
    recall_list = np.zeros(_gt.shape[0])
    proposal_list = np.ones(_pred.shape[0])

    # Without ground truth nothing can be recalled; bail out before the
    # max()/argmax() calls below, which raise on an empty overlap array.
    if _gt.shape[0] == 0:
        return pred_recall, proposal_list

    # Ground truth arrives as (x, y, w, h); turn it into (x1, y1, x2, y2).
    _gt[:, 2] = _gt[:, 2] + _gt[:, 0]
    _gt[:, 3] = _gt[:, 3] + _gt[:, 1]

    matched = 0  # running count of ground-truth boxes matched so far
    for h in range(_pred.shape[0]):
        gt_overlap = bbox_overlap(_gt, _pred[h])
        max_overlap, max_idx = gt_overlap.max(), gt_overlap.argmax()

        # Each ground-truth box may be claimed only once.
        if max_overlap >= iou_thresh and recall_list[max_idx] == 0:
            recall_list[max_idx] = 1
            matched += 1

        pred_recall[h] = matched

    return pred_recall, proposal_list
|
|
|
|
def img_pr_info(thresh_num, pred_info, proposal_list, pred_recall):
    """Collect per-threshold detection and recall counts for one image.

    Score thresholds run over ``1 - (t + 1) / thresh_num`` for
    ``t = 0 .. thresh_num - 1``.

    Args:
        thresh_num: number of score thresholds to sample.
        pred_info: detections array whose column 4 holds the score.
        proposal_list: per-detection flags; entries equal to 1 are counted.
        pred_recall: cumulative matched ground-truth count per detection.

    Returns:
        ``(pr_info, fp)``: ``pr_info[t] = [counted detections, matched
        ground truth]`` at threshold ``t``; ``fp`` flags the detection at
        which the detection count grew while the matched count did not.
    """
    pr_info = np.zeros((thresh_num, 2)).astype('float')
    fp = np.zeros((pred_info.shape[0],), dtype=np.int32)

    for t in range(thresh_num):
        thresh = 1 - (t + 1) / thresh_num
        kept = np.where(pred_info[:, 4] >= thresh)[0]
        if len(kept) == 0:
            pr_info[t, 0] = 0
            pr_info[t, 1] = 0
            continue

        # The last index passing the threshold bounds the prefix to count.
        last = kept[-1]
        pr_info[t, 0] = len(np.where(proposal_list[:last + 1] == 1)[0])
        pr_info[t, 1] = pred_recall[last]

        if t == 0:
            continue
        more_detections = pr_info[t, 0] > pr_info[t - 1, 0]
        same_recall = pr_info[t, 1] == pr_info[t - 1, 1]
        if more_detections and same_recall:
            fp[last] = 1

    return pr_info, fp
|
|
|
|
def gen_gt_info(img_gt):
    """Parse a ground-truth annotation file into a per-image box dict.

    A line containing ``'jpg'`` starts a new image entry keyed by the
    stripped line. Every following non-blank line is read as one box: its
    first four space-separated fields are converted to floats. Box lines
    must come after an image line.

    Args:
        img_gt: path of the annotation file.

    Returns:
        Dict mapping image name to a list of four-float box lists.
    """
    gt_info = {}
    # The context manager closes the file even if a line fails to parse.
    with open(img_gt) as fo:
        for line in fo:
            line = line.strip()
            if not line:
                # Blank lines carry no box and would break float('').
                continue
            if 'jpg' in line:
                img_name = line
                gt_info[img_name] = []
                continue
            gt_info[img_name].append(
                [float(item) for item in line.split(' ')[:4]])
    return gt_info
|
|
|
|
def dataset_pr_info(thresh_num, pr_curve, count_face):
    """Turn accumulated counts into a precision/recall curve.

    Args:
        thresh_num: number of rows (thresholds) to convert.
        pr_curve: array whose row ``i`` is ``[detections, matched]``.
        count_face: total number of ground-truth faces.

    Returns:
        ``(thresh_num, 2)`` array of ``[precision, recall]`` rows, where
        precision is ``matched / detections`` and recall is
        ``matched / count_face``.
    """
    curve = np.zeros((thresh_num, 2))
    for row in range(thresh_num):
        detections, matched = pr_curve[row, 0], pr_curve[row, 1]
        curve[row, 0] = matched / detections
        curve[row, 1] = matched / count_face
    return curve
|
|
|
|
def bbox_overlap(a, b):
    """Intersection-over-union of every box in ``a`` with the box ``b``.

    Boxes are ``x1, y1, x2, y2`` corners and widths/heights are computed
    with the inclusive ``+ 1`` convention.

    Args:
        a: 2-D array with one box per row (columns 0-3 are read).
        b: a single box (elements 0-3 are read).

    Returns:
        1-D array of overlaps, set to 0 where the boxes do not intersect.
    """
    left = np.maximum(a[:, 0], b[0])
    top = np.maximum(a[:, 1], b[1])
    right = np.minimum(a[:, 2], b[2])
    bottom = np.minimum(a[:, 3], b[3])

    inter_w = right - left + 1
    inter_h = bottom - top + 1
    intersection = inter_w * inter_h

    area_a = (a[:, 2] - a[:, 0] + 1) * (a[:, 3] - a[:, 1] + 1)
    area_b = (b[2] - b[0] + 1) * (b[3] - b[1] + 1)
    iou = intersection / (area_a + area_b - intersection)

    # A non-positive extent on either axis means there is no intersection
    # (the product of two negative extents would otherwise look valid).
    iou[(inter_w <= 0) | (inter_h <= 0)] = 0
    return iou
|
|
|
|
|
|
def numpy_to_cv2img(img_array):
    """Render a 2-D array as a JET-coloured cv2 image.

    Args:
        img_array (np.array): input data with shape (h, w).

    Returns:
        The colour-mapped image produced by ``cv2.applyColorMap``.
    """
    lowest = img_array.min()
    # The small epsilon keeps the division defined for constant input.
    normalised = (img_array - lowest) / (img_array.max() - lowest + 1e-5)
    as_bytes = (normalised * 255).astype(np.uint8)
    return cv2.applyColorMap(as_bytes, cv2.COLORMAP_JET)
|
|
|
|
|
|
def draw_joints(image, np_kps, score, threshold=0.2):
    """Draw a body skeleton and its joints onto ``image`` in place.

    Skeleton tables exist for 17- and 15-keypoint layouts. For any other
    keypoint count only the joint dots are drawn.

    Args:
        image: image to draw on (modified in place).
        np_kps: array of keypoints; row ``i`` holds ``x, y`` of joint ``i``.
        score: per-joint confidence, indexed like ``np_kps``.
        threshold: joints scoring below this are skipped; a bone is drawn
            only when both of its end joints reach the threshold.
    """
    # Per layout: (parent id of each joint, left-side ids, right-side ids).
    skeletons = {
        17: ([0, 0, 0, 1, 2, 0, 0, 5, 6, 7, 8, 5, 6, 11, 12, 13, 14],
             [1, 3, 5, 7, 9, 11, 13, 15],
             [2, 4, 6, 8, 10, 12, 14, 16]),
        15: ([0, 0, 1, 2, 3, 1, 5, 6, 14, 8, 9, 14, 11, 12, 1],
             [2, 3, 4, 8, 9, 10],
             [5, 6, 7, 11, 12, 13]),
    }

    layout = skeletons.get(np_kps.shape[0])
    if layout is not None:
        parent_ids, left_ids, right_ids = layout
        for i, pid in enumerate(parent_ids):
            if i == pid:
                # A joint that is its own parent is a root: no bone.
                continue

            # Check the parent joint's score (the previous code always
            # looked at joint 1, whatever the bone).
            if score[i] < threshold or score[pid] < threshold:
                continue

            if i in left_ids and pid in left_ids:
                color = (0, 255, 0)
            elif i in right_ids and pid in right_ids:
                color = (255, 0, 0)
            else:
                color = (0, 255, 255)

            cv2.line(image, (int(np_kps[i, 0]), int(np_kps[i, 1])),
                     (int(np_kps[pid, 0]), int(np_kps[pid, 1])), color, 3)

    for i in range(np_kps.shape[0]):
        if score[i] < threshold:
            continue
        cv2.circle(image, (int(np_kps[i, 0]), int(np_kps[i, 1])), 5,
                   (0, 0, 255), -1)
|
|
|
|
|
|
def draw_box(image, box):
    """Draw ``box`` (``x1, y1, x2, y2``) on ``image`` in place.

    The outline uses colour ``(0, 0, 255)`` with thickness 2; coordinates
    are truncated to integers.
    """
    top_left = (int(box[0]), int(box[1]))
    bottom_right = (int(box[2]), int(box[3]))
    cv2.rectangle(image, top_left, bottom_right, (0, 0, 255), 2)
|
|
|
|
|
|
def realtime_object_detection_bbox_vis(image, bboxes):
    """Draw every box of ``bboxes`` on ``image`` and return the image.

    Args:
        image: image to draw on (modified in place).
        bboxes: iterable of boxes as ``x1, y1, x2, y2``.

    Returns:
        The same ``image`` object, with outlines in colour ``(255, 0, 0)``
        and thickness 2.
    """
    for bbox in bboxes:
        # Cast to plain ints like the other drawing helpers in this file,
        # so float coordinates are accepted.
        top_left = (int(bbox[0]), int(bbox[1]))
        bottom_right = (int(bbox[2]), int(bbox[3]))
        cv2.rectangle(image, top_left, bottom_right, (255, 0, 0), 2)
    return image
|
|
|
|
|
|
def draw_keypoints(output, original_image):
    """Draw detected boxes and body skeletons on the image at a path.

    Args:
        output: result dict providing ``OutputKeys.KEYPOINTS``,
            ``OutputKeys.SCORES`` and ``OutputKeys.BOXES`` with one entry
            per detection.
        original_image: path of the image, read with flag ``-1``.

    Returns:
        The annotated image.

    Raises:
        AssertionError: if the three result lists differ in length or the
            image cannot be read.
    """
    poses = np.array(output[OutputKeys.KEYPOINTS])
    scores = np.array(output[OutputKeys.SCORES])
    boxes = np.array(output[OutputKeys.BOXES])
    assert len(poses) == len(scores) and len(poses) == len(boxes)

    image = cv2.imread(original_image, -1)
    # cv2.imread returns None rather than raising; fail clearly, in the
    # same way as the other draw_* helpers in this file.
    assert image is not None, f"Can't read img: {original_image}"

    for pose, score, box in zip(poses, scores, boxes):
        draw_box(image, np.array(box))
        draw_joints(image, np.array(pose), np.array(score))
    return image
|
|
|
|
|
|
def draw_106face_keypoints(in_path,
                           keypoints,
                           boxes,
                           scale=4.0,
                           save_path=None):
    """Draw face boxes and 106-point landmarks on an upscaled image.

    Boxes are drawn at the original resolution, the image is then resized
    by ``scale`` and the landmark polylines, dots and index labels are
    drawn on the resized image.

    Args:
        in_path: path of the input image.
        keypoints: per-face landmark lists; entry ``k`` of a face holds
            ``x, y`` in original-image coordinates.
        boxes: per-face boxes as ``x1, y1, x2, y2``.
        scale: resize factor applied to the image and to every landmark.
        save_path: if not None, the result is also written to this path.

    Returns:
        The annotated, resized image.

    Raises:
        AssertionError: if the image cannot be read.
    """
    # Polylines through landmark indices, drawn in this order. A path that
    # repeats its first index at the end is drawn as a closed loop.
    landmark_paths = [
        list(range(33)),  # face contour
        [33, 34, 35, 36, 37, 38, 39, 40, 41, 33],  # left eyebrow
        [42, 43, 44, 45, 46, 47, 48, 49, 50, 42],  # right eyebrow
        [66, 67, 68, 69, 70, 71, 72, 73, 66],  # left eye
        [75, 76, 77, 78, 79, 80, 81, 82, 75],  # right eye
        [51, 52, 53, 54],  # nose bridge
        [55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65],  # nose contour
        [84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 84],  # outer mouth
        [96, 97, 98, 99, 100, 101, 102, 103, 96],  # inner mouth
    ]

    img = cv2.imread(in_path)
    assert img is not None, f"Can't read img: {in_path}"

    for box in boxes:
        draw_box(img, np.array(box))

    image = cv2.resize(img, dsize=None, fx=scale, fy=scale)

    def to_pixel(pt):
        # Scale first, truncate second, so lines, dots and labels all land
        # on the same pixel.
        return (int(pt[0] * scale), int(pt[1] * scale))

    def draw_line(point_index, image, point):
        for cur_index, next_index in zip(point_index, point_index[1:]):
            cv2.line(image, to_pixel(point[cur_index]),
                     to_pixel(point[next_index]), (0, 0, 255), thickness=2)

    for points in keypoints:
        for path in landmark_paths:
            draw_line(path, image, points)

        for index, pt in enumerate(points):
            center = to_pixel(pt)
            cv2.putText(image, str(index), center, cv2.FONT_HERSHEY_SIMPLEX,
                        0.5, (0, 255, 0), 1)
            cv2.circle(image, center, 2, (0, 255, 0), cv2.FILLED)

    if save_path is not None:
        cv2.imwrite(save_path, image)

    return image
|
|
|
|
|
|
def draw_face_detection_no_lm_result(img_path, detection_result):
    """Draw face boxes and their scores (no landmarks) on an image.

    Args:
        img_path: path of the image to annotate.
        detection_result: dict providing ``OutputKeys.BOXES`` and
            ``OutputKeys.SCORES``.

    Returns:
        The annotated image. The number of faces is printed.
    """
    boxes = np.array(detection_result[OutputKeys.BOXES])
    confidences = np.array(detection_result[OutputKeys.SCORES])
    img = cv2.imread(img_path)
    assert img is not None, f"Can't read img: {img_path}"

    for index, confidence in enumerate(confidences):
        left, top, right, bottom = boxes[index].astype(np.int32)
        cv2.rectangle(img, (left, top), (right, bottom), (255, 0, 0), 2)
        # The score is written at the bottom-left corner of the box.
        cv2.putText(img,
                    f'{confidence:.2f}', (left, bottom),
                    1,
                    1.0, (0, 255, 0),
                    thickness=1,
                    lineType=8)

    print(f'Found {len(confidences)} faces')
    return img
|
|
|
|
|
|
def draw_facial_expression_result(img_path, facial_expression_result):
    """Write the top-scoring facial expression label onto an image.

    Args:
        img_path: path of the image to annotate.
        facial_expression_result: dict providing ``OutputKeys.SCORES`` and
            ``OutputKeys.LABELS``; the label at the arg-max score is used.

    Returns:
        The annotated image. The label text is also printed.
    """
    scores = facial_expression_result[OutputKeys.SCORES]
    labels = facial_expression_result[OutputKeys.LABELS]
    best_label = labels[np.argmax(scores)]

    img = cv2.imread(img_path)
    assert img is not None, f"Can't read img: {img_path}"

    caption = 'facial expression: {}'.format(best_label)
    cv2.putText(img,
                caption, (10, 10),
                1,
                1.0, (0, 255, 0),
                thickness=1,
                lineType=8)
    print(caption)
    return img
|
|
|
|
|
|
def draw_face_attribute_result(img_path, face_attribute_result):
    """Write the predicted gender and age interval onto an image.

    Args:
        img_path: path of the image to annotate.
        face_attribute_result: dict providing ``OutputKeys.SCORES`` and
            ``OutputKeys.LABELS``; index 0 of each is used for gender and
            index 1 for the age interval.

    Returns:
        The annotated image. Both captions are also logged at info level.
    """
    scores = face_attribute_result[OutputKeys.SCORES]
    labels = face_attribute_result[OutputKeys.LABELS]
    label_gender = labels[0][np.argmax(scores[0])]
    label_age = labels[1][np.argmax(scores[1])]

    img = cv2.imread(img_path)
    assert img is not None, f"Can't read img: {img_path}"

    gender_caption = 'face gender: {}'.format(label_gender)
    age_caption = 'face age interval: {}'.format(label_age)

    # (caption, text origin, colour) for each line of text.
    overlays = (
        (gender_caption, (10, 10), (0, 255, 0)),
        (age_caption, (10, 40), (255, 0, 0)),
    )
    for caption, origin, colour in overlays:
        cv2.putText(img,
                    caption,
                    origin,
                    1,
                    1.0,
                    colour,
                    thickness=1,
                    lineType=8)

    logger.info(gender_caption)
    logger.info(age_caption)
    return img
|
|
|
|
|
|
def draw_face_detection_result(img_path, detection_result):
    """Draw face boxes, landmarks and scores on an image.

    Args:
        img_path: path of the image to annotate.
        detection_result: dict providing ``OutputKeys.BOXES``,
            ``OutputKeys.KEYPOINTS`` and ``OutputKeys.SCORES``.

    Returns:
        The annotated image. The number of faces is printed.
    """
    boxes = np.array(detection_result[OutputKeys.BOXES])
    landmarks = np.array(detection_result[OutputKeys.KEYPOINTS])
    confidences = np.array(detection_result[OutputKeys.SCORES])
    img = cv2.imread(img_path)
    assert img is not None, f"Can't read img: {img_path}"

    for index, confidence in enumerate(confidences):
        box = boxes[index].astype(np.int32)
        # Flat landmark vectors are regrouped into (x, y) pairs.
        face_points = landmarks[index].reshape(-1, 2).astype(np.int32)
        left, top, right, bottom = box

        cv2.rectangle(img, (left, top), (right, bottom), (255, 0, 0), 2)
        for point in face_points:
            cv2.circle(img, tuple(point), 1, (0, 0, 255), 1)
        cv2.putText(img,
                    f'{confidence:.2f}', (left, bottom),
                    1,
                    1.0, (0, 255, 0),
                    thickness=1,
                    lineType=8)

    print(f'Found {len(confidences)} faces')
    return img
|
|
|
|
|
|
def draw_card_detection_result(img_path, detection_result):
    """Annotate detected cards and return rectified crops of each one.

    Args:
        img_path: path of the image to annotate.
        detection_result: dict providing ``OutputKeys.BOXES``,
            ``OutputKeys.KEYPOINTS`` (four corner points per card) and
            ``OutputKeys.SCORES``.

    Returns:
        A list whose first element is the annotated image, followed by one
        perspective-rectified crop per detected card.

    Raises:
        AssertionError: if the image cannot be read.
    """

    def warp_img(src_img, kps, ratio):
        """Warp the quadrilateral ``kps`` to an upright card image."""
        short_size = 500
        # The shorter side is fixed at ``short_size``; ``ratio`` is w / h.
        if ratio > 1:
            obj_h = short_size
            obj_w = int(obj_h * ratio)
        else:
            obj_w = short_size
            obj_h = int(obj_w / ratio)
        input_pts = np.float32([kps[0], kps[1], kps[2], kps[3]])
        # Corner order: bottom-left, top-left, top-right, bottom-right.
        output_pts = np.float32([[0, obj_h - 1], [0, 0], [obj_w - 1, 0],
                                 [obj_w - 1, obj_h - 1]])
        M = cv2.getPerspectiveTransform(input_pts, output_pts)
        return cv2.warpPerspective(src_img, M, (obj_w, obj_h))

    bboxes = np.array(detection_result[OutputKeys.BOXES])
    kpss = np.array(detection_result[OutputKeys.KEYPOINTS])
    scores = np.array(detection_result[OutputKeys.SCORES])
    ver_col = [(255, 0, 0), (0, 255, 0), (0, 0, 255), (0, 255, 255)]

    img = cv2.imread(img_path)
    assert img is not None, f"Can't read img: {img_path}"

    # Crops are taken from an untouched copy so that annotations drawn for
    # earlier cards never leak into the crops of later ones.
    clean_img = img.copy()
    img_list = [img]

    for i in range(len(scores)):
        bbox = bboxes[i].astype(np.int32)
        kps = kpss[i].reshape(-1, 2).astype(np.int32)

        # Squared side lengths decide between landscape and portrait.
        _w = (kps[0][0] - kps[3][0])**2 + (kps[0][1] - kps[3][1])**2
        _h = (kps[0][0] - kps[1][0])**2 + (kps[0][1] - kps[1][1])**2
        ratio = 1.59 if _w >= _h else 1 / 1.59
        img_list.append(warp_img(clean_img, kps, ratio))

        score = scores[i]
        x1, y1, x2, y2 = bbox
        cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 0), 4)
        for k, kp in enumerate(kps):
            cv2.circle(img, tuple(kp), 1, color=ver_col[k], thickness=10)
        cv2.putText(img,
                    f'{score:.2f}', (x1, y2),
                    1,
                    1.0, (0, 255, 0),
                    thickness=1,
                    lineType=8)
    return img_list
|
|
|
|
|
|
def created_boxed_image(image_in, box):
    """Load an image and draw one box on it.

    Args:
        image_in: image reference accepted by ``load_image``.
        box: box as ``x1, y1, x2, y2``; values are truncated to integers.

    Returns:
        The image converted with ``cv2.COLOR_RGB2BGR``, carrying the box
        in colour ``(0, 255, 0)`` with thickness 3.
    """
    loaded = load_image(image_in)
    img = cv2.cvtColor(np.asarray(loaded), cv2.COLOR_RGB2BGR)
    top_left = (int(box[0]), int(box[1]))
    bottom_right = (int(box[2]), int(box[3]))
    cv2.rectangle(img, top_left, bottom_right, (0, 255, 0), 3)
    return img
|
|
|
|
|
|
def show_video_tracking_result(video_in_path, bboxes, video_save_path):
    """Write a copy of a video with one tracking box drawn per frame.

    Args:
        video_in_path: path of the input video.
        bboxes: one box (``x1, y1, x2, y2``) per frame; only as many frames
            as there are boxes are processed.
        video_save_path: path of the MJPG-encoded output video.

    Raises:
        Exception: if a frame cannot be decoded.
    """
    cap = cv2.VideoCapture(video_in_path)
    video_writer = None
    try:
        for box in bboxes:
            success, frame = cap.read()
            if success is False:
                raise Exception(video_in_path,
                                ' can not be correctly decoded by OpenCV.')
            if video_writer is None:
                # The writer is created lazily: the output size is only
                # known once the first frame has been decoded.
                size = (frame.shape[1], frame.shape[0])
                fourcc = cv2.VideoWriter_fourcc('M', 'J', 'P', 'G')
                video_writer = cv2.VideoWriter(video_save_path, fourcc,
                                               cap.get(cv2.CAP_PROP_FPS),
                                               size, True)
            cv2.rectangle(frame, (int(box[0]), int(box[1])),
                          (int(box[2]), int(box[3])), (0, 255, 0), 5)
            video_writer.write(frame)
    finally:
        # release() must actually be called (it used to be referenced
        # without parentheses), and must also run after a decode failure.
        if video_writer is not None:
            video_writer.release()
        cap.release()
|
|
|
|
|
|
def show_video_object_detection_result(video_in_path, bboxes_list, labels_list,
                                       video_save_path):
    """Write a copy of a video with labelled, tinted detection boxes.

    Args:
        video_in_path: path of the input video.
        bboxes_list: per-frame arrays of boxes (``x1, y1, x2, y2``).
        labels_list: per-frame label sequences matching ``bboxes_list``;
            every label must be a key of the colour palette below.
        video_save_path: path of the MJPG-encoded output video.

    Raises:
        Exception: if a frame cannot be decoded.
    """
    PALETTE = {
        'person': [128, 0, 0],
        'bicycle': [128, 128, 0],
        'car': [64, 0, 0],
        'motorcycle': [0, 128, 128],
        'bus': [64, 128, 0],
        'truck': [192, 128, 0],
        'traffic light': [64, 0, 128],
        'stop sign': [192, 0, 128],
    }
    FONT_SCALE = 1e-3  # Adjust for larger font size in all images
    THICKNESS_SCALE = 1e-3  # Adjust for larger thickness in all images
    TEXT_Y_OFFSET_SCALE = 1e-2  # Adjust for larger Y-offset of text and bounding box

    from tqdm import tqdm
    import math

    cap = cv2.VideoCapture(video_in_path)
    video_writer = None
    try:
        with tqdm(total=len(bboxes_list)) as pbar:
            pbar.set_description(
                'Writing results to video: {}'.format(video_save_path))
            for frame_boxes, labels in zip(bboxes_list, labels_list):
                bboxes = frame_boxes.astype(int)
                success, frame = cap.read()
                if success is False:
                    raise Exception(
                        video_in_path,
                        ' can not be correctly decoded by OpenCV.')
                if video_writer is None:
                    # Created lazily: the output size comes from the first
                    # decoded frame.
                    size = (frame.shape[1], frame.shape[0])
                    fourcc = cv2.VideoWriter_fourcc('M', 'J', 'P', 'G')
                    video_writer = cv2.VideoWriter(
                        video_save_path, fourcc, cap.get(cv2.CAP_PROP_FPS),
                        size, True)

                H, W, _ = frame.shape
                # Filled boxes are painted on a black layer and blended in
                # afterwards to tint the detections.
                zeros_mask = np.zeros(frame.shape).astype(np.uint8)
                for bbox, label in zip(bboxes, labels):
                    color = PALETTE[label]
                    top_left = (int(bbox[0]), int(bbox[1]))
                    bottom_right = (int(bbox[2]), int(bbox[3]))
                    cv2.rectangle(frame, top_left, bottom_right, color, 1)
                    cv2.putText(
                        frame,
                        label,
                        (top_left[0],
                         top_left[1] - int(TEXT_Y_OFFSET_SCALE * H)),
                        fontFace=cv2.FONT_HERSHEY_TRIPLEX,
                        fontScale=min(H, W) * FONT_SCALE,
                        thickness=math.ceil(min(H, W) * THICKNESS_SCALE),
                        color=color)
                    zeros_mask = cv2.rectangle(zeros_mask,
                                               top_left,
                                               bottom_right,
                                               color=color,
                                               thickness=-1)

                frame = cv2.addWeighted(frame, 1., zeros_mask, .65, 0)
                video_writer.write(frame)
                pbar.update(1)
    finally:
        # release() must actually be called (it used to be referenced
        # without parentheses), and must also run after a decode failure.
        if video_writer is not None:
            video_writer.release()
        cap.release()
|
|
|
|
|
|
def panoptic_seg_masks_to_image(masks):
    """Paint panoptic masks into one colour image, one colour per mask.

    Colours come from the 133-entry ``'coco'`` palette of mmdet. A colour
    that is already taken (black is reserved for the background) is
    replaced through ``_get_bias_color`` until it is unique.

    Args:
        masks: non-empty sequence of 2-D masks sharing one shape; non-zero
            entries mark the pixels belonging to a mask.

    Returns:
        Array of shape ``(h, w, 3)``; later masks overwrite earlier ones
        where they overlap.
    """
    from mmdet.core.visualization.image import _get_bias_color
    from mmdet.core.visualization.palette import get_palette

    mask_palette = get_palette('coco', 133)
    draw_img = np.zeros([masks[0].shape[0], masks[0].shape[1], 3])

    # A set holding the black *tuple*. ``set([0, 0, 0])`` would build {0},
    # which can never equal a colour tuple, so black was not reserved.
    taken_colors = {(0, 0, 0)}
    for i, mask in enumerate(masks):
        color_mask = mask_palette[i]
        while tuple(color_mask) in taken_colors:
            color_mask = _get_bias_color(color_mask)
        taken_colors.add(tuple(color_mask))

        draw_img[mask.astype(bool)] = color_mask

    return draw_img
|
|
|
|
|
|
def semantic_seg_masks_to_image(masks):
    """Paint semantic masks into one colour image.

    Mask ``i`` is filled with entry ``i`` of the 133-entry ``'coco'``
    palette of mmdet.

    Args:
        masks: sequence of 2-D masks sharing one shape; non-zero entries
            mark the pixels belonging to a mask.

    Returns:
        Array of shape ``(h, w, 3)``; later masks overwrite earlier ones
        where they overlap.
    """
    from mmdet.core.visualization.palette import get_palette
    palette = get_palette('coco', 133)

    first = masks[0]
    canvas = np.zeros([first.shape[0], first.shape[1], 3])

    for index, mask in enumerate(masks):
        colour = palette[index]
        canvas[mask.astype(bool)] = colour
    return canvas
|
|
|
|
|
|
def show_video_summarization_result(video_in_path, result, video_save_path):
    """Write the frames selected by a summarization result to a new video.

    Args:
        video_in_path: path of the input video.
        result: dict whose ``OutputKeys.OUTPUT`` entry holds one flag per
            frame; frames whose flag equals 1 are kept.
        video_save_path: path of the MJPG-encoded output video.

    Raises:
        Exception: if a frame cannot be decoded.
    """
    frame_indexes = result[OutputKeys.OUTPUT]
    cap = cv2.VideoCapture(video_in_path)
    video_writer = None
    try:
        for idx in frame_indexes:
            success, frame = cap.read()
            if success is False:
                raise Exception(video_in_path,
                                ' can not be correctly decoded by OpenCV.')
            if video_writer is None:
                # Created lazily: the output size comes from the first
                # decoded frame.
                size = (frame.shape[1], frame.shape[0])
                fourcc = cv2.VideoWriter_fourcc('M', 'J', 'P', 'G')
                video_writer = cv2.VideoWriter(video_save_path, fourcc,
                                               cap.get(cv2.CAP_PROP_FPS),
                                               size, True)
            if idx == 1:
                video_writer.write(frame)
    finally:
        # Release both handles on an empty result (no writer was created)
        # and on a decode failure as well.
        if video_writer is not None:
            video_writer.release()
        cap.release()
|
|
|
|
|
|
def show_image_object_detection_auto_result(img_path,
                                            detection_result,
                                            save_path=None):
    """Draw detection boxes with their scores and labels on an image.

    Args:
        img_path: path of the image to annotate.
        detection_result: dict providing ``OutputKeys.SCORES``,
            ``OutputKeys.LABELS`` and ``OutputKeys.BOXES``.
        save_path: if not None, the result is also written to this path.

    Returns:
        The annotated image.
    """
    scores = detection_result[OutputKeys.SCORES]
    labels = detection_result[OutputKeys.LABELS]
    bboxes = detection_result[OutputKeys.BOXES]
    img = cv2.imread(img_path)
    assert img is not None, f"Can't read img: {img_path}"

    for score, label, box in zip(scores, labels, bboxes):
        left, top = int(box[0]), int(box[1])
        right, bottom = int(box[2]), int(box[3])
        cv2.rectangle(img, (left, top), (right, bottom), (0, 0, 255), 2)
        # Score goes to the top-left corner, label to the bottom-left one.
        captions = ((f'{score:.2f}', (left, top)), (label, (left, bottom)))
        for caption, origin in captions:
            cv2.putText(img,
                        caption,
                        origin,
                        1,
                        1.0, (0, 255, 0),
                        thickness=1,
                        lineType=8)

    if save_path is not None:
        cv2.imwrite(save_path, img)
    return img
|
|
|
|
|
|
def depth_to_color(depth):
    """Colour a depth map with the ``plasma`` colormap.

    The depth is inverted and normalised as ``(max - depth) / max`` before
    the colormap lookup.

    Args:
        depth: 2-D array of depth values.

    Returns:
        ``uint8`` colour image converted with ``cv2.COLOR_RGB2BGR``.
    """
    plasma = plt.get_cmap('plasma')
    inverted = (depth.max() - depth) / depth.max()
    # NOTE(review): the scale is 2**8, so a channel value of exactly 1.0
    # would not fit into uint8 - confirm the colormap never reaches it.
    rgba = (plasma(inverted) * 2**8).astype(np.uint8)
    rgb = rgba[:, :, :3]  # drop the alpha channel
    return cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)
|
|
|
|
|
|
def show_video_depth_estimation_result(depths, video_save_path):
    """Write a sequence of depth visualisations to a video file.

    Args:
        depths: sequence of three-dimensional frames; the first one fixes
            the output size. Each frame is cast to ``uint8`` and converted
            with ``cv2.COLOR_RGB2BGR`` before writing.
        video_save_path: path of the output video (``MP4V`` fourcc, 25 fps).
    """
    height, width, _ = depths[0].shape
    fourcc = cv2.VideoWriter_fourcc(*'MP4V')
    writer = cv2.VideoWriter(video_save_path, fourcc, 25, (width, height))
    for depth_frame in depths:
        as_bytes = depth_frame.astype(np.uint8)
        writer.write(cv2.cvtColor(as_bytes, cv2.COLOR_RGB2BGR))
    writer.release()
|
|
|
|
|
|
def show_image_driving_perception_result(img,
                                         results,
                                         out_file='result.jpg',
                                         if_draw=[1, 1, 1]):
    """Overlay driving-perception results on a 720x1280 image in place.

    Args:
        img: image of shape ``(720, 1280, 3)``; it is modified in place.
        results: dict-like providing ``OutputKeys.BOXES`` (its first entry
            holds the boxes) and ``OutputKeys.MASKS`` (two masks).
        out_file: if not None, the result is written to this path with its
            channel order reversed.
        if_draw: three flags enabling, in order, the boxes, the first mask
            (painted ``[0, 255, 0]``) and the second mask (painted
            ``[255, 0, 0]``).

    Returns:
        The annotated ``img``.
    """
    assert img.shape == (720, 1280,
                         3), 'input image shape need fix to (720, 1280, 3)'
    draw_boxes, draw_first_mask, draw_second_mask = (if_draw[0], if_draw[1],
                                                     if_draw[2])

    bboxes = results.get(OutputKeys.BOXES)[0]
    if draw_boxes:
        for x in bboxes:
            top_left = (int(x[0]), int(x[1]))
            bottom_right = (int(x[2]), int(x[3]))
            cv2.rectangle(img,
                          top_left,
                          bottom_right, [255, 255, 0],
                          thickness=2,
                          lineType=cv2.LINE_AA)

    masks = results.get(OutputKeys.MASKS)
    overlay = np.zeros((masks[0].shape[0], masks[0].shape[1], 3),
                       dtype=np.uint8)
    if draw_first_mask:
        overlay[masks[0] == 1] = [0, 255, 0]
    if draw_second_mask:
        overlay[masks[1] == 1] = [255, 0, 0]

    # Blend 50/50 only where the overlay carries a colour.
    painted = np.mean(overlay, 2) != 0
    img[painted] = img[painted] * 0.5 + overlay[painted] * 0.5

    if out_file is not None:
        cv2.imwrite(out_file, img[:, :, ::-1])
    return img
|
|
|
|
|
|
def masks_visualization(masks, palette):
    """Convert a stack of masks into palettised PIL images.

    Args:
        masks: array whose first axis indexes the individual masks.
        palette: palette data handed to ``Image.putpalette``.

    Returns:
        List with one PIL image per mask, in order.
    """

    def colourise(mask):
        picture = Image.fromarray(mask)
        picture.putpalette(palette)
        return picture

    return [colourise(masks[index]) for index in range(masks.shape[0])]
|
|
|
|
|
|
# This implementation is adopted from LoFTR,
# made publicly available under the Apache License, Version 2.0,
# at https://github.com/zju3dv/LoFTR
|
|
|
|
|
|
def make_matching_figure(img0,
                         img1,
                         mkpts0,
                         mkpts1,
                         color,
                         kpts0=None,
                         kpts1=None,
                         text=[],
                         dpi=75,
                         path=None):
    """Plot two images side by side with their matched keypoints linked.

    Args:
        img0: left image.
        img1: right image.
        mkpts0: matched keypoints (``x, y`` rows) in ``img0``.
        mkpts1: matched keypoints in ``img1``, one per row of ``mkpts0``.
        color: one colour per match, used for both line and endpoints.
        kpts0: optional extra keypoints of ``img0``, drawn in white.
        kpts1: extra keypoints of ``img1``; required when ``kpts0`` is set.
        text: lines of text written into the top-left corner.
        dpi: resolution of the created figure.
        path: if truthy, the figure is saved there and closed.

    Returns:
        The figure when ``path`` is falsy, otherwise None.
    """
    assert mkpts0.shape[0] == mkpts1.shape[
        0], f'mkpts0: {mkpts0.shape[0]} v.s. mkpts1: {mkpts1.shape[0]}'

    # Image pair, without ticks or frames.
    fig, axes = plt.subplots(1, 2, figsize=(10, 6), dpi=dpi)
    axes[0].imshow(img0, cmap='gray')
    axes[1].imshow(img1, cmap='gray')
    for ax in axes:
        ax.get_yaxis().set_ticks([])
        ax.get_xaxis().set_ticks([])
        for spine in ax.spines.values():
            spine.set_visible(False)
    plt.tight_layout(pad=1)

    if kpts0 is not None:
        assert kpts1 is not None
        axes[0].scatter(kpts0[:, 0], kpts0[:, 1], c='w', s=2)
        axes[1].scatter(kpts1[:, 0], kpts1[:, 1], c='w', s=2)

    # Matches: lines live in figure coordinates so they can cross axes.
    if not (mkpts0.shape[0] == 0 or mkpts1.shape[0] == 0):
        fig.canvas.draw()
        to_figure = fig.transFigure.inverted()
        fkpts0 = to_figure.transform(axes[0].transData.transform(mkpts0))
        fkpts1 = to_figure.transform(axes[1].transData.transform(mkpts1))

        match_lines = []
        for i in range(len(mkpts0)):
            match_lines.append(
                matplotlib.lines.Line2D((fkpts0[i, 0], fkpts1[i, 0]),
                                        (fkpts0[i, 1], fkpts1[i, 1]),
                                        transform=fig.transFigure,
                                        c=color[i],
                                        linewidth=1))
        fig.lines = match_lines

        axes[0].scatter(mkpts0[:, 0], mkpts0[:, 1], c=color, s=4)
        axes[1].scatter(mkpts1[:, 0], mkpts1[:, 1], c=color, s=4)

    # Text colour: black when the top-left corner is bright, white otherwise.
    if img0[:100, :200].mean() > 200:
        txt_color = 'k'
    else:
        txt_color = 'w'
    fig.text(0.01,
             0.99,
             '\n'.join(text),
             transform=fig.axes[0].transAxes,
             fontsize=15,
             va='top',
             ha='left',
             color=txt_color)

    # Without a path the caller gets the figure and owns it.
    if not path:
        return fig
    plt.savefig(str(path), bbox_inches='tight', pad_inches=0)
    plt.close()
|
|
|
|
|
|
def match_pair_visualization(img_name0,
                             img_name1,
                             kpts0,
                             kpts1,
                             conf,
                             output_filename='quadtree_match.png',
                             method='QuadTreeAttention'):
    """Save a side-by-side visualisation of keypoint matches.

    Args:
        img_name0: path of the first image.
        img_name1: path of the second image.
        kpts0: matched keypoints in the first image.
        kpts1: matched keypoints in the second image.
        conf: per-match confidence, mapped to colours with the jet colormap.
        output_filename: path the figure is saved to (300 dpi).
        method: name written as the first text line of the figure.

    Raises:
        AssertionError: if either image cannot be read.
    """
    print(f'Found {len(kpts0)} matches')

    img0 = cv2.imread(str(img_name0))
    assert img0 is not None, f"Can't read img: {img_name0}"
    img1 = cv2.imread(str(img_name1))
    assert img1 is not None, f"Can't read img: {img_name1}"

    # cv2.imread yields BGR while matplotlib's imshow expects RGB.
    img0 = cv2.cvtColor(img0, cv2.COLOR_BGR2RGB)
    img1 = cv2.cvtColor(img1, cv2.COLOR_BGR2RGB)

    color = cm.jet(conf)
    text = [
        method,
        'Matches: {}'.format(len(kpts0)),
    ]
    fig = make_matching_figure(img0, img1, kpts0, kpts1, color, text=text)

    try:
        fig.savefig(str(output_filename), dpi=300, bbox_inches='tight')
    finally:
        # pyplot keeps every figure alive until it is closed explicitly.
        plt.close(fig)
|