# File: insightface/detection/retinaface_anticov/rcnn/processing/bbox_regression.py
# (264 lines, 10 KiB, Python)

"""
This file has functions about generating bounding box regression targets
"""
from ..pycocotools.mask import encode
import numpy as np
from ..logger import logger
from .bbox_transform import bbox_overlaps, bbox_transform
from rcnn.config import config
import math
import cv2
import PIL.Image as Image
import threading
import Queue
def compute_bbox_regression_targets(rois, overlaps, labels):
    """
    given rois, overlaps, gt labels, compute bounding box regression targets
    :param rois: roidb[i]['boxes'] k * 4
    :param overlaps: roidb[i]['max_overlaps'] k * 1
    :param labels: roidb[i]['max_classes'] k * 1
    :return: targets[i][class, dx, dy, dw, dh] k * 5 (float32); rows that are
        not selected for regression stay all-zero, and the whole array is zero
        when the image has no ground-truth roi
    """
    # Ensure ROIs are floats. np.float was only an alias of the builtin float
    # (float64) and has been removed from recent NumPy releases.
    rois = rois.astype(np.float64, copy=False)

    # Sanity check
    if len(rois) != len(overlaps):
        logger.warning('bbox regression: len(rois) != len(overlaps)')

    targets = np.zeros((rois.shape[0], 5), dtype=np.float32)

    # Indices of ground-truth ROIs
    gt_inds = np.where(overlaps == 1)[0]
    if len(gt_inds) == 0:
        logger.warning('bbox regression: len(gt_inds) == 0')
        # Without any ground truth there is nothing to regress to; going on
        # would fail in argmax over an empty axis below.
        return targets

    # Indices of examples for which we try to make predictions
    ex_inds = np.where(overlaps >= config.TRAIN.BBOX_REGRESSION_THRESH)[0]

    # Get IoU overlap between each ex ROI and gt ROI
    ex_gt_overlaps = bbox_overlaps(rois[ex_inds, :], rois[gt_inds, :])

    # Find which gt ROI each ex ROI has max overlap with:
    # this will be the ex ROI's gt target
    gt_assignment = ex_gt_overlaps.argmax(axis=1)
    gt_rois = rois[gt_inds[gt_assignment], :]
    ex_rois = rois[ex_inds, :]

    targets[ex_inds, 0] = labels[ex_inds]
    targets[ex_inds, 1:] = bbox_transform(ex_rois, gt_rois)
    return targets
def add_bbox_regression_targets(roidb):
    """
    attach ['bbox_targets'] to every roidb entry, then normalize the targets
    class by class
    :param roidb: roidb to be processed. must have gone through imdb.prepare_roidb
    :return: means, stds of the targets, each flattened with ravel()
    """
    logger.info('bbox regression: add bounding box regression targets')
    assert len(roidb) > 0
    assert 'max_classes' in roidb[0]

    class_total = roidb[0]['gt_overlaps'].shape[1]
    # class 0 is skipped everywhere below
    foreground = range(1, class_total)

    for entry in roidb:
        entry['bbox_targets'] = compute_bbox_regression_targets(
            entry['boxes'], entry['max_overlaps'], entry['max_classes'])

    if config.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED:
        # fixed / precomputed statistics, repeated once per class
        means = np.tile(np.array(config.TRAIN.BBOX_MEANS), (class_total, 1))
        stds = np.tile(np.array(config.TRAIN.BBOX_STDS), (class_total, 1))
    else:
        # empirical statistics; the tiny initial count avoids 0 / 0 for
        # classes that never occur
        counts = np.full((class_total, 1), 1e-14)
        totals = np.zeros((class_total, 4))
        squared_totals = np.zeros((class_total, 4))
        for entry in roidb:
            entry_targets = entry['bbox_targets']
            for c in foreground:
                rows = np.where(entry_targets[:, 0] == c)[0]
                if rows.size == 0:
                    continue
                deltas = entry_targets[rows, 1:]
                counts[c] += rows.size
                totals[c, :] += deltas.sum(axis=0)
                squared_totals[c, :] += (deltas ** 2).sum(axis=0)

        means = totals / counts
        # var(x) = E(x^2) - E(x)^2
        stds = np.sqrt(squared_totals / counts - means ** 2)

    # normalize the stored targets in place
    for entry in roidb:
        entry_targets = entry['bbox_targets']
        for c in foreground:
            rows = np.where(entry_targets[:, 0] == c)[0]
            entry_targets[rows, 1:] -= means[c, :]
            entry_targets[rows, 1:] /= stds[c, :]

    return means.ravel(), stds.ravel()
def expand_bbox_regression_targets(bbox_targets_data, num_classes):
    """
    spread compact [class, dx, dy, dw, dh] rows over 4 * num_classes columns;
    only the 4 columns belonging to a row's own class are filled
    :param bbox_targets_data: [k * 5]
    :param num_classes: number of classes
    :return: bbox_targets [k * 4 num_classes] and bbox_weights of the same
        shape; rows whose class is not > 0 stay all-zero in both
    """
    class_column = bbox_targets_data[:, 0]
    expanded_shape = (class_column.size, 4 * num_classes)
    bbox_targets = np.zeros(expanded_shape, dtype=np.float32)
    bbox_weights = np.zeros(expanded_shape, dtype=np.float32)

    for row in np.where(class_column > 0)[0]:
        # the class value is stored as a float, hence the int() on the offset
        first = int(4 * class_column[row])
        last = first + 4
        bbox_targets[row, first:last] = bbox_targets_data[row, 1:]
        bbox_weights[row, first:last] = config.TRAIN.BBOX_WEIGHTS

    return bbox_targets, bbox_weights
def compute_mask_and_label(ex_rois, ex_labels, seg, flipped):
    """
    given rois, their labels and an instance segmentation image, cut one
    28 * 28 binary mask per roi
    :param ex_rois: n * 4 boxes, read as [x1, y1, x2, y2]
    :param ex_labels: n labels, used as index into config.CLASS_ID
    :param seg: instance segmentation image, anything Image.open accepts;
        a pixel value divided by 1000 (floored) is compared with the class id
    :param flipped: when true the segmentation is mirrored horizontally first
    :return: mask_target n * 28 * 28 (int8), mask_label n (int8); rois without
        a matching instance keep an all-zero mask and label 0
    """
    # Close the image as soon as the pixels are copied out: this function is
    # called once per image from several threads, so leaked handles add up.
    with Image.open(seg) as im:
        pixel = list(im.getdata())
        ins_seg = np.array(pixel).reshape([im.size[1], im.size[0]])
    if flipped:
        ins_seg = ins_seg[:, ::-1]

    rois = ex_rois
    n_rois = ex_rois.shape[0]
    label = ex_labels
    class_id = config.CLASS_ID
    mask_target = np.zeros((n_rois, 28, 28), dtype=np.int8)
    mask_label = np.zeros((n_rois), dtype=np.int8)

    for n in range(n_rois):
        target = ins_seg[int(rois[n, 1]):int(rois[n, 3]),
                         int(rois[n, 0]):int(rois[n, 2])]
        best_id = 0
        best_iou = 0
        for inst_id in np.unique(target):
            if math.floor(inst_id / 1000) != class_id[int(label[int(n)])]:
                continue
            # bounding box of this instance over the whole image
            px = np.where(ins_seg == int(inst_id))
            x_min = np.min(px[1])
            y_min = np.min(px[0])
            x_max = np.max(px[1])
            y_max = np.max(px[0])
            x1 = max(rois[n, 0], x_min)
            y1 = max(rois[n, 1], y_min)
            x2 = min(rois[n, 2], x_max)
            y2 = min(rois[n, 3], y_max)
            # Clamp each extent at 0: two negative extents would otherwise
            # multiply into a positive "intersection" for boxes that do not
            # really overlap.
            inter = max(0, x2 - x1) * max(0, y2 - y1)
            union = ((rois[n, 2] - rois[n, 0]) * (rois[n, 3] - rois[n, 1]) +
                     (x_max - x_min) * (y_max - y_min) - inter)
            iou = inter / union
            if iou > best_iou:
                best_id = inst_id
                best_iou = iou

        if best_iou == 0:
            continue

        mask = np.zeros(target.shape)
        mask[np.where(target == best_id)] = 1
        mask = cv2.resize(mask, (28, 28), interpolation=cv2.INTER_NEAREST)
        mask_target[n] = mask
        mask_label[n] = label[int(n)]

    return mask_target, mask_label
def compute_bbox_mask_targets_and_label(rois, overlaps, labels, seg, flipped):
    """
    given rois, overlaps, gt labels, seg, compute mask targets for the rois
    whose overlap reaches config.TRAIN.BBOX_REGRESSION_THRESH
    :param rois: roidb[i]['boxes'] k * 4
    :param overlaps: roidb[i]['max_overlaps'] k * 1
    :param labels: roidb[i]['max_classes'] k * 1
    :param seg: roidb[i]['ins_seg'], handed on to compute_mask_and_label
    :param flipped: roidb[i]['flipped'], handed on to compute_mask_and_label
    :return: mask_targets, mask_label as returned by compute_mask_and_label
        for the selected rois, and ex_inds, the indices of those rois in `rois`
    """
    # Ensure ROIs are floats. np.float was only an alias of the builtin float
    # (float64) and has been removed from recent NumPy releases.
    rois = rois.astype(np.float64, copy=False)

    # Sanity check
    if len(rois) != len(overlaps):
        print('bbox regression: this should not happen')

    # Ground-truth ROIs are only needed for the diagnostic below
    gt_inds = np.where(overlaps == 1)[0]
    if len(gt_inds) == 0:
        print('something wrong : zero ground truth rois')

    # Indices of examples for which we try to make predictions
    ex_inds = np.where(overlaps >= config.TRAIN.BBOX_REGRESSION_THRESH)[0]
    ex_rois = rois[ex_inds, :]

    # The ex-to-gt assignment is not computed here: its result was never
    # used for the mask targets.
    mask_targets, mask_label = compute_mask_and_label(ex_rois, labels[ex_inds],
                                                      seg, flipped)
    return mask_targets, mask_label, ex_inds
def add_mask_targets(roidb):
    """
    given roidb, add ['mask_targets'], ['mask_labels'] and ['mask_inds'] to
    every entry, using 10 worker threads
    :param roidb: roidb to be processed. must have gone through imdb.prepare_roidb;
        each entry is read for 'boxes', 'max_overlaps', 'max_classes',
        'ins_seg' and 'flipped'
    :return: None, roidb is updated in place
    """
    print('add bounding box mask targets')
    assert len(roidb) > 0
    assert 'max_classes' in roidb[0]
    num_images = len(roidb)

    # Multi threads processing: every image index is queued once and the
    # workers drain the queue.
    im_quene = Queue.Queue(maxsize=0)
    for im_i in range(num_images):
        im_quene.put(im_i)

    def process():
        while True:
            # Take the next index without blocking. Checking empty() and then
            # calling get() lets another worker grab the last item in
            # between, leaving this one blocked forever and join() hanging.
            try:
                im_i = im_quene.get_nowait()
            except Queue.Empty:
                break
            print("-----process img {}".format(im_i))
            rois = roidb[im_i]['boxes']
            max_overlaps = roidb[im_i]['max_overlaps']
            max_classes = roidb[im_i]['max_classes']
            ins_seg = roidb[im_i]['ins_seg']
            flipped = roidb[im_i]['flipped']
            roidb[im_i]['mask_targets'], roidb[im_i]['mask_labels'], roidb[im_i]['mask_inds'] = \
                compute_bbox_mask_targets_and_label(rois, max_overlaps, max_classes, ins_seg, flipped)

    threads = [threading.Thread(target=process, args=()) for i in range(10)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()