From 51e267a82dbbbbc06bab90a57ed6674bb34a0c1b Mon Sep 17 00:00:00 2001 From: nttstar Date: Thu, 18 Apr 2019 15:28:38 +0800 Subject: [PATCH] retinaface test code --- retinaface/Makefile | 6 + retinaface/rcnn/dataset/__init__.py | 2 + retinaface/rcnn/dataset/ds_utils.py | 16 + retinaface/rcnn/dataset/imdb.py | 318 +++++++++ retinaface/rcnn/dataset/retinaface.py | 181 ++++++ retinaface/rcnn/logger.py | 6 + retinaface/rcnn/processing/__init__.py | 0 retinaface/rcnn/processing/assign_levels.py | 37 ++ retinaface/rcnn/processing/bbox_regression.py | 255 ++++++++ retinaface/rcnn/processing/bbox_transform.py | 216 +++++++ retinaface/rcnn/processing/generate_anchor.py | 127 ++++ retinaface/rcnn/processing/nms.py | 64 ++ retinaface/retinaface.py | 610 ++++++++++++++++++ retinaface/test_widerface.py | 199 ++++++ 14 files changed, 2037 insertions(+) create mode 100644 retinaface/Makefile create mode 100644 retinaface/rcnn/dataset/__init__.py create mode 100644 retinaface/rcnn/dataset/ds_utils.py create mode 100644 retinaface/rcnn/dataset/imdb.py create mode 100644 retinaface/rcnn/dataset/retinaface.py create mode 100644 retinaface/rcnn/logger.py create mode 100644 retinaface/rcnn/processing/__init__.py create mode 100755 retinaface/rcnn/processing/assign_levels.py create mode 100644 retinaface/rcnn/processing/bbox_regression.py create mode 100644 retinaface/rcnn/processing/bbox_transform.py create mode 100644 retinaface/rcnn/processing/generate_anchor.py create mode 100644 retinaface/rcnn/processing/nms.py create mode 100644 retinaface/retinaface.py create mode 100644 retinaface/test_widerface.py diff --git a/retinaface/Makefile b/retinaface/Makefile new file mode 100644 index 0000000..66a3ed0 --- /dev/null +++ b/retinaface/Makefile @@ -0,0 +1,6 @@ +all: + cd rcnn/cython/; python setup.py build_ext --inplace; rm -rf build; cd ../../ + cd rcnn/pycocotools/; python setup.py build_ext --inplace; rm -rf build; cd ../../ +clean: + cd rcnn/cython/; rm *.so *.c *.cpp; cd ../../ + cd 
rcnn/pycocotools/; rm *.so; cd ../../ diff --git a/retinaface/rcnn/dataset/__init__.py b/retinaface/rcnn/dataset/__init__.py new file mode 100644 index 0000000..fcee572 --- /dev/null +++ b/retinaface/rcnn/dataset/__init__.py @@ -0,0 +1,2 @@ +from .imdb import IMDB +from .retinaface import retinaface diff --git a/retinaface/rcnn/dataset/ds_utils.py b/retinaface/rcnn/dataset/ds_utils.py new file mode 100644 index 0000000..9432515 --- /dev/null +++ b/retinaface/rcnn/dataset/ds_utils.py @@ -0,0 +1,16 @@ +import numpy as np + + +def unique_boxes(boxes, scale=1.0): + """ return indices of unique boxes """ + v = np.array([1, 1e3, 1e6, 1e9]) + hashes = np.round(boxes * scale).dot(v).astype(np.int) + _, index = np.unique(hashes, return_index=True) + return np.sort(index) + + +def filter_small_boxes(boxes, min_size): + w = boxes[:, 2] - boxes[:, 0] + h = boxes[:, 3] - boxes[:, 1] + keep = np.where((w >= min_size) & (h > min_size))[0] + return keep diff --git a/retinaface/rcnn/dataset/imdb.py b/retinaface/rcnn/dataset/imdb.py new file mode 100644 index 0000000..d034206 --- /dev/null +++ b/retinaface/rcnn/dataset/imdb.py @@ -0,0 +1,318 @@ +""" +General image database +An image database creates a list of relative image path called image_set_index and +transform index to absolute image path. As to training, it is necessary that ground +truth and proposals are mixed together for training. 
+roidb +basic format [image_index] +['image', 'height', 'width', 'flipped', +'boxes', 'gt_classes', 'gt_overlaps', 'max_classes', 'max_overlaps', 'bbox_targets'] +""" + +from ..logger import logger +import os +try: + import cPickle as pickle +except ImportError: + import pickle +import numpy as np +from ..processing.bbox_transform import bbox_overlaps + + +class IMDB(object): + def __init__(self, name, image_set, root_path, dataset_path): + """ + basic information about an image database + :param name: name of image database will be used for any output + :param root_path: root path store cache and proposal data + :param dataset_path: dataset path store images and image lists + """ + self.name = name + '_' + image_set + self.image_set = image_set + self.root_path = root_path + self.data_path = dataset_path + + # abstract attributes + self.classes = [] + self.num_classes = 0 + self.image_set_index = [] + self.num_images = 0 + + self.config = {} + + def image_path_from_index(self, index): + raise NotImplementedError + + def gt_roidb(self): + raise NotImplementedError + + def evaluate_detections(self, detections): + raise NotImplementedError + + @property + def cache_path(self): + """ + make a directory to store all caches + :return: cache path + """ + cache_path = os.path.join(self.root_path, 'cache') + if not os.path.exists(cache_path): + os.mkdir(cache_path) + return cache_path + + def image_path_at(self, index): + """ + access image at index in image database + :param index: image index in image database + :return: image path + """ + return self.image_path_from_index(self.image_set_index[index]) + + def load_rpn_data(self, full=False): + if full: + rpn_file = os.path.join(self.root_path, 'rpn_data', self.name + '_full_rpn.pkl') + else: + rpn_file = os.path.join(self.root_path, 'rpn_data', self.name + '_rpn.pkl') + assert os.path.exists(rpn_file), '%s rpn data not found at %s' % (self.name, rpn_file) + logger.info('%s loading rpn data from %s' % (self.name, 
rpn_file)) + with open(rpn_file, 'rb') as f: + box_list = pickle.load(f) + return box_list + + def load_rpn_roidb(self, gt_roidb): + """ + turn rpn detection boxes into roidb + :param gt_roidb: [image_index]['boxes', 'gt_classes', 'gt_overlaps', 'flipped'] + :return: roidb: [image_index]['boxes', 'gt_classes', 'gt_overlaps', 'flipped'] + """ + box_list = self.load_rpn_data() + return self.create_roidb_from_box_list(box_list, gt_roidb) + + def rpn_roidb(self, gt_roidb, append_gt=False): + """ + get rpn roidb and ground truth roidb + :param gt_roidb: ground truth roidb + :param append_gt: append ground truth + :return: roidb of rpn + """ + if append_gt: + logger.info('%s appending ground truth annotations' % self.name) + rpn_roidb = self.load_rpn_roidb(gt_roidb) + roidb = IMDB.merge_roidbs(gt_roidb, rpn_roidb) + else: + roidb = self.load_rpn_roidb(gt_roidb) + return roidb + + def create_roidb_from_box_list(self, box_list, gt_roidb): + """ + given ground truth, prepare roidb + :param box_list: [image_index] ndarray of [box_index][x1, x2, y1, y2] + :param gt_roidb: [image_index]['boxes', 'gt_classes', 'gt_overlaps', 'flipped'] + :return: roidb: [image_index]['boxes', 'gt_classes', 'gt_overlaps', 'flipped'] + """ + assert len(box_list) == self.num_images, 'number of boxes matrix must match number of images' + roidb = [] + for i in range(self.num_images): + roi_rec = dict() + roi_rec['image'] = gt_roidb[i]['image'] + roi_rec['height'] = gt_roidb[i]['height'] + roi_rec['width'] = gt_roidb[i]['width'] + + boxes = box_list[i] + if boxes.shape[1] == 5: + boxes = boxes[:, :4] + num_boxes = boxes.shape[0] + overlaps = np.zeros((num_boxes, self.num_classes), dtype=np.float32) + if gt_roidb is not None and gt_roidb[i]['boxes'].size > 0: + gt_boxes = gt_roidb[i]['boxes'] + gt_classes = gt_roidb[i]['gt_classes'] + # n boxes and k gt_boxes => n * k overlap + gt_overlaps = bbox_overlaps(boxes.astype(np.float), gt_boxes.astype(np.float)) + # for each box in n boxes, select only 
maximum overlap (must be greater than zero) + argmaxes = gt_overlaps.argmax(axis=1) + maxes = gt_overlaps.max(axis=1) + I = np.where(maxes > 0)[0] + overlaps[I, gt_classes[argmaxes[I]]] = maxes[I] + + roi_rec.update({'boxes': boxes, + 'gt_classes': np.zeros((num_boxes,), dtype=np.int32), + 'gt_overlaps': overlaps, + 'max_classes': overlaps.argmax(axis=1), + 'max_overlaps': overlaps.max(axis=1), + 'flipped': False}) + + # background roi => background class + zero_indexes = np.where(roi_rec['max_overlaps'] == 0)[0] + assert all(roi_rec['max_classes'][zero_indexes] == 0) + # foreground roi => foreground class + nonzero_indexes = np.where(roi_rec['max_overlaps'] > 0)[0] + assert all(roi_rec['max_classes'][nonzero_indexes] != 0) + + roidb.append(roi_rec) + + return roidb + + def append_flipped_images(self, roidb): + """ + append flipped images to an roidb + flip boxes coordinates, images will be actually flipped when loading into network + :param roidb: [image_index]['boxes', 'gt_classes', 'gt_overlaps', 'flipped'] + :return: roidb: [image_index]['boxes', 'gt_classes', 'gt_overlaps', 'flipped'] + """ + logger.info('%s append flipped images to roidb' % self.name) + assert self.num_images == len(roidb) + for i in range(self.num_images): + roi_rec = roidb[i] + entry = {'image': roi_rec['image'], + 'stream': roi_rec['stream'], + 'height': roi_rec['height'], + 'width': roi_rec['width'], + #'boxes': boxes, + 'gt_classes': roidb[i]['gt_classes'], + 'gt_overlaps': roidb[i]['gt_overlaps'], + 'max_classes': roidb[i]['max_classes'], + 'max_overlaps': roidb[i]['max_overlaps'], + 'flipped': True} + for k in roi_rec: + if not k.startswith('boxes'): + continue + boxes = roi_rec[k].copy() + oldx1 = boxes[:, 0].copy() + oldx2 = boxes[:, 2].copy() + boxes[:, 0] = roi_rec['width'] - oldx2 - 1 + boxes[:, 2] = roi_rec['width'] - oldx1 - 1 + assert (boxes[:, 2] >= boxes[:, 0]).all() + entry[k] = boxes + if 'landmarks' in roi_rec: + k = 'landmarks' + landmarks = roi_rec[k].copy() + 
landmarks[:,:,0] *= -1 + landmarks[:,:,0] += (roi_rec['width']-1) + #for a in range(0,10,2): + # oldx1 = landmarks[:, a].copy() + # landmarks[:,a] = roi_rec['width'] - oldx1 - 1 + order = [1,0,2,4,3] + flandmarks = landmarks.copy() + for idx, a in enumerate(order): + flandmarks[:, idx,:] = landmarks[:,a,:] + + entry[k] = flandmarks + if 'blur' in roi_rec: + entry['blur'] = roi_rec['blur'] + roidb.append(entry) + + self.image_set_index *= 2 + return roidb + + def evaluate_recall(self, roidb, candidate_boxes=None, thresholds=None): + """ + evaluate detection proposal recall metrics + record max overlap value for each gt box; return vector of overlap values + :param roidb: used to evaluate + :param candidate_boxes: if not given, use roidb's non-gt boxes + :param thresholds: array-like recall threshold + :return: None + ar: average recall, recalls: vector recalls at each IoU overlap threshold + thresholds: vector of IoU overlap threshold, gt_overlaps: vector of all ground-truth overlaps + """ + area_names = ['all', '0-25', '25-50', '50-100', + '100-200', '200-300', '300-inf'] + area_ranges = [[0**2, 1e5**2], [0**2, 25**2], [25**2, 50**2], [50**2, 100**2], + [100**2, 200**2], [200**2, 300**2], [300**2, 1e5**2]] + area_counts = [] + for area_name, area_range in zip(area_names[1:], area_ranges[1:]): + area_count = 0 + for i in range(self.num_images): + if candidate_boxes is None: + # default is use the non-gt boxes from roidb + non_gt_inds = np.where(roidb[i]['gt_classes'] == 0)[0] + boxes = roidb[i]['boxes'][non_gt_inds, :] + else: + boxes = candidate_boxes[i] + boxes_areas = (boxes[:, 2] - boxes[:, 0] + 1) * (boxes[:, 3] - boxes[:, 1] + 1) + valid_range_inds = np.where((boxes_areas >= area_range[0]) & (boxes_areas < area_range[1]))[0] + area_count += len(valid_range_inds) + area_counts.append(area_count) + total_counts = float(sum(area_counts)) + for area_name, area_count in zip(area_names[1:], area_counts): + logger.info('percentage of %s is %f' % (area_name, 
area_count / total_counts)) + logger.info('average number of proposal is %f' % (total_counts / self.num_images)) + for area_name, area_range in zip(area_names, area_ranges): + gt_overlaps = np.zeros(0) + num_pos = 0 + for i in range(self.num_images): + # check for max_overlaps == 1 avoids including crowd annotations + max_gt_overlaps = roidb[i]['gt_overlaps'].max(axis=1) + gt_inds = np.where((roidb[i]['gt_classes'] > 0) & (max_gt_overlaps == 1))[0] + gt_boxes = roidb[i]['boxes'][gt_inds, :] + gt_areas = (gt_boxes[:, 2] - gt_boxes[:, 0] + 1) * (gt_boxes[:, 3] - gt_boxes[:, 1] + 1) + valid_gt_inds = np.where((gt_areas >= area_range[0]) & (gt_areas < area_range[1]))[0] + gt_boxes = gt_boxes[valid_gt_inds, :] + num_pos += len(valid_gt_inds) + + if candidate_boxes is None: + # default is use the non-gt boxes from roidb + non_gt_inds = np.where(roidb[i]['gt_classes'] == 0)[0] + boxes = roidb[i]['boxes'][non_gt_inds, :] + else: + boxes = candidate_boxes[i] + if boxes.shape[0] == 0: + continue + + overlaps = bbox_overlaps(boxes.astype(np.float), gt_boxes.astype(np.float)) + + _gt_overlaps = np.zeros((gt_boxes.shape[0])) + # choose whatever is smaller to iterate + rounds = min(boxes.shape[0], gt_boxes.shape[0]) + for j in range(rounds): + # find which proposal maximally covers each gt box + argmax_overlaps = overlaps.argmax(axis=0) + # get the IoU amount of coverage for each gt box + max_overlaps = overlaps.max(axis=0) + # find which gt box is covered by most IoU + gt_ind = max_overlaps.argmax() + gt_ovr = max_overlaps.max() + assert (gt_ovr >= 0), '%s\n%s\n%s' % (boxes, gt_boxes, overlaps) + # find the proposal box that covers the best covered gt box + box_ind = argmax_overlaps[gt_ind] + # record the IoU coverage of this gt box + _gt_overlaps[j] = overlaps[box_ind, gt_ind] + assert (_gt_overlaps[j] == gt_ovr) + # mark the proposal box and the gt box as used + overlaps[box_ind, :] = -1 + overlaps[:, gt_ind] = -1 + # append recorded IoU coverage level + gt_overlaps = 
np.hstack((gt_overlaps, _gt_overlaps)) + + gt_overlaps = np.sort(gt_overlaps) + if thresholds is None: + step = 0.05 + thresholds = np.arange(0.5, 0.95 + 1e-5, step) + recalls = np.zeros_like(thresholds) + + # compute recall for each IoU threshold + for i, t in enumerate(thresholds): + recalls[i] = (gt_overlaps >= t).sum() / float(num_pos) + ar = recalls.mean() + + # print results + print('average recall for {}: {:.3f}, number:{}'.format(area_name, ar, num_pos)) + for threshold, recall in zip(thresholds, recalls): + print('recall @{:.2f}: {:.3f}'.format(threshold, recall)) + + @staticmethod + def merge_roidbs(a, b): + """ + merge roidbs into one + :param a: roidb to be merged into + :param b: roidb to be merged + :return: merged imdb + """ + assert len(a) == len(b) + for i in range(len(a)): + a[i]['boxes'] = np.vstack((a[i]['boxes'], b[i]['boxes'])) + a[i]['gt_classes'] = np.hstack((a[i]['gt_classes'], b[i]['gt_classes'])) + a[i]['gt_overlaps'] = np.vstack((a[i]['gt_overlaps'], b[i]['gt_overlaps'])) + a[i]['max_classes'] = np.hstack((a[i]['max_classes'], b[i]['max_classes'])) + a[i]['max_overlaps'] = np.hstack((a[i]['max_overlaps'], b[i]['max_overlaps'])) + return a diff --git a/retinaface/rcnn/dataset/retinaface.py b/retinaface/rcnn/dataset/retinaface.py new file mode 100644 index 0000000..d064f78 --- /dev/null +++ b/retinaface/rcnn/dataset/retinaface.py @@ -0,0 +1,181 @@ +from __future__ import print_function +try: + import cPickle as pickle +except ImportError: + import pickle +import cv2 +import os +import numpy as np +import json +from PIL import Image + +from ..logger import logger +from .imdb import IMDB +from .ds_utils import unique_boxes, filter_small_boxes +from ..config import config + +class retinaface(IMDB): + def __init__(self, image_set, root_path, data_path): + super(retinaface, self).__init__('retinaface', image_set, root_path, data_path) + #assert image_set=='train' + + split = image_set + self._split = image_set + self._image_set = image_set + + 
+ self.root_path = root_path + self.data_path = data_path + + + self._dataset_path = self.data_path + self._imgs_path = os.path.join(self._dataset_path, image_set, 'images') + self._fp_bbox_map = {} + label_file = os.path.join(self._dataset_path, image_set, 'label.txt') + name = None + for line in open(label_file, 'r'): + line = line.strip() + if line.startswith('#'): + name = line[1:].strip() + self._fp_bbox_map[name] = [] + continue + assert name is not None + assert name in self._fp_bbox_map + self._fp_bbox_map[name].append(line) + print('origin image size', len(self._fp_bbox_map)) + + #self.num_images = len(self._image_paths) + #self._image_index = range(len(self._image_paths)) + self.classes = ['bg', 'face'] + self.num_classes = len(self.classes) + + + def gt_roidb(self): + cache_file = os.path.join(self.cache_path, '{}_{}_gt_roidb.pkl'.format(self.name, self._split)) + if os.path.exists(cache_file): + with open(cache_file, 'rb') as fid: + roidb = pickle.load(fid) + print('{} gt roidb loaded from {}'.format(self.name, cache_file)) + self.num_images = len(roidb) + return roidb + + roidb = [] + max_num_boxes = 0 + nonattr_box_num = 0 + landmark_num = 0 + + for fp in self._fp_bbox_map: + if self._split=='test': + image_path = os.path.join(self._imgs_path, fp) + roi = {'image': image_path} + roidb.append(roi) + continue + boxes = np.zeros([len(self._fp_bbox_map[fp]), 4], np.float) + landmarks = np.zeros([len(self._fp_bbox_map[fp]), 5, 3], np.float) + blur = np.zeros((len(self._fp_bbox_map[fp]),), np.float) + boxes_mask = [] + + gt_classes = np.ones([len(self._fp_bbox_map[fp])], np.int32) + overlaps = np.zeros([len(self._fp_bbox_map[fp]), 2], np.float) + + ix = 0 + + for aline in self._fp_bbox_map[fp]: + imsize = Image.open(os.path.join(self._imgs_path, fp)).size + values = [float(x) for x in aline.strip().split()] + bbox = [values[0], values[1], values[0]+values[2], values[1]+values[3]] + + x1 = bbox[0] + y1 = bbox[1] + x2 = min(imsize[0], bbox[2]) + y2 = 
min(imsize[1], bbox[3]) + if x1>=x2 or y1>=y2: + continue + + if config.BBOX_MASK_THRESH>0: + if (x2 - x1) < config.BBOX_MASK_THRESH or y2 - y1 < config.BBOX_MASK_THRESH: + boxes_mask.append(np.array([x1, y1, x2, y2], np.float)) + continue + if (x2 - x1) < config.TRAIN.MIN_BOX_SIZE or y2 - y1 < config.TRAIN.MIN_BOX_SIZE: + continue + + boxes[ix, :] = np.array([x1, y1, x2, y2], np.float) + if self._split=='train': + landmark = np.array( values[4:19], dtype=np.float32 ).reshape((5,3)) + for li in range(5): + #print(landmark) + if landmark[li][0]==-1. and landmark[li][1]==-1.: #missing landmark + assert landmark[li][2]==-1 + else: + assert landmark[li][2]>=0 + if li==0: + landmark_num+=1 + if landmark[li][2]==0.0:#visible + landmark[li][2] = 1.0 + else: + landmark[li][2] = 0.0 + + landmarks[ix] = landmark + + blur[ix] = values[19] + #print(aline, blur[ix]) + if blur[ix]<0.0: + blur[ix] = 0.3 + nonattr_box_num+=1 + + cls = int(1) + gt_classes[ix] = cls + overlaps[ix, cls] = 1.0 + ix += 1 + max_num_boxes = max(max_num_boxes, ix) + #overlaps = scipy.sparse.csr_matrix(overlaps) + if self._split=='train' and ix==0: + continue + boxes = boxes[:ix,:] + landmarks = landmarks[:ix,:,:] + blur = blur[:ix] + gt_classes = gt_classes[:ix] + overlaps = overlaps[:ix,:] + image_path = os.path.join(self._imgs_path, fp) + with open(image_path, 'rb') as fin: + stream = fin.read() + stream = np.fromstring(stream, dtype=np.uint8) + + roi = { + 'image': image_path, + 'stream': stream, + 'height': imsize[1], + 'width': imsize[0], + 'boxes': boxes, + 'landmarks': landmarks, + 'blur': blur, + 'gt_classes': gt_classes, + 'gt_overlaps': overlaps, + 'max_classes': overlaps.argmax(axis=1), + 'max_overlaps': overlaps.max(axis=1), + 'flipped': False, + } + if len(boxes_mask)>0: + boxes_mask = np.array(boxes_mask) + roi['boxes_mask'] = boxes_mask + roidb.append(roi) + for roi in roidb: + roi['max_num_boxes'] = max_num_boxes + self.num_images = len(roidb) + print('roidb size', len(roidb)) + print('non 
attr box num', nonattr_box_num) + print('landmark num', landmark_num) + with open(cache_file, 'wb') as fid: + pickle.dump(roidb, fid, pickle.HIGHEST_PROTOCOL) + print('wrote gt roidb to {}'.format(cache_file)) + + return roidb + + def write_detections(self, all_boxes, output_dir='./output/'): + pass + + + def evaluate_detections(self, all_boxes, output_dir='./output/',method_name='insightdetection'): + pass + + diff --git a/retinaface/rcnn/logger.py b/retinaface/rcnn/logger.py new file mode 100644 index 0000000..2806e1a --- /dev/null +++ b/retinaface/rcnn/logger.py @@ -0,0 +1,6 @@ +import logging + +# set up logger +logging.basicConfig() +logger = logging.getLogger() +logger.setLevel(logging.INFO) diff --git a/retinaface/rcnn/processing/__init__.py b/retinaface/rcnn/processing/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/retinaface/rcnn/processing/assign_levels.py b/retinaface/rcnn/processing/assign_levels.py new file mode 100755 index 0000000..b237439 --- /dev/null +++ b/retinaface/rcnn/processing/assign_levels.py @@ -0,0 +1,37 @@ +from rcnn.config import config +import numpy as np + + +def compute_assign_targets(rois, threshold): + rois_area = np.sqrt((rois[:, 2] - rois[:, 0] + 1) * (rois[:, 3] - rois[:, 1] + 1)) + num_rois = np.shape(rois)[0] + assign_levels = np.zeros(num_rois, dtype=np.uint8) + for i, stride in enumerate(config.RCNN_FEAT_STRIDE): + thd = threshold[i] + idx = np.logical_and(thd[1] <= rois_area, rois_area < thd[0]) + assign_levels[idx] = stride + + assert 0 not in assign_levels, "All rois should assign to specify levels." + return assign_levels + + +def add_assign_targets(roidb): + """ + given roidb, add ['assign_level'] + :param roidb: roidb to be processed. 
must have gone through imdb.prepare_roidb + """ + print 'add assign targets' + assert len(roidb) > 0 + assert 'boxes' in roidb[0] + + area_threshold = [[np.inf, 448], + [448, 224], + [224, 112], + [112, 0]] + + assert len(config.RCNN_FEAT_STRIDE) == len(area_threshold) + + num_images = len(roidb) + for im_i in range(num_images): + rois = roidb[im_i]['boxes'] + roidb[im_i]['assign_levels'] = compute_assign_targets(rois, area_threshold) diff --git a/retinaface/rcnn/processing/bbox_regression.py b/retinaface/rcnn/processing/bbox_regression.py new file mode 100644 index 0000000..85ef246 --- /dev/null +++ b/retinaface/rcnn/processing/bbox_regression.py @@ -0,0 +1,255 @@ +""" +This file has functions about generating bounding box regression targets +""" + +from ..pycocotools.mask import encode +import numpy as np + +from ..logger import logger +from .bbox_transform import bbox_overlaps, bbox_transform +from rcnn.config import config +import math +import cv2 +import PIL.Image as Image +import threading +import Queue + + +def compute_bbox_regression_targets(rois, overlaps, labels): + """ + given rois, overlaps, gt labels, compute bounding box regression targets + :param rois: roidb[i]['boxes'] k * 4 + :param overlaps: roidb[i]['max_overlaps'] k * 1 + :param labels: roidb[i]['max_classes'] k * 1 + :return: targets[i][class, dx, dy, dw, dh] k * 5 + """ + # Ensure ROIs are floats + rois = rois.astype(np.float, copy=False) + + # Sanity check + if len(rois) != len(overlaps): + logger.warning('bbox regression: len(rois) != len(overlaps)') + + # Indices of ground-truth ROIs + gt_inds = np.where(overlaps == 1)[0] + if len(gt_inds) == 0: + logger.warning('bbox regression: len(gt_inds) == 0') + + # Indices of examples for which we try to make predictions + ex_inds = np.where(overlaps >= config.TRAIN.BBOX_REGRESSION_THRESH)[0] + + # Get IoU overlap between each ex ROI and gt ROI + ex_gt_overlaps = bbox_overlaps(rois[ex_inds, :], rois[gt_inds, :]) + + # Find which gt ROI each ex ROI 
has max overlap with: + # this will be the ex ROI's gt target + gt_assignment = ex_gt_overlaps.argmax(axis=1) + gt_rois = rois[gt_inds[gt_assignment], :] + ex_rois = rois[ex_inds, :] + + targets = np.zeros((rois.shape[0], 5), dtype=np.float32) + targets[ex_inds, 0] = labels[ex_inds] + targets[ex_inds, 1:] = bbox_transform(ex_rois, gt_rois) + return targets + + +def add_bbox_regression_targets(roidb): + """ + given roidb, add ['bbox_targets'] and normalize bounding box regression targets + :param roidb: roidb to be processed. must have gone through imdb.prepare_roidb + :return: means, std variances of targets + """ + logger.info('bbox regression: add bounding box regression targets') + assert len(roidb) > 0 + assert 'max_classes' in roidb[0] + + num_images = len(roidb) + num_classes = roidb[0]['gt_overlaps'].shape[1] + for im_i in range(num_images): + rois = roidb[im_i]['boxes'] + max_overlaps = roidb[im_i]['max_overlaps'] + max_classes = roidb[im_i]['max_classes'] + roidb[im_i]['bbox_targets'] = compute_bbox_regression_targets(rois, max_overlaps, max_classes) + + if config.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED: + # use fixed / precomputed means and stds instead of empirical values + means = np.tile(np.array(config.TRAIN.BBOX_MEANS), (num_classes, 1)) + stds = np.tile(np.array(config.TRAIN.BBOX_STDS), (num_classes, 1)) + else: + # compute mean, std values + class_counts = np.zeros((num_classes, 1)) + 1e-14 + sums = np.zeros((num_classes, 4)) + squared_sums = np.zeros((num_classes, 4)) + for im_i in range(num_images): + targets = roidb[im_i]['bbox_targets'] + for cls in range(1, num_classes): + cls_indexes = np.where(targets[:, 0] == cls)[0] + if cls_indexes.size > 0: + class_counts[cls] += cls_indexes.size + sums[cls, :] += targets[cls_indexes, 1:].sum(axis=0) + squared_sums[cls, :] += (targets[cls_indexes, 1:] ** 2).sum(axis=0) + + means = sums / class_counts + # var(x) = E(x^2) - E(x)^2 + stds = np.sqrt(squared_sums / class_counts - means ** 2) + + # normalized 
targets + for im_i in range(num_images): + targets = roidb[im_i]['bbox_targets'] + for cls in range(1, num_classes): + cls_indexes = np.where(targets[:, 0] == cls)[0] + roidb[im_i]['bbox_targets'][cls_indexes, 1:] -= means[cls, :] + roidb[im_i]['bbox_targets'][cls_indexes, 1:] /= stds[cls, :] + + return means.ravel(), stds.ravel() + + +def expand_bbox_regression_targets(bbox_targets_data, num_classes): + """ + expand from 5 to 4 * num_classes; only the right class has non-zero bbox regression targets + :param bbox_targets_data: [k * 5] + :param num_classes: number of classes + :return: bbox target processed [k * 4 num_classes] + bbox_weights ! only foreground boxes have bbox regression computation! + """ + classes = bbox_targets_data[:, 0] + bbox_targets = np.zeros((classes.size, 4 * num_classes), dtype=np.float32) + bbox_weights = np.zeros(bbox_targets.shape, dtype=np.float32) + indexes = np.where(classes > 0)[0] + for index in indexes: + cls = classes[index] + start = int(4 * cls) + end = start + 4 + bbox_targets[index, start:end] = bbox_targets_data[index, 1:] + bbox_weights[index, start:end] = config.TRAIN.BBOX_WEIGHTS + return bbox_targets, bbox_weights + + +def compute_mask_and_label(ex_rois, ex_labels, seg, flipped): + # assert os.path.exists(seg_gt), 'Path does not exist: {}'.format(seg_gt) + # im = Image.open(seg_gt) + # pixel = list(im.getdata()) + # pixel = np.array(pixel).reshape([im.size[1], im.size[0]]) + im = Image.open(seg) + pixel = list(im.getdata()) + ins_seg = np.array(pixel).reshape([im.size[1], im.size[0]]) + if flipped: + ins_seg = ins_seg[:, ::-1] + rois = ex_rois + n_rois = ex_rois.shape[0] + label = ex_labels + class_id = config.CLASS_ID + mask_target = np.zeros((n_rois, 28, 28), dtype=np.int8) + mask_label = np.zeros((n_rois), dtype=np.int8) + for n in range(n_rois): + target = ins_seg[int(rois[n, 1]): int(rois[n, 3]), int(rois[n, 0]): int(rois[n, 2])] + ids = np.unique(target) + ins_id = 0 + max_count = 0 + for id in ids: + if 
math.floor(id / 1000) == class_id[int(label[int(n)])]: + px = np.where(ins_seg == int(id)) + x_min = np.min(px[1]) + y_min = np.min(px[0]) + x_max = np.max(px[1]) + y_max = np.max(px[0]) + x1 = max(rois[n, 0], x_min) + y1 = max(rois[n, 1], y_min) + x2 = min(rois[n, 2], x_max) + y2 = min(rois[n, 3], y_max) + iou = (x2 - x1) * (y2 - y1) + iou = iou / ((rois[n, 2] - rois[n, 0]) * (rois[n, 3] - rois[n, 1]) + + (x_max - x_min) * (y_max - y_min) - iou) + if iou > max_count: + ins_id = id + max_count = iou + + if max_count == 0: + continue + # print max_count + mask = np.zeros(target.shape) + idx = np.where(target == ins_id) + mask[idx] = 1 + mask = cv2.resize(mask, (28, 28), interpolation=cv2.INTER_NEAREST) + + mask_target[n] = mask + mask_label[n] = label[int(n)] + return mask_target, mask_label + + +def compute_bbox_mask_targets_and_label(rois, overlaps, labels, seg, flipped): + """ + given rois, overlaps, gt labels, seg, compute bounding box mask targets + :param rois: roidb[i]['boxes'] k * 4 + :param overlaps: roidb[i]['max_overlaps'] k * 1 + :param labels: roidb[i]['max_classes'] k * 1 + :return: targets[i][class, dx, dy, dw, dh] k * 5 + """ + # Ensure ROIs are floats + rois = rois.astype(np.float, copy=False) + + # Sanity check + if len(rois) != len(overlaps): + print 'bbox regression: this should not happen' + + # Indices of ground-truth ROIs + gt_inds = np.where(overlaps == 1)[0] + if len(gt_inds) == 0: + print 'something wrong : zero ground truth rois' + # Indices of examples for which we try to make predictions + ex_inds = np.where(overlaps >= config.TRAIN.BBOX_REGRESSION_THRESH)[0] + + # Get IoU overlap between each ex ROI and gt ROI + ex_gt_overlaps = bbox_overlaps(rois[ex_inds, :], rois[gt_inds, :]) + + + # Find which gt ROI each ex ROI has max overlap with: + # this will be the ex ROI's gt target + gt_assignment = ex_gt_overlaps.argmax(axis=1) + gt_rois = rois[gt_inds[gt_assignment], :] + ex_rois = rois[ex_inds, :] + + mask_targets, mask_label = 
compute_mask_and_label(ex_rois, labels[ex_inds], seg, flipped) + return mask_targets, mask_label, ex_inds + +def add_mask_targets(roidb): + """ + given roidb, add ['bbox_targets'] and normalize bounding box regression targets + :param roidb: roidb to be processed. must have gone through imdb.prepare_roidb + :return: means, std variances of targets + """ + print 'add bounding box mask targets' + assert len(roidb) > 0 + assert 'max_classes' in roidb[0] + + num_images = len(roidb) + + # Multi threads processing + im_quene = Queue.Queue(maxsize=0) + for im_i in range(num_images): + im_quene.put(im_i) + + def process(): + while not im_quene.empty(): + im_i = im_quene.get() + print "-----process img {}".format(im_i) + rois = roidb[im_i]['boxes'] + max_overlaps = roidb[im_i]['max_overlaps'] + max_classes = roidb[im_i]['max_classes'] + ins_seg = roidb[im_i]['ins_seg'] + flipped = roidb[im_i]['flipped'] + roidb[im_i]['mask_targets'], roidb[im_i]['mask_labels'], roidb[im_i]['mask_inds'] = \ + compute_bbox_mask_targets_and_label(rois, max_overlaps, max_classes, ins_seg, flipped) + threads = [threading.Thread(target=process, args=()) for i in xrange(10)] + for t in threads: t.start() + for t in threads: t.join() + # Single thread + # for im_i in range(num_images): + # print "-----processing img {}".format(im_i) + # rois = roidb[im_i]['boxes'] + # max_overlaps = roidb[im_i]['max_overlaps'] + # max_classes = roidb[im_i]['max_classes'] + # ins_seg = roidb[im_i]['ins_seg'] + # # roidb[im_i]['mask_targets'] = compute_bbox_mask_targets(rois, max_overlaps, max_classes, ins_seg) + # roidb[im_i]['mask_targets'], roidb[im_i]['mask_labels'], roidb[im_i]['mask_inds'] = \ + # compute_bbox_mask_targets_and_label(rois, max_overlaps, max_classes, ins_seg) diff --git a/retinaface/rcnn/processing/bbox_transform.py b/retinaface/rcnn/processing/bbox_transform.py new file mode 100644 index 0000000..eb634a9 --- /dev/null +++ b/retinaface/rcnn/processing/bbox_transform.py @@ -0,0 +1,216 @@ +import 
numpy as np +from ..cython.bbox import bbox_overlaps_cython +from rcnn.config import config + + +def bbox_overlaps(boxes, query_boxes): + return bbox_overlaps_cython(boxes, query_boxes) + + +def bbox_overlaps_py(boxes, query_boxes): + """ + determine overlaps between boxes and query_boxes + :param boxes: n * 4 bounding boxes + :param query_boxes: k * 4 bounding boxes + :return: overlaps: n * k overlaps + """ + n_ = boxes.shape[0] + k_ = query_boxes.shape[0] + overlaps = np.zeros((n_, k_), dtype=np.float) + for k in range(k_): + query_box_area = (query_boxes[k, 2] - query_boxes[k, 0] + 1) * (query_boxes[k, 3] - query_boxes[k, 1] + 1) + for n in range(n_): + iw = min(boxes[n, 2], query_boxes[k, 2]) - max(boxes[n, 0], query_boxes[k, 0]) + 1 + if iw > 0: + ih = min(boxes[n, 3], query_boxes[k, 3]) - max(boxes[n, 1], query_boxes[k, 1]) + 1 + if ih > 0: + box_area = (boxes[n, 2] - boxes[n, 0] + 1) * (boxes[n, 3] - boxes[n, 1] + 1) + all_area = float(box_area + query_box_area - iw * ih) + overlaps[n, k] = iw * ih / all_area + return overlaps + + +def clip_boxes(boxes, im_shape): + """ + Clip boxes to image boundaries. 
+ :param boxes: [N, 4* num_classes] + :param im_shape: tuple of 2 + :return: [N, 4* num_classes] + """ + # x1 >= 0 + boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0) + # y1 >= 0 + boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0) + # x2 < im_shape[1] + boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0) + # y2 < im_shape[0] + boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0) + return boxes + + +def nonlinear_transform(ex_rois, gt_rois): + """ + compute bounding box regression targets from ex_rois to gt_rois + :param ex_rois: [N, 4] + :param gt_rois: [N, 4] + :return: [N, 4] + """ + assert ex_rois.shape[0] == gt_rois.shape[0], 'inconsistent rois number' + + ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0 + ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0 + ex_ctr_x = ex_rois[:, 0] + 0.5 * (ex_widths - 1.0) + ex_ctr_y = ex_rois[:, 1] + 0.5 * (ex_heights - 1.0) + + gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0 + gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0 + gt_ctr_x = gt_rois[:, 0] + 0.5 * (gt_widths - 1.0) + gt_ctr_y = gt_rois[:, 1] + 0.5 * (gt_heights - 1.0) + + targets_dx = (gt_ctr_x - ex_ctr_x) / (ex_widths + 1e-14) + targets_dy = (gt_ctr_y - ex_ctr_y) / (ex_heights + 1e-14) + targets_dw = np.log(gt_widths / ex_widths) + targets_dh = np.log(gt_heights / ex_heights) + + if gt_rois.shape[1]<=4: + targets = np.vstack( + (targets_dx, targets_dy, targets_dw, targets_dh)).transpose() + return targets + else: + targets = [targets_dx, targets_dy, targets_dw, targets_dh] + if config.USE_BLUR: + for i in range(4, gt_rois.shape[1]): + t = gt_rois[:,i] + targets.append(t) + targets = np.vstack(targets).transpose() + return targets + +def landmark_transform(ex_rois, gt_rois): + + assert ex_rois.shape[0] == gt_rois.shape[0], 'inconsistent rois number' + + ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0 + ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0 + ex_ctr_x = 
ex_rois[:, 0] + 0.5 * (ex_widths - 1.0) + ex_ctr_y = ex_rois[:, 1] + 0.5 * (ex_heights - 1.0) + + + targets = [] + for i in range(gt_rois.shape[1]): + for j in range(gt_rois.shape[2]): + if not config.USE_OCCLUSION and j==2: + continue + if j==0: #w + target = (gt_rois[:,i,j] - ex_ctr_x) / (ex_widths + 1e-14) + elif j==1: #h + target = (gt_rois[:,i,j] - ex_ctr_y) / (ex_heights + 1e-14) + else: #visibile + target = gt_rois[:,i,j] + targets.append(target) + + + targets = np.vstack(targets).transpose() + return targets + + +def nonlinear_pred(boxes, box_deltas): + """ + Transform the set of class-agnostic boxes into class-specific boxes + by applying the predicted offsets (box_deltas) + :param boxes: !important [N 4] + :param box_deltas: [N, 4 * num_classes] + :return: [N 4 * num_classes] + """ + if boxes.shape[0] == 0: + return np.zeros((0, box_deltas.shape[1])) + + boxes = boxes.astype(np.float, copy=False) + widths = boxes[:, 2] - boxes[:, 0] + 1.0 + heights = boxes[:, 3] - boxes[:, 1] + 1.0 + ctr_x = boxes[:, 0] + 0.5 * (widths - 1.0) + ctr_y = boxes[:, 1] + 0.5 * (heights - 1.0) + + dx = box_deltas[:, 0::4] + dy = box_deltas[:, 1::4] + dw = box_deltas[:, 2::4] + dh = box_deltas[:, 3::4] + + pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis] + pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis] + pred_w = np.exp(dw) * widths[:, np.newaxis] + pred_h = np.exp(dh) * heights[:, np.newaxis] + + pred_boxes = np.zeros(box_deltas.shape) + # x1 + pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * (pred_w - 1.0) + # y1 + pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * (pred_h - 1.0) + # x2 + pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * (pred_w - 1.0) + # y2 + pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * (pred_h - 1.0) + + return pred_boxes + +def landmark_pred(boxes, landmark_deltas): + if boxes.shape[0] == 0: + return np.zeros((0, landmark_deltas.shape[1])) + boxes = boxes.astype(np.float, copy=False) + widths = boxes[:, 2] - boxes[:, 0] + 1.0 + heights = boxes[:, 3] - 
boxes[:, 1] + 1.0 + ctr_x = boxes[:, 0] + 0.5 * (widths - 1.0) + ctr_y = boxes[:, 1] + 0.5 * (heights - 1.0) + preds = [] + for i in range(landmark_deltas.shape[1]): + if i%2==0: + pred = (landmark_deltas[:,i]*widths + ctr_x) + else: + pred = (landmark_deltas[:,i]*heights + ctr_y) + preds.append(pred) + preds = np.vstack(preds).transpose() + return preds + +def iou_transform(ex_rois, gt_rois): + """ return bbox targets, IoU loss uses gt_rois as gt """ + assert ex_rois.shape[0] == gt_rois.shape[0], 'inconsistent rois number' + return gt_rois + + +def iou_pred(boxes, box_deltas): + """ + Transform the set of class-agnostic boxes into class-specific boxes + by applying the predicted offsets (box_deltas) + :param boxes: !important [N 4] + :param box_deltas: [N, 4 * num_classes] + :return: [N 4 * num_classes] + """ + if boxes.shape[0] == 0: + return np.zeros((0, box_deltas.shape[1])) + + boxes = boxes.astype(np.float, copy=False) + x1 = boxes[:, 0] + y1 = boxes[:, 1] + x2 = boxes[:, 2] + y2 = boxes[:, 3] + + dx1 = box_deltas[:, 0::4] + dy1 = box_deltas[:, 1::4] + dx2 = box_deltas[:, 2::4] + dy2 = box_deltas[:, 3::4] + + pred_boxes = np.zeros(box_deltas.shape) + # x1 + pred_boxes[:, 0::4] = dx1 + x1[:, np.newaxis] + # y1 + pred_boxes[:, 1::4] = dy1 + y1[:, np.newaxis] + # x2 + pred_boxes[:, 2::4] = dx2 + x2[:, np.newaxis] + # y2 + pred_boxes[:, 3::4] = dy2 + y2[:, np.newaxis] + + return pred_boxes + + +# define bbox_transform and bbox_pred +bbox_transform = nonlinear_transform +bbox_pred = nonlinear_pred diff --git a/retinaface/rcnn/processing/generate_anchor.py b/retinaface/rcnn/processing/generate_anchor.py new file mode 100644 index 0000000..e9330ea --- /dev/null +++ b/retinaface/rcnn/processing/generate_anchor.py @@ -0,0 +1,127 @@ +""" +Generate base anchors on index 0 +""" +from __future__ import print_function +import sys +from builtins import range +import numpy as np +from ..cython.anchors import anchors_cython +from ..config import config + + +def 
anchors_plane(feat_h, feat_w, stride, base_anchor): + return anchors_cython(feat_h, feat_w, stride, base_anchor) + +def generate_anchors(base_size=16, ratios=[0.5, 1, 2], + scales=2 ** np.arange(3, 6), stride=16, dense_anchor=False): + """ + Generate anchor (reference) windows by enumerating aspect ratios X + scales wrt a reference (0, 0, 15, 15) window. + """ + + base_anchor = np.array([1, 1, base_size, base_size]) - 1 + ratio_anchors = _ratio_enum(base_anchor, ratios) + anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales) + for i in range(ratio_anchors.shape[0])]) + if dense_anchor: + assert stride%2==0 + anchors2 = anchors.copy() + anchors2[:,:] += int(stride/2) + anchors = np.vstack( (anchors, anchors2) ) + #print('GA',base_anchor.shape, ratio_anchors.shape, anchors.shape) + return anchors + +#def generate_anchors_fpn(base_size=[64,32,16,8,4], ratios=[0.5, 1, 2], scales=8): +# """ +# Generate anchor (reference) windows by enumerating aspect ratios X +# scales wrt a reference (0, 0, 15, 15) window. +# """ +# anchors = [] +# _ratios = ratios.reshape( (len(base_size), -1) ) +# _scales = scales.reshape( (len(base_size), -1) ) +# for i,bs in enumerate(base_size): +# __ratios = _ratios[i] +# __scales = _scales[i] +# #print('anchors_fpn', bs, __ratios, __scales, file=sys.stderr) +# r = generate_anchors(bs, __ratios, __scales) +# #print('anchors_fpn', r.shape, file=sys.stderr) +# anchors.append(r) +# return anchors + +def generate_anchors_fpn(dense_anchor=False, cfg = None): + #assert(False) + """ + Generate anchor (reference) windows by enumerating aspect ratios X + scales wrt a reference (0, 0, 15, 15) window. 
+ """ + if cfg is None: + cfg = config.RPN_ANCHOR_CFG + RPN_FEAT_STRIDE = [] + for k in cfg: + RPN_FEAT_STRIDE.append( int(k) ) + RPN_FEAT_STRIDE = sorted(RPN_FEAT_STRIDE, reverse=True) + anchors = [] + for k in RPN_FEAT_STRIDE: + v = cfg[str(k)] + bs = v['BASE_SIZE'] + __ratios = np.array(v['RATIOS']) + __scales = np.array(v['SCALES']) + stride = int(k) + #print('anchors_fpn', bs, __ratios, __scales, file=sys.stderr) + r = generate_anchors(bs, __ratios, __scales, stride, dense_anchor) + #print('anchors_fpn', r.shape, file=sys.stderr) + anchors.append(r) + + return anchors + +def _whctrs(anchor): + """ + Return width, height, x center, and y center for an anchor (window). + """ + + w = anchor[2] - anchor[0] + 1 + h = anchor[3] - anchor[1] + 1 + x_ctr = anchor[0] + 0.5 * (w - 1) + y_ctr = anchor[1] + 0.5 * (h - 1) + return w, h, x_ctr, y_ctr + + +def _mkanchors(ws, hs, x_ctr, y_ctr): + """ + Given a vector of widths (ws) and heights (hs) around a center + (x_ctr, y_ctr), output a set of anchors (windows). + """ + + ws = ws[:, np.newaxis] + hs = hs[:, np.newaxis] + anchors = np.hstack((x_ctr - 0.5 * (ws - 1), + y_ctr - 0.5 * (hs - 1), + x_ctr + 0.5 * (ws - 1), + y_ctr + 0.5 * (hs - 1))) + return anchors + + +def _ratio_enum(anchor, ratios): + """ + Enumerate a set of anchors for each aspect ratio wrt an anchor. + """ + + w, h, x_ctr, y_ctr = _whctrs(anchor) + size = w * h + size_ratios = size / ratios + ws = np.round(np.sqrt(size_ratios)) + hs = np.round(ws * ratios) + anchors = _mkanchors(ws, hs, x_ctr, y_ctr) + return anchors + + +def _scale_enum(anchor, scales): + """ + Enumerate a set of anchors for each scale wrt an anchor. 
+ """ + + w, h, x_ctr, y_ctr = _whctrs(anchor) + ws = w * scales + hs = h * scales + anchors = _mkanchors(ws, hs, x_ctr, y_ctr) + return anchors diff --git a/retinaface/rcnn/processing/nms.py b/retinaface/rcnn/processing/nms.py new file mode 100644 index 0000000..230139c --- /dev/null +++ b/retinaface/rcnn/processing/nms.py @@ -0,0 +1,64 @@ +import numpy as np +from ..cython.cpu_nms import cpu_nms +try: + from ..cython.gpu_nms import gpu_nms +except ImportError: + gpu_nms = None + + +def py_nms_wrapper(thresh): + def _nms(dets): + return nms(dets, thresh) + return _nms + + +def cpu_nms_wrapper(thresh): + def _nms(dets): + return cpu_nms(dets, thresh) + return _nms + + +def gpu_nms_wrapper(thresh, device_id): + def _nms(dets): + return gpu_nms(dets, thresh, device_id) + if gpu_nms is not None: + return _nms + else: + return cpu_nms_wrapper(thresh) + + +def nms(dets, thresh): + """ + greedily select boxes with high confidence and overlap with current maximum <= thresh + rule out overlap >= thresh + :param dets: [[x1, y1, x2, y2 score]] + :param thresh: retain overlap < thresh + :return: indexes to keep + """ + x1 = dets[:, 0] + y1 = dets[:, 1] + x2 = dets[:, 2] + y2 = dets[:, 3] + scores = dets[:, 4] + + areas = (x2 - x1 + 1) * (y2 - y1 + 1) + order = scores.argsort()[::-1] + + keep = [] + while order.size > 0: + i = order[0] + keep.append(i) + xx1 = np.maximum(x1[i], x1[order[1:]]) + yy1 = np.maximum(y1[i], y1[order[1:]]) + xx2 = np.minimum(x2[i], x2[order[1:]]) + yy2 = np.minimum(y2[i], y2[order[1:]]) + + w = np.maximum(0.0, xx2 - xx1 + 1) + h = np.maximum(0.0, yy2 - yy1 + 1) + inter = w * h + ovr = inter / (areas[i] + areas[order[1:]] - inter) + + inds = np.where(ovr <= thresh)[0] + order = order[inds + 1] + + return keep diff --git a/retinaface/retinaface.py b/retinaface/retinaface.py new file mode 100644 index 0000000..fb17214 --- /dev/null +++ b/retinaface/retinaface.py @@ -0,0 +1,610 @@ +from __future__ import print_function +import sys +import os +import 
datetime +import time +import numpy as np +import mxnet as mx +from mxnet import ndarray as nd +import cv2 +#from rcnn import config +from rcnn.logger import logger +#from rcnn.processing.bbox_transform import nonlinear_pred, clip_boxes, landmark_pred +from rcnn.processing.bbox_transform import clip_boxes +from rcnn.processing.generate_anchor import generate_anchors_fpn, anchors_plane +from rcnn.processing.nms import gpu_nms_wrapper, cpu_nms_wrapper +from rcnn.processing.bbox_transform import bbox_overlaps + +class RetinaFace: + def __init__(self, prefix, epoch, ctx_id=0, network='net3', nms=0.4, nocrop=False, decay4 = 0.5, vote=False): + self.ctx_id = ctx_id + self.network = network + self.decay4 = decay4 + self.nms_threshold = nms + self.vote = vote + self.nocrop = nocrop + self.debug = False + self.fpn_keys = [] + self.anchor_cfg = None + pixel_means=[0.0, 0.0, 0.0] + pixel_stds=[1.0, 1.0, 1.0] + pixel_scale = 1.0 + self.preprocess = False + _ratio = (1.,) + fmc = 3 + if network=='ssh' or network=='vgg': + pixel_means=[103.939, 116.779, 123.68] + self.preprocess = True + elif network=='net3': + _ratio = (1.,) + elif network=='net3a': + _ratio = (1.,1.5) + elif network=='net6': #like pyramidbox or s3fd + fmc = 6 + elif network=='net5': #retinaface + fmc = 5 + elif network=='net5a': + fmc = 5 + _ratio = (1.,1.5) + elif network=='net4': + fmc = 4 + elif network=='net4a': + fmc = 4 + _ratio = (1.,1.5) + else: + assert False, 'network setting error %s'%network + + if fmc==3: + self._feat_stride_fpn = [32, 16, 8] + self.anchor_cfg = { + '32': {'SCALES': (32,16), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999}, + '16': {'SCALES': (8,4), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999}, + '8': {'SCALES': (2,1), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999}, + } + elif fmc==4: + self._feat_stride_fpn = [32, 16, 8, 4] + self.anchor_cfg = { + '32': {'SCALES': (32,16), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999}, + '16': 
{'SCALES': (8,4), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999}, + '8': {'SCALES': (2,1), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999}, + '4': {'SCALES': (2,1), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999}, + } + elif fmc==6: + self._feat_stride_fpn = [128, 64, 32, 16, 8, 4] + self.anchor_cfg = { + '128': {'SCALES': (32,), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999}, + '64': {'SCALES': (16,), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999}, + '32': {'SCALES': (8,), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999}, + '16': {'SCALES': (4,), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999}, + '8': {'SCALES': (2,), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999}, + '4': {'SCALES': (1,), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999}, + } + elif fmc==5: + self._feat_stride_fpn = [64, 32, 16, 8, 4] + self.anchor_cfg = {} + _ass = 2.0**(1.0/3) + _basescale = 1.0 + for _stride in [4, 8, 16, 32, 64]: + key = str(_stride) + value = {'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999} + scales = [] + for _ in range(3): + scales.append(_basescale) + _basescale *= _ass + value['SCALES'] = tuple(scales) + self.anchor_cfg[key] = value + + print(self._feat_stride_fpn, self.anchor_cfg) + + for s in self._feat_stride_fpn: + self.fpn_keys.append('stride%s'%s) + + + dense_anchor = False + #self._anchors_fpn = dict(zip(self.fpn_keys, generate_anchors_fpn(base_size=fpn_base_size, scales=self._scales, ratios=self._ratios))) + self._anchors_fpn = dict(zip(self.fpn_keys, generate_anchors_fpn(dense_anchor=dense_anchor, cfg=self.anchor_cfg))) + for k in self._anchors_fpn: + v = self._anchors_fpn[k].astype(np.float32) + self._anchors_fpn[k] = v + + self._num_anchors = dict(zip(self.fpn_keys, [anchors.shape[0] for anchors in self._anchors_fpn.values()])) + #self._bbox_pred = nonlinear_pred + #self._landmark_pred = landmark_pred + sym, arg_params, aux_params = 
mx.model.load_checkpoint(prefix, epoch) + if self.ctx_id>=0: + self.ctx = mx.gpu(self.ctx_id) + self.nms = gpu_nms_wrapper(self.nms_threshold, self.ctx_id) + else: + self.ctx = mx.cpu() + self.nms = cpu_nms_wrapper(self.nms_threshold) + self.pixel_means = np.array(pixel_means, dtype=np.float32) + self.pixel_stds = np.array(pixel_stds, dtype=np.float32) + self.pixel_scale = float(pixel_scale) + print('means', self.pixel_means) + self.use_landmarks = False + if len(sym)//len(self._feat_stride_fpn)==3: + self.use_landmarks = True + print('use_landmarks', self.use_landmarks) + + if self.debug: + c = len(sym)//len(self._feat_stride_fpn) + sym = sym[(c*0):] + self._feat_stride_fpn = [32,16,8] + print('sym size:', len(sym)) + + image_size = (640, 640) + self.model = mx.mod.Module(symbol=sym, context=self.ctx, label_names = None) + self.model.bind(data_shapes=[('data', (1, 3, image_size[0], image_size[1]))], for_training=False) + self.model.set_params(arg_params, aux_params) + + def get_input(self, img): + im = img.astype(np.float32) + im_tensor = np.zeros((1, 3, im.shape[0], im.shape[1])) + for i in range(3): + im_tensor[0, i, :, :] = (im[:, :, 2 - i]/self.pixel_scale - self.pixel_means[2 - i])/self.pixel_stds[2-i] + #if self.debug: + # timeb = datetime.datetime.now() + # diff = timeb - timea + # print('X2 uses', diff.total_seconds(), 'seconds') + data = nd.array(im_tensor) + return data + + def detect(self, img, threshold=0.5, scales=[1.0], do_flip=False): + #print('in_detect', threshold, scales, do_flip, do_nms) + proposals_list = [] + scores_list = [] + landmarks_list = [] + timea = datetime.datetime.now() + flips = [0] + if do_flip: + flips = [0, 1] + + for im_scale in scales: + for flip in flips: + if im_scale!=1.0: + im = cv2.resize(img, None, None, fx=im_scale, fy=im_scale, interpolation=cv2.INTER_LINEAR) + else: + im = img.copy() + if flip: + im = im[:,::-1,:] + if self.nocrop: + if im.shape[0]%32==0: + h = im.shape[0] + else: + h = (im.shape[0]//32+1)*32 + if 
im.shape[1]%32==0: + w = im.shape[1] + else: + w = (im.shape[1]//32+1)*32 + _im = np.zeros( (h, w, 3), dtype=np.float32 ) + _im[0:im.shape[0], 0:im.shape[1], :] = im + im = _im + else: + im = im.astype(np.float32) + if self.debug: + timeb = datetime.datetime.now() + diff = timeb - timea + print('X1 uses', diff.total_seconds(), 'seconds') + #self.model.bind(data_shapes=[('data', (1, 3, image_size[0], image_size[1]))], for_training=False) + #im_info = [im.shape[0], im.shape[1], im_scale] + im_info = [im.shape[0], im.shape[1]] + im_tensor = np.zeros((1, 3, im.shape[0], im.shape[1])) + for i in range(3): + im_tensor[0, i, :, :] = (im[:, :, 2 - i]/self.pixel_scale - self.pixel_means[2 - i])/self.pixel_stds[2-i] + if self.debug: + timeb = datetime.datetime.now() + diff = timeb - timea + print('X2 uses', diff.total_seconds(), 'seconds') + data = nd.array(im_tensor) + db = mx.io.DataBatch(data=(data,), provide_data=[('data', data.shape)]) + if self.debug: + timeb = datetime.datetime.now() + diff = timeb - timea + print('X3 uses', diff.total_seconds(), 'seconds') + self.model.forward(db, is_train=False) + net_out = self.model.get_outputs() + #post_nms_topN = self._rpn_post_nms_top_n + #min_size_dict = self._rpn_min_size_fpn + + for _idx,s in enumerate(self._feat_stride_fpn): + #if len(scales)>1 and s==32 and im_scale==scales[-1]: + # continue + _key = 'stride%s'%s + stride = int(s) + #if self.vote and stride==4 and len(scales)>2 and (im_scale==scales[0]): + # continue + if self.use_landmarks: + idx = _idx*3 + else: + idx = _idx*2 + #print('getting', im_scale, stride, idx, len(net_out), data.shape, file=sys.stderr) + scores = net_out[idx].asnumpy() + if self.debug: + timeb = datetime.datetime.now() + diff = timeb - timea + print('A uses', diff.total_seconds(), 'seconds') + #print(scores.shape) + #print('scores',stride, scores.shape, file=sys.stderr) + scores = scores[:, self._num_anchors['stride%s'%s]:, :, :] + + idx+=1 + bbox_deltas = net_out[idx].asnumpy() + + #if DEBUG: + 
# print 'im_size: ({}, {})'.format(im_info[0], im_info[1]) + # print 'scale: {}'.format(im_info[2]) + + #_height, _width = int(im_info[0] / stride), int(im_info[1] / stride) + height, width = bbox_deltas.shape[2], bbox_deltas.shape[3] + + A = self._num_anchors['stride%s'%s] + K = height * width + anchors_fpn = self._anchors_fpn['stride%s'%s] + anchors = anchors_plane(height, width, stride, anchors_fpn) + #print((height, width), (_height, _width), anchors.shape, bbox_deltas.shape, scores.shape, file=sys.stderr) + anchors = anchors.reshape((K * A, 4)) + #print('num_anchors', self._num_anchors['stride%s'%s], file=sys.stderr) + #print('HW', (height, width), file=sys.stderr) + #print('anchors_fpn', anchors_fpn.shape, file=sys.stderr) + #print('anchors', anchors.shape, file=sys.stderr) + #print('bbox_deltas', bbox_deltas.shape, file=sys.stderr) + #print('scores', scores.shape, file=sys.stderr) + + + scores = self._clip_pad(scores, (height, width)) + scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1)) + + #print('pre', bbox_deltas.shape, height, width) + bbox_deltas = self._clip_pad(bbox_deltas, (height, width)) + #print('after', bbox_deltas.shape, height, width) + bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)) + bbox_pred_len = bbox_deltas.shape[3]//A + #print(bbox_deltas.shape) + bbox_deltas = bbox_deltas.reshape((-1, bbox_pred_len)) + + + #print(anchors.shape, bbox_deltas.shape, A, K, file=sys.stderr) + proposals = self.bbox_pred(anchors, bbox_deltas) + proposals = clip_boxes(proposals, im_info[:2]) + + #if self.vote: + # if im_scale>1.0: + # keep = self._filter_boxes2(proposals, 160*im_scale, -1) + # else: + # keep = self._filter_boxes2(proposals, -1, 100*im_scale) + # if stride==4: + # keep = self._filter_boxes2(proposals, 12*im_scale, -1) + # proposals = proposals[keep, :] + # scores = scores[keep] + + #keep = self._filter_boxes(proposals, min_size_dict['stride%s'%s] * im_info[2]) + #proposals = proposals[keep, :] + #scores = scores[keep] + #print('333', 
proposals.shape) + + scores_ravel = scores.ravel() + #print('__shapes', proposals.shape, scores_ravel.shape) + #print('max score', np.max(scores_ravel)) + order = np.where(scores_ravel>=threshold)[0] + #_scores = scores_ravel[order] + #_order = _scores.argsort()[::-1] + #order = order[_order] + proposals = proposals[order, :] + scores = scores[order] + if stride==4 and self.decay4<1.0: + scores *= self.decay4 + if flip: + oldx1 = proposals[:, 0].copy() + oldx2 = proposals[:, 2].copy() + proposals[:, 0] = im.shape[1] - oldx2 - 1 + proposals[:, 2] = im.shape[1] - oldx1 - 1 + + proposals[:,0:4] /= im_scale + + proposals_list.append(proposals) + scores_list.append(scores) + + if not self.vote and self.use_landmarks: + idx+=1 + landmark_deltas = net_out[idx].asnumpy() + landmark_deltas = self._clip_pad(landmark_deltas, (height, width)) + landmark_pred_len = landmark_deltas.shape[1]//A + landmark_deltas = landmark_deltas.transpose((0, 2, 3, 1)).reshape((-1, 5, landmark_pred_len//5)) + #print(landmark_deltas.shape, landmark_deltas) + landmarks = self.landmark_pred(anchors, landmark_deltas) + landmarks = landmarks[order, :] + + if flip: + landmarks[:,:,0] = im.shape[1] - landmarks[:,:,0] - 1 + #for a in range(5): + # oldx1 = landmarks[:, a].copy() + # landmarks[:,a] = im.shape[1] - oldx1 - 1 + order = [1,0,2,4,3] + flandmarks = landmarks.copy() + for idx, a in enumerate(order): + flandmarks[:,idx,:] = landmarks[:,a,:] + #flandmarks[:, idx*2] = landmarks[:,a*2] + #flandmarks[:, idx*2+1] = landmarks[:,a*2+1] + landmarks = flandmarks + landmarks[:,:,0:2] /= im_scale + #landmarks /= im_scale + #landmarks = landmarks.reshape( (-1, landmark_pred_len) ) + landmarks_list.append(landmarks) + #proposals = np.hstack((proposals, landmarks)) + + if self.debug: + timeb = datetime.datetime.now() + diff = timeb - timea + print('B uses', diff.total_seconds(), 'seconds') + proposals = np.vstack(proposals_list) + landmarks = None + if proposals.shape[0]==0: + if self.use_landmarks: + 
landmarks = np.zeros( (0,5,2) ) + return np.zeros( (0,5) ), landmarks + scores = np.vstack(scores_list) + #print('shapes', proposals.shape, scores.shape) + scores_ravel = scores.ravel() + order = scores_ravel.argsort()[::-1] + #if config.TEST.SCORE_THRESH>0.0: + # _count = np.sum(scores_ravel>config.TEST.SCORE_THRESH) + # order = order[:_count] + proposals = proposals[order, :] + scores = scores[order] + if not self.vote and self.use_landmarks: + landmarks = np.vstack(landmarks_list) + landmarks = landmarks[order].astype(np.float32, copy=False) + + pre_det = np.hstack((proposals[:,0:4], scores)).astype(np.float32, copy=False) + if not self.vote: + keep = self.nms(pre_det) + det = np.hstack( (pre_det, proposals[:,4:]) ) + det = det[keep, :] + if self.use_landmarks: + landmarks = landmarks[keep] + else: + det = np.hstack( (pre_det, proposals[:,4:]) ) + det = self.bbox_vote(det) + #if self.use_landmarks: + # det = np.hstack((det, landmarks)) + + if self.debug: + timeb = datetime.datetime.now() + diff = timeb - timea + print('C uses', diff.total_seconds(), 'seconds') + return det, landmarks + + def detect_center(self, img, threshold=0.5, scales=[1.0], do_flip=False): + det, landmarks = self.detect(img, threshold, scales, do_flip) + if det.shape[0]==0: + return None, None + bindex = 0 + if det.shape[0]>1: + img_size = np.asarray(img.shape)[0:2] + bounding_box_size = (det[:,2]-det[:,0])*(det[:,3]-det[:,1]) + img_center = img_size / 2 + offsets = np.vstack([ (det[:,0]+det[:,2])/2-img_center[1], (det[:,1]+det[:,3])/2-img_center[0] ]) + offset_dist_squared = np.sum(np.power(offsets,2.0),0) + bindex = np.argmax(bounding_box_size-offset_dist_squared*2.0) # some extra weight on the centering + bbox = det[bindex,:] + landmark = landmarks[bindex, :, :] + return bbox, landmark + + @staticmethod + def check_large_pose(landmark, bbox): + assert landmark.shape==(5,2) + assert len(bbox)==4 + def get_theta(base, x, y): + vx = x-base + vy = y-base + vx[1] *= -1 + vy[1] *= -1 + tx = 
np.arctan2(vx[1], vx[0]) + ty = np.arctan2(vy[1], vy[0]) + d = ty-tx + d = np.degrees(d) + #print(vx, tx, vy, ty, d) + #if d<-1.*math.pi: + # d+=2*math.pi + #elif d>math.pi: + # d-=2*math.pi + if d<-180.0: + d+=360. + elif d>180.0: + d-=360.0 + return d + landmark = landmark.astype(np.float32) + + theta1 = get_theta(landmark[0], landmark[3], landmark[2]) + theta2 = get_theta(landmark[1], landmark[2], landmark[4]) + #print(va, vb, theta2) + theta3 = get_theta(landmark[0], landmark[2], landmark[1]) + theta4 = get_theta(landmark[1], landmark[0], landmark[2]) + theta5 = get_theta(landmark[3], landmark[4], landmark[2]) + theta6 = get_theta(landmark[4], landmark[2], landmark[3]) + theta7 = get_theta(landmark[3], landmark[2], landmark[0]) + theta8 = get_theta(landmark[4], landmark[1], landmark[2]) + #print(theta1, theta2, theta3, theta4, theta5, theta6, theta7, theta8) + left_score = 0.0 + right_score = 0.0 + up_score = 0.0 + down_score = 0.0 + if theta1<=0.0: + left_score = 10.0 + elif theta2<=0.0: + right_score = 10.0 + else: + left_score = theta2/theta1 + right_score = theta1/theta2 + if theta3<=10.0 or theta4<=10.0: + up_score = 10.0 + else: + up_score = max(theta1/theta3, theta2/theta4) + if theta5<=10.0 or theta6<=10.0: + down_score = 10.0 + else: + down_score = max(theta7/theta5, theta8/theta6) + mleft = (landmark[0][0]+landmark[3][0])/2 + mright = (landmark[1][0]+landmark[4][0])/2 + box_center = ( (bbox[0]+bbox[2])/2, (bbox[1]+bbox[3])/2 ) + ret = 0 + if left_score>=3.0: + ret = 1 + if ret==0 and left_score>=2.0: + if mright<=box_center[0]: + ret = 1 + if ret==0 and right_score>=3.0: + ret = 2 + if ret==0 and right_score>=2.0: + if mleft>=box_center[0]: + ret = 2 + if ret==0 and up_score>=2.0: + ret = 3 + if ret==0 and down_score>=5.0: + ret = 4 + return ret, left_score, right_score, up_score, down_score + + @staticmethod + def _filter_boxes(boxes, min_size): + """ Remove all boxes with any side smaller than min_size """ + ws = boxes[:, 2] - boxes[:, 0] + 1 + hs = 
boxes[:, 3] - boxes[:, 1] + 1 + keep = np.where((ws >= min_size) & (hs >= min_size))[0] + return keep + + @staticmethod + def _filter_boxes2(boxes, max_size, min_size): + """ Remove all boxes with any side smaller than min_size """ + ws = boxes[:, 2] - boxes[:, 0] + 1 + hs = boxes[:, 3] - boxes[:, 1] + 1 + if max_size>0: + keep = np.where( np.minimum(ws, hs)0: + keep = np.where( np.maximum(ws, hs)>min_size )[0] + return keep + + @staticmethod + def _clip_pad(tensor, pad_shape): + """ + Clip boxes of the pad area. + :param tensor: [n, c, H, W] + :param pad_shape: [h, w] + :return: [n, c, h, w] + """ + H, W = tensor.shape[2:] + h, w = pad_shape + + if h < H or w < W: + tensor = tensor[:, :, :h, :w].copy() + + return tensor + + @staticmethod + def bbox_pred(boxes, box_deltas): + """ + Transform the set of class-agnostic boxes into class-specific boxes + by applying the predicted offsets (box_deltas) + :param boxes: !important [N 4] + :param box_deltas: [N, 4 * num_classes] + :return: [N 4 * num_classes] + """ + if boxes.shape[0] == 0: + return np.zeros((0, box_deltas.shape[1])) + + boxes = boxes.astype(np.float, copy=False) + widths = boxes[:, 2] - boxes[:, 0] + 1.0 + heights = boxes[:, 3] - boxes[:, 1] + 1.0 + ctr_x = boxes[:, 0] + 0.5 * (widths - 1.0) + ctr_y = boxes[:, 1] + 0.5 * (heights - 1.0) + + dx = box_deltas[:, 0:1] + dy = box_deltas[:, 1:2] + dw = box_deltas[:, 2:3] + dh = box_deltas[:, 3:4] + + pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis] + pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis] + pred_w = np.exp(dw) * widths[:, np.newaxis] + pred_h = np.exp(dh) * heights[:, np.newaxis] + + pred_boxes = np.zeros(box_deltas.shape) + # x1 + pred_boxes[:, 0:1] = pred_ctr_x - 0.5 * (pred_w - 1.0) + # y1 + pred_boxes[:, 1:2] = pred_ctr_y - 0.5 * (pred_h - 1.0) + # x2 + pred_boxes[:, 2:3] = pred_ctr_x + 0.5 * (pred_w - 1.0) + # y2 + pred_boxes[:, 3:4] = pred_ctr_y + 0.5 * (pred_h - 1.0) + + if box_deltas.shape[1]>4: + pred_boxes[:,4:] = 
box_deltas[:,4:]
+
+        return pred_boxes
+
+    @staticmethod
+    def landmark_pred(boxes, landmark_deltas):
+        """Decode box-relative landmark offsets into absolute coordinates.
+
+        Args:
+            boxes: 2-D array; columns 0..3 are read as x1, y1, x2, y2.
+            landmark_deltas: 3-D array indexed as [box, landmark, coord].
+                Coord 0 is scaled by the box width and coord 1 by the box
+                height; both are offset by the box centre.
+
+        Returns:
+            A copy of ``landmark_deltas`` in which coords 0 and 1 of every
+            landmark hold the decoded x / y position; any further coords are
+            passed through unchanged.  With zero boxes, a zero-row array of
+            shape ``(0, landmark_deltas.shape[1])`` is returned.
+        """
+        if boxes.shape[0] == 0:
+            # NOTE(review): this empty result is 2-D while the regular result
+            # is 3-D; left as-is because the callers are not visible here.
+            return np.zeros((0, landmark_deltas.shape[1]))
+        # np.float (an alias of the builtin float) no longer exists in NumPy.
+        boxes = boxes.astype(np.float64, copy=False)
+        widths = boxes[:, 2] - boxes[:, 0] + 1.0
+        heights = boxes[:, 3] - boxes[:, 1] + 1.0
+        ctr_x = boxes[:, 0] + 0.5 * (widths - 1.0)
+        ctr_y = boxes[:, 1] + 0.5 * (heights - 1.0)
+        pred = landmark_deltas.copy()
+        # Broadcast over the landmark axis instead of looping over a
+        # hard-coded count of five, so any number of landmarks is decoded.
+        pred[:, :, 0] = landmark_deltas[:, :, 0] * widths[:, np.newaxis] + ctr_x[:, np.newaxis]
+        pred[:, :, 1] = landmark_deltas[:, :, 1] * heights[:, np.newaxis] + ctr_y[:, np.newaxis]
+        return pred
+
+    def bbox_vote(self, det):
+        """Merge overlapping detections by score-weighted box voting.
+
+        Args:
+            det: 2-D array, one detection per row; columns 0..3 are read as
+                x1, y1, x2, y2 and column 4 as the score.  Rows are consumed
+                in the order given (this method does not sort them).
+
+        Returns:
+            Array with columns x1, y1, x2, y2, score and at most 750 rows.
+            When there is nothing to return, a single placeholder row
+            ``[10, 10, 20, 20, 0.002]`` is returned instead.
+        """
+        placeholder = np.array([[10, 10, 20, 20, 0.002]])
+        if det.shape[0] == 0:
+            return placeholder
+        # Collect results in a list and stack once at the end; this replaces
+        # the bare try/except that was used to detect the first assignment
+        # and that also swallowed genuine stacking errors.
+        voted = []
+        while det.shape[0] > 0:
+            # IOU of the leading box against every remaining box (itself included)
+            area = (det[:, 2] - det[:, 0] + 1) * (det[:, 3] - det[:, 1] + 1)
+            xx1 = np.maximum(det[0, 0], det[:, 0])
+            yy1 = np.maximum(det[0, 1], det[:, 1])
+            xx2 = np.minimum(det[0, 2], det[:, 2])
+            yy2 = np.minimum(det[0, 3], det[:, 3])
+            w = np.maximum(0.0, xx2 - xx1 + 1)
+            h = np.maximum(0.0, yy2 - yy1 + 1)
+            inter = w * h
+            o = inter / (area[0] + area[:] - inter)
+
+            # nms
+            merge_index = np.where(o >= self.nms_threshold)[0]
+            if merge_index.shape[0] == 0:
+                # The leading box normally matches itself (IoU 1).  A
+                # degenerate or NaN box can match nothing; drop it so the
+                # loop always shrinks det instead of spinning forever.
+                det = det[1:]
+                continue
+            det_accu = det[merge_index, :]
+            det = np.delete(det, merge_index, 0)
+            if merge_index.shape[0] <= 1:
+                # A box without any overlapping partner is kept only when it
+                # is the last one left; otherwise it is discarded.
+                if det.shape[0] == 0:
+                    voted.append(det_accu[:, 0:5])
+                continue
+            # Score-weighted mean of the coordinates, best score of the group.
+            scores = det_accu[:, 4:5]
+            det_accu_sum = np.zeros((1, 5))
+            det_accu_sum[:, 0:4] = np.sum(det_accu[:, 0:4] * scores,
+                                          axis=0) / np.sum(scores)
+            det_accu_sum[:, 4] = np.max(det_accu[:, 4])
+            voted.append(det_accu_sum)
+        if not voted:
+            return placeholder
+        dets = np.vstack(voted)
+        # Keep at most 750 rows.
+        dets = dets[0:750, :]
+        return dets
+
diff --git a/retinaface/test_widerface.py b/retinaface/test_widerface.py
new file mode 100644
index 0000000..1e87125
--- /dev/null
+++ b/retinaface/test_widerface.py
@@ -0,0 +1,199 @@
+from __future__ import print_function
+
+import argparse
+import sys
+import os
+import time
+import numpy as np
+import mxnet as mx
+from mxnet import ndarray as nd
+import cv2
+from rcnn.logger import logger
+#from rcnn.config import config, default, generate_config
+#from rcnn.tools.test_rcnn import test_rcnn
+#from rcnn.tools.test_rpn import test_rpn
+from rcnn.processing.bbox_transform import nonlinear_pred, clip_boxes, landmark_pred
+from rcnn.processing.generate_anchor import generate_anchors_fpn, anchors_plane
+from rcnn.processing.nms import gpu_nms_wrapper
+from rcnn.processing.bbox_transform import bbox_overlaps
+from rcnn.dataset import retinaface
+from retinaface import RetinaFace
+
+
+def parse_args():
+    """Parse the command-line options of this test script.
+
+    Returns:
+        The ``argparse`` namespace.  ``pyramid`` and ``bbox_vote`` are not
+        options; ``main()`` adds them based on ``--mode``.
+    """
+    parser = argparse.ArgumentParser(description='Test widerface by retinaface detector')
+    # general
+    parser.add_argument('--network', help='network name', default='net3', type=str)
+    parser.add_argument('--dataset', help='dataset name', default='retinaface', type=str)
+    parser.add_argument('--image-set', help='image_set name', default='val', type=str)
+    parser.add_argument('--root-path', help='output data folder', default='./data', type=str)
+    parser.add_argument('--dataset-path', help='dataset path', default='./data/retinaface', type=str)
+    parser.add_argument('--gpu', help='GPU device to test with', default=0, type=int)
+    # testing
+    parser.add_argument('--prefix', help='model to test with', default='', type=str)
+    parser.add_argument('--epoch', help='model to test with', default=0, type=int)
+    parser.add_argument('--output', help='output folder', default='./wout', type=str)
+    parser.add_argument('--nocrop', help='', action='store_true')
+    parser.add_argument('--thresh', help='valid detection threshold', default=0.02, type=float)
+    parser.add_argument('--mode', help='test mode, 0 for fast, 1 for accurate', default=1, type=int)
+    # --part / --parts: this process handles images with index % parts == part
+    parser.add_argument('--part', help='', default=0, type=int)
+    parser.add_argument('--parts', help='', default=1, type=int)
+    args = parser.parse_args()
+    return args
+
+
+# RetinaFace instance shared with get_boxes(); created in test().
+detector = None
+# Parsed command-line options; set in main().
+args = None
+# Debug-dump counter: get_boxes() writes annotated images only while
+# 0 <= imgid < 100, so the default of -1 keeps the dump disabled.
+imgid = -1
+
+
+def _ensure_dir(path):
+    """Create ``path`` (with parents) unless it already is a directory."""
+    try:
+        os.makedirs(path)
+    except OSError:
+        # Either it already existed or another --part worker created it
+        # between our attempt and now; anything else is a real error.
+        if not os.path.isdir(path):
+            raise
+
+
+def get_boxes(roi, pyramid):
+    """Run the global detector on one image and return its boxes.
+
+    Args:
+        roi: roidb entry; only ``roi['image']`` (the image path) is read.
+        pyramid: when true, detect at several scales with flipping enabled;
+            otherwise detect at one large scale without flipping.
+
+    Returns:
+        The box array returned by ``detector.detect``.
+
+    Raises:
+        IOError: if the image cannot be read.
+    """
+    global imgid
+    im = cv2.imread(roi['image'])
+    if im is None:
+        # cv2.imread reports failure by returning None rather than raising.
+        raise IOError('failed to read image: %s' % roi['image'])
+    if pyramid:
+        do_flip = True
+        target_size, max_size = 800, 1200
+    else:
+        do_flip = False
+        target_size, max_size = 1600, 2150
+    im_size_min = np.min(im.shape[0:2])
+    im_size_max = np.max(im.shape[0:2])
+    im_scale = float(target_size) / float(im_size_min)
+    # prevent bigger axis from being more than max_size:
+    if np.round(im_scale * im_size_max) > max_size:
+        im_scale = float(max_size) / float(im_size_max)
+    if pyramid:
+        TEST_SCALES = [500, 800, 1100, 1400, 1700]
+        scales = [float(scale) / target_size * im_scale for scale in TEST_SCALES]
+    else:
+        scales = [im_scale]
+    boxes, landmarks = detector.detect(im, threshold=args.thresh, scales=scales, do_flip=do_flip)
+    if 0 <= imgid < 100:
+        # Debug dump: draw the detections and save the annotated image.
+        font = cv2.FONT_HERSHEY_SIMPLEX
+        for i in range(boxes.shape[0]):
+            box = boxes[i]
+            ibox = box[0:4].copy().astype(int)
+            cv2.rectangle(im, (ibox[0], ibox[1]), (ibox[2], ibox[3]), (255, 0, 0), 2)
+            if box.shape[0] > 5:
+                # Column 5 is absent on 5-column results (e.g. after box
+                # voting), so only label the box when it exists.
+                blur = box[5]
+                k = "%.3f" % blur
+                cv2.putText(im, k, (ibox[0] + 2, ibox[1] + 14), font, 0.6, (0, 255, 0), 2)
+            if landmarks is not None:
+                for l in range(5):
+                    landmark = landmarks[i][l]
+                    pp = (int(landmark[0]), int(landmark[1]))
+                    color = (0, 255, 0)
+                    # A third landmark component below 0.5 switches the colour.
+                    if len(landmark) > 2 and landmark[2] < 0.5:
+                        color = (0, 0, 255)
+                    cv2.circle(im, (pp[0], pp[1]), 1, color, 2)
+        _ensure_dir('./testimages')
+        filename = './testimages/%d.jpg' % imgid
+        if cv2.imwrite(filename, im):
+            print(filename, 'wrote')
+        else:
+            print(filename, 'write failed', file=sys.stderr)
+        # NOTE(review): indentation was lost in this chunk; the counter is
+        # assumed to advance only inside the debug branch -- confirm.
+        imgid += 1
+
+    return boxes
+
+
+def test(args):
+    """Detect faces on the selected images and write one result file each.
+
+    For entries that carry ground-truth ``'boxes'`` the per-image and running
+    recall at IoU > 0.5 are printed to stderr.  Results are written to
+    ``<args.output>/<image dir>/<image name>.txt``.
+
+    Args:
+        args: parsed options, including the ``pyramid`` and ``bbox_vote``
+            flags that ``main()`` adds.
+    """
+    print('test with', args)
+    global detector
+    output_folder = args.output
+    _ensure_dir(output_folder)
+    detector = RetinaFace(args.prefix, args.epoch, args.gpu, network=args.network, nocrop=args.nocrop, vote=args.bbox_vote)
+    # NOTE(security): eval() executes whatever text is passed as --dataset;
+    # only run this script with trusted arguments.
+    imdb = eval(args.dataset)(args.image_set, args.root_path, args.dataset_path)
+    roidb = imdb.gt_roidb()
+    # Running totals: [ground-truth boxes found, ground-truth boxes seen].
+    overall = [0.0, 0.0]
+    print('roidb size', len(roidb))
+
+    for i, roi in enumerate(roidb):
+        if i % args.parts != args.part:
+            continue
+        boxes = get_boxes(roi, args.pyramid)
+        if 'boxes' in roi:
+            gt_boxes = roi['boxes'].copy()
+            num_gt = gt_boxes.shape[0]
+            overlaps = bbox_overlaps(boxes.astype(np.float64), gt_boxes.astype(np.float64))
+            _gt_overlaps = np.zeros((num_gt))
+            if boxes.shape[0] > 0:
+                _gt_overlaps = overlaps.max(axis=0)
+
+            # append recorded IoU coverage level
+            found = (_gt_overlaps > 0.5).sum()
+            # Recall is undefined without ground truth; report nan rather
+            # than dividing by zero.
+            recall = found / float(num_gt) if num_gt > 0 else float('nan')
+            overall[0] += found
+            overall[1] += num_gt
+            recall_all = float(overall[0]) / overall[1] if overall[1] > 0 else float('nan')
+            print('[%d]'%i, 'recall', recall, (gt_boxes.shape[0], boxes.shape[0]), 'all:', recall_all, file=sys.stderr)
+        else:
+            print('[%d]'%i, 'detect %d faces'%boxes.shape[0])
+
+        _vec = roi['image'].split('/')
+        out_dir = os.path.join(output_folder, _vec[-2])
+        _ensure_dir(out_dir)
+        # Swap only the real extension; str.replace('jpg', 'txt') would also
+        # rewrite "jpg" inside the name and miss other extensions.
+        out_file = os.path.join(out_dir, os.path.splitext(_vec[-1])[0] + '.txt')
+        with open(out_file, 'w') as f:
+            name = '/'.join(_vec[-2:])
+            f.write("%s\n"%(name))
+            f.write("%d\n"%(boxes.shape[0]))
+            for b in range(boxes.shape[0]):
+                box = boxes[b]
+                # x, y, width, height, score
+                f.write("%d %d %d %d %g \n"%(box[0], box[1], box[2]-box[0], box[3]-box[1], box[4]))
+
+
+def main():
+    """Parse the options, derive the test-mode flags and run the test."""
+    global args
+    args = parse_args()
+    # Mode 0 turns multi-scale testing and box voting off; any other mode
+    # turns both on.
+    accurate = args.mode != 0
+    args.pyramid = accurate
+    args.bbox_vote = accurate
+    logger.info('Called with argument: %s' % args)
+    test(args)
+
+
+if __name__ == '__main__':
+    main()
+