from __future__ import print_function

import sys
import os
import datetime
import time

import numpy as np
import mxnet as mx
from mxnet import ndarray as nd
import cv2

#from rcnn import config
#from rcnn.processing.bbox_transform import nonlinear_pred, clip_boxes, landmark_pred
from rcnn.processing.bbox_transform import clip_boxes
from rcnn.processing.generate_anchor import generate_anchors_fpn, anchors_plane
from rcnn.processing.nms import gpu_nms_wrapper, cpu_nms_wrapper
from rcnn.processing.bbox_transform import bbox_overlaps


class RetinaFaceCoV:
    def __init__(self,
                 prefix,
                 epoch,
                 ctx_id=0,
                 network='net3',
                 nms=0.4,
                 nocrop=False):
        self.ctx_id = ctx_id
        self.network = network
        self.nms_threshold = nms
        self.nocrop = nocrop
        self.debug = False
        self.fpn_keys = []
        self.anchor_cfg = None
        pixel_means = [0.0, 0.0, 0.0]
        pixel_stds = [1.0, 1.0, 1.0]
        pixel_scale = 1.0
        self.bbox_stds = [1.0, 1.0, 1.0, 1.0]
        self.landmark_std = 1.0
        self.preprocess = False
        _ratio = (1., )
        fmc = 3
        if network == 'ssh' or network == 'vgg':
            pixel_means = [103.939, 116.779, 123.68]
            self.preprocess = True
        elif network == 'net3':
            _ratio = (1., )
        elif network == 'net3l':
            _ratio = (1., )
            self.landmark_std = 0.2
        elif network == 'net3a':
            _ratio = (1., 1.5)
        elif network == 'net6':  #like pyramidbox or s3fd
            fmc = 6
        elif network == 'net5':  #retinaface
            fmc = 5
        elif network == 'net5a':
            fmc = 5
            _ratio = (1., 1.5)
        elif network == 'net4':
            fmc = 4
        elif network == 'net4a':
            fmc = 4
            _ratio = (1., 1.5)
        elif network == 'x5':
            fmc = 5
            pixel_means = [103.52, 116.28, 123.675]
            pixel_stds = [57.375, 57.12, 58.395]
        elif network == 'x3':
            fmc = 3
            pixel_means = [103.52, 116.28, 123.675]
            pixel_stds = [57.375, 57.12, 58.395]
        elif network == 'x3a':
            fmc = 3
            _ratio = (1., 1.5)
            pixel_means = [103.52, 116.28, 123.675]
            pixel_stds = [57.375, 57.12, 58.395]
        else:
            assert False, 'network setting error %s' % network

        if fmc == 3:
            self._feat_stride_fpn = [32, 16, 8]
            self.anchor_cfg = {
                '32': {
                    'SCALES': (32, 16),
                    'BASE_SIZE': 16,
                    'RATIOS': _ratio,
                    'ALLOWED_BORDER': 9999
                },
                '16': {
                    'SCALES': (8, 4),
                    'BASE_SIZE': 16,
                    'RATIOS': _ratio,
                    'ALLOWED_BORDER': 9999
                },
                '8': {
                    'SCALES': (2, 1),
                    'BASE_SIZE': 16,
                    'RATIOS': _ratio,
                    'ALLOWED_BORDER': 9999
                },
            }
        elif fmc == 4:
            self._feat_stride_fpn = [32, 16, 8, 4]
            self.anchor_cfg = {
                '32': {
                    'SCALES': (32, 16),
                    'BASE_SIZE': 16,
                    'RATIOS': _ratio,
                    'ALLOWED_BORDER': 9999
                },
                '16': {
                    'SCALES': (8, 4),
                    'BASE_SIZE': 16,
                    'RATIOS': _ratio,
                    'ALLOWED_BORDER': 9999
                },
                '8': {
                    'SCALES': (2, 1),
                    'BASE_SIZE': 16,
                    'RATIOS': _ratio,
                    'ALLOWED_BORDER': 9999
                },
                '4': {
                    'SCALES': (2, 1),
                    'BASE_SIZE': 16,
                    'RATIOS': _ratio,
                    'ALLOWED_BORDER': 9999
                },
            }
        elif fmc == 6:
            self._feat_stride_fpn = [128, 64, 32, 16, 8, 4]
            self.anchor_cfg = {
                '128': {
                    'SCALES': (32, ),
                    'BASE_SIZE': 16,
                    'RATIOS': _ratio,
                    'ALLOWED_BORDER': 9999
                },
                '64': {
                    'SCALES': (16, ),
                    'BASE_SIZE': 16,
                    'RATIOS': _ratio,
                    'ALLOWED_BORDER': 9999
                },
                '32': {
                    'SCALES': (8, ),
                    'BASE_SIZE': 16,
                    'RATIOS': _ratio,
                    'ALLOWED_BORDER': 9999
                },
                '16': {
                    'SCALES': (4, ),
                    'BASE_SIZE': 16,
                    'RATIOS': _ratio,
                    'ALLOWED_BORDER': 9999
                },
                '8': {
                    'SCALES': (2, ),
                    'BASE_SIZE': 16,
                    'RATIOS': _ratio,
                    'ALLOWED_BORDER': 9999
                },
                '4': {
                    'SCALES': (1, ),
                    'BASE_SIZE': 16,
                    'RATIOS': _ratio,
                    'ALLOWED_BORDER': 9999
                },
            }
        elif fmc == 5:
            self._feat_stride_fpn = [64, 32, 16, 8, 4]
            self.anchor_cfg = {}
            _ass = 2.0**(1.0 / 3)
            _basescale = 1.0
            for _stride in [4, 8, 16, 32, 64]:
                key = str(_stride)
                value = {
                    'BASE_SIZE': 16,
                    'RATIOS': _ratio,
                    'ALLOWED_BORDER': 9999
                }
                scales = []
                for _ in range(3):
                    scales.append(_basescale)
                    _basescale *= _ass
                value['SCALES'] = tuple(scales)
                self.anchor_cfg[key] = value

        #print(self._feat_stride_fpn, self.anchor_cfg)

        for s in self._feat_stride_fpn:
            self.fpn_keys.append('stride%s' % s)

        dense_anchor = False
        #self._anchors_fpn = dict(zip(self.fpn_keys, generate_anchors_fpn(base_size=fpn_base_size, scales=self._scales, ratios=self._ratios)))
        self._anchors_fpn = dict(
            zip(
                self.fpn_keys,
                generate_anchors_fpn(dense_anchor=dense_anchor,
                                     cfg=self.anchor_cfg)))
        for k in self._anchors_fpn:
            v = self._anchors_fpn[k].astype(np.float32)
            self._anchors_fpn[k] = v

        self._num_anchors = dict(
            zip(self.fpn_keys,
                [anchors.shape[0] for anchors in self._anchors_fpn.values()]))
        #self._bbox_pred = nonlinear_pred
        #self._landmark_pred = landmark_pred
        sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
        if self.ctx_id >= 0:
            self.ctx = mx.gpu(self.ctx_id)
            self.nms = gpu_nms_wrapper(self.nms_threshold, self.ctx_id)
        else:
            self.ctx = mx.cpu()
            self.nms = cpu_nms_wrapper(self.nms_threshold)
        self.pixel_means = np.array(pixel_means, dtype=np.float32)
        self.pixel_stds = np.array(pixel_stds, dtype=np.float32)
        self.pixel_scale = float(pixel_scale)
        #print('means', self.pixel_means)
        self.use_landmarks = True
        #print('use_landmarks', self.use_landmarks)
        self.cascade = 0
        # Score decay applied to the stride-4 level in detect(); 1.0 disables
        # it. Added here because detect() reads self.decay4, but the attribute
        # was never initialized in this snippet.
        self.decay4 = 1.0

        if self.debug:
            c = len(sym) // len(self._feat_stride_fpn)
            sym = sym[(c * 0):]
            self._feat_stride_fpn = [32, 16, 8]
            #print('sym size:', len(sym))

        image_size = (640, 640)
        self.model = mx.mod.Module(symbol=sym,
                                   context=self.ctx,
                                   label_names=None)
        self.model.bind(data_shapes=[('data', (1, 3, image_size[0],
                                               image_size[1]))],
                        for_training=False)
        self.model.set_params(arg_params, aux_params)

    def get_input(self, img):
        im = img.astype(np.float32)
        im_tensor = np.zeros((1, 3, im.shape[0], im.shape[1]))
        for i in range(3):
            im_tensor[0, i, :, :] = (
                im[:, :, 2 - i] / self.pixel_scale -
                self.pixel_means[2 - i]) / self.pixel_stds[2 - i]
        #if self.debug:
        #    timeb = datetime.datetime.now()
        #    diff = timeb - timea
        #    print('X2 uses', diff.total_seconds(), 'seconds')
        data = nd.array(im_tensor)
        return data

    def detect(self, img, threshold=0.5, scales=[1.0], do_flip=False):
        #print('in_detect', threshold, scales, do_flip, do_nms)
        proposals_list = []
        scores_list = []
        mask_scores_list = []
        landmarks_list = []
        strides_list = []
        timea = datetime.datetime.now()
        flips = [0]
        if do_flip:
            flips = [0, 1]

        imgs = [img]
        if isinstance(img, list):
            imgs = img
        for img in imgs:
            for im_scale in scales:
                for flip in flips:
                    if im_scale != 1.0:
                        im = cv2.resize(img,
                                        None,
                                        None,
                                        fx=im_scale,
                                        fy=im_scale,
                                        interpolation=cv2.INTER_LINEAR)
                    else:
                        im = img.copy()
                    if flip:
                        im = im[:, ::-1, :]
                    if self.nocrop:
                        # pad height and width up to multiples of 32
                        if im.shape[0] % 32 == 0:
                            h = im.shape[0]
                        else:
                            h = (im.shape[0] // 32 + 1) * 32
                        if im.shape[1] % 32 == 0:
                            w = im.shape[1]
                        else:
                            w = (im.shape[1] // 32 + 1) * 32
                        _im = np.zeros((h, w, 3), dtype=np.float32)
                        _im[0:im.shape[0], 0:im.shape[1], :] = im
                        im = _im
                    else:
                        im = im.astype(np.float32)
                    if self.debug:
                        timeb = datetime.datetime.now()
                        diff = timeb - timea
                        print('X1 uses', diff.total_seconds(), 'seconds')
                    #self.model.bind(data_shapes=[('data', (1, 3, image_size[0], image_size[1]))], for_training=False)
                    #im_info = [im.shape[0], im.shape[1], im_scale]
                    im_info = [im.shape[0], im.shape[1]]
                    im_tensor = np.zeros((1, 3, im.shape[0], im.shape[1]))
                    for i in range(3):
                        # reorder BGR (OpenCV) to RGB and normalize each
                        # channel as (x / pixel_scale - mean) / std
                        im_tensor[0, i, :, :] = (
                            im[:, :, 2 - i] / self.pixel_scale -
                            self.pixel_means[2 - i]) / self.pixel_stds[2 - i]
                    if self.debug:
                        timeb = datetime.datetime.now()
                        diff = timeb - timea
                        print('X2 uses', diff.total_seconds(), 'seconds')
                    data = nd.array(im_tensor)
                    db = mx.io.DataBatch(data=(data, ),
                                         provide_data=[('data', data.shape)])
                    if self.debug:
                        timeb = datetime.datetime.now()
                        diff = timeb - timea
                        print('X3 uses', diff.total_seconds(), 'seconds')
                    self.model.forward(db, is_train=False)
                    net_out = self.model.get_outputs()
                    #post_nms_topN = self._rpn_post_nms_top_n
                    #min_size_dict = self._rpn_min_size_fpn

                    sym_idx = 0

                    for _idx, s in enumerate(self._feat_stride_fpn):
                        #if len(scales)>1 and s==32 and im_scale==scales[-1]:
                        #    continue
                        _key = 'stride%s' % s
                        stride = int(s)
                        is_cascade = False
                        #if self.vote and stride==4 and len(scales)>2 and (im_scale==scales[0]):
                        #    continue
                        #print('getting', im_scale, stride, idx, len(net_out), data.shape, file=sys.stderr)
                        scores = net_out[sym_idx].asnumpy()
                        type_scores = net_out[sym_idx + 3].asnumpy()
                        if self.debug:
                            print(scores.shape, type_scores.shape)
                            timeb = datetime.datetime.now()
                            diff = timeb - timea
                            print('A uses', diff.total_seconds(), 'seconds')
                        A = self._num_anchors['stride%s' % s]
                        #print(scores.shape)
                        #print('scores',stride, scores.shape, file=sys.stderr)
                        scores = scores[:, A:, :, :]
                        mask_scores = type_scores[:, A * 2:, :, :]  #x, A, x, x

                        bbox_deltas = net_out[sym_idx + 1].asnumpy()

                        #if DEBUG:
                        #    print 'im_size: ({}, {})'.format(im_info[0], im_info[1])
                        #    print 'scale: {}'.format(im_info[2])

                        #_height, _width = int(im_info[0] / stride), int(im_info[1] / stride)
                        height, width = bbox_deltas.shape[2], bbox_deltas.shape[3]

                        K = height * width
                        anchors_fpn = self._anchors_fpn['stride%s' % s]
                        anchors = anchors_plane(height, width, stride, anchors_fpn)
                        #print((height, width), (_height, _width), anchors.shape, bbox_deltas.shape, scores.shape, file=sys.stderr)
                        anchors = anchors.reshape((K * A, 4))
                        #print('num_anchors', self._num_anchors['stride%s'%s], file=sys.stderr)
                        #print('HW', (height, width), file=sys.stderr)
                        #print('anchors_fpn', anchors_fpn.shape, file=sys.stderr)
                        #print('anchors', anchors.shape, file=sys.stderr)
                        #print('bbox_deltas', bbox_deltas.shape, file=sys.stderr)
                        #print('scores', scores.shape, file=sys.stderr)

                        #scores = self._clip_pad(scores, (height, width))
                        scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))
                        mask_scores = mask_scores.transpose(
                            (0, 2, 3, 1)).reshape((-1, 1))

                        #print('pre', bbox_deltas.shape, height, width)
                        #bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
                        #print('after', bbox_deltas.shape, height, width)
                        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1))
                        bbox_pred_len = bbox_deltas.shape[3] // A
                        #print(bbox_deltas.shape)
                        bbox_deltas = bbox_deltas.reshape((-1, bbox_pred_len))
                        bbox_deltas[:, 0::4] = bbox_deltas[:, 0::4] * self.bbox_stds[0]
                        bbox_deltas[:, 1::4] = bbox_deltas[:, 1::4] * self.bbox_stds[1]
                        bbox_deltas[:, 2::4] = bbox_deltas[:, 2::4] * self.bbox_stds[2]
                        bbox_deltas[:, 3::4] = bbox_deltas[:, 3::4] * self.bbox_stds[3]
                        proposals = self.bbox_pred(anchors, bbox_deltas)

                        proposals = clip_boxes(proposals, im_info[:2])

                        #if self.vote:
                        #    if im_scale>1.0:
                        #        keep = self._filter_boxes2(proposals, 160*im_scale, -1)
                        #    else:
                        #        keep = self._filter_boxes2(proposals, -1, 100*im_scale)
                        #    if stride==4:
                        #        keep = self._filter_boxes2(proposals, 12*im_scale, -1)
                        #    proposals = proposals[keep, :]
                        #    scores = scores[keep]

                        #keep = self._filter_boxes(proposals, min_size_dict['stride%s'%s] * im_info[2])
                        #proposals = proposals[keep, :]
                        #scores = scores[keep]
                        #print('333', proposals.shape)
                        if stride == 4 and self.decay4 < 1.0:
                            scores *= self.decay4

                        scores_ravel = scores.ravel()
                        #mask_scores_ravel = mask_scores.ravel()
                        #print('__shapes', proposals.shape, scores_ravel.shape)
                        #print('max score', np.max(scores_ravel))
                        order = np.where(scores_ravel >= threshold)[0]
                        #_scores = scores_ravel[order]
                        #_order = _scores.argsort()[::-1]
                        #order = order[_order]
                        proposals = proposals[order, :]
                        scores = scores[order]
                        mask_scores = mask_scores[order]
                        if flip:
                            oldx1 = proposals[:, 0].copy()
                            oldx2 = proposals[:, 2].copy()
                            proposals[:, 0] = im.shape[1] - oldx2 - 1
                            proposals[:, 2] = im.shape[1] - oldx1 - 1

                        proposals[:, 0:4] /= im_scale

                        proposals_list.append(proposals)
                        scores_list.append(scores)
                        mask_scores_list.append(mask_scores)

                        landmark_deltas = net_out[sym_idx + 2].asnumpy()
                        #landmark_deltas = self._clip_pad(landmark_deltas, (height, width))
                        landmark_pred_len = landmark_deltas.shape[1] // A
                        landmark_deltas = landmark_deltas.transpose(
                            (0, 2, 3, 1)).reshape((-1, 5, landmark_pred_len // 5))
                        landmark_deltas *= self.landmark_std
                        #print(landmark_deltas.shape, landmark_deltas)
                        landmarks = self.landmark_pred(anchors, landmark_deltas)
                        landmarks = landmarks[order, :]

                        if flip:
                            landmarks[:, :, 0] = im.shape[1] - landmarks[:, :, 0] - 1
                            #for a in range(5):
                            #    oldx1 = landmarks[:, a].copy()
                            #    landmarks[:,a] = im.shape[1] - oldx1 - 1
                            order = [1, 0, 2, 4, 3]
                            flandmarks = landmarks.copy()
                            for idx, a in enumerate(order):
                                flandmarks[:, idx, :] = landmarks[:, a, :]
                                #flandmarks[:, idx*2] = landmarks[:,a*2]
                                #flandmarks[:, idx*2+1] = landmarks[:,a*2+1]
                            landmarks = flandmarks
                        landmarks[:, :, 0:2] /= im_scale
                        #landmarks /= im_scale
                        #landmarks = landmarks.reshape( (-1, landmark_pred_len) )
                        landmarks_list.append(landmarks)
                        #proposals = np.hstack((proposals, landmarks))
                        sym_idx += 4

        if self.debug:
            timeb = datetime.datetime.now()
            diff = timeb - timea
            print('B uses', diff.total_seconds(), 'seconds')
        proposals = np.vstack(proposals_list)
        landmarks = None
        if proposals.shape[0] == 0:
            landmarks = np.zeros((0, 5, 2))
            return np.zeros((0, 6)), landmarks
        scores = np.vstack(scores_list)
        mask_scores = np.vstack(mask_scores_list)
        #print('shapes', proposals.shape, scores.shape)
        scores_ravel = scores.ravel()
        order = scores_ravel.argsort()[::-1]
        #if config.TEST.SCORE_THRESH>0.0:
        #    _count = np.sum(scores_ravel>config.TEST.SCORE_THRESH)
        #    order = order[:_count]
        proposals = proposals[order, :]
        scores = scores[order]
        mask_scores = mask_scores[order]
        landmarks = np.vstack(landmarks_list)
        landmarks = landmarks[order].astype(np.float32, copy=False)

        pre_det = np.hstack((proposals[:, 0:4], scores)).astype(np.float32,
                                                                copy=False)
        keep = self.nms(pre_det)
        det = np.hstack((pre_det, mask_scores))
        det = det[keep, :]
        landmarks = landmarks[keep]

        if self.debug:
            timeb = datetime.datetime.now()
            diff = timeb - timea
            print('C uses', diff.total_seconds(), 'seconds')
        return det, landmarks

    def detect_center(self, img, threshold=0.5, scales=[1.0], do_flip=False):
        det, landmarks = self.detect(img, threshold, scales, do_flip)
        if det.shape[0] == 0:
            return None, None
        bindex = 0
        if det.shape[0] > 1:
            img_size = np.asarray(img.shape)[0:2]
            bounding_box_size = (det[:, 2] - det[:, 0]) * (det[:, 3] -
                                                           det[:, 1])
            img_center = img_size / 2
            offsets = np.vstack([(det[:, 0] + det[:, 2]) / 2 - img_center[1],
                                 (det[:, 1] + det[:, 3]) / 2 - img_center[0]])
            offset_dist_squared = np.sum(np.power(offsets, 2.0), 0)
            bindex = np.argmax(bounding_box_size - offset_dist_squared *
                               2.0)  # some extra weight on the centering
        bbox = det[bindex, :]
        landmark = landmarks[bindex, :, :]
        return bbox, landmark

    @staticmethod
    def check_large_pose(landmark, bbox):
        assert landmark.shape == (5, 2)
        assert len(bbox) == 4

        def get_theta(base, x, y):
            vx = x - base
            vy = y - base
            vx[1] *= -1
            vy[1] *= -1
            tx = np.arctan2(vx[1], vx[0])
            ty = np.arctan2(vy[1], vy[0])
            d = ty - tx
            d = np.degrees(d)
            #print(vx, tx, vy, ty, d)
            #if d<-1.*math.pi:
            #    d+=2*math.pi
            #elif d>math.pi:
            #    d-=2*math.pi
            if d < -180.0:
                d += 360.
            elif d > 180.0:
                d -= 360.0
            return d

        landmark = landmark.astype(np.float32)

        theta1 = get_theta(landmark[0], landmark[3], landmark[2])
        theta2 = get_theta(landmark[1], landmark[2], landmark[4])
        #print(va, vb, theta2)
        theta3 = get_theta(landmark[0], landmark[2], landmark[1])
        theta4 = get_theta(landmark[1], landmark[0], landmark[2])
        theta5 = get_theta(landmark[3], landmark[4], landmark[2])
        theta6 = get_theta(landmark[4], landmark[2], landmark[3])
        theta7 = get_theta(landmark[3], landmark[2], landmark[0])
        theta8 = get_theta(landmark[4], landmark[1], landmark[2])
        #print(theta1, theta2, theta3, theta4, theta5, theta6, theta7, theta8)
        left_score = 0.0
        right_score = 0.0
        up_score = 0.0
        down_score = 0.0
        if theta1 <= 0.0:
            left_score = 10.0
        elif theta2 <= 0.0:
            right_score = 10.0
        else:
            left_score = theta2 / theta1
            right_score = theta1 / theta2
        if theta3 <= 10.0 or theta4 <= 10.0:
            up_score = 10.0
        else:
            up_score = max(theta1 / theta3, theta2 / theta4)
        if theta5 <= 10.0 or theta6 <= 10.0:
            down_score = 10.0
        else:
            down_score = max(theta7 / theta5, theta8 / theta6)
        mleft = (landmark[0][0] + landmark[3][0]) / 2
        mright = (landmark[1][0] + landmark[4][0]) / 2
        box_center = ((bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2)
        ret = 0
        if left_score >= 3.0:
            ret = 1
        if ret == 0 and left_score >= 2.0:
            if mright <= box_center[0]:
                ret = 1
        if ret == 0 and right_score >= 3.0:
            ret = 2
        if ret == 0 and right_score >= 2.0:
            if mleft >= box_center[0]:
                ret = 2
        if ret == 0 and up_score >= 2.0:
            ret = 3
        if ret == 0 and down_score >= 5.0:
            ret = 4
        return ret, left_score, right_score, up_score, down_score

    @staticmethod
    def _filter_boxes(boxes, min_size):
        """ Remove all boxes with any side smaller than min_size """
        ws = boxes[:, 2] - boxes[:, 0] + 1
        hs = boxes[:, 3] - boxes[:, 1] + 1
        keep = np.where((ws >= min_size) & (hs >= min_size))[0]
        return keep

    @staticmethod
    def _filter_boxes2(boxes, max_size, min_size):
        """ Keep boxes whose smaller side is below max_size (when max_size > 0),
        otherwise boxes whose larger side exceeds min_size (when min_size > 0). """
        ws = boxes[:, 2] - boxes[:, 0] + 1
        hs = boxes[:, 3] - boxes[:, 1] + 1
        if max_size > 0:
            keep = np.where(np.minimum(ws, hs) < max_size)[0]
        elif min_size > 0:
            keep = np.where(np.maximum(ws, hs) > min_size)[0]
        return keep

    @staticmethod
    def _clip_pad(tensor, pad_shape):
        """
        Clip boxes of the pad area.
        :param tensor: [n, c, H, W]
        :param pad_shape: [h, w]
        :return: [n, c, h, w]
        """
        H, W = tensor.shape[2:]
        h, w = pad_shape

        if h < H or w < W:
            tensor = tensor[:, :, :h, :w].copy()

        return tensor

    @staticmethod
    def bbox_pred(boxes, box_deltas):
        """
        Transform the set of class-agnostic boxes into class-specific boxes
        by applying the predicted offsets (box_deltas)
        :param boxes: !important [N 4]
        :param box_deltas: [N, 4 * num_classes]
        :return: [N 4 * num_classes]
        """
        if boxes.shape[0] == 0:
            return np.zeros((0, box_deltas.shape[1]))

        # use the builtin float: the np.float alias is removed in recent numpy
        boxes = boxes.astype(float, copy=False)
        widths = boxes[:, 2] - boxes[:, 0] + 1.0
        heights = boxes[:, 3] - boxes[:, 1] + 1.0
        ctr_x = boxes[:, 0] + 0.5 * (widths - 1.0)
        ctr_y = boxes[:, 1] + 0.5 * (heights - 1.0)

        dx = box_deltas[:, 0:1]
        dy = box_deltas[:, 1:2]
        dw = box_deltas[:, 2:3]
        dh = box_deltas[:, 3:4]

        pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis]
        pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis]
        pred_w = np.exp(dw) * widths[:, np.newaxis]
        pred_h = np.exp(dh) * heights[:, np.newaxis]

        pred_boxes = np.zeros(box_deltas.shape)
        # x1
        pred_boxes[:, 0:1] = pred_ctr_x - 0.5 * (pred_w - 1.0)
        # y1
        pred_boxes[:, 1:2] = pred_ctr_y - 0.5 * (pred_h - 1.0)
        # x2
        pred_boxes[:, 2:3] = pred_ctr_x + 0.5 * (pred_w - 1.0)
        # y2
        pred_boxes[:, 3:4] = pred_ctr_y + 0.5 * (pred_h - 1.0)

        if box_deltas.shape[1] > 4:
            pred_boxes[:, 4:] = box_deltas[:, 4:]

        return pred_boxes

    @staticmethod
    def landmark_pred(boxes, landmark_deltas):
        if boxes.shape[0] == 0:
            return np.zeros((0, landmark_deltas.shape[1]))
        # use the builtin float: the np.float alias is removed in recent numpy
        boxes = boxes.astype(float, copy=False)
        widths = boxes[:, 2] - boxes[:, 0] + 1.0
        heights = boxes[:, 3] - boxes[:, 1] + 1.0
        ctr_x = boxes[:, 0] + 0.5 * (widths - 1.0)
        ctr_y = boxes[:, 1] + 0.5 * (heights - 1.0)
        pred = landmark_deltas.copy()
        for i in range(5):
            pred[:, i, 0] = landmark_deltas[:, i, 0] * widths + ctr_x
            pred[:, i, 1] = landmark_deltas[:, i, 1] * heights + ctr_y
        return pred
        #preds = []
        #for i in range(landmark_deltas.shape[1]):
        #    if i%2==0:
        #        pred = (landmark_deltas[:,i]*widths + ctr_x)
        #    else:
        #        pred = (landmark_deltas[:,i]*heights + ctr_y)
        #    preds.append(pred)
        #preds = np.vstack(preds).transpose()
        #return preds

    def vote(self, det):
        #order = det[:, 4].ravel().argsort()[::-1]
        #det = det[order, :]
        if det.shape[0] == 0:
            return np.zeros((0, 5))
        #dets = np.array([[10, 10, 20, 20, 0.002]])
        #det = np.empty(shape=[0, 5])
        dets = None
        while det.shape[0] > 0:
            if dets is not None and dets.shape[0] >= 750:
                break
            # IOU
            area = (det[:, 2] - det[:, 0] + 1) * (det[:, 3] - det[:, 1] + 1)
            xx1 = np.maximum(det[0, 0], det[:, 0])
            yy1 = np.maximum(det[0, 1], det[:, 1])
            xx2 = np.minimum(det[0, 2], det[:, 2])
            yy2 = np.minimum(det[0, 3], det[:, 3])
            w = np.maximum(0.0, xx2 - xx1 + 1)
            h = np.maximum(0.0, yy2 - yy1 + 1)
            inter = w * h
            o = inter / (area[0] + area[:] - inter)

            # nms
            merge_index = np.where(o >= self.nms_threshold)[0]
            det_accu = det[merge_index, :]
            det = np.delete(det, merge_index, 0)
            if merge_index.shape[0] <= 1:
                if det.shape[0] == 0:
                    # keep the unmerged box as-is (explicit None check instead
                    # of the original bare try/except)
                    if dets is None:
                        dets = det_accu
                    else:
                        dets = np.row_stack((dets, det_accu))
                continue
            det_accu[:, 0:4] = det_accu[:, 0:4] * np.tile(det_accu[:, -1:],
                                                          (1, 4))
            max_score = np.max(det_accu[:, 4])
            det_accu_sum = np.zeros((1, 5))
            det_accu_sum[:, 0:4] = np.sum(det_accu[:, 0:4], axis=0) / np.sum(
                det_accu[:, -1:])
            det_accu_sum[:, 4] = max_score
            if dets is None:
                dets = det_accu_sum
            else:
                dets = np.row_stack((dets, det_accu_sum))
        dets = dets[0:750, :]
        return dets
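

# ---------------------------------------------------------------------------
# Minimal usage sketch (not part of the original module). It only illustrates
# the intended call pattern; the checkpoint prefix './model/mnet_cov2', the
# epoch number 0, and the image path 'test.jpg' are placeholders that must be
# replaced with a real RetinaFaceCoV checkpoint and input image.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    # ctx_id=-1 selects the CPU path (cpu_nms_wrapper); pass a GPU index to
    # run on GPU with gpu_nms_wrapper instead.
    detector = RetinaFaceCoV('./model/mnet_cov2', 0, ctx_id=-1, network='net3l')
    img = cv2.imread('test.jpg')
    if img is None:
        raise SystemExit('could not read test.jpg')
    # faces: [N, 6] rows of (x1, y1, x2, y2, face_score, mask_score)
    # landmarks: [N, 5, 2] five (x, y) landmarks per detected face
    faces, landmarks = detector.detect(img,
                                       threshold=0.8,
                                       scales=[1.0],
                                       do_flip=False)
    for face in faces:
        x1, y1, x2, y2, score, mask_score = face
        print('face score %.3f, mask score %.3f, box (%d, %d, %d, %d)' %
              (score, mask_score, int(x1), int(y1), int(x2), int(y2)))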