# coding: utf-8 import os import mxnet as mx import numpy as np import math import cv2 from multiprocessing import Pool from itertools import repeat try: from itertools import izip except ImportError: izip = zip def nms(boxes, overlap_threshold, mode='Union'): """ non max suppression Parameters: ---------- box: numpy array n x 5 input bbox array overlap_threshold: float number threshold of overlap mode: float number how to compute overlap ratio, 'Union' or 'Min' Returns: ------- index array of the selected bbox """ # if there are no boxes, return an empty list if len(boxes) == 0: return [] # if the bounding boxes integers, convert them to floats if boxes.dtype.kind == "i": boxes = boxes.astype("float") # initialize the list of picked indexes pick = [] # grab the coordinates of the bounding boxes x1, y1, x2, y2, score = [boxes[:, i] for i in range(5)] area = (x2 - x1 + 1) * (y2 - y1 + 1) idxs = np.argsort(score) # keep looping while some indexes still remain in the indexes list while len(idxs) > 0: # grab the last index in the indexes list and add the index value to the list of picked indexes last = len(idxs) - 1 i = idxs[last] pick.append(i) xx1 = np.maximum(x1[i], x1[idxs[:last]]) yy1 = np.maximum(y1[i], y1[idxs[:last]]) xx2 = np.minimum(x2[i], x2[idxs[:last]]) yy2 = np.minimum(y2[i], y2[idxs[:last]]) # compute the width and height of the bounding box w = np.maximum(0, xx2 - xx1 + 1) h = np.maximum(0, yy2 - yy1 + 1) inter = w * h if mode == 'Min': overlap = inter / np.minimum(area[i], area[idxs[:last]]) else: overlap = inter / (area[i] + area[idxs[:last]] - inter) # delete all indexes from the index list that have idxs = np.delete( idxs, np.concatenate(([last], np.where(overlap > overlap_threshold)[0]))) return pick def adjust_input(in_data): """ adjust the input from (h, w, c) to ( 1, c, h, w) for network input Parameters: ---------- in_data: numpy array of shape (h, w, c) input data Returns: ------- out_data: numpy array of shape (1, c, h, w) reshaped array """ if in_data.dtype is not np.dtype('float32'): out_data = in_data.astype(np.float32) else: out_data = in_data out_data = out_data.transpose((2, 0, 1)) out_data = np.expand_dims(out_data, 0) out_data = (out_data - 127.5) * 0.0078125 return out_data def generate_bbox(map, reg, scale, threshold): """ generate bbox from feature map Parameters: ---------- map: numpy array , n x m x 1 detect score for each position reg: numpy array , n x m x 4 bbox scale: float number scale of this detection threshold: float number detect threshold Returns: ------- bbox array """ stride = 2 cellsize = 12 t_index = np.where(map > threshold) # find nothing if t_index[0].size == 0: return np.array([]) dx1, dy1, dx2, dy2 = [reg[0, i, t_index[0], t_index[1]] for i in range(4)] reg = np.array([dx1, dy1, dx2, dy2]) score = map[t_index[0], t_index[1]] boundingbox = np.vstack([ np.round((stride * t_index[1] + 1) / scale), np.round((stride * t_index[0] + 1) / scale), np.round((stride * t_index[1] + 1 + cellsize) / scale), np.round((stride * t_index[0] + 1 + cellsize) / scale), score, reg ]) return boundingbox.T def detect_first_stage(img, net, scale, threshold): """ run PNet for first stage Parameters: ---------- img: numpy array, bgr order input image scale: float number how much should the input image scale net: PNet worker Returns: ------- total_boxes : bboxes """ height, width, _ = img.shape hs = int(math.ceil(height * scale)) ws = int(math.ceil(width * scale)) im_data = cv2.resize(img, (ws, hs)) # adjust for the network input input_buf = adjust_input(im_data) output = net.predict(input_buf) boxes = generate_bbox(output[1][0, 1, :, :], output[0], scale, threshold) if boxes.size == 0: return None # nms pick = nms(boxes[:, 0:5], 0.5, mode='Union') boxes = boxes[pick] return boxes def detect_first_stage_warpper(args): return detect_first_stage(*args) class MtcnnDetector(object): """ Joint Face Detection and Alignment using Multi-task Cascaded Convolutional Neural Networks see https://github.com/kpzhang93/MTCNN_face_detection_alignment this is a mxnet version """ def __init__(self, model_folder='.', minsize=20, threshold=[0.6, 0.7, 0.8], factor=0.709, num_worker=1, accurate_landmark=False, ctx=mx.cpu()): """ Initialize the detector Parameters: ---------- model_folder : string path for the models minsize : float number minimal face to detect threshold : float number detect threshold for 3 stages factor: float number scale factor for image pyramid num_worker: int number number of processes we use for first stage accurate_landmark: bool use accurate landmark localization or not """ self.num_worker = num_worker self.accurate_landmark = accurate_landmark # load 4 models from folder models = ['det1', 'det2', 'det3', 'det4'] models = [os.path.join(model_folder, f) for f in models] self.PNets = [] for i in range(num_worker): workner_net = mx.model.FeedForward.load(models[0], 1, ctx=ctx) self.PNets.append(workner_net) #self.Pool = Pool(num_worker) self.RNet = mx.model.FeedForward.load(models[1], 1, ctx=ctx) self.ONet = mx.model.FeedForward.load(models[2], 1, ctx=ctx) self.LNet = mx.model.FeedForward.load(models[3], 1, ctx=ctx) self.minsize = float(minsize) self.factor = float(factor) self.threshold = threshold def convert_to_square(self, bbox): """ convert bbox to square Parameters: ---------- bbox: numpy array , shape n x 5 input bbox Returns: ------- square bbox """ square_bbox = bbox.copy() h = bbox[:, 3] - bbox[:, 1] + 1 w = bbox[:, 2] - bbox[:, 0] + 1 max_side = np.maximum(h, w) square_bbox[:, 0] = bbox[:, 0] + w * 0.5 - max_side * 0.5 square_bbox[:, 1] = bbox[:, 1] + h * 0.5 - max_side * 0.5 square_bbox[:, 2] = square_bbox[:, 0] + max_side - 1 square_bbox[:, 3] = square_bbox[:, 1] + max_side - 1 return square_bbox def calibrate_box(self, bbox, reg): """ calibrate bboxes Parameters: ---------- bbox: numpy array, shape n x 5 input bboxes reg: numpy array, shape n x 4 bboxex adjustment Returns: ------- bboxes after refinement """ w = bbox[:, 2] - bbox[:, 0] + 1 w = np.expand_dims(w, 1) h = bbox[:, 3] - bbox[:, 1] + 1 h = np.expand_dims(h, 1) reg_m = np.hstack([w, h, w, h]) aug = reg_m * reg bbox[:, 0:4] = bbox[:, 0:4] + aug return bbox def pad(self, bboxes, w, h): """ pad the the bboxes, alse restrict the size of it Parameters: ---------- bboxes: numpy array, n x 5 input bboxes w: float number width of the input image h: float number height of the input image Returns : ------s dy, dx : numpy array, n x 1 start point of the bbox in target image edy, edx : numpy array, n x 1 end point of the bbox in target image y, x : numpy array, n x 1 start point of the bbox in original image ex, ex : numpy array, n x 1 end point of the bbox in original image tmph, tmpw: numpy array, n x 1 height and width of the bbox """ tmpw, tmph = bboxes[:, 2] - bboxes[:, 0] + 1, bboxes[:, 3] - bboxes[:, 1] + 1 num_box = bboxes.shape[0] dx, dy = np.zeros((num_box, )), np.zeros((num_box, )) edx, edy = tmpw.copy() - 1, tmph.copy() - 1 x, y, ex, ey = bboxes[:, 0], bboxes[:, 1], bboxes[:, 2], bboxes[:, 3] tmp_index = np.where(ex > w - 1) edx[tmp_index] = tmpw[tmp_index] + w - 2 - ex[tmp_index] ex[tmp_index] = w - 1 tmp_index = np.where(ey > h - 1) edy[tmp_index] = tmph[tmp_index] + h - 2 - ey[tmp_index] ey[tmp_index] = h - 1 tmp_index = np.where(x < 0) dx[tmp_index] = 0 - x[tmp_index] x[tmp_index] = 0 tmp_index = np.where(y < 0) dy[tmp_index] = 0 - y[tmp_index] y[tmp_index] = 0 return_list = [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] return_list = [item.astype(np.int32) for item in return_list] return return_list def slice_index(self, number): """ slice the index into (n,n,m), m < n Parameters: ---------- number: int number number """ def chunks(l, n): """Yield successive n-sized chunks from l.""" for i in range(0, len(l), n): yield l[i:i + n] num_list = range(number) return list(chunks(num_list, self.num_worker)) def detect_face_limited(self, img, det_type=2): height, width, _ = img.shape if det_type >= 2: total_boxes = np.array( [[0.0, 0.0, img.shape[1], img.shape[0], 0.9]], dtype=np.float32) num_box = total_boxes.shape[0] # pad the bbox [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(total_boxes, width, height) # (3, 24, 24) is the input shape for RNet input_buf = np.zeros((num_box, 3, 24, 24), dtype=np.float32) for i in range(num_box): tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8) tmp[dy[i]:edy[i] + 1, dx[i]:edx[i] + 1, :] = img[y[i]:ey[i] + 1, x[i]:ex[i] + 1, :] input_buf[i, :, :, :] = adjust_input(cv2.resize(tmp, (24, 24))) output = self.RNet.predict(input_buf) # filter the total_boxes with threshold passed = np.where(output[1][:, 1] > self.threshold[1]) total_boxes = total_boxes[passed] if total_boxes.size == 0: return None total_boxes[:, 4] = output[1][passed, 1].reshape((-1, )) reg = output[0][passed] # nms pick = nms(total_boxes, 0.7, 'Union') total_boxes = total_boxes[pick] total_boxes = self.calibrate_box(total_boxes, reg[pick]) total_boxes = self.convert_to_square(total_boxes) total_boxes[:, 0:4] = np.round(total_boxes[:, 0:4]) else: total_boxes = np.array( [[0.0, 0.0, img.shape[1], img.shape[0], 0.9]], dtype=np.float32) num_box = total_boxes.shape[0] [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(total_boxes, width, height) # (3, 48, 48) is the input shape for ONet input_buf = np.zeros((num_box, 3, 48, 48), dtype=np.float32) for i in range(num_box): tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.float32) tmp[dy[i]:edy[i] + 1, dx[i]:edx[i] + 1, :] = img[y[i]:ey[i] + 1, x[i]:ex[i] + 1, :] input_buf[i, :, :, :] = adjust_input(cv2.resize(tmp, (48, 48))) output = self.ONet.predict(input_buf) #print(output[2]) # filter the total_boxes with threshold passed = np.where(output[2][:, 1] > self.threshold[2]) total_boxes = total_boxes[passed] if total_boxes.size == 0: return None total_boxes[:, 4] = output[2][passed, 1].reshape((-1, )) reg = output[1][passed] points = output[0][passed] # compute landmark points bbw = total_boxes[:, 2] - total_boxes[:, 0] + 1 bbh = total_boxes[:, 3] - total_boxes[:, 1] + 1 points[:, 0:5] = np.expand_dims( total_boxes[:, 0], 1) + np.expand_dims(bbw, 1) * points[:, 0:5] points[:, 5:10] = np.expand_dims( total_boxes[:, 1], 1) + np.expand_dims(bbh, 1) * points[:, 5:10] # nms total_boxes = self.calibrate_box(total_boxes, reg) pick = nms(total_boxes, 0.7, 'Min') total_boxes = total_boxes[pick] points = points[pick] if not self.accurate_landmark: return total_boxes, points ############################################# # extended stage ############################################# num_box = total_boxes.shape[0] patchw = np.maximum(total_boxes[:, 2] - total_boxes[:, 0] + 1, total_boxes[:, 3] - total_boxes[:, 1] + 1) patchw = np.round(patchw * 0.25) # make it even patchw[np.where(np.mod(patchw, 2) == 1)] += 1 input_buf = np.zeros((num_box, 15, 24, 24), dtype=np.float32) for i in range(5): x, y = points[:, i], points[:, i + 5] x, y = np.round(x - 0.5 * patchw), np.round(y - 0.5 * patchw) [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad( np.vstack([x, y, x + patchw - 1, y + patchw - 1]).T, width, height) for j in range(num_box): tmpim = np.zeros((tmpw[j], tmpw[j], 3), dtype=np.float32) tmpim[dy[j]:edy[j] + 1, dx[j]:edx[j] + 1, :] = img[y[j]:ey[j] + 1, x[j]:ex[j] + 1, :] input_buf[j, i * 3:i * 3 + 3, :, :] = adjust_input( cv2.resize(tmpim, (24, 24))) output = self.LNet.predict(input_buf) pointx = np.zeros((num_box, 5)) pointy = np.zeros((num_box, 5)) for k in range(5): # do not make a large movement tmp_index = np.where(np.abs(output[k] - 0.5) > 0.35) output[k][tmp_index[0]] = 0.5 pointx[:, k] = np.round(points[:, k] - 0.5 * patchw) + output[k][:, 0] * patchw pointy[:, k] = np.round(points[:, k + 5] - 0.5 * patchw) + output[k][:, 1] * patchw points = np.hstack([pointx, pointy]) points = points.astype(np.int32) return total_boxes, points def detect_face(self, img, det_type=0): """ detect face over img Parameters: ---------- img: numpy array, bgr order of shape (1, 3, n, m) input image Retures: ------- bboxes: numpy array, n x 5 (x1,y2,x2,y2,score) bboxes points: numpy array, n x 10 (x1, x2 ... x5, y1, y2 ..y5) landmarks """ # check input height, width, _ = img.shape if det_type == 0: MIN_DET_SIZE = 12 if img is None: return None # only works for color image if len(img.shape) != 3: return None # detected boxes total_boxes = [] minl = min(height, width) # get all the valid scales scales = [] m = MIN_DET_SIZE / self.minsize minl *= m factor_count = 0 while minl > MIN_DET_SIZE: scales.append(m * self.factor**factor_count) minl *= self.factor factor_count += 1 ############################################# # first stage ############################################# #for scale in scales: # return_boxes = self.detect_first_stage(img, scale, 0) # if return_boxes is not None: # total_boxes.append(return_boxes) sliced_index = self.slice_index(len(scales)) total_boxes = [] for batch in sliced_index: #local_boxes = self.Pool.map( detect_first_stage_warpper, \ # izip(repeat(img), self.PNets[:len(batch)], [scales[i] for i in batch], repeat(self.threshold[0])) ) local_boxes = map( detect_first_stage_warpper, \ izip(repeat(img), self.PNets[:len(batch)], [scales[i] for i in batch], repeat(self.threshold[0])) ) total_boxes.extend(local_boxes) # remove the Nones total_boxes = [i for i in total_boxes if i is not None] if len(total_boxes) == 0: return None total_boxes = np.vstack(total_boxes) if total_boxes.size == 0: return None # merge the detection from first stage pick = nms(total_boxes[:, 0:5], 0.7, 'Union') total_boxes = total_boxes[pick] bbw = total_boxes[:, 2] - total_boxes[:, 0] + 1 bbh = total_boxes[:, 3] - total_boxes[:, 1] + 1 # refine the bboxes total_boxes = np.vstack([ total_boxes[:, 0] + total_boxes[:, 5] * bbw, total_boxes[:, 1] + total_boxes[:, 6] * bbh, total_boxes[:, 2] + total_boxes[:, 7] * bbw, total_boxes[:, 3] + total_boxes[:, 8] * bbh, total_boxes[:, 4] ]) total_boxes = total_boxes.T total_boxes = self.convert_to_square(total_boxes) total_boxes[:, 0:4] = np.round(total_boxes[:, 0:4]) else: total_boxes = np.array( [[0.0, 0.0, img.shape[1], img.shape[0], 0.9]], dtype=np.float32) ############################################# # second stage ############################################# num_box = total_boxes.shape[0] # pad the bbox [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(total_boxes, width, height) # (3, 24, 24) is the input shape for RNet input_buf = np.zeros((num_box, 3, 24, 24), dtype=np.float32) for i in range(num_box): tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8) tmp[dy[i]:edy[i] + 1, dx[i]:edx[i] + 1, :] = img[y[i]:ey[i] + 1, x[i]:ex[i] + 1, :] input_buf[i, :, :, :] = adjust_input(cv2.resize(tmp, (24, 24))) output = self.RNet.predict(input_buf) # filter the total_boxes with threshold passed = np.where(output[1][:, 1] > self.threshold[1]) total_boxes = total_boxes[passed] if total_boxes.size == 0: return None total_boxes[:, 4] = output[1][passed, 1].reshape((-1, )) reg = output[0][passed] # nms pick = nms(total_boxes, 0.7, 'Union') total_boxes = total_boxes[pick] total_boxes = self.calibrate_box(total_boxes, reg[pick]) total_boxes = self.convert_to_square(total_boxes) total_boxes[:, 0:4] = np.round(total_boxes[:, 0:4]) ############################################# # third stage ############################################# num_box = total_boxes.shape[0] # pad the bbox [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(total_boxes, width, height) # (3, 48, 48) is the input shape for ONet input_buf = np.zeros((num_box, 3, 48, 48), dtype=np.float32) for i in range(num_box): tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.float32) tmp[dy[i]:edy[i] + 1, dx[i]:edx[i] + 1, :] = img[y[i]:ey[i] + 1, x[i]:ex[i] + 1, :] input_buf[i, :, :, :] = adjust_input(cv2.resize(tmp, (48, 48))) output = self.ONet.predict(input_buf) # filter the total_boxes with threshold passed = np.where(output[2][:, 1] > self.threshold[2]) total_boxes = total_boxes[passed] if total_boxes.size == 0: return None total_boxes[:, 4] = output[2][passed, 1].reshape((-1, )) reg = output[1][passed] points = output[0][passed] # compute landmark points bbw = total_boxes[:, 2] - total_boxes[:, 0] + 1 bbh = total_boxes[:, 3] - total_boxes[:, 1] + 1 points[:, 0:5] = np.expand_dims( total_boxes[:, 0], 1) + np.expand_dims(bbw, 1) * points[:, 0:5] points[:, 5:10] = np.expand_dims( total_boxes[:, 1], 1) + np.expand_dims(bbh, 1) * points[:, 5:10] # nms total_boxes = self.calibrate_box(total_boxes, reg) pick = nms(total_boxes, 0.7, 'Min') total_boxes = total_boxes[pick] points = points[pick] if not self.accurate_landmark: return total_boxes, points ############################################# # extended stage ############################################# num_box = total_boxes.shape[0] patchw = np.maximum(total_boxes[:, 2] - total_boxes[:, 0] + 1, total_boxes[:, 3] - total_boxes[:, 1] + 1) patchw = np.round(patchw * 0.25) # make it even patchw[np.where(np.mod(patchw, 2) == 1)] += 1 input_buf = np.zeros((num_box, 15, 24, 24), dtype=np.float32) for i in range(5): x, y = points[:, i], points[:, i + 5] x, y = np.round(x - 0.5 * patchw), np.round(y - 0.5 * patchw) [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad( np.vstack([x, y, x + patchw - 1, y + patchw - 1]).T, width, height) for j in range(num_box): tmpim = np.zeros((tmpw[j], tmpw[j], 3), dtype=np.float32) tmpim[dy[j]:edy[j] + 1, dx[j]:edx[j] + 1, :] = img[y[j]:ey[j] + 1, x[j]:ex[j] + 1, :] input_buf[j, i * 3:i * 3 + 3, :, :] = adjust_input( cv2.resize(tmpim, (24, 24))) output = self.LNet.predict(input_buf) pointx = np.zeros((num_box, 5)) pointy = np.zeros((num_box, 5)) for k in range(5): # do not make a large movement tmp_index = np.where(np.abs(output[k] - 0.5) > 0.35) output[k][tmp_index[0]] = 0.5 pointx[:, k] = np.round(points[:, k] - 0.5 * patchw) + output[k][:, 0] * patchw pointy[:, k] = np.round(points[:, k + 5] - 0.5 * patchw) + output[k][:, 1] * patchw points = np.hstack([pointx, pointy]) points = points.astype(np.int32) return total_boxes, points def list2colmatrix(self, pts_list): """ convert list to column matrix Parameters: ---------- pts_list: input list Retures: ------- colMat: """ assert len(pts_list) > 0 colMat = [] for i in range(len(pts_list)): colMat.append(pts_list[i][0]) colMat.append(pts_list[i][1]) colMat = np.matrix(colMat).transpose() return colMat def find_tfrom_between_shapes(self, from_shape, to_shape): """ find transform between shapes Parameters: ---------- from_shape: to_shape: Retures: ------- tran_m: tran_b: """ assert from_shape.shape[0] == to_shape.shape[ 0] and from_shape.shape[0] % 2 == 0 sigma_from = 0.0 sigma_to = 0.0 cov = np.matrix([[0.0, 0.0], [0.0, 0.0]]) # compute the mean and cov from_shape_points = from_shape.reshape(from_shape.shape[0] / 2, 2) to_shape_points = to_shape.reshape(to_shape.shape[0] / 2, 2) mean_from = from_shape_points.mean(axis=0) mean_to = to_shape_points.mean(axis=0) for i in range(from_shape_points.shape[0]): temp_dis = np.linalg.norm(from_shape_points[i] - mean_from) sigma_from += temp_dis * temp_dis temp_dis = np.linalg.norm(to_shape_points[i] - mean_to) sigma_to += temp_dis * temp_dis cov += (to_shape_points[i].transpose() - mean_to.transpose()) * (from_shape_points[i] - mean_from) sigma_from = sigma_from / to_shape_points.shape[0] sigma_to = sigma_to / to_shape_points.shape[0] cov = cov / to_shape_points.shape[0] # compute the affine matrix s = np.matrix([[1.0, 0.0], [0.0, 1.0]]) u, d, vt = np.linalg.svd(cov) if np.linalg.det(cov) < 0: if d[1] < d[0]: s[1, 1] = -1 else: s[0, 0] = -1 r = u * s * vt c = 1.0 if sigma_from != 0: c = 1.0 / sigma_from * np.trace(np.diag(d) * s) tran_b = mean_to.transpose() - c * r * mean_from.transpose() tran_m = c * r return tran_m, tran_b def extract_image_chips(self, img, points, desired_size=256, padding=0): """ crop and align face Parameters: ---------- img: numpy array, bgr order of shape (1, 3, n, m) input image points: numpy array, n x 10 (x1, x2 ... x5, y1, y2 ..y5) desired_size: default 256 padding: default 0 Retures: ------- crop_imgs: list, n cropped and aligned faces """ crop_imgs = [] for p in points: shape = [] for k in range(len(p) / 2): shape.append(p[k]) shape.append(p[k + 5]) if padding > 0: padding = padding else: padding = 0 # average positions of face points mean_face_shape_x = [ 0.224152, 0.75610125, 0.490127, 0.254149, 0.726104 ] mean_face_shape_y = [ 0.2119465, 0.2119465, 0.628106, 0.780233, 0.780233 ] from_points = [] to_points = [] for i in range(len(shape) / 2): x = (padding + mean_face_shape_x[i]) / (2 * padding + 1) * desired_size y = (padding + mean_face_shape_y[i]) / (2 * padding + 1) * desired_size to_points.append([x, y]) from_points.append([shape[2 * i], shape[2 * i + 1]]) # convert the points to Mat from_mat = self.list2colmatrix(from_points) to_mat = self.list2colmatrix(to_points) # compute the similar transfrom tran_m, tran_b = self.find_tfrom_between_shapes(from_mat, to_mat) probe_vec = np.matrix([1.0, 0.0]).transpose() probe_vec = tran_m * probe_vec scale = np.linalg.norm(probe_vec) angle = 180.0 / math.pi * math.atan2(probe_vec[1, 0], probe_vec[0, 0]) from_center = [(shape[0] + shape[2]) / 2.0, (shape[1] + shape[3]) / 2.0] to_center = [0, 0] to_center[1] = desired_size * 0.4 to_center[0] = desired_size * 0.5 ex = to_center[0] - from_center[0] ey = to_center[1] - from_center[1] rot_mat = cv2.getRotationMatrix2D((from_center[0], from_center[1]), -1 * angle, scale) rot_mat[0][2] += ex rot_mat[1][2] += ey chips = cv2.warpAffine(img, rot_mat, (desired_size, desired_size)) crop_imgs.append(chips) return crop_imgs