use retinaface in deploy/face_model

This commit is contained in:
nttstar
2020-11-07 13:22:05 +08:00
parent b774d6a1b7
commit fd10b74b45
7 changed files with 201 additions and 476 deletions

View File

@@ -1,39 +0,0 @@
import face_embedding
import argparse
import cv2
import numpy as np
import datetime
# Command-line options for the embedding throughput benchmark.
parser = argparse.ArgumentParser(description='face model test')
# general
parser.add_argument('--image-size', default='112,112', help='')
parser.add_argument(
    '--model',
    default='../models/model-r34-amf/model,0',
    help='path to load model.',
)
parser.add_argument('--gpu', default=0, type=int, help='gpu id')
parser.add_argument(
    '--det',
    default=2,
    type=int,
    help='mtcnn option, 2 means using R+O, else using O',
)
parser.add_argument(
    '--flip',
    default=0,
    type=int,
    help='whether do lr flip aug',
)
parser.add_argument(
    '--threshold',
    default=1.24,
    type=float,
    help='ver dist threshold',
)
args = parser.parse_args()

model = face_embedding.FaceModel(args)

# Sample image fed to the model on every iteration. An LFW sample that was
# used previously: '/raid5data/dplearn/lfw/Jude_Law/Jude_Law_0001.jpg'
img = cv2.imread(
    '/raid5data/dplearn/megaface/facescrubr/112x112/Tom_Hanks/Tom_Hanks_54745.png'
)

# Run get_feature 3000 times on the same image and print the mean
# wall-clock seconds per call.
time_now = datetime.datetime.now()
for i in range(3000):
    f1 = model.get_feature(img)
time_now2 = datetime.datetime.now()
diff = time_now2 - time_now
print(diff.total_seconds() / 3000)

View File

@@ -1,102 +0,0 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from scipy import misc
import sys
import os
import argparse
import tensorflow as tf
import numpy as np
import mxnet as mx
import random
import cv2
import sklearn
from sklearn.decomposition import PCA
from time import sleep
from easydict import EasyDict as edict
from mtcnn_detector import MtcnnDetector
sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'src', 'common'))
import face_image
import face_preprocess
def do_flip(data):
    """Mirror ``data`` left-to-right, in place.

    Each entry along the first axis is replaced by itself with its second
    axis reversed, i.e. for a 3-D array the last axis of ``data`` is
    reversed. Nothing is returned; the caller's array is modified.
    """
    for plane in data:
        # Copy the reversed view first so source and destination never alias.
        plane[...] = plane[:, ::-1].copy()
class FaceModel:
    """Detect a face with MTCNN, align it and compute its embedding.

    ``args`` is the parsed option namespace. The attributes read from it are
    ``image_size`` (``'H,W'`` string), ``model`` (``'prefix,epoch'`` string),
    ``gpu``, ``det``, ``flip`` and ``threshold``.
    """

    def __init__(self, args):
        """Load the embedding checkpoint and build the MTCNN detector."""
        self.args = args
        self.threshold = args.threshold
        self.det_minsize = 50
        # NOTE: stored for callers only; the detector below is created with
        # its own, looser thresholds.
        self.det_threshold = [0.4, 0.6, 0.6]
        self.det_factor = 0.9

        _vec = args.image_size.split(',')
        assert len(_vec) == 2
        image_size = (int(_vec[0]), int(_vec[1]))
        self.image_size = image_size

        _vec = args.model.split(',')
        assert len(_vec) == 2
        prefix = _vec[0]
        epoch = int(_vec[1])
        print('loading', prefix, epoch)

        # A negative gpu id selects the CPU instead of failing at bind time.
        ctx = mx.gpu(args.gpu) if args.gpu >= 0 else mx.cpu()
        sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
        # Cut the graph at the embedding layer.
        sym = sym.get_internals()['fc1_output']
        model = mx.mod.Module(symbol=sym, context=ctx, label_names=None)
        model.bind(data_shapes=[('data', (1, 3, image_size[0],
                                          image_size[1]))])
        model.set_params(arg_params, aux_params)
        self.model = model

        mtcnn_path = os.path.join(os.path.dirname(__file__), 'mtcnn-model')
        self.detector = MtcnnDetector(model_folder=mtcnn_path,
                                      ctx=ctx,
                                      num_worker=1,
                                      accurate_landmark=True,
                                      threshold=[0.0, 0.0, 0.2])

    def _forward(self, aligned):
        """Run the embedding network on one (3, H, W) image; return its raw output."""
        input_blob = np.expand_dims(aligned, axis=0)
        data = mx.nd.array(input_blob)
        db = mx.io.DataBatch(data=(data, ))
        self.model.forward(db, is_train=False)
        return self.model.get_outputs()[0].asnumpy()

    def get_feature(self, face_img):
        """Return the L2-normalised embedding of the first detected face.

        ``face_img`` is a BGR image. Returns ``None`` when the detector
        reports no face. When ``args.flip`` is non-zero the embedding of the
        horizontally flipped crop is added before normalisation.
        """
        # Imported here so the submodule is guaranteed to be loaded;
        # ``import sklearn`` alone does not expose ``sklearn.preprocessing``.
        from sklearn.preprocessing import normalize

        ret = self.detector.detect_face_limited(face_img,
                                                det_type=self.args.det)
        if ret is None:
            return None
        bbox, points = ret
        if bbox.shape[0] == 0:
            return None
        bbox = bbox[0, 0:4]
        points = points[0, :].reshape((2, 5)).T

        # Align to the size the network was bound with, not a fixed 112x112.
        nimg = face_preprocess.preprocess(face_img,
                                          bbox,
                                          points,
                                          image_size='%d,%d' % self.image_size)
        nimg = cv2.cvtColor(nimg, cv2.COLOR_BGR2RGB)
        aligned = np.transpose(nimg, (2, 0, 1))

        embedding = self._forward(aligned)
        if self.args.flip != 0:
            do_flip(aligned)
            embedding += self._forward(aligned)
        return normalize(embedding).flatten()

View File

@@ -2,23 +2,14 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from scipy import misc
import sys
import os
import argparse
#import tensorflow as tf
import numpy as np
import mxnet as mx
import random
import cv2
import sklearn
from sklearn.decomposition import PCA
from time import sleep
from easydict import EasyDict as edict
from mtcnn_detector import MtcnnDetector
sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'src', 'common'))
import face_image
import face_preprocess
import insightface
from insightface.utils import face_align
def do_flip(data):
@@ -26,11 +17,7 @@ def do_flip(data):
data[idx, :, :] = np.fliplr(data[idx, :, :])
def get_model(ctx, image_size, model_str, layer):
_vec = model_str.split(',')
assert len(_vec) == 2
prefix = _vec[0]
epoch = int(_vec[1])
def get_model(ctx, image_size, prefix, epoch, layer):
print('loading', prefix, epoch)
sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
all_layers = sym.get_internals()
@@ -43,77 +30,38 @@ def get_model(ctx, image_size, model_str, layer):
class FaceModel:
def __init__(self, args):
self.args = args
ctx = mx.gpu(args.gpu)
_vec = args.image_size.split(',')
assert len(_vec) == 2
image_size = (int(_vec[0]), int(_vec[1]))
self.model = None
self.ga_model = None
if len(args.model) > 0:
self.model = get_model(ctx, image_size, args.model, 'fc1')
if len(args.ga_model) > 0:
self.ga_model = get_model(ctx, image_size, args.ga_model, 'fc1')
self.threshold = args.threshold
self.det_minsize = 50
self.det_threshold = [0.6, 0.7, 0.8]
#self.det_factor = 0.9
self.image_size = image_size
mtcnn_path = os.path.join(os.path.dirname(__file__), 'mtcnn-model')
if args.det == 0:
detector = MtcnnDetector(model_folder=mtcnn_path,
ctx=ctx,
num_worker=1,
accurate_landmark=True,
threshold=self.det_threshold)
def __init__(self, ctx_id, model_prefix, model_epoch, use_large_detector=False):
if use_large_detector:
self.detector = insightface.model_zoo.get_model('retinaface_r50_v1')
else:
detector = MtcnnDetector(model_folder=mtcnn_path,
ctx=ctx,
num_worker=1,
accurate_landmark=True,
threshold=[0.0, 0.0, 0.2])
self.detector = detector
self.detector = insightface.model_zoo.get_model('retinaface_mnet025_v2')
self.detector.prepare(ctx_id=ctx_id)
if ctx_id>=0:
ctx = mx.gpu(ctx_id)
else:
ctx = mx.cpu()
image_size = (112,112)
self.model = get_model(ctx, image_size, model_prefix, model_epoch, 'fc1')
self.image_size = image_size
def get_input(self, face_img):
ret = self.detector.detect_face(face_img, det_type=self.args.det)
if ret is None:
return None
bbox, points = ret
if bbox.shape[0] == 0:
bbox, pts5 = self.detector.detect(face_img, threshold=0.8)
if bbox.shape[0]==0:
return None
bbox = bbox[0, 0:4]
points = points[0, :].reshape((2, 5)).T
#print(bbox)
#print(points)
nimg = face_preprocess.preprocess(face_img,
bbox,
points,
image_size='112,112')
nimg = cv2.cvtColor(nimg, cv2.COLOR_BGR2RGB)
aligned = np.transpose(nimg, (2, 0, 1))
return aligned
pts5 = pts5[0, :]
nimg = face_align.norm_crop(face_img, pts5)
return nimg
def get_feature(self, aligned):
input_blob = np.expand_dims(aligned, axis=0)
a = cv2.cvtColor(aligned, cv2.COLOR_BGR2RGB)
a = np.transpose(a, (2, 0, 1))
input_blob = np.expand_dims(a, axis=0)
data = mx.nd.array(input_blob)
db = mx.io.DataBatch(data=(data, ))
self.model.forward(db, is_train=False)
embedding = self.model.get_outputs()[0].asnumpy()
embedding = sklearn.preprocessing.normalize(embedding).flatten()
return embedding
emb = self.model.get_outputs()[0].asnumpy()[0]
norm = np.sqrt(np.sum(emb*emb)+0.00001)
emb /= norm
return emb
def get_ga(self, aligned):
    """Run the gender/age network on one aligned face and decode the result.

    ``aligned`` gets a leading batch axis and is fed to ``self.ga_model``.
    From the first output, columns 0-1 are arg-maxed into ``gender`` and
    columns 2-201 are viewed as 100 pairs; ``age`` is the number of pairs
    whose second entry wins the arg-max.

    Returns the tuple ``(gender, age)``.
    """
    batch = mx.io.DataBatch(
        data=(mx.nd.array(np.expand_dims(aligned, axis=0)), ))
    self.ga_model.forward(batch, is_train=False)
    ret = self.ga_model.get_outputs()[0].asnumpy()

    gender = np.argmax(ret[:, 0:2].flatten())

    age_votes = np.argmax(ret[:, 2:202].reshape((100, 2)), axis=1)
    age = int(sum(age_votes))
    return gender, age

View File

@@ -1,53 +0,0 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import sys
import os
import argparse
import numpy as np
import mxnet as mx
# Merge the 'age*' parameters of one checkpoint and the 'gender*' parameters
# of another into a single checkpoint saved as epoch 0 under --prefix.
parser = argparse.ArgumentParser(description='merge age and gender models')
# general
parser.add_argument('--age-model', default='', help='path to load age model.')
parser.add_argument('--gender-model',
                    default='',
                    help='path to load gender model.')
parser.add_argument('--prefix', default='', help='path to save model.')
args = parser.parse_args()

tsym = None  # output symbol, taken from the first checkpoint loaded
targ = {}    # merged arg params
taux = {}    # merged aux params
for i, model in enumerate([args.age_model, args.gender_model]):
    # Each model is given as 'prefix,epoch'.
    _vec = model.split(',')
    assert len(_vec) == 2
    prefix = _vec[0]
    epoch = int(_vec[1])
    print('loading', prefix, epoch)
    sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
    if tsym is None:
        all_layers = sym.get_internals()
        tsym = all_layers['fc1_output']

    # Only parameters whose name starts with this branch's prefix are kept.
    name_prefix = 'age' if i == 0 else 'gender'
    # dict.items() works on both Python 2 and 3; iteritems() is Python 2 only.
    for k, v in arg_params.items():
        if k.startswith(name_prefix):
            print('arg', i, k)
            targ[k] = v
    for k, v in aux_params.items():
        if k.startswith(name_prefix):
            print('aux', i, k)
            taux[k] = v

# Names of merged arg params to drop before saving; currently none.
dellist = []
for d in dellist:
    del targ[d]
mx.model.save_checkpoint(args.prefix, 0, tsym, targ, taux)

View File

@@ -1,172 +0,0 @@
# coding: utf-8
# YuanYang
import math
import cv2
import numpy as np
def nms(boxes, overlap_threshold, mode='Union'):
    """
    greedy non max suppression over scored boxes
    Parameters:
    ----------
    boxes: numpy array n x 5
    one row per box: x1, y1, x2, y2, score
    overlap_threshold: float number
    a box is dropped when its overlap with an already picked box
    is greater than this value
    mode: string
    'Min' divides the intersection by the smaller of the two areas,
    any other value (default 'Union') divides by the union area
    Returns:
    -------
    list of row indexes of the kept boxes, highest score first
    (an empty list when there are no boxes)
    """
    # if there are no boxes, return an empty list
    if len(boxes) == 0:
        return []

    # Integer coordinates are converted to float before any arithmetic.
    # Unsigned ones must be included: they would wrap around in the
    # subtractions below and make disjoint boxes look overlapping.
    if boxes.dtype.kind in "iu":
        boxes = boxes.astype("float")

    x1, y1, x2, y2, score = [boxes[:, i] for i in range(5)]
    area = (x2 - x1 + 1) * (y2 - y1 + 1)
    # ascending by score, so the best remaining candidate is always last
    idxs = np.argsort(score)

    pick = []
    while len(idxs) > 0:
        last = len(idxs) - 1
        i = idxs[last]
        pick.append(i)
        rest = idxs[:last]

        # intersection of the picked box with every remaining box
        xx1 = np.maximum(x1[i], x1[rest])
        yy1 = np.maximum(y1[i], y1[rest])
        xx2 = np.minimum(x2[i], x2[rest])
        yy2 = np.minimum(y2[i], y2[rest])
        w = np.maximum(0, xx2 - xx1 + 1)
        h = np.maximum(0, yy2 - yy1 + 1)
        inter = w * h

        if mode == 'Min':
            overlap = inter / np.minimum(area[i], area[rest])
        else:
            overlap = inter / (area[i] + area[rest] - inter)

        # drop the picked box and everything it overlaps too much
        suppressed = np.where(overlap > overlap_threshold)[0]
        idxs = np.delete(idxs, np.concatenate(([last], suppressed)))
    return pick
def adjust_input(in_data):
    """
    turn an (h, w, c) image into a normalised (1, c, h, w) float32 blob
    Parameters:
    ----------
    in_data: numpy array of shape (h, w, c)
    input data
    Returns:
    -------
    out_data: numpy array of shape (1, c, h, w)
    float32 values computed as (x - 127.5) * 0.0078125
    """
    # no copy is made when the data already is float32
    as_float = in_data.astype(np.float32, copy=False)
    # channels first, then a leading batch axis of length one
    batched = as_float.transpose((2, 0, 1))[np.newaxis, ...]
    return (batched - 127.5) * 0.0078125
def generate_bbox(map, reg, scale, threshold):
    """
    turn the cells of a score map that pass the threshold into boxes
    Parameters:
    ----------
    map: numpy array, 2-d
    detect score for each position
    reg: numpy array, indexed as reg[0, k, row, col] with k in 0..3
    the four regression values of each position
    scale: float number
    scale of this detection, box corners are divided by it
    threshold: float number
    only positions whose score is greater than this are kept
    Returns:
    -------
    array with one row per kept position:
    x1, y1, x2, y2, score, followed by the four regression values
    (an empty array when no position passes the threshold)
    """
    stride = 2
    cellsize = 12

    hits = np.where(map > threshold)
    rows = hits[0]
    # find nothing
    if rows.size == 0:
        return np.array([])
    cols = hits[1]

    offsets = np.array([reg[0, k, rows, cols] for k in range(4)])
    score = map[rows, cols]

    # top-left corner of each cell in the scaled image; the opposite
    # corner lies one cell size further
    left = stride * cols + 1
    top = stride * rows + 1
    boundingbox = np.vstack([
        np.round(left / scale),
        np.round(top / scale),
        np.round((left + cellsize) / scale),
        np.round((top + cellsize) / scale),
        score,
        offsets,
    ])
    return boundingbox.T
def detect_first_stage(img, net, scale, threshold):
    """
    run PNet on one rescaled copy of the image
    Parameters:
    ----------
    img: numpy array, bgr order
    input image, (h, w, c)
    net: PNet
    worker, must offer predict(blob)
    scale: float number
    factor applied to the image height and width before detection
    threshold: float number
    score threshold forwarded to generate_bbox
    Returns:
    -------
    total_boxes : bboxes that survive nms at overlap 0.5,
    or None when nothing passes the threshold
    """
    height, width, _channels = img.shape
    # cv2.resize takes the target size as (width, height)
    scaled_size = (int(math.ceil(width * scale)),
                   int(math.ceil(height * scale)))
    # adjust for the network input
    net_input = adjust_input(cv2.resize(img, scaled_size))
    output = net.predict(net_input)

    # output[1] supplies the score map (index 1 of its second axis),
    # output[0] the box regression values
    candidates = generate_bbox(output[1][0, 1, :, :], output[0], scale,
                               threshold)
    if candidates.size == 0:
        return None

    # nms
    keep = nms(candidates[:, 0:5], 0.5, mode='Union')
    return candidates[keep]
def detect_first_stage_warpper(args):
    """Call detect_first_stage with its arguments packed in one sequence.

    ``args`` is unpacked positionally as (img, net, scale, threshold); the
    result of detect_first_stage is returned unchanged.
    """
    return detect_first_stage(*args)

View File

@@ -11,8 +11,171 @@ try:
except ImportError:
izip = zip
from helper import nms, adjust_input, generate_bbox, detect_first_stage_warpper
def nms(boxes, overlap_threshold, mode='Union'):
    """
    greedy non max suppression over scored boxes
    Parameters:
    ----------
    boxes: numpy array n x 5
    one row per box: x1, y1, x2, y2, score
    overlap_threshold: float number
    a box is dropped when its overlap with an already picked box
    is greater than this value
    mode: string
    'Min' divides the intersection by the smaller of the two areas,
    any other value (default 'Union') divides by the union area
    Returns:
    -------
    list of row indexes of the kept boxes, highest score first
    (an empty list when there are no boxes)
    """
    # if there are no boxes, return an empty list
    if len(boxes) == 0:
        return []

    # Integer coordinates are converted to float before any arithmetic.
    # Unsigned ones must be included: they would wrap around in the
    # subtractions below and make disjoint boxes look overlapping.
    if boxes.dtype.kind in "iu":
        boxes = boxes.astype("float")

    x1, y1, x2, y2, score = [boxes[:, i] for i in range(5)]
    area = (x2 - x1 + 1) * (y2 - y1 + 1)
    # ascending by score, so the best remaining candidate is always last
    idxs = np.argsort(score)

    pick = []
    while len(idxs) > 0:
        last = len(idxs) - 1
        i = idxs[last]
        pick.append(i)
        rest = idxs[:last]

        # intersection of the picked box with every remaining box
        xx1 = np.maximum(x1[i], x1[rest])
        yy1 = np.maximum(y1[i], y1[rest])
        xx2 = np.minimum(x2[i], x2[rest])
        yy2 = np.minimum(y2[i], y2[rest])
        w = np.maximum(0, xx2 - xx1 + 1)
        h = np.maximum(0, yy2 - yy1 + 1)
        inter = w * h

        if mode == 'Min':
            overlap = inter / np.minimum(area[i], area[rest])
        else:
            overlap = inter / (area[i] + area[rest] - inter)

        # drop the picked box and everything it overlaps too much
        suppressed = np.where(overlap > overlap_threshold)[0]
        idxs = np.delete(idxs, np.concatenate(([last], suppressed)))
    return pick
def adjust_input(in_data):
    """
    turn an (h, w, c) image into a normalised (1, c, h, w) float32 blob
    Parameters:
    ----------
    in_data: numpy array of shape (h, w, c)
    input data
    Returns:
    -------
    out_data: numpy array of shape (1, c, h, w)
    float32 values computed as (x - 127.5) * 0.0078125
    """
    # no copy is made when the data already is float32
    as_float = in_data.astype(np.float32, copy=False)
    # channels first, then a leading batch axis of length one
    batched = as_float.transpose((2, 0, 1))[np.newaxis, ...]
    return (batched - 127.5) * 0.0078125
def generate_bbox(map, reg, scale, threshold):
    """
    turn the cells of a score map that pass the threshold into boxes
    Parameters:
    ----------
    map: numpy array, 2-d
    detect score for each position
    reg: numpy array, indexed as reg[0, k, row, col] with k in 0..3
    the four regression values of each position
    scale: float number
    scale of this detection, box corners are divided by it
    threshold: float number
    only positions whose score is greater than this are kept
    Returns:
    -------
    array with one row per kept position:
    x1, y1, x2, y2, score, followed by the four regression values
    (an empty array when no position passes the threshold)
    """
    stride = 2
    cellsize = 12

    hits = np.where(map > threshold)
    rows = hits[0]
    # find nothing
    if rows.size == 0:
        return np.array([])
    cols = hits[1]

    offsets = np.array([reg[0, k, rows, cols] for k in range(4)])
    score = map[rows, cols]

    # top-left corner of each cell in the scaled image; the opposite
    # corner lies one cell size further
    left = stride * cols + 1
    top = stride * rows + 1
    boundingbox = np.vstack([
        np.round(left / scale),
        np.round(top / scale),
        np.round((left + cellsize) / scale),
        np.round((top + cellsize) / scale),
        score,
        offsets,
    ])
    return boundingbox.T
def detect_first_stage(img, net, scale, threshold):
    """
    run PNet on one rescaled copy of the image
    Parameters:
    ----------
    img: numpy array, bgr order
    input image, (h, w, c)
    net: PNet
    worker, must offer predict(blob)
    scale: float number
    factor applied to the image height and width before detection
    threshold: float number
    score threshold forwarded to generate_bbox
    Returns:
    -------
    total_boxes : bboxes that survive nms at overlap 0.5,
    or None when nothing passes the threshold
    """
    height, width, _channels = img.shape
    # cv2.resize takes the target size as (width, height)
    scaled_size = (int(math.ceil(width * scale)),
                   int(math.ceil(height * scale)))
    # adjust for the network input
    net_input = adjust_input(cv2.resize(img, scaled_size))
    output = net.predict(net_input)

    # output[1] supplies the score map (index 1 of its second axis),
    # output[0] the box regression values
    candidates = generate_bbox(output[1][0, 1, :, :], output[0], scale,
                               threshold)
    if candidates.size == 0:
        return None

    # nms
    keep = nms(candidates[:, 0:5], 0.5, mode='Union')
    return candidates[keep]
def detect_first_stage_warpper(args):
    """Call detect_first_stage with its arguments packed in one sequence.

    ``args`` is unpacked positionally as (img, net, scale, threshold); the
    result of detect_first_stage is returned unchanged.
    """
    return detect_first_stage(*args)
class MtcnnDetector(object):
"""
@@ -698,3 +861,4 @@ class MtcnnDetector(object):
crop_imgs.append(chips)
return crop_imgs

View File

@@ -8,39 +8,18 @@ parser = argparse.ArgumentParser(description='face model test')
# general
parser.add_argument('--image-size', default='112,112', help='')
parser.add_argument('--model', default='', help='path to load model.')
parser.add_argument('--ga-model', default='', help='path to load model.')
parser.add_argument('--gpu', default=0, type=int, help='gpu id')
parser.add_argument(
'--det',
default=0,
type=int,
help='mtcnn option, 1 means using R+O, 0 means detect from begining')
parser.add_argument('--flip',
default=0,
type=int,
help='whether do lr flip aug')
parser.add_argument('--threshold',
default=1.24,
type=float,
help='ver dist threshold')
args = parser.parse_args()
model = face_model.FaceModel(args)
vec = args.model.split(',')
model_prefix = vec[0]
model_epoch = int(vec[1])
model = face_model.FaceModel(args.gpu, model_prefix, model_epoch)
img = cv2.imread('Tom_Hanks_54745.png')
img = model.get_input(img)
#f1 = model.get_feature(img)
#print(f1[0:10])
gender, age = model.get_ga(img)
print(gender)
print(age)
sys.exit(0)
img = cv2.imread(
'/raid5data/dplearn/megaface/facescrubr/112x112/Tom_Hanks/Tom_Hanks_54733.png'
)
f1 = model.get_feature(img)
f2 = model.get_feature(img)
dist = np.sum(np.square(f1 - f2))
print(dist)
sim = np.dot(f1, f2.T)
print(sim)
#diff = np.subtract(source_feature, target_feature)
#dist = np.sum(np.square(diff),1)
sim = np.dot(f1, f2)
assert(sim>=0.99 and sim<1.01)