use retinaface in deploy/face_model

2025-12-30 08:02:27 +00:00 · 2020-11-07 13:22:05 +08:00
parent b774d6a1b7
commit fd10b74b45
7 changed files with 201 additions and 476 deletions
--- a/deploy/benchmark.py
+++ b/deploy/benchmark.py
@@ -1,39 +0,0 @@
-import face_embedding
-import argparse
-import cv2
-import numpy as np
-import datetime
-
-parser = argparse.ArgumentParser(description='face model test')
-# general
-parser.add_argument('--image-size', default='112,112', help='')
-parser.add_argument('--model',
-                    default='../models/model-r34-amf/model,0',
-                    help='path to load model.')
-parser.add_argument('--gpu', default=0, type=int, help='gpu id')
-parser.add_argument('--det',
-                    default=2,
-                    type=int,
-                    help='mtcnn option, 2 means using R+O, else using O')
-parser.add_argument('--flip',
-                    default=0,
-                    type=int,
-                    help='whether do lr flip aug')
-parser.add_argument('--threshold',
-                    default=1.24,
-                    type=float,
-                    help='ver dist threshold')
-args = parser.parse_args()
-
-model = face_embedding.FaceModel(args)
-#img = cv2.imread('/raid5data/dplearn/lfw/Jude_Law/Jude_Law_0001.jpg')
-img = cv2.imread(
-    '/raid5data/dplearn/megaface/facescrubr/112x112/Tom_Hanks/Tom_Hanks_54745.png'
-)
-
-time_now = datetime.datetime.now()
-for i in range(3000):
-    f1 = model.get_feature(img)
-time_now2 = datetime.datetime.now()
-diff = time_now2 - time_now
-print(diff.total_seconds() / 3000)
--- a/deploy/face_embedding.py
+++ b/deploy/face_embedding.py
@@ -1,102 +0,0 @@
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from scipy import misc
-import sys
-import os
-import argparse
-import tensorflow as tf
-import numpy as np
-import mxnet as mx
-import random
-import cv2
-import sklearn
-from sklearn.decomposition import PCA
-from time import sleep
-from easydict import EasyDict as edict
-from mtcnn_detector import MtcnnDetector
-sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'src', 'common'))
-import face_image
-import face_preprocess
-
-
-def do_flip(data):
-    for idx in range(data.shape[0]):
-        data[idx, :, :] = np.fliplr(data[idx, :, :])
-
-
-class FaceModel:
-    def __init__(self, args):
-        self.args = args
-        model = edict()
-
-        self.threshold = args.threshold
-        self.det_minsize = 50
-        self.det_threshold = [0.4, 0.6, 0.6]
-        self.det_factor = 0.9
-        _vec = args.image_size.split(',')
-        assert len(_vec) == 2
-        image_size = (int(_vec[0]), int(_vec[1]))
-        self.image_size = image_size
-        _vec = args.model.split(',')
-        assert len(_vec) == 2
-        prefix = _vec[0]
-        epoch = int(_vec[1])
-        print('loading', prefix, epoch)
-        ctx = mx.gpu(args.gpu)
-        sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
-        all_layers = sym.get_internals()
-        sym = all_layers['fc1_output']
-        model = mx.mod.Module(symbol=sym, context=ctx, label_names=None)
-        #model.bind(data_shapes=[('data', (args.batch_size, 3, image_size[0], image_size[1]))], label_shapes=[('softmax_label', (args.batch_size,))])
-        model.bind(data_shapes=[('data', (1, 3, image_size[0],
-                                          image_size[1]))])
-        model.set_params(arg_params, aux_params)
-        self.model = model
-        mtcnn_path = os.path.join(os.path.dirname(__file__), 'mtcnn-model')
-        detector = MtcnnDetector(model_folder=mtcnn_path,
-                                 ctx=ctx,
-                                 num_worker=1,
-                                 accurate_landmark=True,
-                                 threshold=[0.0, 0.0, 0.2])
-        self.detector = detector
-
-    def get_feature(self, face_img):
-        #face_img is bgr image
-        ret = self.detector.detect_face_limited(face_img,
-                                                det_type=self.args.det)
-        if ret is None:
-            return None
-        bbox, points = ret
-        if bbox.shape[0] == 0:
-            return None
-        bbox = bbox[0, 0:4]
-        points = points[0, :].reshape((2, 5)).T
-        #print(bbox)
-        #print(points)
-        nimg = face_preprocess.preprocess(face_img,
-                                          bbox,
-                                          points,
-                                          image_size='112,112')
-        nimg = cv2.cvtColor(nimg, cv2.COLOR_BGR2RGB)
-        aligned = np.transpose(nimg, (2, 0, 1))
-        #print(nimg.shape)
-        embedding = None
-        for flipid in [0, 1]:
-            if flipid == 1:
-                if self.args.flip == 0:
-                    break
-                do_flip(aligned)
-            input_blob = np.expand_dims(aligned, axis=0)
-            data = mx.nd.array(input_blob)
-            db = mx.io.DataBatch(data=(data, ))
-            self.model.forward(db, is_train=False)
-            _embedding = self.model.get_outputs()[0].asnumpy()
-            #print(_embedding.shape)
-            if embedding is None:
-                embedding = _embedding
-            else:
-                embedding += _embedding
-        embedding = sklearn.preprocessing.normalize(embedding).flatten()
-        return embedding
--- a/deploy/face_model.py
+++ b/deploy/face_model.py
@@ -2,23 +2,14 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

-from scipy import misc
 import sys
 import os
 import argparse
-#import tensorflow as tf
 import numpy as np
 import mxnet as mx
-import random
 import cv2
-import sklearn
-from sklearn.decomposition import PCA
-from time import sleep
-from easydict import EasyDict as edict
-from mtcnn_detector import MtcnnDetector
-sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'src', 'common'))
-import face_image
-import face_preprocess
+import insightface
+from insightface.utils import face_align


 def do_flip(data):
@@ -26,11 +17,7 @@ def do_flip(data):
        data[idx, :, :] = np.fliplr(data[idx, :, :])


-def get_model(ctx, image_size, model_str, layer):
-    _vec = model_str.split(',')
-    assert len(_vec) == 2
-    prefix = _vec[0]
-    epoch = int(_vec[1])
+def get_model(ctx, image_size, prefix, epoch, layer):
    print('loading', prefix, epoch)
    sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
    all_layers = sym.get_internals()
@@ -43,77 +30,38 @@ def get_model(ctx, image_size, model_str, layer):


 class FaceModel:
-    def __init__(self, args):
-        self.args = args
-        ctx = mx.gpu(args.gpu)
-        _vec = args.image_size.split(',')
-        assert len(_vec) == 2
-        image_size = (int(_vec[0]), int(_vec[1]))
-        self.model = None
-        self.ga_model = None
-        if len(args.model) > 0:
-            self.model = get_model(ctx, image_size, args.model, 'fc1')
-        if len(args.ga_model) > 0:
-            self.ga_model = get_model(ctx, image_size, args.ga_model, 'fc1')
-
-        self.threshold = args.threshold
-        self.det_minsize = 50
-        self.det_threshold = [0.6, 0.7, 0.8]
-        #self.det_factor = 0.9
-        self.image_size = image_size
-        mtcnn_path = os.path.join(os.path.dirname(__file__), 'mtcnn-model')
-        if args.det == 0:
-            detector = MtcnnDetector(model_folder=mtcnn_path,
-                                     ctx=ctx,
-                                     num_worker=1,
-                                     accurate_landmark=True,
-                                     threshold=self.det_threshold)
+    def __init__(self, ctx_id, model_prefix, model_epoch, use_large_detector=False):
+        if use_large_detector:
+            self.detector = insightface.model_zoo.get_model('retinaface_r50_v1')
        else:
-            detector = MtcnnDetector(model_folder=mtcnn_path,
-                                     ctx=ctx,
-                                     num_worker=1,
-                                     accurate_landmark=True,
-                                     threshold=[0.0, 0.0, 0.2])
-        self.detector = detector
+            self.detector = insightface.model_zoo.get_model('retinaface_mnet025_v2')
+        self.detector.prepare(ctx_id=ctx_id)
+        if ctx_id>=0:
+            ctx = mx.gpu(ctx_id)
+        else:
+            ctx = mx.cpu()
+        image_size = (112,112)
+        self.model = get_model(ctx, image_size, model_prefix, model_epoch, 'fc1')
+        self.image_size = image_size

    def get_input(self, face_img):
-        ret = self.detector.detect_face(face_img, det_type=self.args.det)
-        if ret is None:
-            return None
-        bbox, points = ret
-        if bbox.shape[0] == 0:
+        bbox, pts5 = self.detector.detect(face_img, threshold=0.8)
+        if bbox.shape[0]==0:
            return None
        bbox = bbox[0, 0:4]
-        points = points[0, :].reshape((2, 5)).T
-        #print(bbox)
-        #print(points)
-        nimg = face_preprocess.preprocess(face_img,
-                                          bbox,
-                                          points,
-                                          image_size='112,112')
-        nimg = cv2.cvtColor(nimg, cv2.COLOR_BGR2RGB)
-        aligned = np.transpose(nimg, (2, 0, 1))
-        return aligned
+        pts5 = pts5[0, :]
+        nimg = face_align.norm_crop(face_img, pts5)
+        return nimg

    def get_feature(self, aligned):
-        input_blob = np.expand_dims(aligned, axis=0)
+        a = cv2.cvtColor(aligned, cv2.COLOR_BGR2RGB)
+        a = np.transpose(a, (2, 0, 1))
+        input_blob = np.expand_dims(a, axis=0)
        data = mx.nd.array(input_blob)
        db = mx.io.DataBatch(data=(data, ))
        self.model.forward(db, is_train=False)
-        embedding = self.model.get_outputs()[0].asnumpy()
-        embedding = sklearn.preprocessing.normalize(embedding).flatten()
-        return embedding
+        emb = self.model.get_outputs()[0].asnumpy()[0]
+        norm = np.sqrt(np.sum(emb*emb)+0.00001)
+        emb /= norm
+        return emb

-    def get_ga(self, aligned):
-        input_blob = np.expand_dims(aligned, axis=0)
-        data = mx.nd.array(input_blob)
-        db = mx.io.DataBatch(data=(data, ))
-        self.ga_model.forward(db, is_train=False)
-        ret = self.ga_model.get_outputs()[0].asnumpy()
-        g = ret[:, 0:2].flatten()
-        gender = np.argmax(g)
-        a = ret[:, 2:202].reshape((100, 2))
-        a = np.argmax(a, axis=1)
-        age = int(sum(a))
-
-        return gender, age
--- a/deploy/ga_merge.py
+++ b/deploy/ga_merge.py
@@ -1,53 +0,0 @@
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import sys
-import os
-import argparse
-import numpy as np
-import mxnet as mx
-
-parser = argparse.ArgumentParser(description='merge age and gender models')
-# general
-parser.add_argument('--age-model', default='', help='path to load age model.')
-parser.add_argument('--gender-model',
-                    default='',
-                    help='path to load gender model.')
-parser.add_argument('--prefix', default='', help='path to save model.')
-args = parser.parse_args()
-
-i = 0
-tsym = None
-targ = {}
-taux = {}
-for model in [args.age_model, args.gender_model]:
-    _vec = model.split(',')
-    assert len(_vec) == 2
-    prefix = _vec[0]
-    epoch = int(_vec[1])
-    print('loading', prefix, epoch)
-    sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
-    if tsym is None:
-        all_layers = sym.get_internals()
-        tsym = all_layers['fc1_output']
-    if i == 0:
-        prefix = 'age'
-    else:
-        prefix = 'gender'
-    for k, v in arg_params.iteritems():
-        if k.startswith(prefix):
-            print('arg', i, k)
-            targ[k] = v
-    for k, v in aux_params.iteritems():
-        if k.startswith(prefix):
-            print('aux', i, k)
-            taux[k] = v
-    i += 1
-dellist = []
-#for k,v in arg_params.iteritems():
-#  if k.startswith('fc7'):
-#    dellist.append(k)
-for d in dellist:
-    del targ[d]
-mx.model.save_checkpoint(args.prefix, 0, tsym, targ, taux)
--- a/deploy/helper.py
+++ b/deploy/helper.py
@@ -1,172 +0,0 @@
-# coding: utf-8
-# YuanYang
-import math
-import cv2
-import numpy as np
-
-
-def nms(boxes, overlap_threshold, mode='Union'):
-    """
-        non max suppression
-
-    Parameters:
-    ----------
-        box: numpy array n x 5
-            input bbox array
-        overlap_threshold: float number
-            threshold of overlap
-        mode: float number
-            how to compute overlap ratio, 'Union' or 'Min'
-    Returns:
-    -------
-        index array of the selected bbox
-    """
-    # if there are no boxes, return an empty list
-    if len(boxes) == 0:
-        return []
-
-    # if the bounding boxes integers, convert them to floats
-    if boxes.dtype.kind == "i":
-        boxes = boxes.astype("float")
-
-    # initialize the list of picked indexes
-    pick = []
-
-    # grab the coordinates of the bounding boxes
-    x1, y1, x2, y2, score = [boxes[:, i] for i in range(5)]
-
-    area = (x2 - x1 + 1) * (y2 - y1 + 1)
-    idxs = np.argsort(score)
-
-    # keep looping while some indexes still remain in the indexes list
-    while len(idxs) > 0:
-        # grab the last index in the indexes list and add the index value to the list of picked indexes
-        last = len(idxs) - 1
-        i = idxs[last]
-        pick.append(i)
-
-        xx1 = np.maximum(x1[i], x1[idxs[:last]])
-        yy1 = np.maximum(y1[i], y1[idxs[:last]])
-        xx2 = np.minimum(x2[i], x2[idxs[:last]])
-        yy2 = np.minimum(y2[i], y2[idxs[:last]])
-
-        # compute the width and height of the bounding box
-        w = np.maximum(0, xx2 - xx1 + 1)
-        h = np.maximum(0, yy2 - yy1 + 1)
-
-        inter = w * h
-        if mode == 'Min':
-            overlap = inter / np.minimum(area[i], area[idxs[:last]])
-        else:
-            overlap = inter / (area[i] + area[idxs[:last]] - inter)
-
-        # delete all indexes from the index list that have
-        idxs = np.delete(
-            idxs,
-            np.concatenate(([last], np.where(overlap > overlap_threshold)[0])))
-
-    return pick
-
-
-def adjust_input(in_data):
-    """
-        adjust the input from (h, w, c) to ( 1, c, h, w) for network input
-
-    Parameters:
-    ----------
-        in_data: numpy array of shape (h, w, c)
-            input data
-    Returns:
-    -------
-        out_data: numpy array of shape (1, c, h, w)
-            reshaped array
-    """
-    if in_data.dtype is not np.dtype('float32'):
-        out_data = in_data.astype(np.float32)
-    else:
-        out_data = in_data
-
-    out_data = out_data.transpose((2, 0, 1))
-    out_data = np.expand_dims(out_data, 0)
-    out_data = (out_data - 127.5) * 0.0078125
-    return out_data
-
-
-def generate_bbox(map, reg, scale, threshold):
-    """
-         generate bbox from feature map
-     Parameters:
-     ----------
-         map: numpy array , n x m x 1
-             detect score for each position
-         reg: numpy array , n x m x 4
-             bbox
-         scale: float number
-             scale of this detection
-         threshold: float number
-             detect threshold
-     Returns:
-     -------
-         bbox array
-     """
-    stride = 2
-    cellsize = 12
-
-    t_index = np.where(map > threshold)
-
-    # find nothing
-    if t_index[0].size == 0:
-        return np.array([])
-
-    dx1, dy1, dx2, dy2 = [reg[0, i, t_index[0], t_index[1]] for i in range(4)]
-
-    reg = np.array([dx1, dy1, dx2, dy2])
-    score = map[t_index[0], t_index[1]]
-    boundingbox = np.vstack([
-        np.round((stride * t_index[1] + 1) / scale),
-        np.round((stride * t_index[0] + 1) / scale),
-        np.round((stride * t_index[1] + 1 + cellsize) / scale),
-        np.round((stride * t_index[0] + 1 + cellsize) / scale), score, reg
-    ])
-
-    return boundingbox.T
-
-
-def detect_first_stage(img, net, scale, threshold):
-    """
-        run PNet for first stage
-    
-    Parameters:
-    ----------
-        img: numpy array, bgr order
-            input image
-        scale: float number
-            how much should the input image scale
-        net: PNet
-            worker
-    Returns:
-    -------
-        total_boxes : bboxes
-    """
-    height, width, _ = img.shape
-    hs = int(math.ceil(height * scale))
-    ws = int(math.ceil(width * scale))
-
-    im_data = cv2.resize(img, (ws, hs))
-
-    # adjust for the network input
-    input_buf = adjust_input(im_data)
-    output = net.predict(input_buf)
-    boxes = generate_bbox(output[1][0, 1, :, :], output[0], scale, threshold)
-
-    if boxes.size == 0:
-        return None
-
-    # nms
-    pick = nms(boxes[:, 0:5], 0.5, mode='Union')
-    boxes = boxes[pick]
-    return boxes
-
-
-def detect_first_stage_warpper(args):
-    return detect_first_stage(*args)
--- a/deploy/mtcnn_detector.py
+++ b/deploy/mtcnn_detector.py
@@ -11,8 +11,171 @@ try:
 except ImportError:
    izip = zip

-from helper import nms, adjust_input, generate_bbox, detect_first_stage_warpper
+def nms(boxes, overlap_threshold, mode='Union'):
+    """
+        non max suppression

+    Parameters:
+    ----------
+        boxes: numpy array n x 5
+            input bbox array
+        overlap_threshold: float number
+            threshold of overlap
+        mode: string
+            how to compute overlap ratio, 'Union' or 'Min'
+    Returns:
+    -------
+        index array of the selected bbox
+    """
+    # if there are no boxes, return an empty list
+    if len(boxes) == 0:
+        return []
+
+    # if the bounding boxes are integers, convert them to floats
+    if boxes.dtype.kind == "i":
+        boxes = boxes.astype("float")
+
+    # initialize the list of picked indexes
+    pick = []
+
+    # grab the coordinates of the bounding boxes
+    x1, y1, x2, y2, score = [boxes[:, i] for i in range(5)]
+
+    area = (x2 - x1 + 1) * (y2 - y1 + 1)
+    idxs = np.argsort(score)
+
+    # keep looping while some indexes still remain in the indexes list
+    while len(idxs) > 0:
+        # grab the last index in the indexes list and add the index value to the list of picked indexes
+        last = len(idxs) - 1
+        i = idxs[last]
+        pick.append(i)
+
+        xx1 = np.maximum(x1[i], x1[idxs[:last]])
+        yy1 = np.maximum(y1[i], y1[idxs[:last]])
+        xx2 = np.minimum(x2[i], x2[idxs[:last]])
+        yy2 = np.minimum(y2[i], y2[idxs[:last]])
+
+        # compute the width and height of the bounding box
+        w = np.maximum(0, xx2 - xx1 + 1)
+        h = np.maximum(0, yy2 - yy1 + 1)
+
+        inter = w * h
+        if mode == 'Min':
+            overlap = inter / np.minimum(area[i], area[idxs[:last]])
+        else:
+            overlap = inter / (area[i] + area[idxs[:last]] - inter)
+
+        # delete the picked index and every index whose overlap exceeds the threshold
+        idxs = np.delete(
+            idxs,
+            np.concatenate(([last], np.where(overlap > overlap_threshold)[0])))
+
+    return pick
+
+
+def adjust_input(in_data):
+    """
+        adjust the input from (h, w, c) to ( 1, c, h, w) for network input
+
+    Parameters:
+    ----------
+        in_data: numpy array of shape (h, w, c)
+            input data
+    Returns:
+    -------
+        out_data: numpy array of shape (1, c, h, w)
+            reshaped array
+    """
+    if in_data.dtype is not np.dtype('float32'):
+        out_data = in_data.astype(np.float32)
+    else:
+        out_data = in_data
+
+    out_data = out_data.transpose((2, 0, 1))
+    out_data = np.expand_dims(out_data, 0)
+    out_data = (out_data - 127.5) * 0.0078125
+    return out_data
+
+
+def generate_bbox(map, reg, scale, threshold):
+    """
+         generate bbox from feature map
+     Parameters:
+     ----------
+         map: numpy array , n x m
+             detect score for each position
+         reg: numpy array , 1 x 4 x n x m
+             bbox offsets (dx1, dy1, dx2, dy2)
+         scale: float number
+             scale of this detection
+         threshold: float number
+             detect threshold
+     Returns:
+     -------
+         bbox array
+     """
+    stride = 2
+    cellsize = 12
+
+    t_index = np.where(map > threshold)
+
+    # find nothing
+    if t_index[0].size == 0:
+        return np.array([])
+
+    dx1, dy1, dx2, dy2 = [reg[0, i, t_index[0], t_index[1]] for i in range(4)]
+
+    reg = np.array([dx1, dy1, dx2, dy2])
+    score = map[t_index[0], t_index[1]]
+    boundingbox = np.vstack([
+        np.round((stride * t_index[1] + 1) / scale),
+        np.round((stride * t_index[0] + 1) / scale),
+        np.round((stride * t_index[1] + 1 + cellsize) / scale),
+        np.round((stride * t_index[0] + 1 + cellsize) / scale), score, reg
+    ])
+
+    return boundingbox.T
+
+
+def detect_first_stage(img, net, scale, threshold):
+    """
+        run PNet for first stage
+    
+    Parameters:
+    ----------
+        img: numpy array, bgr order
+            input image
+        scale: float number
+            how much should the input image scale
+        net: PNet
+            worker
+    Returns:
+    -------
+        total_boxes : bboxes
+    """
+    height, width, _ = img.shape
+    hs = int(math.ceil(height * scale))
+    ws = int(math.ceil(width * scale))
+
+    im_data = cv2.resize(img, (ws, hs))
+
+    # adjust for the network input
+    input_buf = adjust_input(im_data)
+    output = net.predict(input_buf)
+    boxes = generate_bbox(output[1][0, 1, :, :], output[0], scale, threshold)
+
+    if boxes.size == 0:
+        return None
+
+    # nms
+    pick = nms(boxes[:, 0:5], 0.5, mode='Union')
+    boxes = boxes[pick]
+    return boxes
+
+
+def detect_first_stage_warpper(args):
+    return detect_first_stage(*args)

 class MtcnnDetector(object):
    """
@@ -698,3 +861,4 @@ class MtcnnDetector(object):
            crop_imgs.append(chips)

        return crop_imgs
+
--- a/deploy/test.py
+++ b/deploy/test.py
@@ -8,39 +8,18 @@ parser = argparse.ArgumentParser(description='face model test')
 # general
 parser.add_argument('--image-size', default='112,112', help='')
 parser.add_argument('--model', default='', help='path to load model.')
-parser.add_argument('--ga-model', default='', help='path to load model.')
 parser.add_argument('--gpu', default=0, type=int, help='gpu id')
-parser.add_argument(
-    '--det',
-    default=0,
-    type=int,
-    help='mtcnn option, 1 means using R+O, 0 means detect from begining')
-parser.add_argument('--flip',
-                    default=0,
-                    type=int,
-                    help='whether do lr flip aug')
-parser.add_argument('--threshold',
-                    default=1.24,
-                    type=float,
-                    help='ver dist threshold')
 args = parser.parse_args()

-model = face_model.FaceModel(args)
+vec = args.model.split(',')
+model_prefix = vec[0]
+model_epoch = int(vec[1])
+model = face_model.FaceModel(args.gpu, model_prefix, model_epoch)
 img = cv2.imread('Tom_Hanks_54745.png')
 img = model.get_input(img)
-#f1 = model.get_feature(img)
-#print(f1[0:10])
-gender, age = model.get_ga(img)
-print(gender)
-print(age)
-sys.exit(0)
-img = cv2.imread(
-    '/raid5data/dplearn/megaface/facescrubr/112x112/Tom_Hanks/Tom_Hanks_54733.png'
-)
+
+f1 = model.get_feature(img)
 f2 = model.get_feature(img)
-dist = np.sum(np.square(f1 - f2))
-print(dist)
-sim = np.dot(f1, f2.T)
-print(sim)
-#diff = np.subtract(source_feature, target_feature)
-#dist = np.sum(np.square(diff),1)
+sim = np.dot(f1, f2)
+assert(sim>=0.99 and sim<1.01)
+