This commit is contained in:
nttstar
2019-10-16 14:37:38 +08:00
60 changed files with 8579 additions and 2299 deletions

6
PRNet.mxnet/README.md Normal file
View File

@@ -0,0 +1,6 @@
MXNet implementation of [Joint 3D Face Reconstruction and Dense Alignment with Position Map Regression Network](http://openaccess.thecvf.com/content_ECCV_2018/papers/Yao_Feng_Joint_3D_Face_ECCV_2018_paper.pdf).
Original [TensorFlow implementation](https://github.com/YadiraF/PRNet).
Pretrained Models and details coming soon.

89
PRNet.mxnet/config.py Normal file
View File

@@ -0,0 +1,89 @@
import numpy as np
from easydict import EasyDict as edict
config = edict()

# default training/dataset config
config.num_classes = 3            # UV position map channels (x, y, z)
config.input_img_size = 256       # network input resolution
config.output_label_size = 64     # UV label map resolution

# network settings -- one preset per architecture, selected by name in
# generate_config(); keys are merged into `config` and `default`.
network = edict()

network.hourglass = edict()
network.hourglass.net_sta = 0
network.hourglass.net_n = 4
network.hourglass.net_dcn = 0          # deformable convolution off
network.hourglass.net_stacks = 1
network.hourglass.net_block = 'resnet'
network.hourglass.net_binarize = False
network.hourglass.losstype = 'heatmap'
network.hourglass.multiplier = 1.0     # channel-width multiplier

network.prnet = edict()
network.prnet.net_sta = 0
network.prnet.net_n = 5
network.prnet.net_dcn = 0
network.prnet.net_stacks = 1
network.prnet.net_modules = 2
network.prnet.net_block = 'hpm'
network.prnet.net_binarize = False
network.prnet.losstype = 'heatmap'
network.prnet.multiplier = 0.25

network.hpm = edict()
network.hpm.net_sta = 0
network.hpm.net_n = 4
network.hpm.net_dcn = 0
network.hpm.net_stacks = 1
network.hpm.net_block = 'hpm'
network.hpm.net_binarize = False
network.hpm.losstype = 'heatmap'
network.hpm.multiplier = 1.0

# dataset settings -- presets merged the same way as `network`.
dataset = edict()

dataset.prnet = edict()
dataset.prnet.dataset = '3D'
dataset.prnet.landmark_type = 'dense'
dataset.prnet.dataset_path = './data64'
dataset.prnet.num_classes = 3
dataset.prnet.input_img_size = 256
dataset.prnet.output_label_size = 64
#dataset.prnet.label_xfirst = False
dataset.prnet.val_targets = ['']

# default settings (training hyper-parameters / CLI defaults)
default = edict()

# default network
default.network = 'hpm'
default.pretrained = ''
default.pretrained_epoch = 0
# default dataset
default.dataset = 'prnet'
default.frequent = 20          # speedometer log frequency (batches)
default.verbose = 200          # validation / checkpoint frequency (batches)
default.kvstore = 'device'

default.prefix = 'model/A'
default.end_epoch = 10000
default.lr = 0.00025
default.wd = 0.0
default.per_batch_size = 20
default.lr_step = '16000,24000,30000'  # batch counts at which lr decays
def generate_config(_network, _dataset):
    """Merge the chosen network and dataset presets into the module-level
    `config` and `default` dicts, and record which presets were used."""
    for preset in (network[_network], dataset[_dataset]):
        for key, value in preset.items():
            config[key] = value
            default[key] = value
    config.network = _network
    config.dataset = _dataset

164
PRNet.mxnet/data.py Normal file
View File

@@ -0,0 +1,164 @@
# pylint: skip-file
import mxnet as mx
import numpy as np
import sys, os
import random
import glob
import math
import scipy.misc
import cv2
import logging
import sklearn
import datetime
import img_helper
from mxnet.io import DataIter
from mxnet import ndarray as nd
from mxnet import io
from mxnet import recordio
from PIL import Image
from config import config
from skimage import transform as tf
class FaceSegIter(DataIter):
    """Data iterator for PRNet UV position-map regression.

    Scans `path` for ``*.jpg`` images, pairing each with a same-named
    ``.npy`` UV-map label, and yields batches of
    (image, [uv_label, weight_mask]).  The weight mask is a fixed
    per-pixel loss weight loaded from ``./uv-data`` and shared by every
    batch.
    """
    def __init__(self, path, batch_size,
                 per_batch_size = 0,
                 aug_level = 0,
                 force_mirror = False,
                 exf = 1,
                 args = None):
        self.aug_level = aug_level
        self.force_mirror = force_mirror
        self.exf = exf
        self.batch_size = batch_size
        self.per_batch_size = per_batch_size
        self.image_file_list = []
        self.uv_file_list = []
        for _file in glob.glob(os.path.join(path, '*.jpg')):
            self.image_file_list.append(_file)
        # The UV label shares the image's basename: "xxx.jpg" -> "xxx.npy".
        for img in self.image_file_list:
            uv_file = img[0:-3]+"npy"
            self.uv_file_list.append(uv_file)
        # NOTE(review): reset() shuffles this in place, which needs a list;
        # works on Python 2 where range() returns a list, would need
        # list(range(...)) under Python 3.
        self.seq = range(len(self.image_file_list))
        print('train size', len(self.seq))
        self.cur = 0
        self.reset()
        self.data_shape = (3, config.input_img_size, config.input_img_size)
        self.num_classes = config.num_classes
        self.input_img_size = config.input_img_size
        #self.label_classes = self.num_classes
        self.output_label_size = config.output_label_size
        #if aug_level>0:
        #  self.output_label_size = config.output_label_size
        #else:
        #  self.output_label_size = self.input_img_size
        self.label_shape = (self.num_classes, self.output_label_size, self.output_label_size)
        self.provide_data = [('data', (batch_size,) + self.data_shape)]
        self.provide_label = [('softmax_label', (batch_size,) + self.label_shape),
                              ('mask_label', (batch_size,)+ self.label_shape)]
        # Per-pixel loss weights; resized to the label resolution if needed.
        weight_mask = cv2.imread('./uv-data/uv_weight_mask.png')
        print('weight_mask', weight_mask.shape)
        if weight_mask.shape[0]!=self.output_label_size:
            weight_mask = cv2.resize(weight_mask, (self.output_label_size, self.output_label_size) )
        #idx = np.where(weight_mask>0)[0]
        #print('weight idx', idx)
        weight_mask = weight_mask.astype(np.float32)
        weight_mask /= 255.0
        # Face-region visibility mask; zeroes the weights outside the face.
        vis_mask = cv2.imread('./uv-data/uv_face_mask.png')
        print('vis_mask', vis_mask.shape)
        if vis_mask.shape[0]!=self.output_label_size:
            vis_mask = cv2.resize(vis_mask, (self.output_label_size, self.output_label_size) )
        vis_mask = vis_mask.astype(np.float32)
        vis_mask /= 255.0
        weight_mask *= vis_mask
        print('weight_mask', weight_mask.shape)
        # HWC -> CHW, then tile to a full batch so it can be fed as a label.
        weight_mask = weight_mask.transpose( (2,0,1) )
        #WM = np.zeros( (batch_size,)+self.label_shape, dtype=np.float32 )
        #for i in range(batch_size):
        #  WM[i] = weight_mask
        #weight_mask = WM
        #weight_mask = weight_mask.reshape( (1, 3, weight_mask.shape[0], weight_mask.shape[1]) )
        weight_mask = weight_mask[np.newaxis,:,:,:]
        print('weight_mask', weight_mask.shape)
        weight_mask = np.tile(weight_mask, (batch_size,1,1,1))
        print('weight_mask', weight_mask.shape)
        self.weight_mask = nd.array(weight_mask)
        self.img_num = 0
        self.invalid_num = 0
        self.mode = 1
        self.vis = 0
        self.stats = [0,0]

    def get_data_shape(self):
        # Shape of one image blob: (C, H, W).
        return self.data_shape

    #def get_label_shape(self):
    #  return self.label_shape

    def get_shape_dict(self):
        # Name -> full batch shape for every data and label blob.
        D = {}
        for (k,v) in self.provide_data:
            D[k] = v
        for (k,v) in self.provide_label:
            D[k] = v
        return D

    def get_label_names(self):
        # Label blob names, in provide_label order.
        D = []
        for (k,v) in self.provide_label:
            D.append(k)
        return D

    def reset(self):
        """Rewind the iterator; shuffle sample order when augmenting."""
        #print('reset')
        self.cur = 0
        if self.aug_level>0:
            random.shuffle(self.seq)

    def next_sample(self):
        """Helper function for reading in next sample."""
        if self.cur >= len(self.seq):
            raise StopIteration
        idx = self.seq[self.cur]
        self.cur += 1
        uv_path = self.uv_file_list[idx]
        image_path = self.image_file_list[idx]
        uvmap = np.load(uv_path)
        img = cv2.imread(image_path)[:,:,::-1]#to rgb
        hlabel = uvmap
        #print(hlabel.shape)
        #hlabel = np.array(header.label).reshape( (self.output_label_size, self.output_label_size, self.num_classes) )
        # Normalize UV coordinates by the input resolution (in-place on the
        # loaded array; assumes the .npy stores floats -- TODO confirm).
        hlabel /= self.input_img_size
        return img, hlabel

    def next(self):
        """Returns the next batch of data."""
        #print('next')
        batch_size = self.batch_size
        batch_data = nd.empty((batch_size,)+self.data_shape)
        batch_label = nd.empty((batch_size,)+self.label_shape)
        i = 0
        #self.cutoff = random.randint(800,1280)
        try:
            while i < batch_size:
                #print('N', i)
                data, label = self.next_sample()
                # HWC -> CHW for both the image and the UV label.
                data = nd.array(data)
                data = nd.transpose(data, axes=(2, 0, 1))
                label = nd.array(label)
                label = nd.transpose(label, axes=(2, 0, 1))
                batch_data[i][:] = data
                batch_label[i][:] = label
                i += 1
        except StopIteration:
            # Drop incomplete trailing batches instead of padding them.
            if i<batch_size:
                raise StopIteration
        #return {self.data_name : batch_data,
        #    self.label_name : batch_label}
        #print(batch_data.shape, batch_label.shape)
        return mx.io.DataBatch([batch_data], [batch_label, self.weight_mask], batch_size - i)

99
PRNet.mxnet/metric.py Normal file
View File

@@ -0,0 +1,99 @@
import mxnet as mx
import numpy as np
import math
import cv2
from config import config
class LossValueMetric(mx.metric.EvalMetric):
    """Metric that reports the running mean of the network's loss output."""

    def __init__(self):
        self.axis = 1
        super(LossValueMetric, self).__init__(
            'lossvalue', axis=self.axis,
            output_names=None, label_names=None)
        self.losses = []

    def update(self, labels, preds):
        """Fold the mean of the first prediction output into the metric."""
        batch_loss = preds[0].asnumpy()
        self.sum_metric += np.mean(batch_loss)
        self.num_inst += 1.0
class NMEMetric(mx.metric.EvalMetric):
    """Normalized Mean Error over facial-landmark heatmaps.

    Per-landmark error is the pixel distance between the argmax of the
    predicted heatmap (resized to input resolution) and the ground truth,
    normalized per face by inter-ocular distance (config.landmark_type
    '2d') or by the ground-truth bounding-box diagonal otherwise.
    """
    def __init__(self):
        self.axis = 1
        super(NMEMetric, self).__init__(
            'NME', axis=self.axis,
            output_names=None, label_names=None)
        #self.losses = []
        self.count = 0

    def cal_nme(self, label, pred_label):
        """Mean NME over a batch.

        label: (B, P, H, W) gt heatmaps when ndim==4, else (B, P, 2)-like
        coordinates; pred_label: (B, P, h, w) predicted heatmaps.
        NOTE(review): xrange below assumes Python 2.
        """
        nme = []
        for b in xrange(pred_label.shape[0]):
            # record[0..3] hold eye-corner landmarks 36/39/42/45;
            # record[4]/[5] accumulate elementwise min/max over all gt
            # points (the gt bounding box).
            record = [None]*6
            item = []
            # Skip faces whose ground truth is entirely zero (invalid).
            if label.ndim==4:
                _heatmap = label[b][36]
                if np.count_nonzero(_heatmap)==0:
                    continue
            else:#ndim==3
                #print(label[b])
                if np.count_nonzero(label[b])==0:
                    continue
            for p in xrange(pred_label.shape[1]):
                if label.ndim==4:
                    heatmap_gt = label[b][p]
                    ind_gt = np.unravel_index(np.argmax(heatmap_gt, axis=None), heatmap_gt.shape)
                    ind_gt = np.array(ind_gt)
                else:
                    ind_gt = label[b][p]
                #ind_gt = ind_gt.astype(np.int)
                #print(ind_gt)
                # Upscale the predicted heatmap to input resolution so the
                # argmax is comparable with the gt coordinates.
                heatmap_pred = pred_label[b][p]
                heatmap_pred = cv2.resize(heatmap_pred, (config.input_img_size, config.input_img_size))
                ind_pred = np.unravel_index(np.argmax(heatmap_pred, axis=None), heatmap_pred.shape)
                ind_pred = np.array(ind_pred)
                #print(ind_gt.shape)
                #print(ind_pred)
                if p==36:
                    #print('b', b, p, ind_gt, np.count_nonzero(heatmap_gt))
                    record[0] = ind_gt
                elif p==39:
                    record[1] = ind_gt
                elif p==42:
                    record[2] = ind_gt
                elif p==45:
                    record[3] = ind_gt
                if record[4] is None or record[5] is None:
                    record[4] = ind_gt
                    record[5] = ind_gt
                else:
                    record[4] = np.minimum(record[4], ind_gt)
                    record[5] = np.maximum(record[5], ind_gt)
                #print(ind_gt.shape, ind_pred.shape)
                value = np.sqrt(np.sum(np.square(ind_gt - ind_pred)))
                item.append(value)
            _nme = np.mean(item)
            if config.landmark_type=='2d':
                # Normalize by the distance between the two eye centers.
                left_eye = (record[0]+record[1])/2
                right_eye = (record[2]+record[3])/2
                _dist = np.sqrt(np.sum(np.square(left_eye - right_eye)))
                #print('eye dist', _dist, left_eye, right_eye)
                _nme /= _dist
            else:
                # Normalize by the gt bounding-box diagonal.
                #_dist = np.sqrt(float(label.shape[2]*label.shape[3]))
                _dist = np.sqrt(np.sum(np.square(record[5] - record[4])))
                #print(_dist)
                _nme /= _dist
            nme.append(_nme)
        return np.mean(nme)

    def update(self, labels, preds):
        """Accumulate NME for one batch; the last output is the prediction."""
        self.count+=1
        label = labels[0].asnumpy()
        pred_label = preds[-1].asnumpy()
        nme = self.cal_nme(label, pred_label)
        #print('nme', nme)
        #nme = np.mean(nme)
        self.sum_metric += np.mean(nme)
        self.num_inst += 1.0

View File

@@ -0,0 +1,435 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import mxnet as mx
import numpy as np
from config import config
ACT_BIT = 1        # activation bit width for the (disabled) binarized layers
bn_mom = 0.9       # BatchNorm momentum shared by every layer in this file
workspace = 256    # MXNet convolution workspace limit (MB)
memonger = False   # when True, tag shortcuts for memory-optimized training
def Conv(**kwargs):
    """Thin wrapper over mx.sym.Convolution, kept as a single swap point."""
    return mx.sym.Convolution(**kwargs)
def Act(data, act_type, name):
    """Activation helper: 'prelu' maps to LeakyReLU, anything else goes
    through the plain Activation operator."""
    if act_type == 'prelu':
        return mx.sym.LeakyReLU(data=data, act_type='prelu', name=name)
    return mx.symbol.Activation(data=data, act_type=act_type, name=name)
#def lin(data, num_filter, workspace, name, binarize, dcn):
# bit = 1
# if not binarize:
# if not dcn:
# conv1 = Conv(data=data, num_filter=num_filter, kernel=(1,1), stride=(1,1), pad=(0,0),
# no_bias=True, workspace=workspace, name=name + '_conv')
# bn1 = mx.sym.BatchNorm(data=conv1, fix_gamma=False, momentum=bn_mom, eps=2e-5, name=name + '_bn')
# act1 = Act(data=bn1, act_type='relu', name=name + '_relu')
# return act1
# else:
# bn1 = mx.sym.BatchNorm(data=data, fix_gamma=False, momentum=bn_mom, eps=2e-5, name=name + '_bn')
# act1 = Act(data=bn1, act_type='relu', name=name + '_relu')
# conv1_offset = mx.symbol.Convolution(name=name+'_conv_offset', data = act1,
# num_filter=18, pad=(1, 1), kernel=(3, 3), stride=(1, 1))
# conv1 = mx.contrib.symbol.DeformableConvolution(name=name+"_conv", data=act1, offset=conv1_offset,
# num_filter=num_filter, pad=(1,1), kernel=(3, 3), num_deformable_group=1, stride=(1, 1), dilate=(1, 1), no_bias=False)
# #conv1 = Conv(data=act1, num_filter=num_filter, kernel=(3,3), stride=(1,1), pad=(1,1),
# # no_bias=False, workspace=workspace, name=name + '_conv')
# return conv1
# else:
# bn1 = mx.sym.BatchNorm(data=data, fix_gamma=False, momentum=bn_mom, eps=2e-5, name=name + '_bn')
# act1 = Act(data=bn1, act_type='relu', name=name + '_relu')
# conv1 = mx.sym.QConvolution_v1(data=act1, num_filter=num_filter, kernel=(1,1), stride=(1,1), pad=(0,0),
# no_bias=True, workspace=workspace, name=name + '_conv', act_bit=ACT_BIT, weight_bit=bit)
# conv1 = mx.sym.BatchNorm(data=conv1, fix_gamma=False, momentum=bn_mom, eps=2e-5, name=name + '_bn2')
# return conv1
def lin3(data, num_filter, workspace, name, k, g=1, d=1):
    """k x k Conv -> BN -> ReLU.  Only 3x3 kernels honor the dilation `d`;
    other sizes use same-padding with no dilation."""
    if k == 3:
        conv1 = Conv(data=data, num_filter=num_filter, kernel=(k, k), stride=(1, 1),
                     pad=(d, d), num_group=g, dilate=(d, d),
                     no_bias=True, workspace=workspace, name=name + '_conv')
    else:
        conv1 = Conv(data=data, num_filter=num_filter, kernel=(k, k), stride=(1, 1),
                     pad=((k - 1) // 2, (k - 1) // 2), num_group=g,
                     no_bias=True, workspace=workspace, name=name + '_conv')
    bn1 = mx.sym.BatchNorm(data=conv1, fix_gamma=False, momentum=bn_mom, eps=2e-5, name=name + '_bn')
    return Act(data=bn1, act_type='relu', name=name + '_relu')
def ConvFactory(data, num_filter, kernel, stride=(1, 1), pad=(0, 0), act_type="relu", mirror_attr={}, with_act=True, dcn=False, name=''):
    """Conv (or deformable conv) -> BN, optionally followed by activation.

    NOTE: the deformable path hard-codes a 3x3 kernel/pad and ignores the
    `kernel` and `pad` arguments.
    """
    if dcn:
        offset = mx.symbol.Convolution(name=name + '_conv_offset', data=data,
                                       num_filter=18, pad=(1, 1), kernel=(3, 3), stride=(1, 1))
        conv = mx.contrib.symbol.DeformableConvolution(name=name + "_conv", data=data, offset=offset,
                                                       num_filter=num_filter, pad=(1, 1), kernel=(3, 3),
                                                       num_deformable_group=1, stride=stride, dilate=(1, 1), no_bias=False)
    else:
        conv = mx.symbol.Convolution(
            data=data, num_filter=num_filter, kernel=kernel, stride=stride,
            pad=pad, no_bias=True, workspace=workspace, name=name + '_conv')
    bn = mx.symbol.BatchNorm(data=conv, fix_gamma=False, momentum=bn_mom, eps=2e-5, name=name + '_bn')
    if not with_act:
        return bn
    return Act(bn, act_type, name=name + '_relu')
class CAB:
    """Cascaded Aggregation Block.

    Lazily builds a triangular grid of feature nodes; get_output(w, h)
    memoizes one (symbol, num_channels) pair per grid cell, and get()
    returns the aggregated output at cell (1, 1).  Cell (n, n) is the
    input feature map with `nFilters` channels.
    """
    def __init__(self, data, nFilters, nModules, n, workspace, name, dilate, group):
        self.data = data
        self.nFilters = nFilters
        self.nModules = nModules
        self.n = n                 # grid size; (n, n) is the input cell
        self.workspace = workspace
        self.name = name
        self.dilate = dilate
        self.group = group
        self.sym_map = {}          # (w, h) -> (symbol, channels) memo

    def get_output(self, w, h):
        """Memoized recursive construction of grid cell (w, h)."""
        key = (w, h)
        if key in self.sym_map:
            return self.sym_map[key]
        ret = None
        if h==self.n:
            if w==self.n:
                # Base case: the raw input feature map.
                ret = (self.data, self.nFilters)
            else:
                # Bottom row: step left, halving channels each step; the
                # last step before the input uses the configured dilation.
                x = self.get_output(w+1, h)
                f = int(x[1]*0.5)
                if w!=self.n-1:
                    body = lin3(x[0], f, self.workspace, "%s_w%d_h%d_1"%(self.name, w, h), 3, self.group, 1)
                else:
                    body = lin3(x[0], f, self.workspace, "%s_w%d_h%d_1"%(self.name, w, h), 3, self.group, self.dilate)
                ret = (body,f)
        else:
            # Interior cell: merge the diagonal neighbor (x) and the cell
            # below (y).
            x = self.get_output(w+1, h+1)
            y = self.get_output(w, h+1)
            if h%2==1 and h!=w:
                xbody = lin3(x[0], x[1], self.workspace, "%s_w%d_h%d_2"%(self.name, w, h), 3, x[1])
                #xbody = xbody+x[0]
            else:
                xbody = x[0]
            #xbody = x[0]
            #xbody = lin3(x[0], x[1], self.workspace, "%s_w%d_h%d_2"%(self.name, w, h), 3, x[1])
            if w==0:
                ybody = lin3(y[0], y[1], self.workspace, "%s_w%d_h%d_3"%(self.name, w, h), 3, self.group)
            else:
                ybody = y[0]
            # Concat along channels -- presumably to match xbody's channel
            # count before the elementwise add; TODO confirm widths.
            ybody = mx.sym.concat(y[0], ybody, dim=1)
            body = mx.sym.add_n(xbody,ybody, name="%s_w%d_h%d_add"%(self.name, w, h))
            body = body/2
            ret = (body, x[1])
        self.sym_map[key] = ret
        return ret

    def get(self):
        """Return the aggregated output symbol (cell (1, 1))."""
        return self.get_output(1, 1)[0]
def conv_resnet(data, num_filter, stride, dim_match, name, binarize, dcn, dilate, **kwargs):
    """Pre-activation residual bottleneck (BN-ReLU-Conv x3 + shortcut),
    following https://github.com/facebook/fb.resnet.torch#notes.

    `binarize`, `dcn` and `dilate` are accepted for signature compatibility
    with the other conv_* builders but are unused here.  (The unused local
    `bit` from the original was removed.)
    """
    bn1 = mx.sym.BatchNorm(data=data, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name + '_bn1')
    act1 = Act(data=bn1, act_type='relu', name=name + '_relu1')
    # 1x1 reduce to half the output channels.
    conv1 = Conv(data=act1, num_filter=int(num_filter*0.5), kernel=(1,1), stride=(1,1), pad=(0,0),
                 no_bias=True, workspace=workspace, name=name + '_conv1')
    bn2 = mx.sym.BatchNorm(data=conv1, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name + '_bn2')
    act2 = Act(data=bn2, act_type='relu', name=name + '_relu2')
    # 3x3 at the reduced width.
    conv2 = Conv(data=act2, num_filter=int(num_filter*0.5), kernel=(3,3), stride=(1,1), pad=(1,1),
                 no_bias=True, workspace=workspace, name=name + '_conv2')
    bn3 = mx.sym.BatchNorm(data=conv2, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name + '_bn3')
    act3 = Act(data=bn3, act_type='relu', name=name + '_relu3')
    # 1x1 expand back to num_filter channels.
    conv3 = Conv(data=act3, num_filter=num_filter, kernel=(1,1), stride=(1,1), pad=(0,0), no_bias=True,
                 workspace=workspace, name=name + '_conv3')
    #if binarize:
    #  conv3 = mx.sym.BatchNorm(data=conv3, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name + '_bn4')
    if dim_match:
        shortcut = data
    else:
        # Projection shortcut taken from the pre-activated input.
        shortcut = Conv(data=act1, num_filter=num_filter, kernel=(1,1), stride=stride, no_bias=True,
                        workspace=workspace, name=name+'_sc')
    if memonger:
        shortcut._set_attr(mirror_stage='True')
    return conv3 + shortcut
def conv_prnet(data, num_filter, stride, dim_match, name, binarize, dcn, dilate, **kwargs):
    """Pre-activation residual bottleneck used by the 'prnet' block type
    (BN-ReLU-Conv x3 plus identity or projection shortcut)."""
    def _bn_act_conv(x, filters, k, padding, suffix):
        # One BN -> ReLU -> Conv step; returns (activation, conv output).
        bn = mx.sym.BatchNorm(data=x, fix_gamma=False, eps=2e-5, momentum=bn_mom,
                              name=name + '_bn' + suffix)
        act = Act(data=bn, act_type='relu', name=name + '_relu' + suffix)
        conv = Conv(data=act, num_filter=filters, kernel=(k, k), stride=(1, 1),
                    pad=(padding, padding), no_bias=True, workspace=workspace,
                    name=name + '_conv' + suffix)
        return act, conv

    half = int(num_filter * 0.5)
    act1, conv1 = _bn_act_conv(data, half, 1, 0, '1')     # 1x1 reduce
    _, conv2 = _bn_act_conv(conv1, half, 3, 1, '2')       # 3x3
    _, conv3 = _bn_act_conv(conv2, num_filter, 1, 0, '3') # 1x1 expand

    if dim_match:
        shortcut = data
    else:
        shortcut = Conv(data=act1, num_filter=num_filter, kernel=(1, 1), stride=stride,
                        no_bias=True, workspace=workspace, name=name + '_sc')
    if memonger:
        shortcut._set_attr(mirror_stage='True')
    return conv3 + shortcut
def conv_hpm(data, num_filter, stride, dim_match, name, binarize, dcn, dilation, **kwargs):
    """Hierarchical-parallel-multiscale residual unit.

    Three chained dilated 3x3 BN-ReLU-Conv steps whose outputs are
    concatenated (0.5 + 0.25 + 0.25 = num_filter channels) and added to an
    identity or projection shortcut.  `binarize` and `dcn` are accepted for
    signature compatibility but unused.  (The unused local `bit` from the
    original was removed.)
    """
    bn1 = mx.sym.BatchNorm(data=data, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name + '_bn1')
    act1 = Act(data=bn1, act_type='relu', name=name + '_relu1')
    conv1 = Conv(data=act1, num_filter=int(num_filter*0.5), kernel=(3,3), stride=(1,1), pad=(dilation,dilation), dilate=(dilation,dilation),
                 no_bias=True, workspace=workspace, name=name + '_conv1')
    bn2 = mx.sym.BatchNorm(data=conv1, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name + '_bn2')
    act2 = Act(data=bn2, act_type='relu', name=name + '_relu2')
    conv2 = Conv(data=act2, num_filter=int(num_filter*0.25), kernel=(3,3), stride=(1,1), pad=(dilation,dilation), dilate=(dilation,dilation),
                 no_bias=True, workspace=workspace, name=name + '_conv2')
    bn3 = mx.sym.BatchNorm(data=conv2, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name + '_bn3')
    act3 = Act(data=bn3, act_type='relu', name=name + '_relu3')
    conv3 = Conv(data=act3, num_filter=int(num_filter*0.25), kernel=(3,3), stride=(1,1), pad=(dilation,dilation), dilate=(dilation,dilation),
                 no_bias=True, workspace=workspace, name=name + '_conv3')
    # Multi-scale concat of all three stages' outputs.
    conv4 = mx.symbol.Concat(*[conv1, conv2, conv3])
    if dim_match:
        shortcut = data
    else:
        # Projection shortcut taken from the pre-activated input.
        shortcut = Conv(data=act1, num_filter=num_filter, kernel=(1,1), stride=stride, no_bias=True,
                        workspace=workspace, name=name+'_sc')
    if memonger:
        shortcut._set_attr(mirror_stage='True')
    return conv4 + shortcut
def block17(net, input_num_channels, scale=1.0, with_act=True, act_type='relu', mirror_attr={}, name=''):
    """Inception-ResNet style residual block with a 1x1 tower and a
    factorized 1x7 / 7x1 tower, scaled and added back to the input."""
    branch_a = ConvFactory(net, 192, (1, 1), name=name+'_conv')
    branch_b = ConvFactory(net, 129, (1, 1), name=name+'_conv1_0')
    branch_b = ConvFactory(branch_b, 160, (1, 7), pad=(1, 2), name=name+'_conv1_1')
    branch_b = ConvFactory(branch_b, 192, (7, 1), pad=(2, 1), name=name+'_conv1_2')
    mixed = mx.symbol.Concat(*[branch_a, branch_b])
    # Project back to the input channel count; no activation before the add.
    residual = ConvFactory(mixed, input_num_channels, (1, 1), with_act=False, name=name+'_conv_out')
    out = net + scale * residual
    if not with_act:
        return out
    return mx.symbol.Activation(data=out, act_type=act_type, attr=mirror_attr)
def block35(net, input_num_channels, scale=1.0, with_act=True, act_type='relu', mirror_attr={}, name=''):
    """Inception-ResNet style residual block with 1x1, 1x1-3x3 and
    1x1-3x3-3x3 towers, scaled and added back to the input."""
    M = 1.0
    quarter = int(input_num_channels * 0.25 * M)
    branch_a = ConvFactory(net, quarter, (1, 1), name=name+'_conv')
    branch_b = ConvFactory(net, quarter, (1, 1), name=name+'_conv1_0')
    branch_b = ConvFactory(branch_b, quarter, (3, 3), pad=(1, 1), name=name+'_conv1_1')
    branch_c = ConvFactory(net, quarter, (1, 1), name=name+'_conv2_0')
    branch_c = ConvFactory(branch_c, int(input_num_channels*0.375*M), (3, 3), pad=(1, 1), name=name+'_conv2_1')
    branch_c = ConvFactory(branch_c, int(input_num_channels*0.5*M), (3, 3), pad=(1, 1), name=name+'_conv2_2')
    mixed = mx.symbol.Concat(*[branch_a, branch_b, branch_c])
    # Project back to the input channel count; no activation before the add.
    residual = ConvFactory(mixed, input_num_channels, (1, 1), with_act=False, name=name+'_conv_out')
    out = net + scale * residual
    if not with_act:
        return out
    return mx.symbol.Activation(data=out, act_type=act_type, attr=mirror_attr)
def conv_inception(data, num_filter, stride, dim_match, name, binarize, dcn, dilate, **kwargs):
    """Inception-resnet unit; falls back to conv_resnet whenever the
    spatial size or channel count changes."""
    assert not binarize
    if stride[0] > 1 or not dim_match:
        return conv_resnet(data, num_filter, stride, dim_match, name, binarize, dcn, dilate, **kwargs)
    return block35(data, num_filter, name=name + '_block35')
def conv_cab(data, num_filter, stride, dim_match, name, binarize, dcn, dilate, **kwargs):
    """CAB unit; falls back to conv_hpm whenever the spatial size or
    channel count changes."""
    if stride[0] > 1 or not dim_match:
        return conv_hpm(data, num_filter, stride, dim_match, name, binarize, dcn, dilate, **kwargs)
    return CAB(data, num_filter, 1, 4, workspace, name, dilate, 1).get()
def conv_block(data, num_filter, stride, dim_match, name, binarize, dcn, dilate):
    """Dispatch to the residual-unit builder selected by config.net_block.

    Raises ValueError for an unknown block type -- the original fell
    through and returned None, which only crashed much later with a
    confusing symbol error.
    """
    builders = {
        'resnet': conv_resnet,
        'inception': conv_inception,
        'hpm': conv_hpm,
        'cab': conv_cab,
        'prnet': conv_prnet,
    }
    try:
        builder = builders[config.net_block]
    except KeyError:
        raise ValueError('unknown config.net_block: %r' % (config.net_block,))
    return builder(data, num_filter, stride, dim_match, name, binarize, dcn, dilate)
def hourglass(data, nFilters, nModules, n, workspace, name, binarize, dcn):
    """Recursive hourglass module (depth n).

    Builds conv blocks at the current resolution (up1), a max-pooled
    branch (low1 -> low2 -> low3) that recurses n-1 more levels, then
    nearest-neighbor upsampling back.  NOTE(review): only the upsampled
    branch is returned -- the usual up1+up2 skip connection is commented
    out below, so up1 is built but unused.
    """
    s = 2  # pooling / upsampling factor per level
    _dcn = False
    up1 = data
    for i in xrange(nModules):
        up1 = conv_block(up1, nFilters, (1,1), True, "%s_up1_%d"%(name,i), binarize, _dcn, 1)
    # Downsample by max-pooling (the conv alternatives are kept commented).
    low1 = mx.sym.Pooling(data=data, kernel=(s, s), stride=(s,s), pad=(0,0), pool_type='max')
    #low1 = ConvFactory(data, nFilters, (4,4), stride=(2,2), pad=(1,1), name=name+'_conv')
    #low1 = ConvFactory(data, nFilters, (3,3), stride=(2,2), pad=(1,1), name=name+'_conv')
    #low1 = ConvFactory(up1, nFilters, (3,3), stride=(2,2), pad=(1,1), name=name+'_conv')
    for i in xrange(nModules):
        low1 = conv_block(low1, nFilters, (1,1), True, "%s_low1_%d"%(name,i), binarize, _dcn, 1)
    if n>1:
        # Recurse one resolution level down.
        low2 = hourglass(low1, nFilters, nModules, n-1, workspace, "%s_%d"%(name, n-1), binarize, dcn)
    else:
        # Deepest level: plain conv blocks instead of recursion.
        low2 = low1
        for i in xrange(nModules):
            low2 = conv_block(low2, nFilters, (1,1), True, "%s_low2_%d"%(name,i), binarize, _dcn, 1) #TODO
    low3 = low2
    for i in xrange(nModules):
        low3 = conv_block(low3, nFilters, (1,1), True, "%s_low3_%d"%(name,i), binarize, _dcn, 1)
    up2 = mx.symbol.UpSampling(low3, scale=s, sample_type='nearest', workspace=512, name='%s_upsampling_%s'%(name,n), num_args=1)
    #up2 = mx.symbol.UpSampling(low3, scale=s, sample_type='bilinear', num_filter=nFilters, workspace=512, name='%s_upsampling_%s'%(name,n), num_args=1)
    #up2 = mx.symbol.Deconvolution(data=low3, num_filter=nFilters, kernel=(s*2,s*2),
    #    stride=(s, s), pad=(s//2, s//2),
    #    name='%s_upsampling_%s'%(name,n),
    #    attr={'lr_mult': '0.1'})
    #return mx.symbol.add_n(up1, up2)
    return up2
def prnet_loss(pred, gt_label, mask_label):
    """Per-pixel weighted L1 loss for UV position-map regression:
    |pred - gt| scaled elementwise by the broadcast weight mask."""
    abs_err = mx.symbol.abs(pred - gt_label)
    return mx.symbol.broadcast_mul(abs_err, mask_label)
def ce_loss(x, y):
    """Cross-entropy with a numerically stable softmax taken over the
    spatial axes (2, 3); returns the mean over all elements."""
    x_max = mx.sym.max(x, axis=[2, 3], keepdims=True)
    shifted = mx.sym.broadcast_minus(x, x_max)   # subtract max for stability
    expx = mx.sym.exp(shifted)
    prob = mx.sym.broadcast_div(expx, mx.sym.sum(expx, axis=[2, 3], keepdims=True))
    loss = mx.sym.log(prob) * y * -1.0
    return mx.symbol.mean(loss)
def get_symbol(num_classes):
    """Build the training symbol: normalization + stem convs -> one
    hourglass -> 1x1 head predicting `num_classes` UV channels, trained
    with the masked L1 loss (prnet_loss).

    Returns a Group of [MakeLoss(loss), BlockGrad(prediction)].
    """
    # Channel widths scaled by the config multiplier.
    m = config.multiplier
    sFilters = max(int(64*m), 16)
    mFilters = max(int(128*m), 32)
    nFilters = int(256*m)

    nModules = config.net_modules
    nStacks = config.net_stacks
    binarize = config.net_binarize
    input_size = config.input_img_size
    label_size = config.output_label_size
    use_STA = config.net_sta
    N = config.net_n
    DCN = config.net_dcn
    per_batch_size = config.per_batch_size
    print('binarize', binarize)
    print('use_STA', use_STA)
    print('use_N', N)
    print('use_DCN', DCN)
    print('per_batch_size', per_batch_size)
    #assert(label_size==64 or label_size==32)
    #assert(input_size==128 or input_size==256)
    # Downsampling ratio between input image and label map.
    D = input_size // label_size
    print(input_size, label_size, D)
    data = mx.sym.Variable(name='data')
    # Normalize pixel values to roughly [-1, 1].
    data = data-127.5
    data = data*0.0078125
    gt_label = mx.symbol.Variable(name='softmax_label')
    mask_label = mx.symbol.Variable(name='mask_label')
    losses = []
    closses = []
    #body = Conv(data=data, num_filter=sFilters, kernel=(3, 3), stride=(1,1), pad=(1, 1),
    #    no_bias=True, name="conv0", workspace=workspace)
    # Stem: 7x7 stride-2 conv (the 3x3 / 4x4 variants are kept commented).
    body = Conv(data=data, num_filter=sFilters, kernel=(7,7), stride=(2,2), pad=(3,3),
                no_bias=True, name="conv0", workspace=workspace)
    #body = Conv(data=data, num_filter=sFilters, kernel=(4,4), stride=(2,2), pad=(1,1),
    #    no_bias=True, name="conv0", workspace=workspace)
    body = mx.sym.BatchNorm(data=body, fix_gamma=False, eps=2e-5, momentum=bn_mom, name='bn0')
    body = Act(data=body, act_type='relu', name='relu0')

    dcn = False
    body = conv_block(body, mFilters, (1,1), sFilters==mFilters, 'res0', False, dcn, 1)

    # Second 2x downsampling; total stride is now 4.
    body = mx.sym.Pooling(data=body, kernel=(2, 2), stride=(2,2), pad=(0,0), pool_type='max')
    #body = Conv(data=body, num_filter=mFilters, kernel=(4,4), stride=(2,2), pad=(1,1),
    #    no_bias=True, name="conv1", workspace=workspace)
    #body = mx.sym.BatchNorm(data=body, fix_gamma=False, eps=2e-5, momentum=bn_mom, name='bn1')
    #body = Act(data=body, act_type='relu', name='relu1')
    #body = conv_block(body, mFilters, (1,1), True, 'res1', False, dcn, 1) #TODO
    body = conv_block(body, nFilters, (1,1), mFilters==nFilters, 'res2', binarize, dcn, 1) #binarize=True?

    heatmap = None
    outs = []
    # Single hourglass stack followed by nModules refinement blocks.
    body = hourglass(body, nFilters, nModules, config.net_n, workspace, 'stack0_hg', binarize, dcn)
    for j in xrange(nModules):
        body = conv_block(body, nFilters, (1,1), True, 'stack0_unit%d'%(j), binarize, dcn, 1)
    _dcn = False
    ll = ConvFactory(body, nFilters, (1,1), dcn = _dcn, name='stack0_ll')
    _name = 'heatmap'
    # 1x1 head producing the UV position map (num_classes channels).
    pred = Conv(data=ll, num_filter=num_classes, kernel=(1, 1), stride=(1,1), pad=(0,0),
                name=_name, workspace=workspace)
    loss = prnet_loss(pred, gt_label, mask_label)
    outs.append(mx.sym.MakeLoss(loss))
    # Expose the raw prediction without letting gradients flow through it.
    pred = mx.symbol.BlockGrad(pred)
    #loss = mx.symbol.add_n(*losses)
    #loss = mx.symbol.MakeLoss(loss)
    #syms = [loss]
    outs.append(pred)
    sym = mx.symbol.Group( outs )
    return sym
def init_weights(sym, data_shape_dict):
    """Create initial arg/aux params for layers that need custom init.

    - deformable-conv offset layers start at zero,
    - fc6_* layers get gaussian weights / zero biases,
    - upsampling layers get fixed bilinear kernels.

    Parameters whose names match none of the rules are left to the
    trainer's default initializer.  Returns (arg_params, aux_params);
    aux_params is always empty here.

    Fix: the original used the Python-2-only dict.iteritems(); .items()
    behaves identically on Python 2 and also works on Python 3.
    """
    arg_name = sym.list_arguments()
    aux_name = sym.list_auxiliary_states()
    arg_shape, _, aux_shape = sym.infer_shape(**data_shape_dict)
    arg_shape_dict = dict(zip(arg_name, arg_shape))
    aux_shape_dict = dict(zip(aux_name, aux_shape))
    #print(arg_shape_dict)
    arg_params = {}
    aux_params = {}
    for k, v in arg_shape_dict.items():
        #print(k,v)
        if k.endswith('offset_weight') or k.endswith('offset_bias'):
            print('initializing',k)
            arg_params[k] = mx.nd.zeros(shape = v)
        elif k.startswith('fc6_'):
            if k.endswith('_weight'):
                print('initializing',k)
                arg_params[k] = mx.random.normal(0, 0.01, shape=v)
            elif k.endswith('_bias'):
                print('initializing',k)
                arg_params[k] = mx.nd.zeros(shape=v)
        elif k.find('upsampling')>=0:
            print('initializing upsampling_weight', k)
            # Fixed bilinear kernel for the nearest/bilinear upsampling op.
            arg_params[k] = mx.nd.zeros(shape=arg_shape_dict[k])
            init = mx.init.Initializer()
            init._init_bilinear(k, arg_params[k])
    return arg_params, aux_params

215
PRNet.mxnet/train.py Normal file
View File

@@ -0,0 +1,215 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import logging
import argparse
from data import FaceSegIter
import mxnet as mx
import mxnet.optimizer as optimizer
import numpy as np
import os
import sys
import math
import random
import cv2
from config import config, default, generate_config
from optimizer import ONadam
from metric import LossValueMetric, NMEMetric
sys.path.append(os.path.join(os.path.dirname(__file__), 'symbol'))
import sym_heatmap
#import sym_fc
#from symbol import fc
args = None
logger = logging.getLogger()
logger.setLevel(logging.INFO)
def main(args):
_seed = 727
random.seed(_seed)
np.random.seed(_seed)
mx.random.seed(_seed)
ctx = []
cvd = os.environ['CUDA_VISIBLE_DEVICES'].strip()
if len(cvd)>0:
for i in xrange(len(cvd.split(','))):
ctx.append(mx.gpu(i))
if len(ctx)==0:
ctx = [mx.cpu()]
print('use cpu')
else:
print('gpu num:', len(ctx))
#ctx = [mx.gpu(0)]
args.ctx_num = len(ctx)
args.batch_size = args.per_batch_size*args.ctx_num
config.per_batch_size = args.per_batch_size
print('Call with', args, config)
train_iter = FaceSegIter(path = config.dataset_path,
batch_size = args.batch_size,
per_batch_size = args.per_batch_size,
aug_level = 1,
exf = args.exf,
args = args,
)
data_shape = train_iter.get_data_shape()
#label_shape = train_iter.get_label_shape()
sym = sym_heatmap.get_symbol(num_classes=config.num_classes)
if len(args.pretrained)==0:
#data_shape_dict = {'data' : (args.per_batch_size,)+data_shape, 'softmax_label' : (args.per_batch_size,)+label_shape}
data_shape_dict = train_iter.get_shape_dict()
arg_params, aux_params = sym_heatmap.init_weights(sym, data_shape_dict)
else:
vec = args.pretrained.split(',')
print('loading', vec)
_, arg_params, aux_params = mx.model.load_checkpoint(vec[0], int(vec[1]))
#sym, arg_params, aux_params = get_symbol(args, arg_params, aux_params)
model = mx.mod.Module(
context = ctx,
symbol = sym,
label_names = train_iter.get_label_names(),
)
#lr = 1.0e-3
#lr = 2.5e-4
#_rescale_grad = 1.0/args.ctx_num
_rescale_grad = 1.0/args.batch_size
#lr = args.lr
#opt = optimizer.Nadam(learning_rate=args.lr, wd=args.wd, rescale_grad=_rescale_grad, clip_gradient=5.0)
if args.optimizer=='onadam':
opt = ONadam(learning_rate=args.lr, wd=args.wd, rescale_grad=_rescale_grad, clip_gradient=5.0)
elif args.optimizer=='nadam':
opt = optimizer.Nadam(learning_rate=args.lr, rescale_grad=_rescale_grad)
elif args.optimizer=='rmsprop':
opt = optimizer.RMSProp(learning_rate=args.lr, rescale_grad=_rescale_grad)
elif args.optimizer=='adam':
opt = optimizer.Adam(learning_rate=args.lr, rescale_grad=_rescale_grad)
else:
opt = optimizer.SGD(learning_rate=args.lr, momentum=0.9, wd=args.wd, rescale_grad=_rescale_grad)
initializer = mx.init.Xavier(rnd_type='gaussian', factor_type="in", magnitude=2)
_cb = mx.callback.Speedometer(args.batch_size, args.frequent)
_metric = LossValueMetric()
#_metric = NMEMetric()
#_metric2 = AccMetric()
#eval_metrics = [_metric, _metric2]
eval_metrics = [_metric]
lr_steps = [int(x) for x in args.lr_step.split(',')]
print('lr-steps', lr_steps)
global_step = [0]
def val_test():
    """Evaluate the current model on every configured validation set.

    For each target in ``config.val_targets`` with an existing ``.rec``
    file, runs inference with the enclosing scope's training ``model``
    and prints the metric loss plus the mean absolute prediction error
    scaled to input-image pixel units.
    """
    # NOTE(review): the original code also constructed a second module
    # bound to the 'heatmap_output' internal layer (vmodel) but never
    # used it — the loop below forwards through `model`. The dead,
    # memory-consuming construction has been removed.
    for target in config.val_targets:
        _file = os.path.join(config.dataset_path, '%s.rec' % target)
        if not os.path.exists(_file):
            continue  # skip targets without a packed record file
        val_iter = FaceSegIter(path_imgrec=_file,
                               batch_size=args.batch_size,
                               aug_level=0,  # no augmentation at eval time
                               args=args,
                               )
        _metric = LossValueMetric()
        val_metric = mx.metric.create(_metric)
        val_metric.reset()
        val_iter.reset()
        diffs = []
        for i, eval_batch in enumerate(val_iter):
            batch_data = mx.io.DataBatch(eval_batch.data)
            model.forward(batch_data, is_train=False)
            _label = eval_batch.label[0].asnumpy()
            _pred = model.get_outputs()[-1].asnumpy()
            # Mean absolute error, rescaled to input-image pixels.
            _diff = np.mean(np.abs(_pred - _label)) * config.input_img_size
            diffs.append(_diff)
            model.update_metric(val_metric, eval_batch.label)
        nme_value = val_metric.get_name_value()[0][1]
        print('[%d][%s]LOSS: %f' % (global_step[0], target, nme_value))
        print('avg diff', np.mean(diffs))
def _batch_callback(param):
    """Per-batch hook: speedometer, lr schedule, periodic eval/checkpoint."""
    _cb(param)
    global_step[0] += 1
    mbatch = global_step[0]
    # Step the learning rate when a scheduled boundary is reached.
    if mbatch in lr_steps:
        opt.lr *= 0.1 if args.optimizer == 'sgd' else 0.5
        print('lr change to', opt.lr)
    if mbatch % 1000 == 0:
        print('lr-batch-epoch:', opt.lr, param.nbatch, param.epoch)
    # Periodic validation and (optionally) checkpointing.
    if mbatch > 0 and mbatch % args.verbose == 0:
        val_test()
        if args.ckpt == 1:
            msave = mbatch // args.verbose
            print('saving', msave)
            arg, aux = model.get_params()
            mx.model.save_checkpoint(args.prefix, msave, model.symbol, arg, aux)
    # Last scheduled step: optionally save a final snapshot, then stop.
    if mbatch == lr_steps[-1]:
        if args.ckpt == 2:
            msave = 1
            print('saving', msave)
            arg, aux = model.get_params()
            mx.model.save_checkpoint(args.prefix, msave, model.symbol, arg, aux)
        sys.exit(0)
# Overlap data loading with computation, then launch training. The epoch
# count is effectively unbounded; _batch_callback exits the process once
# the last lr step has been reached.
train_iter = mx.io.PrefetchingIter(train_iter)
model.fit(train_iter,
          begin_epoch=0,
          num_epoch=9999,
          eval_data=None,
          eval_metric=eval_metrics,
          kvstore='device',
          optimizer=opt,
          initializer=initializer,
          arg_params=arg_params,
          aux_params=aux_params,
          allow_missing=True,
          batch_end_callback=_batch_callback,
          epoch_end_callback=None)
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Train face alignment')
    # Network/dataset are parsed first so the merged config (and its
    # defaults) is available before the remaining options are registered.
    parser.add_argument('--network', type=str, default=default.network, help='network name')
    parser.add_argument('--dataset', type=str, default=default.dataset, help='dataset name')
    args, rest = parser.parse_known_args()
    generate_config(args.network, args.dataset)
    # Training hyper-parameters and bookkeeping options.
    parser.add_argument('--prefix', default=default.prefix, help='directory to save model.')
    parser.add_argument('--pretrained', default=default.pretrained, help='')
    parser.add_argument('--optimizer', default='nadam', help='')
    parser.add_argument('--lr', type=float, default=default.lr, help='')
    parser.add_argument('--wd', type=float, default=default.wd, help='')
    parser.add_argument('--per-batch-size', type=int, default=default.per_batch_size, help='')
    parser.add_argument('--lr-step', type=str, default=default.lr_step, help='learning rate steps (in epoch)')
    parser.add_argument('--ckpt', type=int, default=1, help='')
    parser.add_argument('--norm', type=int, default=0, help='')
    parser.add_argument('--exf', type=int, default=1, help='')
    parser.add_argument('--frequent', type=int, default=default.frequent, help='')
    parser.add_argument('--verbose', type=int, default=default.verbose, help='')
    main(parser.parse_args())

View File

@@ -5,8 +5,10 @@ By Jia Guo and [Jiankang Deng](https://jiankangdeng.github.io/)
## License
The code of InsightFace is released under the MIT License.
The code of InsightFace is released under the MIT License. There is no limitation on academic or commercial usage.
The training data containing the annotation (and the models trained with these data) are available for non-commercial research purposes only.
## ArcFace Video Demo
[![ArcFace Demo](https://github.com/deepinsight/insightface/blob/master/resources/facerecognitionfromvideo.PNG)](https://www.youtube.com/watch?v=y-D1tReryGA&t=81s)
@@ -15,10 +17,16 @@ Please click the image to watch the Youtube video. For Bilibili users, click [he
## Recent Update
**`2019.04.14`**: We will launch a Light-weight Face Recognition challenge/workshop on ICCV 2019.
**`2019.08.10`**: We achieved 2nd place at [WIDER Face Detection Challenge 2019](http://wider-challenge.org/2019.html).
**`2019.04.04`**: Arcface achieved state-of-the-art performance (5/109) on the NIST Face Recognition Vendor Test (FRVT) (1:1 verification)
[report](https://www.nist.gov/sites/default/files/documents/2019/04/04/frvt_report_2019_04_04.pdf) (name: Imperial-000). Our solution is based on [MS1MV2+DeepGlintAsian, ResNet100, ArcFace loss].
**`2019.05.30`**: [Presentation at cvmart](https://pan.baidu.com/s/1v9fFHBJ8Q9Kl9Z6GwhbY6A)
**`2019.04.30`**: Our Face detector ([RetinaFace](https://github.com/deepinsight/insightface/tree/master/RetinaFace)) obtains state-of-the-art results on [the WiderFace dataset](http://shuoyang1213.me/WIDERFACE/WiderFace_Results.html).
**`2019.04.14`**: We will launch a [Light-weight Face Recognition challenge/workshop](https://github.com/deepinsight/insightface/tree/master/iccv19-challenge) on ICCV 2019.
**`2019.04.04`**: Arcface achieved state-of-the-art performance (7/109) on the NIST Face Recognition Vendor Test (FRVT) (1:1 verification)
[report](https://www.nist.gov/sites/default/files/documents/2019/04/04/frvt_report_2019_04_04.pdf) (name: Imperial-000 and Imperial-001). Our solution is based on [MS1MV2+DeepGlintAsian, ResNet100, ArcFace loss].
**`2019.02.08`**: Please check [https://github.com/deepinsight/insightface/tree/master/recognition](https://github.com/deepinsight/insightface/tree/master/recognition) for our parallel training code which can easily and efficiently support one million identities on a single machine (8* 1080ti).
@@ -203,6 +211,7 @@ For single cropped face image(112x112), total inference time is only 17ms on our
- TensorFlow: [InsightFace_TF](https://github.com/auroua/InsightFace_TF)
- TensorFlow: [tf-insightface](https://github.com/AIInAi/tf-insightface)
- TensorFlow:[insightface](https://github.com/Fei-Wang/insightface)
- PyTorch: [InsightFace_Pytorch](https://github.com/TreB1eN/InsightFace_Pytorch)
- PyTorch: [arcface-pytorch](https://github.com/ronghuaiyang/arcface-pytorch)
- Caffe: [arcface-caffe](https://github.com/xialuxi/arcface-caffe)
@@ -212,23 +221,38 @@ For single cropped face image(112x112), total inference time is only 17ms on our
## Face Alignment
Todo
Please check the [Menpo](https://github.com/jiankangdeng/MenpoBenchmark) Benchmark and [Dense U-Net](https://github.com/deepinsight/insightface/tree/master/alignment) for more details.
## Face Detection
Todo
Please check [RetinaFace](https://github.com/deepinsight/insightface/tree/master/RetinaFace) for more details.
## Citation
If you find *InsightFace* useful in your research, please consider to cite the following related papers:
```
@inproceedings{deng2019retinaface,
title={RetinaFace: Single-stage Dense Face Localisation in the Wild},
author={Deng, Jiankang and Guo, Jia and Yuxiang, Zhou and Jinke Yu and Irene Kotsia and Zafeiriou, Stefanos},
booktitle={arxiv},
year={2019}
}
@inproceedings{guo2018stacked,
title={Stacked Dense U-Nets with Dual Transformers for Robust Face Alignment},
author={Guo, Jia and Deng, Jiankang and Xue, Niannan and Zafeiriou, Stefanos},
booktitle={BMVC},
year={2018}
}
@article{deng2018menpo,
title={The Menpo benchmark for multi-pose 2D and 3D facial landmark localisation and tracking},
author={Deng, Jiankang and Roussos, Anastasios and Chrysos, Grigorios and Ververas, Evangelos and Kotsia, Irene and Shen, Jie and Zafeiriou, Stefanos},
journal={IJCV},
year={2018}
}
@inproceedings{deng2018arcface,
title={ArcFace: Additive Angular Margin Loss for Deep Face Recognition},
author={Deng, Jiankang and Guo, Jia and Niannan, Xue and Zafeiriou, Stefanos},

View File

@@ -2,7 +2,7 @@
## Introduction
RetinaFace is a practical single-stage face detector which is initially described in [arXiv technical report](https://arxiv.org/abs/1905.00641)
RetinaFace is a practical single-stage [SOTA](http://shuoyang1213.me/WIDERFACE/WiderFace_Results.html) face detector which is initially described in [arXiv technical report](https://arxiv.org/abs/1905.00641)
![demoimg1](https://github.com/deepinsight/insightface/blob/master/resources/11513D05.jpg)
@@ -40,7 +40,7 @@ RetinaFace is a practical single-stage face detector which is initially describe
Please check ``train.py`` for training.
1. Copy ``rcnn/sample_config.py`` to ``rcnn/config.py``
2. Download pretrained models and put them into ``model/``.
2. Download ImageNet pretrained models and put them into ``model/`` (these models are used for parameter initialization during training, not for detection testing/inference).
ImageNet ResNet50 ([baidu cloud](https://pan.baidu.com/s/1WAkU9ZA_j-OmzO-sdk9whA) and [dropbox](https://www.dropbox.com/s/48b850vmnaaasfl/imagenet-resnet-50.zip?dl=0)).
@@ -54,7 +54,7 @@ Before training, you can check the ``resnet`` network configuration (e.g. pretra
Please check ``test.py`` for testing.
## Models
## RetinaFace Pretrained Models
Pretrained Model: RetinaFace-R50 ([baidu cloud](https://pan.baidu.com/s/1C6nKq122gJxRhb37vK0_LQ) or [dropbox](https://www.dropbox.com/s/53ftnlarhyrpkg2/retinaface-R50.zip?dl=0)) is a medium size model with ResNet50 backbone.
It can output face bounding boxes and five facial landmarks in a single forward pass.
@@ -63,6 +63,13 @@ WiderFace validation mAP: Easy 96.5, Medium 95.6, Hard 90.4.
To avoid a conflict with the WiderFace Challenge (ICCV 2019), we are postponing the release of our best model.
## Third-party Models
[yangfly](https://github.com/yangfly): RetinaFace-MobileNet0.25 ([baidu cloud](https://pan.baidu.com/s/1P1ypO7VYUbNAezdvLm2m9w)).
WiderFace validation mAP: Hard 82.5. (model size: 1.68Mb)
[clancylian](https://github.com/clancylian/retinaface): C++ version
## References
```

View File

@@ -82,7 +82,7 @@ def get_image(roidb, scale=False):
im = im.astype(np.float32)
boxes_mask = roi_rec['boxes_mask'].copy() * im_scale
boxes_mask = boxes_mask.astype(np.int)
for j in xrange(boxes_mask.shape[0]):
for j in range(boxes_mask.shape[0]):
m = boxes_mask[j]
im_tensor[:,:,m[1]:m[3],m[0]:m[2]] = 0.0
#print('find mask', m, file=sys.stderr)
@@ -131,7 +131,7 @@ def __get_crop_image(roidb):
#im = im.astype(np.float32)
boxes_mask = roi_rec['boxes_mask'].copy()
boxes_mask = boxes_mask.astype(np.int)
for j in xrange(boxes_mask.shape[0]):
for j in range(boxes_mask.shape[0]):
m = boxes_mask[j]
im[m[1]:m[3],m[0]:m[2],:] = 0
#print('find mask', m, file=sys.stderr)
@@ -143,7 +143,7 @@ def __get_crop_image(roidb):
TARGET_BOX_SCALES = np.array([16,32,64,128,256,512])
assert roi_rec['boxes'].shape[0]>0
candidates = []
for i in xrange(roi_rec['boxes'].shape[0]):
for i in range(roi_rec['boxes'].shape[0]):
box = roi_rec['boxes'][i]
box_size = max(box[2]-box[0], box[3]-box[1])
if box_size<config.TRAIN.MIN_BOX_SIZE:
@@ -181,7 +181,7 @@ def __get_crop_image(roidb):
im = cv2.warpAffine(im, M, (SIZE, SIZE), borderValue = tuple(config.PIXEL_MEANS))
#tbox = np.array([left, left+SIZE, up, up+SIZE], dtype=np.int)
#im_new = np.zeros( (SIZE, SIZE,3), dtype=im.dtype)
#for i in xrange(3):
#for i in range(3):
# im_new[:,:,i] = config.PIXEL_MEANS[i]
new_rec['boxes'][:,0] -= left
new_rec['boxes'][:,2] -= left
@@ -192,7 +192,7 @@ def __get_crop_image(roidb):
#print('before', new_rec['boxes'].shape[0])
boxes_new = []
classes_new = []
for i in xrange(new_rec['boxes'].shape[0]):
for i in range(new_rec['boxes'].shape[0]):
box = new_rec['boxes'][i]
box_size = max(box[2]-box[0], box[3]-box[1])
center = np.array(([box[0], box[1]]+[box[2], box[3]]))/2
@@ -211,7 +211,7 @@ def __get_crop_image(roidb):
global TMP_ID
if TMP_ID<10:
tim = im.copy()
for i in xrange(new_rec['boxes'].shape[0]):
for i in range(new_rec['boxes'].shape[0]):
box = new_rec['boxes'][i].copy().astype(np.int)
cv2.rectangle(tim, (box[0], box[1]), (box[2], box[3]), (255, 0, 0), 1)
filename = './trainimages/train%d.png' % TMP_ID
@@ -279,7 +279,7 @@ def get_crop_image1(roidb):
#im = im.astype(np.float32)
boxes_mask = roi_rec['boxes_mask'].copy()
boxes_mask = boxes_mask.astype(np.int)
for j in xrange(boxes_mask.shape[0]):
for j in range(boxes_mask.shape[0]):
m = boxes_mask[j]
im[m[1]:m[3],m[0]:m[2],:] = 127
#print('find mask', m, file=sys.stderr)
@@ -342,7 +342,7 @@ def get_crop_image1(roidb):
#print(origin_shape, im_new.shape, im_scale)
valid = []
valid_boxes = []
for i in xrange(boxes_new.shape[0]):
for i in range(boxes_new.shape[0]):
box = boxes_new[i]
#center = np.array(([box[0], box[1]]+[box[2], box[3]]))/2
centerx = (box[0]+box[2])/2
@@ -385,12 +385,12 @@ def get_crop_image1(roidb):
global TMP_ID
if TMP_ID>=0 and TMP_ID<10:
tim = im.copy().astype(np.uint8)
for i in xrange(new_rec['boxes'].shape[0]):
for i in range(new_rec['boxes'].shape[0]):
box = new_rec['boxes'][i].copy().astype(np.int)
cv2.rectangle(tim, (box[0], box[1]), (box[2], box[3]), (255, 0, 0), 1)
print('draw box:', box)
if config.FACE_LANDMARK:
for i in xrange(new_rec['landmarks'].shape[0]):
for i in range(new_rec['landmarks'].shape[0]):
landmark = new_rec['landmarks'][i].copy()
if landmark[0][2]<0:
print('zero', landmark)
@@ -444,14 +444,14 @@ def get_crop_image2(roidb):
#im = im.astype(np.float32)
boxes_mask = roi_rec['boxes_mask'].copy()
boxes_mask = boxes_mask.astype(np.int)
for j in xrange(boxes_mask.shape[0]):
for j in range(boxes_mask.shape[0]):
m = boxes_mask[j]
im[m[1]:m[3],m[0]:m[2],:] = 0
#print('find mask', m, file=sys.stderr)
SIZE = config.SCALES[0][0]
scale_array = np.array([16,32,64,128,256,512], dtype=np.float32)
candidates = []
for i in xrange(roi_rec['boxes'].shape[0]):
for i in range(roi_rec['boxes'].shape[0]):
box = roi_rec['boxes'][i]
box_size = max(box[2]-box[0], box[3]-box[1])
if box_size<config.TRAIN.MIN_BOX_SIZE:
@@ -594,7 +594,7 @@ def get_crop_image2(roidb):
#print(origin_shape, im_new.shape, im_scale)
valid = []
valid_boxes = []
for i in xrange(boxes_new.shape[0]):
for i in range(boxes_new.shape[0]):
box = boxes_new[i]
#center = np.array(([box[0], box[1]]+[box[2], box[3]]))/2
centerx = (box[0]+box[2])/2
@@ -633,12 +633,12 @@ def get_crop_image2(roidb):
global TMP_ID
if TMP_ID>=0 and TMP_ID<10:
tim = im.copy().astype(np.uint8)
for i in xrange(new_rec['boxes'].shape[0]):
for i in range(new_rec['boxes'].shape[0]):
box = new_rec['boxes'][i].copy().astype(np.int)
cv2.rectangle(tim, (box[0], box[1]), (box[2], box[3]), (255, 0, 0), 1)
print('draw box:', box)
if config.FACE_LANDMARK:
for i in xrange(new_rec['landmarks'].shape[0]):
for i in range(new_rec['landmarks'].shape[0]):
landmark = new_rec['landmarks'][i].copy()
if landmark[10]==0.0:
print('zero', landmark)

View File

@@ -6,7 +6,7 @@ import mxnet.autograd as ag
import numpy as np
from rcnn.config import config
from rcnn.PY_OP import rpn_fpn_ohem3
from symbol_common import get_sym_train
from rcnn.symbol.symbol_common import get_sym_train
def conv_only(from_layer, name, num_filter, kernel=(1,1), pad=(0,0), \

View File

@@ -6,7 +6,7 @@ import mxnet.autograd as ag
import numpy as np
from rcnn.config import config
from rcnn.PY_OP import rpn_fpn_ohem3
from symbol_common import get_sym_train
from rcnn.symbol.symbol_common import get_sym_train
def conv_only(from_layer, name, num_filter, kernel=(1,1), pad=(0,0), \
stride=(1,1), bias_wd_mult=0.0):

View File

@@ -159,7 +159,7 @@ class FaceSegIter(DataIter):
#ul = np.array( (50000,50000), dtype=np.int32)
#br = np.array( (0,0), dtype=np.int32)
#for i in xrange(hlabel.shape[0]):
#for i in range(hlabel.shape[0]):
# h = int(hlabel[i][0])
# w = int(hlabel[i][1])
# key = np.array((h,w))
@@ -171,9 +171,9 @@ class FaceSegIter(DataIter):
def get_flip(self, data, label):
data_flip = np.zeros_like(data)
label_flip = np.zeros_like(label)
for k in xrange(data_flip.shape[2]):
for k in range(data_flip.shape[2]):
data_flip[:,:,k] = np.fliplr(data[:,:,k])
for k in xrange(label_flip.shape[0]):
for k in range(label_flip.shape[0]):
label_flip[k,:] = np.fliplr(label[k,:])
#print(label[0,:].shape)
label_flip = label_flip[self.flip_order,:]
@@ -186,7 +186,7 @@ class FaceSegIter(DataIter):
# filename = './vis/raw_%d.jpg' % (self.img_num)
# print('save', filename)
# draw = data.copy()
# for i in xrange(label.shape[0]):
# for i in range(label.shape[0]):
# cv2.circle(draw, (label[i][1], label[i][0]), 1, (0, 0, 255), 2)
# scipy.misc.imsave(filename, draw)
@@ -223,7 +223,7 @@ class FaceSegIter(DataIter):
#data_out = img_helper.crop2(data, center, _scale, (self.input_img_size, self.input_img_size), rot=rotate)
label_out = np.zeros(self.label_shape, dtype=np.float32)
#print('out shapes', data_out.shape, label_out.shape)
for i in xrange(label.shape[0]):
for i in range(label.shape[0]):
pt = label[i].copy()
#pt = pt[::-1]
npt = img_helper.transform_pt(pt, trans)
@@ -277,7 +277,7 @@ class FaceSegIter(DataIter):
print('save', filename)
draw = data_out.copy()
alabel = label_out.copy()
for i in xrange(label.shape[0]):
for i in range(label.shape[0]):
a = cv2.resize(alabel[i], (self.input_img_size, self.input_img_size))
ind = np.unravel_index(np.argmax(a, axis=None), a.shape)
cv2.circle(draw, (ind[1], ind[0]), 1, (0, 0, 255), 2)

View File

@@ -28,7 +28,7 @@ class NMEMetric(mx.metric.EvalMetric):
def cal_nme(self, label, pred_label):
nme = []
for b in xrange(pred_label.shape[0]):
for b in range(pred_label.shape[0]):
record = [None]*6
item = []
if label.ndim==4:
@@ -39,7 +39,7 @@ class NMEMetric(mx.metric.EvalMetric):
#print(label[b])
if np.count_nonzero(label[b])==0:
continue
for p in xrange(pred_label.shape[1]):
for p in range(pred_label.shape[1]):
if label.ndim==4:
heatmap_gt = label[b][p]
ind_gt = np.unravel_index(np.argmax(heatmap_gt, axis=None), heatmap_gt.shape)

View File

@@ -313,19 +313,19 @@ def hourglass(data, nFilters, nModules, n, workspace, name, binarize, dcn):
s = 2
_dcn = False
up1 = data
for i in xrange(nModules):
for i in range(nModules):
up1 = conv_block(up1, nFilters, (1,1), True, "%s_up1_%d"%(name,i), binarize, _dcn, 1)
low1 = mx.sym.Pooling(data=data, kernel=(s, s), stride=(s,s), pad=(0,0), pool_type='max')
for i in xrange(nModules):
for i in range(nModules):
low1 = conv_block(low1, nFilters, (1,1), True, "%s_low1_%d"%(name,i), binarize, _dcn, 1)
if n>1:
low2 = hourglass(low1, nFilters, nModules, n-1, workspace, "%s_%d"%(name, n-1), binarize, dcn)
else:
low2 = low1
for i in xrange(nModules):
for i in range(nModules):
low2 = conv_block(low2, nFilters, (1,1), True, "%s_low2_%d"%(name,i), binarize, _dcn, 1) #TODO
low3 = low2
for i in xrange(nModules):
for i in range(nModules):
low3 = conv_block(low3, nFilters, (1,1), True, "%s_low3_%d"%(name,i), binarize, _dcn, 1)
up2 = mx.symbol.UpSampling(low3, scale=s, sample_type='nearest', workspace=512, name='%s_upsampling_%s'%(name,n), num_args=1)
return mx.symbol.add_n(up1, up2)
@@ -517,14 +517,14 @@ def get_symbol(num_classes):
heatmap = None
for i in xrange(nStacks):
for i in range(nStacks):
shortcut = body
if config.net_sta>0:
sta = STA(body, nFilters, nModules, config.net_n+1, workspace, 'sta%d'%(i))
body = sta.get()
else:
body = hourglass(body, nFilters, nModules, config.net_n, workspace, 'stack%d_hg'%(i), binarize, dcn)
for j in xrange(nModules):
for j in range(nModules):
body = conv_block(body, nFilters, (1,1), True, 'stack%d_unit%d'%(i,j), binarize, dcn, 1)
_dcn = True if config.net_dcn>=2 else False
ll = ConvFactory(body, nFilters, (1,1), dcn = _dcn, name='stack%d_ll'%(i))
@@ -596,7 +596,8 @@ def init_weights(sym, data_shape_dict):
#print(arg_shape_dict)
arg_params = {}
aux_params = {}
for k,v in arg_shape_dict.iteritems():
for k in arg_shape_dict:
v = arg_shape_dict[k]
#print(k,v)
if k.endswith('offset_weight') or k.endswith('offset_bias'):
print('initializing',k)

View File

@@ -54,7 +54,7 @@ class Handler:
tb = datetime.datetime.now()
print('module time cost', (tb-ta).total_seconds())
ret = np.zeros( (alabel.shape[0], 2), dtype=np.float32)
for i in xrange(alabel.shape[0]):
for i in range(alabel.shape[0]):
a = cv2.resize(alabel[i], (self.image_size[1], self.image_size[0]))
ind = np.unravel_index(np.argmax(a, axis=None), a.shape)
#ret[i] = (ind[0], ind[1]) #h, w

View File

@@ -35,7 +35,7 @@ def main(args):
ctx = []
cvd = os.environ['CUDA_VISIBLE_DEVICES'].strip()
if len(cvd)>0:
for i in xrange(len(cvd.split(','))):
for i in range(len(cvd.split(','))):
ctx.append(mx.gpu(i))
if len(ctx)==0:
ctx = [mx.cpu()]

View File

@@ -19,7 +19,7 @@ model = face_embedding.FaceModel(args)
img = cv2.imread('/raid5data/dplearn/megaface/facescrubr/112x112/Tom_Hanks/Tom_Hanks_54745.png')
time_now = datetime.datetime.now()
for i in xrange(3000):
for i in range(3000):
f1 = model.get_feature(img)
time_now2 = datetime.datetime.now()
diff = time_now2 - time_now

View File

@@ -22,7 +22,7 @@ import face_preprocess
def do_flip(data):
for idx in xrange(data.shape[0]):
for idx in range(data.shape[0]):
data[idx,:,:] = np.fliplr(data[idx,:,:])
class FaceModel:

View File

@@ -22,7 +22,7 @@ import face_preprocess
def do_flip(data):
for idx in xrange(data.shape[0]):
for idx in range(data.shape[0]):
data[idx,:,:] = np.fliplr(data[idx,:,:])
def get_model(ctx, image_size, model_str, layer):

View File

@@ -1,52 +0,0 @@
import mxnet as mx
import numpy as np
import sys, os
source_dir = sys.argv[1]
input_dir = sys.argv[2]
idx_file = os.path.join(source_dir, 'traino.idx')
rec_file = os.path.join(source_dir, 'traino.rec')
writer = mx.recordio.MXIndexedRecordIO(os.path.join(source_dir,'train.idx'), os.path.join(source_dir,'train.rec'), 'w') # pylint: disable=redefined-variable-type
imgrec = mx.recordio.MXIndexedRecordIO(idx_file, rec_file, 'r') # pylint: disable=redefined-variable-type
seq = list(imgrec.keys)
widx = 0
for img_idx in seq:
s = imgrec.read_idx(img_idx)
assert widx==img_idx
writer.write_idx(widx, s)
widx+=1
stat = {}
for _file in os.listdir(input_dir):
if not _file.endswith('.rec'):
continue
rec_file = os.path.join(input_dir, _file)
print(rec_file)
idx_file = rec_file[:-4]+'.idx'
imgrec = mx.recordio.MXIndexedRecordIO(idx_file, rec_file, 'r') # pylint: disable=redefined-variable-type
seq = list(imgrec.keys)
for img_idx in seq:
if img_idx%100==0:
print(img_idx, stat)
s = imgrec.read_idx(img_idx)
header, img = mx.recordio.unpack(s)
try:
image = mx.image.imdecode(img).asnumpy()
except:
continue
age = int(header.label[0])
if age>=20:
continue
age_group = age//10
#if not age in stat:
stat[age_group] = 0
stat[age_group]+=1
label = [9999, age]
nheader = mx.recordio.IRHeader(0, label, widx, 0)
bgr = image[:,:,::-1]
s = mx.recordio.pack_img(nheader, bgr, quality=95, img_fmt='.jpg')
writer.write_idx(widx, s)
widx+=1

View File

@@ -124,7 +124,7 @@ class FaceImageIter(io.DataIter):
def mirror_aug(self, img):
_rd = random.randint(0,1)
if _rd==1:
for c in xrange(img.shape[2]):
for c in range(img.shape[2]):
img[:,:,c] = np.fliplr(img[:,:,c])
return img

View File

@@ -22,7 +22,7 @@ import face_preprocess
def do_flip(data):
for idx in xrange(data.shape[0]):
for idx in range(data.shape[0]):
data[idx,:,:] = np.fliplr(data[idx,:,:])
def get_model(ctx, image_size, model_str, layer):

View File

@@ -81,7 +81,7 @@ class MAEMetric(mx.metric.EvalMetric):
pred_age = np.zeros( label_age.shape, dtype=np.int)
#pred_age = np.zeros( label_age.shape, dtype=np.float32)
pred = preds[-1].asnumpy()
for i in xrange(AGE):
for i in range(AGE):
_pred = pred[:,2+i*2:4+i*2]
_pred = np.argmax(_pred, axis=1)
#pred = pred[:,1]
@@ -107,7 +107,7 @@ class CUMMetric(mx.metric.EvalMetric):
label_age = np.count_nonzero(label[:,1:], axis=1)
pred_age = np.zeros( label_age.shape, dtype=np.int)
pred = preds[-1].asnumpy()
for i in xrange(AGE):
for i in range(AGE):
_pred = pred[:,2+i*2:4+i*2]
_pred = np.argmax(_pred, axis=1)
#pred = pred[:,1]
@@ -184,7 +184,7 @@ def train_net(args):
ctx = []
cvd = os.environ['CUDA_VISIBLE_DEVICES'].strip()
if len(cvd)>0:
for i in xrange(len(cvd.split(','))):
for i in range(len(cvd.split(','))):
ctx.append(mx.gpu(i))
if len(ctx)==0:
ctx = [mx.cpu()]

View File

@@ -1 +0,0 @@
Gluon interface, not totally working.

View File

@@ -1,271 +0,0 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import random
import logging
import sys
import numbers
import math
import sklearn
import datetime
import numpy as np
import cv2
import mxnet as mx
from mxnet import ndarray as nd
from mxnet import io
from mxnet import recordio
sys.path.append(os.path.join(os.path.dirname(__file__), 'common'))
import face_preprocess
import multiprocessing
logger = logging.getLogger()
class FaceImageIter(io.DataIter):
def __init__(self, batch_size, data_shape,
path_imgrec = None, task = 'age',
shuffle=False, aug_list=None, mean = None,
rand_mirror = False, cutoff = 0,
data_name='data', label_name='softmax_label', **kwargs):
super(FaceImageIter, self).__init__()
assert path_imgrec
if path_imgrec:
logging.info('loading recordio %s...',
path_imgrec)
path_imgidx = path_imgrec[0:-4]+".idx"
self.imgrec = recordio.MXIndexedRecordIO(path_imgidx, path_imgrec, 'r') # pylint: disable=redefined-variable-type
s = self.imgrec.read_idx(0)
header, _ = recordio.unpack(s)
self.imgidx = list(self.imgrec.keys)
if shuffle:
self.seq = self.imgidx
self.oseq = self.imgidx
print(len(self.seq))
else:
self.seq = None
self.mean = mean
self.nd_mean = None
if self.mean:
self.mean = np.array(self.mean, dtype=np.float32).reshape(1,1,3)
self.nd_mean = mx.nd.array(self.mean).reshape((1,1,3))
self.check_data_shape(data_shape)
self.provide_data = [(data_name, (batch_size,) + data_shape)]
self.batch_size = batch_size
self.data_shape = data_shape
self.shuffle = shuffle
self.image_size = '%d,%d'%(data_shape[1],data_shape[2])
self.rand_mirror = rand_mirror
print('rand_mirror', rand_mirror)
self.cutoff = cutoff
if task=='age':
self.provide_label = [(label_name, (batch_size,100))]
else:
self.provide_label = [(label_name, (batch_size,))]
#print(self.provide_label[0][1])
self.cur = 0
self.nbatch = 0
self.is_init = False
def reset(self):
"""Resets the iterator to the beginning of the data."""
print('call reset()')
self.cur = 0
if self.shuffle:
random.shuffle(self.seq)
if self.seq is None and self.imgrec is not None:
self.imgrec.reset()
def num_samples(self):
return len(self.seq)
def next_sample(self):
"""Helper function for reading in next sample."""
#set total batch size, for example, 1800, and maximum size for each people, for example 45
if self.seq is not None:
while True:
if self.cur >= len(self.seq):
raise StopIteration
idx = self.seq[self.cur]
self.cur += 1
if self.imgrec is not None:
s = self.imgrec.read_idx(idx)
header, img = recordio.unpack(s)
label = header.label
return label, img, None, None
else:
label, fname, bbox, landmark = self.imglist[idx]
return label, self.read_image(fname), bbox, landmark
else:
s = self.imgrec.read()
if s is None:
raise StopIteration
header, img = recordio.unpack(s)
return header.label, img, None, None
def brightness_aug(self, src, x):
alpha = 1.0 + random.uniform(-x, x)
src *= alpha
return src
def contrast_aug(self, src, x):
alpha = 1.0 + random.uniform(-x, x)
coef = np.array([[[0.299, 0.587, 0.114]]])
gray = src * coef
gray = (3.0 * (1.0 - alpha) / gray.size) * np.sum(gray)
src *= alpha
src += gray
return src
def saturation_aug(self, src, x):
alpha = 1.0 + random.uniform(-x, x)
coef = np.array([[[0.299, 0.587, 0.114]]])
gray = src * coef
gray = np.sum(gray, axis=2, keepdims=True)
gray *= (1.0 - alpha)
src *= alpha
src += gray
return src
def color_aug(self, img, x):
augs = [self.brightness_aug, self.contrast_aug, self.saturation_aug]
random.shuffle(augs)
for aug in augs:
#print(img.shape)
img = aug(img, x)
#print(img.shape)
return img
def mirror_aug(self, img):
_rd = random.randint(0,1)
if _rd==1:
for c in xrange(img.shape[2]):
img[:,:,c] = np.fliplr(img[:,:,c])
return img
def next(self):
if not self.is_init:
self.reset()
self.is_init = True
"""Returns the next batch of data."""
#print('in next', self.cur, self.labelcur)
self.nbatch+=1
batch_size = self.batch_size
c, h, w = self.data_shape
batch_data = nd.empty((batch_size, c, h, w))
if self.provide_label is not None:
batch_label = nd.empty(self.provide_label[0][1])
i = 0
try:
while i < batch_size:
label, s, bbox, landmark = self.next_sample()
#if label[1]>=0.0 or label[2]>=0.0:
# print(label[0:10])
_data = self.imdecode(s)
if self.rand_mirror:
_rd = random.randint(0,1)
if _rd==1:
_data = mx.ndarray.flip(data=_data, axis=1)
#_data = _data.astype('float32')
#_data -= 127.5
#_data *= 0.0078125
if self.cutoff>0:
centerh = random.randint(0, _data.shape[0]-1)
centerw = random.randint(0, _data.shape[1]-1)
half = self.cutoff//2
starth = max(0, centerh-half)
endh = min(_data.shape[0], centerh+half)
startw = max(0, centerw-half)
endw = min(_data.shape[1], centerw+half)
_data = _data.astype('float32')
#print(starth, endh, startw, endw, _data.shape)
_data[starth:endh, startw:endw, :] = 127.5
data = [_data]
try:
self.check_valid_image(data)
except RuntimeError as e:
logging.debug('Invalid image, skipping: %s', str(e))
continue
#print('aa',data[0].shape)
#data = self.augmentation_transform(data)
#print('bb',data[0].shape)
for datum in data:
assert i < batch_size, 'Batch size must be multiples of augmenter output length'
#print(datum.shape)
batch_data[i][:] = self.postprocess_data(datum)
batch_label[i][:] = label
i += 1
except StopIteration:
if i<batch_size:
raise StopIteration
return io.DataBatch([batch_data], [batch_label], batch_size - i)
def check_data_shape(self, data_shape):
"""Checks if the input data shape is valid"""
if not len(data_shape) == 3:
raise ValueError('data_shape should have length 3, with dimensions CxHxW')
if not data_shape[0] == 3:
raise ValueError('This iterator expects inputs to have 3 channels.')
def check_valid_image(self, data):
"""Checks if the input data is valid"""
if len(data[0].shape) == 0:
raise RuntimeError('Data shape is wrong')
def imdecode(self, s):
"""Decodes a string or byte string to an NDArray.
See mx.img.imdecode for more details."""
img = mx.image.imdecode(s) #mx.ndarray
return img
def read_image(self, fname):
"""Reads an input image `fname` and returns the decoded raw bytes.
Example usage:
----------
>>> dataIter.read_image('Face.jpg') # returns decoded raw bytes.
"""
with open(os.path.join(self.path_root, fname), 'rb') as fin:
img = fin.read()
return img
def augmentation_transform(self, data):
"""Transforms input data with specified augmentation."""
for aug in self.auglist:
data = [ret for src in data for ret in aug(src)]
return data
def postprocess_data(self, datum):
"""Final postprocessing step before image is loaded into the batch."""
return nd.transpose(datum, axes=(2, 0, 1))
class FaceImageIterList(io.DataIter):
def __init__(self, iter_list):
assert len(iter_list)>0
self.provide_data = iter_list[0].provide_data
self.provide_label = iter_list[0].provide_label
self.iter_list = iter_list
self.cur_iter = None
def reset(self):
self.cur_iter.reset()
def next(self):
self.cur_iter = random.choice(self.iter_list)
while True:
try:
ret = self.cur_iter.next()
except StopIteration:
self.cur_iter.reset()
continue
return ret

View File

@@ -1,195 +0,0 @@
import mxnet as mx
from mxnet import gluon
from mxnet import profiler
from mxnet.gluon import nn
from mxnet import ndarray as nd
import fresnet
class EmbeddingBlock(gluon.HybridBlock):
    """Head that turns a conv feature map into an embedding vector.

    mode 'D': BN -> relu -> global-avg-pool -> flatten -> Dense -> BN('fc1')
    mode 'E': BN(eps=2e-5) -> dropout(0.4) -> Dense -> BN('fc1', eps=2e-5)
    mode 'Z': BN(eps=2e-5) -> dropout(0.4) -> Dense (no final BN)
    other   : BN(eps=2e-5) -> relu -> global-avg-pool -> flatten (no Dense)
    """
    def __init__(self, emb_size = 512, mode='E', **kwargs):
        super(EmbeddingBlock, self).__init__(**kwargs)
        self.emb_size = emb_size
        print('mode', mode)
        with self.name_scope():
            self.body = nn.HybridSequential(prefix='')
            # Layers are created in list order, preserving gluon's
            # creation-order parameter naming.
            if mode == 'D':
                stages = [nn.BatchNorm(),
                          nn.Activation('relu'),
                          nn.GlobalAvgPool2D(),
                          nn.Flatten(),
                          nn.Dense(emb_size),
                          nn.BatchNorm(scale=False, prefix='fc1')]
            elif mode == 'E':
                stages = [nn.BatchNorm(epsilon=2e-5),
                          nn.Dropout(0.4),
                          nn.Dense(emb_size),
                          nn.BatchNorm(scale=False, epsilon=2e-5, prefix='fc1')]
            elif mode == 'Z':
                stages = [nn.BatchNorm(epsilon=2e-5),
                          nn.Dropout(0.4),
                          nn.Dense(emb_size)]
            else:
                stages = [nn.BatchNorm(epsilon=2e-5),
                          nn.Activation('relu'),
                          nn.GlobalAvgPool2D(),
                          nn.Flatten()]
            for stage in stages:
                self.body.add(stage)

    def hybrid_forward(self, F, x):
        """Map a backbone feature map to the embedding."""
        return self.body(x)
class ArcMarginBlock(gluon.HybridBlock):
    """Backbone + embedding + combined-margin classifier head.

    Implements the combined margin family cos(a*t + m) - b on scaled,
    L2-normalized logits (SphereFace / ArcFace / CosFace, selected by
    margin_a / margin_m / margin_b).  With margin_a == 0 the block degrades
    to a plain un-normalized fully-connected classifier.
    """
    def __init__(self, args, **kwargs):
        super(ArcMarginBlock, self).__init__(**kwargs)
        self.margin_s = args.margin_s   # feature scale s
        self.margin_m = args.margin_m   # additive angular margin m
        self.margin_a = args.margin_a   # angular multiplier a
        self.margin_b = args.margin_b   # additive cosine margin b
        self.num_classes = args.num_classes
        self.emb_size = args.emb_size
        with self.name_scope():
            # Classifier weight; passed into hybrid_forward by gluon.
            self.fc7_weight = self.params.get('fc7_weight', shape=(self.num_classes, self.emb_size))
        self.body = nn.HybridSequential(prefix='')
        feat = fresnet.get(args.num_layers,
                           version_unit=args.version_unit,
                           version_act=args.version_act)
        self.body.add(feat)
        self.body.add(EmbeddingBlock(args.emb_size, args.version_output, prefix=''))

    def feature(self, x):
        """Return the embedding only (no classifier logits)."""
        return self.body(x)

    def hybrid_forward(self, F, x, label, fc7_weight):
        feat = self.body(x)
        if self.margin_a == 0.0:
            # Plain softmax logits: no normalization, no margin.
            fc7 = F.FullyConnected(feat, fc7_weight, no_bias=True,
                                   num_hidden=self.num_classes, name='fc7')
            return fc7
        # Cosine logits: normalized embedding (scaled by s) x normalized weights.
        nx = F.L2Normalization(feat, mode='instance', name='fc1n')*self.margin_s
        w = F.L2Normalization(fc7_weight, mode='instance')
        fc7 = F.FullyConnected(nx, w, no_bias=True, num_hidden=self.num_classes, name='fc7')
        if self.margin_a != 1.0 or self.margin_m != 0.0 or self.margin_b != 0.0:
            if self.margin_a == 1.0 and self.margin_m == 0.0:
                # CosFace: subtract s*b from the target-class logit only.
                # Bug fix: this line previously read `s*self.margin_b` with
                # `s` undefined, raising NameError on this path.
                s_m = self.margin_s*self.margin_b
                gt_one_hot = F.one_hot(label, depth=self.num_classes, on_value=s_m, off_value=0.0)
                fc7 = fc7-gt_one_hot
            else:
                # General case: recover the target angle t, apply a*t + m,
                # re-cosine, subtract b, and patch the target logit.
                zy = F.pick(fc7, label, axis=1)
                cos_t = zy/self.margin_s
                t = F.arccos(cos_t)
                if self.margin_a != 1.0:
                    t = t*self.margin_a
                if self.margin_m > 0.0:
                    t = t+self.margin_m
                body = F.cos(t)
                if self.margin_b > 0.0:
                    body = body - self.margin_b
                new_zy = body*self.margin_s
                diff = new_zy - zy
                diff = F.expand_dims(diff, 1)
                gt_one_hot = F.one_hot(label, depth=self.num_classes, on_value=1.0, off_value=0.0)
                body = F.broadcast_mul(gt_one_hot, diff)
                fc7 = fc7+body
        return fc7
class DenseBlock(gluon.HybridBlock):
    """Backbone + embedding followed by a plain Dense classifier ('fc7')."""
    def __init__(self, args, **kwargs):
        super(DenseBlock, self).__init__(**kwargs)
        self.num_classes = args.num_classes
        self.emb_size = args.emb_size
        self.body = nn.HybridSequential(prefix='')
        backbone = fresnet.get(args.num_layers,
                               version_unit=args.version_unit,
                               version_act=args.version_act)
        self.body.add(backbone)
        self.body.add(EmbeddingBlock(args.emb_size, args.version_output, prefix=''))
        self.dense = nn.Dense(self.num_classes, prefix='fc7')

    def feature(self, x):
        """Return the embedding only (no classifier logits)."""
        return self.body(x)

    def hybrid_forward(self, F, x):
        return self.dense(self.body(x))
class ArcMarginTestBlock(gluon.Block):
    """Inference-only wrapper: backbone + embedding, no classifier head."""
    def __init__(self, args, **kwargs):
        super(ArcMarginTestBlock, self).__init__(**kwargs)
        self.body = nn.HybridSequential(prefix='')
        backbone = fresnet.get(args.num_layers,
                               version_unit=args.version_unit,
                               version_act=args.version_act)
        self.body.add(backbone)
        self.body.add(EmbeddingBlock(args.emb_size, args.version_output))

    def forward(self, x):
        return self.body(x)
class _GABlock(gluon.HybridBlock):
    """One head of GABlock: backbone + embedding + Dense(num_classes)."""
    def __init__(self, args, num_classes, **kwargs):
        super(_GABlock, self).__init__(**kwargs)
        with self.name_scope():
            self.body = nn.HybridSequential(prefix='')
            backbone = fresnet.get(args.num_layers,
                                   version_unit=args.version_unit,
                                   version_act=args.version_act)
            self.body.add(backbone)
            self.body.add(EmbeddingBlock(mode=args.version_output))
            self.body.add(nn.Dense(num_classes))

    def hybrid_forward(self, F, x):
        return self.body(x)
class GABlock(gluon.HybridBlock):
    """Two-headed classifier: gender (2-way) and age (200-way).

    hybrid_forward returns [concat(gender, age), gender, age].
    """
    def __init__(self, args, **kwargs):
        super(GABlock, self).__init__(**kwargs)
        with self.name_scope():
            self.bodyg = _GABlock(args, 2, prefix='gender_')
            self.bodya = _GABlock(args, 200, prefix='age_')

    def hybrid_forward(self, F, x):
        gender = self.bodyg(x)
        age = self.bodya(x)
        fused = F.concat(gender, age, dim=1, name='fc1')
        return [fused, gender, age]

View File

@@ -1,29 +0,0 @@
class EmbeddingBlock(HybridBlock):
    """Feature-map -> embedding head; layout selected by `mode`.

    'D' : BN -> relu -> global-avg-pool -> flatten -> Dense -> BN('fc1')
    'E' : BN -> dropout(0.4) -> Dense -> BN('fc1')
    else: BN -> relu -> global-avg-pool -> flatten (no Dense)
    """
    def __init__(self, emb_size = 512, mode='E', **kwargs):
        super(EmbeddingBlock, self).__init__(**kwargs)
        self.body = nn.HybridSequential(prefix='')
        # Layers are created in list order, preserving gluon's
        # creation-order parameter naming.
        if mode == 'D':
            stages = [nn.BatchNorm(),
                      nn.Activation('relu'),
                      nn.GlobalAvgPool2D(),
                      nn.Flatten(),
                      nn.Dense(emb_size),
                      nn.BatchNorm(scale=False, prefix='fc1')]
        elif mode == 'E':
            stages = [nn.BatchNorm(),
                      nn.Dropout(0.4),
                      nn.Dense(emb_size),
                      nn.BatchNorm(scale=False, prefix='fc1')]
        else:
            stages = [nn.BatchNorm(),
                      nn.Activation('relu'),
                      nn.GlobalAvgPool2D(),
                      nn.Flatten()]
        for stage in stages:
            self.body.add(stage)

    def hybrid_forward(self, F, x):
        return self.body(x)
class MarginBlock(HybridBlock):
def __init__(self, args, **kwargs):

View File

@@ -1,232 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# coding: utf-8
# pylint: disable= arguments-differ
"""ResNets, implemented in Gluon."""
from __future__ import division
#__all__ = ['ResNetV1', 'ResNetV2',
# 'BasicBlockV1', 'BasicBlockV2',
# 'BottleneckV1', 'BottleneckV2',
# 'resnet18_v1', 'resnet34_v1', 'resnet50_v1', 'resnet101_v1', 'resnet152_v1',
# 'resnet18_v2', 'resnet34_v2', 'resnet50_v2', 'resnet101_v2', 'resnet152_v2',
# 'get_resnet']
import os
#from ....context import cpu
from mxnet import gluon
from mxnet import profiler
from mxnet.gluon import nn
from mxnet.gluon.block import HybridBlock
# Helpers
def _conv3x3(channels, stride, in_channels):
"""3x3 convolution with padding 1 and no bias (BN follows it in the blocks below)."""
return nn.Conv2D(channels, kernel_size=3, strides=stride, padding=1,
use_bias=False, in_channels=in_channels)
def _act(act_type):
"""Build an activation layer; PReLU needs its own block (it has learnable weights)."""
if act_type=='prelu':
return nn.PReLU()
else:
return nn.Activation(act_type)
# Blocks
class BasicBlockV1(HybridBlock):
r"""BasicBlock V1 from `"Deep Residual Learning for Image Recognition"
<http://arxiv.org/abs/1512.03385>`_ paper.
This is used for ResNet V1 for 18, 34 layers.
Parameters
----------
channels : int
Number of output channels.
stride : int
Stride size.
downsample : bool, default False
Whether to downsample the input.
in_channels : int, default 0
Number of input channels. Default is 0, to infer from the graph.
"""
def __init__(self, channels, stride, downsample=False, in_channels=0, act_type = 'relu', **kwargs):
super(BasicBlockV1, self).__init__(**kwargs)
self.act_type = act_type
# Residual branch: conv-BN-act-conv-BN.  Note the stride sits on the
# SECOND conv in this variant.
self.body = nn.HybridSequential(prefix='')
self.body.add(_conv3x3(channels, 1, in_channels))
self.body.add(nn.BatchNorm(epsilon=2e-5))
self.body.add(_act(act_type))
self.body.add(_conv3x3(channels, stride, channels))
self.body.add(nn.BatchNorm(epsilon=2e-5))
# PReLU for the post-add activation has parameters, so it must be
# created here rather than inline in hybrid_forward.
if self.act_type=='prelu':
self.prelu = nn.PReLU()
if downsample:
# 1x1 strided conv projection to match the residual's shape.
self.downsample = nn.HybridSequential(prefix='')
self.downsample.add(nn.Conv2D(channels, kernel_size=1, strides=stride,
use_bias=False, in_channels=in_channels))
self.downsample.add(nn.BatchNorm(epsilon=2e-5))
else:
self.downsample = None
def hybrid_forward(self, F, x):
residual = x
x = self.body(x)
if self.downsample:
residual = self.downsample(residual)
# Activation is applied AFTER the residual add (post-activation, V1).
if self.act_type=='prelu':
x = self.prelu(x+residual)
#x = F.LeakyReLU(residual+x, act_type = self.act_type)
else:
x = F.Activation(x+residual, act_type=self.act_type)
return x
class BasicBlockV2(HybridBlock):
    r"""BasicBlock V2 from
    `"Identity Mappings in Deep Residual Networks"
    <https://arxiv.org/abs/1603.05027>`_ paper.
    This is used for ResNet V2 for 18, 34 layers.
    Parameters
    ----------
    channels : int
        Number of output channels.
    stride : int
        Stride size.
    downsample : bool, default False
        Whether to downsample the input.
    in_channels : int, default 0
        Number of input channels. Default is 0, to infer from the graph.
    act_type : str, default 'relu'
        Activation type ('prelu' gets a parameterized layer via _act).
    """
    def __init__(self, channels, stride, downsample=False, in_channels=0, act_type='relu', **kwargs):
        # Bug fix: ResNet._make_layer passes act_type=..., but this class
        # previously did not accept it, so the kwarg fell through to
        # HybridBlock.__init__ and raised TypeError for version_unit=2.
        super(BasicBlockV2, self).__init__(**kwargs)
        # Pre-activation ordering: BN -> act -> conv, twice.
        self.bn1 = nn.BatchNorm()
        self.act1 = _act(act_type)
        self.conv1 = _conv3x3(channels, stride, in_channels)
        self.bn2 = nn.BatchNorm()
        self.act2 = _act(act_type)
        self.conv2 = _conv3x3(channels, 1, channels)
        if downsample:
            # 1x1 strided conv projection to match the residual's shape.
            self.downsample = nn.Conv2D(channels, 1, stride, use_bias=False,
                                        in_channels=in_channels)
        else:
            self.downsample = None

    def hybrid_forward(self, F, x):
        residual = x
        x = self.bn1(x)
        x = self.act1(x)
        if self.downsample:
            # V2 projects from the post-BN/act tensor, not the raw input.
            residual = self.downsample(x)
        x = self.conv1(x)
        x = self.bn2(x)
        x = self.act2(x)
        x = self.conv2(x)
        return x + residual
class ResNet(HybridBlock):
r"""ResNet V2 model from
`"Identity Mappings in Deep Residual Networks"
<https://arxiv.org/abs/1603.05027>`_ paper.
Parameters
----------
block : HybridBlock
Class for the residual block. Options are BasicBlockV1, BottleneckV1.
layers : list of int
Numbers of layers in each block
channels : list of int
Numbers of channels in each block. Length should be one larger than layers list.
classes : int, default 1000
Number of classification classes.
thumbnail : bool, default False
Enable thumbnail.
"""
def __init__(self, layers, channels, **kwargs):
# Pop the custom kwargs before forwarding the rest to HybridBlock.
version_unit = kwargs.get('version_unit', 1)
act_type = kwargs.get('version_act', 'prelu')
self.act_type = act_type
del kwargs['version_unit']
del kwargs['version_act']
super(ResNet, self).__init__(**kwargs)
assert len(layers) == len(channels) - 1
print(version_unit, act_type)
# NOTE(review): a version_unit outside {1, 2} leaves `block` unbound
# and _make_layer below raises NameError -- confirm callers.
if version_unit==1:
block = BasicBlockV1
elif version_unit==2:
block = BasicBlockV2
with self.name_scope():
self.features = nn.HybridSequential(prefix='')
#self.features.add(nn.BatchNorm(scale=False, center=False))
#self.features.add(nn.BatchNorm())
# Stem: 3x3 conv at full resolution, then BN + activation.
self.features.add(_conv3x3(channels[0], 1, 0))
self.features.add(nn.BatchNorm(epsilon=2e-5))
self.features.add(_act(act_type))
in_channels = channels[0]
for i, num_layer in enumerate(layers):
#stride = 1 if i == 0 else 2
# Every stage downsamples by 2, including the first.
stride = 2
self.features.add(self._make_layer(block, num_layer, channels[i+1],
stride, i+1, in_channels=in_channels))
in_channels = channels[i+1]
#self.features.add(nn.BatchNorm())
#self.features.add(nn.Activation('relu'))
#self.features.add(nn.GlobalAvgPool2D())
#self.features.add(nn.Flatten())
#self.output = nn.Dense(classes, in_units=in_channels)
def _make_layer(self, block, layers, channels, stride, stage_index, in_channels=0):
# One stage: a strided, projecting block followed by layers-1 plain ones.
layer = nn.HybridSequential(prefix='stage%d_'%stage_index)
with layer.name_scope():
layer.add(block(channels, stride, True, in_channels=in_channels, act_type = self.act_type,
prefix=''))
for _ in range(layers-1):
layer.add(block(channels, 1, False, in_channels=channels, act_type = self.act_type, prefix=''))
return layer
def hybrid_forward(self, F, x):
# Normalize raw [0, 255] pixels to roughly [-1, 1]: (x-127.5)/128.
x = x-127.5
x = x*0.0078125
x = self.features(x)
return x
# Specification
# num_layers -> (block label, units per stage, stage channel widths).
# NOTE(review): only basic blocks are implemented above; the 'bottle_neck'
# tag for 152 layers has no matching block class here -- confirm before
# requesting num_layers=152.
resnet_spec = {18: ('basic_block', [2, 2, 2, 2], [64, 64, 128, 256, 512]),
34: ('basic_block', [3, 4, 6, 3], [64, 64, 128, 256, 512]),
50: ('basic_block', [3, 4, 14, 3], [64, 64, 128, 256, 512]),
100: ('basic_block', [3, 13, 30, 3], [64, 64, 128, 256, 512]),
152: ('bottle_neck', [3, 8, 36, 3], [64, 256, 512, 1024, 2048])}
# Constructor
def get(num_layers, **kwargs):
    """Construct a ResNet; `num_layers` must be a key of `resnet_spec`.

    Extra keyword arguments (version_unit, version_act, ...) are forwarded
    to the ResNet constructor.
    """
    assert num_layers in resnet_spec, \
        "Invalid number of layers: %d. Options are %s"%(
            num_layers, str(resnet_spec.keys()))
    _, units, widths = resnet_spec[num_layers]
    return ResNet(units, widths, **kwargs)

View File

@@ -1,285 +0,0 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import random
import logging
import sys
import numbers
import math
import sklearn
import datetime
import numpy as np
import cv2
import mxnet as mx
from mxnet import ndarray as nd
from mxnet import io
from mxnet import recordio
#sys.path.append(os.path.join(os.path.dirname(__file__), 'common'))
sys.path.append(os.path.join(os.path.dirname(os.path.dirname(__file__)), 'src', 'common'))
import face_preprocess
import multiprocessing
logger = logging.getLogger()
class FaceImageIter(io.DataIter):
    """Batch iterator over an indexed RecordIO face dataset.

    Yields DataBatch([data], [label]) with data of shape (batch, C, H, W)
    and a scalar identity label per image.  Optional augmentations: random
    horizontal mirror, and "cutoff" (a random square overwritten with the
    gray value 127.5).
    """
    def __init__(self, batch_size, data_shape,
                 path_imgrec = None,
                 shuffle=False, aug_list=None, mean = None,
                 rand_mirror = False, cutoff = 0,
                 data_name='data', label_name='softmax_label', **kwargs):
        super(FaceImageIter, self).__init__()
        assert path_imgrec
        if path_imgrec:
            logging.info('loading recordio %s...',
                         path_imgrec)
            path_imgidx = path_imgrec[0:-4]+".idx"
            self.imgrec = recordio.MXIndexedRecordIO(path_imgidx, path_imgrec, 'r')  # pylint: disable=redefined-variable-type
            s = self.imgrec.read_idx(0)
            header, _ = recordio.unpack(s)
            if header.flag > 0:
                # Record 0 carries [image-count, last-identity-index]; the
                # trailing identity records each hold an (a, b) image range.
                print('header0 label', header.label)
                self.header0 = (int(header.label[0]), int(header.label[1]))
                # Bug fix: wrap in list() so random.shuffle() works under
                # Python 3, where range() is a lazy immutable sequence.
                self.imgidx = list(range(1, int(header.label[0])))
                self.id2range = {}
                self.seq_identity = range(int(header.label[0]), int(header.label[1]))
                for identity in self.seq_identity:
                    s = self.imgrec.read_idx(identity)
                    header, _ = recordio.unpack(s)
                    a, b = int(header.label[0]), int(header.label[1])
                    self.id2range[identity] = (a, b)
                print('id2range', len(self.id2range))
            else:
                self.imgidx = list(self.imgrec.keys)
            if shuffle:
                self.seq = self.imgidx
                self.oseq = self.imgidx
                print(len(self.seq))
            else:
                self.seq = None

        self.mean = mean
        self.nd_mean = None
        if self.mean:
            self.mean = np.array(self.mean, dtype=np.float32).reshape(1, 1, 3)
            self.nd_mean = mx.nd.array(self.mean).reshape((1, 1, 3))

        self.check_data_shape(data_shape)
        self.provide_data = [(data_name, (batch_size,) + data_shape)]
        self.batch_size = batch_size
        self.data_shape = data_shape
        self.shuffle = shuffle
        self.image_size = '%d,%d'%(data_shape[1], data_shape[2])
        self.rand_mirror = rand_mirror
        print('rand_mirror', rand_mirror)
        self.cutoff = cutoff
        self.provide_label = [(label_name, (batch_size,))]
        self.cur = 0
        self.nbatch = 0
        self.is_init = False

    def reset(self):
        """Resets the iterator to the beginning of the data."""
        print('call reset()')
        self.cur = 0
        if self.shuffle:
            random.shuffle(self.seq)
        if self.seq is None and self.imgrec is not None:
            self.imgrec.reset()

    def num_samples(self):
        return len(self.seq)

    def next_sample(self):
        """Return the next (label, raw image bytes, bbox, landmark) tuple."""
        if self.seq is not None:
            while True:
                if self.cur >= len(self.seq):
                    raise StopIteration
                idx = self.seq[self.cur]
                self.cur += 1
                if self.imgrec is not None:
                    s = self.imgrec.read_idx(idx)
                    header, img = recordio.unpack(s)
                    label = header.label
                    if not isinstance(label, numbers.Number):
                        label = label[0]
                    return label, img, None, None
                else:
                    label, fname, bbox, landmark = self.imglist[idx]
                    return label, self.read_image(fname), bbox, landmark
        else:
            # Sequential mode: stream records in file order.
            s = self.imgrec.read()
            if s is None:
                raise StopIteration
            header, img = recordio.unpack(s)
            return header.label, img, None, None

    def brightness_aug(self, src, x):
        """Scale pixel values in-place by a random factor in [1-x, 1+x]."""
        alpha = 1.0 + random.uniform(-x, x)
        src *= alpha
        return src

    def contrast_aug(self, src, x):
        """Blend toward the image's mean gray level; strength in [1-x, 1+x]."""
        alpha = 1.0 + random.uniform(-x, x)
        coef = np.array([[[0.299, 0.587, 0.114]]])
        gray = src * coef
        gray = (3.0 * (1.0 - alpha) / gray.size) * np.sum(gray)
        src *= alpha
        src += gray
        return src

    def saturation_aug(self, src, x):
        """Blend each pixel toward its own luminance; strength in [1-x, 1+x]."""
        alpha = 1.0 + random.uniform(-x, x)
        coef = np.array([[[0.299, 0.587, 0.114]]])
        gray = src * coef
        gray = np.sum(gray, axis=2, keepdims=True)
        gray *= (1.0 - alpha)
        src *= alpha
        src += gray
        return src

    def color_aug(self, img, x):
        """Apply brightness/contrast/saturation jitter in random order."""
        augs = [self.brightness_aug, self.contrast_aug, self.saturation_aug]
        random.shuffle(augs)
        for aug in augs:
            img = aug(img, x)
        return img

    def mirror_aug(self, img):
        """Horizontally flip `img` channel-by-channel with probability 0.5."""
        _rd = random.randint(0, 1)
        if _rd == 1:
            # Bug fix: range() -- Python-2-only xrange is a NameError on py3.
            for c in range(img.shape[2]):
                img[:, :, c] = np.fliplr(img[:, :, c])
        return img

    def next(self):
        """Returns the next batch of data."""
        if not self.is_init:
            self.reset()
            self.is_init = True
        self.nbatch += 1
        batch_size = self.batch_size
        c, h, w = self.data_shape
        batch_data = nd.empty((batch_size, c, h, w))
        if self.provide_label is not None:
            batch_label = nd.empty(self.provide_label[0][1])
        i = 0
        try:
            while i < batch_size:
                label, s, bbox, landmark = self.next_sample()
                _data = self.imdecode(s)
                if self.rand_mirror:
                    _rd = random.randint(0, 1)
                    if _rd == 1:
                        _data = mx.ndarray.flip(data=_data, axis=1)
                if self.nd_mean is not None:
                    # Normalize raw pixels to roughly [-1, 1].
                    _data = _data.astype('float32')
                    _data -= 127.5
                    _data *= 0.0078125
                if self.cutoff > 0:
                    # Overwrite a random cutoff x cutoff square with gray.
                    centerh = random.randint(0, _data.shape[0]-1)
                    centerw = random.randint(0, _data.shape[1]-1)
                    half = self.cutoff//2
                    starth = max(0, centerh-half)
                    endh = min(_data.shape[0], centerh+half)
                    startw = max(0, centerw-half)
                    endw = min(_data.shape[1], centerw+half)
                    _data = _data.astype('float32')
                    _data[starth:endh, startw:endw, :] = 127.5
                data = [_data]
                try:
                    self.check_valid_image(data)
                except RuntimeError as e:
                    logging.debug('Invalid image, skipping: %s', str(e))
                    continue
                for datum in data:
                    assert i < batch_size, 'Batch size must be multiples of augmenter output length'
                    batch_data[i][:] = self.postprocess_data(datum)
                    batch_label[i][:] = label
                    i += 1
        except StopIteration:
            # A partial final batch is discarded.
            if i < batch_size:
                raise StopIteration
        # Third argument is the pad count (0 for a full batch).
        return io.DataBatch([batch_data], [batch_label], batch_size - i)

    def check_data_shape(self, data_shape):
        """Checks if the input data shape is valid"""
        if not len(data_shape) == 3:
            raise ValueError('data_shape should have length 3, with dimensions CxHxW')
        if not data_shape[0] == 3:
            raise ValueError('This iterator expects inputs to have 3 channels.')

    def check_valid_image(self, data):
        """Checks if the input data is valid"""
        if len(data[0].shape) == 0:
            raise RuntimeError('Data shape is wrong')

    def imdecode(self, s):
        """Decodes a string or byte string to an NDArray.
        See mx.img.imdecode for more details."""
        return mx.image.imdecode(s)

    def read_image(self, fname):
        """Read `fname` under self.path_root and return its raw encoded bytes."""
        with open(os.path.join(self.path_root, fname), 'rb') as fin:
            img = fin.read()
        return img

    def augmentation_transform(self, data):
        """Transforms input data with specified augmentation."""
        for aug in self.auglist:
            data = [ret for src in data for ret in aug(src)]
        return data

    def postprocess_data(self, datum):
        """Final postprocessing step (HWC -> CHW) before batching."""
        return nd.transpose(datum, axes=(2, 0, 1))
class FaceImageIterList(io.DataIter):
    """DataIter that draws each batch from a randomly chosen member iterator.

    All member iterators must share the same data/label layout; the
    provide_data/provide_label signatures are taken from the first one.
    """
    def __init__(self, iter_list):
        assert len(iter_list) > 0
        self.provide_data = iter_list[0].provide_data
        self.provide_label = iter_list[0].provide_label
        self.iter_list = iter_list
        self.cur_iter = None

    def reset(self):
        """Reset the most recently used member iterator.

        Bug fix: previously this dereferenced ``self.cur_iter`` even when
        ``next()`` had never been called, raising AttributeError on None.
        """
        if self.cur_iter is not None:
            self.cur_iter.reset()

    def next(self):
        """Pick a random member iterator and return its next batch,
        resetting and retrying any member that is exhausted."""
        self.cur_iter = random.choice(self.iter_list)
        while True:
            try:
                ret = self.cur_iter.next()
            except StopIteration:
                self.cur_iter.reset()
                continue
            return ret

View File

@@ -1,747 +0,0 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import sys
import math
import random
import logging
import time
import pickle
import numpy as np
import sklearn
from image_iter import FaceImageIter
from age_iter import FaceImageIter as FaceImageIterAge
#from image_iter import FaceImageIterList
import mxnet as mx
from mxnet import gluon
from mxnet import profiler
from mxnet.gluon import nn
from mxnet import ndarray as nd
from mxnet import autograd as ag
from mxnet.test_utils import get_mnist_iterator
from mxnet.metric import Accuracy, TopKAccuracy, CompositeEvalMetric
import argparse
import mxnet.optimizer as optimizer
#sys.path.append(os.path.join(os.path.dirname(os.path.dirname(__file__)), 'src', 'eval'))
import verification
#sys.path.append(os.path.join(os.path.dirname(__file__), 'common'))
sys.path.append(os.path.join(os.path.dirname(os.path.dirname(__file__)), 'src', 'common'))
import face_image
#sys.path.append(os.path.join(os.path.dirname(__file__), 'eval'))
sys.path.append(os.path.join(os.path.dirname(__file__), 'blocks'))
import fresnet
from UDD import *
#import finception_resnet_v2
#import fmobilenet
#import fmobilenetv2
#import fmobilefacenet
#import fxception
#import fdensenet
#import fdpn
#import fnasnet
#import spherenet
#sys.path.append(os.path.join(os.path.dirname(__file__), 'losses'))
#import center_loss
# Root logger for training progress output.
logger = logging.getLogger()
logger.setLevel(logging.INFO)
# Number of age bins; the age metrics below read predictions as AGE
# consecutive 2-way softmax slices.
AGE = 100
# Global CLI namespace, populated by parse_args().
args = None
# Classification accuracy: argmax of raw scores vs integer labels.
class AccMetric(mx.metric.EvalMetric):
def __init__(self):
self.axis = 1
super(AccMetric, self).__init__(
'acc', axis=self.axis,
output_names=None, label_names=None)
self.losses = []
self.count = 0
def update(self, labels, preds):
self.count+=1
#preds = [preds[1]] #use softmax output
for label, pred_label in zip(labels, preds):
# Raw scores -> predicted class indices (skipped if already indices).
if pred_label.shape != label.shape:
pred_label = mx.ndarray.argmax(pred_label, axis=self.axis)
pred_label = pred_label.asnumpy().astype('int32').flatten()
label = label.asnumpy()
# 2-D labels: only the first column is compared -- presumably the
# class id, remaining columns auxiliary; confirm against the iterator.
if label.ndim==2:
label = label[:,0]
label = label.astype('int32').flatten()
assert label.shape==pred_label.shape
self.sum_metric += (pred_label.flat == label.flat).sum()
self.num_inst += len(pred_label.flat)
# Reports the scalar carried in the LAST prediction output as "the loss".
class LossValueMetric(mx.metric.EvalMetric):
def __init__(self):
self.axis = 1
super(LossValueMetric, self).__init__(
'lossvalue', axis=self.axis,
output_names=None, label_names=None)
self.losses = []
def update(self, labels, preds):
# preds[-1] is assumed to hold a 1-element loss array -- TODO confirm caller.
loss = preds[-1].asnumpy()[0]
self.sum_metric += loss
self.num_inst += 1.0
# NOTE(review): fetched but never used; forces a device sync on preds[-2].
gt_label = preds[-2].asnumpy()
#print(gt_label)
class MAEMetric(mx.metric.EvalMetric):
    """Mean absolute error for the binned age head.

    Labels are rows of AGE 0/1 flags whose non-zero count is the true age;
    predictions carry AGE consecutive 2-way score slices whose argmaxes are
    summed into the predicted age.
    """
    def __init__(self):
        self.axis = 1
        super(MAEMetric, self).__init__(
            'MAE', axis=self.axis,
            output_names=None, label_names=None)
        self.losses = []
        self.count = 0

    def update(self, labels, preds):
        self.count += 1
        label = labels[0].asnumpy()
        label_age = np.count_nonzero(label, axis=1)
        # Bug fixes: builtin int (np.int alias was removed in NumPy 1.24)
        # and range (Python-2-only xrange is a NameError on py3).
        pred_age = np.zeros(label_age.shape, dtype=int)
        pred = preds[0].asnumpy()
        for i in range(AGE):
            _pred = pred[:, i*2:(i*2+2)]
            pred_age += np.argmax(_pred, axis=1)
        mae = np.mean(np.abs(label_age - pred_age))
        self.sum_metric += mae
        self.num_inst += 1.0
class CUMMetric(mx.metric.EvalMetric):
    """Cumulative score: count of samples whose |age error| < n.

    Uses the same binned-age decoding as MAEMetric: the true age is the
    number of non-zero label flags, the predicted age is the sum of argmaxes
    over AGE consecutive 2-way score slices.
    """
    def __init__(self, n=5):
        self.axis = 1
        self.n = n   # error tolerance in years (exclusive)
        super(CUMMetric, self).__init__(
            'CUM_%d'%n, axis=self.axis,
            output_names=None, label_names=None)
        self.losses = []
        self.count = 0

    def update(self, labels, preds):
        self.count += 1
        label = labels[0].asnumpy()
        label_age = np.count_nonzero(label, axis=1)
        # Bug fixes: builtin int (np.int alias was removed in NumPy 1.24)
        # and range (Python-2-only xrange is a NameError on py3).
        pred_age = np.zeros(label_age.shape, dtype=int)
        pred = preds[0].asnumpy()
        for i in range(AGE):
            _pred = pred[:, i*2:(i*2+2)]
            pred_age += np.argmax(_pred, axis=1)
        diff = np.abs(label_age - pred_age)
        cum = np.sum(diff < self.n)
        self.sum_metric += cum
        self.num_inst += len(label_age)
def parse_args():
"""Parse command-line options into the module-level `args` and return them."""
global args
parser = argparse.ArgumentParser(description='Train face network')
# general
parser.add_argument('--data-dir', default='', help='training set directory')
parser.add_argument('--gender-data-dir', default='', help='training set directory')
parser.add_argument('--age-data-dir', default='', help='training set directory')
parser.add_argument('--prefix', default='../model/model', help='directory to save model.')
parser.add_argument('--pretrained', default='', help='pretrained model to load')
parser.add_argument('--ckpt', type=int, default=1, help='checkpoint saving option. 0: discard saving. 1: save when necessary. 2: always save')
parser.add_argument('--loss-type', type=int, default=4, help='loss type')
parser.add_argument('--verbose', type=int, default=2000, help='do verification testing and model saving every verbose batches')
parser.add_argument('--max-steps', type=int, default=0, help='max training batches')
parser.add_argument('--end-epoch', type=int, default=100000, help='training epoch size.')
# network architecture
parser.add_argument('--network', default='r50', help='specify network')
parser.add_argument('--version-output', type=str, default='E', help='network embedding output config')
parser.add_argument('--version-unit', type=int, default=1, help='resnet unit config')
parser.add_argument('--version-act', type=str, default='relu', help='network activation config')
# optimization
parser.add_argument('--lr', type=float, default=0.1, help='start learning rate')
parser.add_argument('--lr-steps', type=str, default='', help='steps of lr changing')
parser.add_argument('--wd', type=float, default=0.0005, help='weight decay')
parser.add_argument('--fc7-wd-mult', type=float, default=1.0, help='weight decay mult for fc7')
parser.add_argument('--bn-mom', type=float, default=0.9, help='bn mom')
parser.add_argument('--mom', type=float, default=0.9, help='momentum')
parser.add_argument('--emb-size', type=int, default=512, help='embedding length')
parser.add_argument('--per-batch-size', type=int, default=128, help='batch size in each context')
# margin-loss hyper-parameters (cos(a*t + m) - b, scaled by s)
parser.add_argument('--margin-m', type=float, default=0.5, help='margin for loss')
parser.add_argument('--margin-s', type=float, default=64.0, help='scale for feature')
parser.add_argument('--margin-a', type=float, default=1.0, help='')
parser.add_argument('--margin-b', type=float, default=0.0, help='')
# augmentation / evaluation / task selection
parser.add_argument('--rand-mirror', type=int, default=1, help='if do random mirror in training')
parser.add_argument('--cutoff', type=int, default=0, help='cut off aug')
parser.add_argument('--eval', type=str, default='lfw,cfp_fp,agedb_30', help='verification targets')
parser.add_argument('--task', type=str, default='', help='')
parser.add_argument('--mode', type=str, default='gluon', help='')
args = parser.parse_args()
return args
def get_model():
    """Instantiate the network selected by the global `args`.

    Empty task -> face-recognition model (margin head when margin_a > 0,
    plain dense classifier otherwise); any other task -> gender/age model.
    """
    if args.task != '':
        # AGE or GENDER training.
        return GABlock(args, prefix='')
    if args.margin_a > 0.0:
        return ArcMarginBlock(args, prefix='')
    return DenseBlock(args, prefix='')
#def get_symbol(args, arg_params, aux_params):
# data_shape = (args.image_channel,args.image_h,args.image_w)
# image_shape = ",".join([str(x) for x in data_shape])
# margin_symbols = []
# if args.network[0]=='d':
# embedding = fdensenet.get_symbol(args.emb_size, args.num_layers,
# version_se=args.version_se, version_input=args.version_input,
# version_output=args.version_output, version_unit=args.version_unit)
# elif args.network[0]=='m':
# print('init mobilenet', args.num_layers)
# if args.num_layers==1:
# embedding = fmobilenet.get_symbol(args.emb_size,
# version_se=args.version_se, version_input=args.version_input,
# version_output=args.version_output, version_unit=args.version_unit)
# else:
# embedding = fmobilenetv2.get_symbol(args.emb_size)
# elif args.network[0]=='i':
# print('init inception-resnet-v2', args.num_layers)
# embedding = finception_resnet_v2.get_symbol(args.emb_size,
# version_se=args.version_se, version_input=args.version_input,
# version_output=args.version_output, version_unit=args.version_unit)
# elif args.network[0]=='x':
# print('init xception', args.num_layers)
# embedding = fxception.get_symbol(args.emb_size,
# version_se=args.version_se, version_input=args.version_input,
# version_output=args.version_output, version_unit=args.version_unit)
# elif args.network[0]=='p':
# print('init dpn', args.num_layers)
# embedding = fdpn.get_symbol(args.emb_size, args.num_layers,
# version_se=args.version_se, version_input=args.version_input,
# version_output=args.version_output, version_unit=args.version_unit)
# elif args.network[0]=='n':
# print('init nasnet', args.num_layers)
# embedding = fnasnet.get_symbol(args.emb_size)
# elif args.network[0]=='s':
# print('init spherenet', args.num_layers)
# embedding = spherenet.get_symbol(args.emb_size, args.num_layers)
# elif args.network[0]=='y':
# print('init mobilefacenet', args.num_layers)
# embedding = fmobilefacenet.get_symbol(args.emb_size, bn_mom = args.bn_mom, wd_mult = args.fc7_wd_mult)
# else:
# print('init resnet', args.num_layers)
# embedding = fresnet.get_symbol(args.emb_size, args.num_layers,
# version_se=args.version_se, version_input=args.version_input,
# version_output=args.version_output, version_unit=args.version_unit,
# version_act=args.version_act)
# all_label = mx.symbol.Variable('softmax_label')
# gt_label = all_label
# extra_loss = None
# _weight = mx.symbol.Variable("fc7_weight", shape=(args.num_classes, args.emb_size), lr_mult=1.0, wd_mult=args.fc7_wd_mult)
# if args.loss_type==0: #softmax
# _bias = mx.symbol.Variable('fc7_bias', lr_mult=2.0, wd_mult=0.0)
# fc7 = mx.sym.FullyConnected(data=embedding, weight = _weight, bias = _bias, num_hidden=args.num_classes, name='fc7')
# elif args.loss_type==1: #sphere
# _weight = mx.symbol.L2Normalization(_weight, mode='instance')
# fc7 = mx.sym.LSoftmax(data=embedding, label=gt_label, num_hidden=args.num_classes,
# weight = _weight,
# beta=args.beta, margin=args.margin, scale=args.scale,
# beta_min=args.beta_min, verbose=1000, name='fc7')
# elif args.loss_type==2:
# s = args.margin_s
# m = args.margin_m
# assert(s>0.0)
# assert(m>0.0)
# _weight = mx.symbol.L2Normalization(_weight, mode='instance')
# nembedding = mx.symbol.L2Normalization(embedding, mode='instance', name='fc1n')*s
# fc7 = mx.sym.FullyConnected(data=nembedding, weight = _weight, no_bias = True, num_hidden=args.num_classes, name='fc7')
# s_m = s*m
# gt_one_hot = mx.sym.one_hot(gt_label, depth = args.num_classes, on_value = s_m, off_value = 0.0)
# fc7 = fc7-gt_one_hot
# elif args.loss_type==4:
# s = args.margin_s
# m = args.margin_m
# assert s>0.0
# assert m>=0.0
# assert m<(math.pi/2)
# _weight = mx.symbol.L2Normalization(_weight, mode='instance')
# nembedding = mx.symbol.L2Normalization(embedding, mode='instance', name='fc1n')*s
# fc7 = mx.sym.FullyConnected(data=nembedding, weight = _weight, no_bias = True, num_hidden=args.num_classes, name='fc7')
# zy = mx.sym.pick(fc7, gt_label, axis=1)
# cos_t = zy/s
# cos_m = math.cos(m)
# sin_m = math.sin(m)
# mm = math.sin(math.pi-m)*m
# #threshold = 0.0
# threshold = math.cos(math.pi-m)
# if args.easy_margin:
# cond = mx.symbol.Activation(data=cos_t, act_type='relu')
# else:
# cond_v = cos_t - threshold
# cond = mx.symbol.Activation(data=cond_v, act_type='relu')
# body = cos_t*cos_t
# body = 1.0-body
# sin_t = mx.sym.sqrt(body)
# new_zy = cos_t*cos_m
# b = sin_t*sin_m
# new_zy = new_zy - b
# new_zy = new_zy*s
# if args.easy_margin:
# zy_keep = zy
# else:
# zy_keep = zy - s*mm
# new_zy = mx.sym.where(cond, new_zy, zy_keep)
#
# diff = new_zy - zy
# diff = mx.sym.expand_dims(diff, 1)
# gt_one_hot = mx.sym.one_hot(gt_label, depth = args.num_classes, on_value = 1.0, off_value = 0.0)
# body = mx.sym.broadcast_mul(gt_one_hot, diff)
# fc7 = fc7+body
# elif args.loss_type==5:
# s = args.margin_s
# m = args.margin_m
# assert s>0.0
# _weight = mx.symbol.L2Normalization(_weight, mode='instance')
# nembedding = mx.symbol.L2Normalization(embedding, mode='instance', name='fc1n')*s
# fc7 = mx.sym.FullyConnected(data=nembedding, weight = _weight, no_bias = True, num_hidden=args.num_classes, name='fc7')
# if args.margin_a!=1.0 or args.margin_m!=0.0 or args.margin_b!=0.0:
# if args.margin_a==1.0 and args.margin_m==0.0:
# s_m = s*args.margin_b
# gt_one_hot = mx.sym.one_hot(gt_label, depth = args.num_classes, on_value = s_m, off_value = 0.0)
# fc7 = fc7-gt_one_hot
# else:
# zy = mx.sym.pick(fc7, gt_label, axis=1)
# cos_t = zy/s
# t = mx.sym.arccos(cos_t)
# if args.margin_a!=1.0:
# t = t*args.margin_a
# if args.margin_m>0.0:
# t = t+args.margin_m
# body = mx.sym.cos(t)
# if args.margin_b>0.0:
# body = body - args.margin_b
# new_zy = body*s
# diff = new_zy - zy
# diff = mx.sym.expand_dims(diff, 1)
# gt_one_hot = mx.sym.one_hot(gt_label, depth = args.num_classes, on_value = 1.0, off_value = 0.0)
# body = mx.sym.broadcast_mul(gt_one_hot, diff)
# fc7 = fc7+body
# out_list = [mx.symbol.BlockGrad(embedding)]
# softmax = mx.symbol.SoftmaxOutput(data=fc7, label = gt_label, name='softmax', normalization='valid')
# out_list.append(softmax)
# out = mx.symbol.Group(out_list)
# return (out, arg_params, aux_params)
#
def train_net(args):
    """Train a model for face recognition (task=='') or gender/age estimation.

    Wires up devices from CUDA_VISIBLE_DEVICES, loads dataset properties,
    builds the network via ``get_model()`` and trains either through the
    symbolic Module API (``args.mode != 'gluon'``) or a hand-written Gluon
    loop.  Learning-rate decay, periodic evaluation and checkpointing all
    happen inside the nested ``_batch_callback``.
    """
    # ---- device setup: one mx.gpu per entry in CUDA_VISIBLE_DEVICES ----
    # NOTE(review): os.environ['CUDA_VISIBLE_DEVICES'] raises KeyError when the
    # variable is unset — confirm callers always export it.
    ctx = []
    cvd = os.environ['CUDA_VISIBLE_DEVICES'].strip()
    if len(cvd)>0:
        for i in xrange(len(cvd.split(','))):
            ctx.append(mx.gpu(i))
    if len(ctx)==0:
        ctx = [mx.cpu()]
        print('use cpu')
    else:
        print('gpu num:', len(ctx))
    prefix = args.prefix
    prefix_dir = os.path.dirname(prefix)
    if not os.path.exists(prefix_dir):
        os.makedirs(prefix_dir)
    end_epoch = args.end_epoch
    args.ctx_num = len(ctx)
    # Network depth is encoded in the name, e.g. 'r100' -> 100 layers.
    args.num_layers = int(args.network[1:])
    print('num_layers', args.num_layers)
    if args.per_batch_size==0:
        args.per_batch_size = 128
    # Global batch size = per-device batch size * number of devices.
    args.batch_size = args.per_batch_size*args.ctx_num
    args.image_channel = 3

    # ---- dataset selection: each task may come from its own directory ----
    data_dir = args.data_dir
    if args.task=='gender':
        data_dir = args.gender_data_dir
    elif args.task=='age':
        data_dir = args.age_data_dir
    print('data dir', data_dir)
    path_imgrec = None
    path_imglist = None
    prop = face_image.load_property(data_dir)
    args.num_classes = prop.num_classes
    image_size = prop.image_size
    args.image_h = image_size[0]
    args.image_w = image_size[1]
    print('image_size', image_size)
    assert(args.num_classes>0)
    print('num_classes', args.num_classes)
    path_imgrec = os.path.join(data_dir, "train.rec")

    print('Called with argument:', args)
    data_shape = (args.image_channel,image_size[0],image_size[1])
    mean = None

    begin_epoch = 0
    net = get_model()
    #if args.task=='':
    #  test_net = get_model_test(net)
    #print(net.__class__)
    #net = net0[0]
    # Initializer choice follows the network-family naming convention
    # (first letter of args.network).
    if args.network[0]=='r' or args.network[0]=='y':
        initializer = mx.init.Xavier(rnd_type='gaussian', factor_type="out", magnitude=2) #resnet style
    elif args.network[0]=='i' or args.network[0]=='x':
        initializer = mx.init.Xavier(rnd_type='gaussian', factor_type="in", magnitude=2) #inception
    else:
        initializer = mx.init.Xavier(rnd_type='uniform', factor_type="in", magnitude=2)
    net.hybridize()
    if args.mode=='gluon':
        # Optionally warm-start from a pretrained parameter file, then
        # initialize any remaining parameters and move everything to ctx.
        if len(args.pretrained)==0:
            pass
        else:
            net.load_params(args.pretrained, allow_missing=True, ignore_extra = True)
        net.initialize(initializer)
        net.collect_params().reset_ctx(ctx)

    val_iter = None
    if args.task=='':
        train_iter = FaceImageIter(
            batch_size = args.batch_size,
            data_shape = data_shape,
            path_imgrec = path_imgrec,
            shuffle = True,
            rand_mirror = args.rand_mirror,
            mean = mean,
            cutoff = args.cutoff,
        )
    else:
        train_iter = FaceImageIterAge(
            batch_size = args.batch_size,
            data_shape = data_shape,
            path_imgrec = path_imgrec,
            task = args.task,
            shuffle = True,
            rand_mirror = args.rand_mirror,
            mean = mean,
            cutoff = args.cutoff,
        )

    if args.task=='age':
        metric = CompositeEvalMetric([MAEMetric(), CUMMetric()])
    elif args.task=='gender':
        metric = CompositeEvalMetric([AccMetric()])
    else:
        metric = CompositeEvalMetric([AccMetric()])

    # Verification benchmark sets (lfw etc.) are only used for the
    # recognition task.
    ver_list = []
    ver_name_list = []
    if args.task=='':
        for name in args.eval.split(','):
            path = os.path.join(data_dir,name+".bin")
            if os.path.exists(path):
                data_set = verification.load_bin(path, image_size)
                ver_list.append(data_set)
                ver_name_list.append(name)
                print('ver', name)

    def ver_test(nbatch):
        # Run every loaded verification benchmark; returns the flip accuracies.
        results = []
        for i in xrange(len(ver_list)):
            acc1, std1, acc2, std2, xnorm, embeddings_list = verification.test(ver_list[i], net, ctx, batch_size = args.batch_size)
            print('[%s][%d]XNorm: %f' % (ver_name_list[i], nbatch, xnorm))
            #print('[%s][%d]Accuracy: %1.5f+-%1.5f' % (ver_name_list[i], nbatch, acc1, std1))
            print('[%s][%d]Accuracy-Flip: %1.5f+-%1.5f' % (ver_name_list[i], nbatch, acc2, std2))
            results.append(acc2)
        return results

    def val_test(nbatch=0):
        # Validation for the age/gender tasks on their respective val.rec sets.
        # Returns the metric value matching args.task (CUM for age, Acc for gender).
        acc = 0.0
        #if args.task=='age':
        if len(args.age_data_dir)>0:
            val_iter = FaceImageIterAge(
                batch_size = args.batch_size,
                data_shape = data_shape,
                path_imgrec = os.path.join(args.age_data_dir, 'val.rec'),
                task = args.task,
                shuffle = False,
                rand_mirror = False,
                mean = mean,
            )
            _metric = MAEMetric()
            val_metric = mx.metric.create(_metric)
            val_metric.reset()
            _metric2 = CUMMetric()
            val_metric2 = mx.metric.create(_metric2)
            val_metric2.reset()
            val_iter.reset()
            for batch in val_iter:
                data = gluon.utils.split_and_load(batch.data[0], ctx_list=ctx, batch_axis=0)
                label = gluon.utils.split_and_load(batch.label[0], ctx_list=ctx, batch_axis=0)
                outputs = []
                for x in data:
                    # net(x)[2] is the output head evaluated with the age metrics.
                    outputs.append(net(x)[2])
                val_metric.update(label, outputs)
                val_metric2.update(label, outputs)
            _value = val_metric.get_name_value()[0][1]
            print('[%d][VMAE]: %f'%(nbatch, _value))
            _value = val_metric2.get_name_value()[0][1]
            if args.task=='age':
                acc = _value
            print('[%d][VCUM]: %f'%(nbatch, _value))
        if len(args.gender_data_dir)>0:
            val_iter = FaceImageIterAge(
                batch_size = args.batch_size,
                data_shape = data_shape,
                path_imgrec = os.path.join(args.gender_data_dir, 'val.rec'),
                task = args.task,
                shuffle = False,
                rand_mirror = False,
                mean = mean,
            )
            _metric = AccMetric()
            val_metric = mx.metric.create(_metric)
            val_metric.reset()
            val_iter.reset()
            for batch in val_iter:
                data = gluon.utils.split_and_load(batch.data[0], ctx_list=ctx, batch_axis=0)
                label = gluon.utils.split_and_load(batch.label[0], ctx_list=ctx, batch_axis=0)
                outputs = []
                for x in data:
                    # net(x)[1] is the output head evaluated with the gender accuracy metric.
                    outputs.append(net(x)[1])
                val_metric.update(label, outputs)
            _value = val_metric.get_name_value()[0][1]
            if args.task=='gender':
                acc = _value
            print('[%d][VACC]: %f'%(nbatch, _value))
        return acc

    total_time = 0
    num_epochs = 0
    best_acc = [0]
    highest_acc = [0.0, 0.0]  #lfw and target
    # Mutable one-element lists so the nested callbacks can update them.
    global_step = [0]
    save_step = [0]
    # Default LR schedule is expressed for an effective batch size of 512 and
    # rescaled to the actual global batch size.
    if len(args.lr_steps)==0:
        lr_steps = [100000, 140000, 160000]
        p = 512.0/args.batch_size
        for l in xrange(len(lr_steps)):
            lr_steps[l] = int(lr_steps[l]*p)
    else:
        lr_steps = [int(x) for x in args.lr_steps.split(',')]
    print('lr_steps', lr_steps)

    kv = mx.kv.create('device')
    #kv = mx.kv.create('local')
    #_rescale = 1.0/args.ctx_num
    #opt = optimizer.SGD(learning_rate=args.lr, momentum=args.mom, wd=args.wd, rescale_grad=_rescale)
    #opt = optimizer.SGD(learning_rate=args.lr, momentum=args.mom, wd=args.wd)
    if args.mode=='gluon':
        trainer = gluon.Trainer(net.collect_params(), 'sgd',
                {'learning_rate': args.lr, 'wd': args.wd, 'momentum': args.mom, 'multi_precision': True},
                kvstore=kv)
    else:
        _rescale = 1.0/args.ctx_num
        opt = optimizer.SGD(learning_rate=args.lr, momentum=args.mom, wd=args.wd, rescale_grad=_rescale)
    _cb = mx.callback.Speedometer(args.batch_size, 20)

    # ---- symbolic graph, used by the Module training path ----
    arg_params = None
    aux_params = None
    data = mx.sym.var('data')
    label = mx.sym.var('softmax_label')
    if args.margin_a>0.0:
        # Margin-based heads need the label at forward time.
        fc7 = net(data, label)
    else:
        fc7 = net(data)
    #sym = mx.symbol.SoftmaxOutput(data=fc7, label = label, name='softmax', normalization='valid')
    ceop = gluon.loss.SoftmaxCrossEntropyLoss()
    loss = ceop(fc7, label)
    #loss = loss/args.per_batch_size
    loss = mx.sym.mean(loss)
    sym = mx.sym.Group( [mx.symbol.BlockGrad(fc7), mx.symbol.MakeLoss(loss, name='softmax')] )

    def _batch_callback():
        # Per-batch bookkeeping: LR decay, periodic evaluation, checkpointing,
        # and a hard stop once args.max_steps is exceeded.
        mbatch = global_step[0]
        global_step[0]+=1
        for _lr in lr_steps:
            if mbatch==_lr:
                args.lr *= 0.1
                if args.mode=='gluon':
                    trainer.set_learning_rate(args.lr)
                else:
                    opt.lr = args.lr
                print('lr change to', args.lr)
                break

        #_cb(param)
        if mbatch%1000==0:
            print('lr-batch-epoch:',args.lr, mbatch)

        if mbatch>0 and mbatch%args.verbose==0:
            save_step[0]+=1
            msave = save_step[0]
            do_save = False
            is_highest = False
            if args.task=='age' or args.task=='gender':
                acc = val_test(mbatch)
                if acc>=highest_acc[-1]:
                    highest_acc[-1] = acc
                    is_highest = True
                    do_save = True
            else:
                acc_list = ver_test(mbatch)
                if len(acc_list)>0:
                    # acc_list[0] is the first benchmark (conventionally lfw);
                    # acc_list[-1] is the target benchmark used for "best" tracking.
                    lfw_score = acc_list[0]
                    if lfw_score>highest_acc[0]:
                        highest_acc[0] = lfw_score
                        if lfw_score>=0.998:
                            do_save = True
                    if acc_list[-1]>=highest_acc[-1]:
                        highest_acc[-1] = acc_list[-1]
                        if lfw_score>=0.99:
                            do_save = True
                            is_highest = True
            # args.ckpt: 0 = never save, 1 = save on new best only, >1 = always save.
            if args.ckpt==0:
                do_save = False
            elif args.ckpt>1:
                do_save = True
            if do_save:
                print('saving', msave)
                #print('saving gluon params')
                fname = os.path.join(args.prefix, 'model-gluon.params')
                net.save_params(fname)
                fname = os.path.join(args.prefix, 'model')
                net.export(fname, msave)
                #arg, aux = model.get_params()
                #mx.model.save_checkpoint(prefix, msave, model.symbol, arg, aux)
            print('[%d]Accuracy-Highest: %1.5f'%(mbatch, highest_acc[-1]))
        if args.max_steps>0 and mbatch>args.max_steps:
            sys.exit(0)

    def _batch_callback_sym(param):
        # Module-API wrapper: run the Speedometer, then the shared callback.
        _cb(param)
        _batch_callback()

    if args.mode!='gluon':
        # ---- symbolic (Module) training path ----
        model = mx.mod.Module(
            context = ctx,
            symbol = sym,
        )
        model.fit(train_iter,
            begin_epoch = 0,
            num_epoch = args.end_epoch,
            eval_data = None,
            eval_metric = metric,
            kvstore = 'device',
            optimizer = opt,
            initializer = initializer,
            arg_params = arg_params,
            aux_params = aux_params,
            allow_missing = True,
            batch_end_callback = _batch_callback_sym,
            epoch_end_callback = None )
    else:
        # ---- Gluon training loop ----
        loss_weight = 1.0
        if args.task=='age':
            loss_weight = 1.0/AGE
        #loss = gluon.loss.SoftmaxCrossEntropyLoss(weight = loss_weight)
        loss = nd.SoftmaxOutput
        #loss = gluon.loss.SoftmaxCrossEntropyLoss()
        # Runs until _batch_callback calls sys.exit (args.max_steps).
        while True:
            #trainer = update_learning_rate(opt.lr, trainer, epoch, opt.lr_factor, lr_steps)
            tic = time.time()
            train_iter.reset()
            metric.reset()
            btic = time.time()
            for i, batch in enumerate(train_iter):
                _batch_callback()
                #data = gluon.utils.split_and_load(batch.data[0].astype(opt.dtype), ctx_list=ctx, batch_axis=0)
                #label = gluon.utils.split_and_load(batch.label[0].astype(opt.dtype), ctx_list=ctx, batch_axis=0)
                data = gluon.utils.split_and_load(batch.data[0], ctx_list=ctx, batch_axis=0)
                label = gluon.utils.split_and_load(batch.label[0], ctx_list=ctx, batch_axis=0)
                outputs = []
                Ls = []
                with ag.record():
                    for x, y in zip(data, label):
                        #print(y.asnumpy())
                        if args.task=='':
                            if args.margin_a>0.0:
                                z = net(x,y)
                            else:
                                z = net(x)
                            #print(z[0].shape, z[1].shape)
                        else:
                            z = net(x)
                        if args.task=='gender':
                            L = loss(z[1], y)
                            #L = L/args.per_batch_size
                            Ls.append(L)
                            outputs.append(z[1])
                        elif args.task=='age':
                            # Age head is trained as AGE independent binary
                            # classifiers: slice out 2 logits per age unit.
                            for k in xrange(AGE):
                                _z = nd.slice_axis(z[2], axis=1, begin=k*2, end=k*2+2)
                                _y = nd.slice_axis(y, axis=1, begin=k, end=k+1)
                                _y = nd.flatten(_y)
                                L = loss(_z, _y)
                                #L = L/args.per_batch_size
                                #L /= AGE
                                Ls.append(L)
                            outputs.append(z[2])
                        else:
                            L = loss(z, y)
                            #L = L/args.per_batch_size
                            Ls.append(L)
                            outputs.append(z)
                    # store the loss and do backward after we have done forward
                    # on all GPUs for better speed on multiple GPUs.
                    ag.backward(Ls)
                #trainer.step(batch.data[0].shape[0], ignore_stale_grad=True)
                #trainer.step(args.ctx_num)
                n = batch.data[0].shape[0]
                #print(n,n)
                trainer.step(n)
                metric.update(label, outputs)
                if i>0 and i%20==0:
                    name, acc = metric.get()
                    if len(name)==2:
                        logger.info('Epoch[%d] Batch [%d]\tSpeed: %f samples/sec\t%s=%f, %s=%f'%(
                            num_epochs, i, args.batch_size/(time.time()-btic), name[0], acc[0], name[1], acc[1]))
                    else:
                        logger.info('Epoch[%d] Batch [%d]\tSpeed: %f samples/sec\t%s=%f'%(
                            num_epochs, i, args.batch_size/(time.time()-btic), name[0], acc[0]))
                    #metric.reset()
                    btic = time.time()

            epoch_time = time.time()-tic

            # First epoch will usually be much slower than the subsequent epochs,
            # so don't factor it into the average
            if num_epochs > 0:
                total_time = total_time + epoch_time

            #name, acc = metric.get()
            #logger.info('[Epoch %d] training: %s=%f, %s=%f'%(num_epochs, name[0], acc[0], name[1], acc[1]))
            logger.info('[Epoch %d] time cost: %f'%(num_epochs, epoch_time))
            num_epochs = num_epochs + 1
            #name, val_acc = test(ctx, val_data)
            #logger.info('[Epoch %d] validation: %s=%f, %s=%f'%(epoch, name[0], val_acc[0], name[1], val_acc[1]))

            # save model if meet requirements
            #save_checkpoint(epoch, val_acc[0], best_acc)
            if num_epochs > 1:
                print('Average epoch time: {}'.format(float(total_time)/(num_epochs - 1)))
def main():
    """CLI entry point: parse arguments into the module-level ``args`` and train."""
    #time.sleep(3600*6.5)
    global args  # train_net and its nested callbacks read the shared module-level args
    args = parse_args()
    train_net(args)

if __name__ == '__main__':
    main()

View File

@@ -1,369 +0,0 @@
"""Helper for evaluation on the Labeled Faces in the Wild dataset
"""
# MIT License
#
# Copyright (c) 2016 David Sandberg
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import argparse
import sys
import numpy as np
from scipy import misc
from sklearn.model_selection import KFold
from scipy import interpolate
import sklearn
import cv2
import math
import datetime
import pickle
from sklearn.decomposition import PCA
import mxnet as mx
from mxnet import gluon
from mxnet import ndarray as nd
sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'common'))
import face_image
class LFold:
    """K-fold splitter that degenerates gracefully for a single fold.

    With ``n_splits > 1`` this is a thin wrapper around sklearn's KFold;
    with ``n_splits == 1`` it yields one split where the whole index set
    serves as both train and test.
    """

    def __init__(self, n_splits=2, shuffle=False):
        self.n_splits = n_splits
        if self.n_splits > 1:
            self.k_fold = KFold(n_splits=n_splits, shuffle=shuffle)

    def split(self, indices):
        # Single-fold case: train and test are the same index set.
        if self.n_splits <= 1:
            return [(indices, indices)]
        return self.k_fold.split(indices)
def calculate_roc(thresholds, embeddings1, embeddings2, actual_issame, nrof_folds=10, pca = 0):
    """Cross-validated ROC sweep over distance thresholds.

    For each fold, the threshold maximizing accuracy is picked on the train
    split and accuracy is reported on the test split; TPR/FPR are averaged
    across folds per threshold.  With ``pca > 0`` a PCA is fit per fold on the
    training embeddings before computing distances.
    Returns (mean tpr per threshold, mean fpr per threshold, per-fold accuracy).
    """
    assert(embeddings1.shape[0] == embeddings2.shape[0])
    assert(embeddings1.shape[1] == embeddings2.shape[1])
    nrof_pairs = min(len(actual_issame), embeddings1.shape[0])
    nrof_thresholds = len(thresholds)
    k_fold = LFold(n_splits=nrof_folds, shuffle=False)

    tprs = np.zeros((nrof_folds,nrof_thresholds))
    fprs = np.zeros((nrof_folds,nrof_thresholds))
    accuracy = np.zeros((nrof_folds))
    indices = np.arange(nrof_pairs)
    #print('pca', pca)

    if pca==0:
        # No PCA: squared L2 distance is fold-independent, compute it once.
        diff = np.subtract(embeddings1, embeddings2)
        dist = np.sum(np.square(diff),1)

    for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)):
        #print('train_set', train_set)
        #print('test_set', test_set)
        if pca>0:
            # Fit PCA on the fold's training embeddings only, then project and
            # re-normalize all embeddings before computing distances.
            print('doing pca on', fold_idx)
            embed1_train = embeddings1[train_set]
            embed2_train = embeddings2[train_set]
            _embed_train = np.concatenate( (embed1_train, embed2_train), axis=0 )
            #print(_embed_train.shape)
            pca_model = PCA(n_components=pca)
            pca_model.fit(_embed_train)
            embed1 = pca_model.transform(embeddings1)
            embed2 = pca_model.transform(embeddings2)
            embed1 = sklearn.preprocessing.normalize(embed1)
            embed2 = sklearn.preprocessing.normalize(embed2)
            #print(embed1.shape, embed2.shape)
            diff = np.subtract(embed1, embed2)
            dist = np.sum(np.square(diff),1)

        # Find the best threshold for the fold
        acc_train = np.zeros((nrof_thresholds))
        for threshold_idx, threshold in enumerate(thresholds):
            _, _, acc_train[threshold_idx] = calculate_accuracy(threshold, dist[train_set], actual_issame[train_set])
        best_threshold_index = np.argmax(acc_train)
        #print('threshold', thresholds[best_threshold_index])
        # Evaluate every threshold on the held-out split for the ROC curve...
        for threshold_idx, threshold in enumerate(thresholds):
            tprs[fold_idx,threshold_idx], fprs[fold_idx,threshold_idx], _ = calculate_accuracy(threshold, dist[test_set], actual_issame[test_set])
        # ...but report accuracy only at the train-selected best threshold.
        _, _, accuracy[fold_idx] = calculate_accuracy(thresholds[best_threshold_index], dist[test_set], actual_issame[test_set])

    tpr = np.mean(tprs,0)
    fpr = np.mean(fprs,0)
    return tpr, fpr, accuracy
def calculate_accuracy(threshold, dist, actual_issame):
    """Score a single distance threshold against ground-truth pair labels.

    A pair is predicted "same" when its distance is strictly below
    *threshold*.  Returns (tpr, fpr, accuracy); tpr/fpr fall back to 0 when
    their denominator is empty.
    """
    predicted = np.less(dist, threshold)
    not_predicted = np.logical_not(predicted)
    not_actual = np.logical_not(actual_issame)

    # Confusion-matrix counts.
    tp = np.sum(np.logical_and(predicted, actual_issame))
    fp = np.sum(np.logical_and(predicted, not_actual))
    tn = np.sum(np.logical_and(not_predicted, not_actual))
    fn = np.sum(np.logical_and(not_predicted, actual_issame))

    tpr = float(tp) / float(tp + fn) if (tp + fn) != 0 else 0
    fpr = float(fp) / float(fp + tn) if (fp + tn) != 0 else 0
    acc = float(tp + tn) / dist.size
    return tpr, fpr, acc
def calculate_val(thresholds, embeddings1, embeddings2, actual_issame, far_target, nrof_folds=10):
    """Cross-validated VAL at a fixed FAR target.

    Per fold, the threshold achieving ``far_target`` is interpolated from the
    FAR(threshold) curve on the training split, then VAL/FAR are measured on
    the test split.  Returns (mean VAL, std VAL, mean FAR) over the folds.
    """
    assert(embeddings1.shape[0] == embeddings2.shape[0])
    assert(embeddings1.shape[1] == embeddings2.shape[1])
    nrof_pairs = min(len(actual_issame), embeddings1.shape[0])
    nrof_thresholds = len(thresholds)
    k_fold = LFold(n_splits=nrof_folds, shuffle=False)

    val = np.zeros(nrof_folds)
    far = np.zeros(nrof_folds)

    # Squared L2 distance for every pair.
    diff = np.subtract(embeddings1, embeddings2)
    dist = np.sum(np.square(diff),1)
    indices = np.arange(nrof_pairs)

    for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)):
        # Find the threshold that gives FAR = far_target
        far_train = np.zeros(nrof_thresholds)
        for threshold_idx, threshold in enumerate(thresholds):
            _, far_train[threshold_idx] = calculate_val_far(threshold, dist[train_set], actual_issame[train_set])
        if np.max(far_train)>=far_target:
            # Invert the FAR(threshold) curve by piecewise-linear interpolation.
            f = interpolate.interp1d(far_train, thresholds, kind='slinear')
            threshold = f(far_target)
        else:
            # Target FAR is unreachable on this fold; fall back to threshold 0.
            threshold = 0.0
        val[fold_idx], far[fold_idx] = calculate_val_far(threshold, dist[test_set], actual_issame[test_set])

    val_mean = np.mean(val)
    far_mean = np.mean(far)
    val_std = np.std(val)
    return val_mean, val_std, far_mean
def calculate_val_far(threshold, dist, actual_issame):
    """Compute VAL and FAR at a given distance threshold.

    VAL (validation rate) is the fraction of genuine ("same") pairs accepted,
    FAR (false accept rate) the fraction of impostor pairs accepted; a pair
    is accepted when its distance is strictly below *threshold*.
    Returns (val, far) as floats.
    """
    predict_issame = np.less(dist, threshold)
    true_accept = np.sum(np.logical_and(predict_issame, actual_issame))
    false_accept = np.sum(np.logical_and(predict_issame, np.logical_not(actual_issame)))
    n_same = np.sum(actual_issame)
    n_diff = np.sum(np.logical_not(actual_issame))
    #print(true_accept, false_accept)
    #print(n_same, n_diff)
    # Fix: guard the divisions — the unguarded version raised
    # ZeroDivisionError whenever a fold contained no positive (or no
    # negative) pairs, e.g. for empty or single-class input.
    val = float(true_accept) / float(n_same) if n_same > 0 else 0.0
    far = float(false_accept) / float(n_diff) if n_diff > 0 else 0.0
    return val, far
def evaluate(embeddings, actual_issame, nrof_folds=10, pca = 0):
    """Run the full verification protocol on interleaved pair embeddings.

    ``embeddings`` holds both images of every pair interleaved: even rows are
    the first image, odd rows the second.  Returns
    (tpr, fpr, accuracy, val, val_std, far).
    """
    first = embeddings[0::2]
    second = embeddings[1::2]
    issame = np.asarray(actual_issame)
    # Coarse threshold sweep (step 0.01) for ROC and accuracy.
    tpr, fpr, accuracy = calculate_roc(np.arange(0, 4, 0.01), first, second,
                                       issame, nrof_folds=nrof_folds, pca=pca)
    # Finer sweep (step 0.001) so the FAR target can be interpolated precisely.
    val, val_std, far = calculate_val(np.arange(0, 4, 0.001), first, second,
                                      issame, 1e-3, nrof_folds=nrof_folds)
    return tpr, fpr, accuracy, val, val_std, far
def load_bin(path, image_size):
    """Load a packed verification set (``*.bin``) into NDArray tensors.

    The .bin file is a pickle of ``(bins, issame_list)`` where ``bins`` holds
    one encoded image per entry (two images per pair) and ``issame_list`` one
    bool per pair.  Returns ``(data_list, issame_list)`` where
    ``data_list[0]`` holds the decoded images in NCHW layout and
    ``data_list[1]`` their horizontal flips.
    """
    # Fix: close the pickle file deterministically (the original leaked the
    # handle) and use range() so the loader also runs on Python 3, where
    # xrange does not exist.
    with open(path, 'rb') as f:
        bins, issame_list = pickle.load(f)
    data_list = []
    for flip in [0, 1]:
        data = nd.empty((len(issame_list)*2, 3, image_size[0], image_size[1]))
        data_list.append(data)
    for i in range(len(issame_list)*2):
        _bin = bins[i]
        img = mx.image.imdecode(_bin)
        # HWC -> CHW for the network input layout.
        img = nd.transpose(img, axes=(2, 0, 1))
        for flip in [0, 1]:
            if flip == 1:
                # Mirror along the width axis for the flip-augmented copy.
                img = mx.ndarray.flip(data=img, axis=2)
            data_list[flip][i][:] = img
        if i % 1000 == 0:
            print('loading bin', i)
    print(data_list[0].shape)
    return (data_list, issame_list)
def test(data_set, net, ctx, batch_size, nfolds=10):
    """Embed a verification set with a Gluon net and score it.

    ``data_set`` is the (data_list, issame_list) pair from load_bin; the two
    entries of data_list are the original and horizontally flipped images.
    Returns (acc1, std1, acc2, std2, xnorm, embeddings_list).  acc1/std1 are
    left at 0 — the unflipped evaluation path is commented out below — so
    acc2/std2 carry the flip-augmented accuracy.
    """
    print('testing verification..')
    data_list = data_set[0]
    issame_list = data_set[1]
    embeddings_list = []
    time_consumed = 0.0
    for i in xrange( len(data_list) ):
        data = data_list[i]
        embeddings = None
        ba = 0
        while ba<data.shape[0]:
            bb = min(ba+batch_size, data.shape[0])
            count = bb-ba
            #print(ba, bb)
            # Always slice a full batch ending at bb: for the final partial
            # batch the first (batch_size-count) rows overlap the previous
            # batch and are discarded below when copying into `embeddings`.
            x = nd.slice_axis(data, axis=0, begin=bb-batch_size, end=bb)
            #print(_data.shape, _label.shape)
            time0 = datetime.datetime.now()
            #x = x.as_in_context(ctx[0])
            xs = gluon.utils.split_and_load(x, ctx_list=ctx, batch_axis=0)
            zs = []
            for x in xs:
                # Inference only: no autograd recording.
                with mx.autograd.predict_mode():
                    z = net.feature(x)
                zs.append(z)
            # Gather per-device outputs back to one numpy batch.
            zss = []
            for z in zs:
                zss.append(z.asnumpy())
            zss = np.concatenate(zss, axis=0)
            #print(zss.shape)
            _embeddings = zss
            #_arg, _aux = model.get_params()
            #__arg = {}
            #for k,v in _arg.iteritems():
            #  __arg[k] = v.as_in_context(_ctx)
            #_arg = __arg
            #_arg["data"] = _data.as_in_context(_ctx)
            #_arg["softmax_label"] = _label.as_in_context(_ctx)
            #for k,v in _arg.iteritems():
            #  print(k,v.context)
            #exe = sym.bind(_ctx, _arg ,args_grad=None, grad_req="null", aux_states=_aux)
            #exe.forward(is_train=False)
            #net_out = exe.outputs
            #_embeddings = z.asnumpy()
            time_now = datetime.datetime.now()
            diff = time_now - time0
            time_consumed+=diff.total_seconds()
            #print(_embeddings.shape)
            if embeddings is None:
                embeddings = np.zeros( (data.shape[0], _embeddings.shape[1]) )
            # Drop the overlapping prefix rows of a partial final batch.
            embeddings[ba:bb,:] = _embeddings[(batch_size-count):,:]
            ba = bb
        embeddings_list.append(embeddings)

    # Mean embedding L2 norm over all images (pre-normalization), reported
    # as a sanity statistic.
    _xnorm = 0.0
    _xnorm_cnt = 0
    for embed in embeddings_list:
        for i in xrange(embed.shape[0]):
            _em = embed[i]
            _norm=np.linalg.norm(_em)
            #print(_em.shape, _norm)
            _xnorm+=_norm
            _xnorm_cnt+=1
    _xnorm /= _xnorm_cnt

    embeddings = embeddings_list[0].copy()
    embeddings = sklearn.preprocessing.normalize(embeddings)
    acc1 = 0.0
    std1 = 0.0
    #_, _, accuracy, val, val_std, far = evaluate(embeddings, issame_list, nrof_folds=10)
    #acc1, std1 = np.mean(accuracy), np.std(accuracy)
    #print('Validation rate: %2.5f+-%2.5f @ FAR=%2.5f' % (val, val_std, far))
    #embeddings = np.concatenate(embeddings_list, axis=1)
    # Flip augmentation: sum original + flipped embeddings, then normalize.
    embeddings = embeddings_list[0] + embeddings_list[1]
    embeddings = sklearn.preprocessing.normalize(embeddings)
    print(embeddings.shape)
    print('infer time', time_consumed)
    _, _, accuracy, val, val_std, far = evaluate(embeddings, issame_list, nrof_folds=nfolds)
    acc2, std2 = np.mean(accuracy), np.std(accuracy)
    return acc1, std1, acc2, std2, _xnorm, embeddings_list
if __name__ == '__main__':
    # Standalone verification driver: load one or more Module checkpoints and
    # score them on the requested .bin benchmark sets.
    parser = argparse.ArgumentParser(description='do verification')
    # general
    parser.add_argument('--data-dir', default='', help='')
    parser.add_argument('--model', default='../model/softmax,50', help='path to load model.')
    parser.add_argument('--target', default='lfw,cfp_ff,cfp_fp,agedb_30', help='test targets.')
    parser.add_argument('--gpu', default=0, type=int, help='gpu id')
    parser.add_argument('--batch-size', default=32, type=int, help='')
    parser.add_argument('--max', default='', type=str, help='')
    parser.add_argument('--mode', default=0, type=int, help='')
    parser.add_argument('--nfolds', default=10, type=int, help='')
    args = parser.parse_args()

    prop = face_image.load_property(args.data_dir)
    image_size = prop.image_size
    print('image_size', image_size)
    ctx = mx.gpu(args.gpu)
    nets = []
    # --model is either "prefix" (scan the directory for all epochs) or
    # "prefix,e1|e2|..." (explicit epoch list).
    vec = args.model.split(',')
    prefix = args.model.split(',')[0]
    epochs = []
    if len(vec)==1:
        # No epoch given: collect every checkpoint epoch matching the prefix,
        # newest first, optionally windowed by --max "start,end".
        pdir = os.path.dirname(prefix)
        for fname in os.listdir(pdir):
            if not fname.endswith('.params'):
                continue
            _file = os.path.join(pdir, fname)
            if _file.startswith(prefix):
                epoch = int(fname.split('.')[0].split('-')[1])
                epochs.append(epoch)
        epochs = sorted(epochs, reverse=True)
        if len(args.max)>0:
            _max = [int(x) for x in args.max.split(',')]
            assert len(_max)==2
            if len(epochs)>_max[1]:
                epochs = epochs[_max[0]:_max[1]]
    else:
        epochs = [int(x) for x in vec[1].split('|')]
    print('model number', len(epochs))

    time0 = datetime.datetime.now()
    for epoch in epochs:
        print('loading',prefix, epoch)
        sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
        #arg_params, aux_params = ch_dev(arg_params, aux_params, ctx)
        # Truncate the graph at the embedding layer output.
        all_layers = sym.get_internals()
        sym = all_layers['fc1_output']
        model = mx.mod.Module(symbol=sym, context=ctx, label_names = None)
        #model.bind(data_shapes=[('data', (args.batch_size, 3, image_size[0], image_size[1]))], label_shapes=[('softmax_label', (args.batch_size,))])
        model.bind(data_shapes=[('data', (args.batch_size, 3, image_size[0], image_size[1]))])
        model.set_params(arg_params, aux_params)
        nets.append(model)
    time_now = datetime.datetime.now()
    diff = time_now - time0
    print('model loading time', diff.total_seconds())

    # Load every requested benchmark that actually exists on disk.
    ver_list = []
    ver_name_list = []
    for name in args.target.split(','):
        path = os.path.join(args.data_dir,name+".bin")
        if os.path.exists(path):
            print('loading.. ', name)
            data_set = load_bin(path, image_size)
            ver_list.append(data_set)
            ver_name_list.append(name)

    if args.mode==0:
        for i in xrange(len(ver_list)):
            results = []
            for model in nets:
                # NOTE(review): test() above is defined as
                # test(data_set, net, ctx, batch_size, nfolds) and embeds via
                # net.feature() on a Gluon net; this call passes args.batch_size
                # in the ctx position and a Module as net — confirm which
                # test() implementation this script is meant to call.
                acc1, std1, acc2, std2, xnorm, embeddings_list = test(ver_list[i], model, args.batch_size, args.nfolds)
                print('[%s]XNorm: %f' % (ver_name_list[i], xnorm))
                print('[%s]Accuracy: %1.5f+-%1.5f' % (ver_name_list[i], acc1, std1))
                print('[%s]Accuracy-Flip: %1.5f+-%1.5f' % (ver_name_list[i], acc2, std2))
                results.append(acc2)
            print('Max of [%s] is %1.5f' % (ver_name_list[i], np.max(results)))
    elif args.mode==1:
        # NOTE(review): test_badcase is not defined in this file — presumably
        # provided by a sibling module; verify before running with --mode 1.
        model = nets[0]
        test_badcase(ver_list[0], model, args.batch_size, args.target)
    else:
        # NOTE(review): dumpR is not defined in this file either — verify.
        model = nets[0]
        dumpR(ver_list[0], model, args.batch_size, args.target)

View File

@@ -1,6 +1,53 @@
[The Lightweight Face Recognition Challenge & Workshop](https://ibug.doc.ic.ac.uk/resources/lightweight-face-recognition-challenge-workshop/) will be held in conjunction with the International Conference on Computer Vision (ICCV) 2019, Seoul Korea.
[Test Server](http://39.104.128.76/overview)
Please strictly follow the rules. For example, please use the same [method](https://github.com/deepinsight/insightface/blob/master/common/flops_counter.py) for the FLOPs calculation regardless of whether your training framework is insightface or not.
[Test Server](http://www.insightface-challenge.com/overview)
**Sponsors:**
The Lightweight Face Recognition Challenge has been supported by
EPSRC project FACER2VM (EP/N007743/1)
Huawei (5000$)
DeepGlint (3000$)
iQIYI (3000$)
Kingsoft Cloud (3000$)
Pensees (3000$)
Dynamic funding pool: (17000$)
Cash sponsors and gift donations are welcome.
Contact:
insightface.challenge@gmail.com
**Discussion Group**
*For Chinese:*
![wechat](https://github.com/deepinsight/insightface/blob/master/resources/lfr19_wechat1.jpg)
*For English:*
(in #lfr2019 channel)
https://join.slack.com/t/insightface/shared_invite/enQtNjU0NDk2MjYyMTMzLTIzNDEwNmIxMjU5OGYzYzFhMjlkNjlhMTBkNWFiNjU4MTVhNTgzYjQ5ZTZiMGM3MzUyNzQ3OTBhZTg3MzM5M2I
**NEWS**
``2019.06.21`` We updated the groundtruth of Glint test dataset.
``2019.06.04`` We will clean the groundtruth of the deepglint testset.
``2019.05.21`` Baseline models and training logs available.
``2019.05.16`` The four tracks (deepglint-light, deepglint-large, iQIYI-light, iQIYI-large) will equally share the dynamic funding pool (14000$). From each track, the top 3 players will share the funding pool for 50%, 30% and 20% respectively.
==================
@@ -10,20 +57,21 @@
1. Download ms1m-retinaface from [baiducloud](https://pan.baidu.com/s/1rQxJ3drqm_071vpxBtp98A) or [dropbox](https://www.dropbox.com/s/ev5ezzcz79p2hge/ms1m-retinaface-t1.zip?dl=0) and unzip it to `$INSIGHTFACE_ROOT/datasets/`
2. Go into `$INSIGHTFACE_ROOT/recognition/`
3. Refer to the `retina` dataset config section in `sample_config.py` and copy it to your own`config.py`.
3. Refer to the `retina` dataset configuration section in `sample_config.py` and copy it as your own configuration file `config.py`.
4. Start training with `CUDA_VISIBLE_DEVICES='0,1,2,3' python -u train.py --dataset retina --network [your-network] --loss arcface`. It will output the accuracy of lfw, cfp_fp and agedb_30 every 2000 batches by default.
5. Storing the training dataset on an SSD will achieve better training efficiency.
------------------
**Testing:**
1. testdata-image from [baiducloud](https://pan.baidu.com/s/1UKUYsRfVTSzj1tfU3BVFrw) or [dropbox](https://www.dropbox.com/s/r5y6xt754m36rh8/iccv19-challenge-data-v1.zip?dl=0). These face images are all pre-processed and aligned so no need to do further modification.
2. To download testdata-video from iQIYI, please visit <http://challenge.ai.iqiyi.com/data-cluster>. You must download iQIYI-VID-FACE.z01, iQIYI-VID-FACE.z02 and iQIYI-VID-FACE.zip after signin. These face images are all pre-processed and aligned so no need to do further modification.
1. To unzip: ``zip iQIYI_VID_FACE.zip -s=0 --out iQIYI_VID_FACE_ALL.zip; unzip iQIYI_VID_FACE_ALL.zip``
2. We can get a directory named ``iQIYI_VID_FACE`` after decompression. Then we have to move ``video_filelist.txt`` in testdata-image package to ``iQIYI_VID_FACE/filelist.txt``, to indicate the order of videos in our submission feature file.
1. Download testdata-image from [baiducloud](https://pan.baidu.com/s/1UKUYsRfVTSzj1tfU3BVFrw) or [dropbox](https://www.dropbox.com/s/r5y6xt754m36rh8/iccv19-challenge-data-v1.zip?dl=0). These face images are all pre-processed and aligned.
2. To download testdata-video from iQIYI, please visit <http://challenge.ai.iqiyi.com/data-cluster>. You need to download iQIYI-VID-FACE.z01, iQIYI-VID-FACE.z02 and iQIYI-VID-FACE.zip after registration. These face frames are also pre-processed and aligned.
1. Unzip: ``zip iQIYI_VID_FACE.zip -s=0 --out iQIYI_VID_FACE_ALL.zip; unzip iQIYI_VID_FACE_ALL.zip``
2. We can get a directory named ``iQIYI_VID_FACE`` after decompression. Then, we have to move ``video_filelist.txt`` in testdata-image package to ``iQIYI_VID_FACE/filelist.txt``, to indicate the order of videos in our submission feature file.
3. To generate image feature submission file: check ``gen_image_feature.py``
4. To generate video feature submission file: check ``gen_video_feature.py``
5. Submit binary feature to the right section on test server.
5. Submit binary feature to the right track of the test server.
You can also check the verification performance during training time on LFW,CFP_FP,AgeDB_30 datasets.
@@ -35,10 +83,16 @@ Final ranking is determined by the TAR under 1:1 protocal only, for all valid su
For image testset, we evaluate the TAR under FAR@e-8 while we choose the TAR under FAR@e-4 for video testset.
For track-1, we will rank all players as following formula: ``TAR(glint-light)+TAR(iqiyi-light)``
------------------
For track-2, we will rank all players as following formula: ``TAR(glint-large)+TAR(iqiyi-large)``
**Baseline:**
1. Network y2(a deeper mobilefacenet): 933M FLOPs. TAR_image: 0.64691, TAR_video: 0.47191
2. Network r100fc(ResNet100FC-IR): 24G FLOPs. TAR_image: 0.80312, TAR_video: 0.64894
Baseline models download link: [baidu cloud](https://pan.baidu.com/s/1Em0ZFnefSoTsZoTd-9m8Nw) [dropbox](https://www.dropbox.com/s/yqaziktiv38ehrv/iccv19-baseline-models.zip?dl=0)
Training logs: [baidu cloud](https://pan.baidu.com/s/12rsp-oMzsjTeU6nugEvA9g) [dropbox](https://www.dropbox.com/s/4ufb9g7n76rfav5/iccv-baseline-log.zip?dl=0)
------------------
@@ -48,17 +102,10 @@ For track-2, we will rank all players as following formula: ``TAR(glint-large)+T
------------------
**Baseline:**
1. Network y2(a deeper mobilefacenet): 933M FLOPs. TAR_image: 0.64691, TAR_video: [TODO]
2. Network r100fc(ResNet100FC-IR): 24G FLOPs. TAR_image: 0.80312, TAR_video: [TODO]
------------------
**Candidate solutions:**
1. Use slightly deeper or wider mobile-level networks.
2. Try different training methods/losses than straightforward arcface.
1. Manually design or automatically search different networks/losses.
2. Use slightly deeper or wider mobile-level networks.
3. [OctConv](https://arxiv.org/abs/1904.05049), to reduce FLOPs.
4. [HRNet](https://arxiv.org/abs/1904.04514), for large FLOPs track.
and so on

View File

@@ -30,7 +30,7 @@ use_flip = True
def do_flip(data):
for idx in xrange(data.shape[0]):
for idx in range(data.shape[0]):
data[idx,:,:] = np.fliplr(data[idx,:,:])
def get_feature(buffer):
@@ -83,7 +83,7 @@ def main(args):
ctx = []
cvd = os.environ['CUDA_VISIBLE_DEVICES'].strip()
if len(cvd)>0:
for i in xrange(len(cvd.split(','))):
for i in range(len(cvd.split(','))):
ctx.append(mx.gpu(i))
if len(ctx)==0:
ctx = [mx.cpu()]

View File

@@ -32,7 +32,7 @@ ctx_num = 0
def do_flip(data):
for idx in xrange(data.shape[0]):
for idx in range(data.shape[0]):
data[idx,:,:] = np.fliplr(data[idx,:,:])
def get_feature(buffer):
@@ -89,7 +89,7 @@ def main(args):
ctx = []
cvd = os.environ['CUDA_VISIBLE_DEVICES'].strip()
if len(cvd)>0:
for i in xrange(len(cvd.split(','))):
for i in range(len(cvd.split(','))):
ctx.append(mx.gpu(i))
if len(ctx)==0:
ctx = [mx.cpu()]

3
python-package/README.md Normal file
View File

@@ -0,0 +1,3 @@
InsightFace.ai README

View File

@@ -0,0 +1,28 @@
# coding: utf-8
# pylint: disable=wrong-import-position
"""InsightFace: A Face Analysis Toolkit."""
from __future__ import absolute_import

# mxnet version check
#mx_version = '1.4.0'
try:
    # mxnet must be importable before any submodule loads; the minimum-version
    # check below is currently disabled.
    import mxnet as mx
    #from distutils.version import LooseVersion
    #if LooseVersion(mx.__version__) < LooseVersion(mx_version):
    #    msg = (
    #        "Legacy mxnet-mkl=={} detected, some new modules may not work properly. "
    #        "mxnet-mkl>={} is required. You can use pip to upgrade mxnet "
    #        "`pip install mxnet-mkl --pre --upgrade` "
    #        "or `pip install mxnet-cu90mkl --pre --upgrade`").format(mx.__version__, mx_version)
    #    raise ImportError(msg)
except ImportError:
    # Re-raise with an actionable installation hint for the user.
    raise ImportError(
        "Unable to import dependency mxnet. "
        "A quick tip is to install via `pip install mxnet-mkl/mxnet-cu90mkl --pre`. ")

__version__ = '0.1.3'

from . import model_zoo
from . import utils
from . import app

View File

@@ -0,0 +1 @@
from .face_analysis import *

View File

@@ -0,0 +1,72 @@
from __future__ import division
import collections
import mxnet as mx
import numpy as np
from numpy.linalg import norm
import mxnet.ndarray as nd
from ..model_zoo import model_zoo
from ..utils import face_align
__all__ = ['FaceAnalysis',
'Face']
# Lightweight record for one detected face.  All fields default to None;
# detection always fills bbox/landmark/det_score, while embedding, gender
# and age are populated only when the corresponding models are loaded.
Face = collections.namedtuple('Face', [
    'bbox', 'landmark', 'det_score', 'embedding', 'gender', 'age', 'embedding_norm', 'normed_embedding'])
Face.__new__.__defaults__ = (None,) * len(Face._fields)
class FaceAnalysis:
    """High-level pipeline bundling face detection, recognition and
    gender/age estimation models from the model zoo."""

    def __init__(self, det_name='retinaface_r50_v1', rec_name='arcface_r100_v1', ga_name='genderage_v1'):
        # Detection is mandatory; recognition and gender/age are optional
        # (pass None to disable them).
        assert det_name is not None
        self.det_model = model_zoo.get_model(det_name)
        self.rec_model = model_zoo.get_model(rec_name) if rec_name is not None else None
        self.ga_model = model_zoo.get_model(ga_name) if ga_name is not None else None

    def prepare(self, ctx_id, nms=0.4):
        """Bind every loaded model on the given device (ctx_id < 0 -> CPU)."""
        self.det_model.prepare(ctx_id, nms)
        if self.rec_model is not None:
            self.rec_model.prepare(ctx_id)
        if self.ga_model is not None:
            self.ga_model.prepare(ctx_id)

    def get(self, img, det_thresh=0.8, det_scale=1.0, max_num=0):
        """Detect faces in a BGR image and return a list of Face records.

        When max_num > 0, only the max_num faces scoring best on
        (area - 2 * squared distance from image center) are kept.
        """
        bboxes, landmarks = self.det_model.detect(img, threshold=det_thresh, scale=det_scale)
        if bboxes.shape[0] == 0:
            return []
        if max_num > 0 and bboxes.shape[0] > max_num:
            area = (bboxes[:, 2] - bboxes[:, 0]) * (bboxes[:, 3] - bboxes[:, 1])
            img_center = img.shape[0] // 2, img.shape[1] // 2
            offsets = np.vstack([
                (bboxes[:, 0] + bboxes[:, 2]) / 2 - img_center[1],
                (bboxes[:, 1] + bboxes[:, 3]) / 2 - img_center[0]
            ])
            offset_dist_squared = np.sum(np.power(offsets, 2.0), 0)
            # BUGFIX: np.argmax returns a scalar index, which cannot be
            # sliced by [0:max_num]; rank all candidates instead.
            values = area - offset_dist_squared * 2.0  # some extra weight on the centering
            bindex = np.argsort(values)[::-1][0:max_num]
            bboxes = bboxes[bindex, :]
            landmarks = landmarks[bindex, :]
        ret = []
        for i in range(bboxes.shape[0]):
            bbox = bboxes[i, 0:4]
            det_score = bboxes[i, 4]
            landmark = landmarks[i]
            # Align the face crop before feeding the downstream models.
            _img = face_align.norm_crop(img, landmark=landmark)
            embedding = None
            embedding_norm = None
            normed_embedding = None
            gender = None
            age = None
            if self.rec_model is not None:
                embedding = self.rec_model.get_embedding(_img).flatten()
                embedding_norm = norm(embedding)
                normed_embedding = embedding / embedding_norm
            if self.ga_model is not None:
                gender, age = self.ga_model.get(_img)
            face = Face(bbox=bbox, landmark=landmark, det_score=det_score,
                        embedding=embedding, gender=gender, age=age,
                        normed_embedding=normed_embedding, embedding_norm=embedding_norm)
            ret.append(face)
        return ret

View File

@@ -0,0 +1 @@
from .model_zoo import get_model, get_model_list

View File

@@ -0,0 +1,425 @@
from __future__ import division
import mxnet as mx
import numpy as np
import mxnet.ndarray as nd
__all__ = ['FaceDetector',
'retinaface_r50_v1',
'retinaface_mnet025_v1',
'retinaface_mnet025_v2',
'get_retinaface']
def _whctrs(anchor):
"""
Return width, height, x center, and y center for an anchor (window).
"""
w = anchor[2] - anchor[0] + 1
h = anchor[3] - anchor[1] + 1
x_ctr = anchor[0] + 0.5 * (w - 1)
y_ctr = anchor[1] + 0.5 * (h - 1)
return w, h, x_ctr, y_ctr
def _mkanchors(ws, hs, x_ctr, y_ctr):
"""
Given a vector of widths (ws) and heights (hs) around a center
(x_ctr, y_ctr), output a set of anchors (windows).
"""
ws = ws[:, np.newaxis]
hs = hs[:, np.newaxis]
anchors = np.hstack((x_ctr - 0.5 * (ws - 1),
y_ctr - 0.5 * (hs - 1),
x_ctr + 0.5 * (ws - 1),
y_ctr + 0.5 * (hs - 1)))
return anchors
def _ratio_enum(anchor, ratios):
    """Enumerate anchors sharing (approximately) the area of `anchor`,
    one per aspect ratio."""
    w, h, x_ctr, y_ctr = _whctrs(anchor)
    area = w * h
    ws = np.round(np.sqrt(area / ratios))
    hs = np.round(ws * ratios)
    return _mkanchors(ws, hs, x_ctr, y_ctr)
def _scale_enum(anchor, scales):
    """Enumerate anchors scaled by each factor in `scales`, keeping the
    center of `anchor` fixed."""
    w, h, x_ctr, y_ctr = _whctrs(anchor)
    return _mkanchors(w * scales, h * scales, x_ctr, y_ctr)
def anchors_plane(height, width, stride, base_anchors):
    """
    Spread a base anchor set over every cell of a feature-map plane.

    Parameters
    ----------
    height: height of plane
    width: width of plane
    stride: stride of the original image
    base_anchors: (A, 4) a base set of anchors

    Returns
    -------
    all_anchors: (height, width, A, 4) float32 ndarray of anchors spreading
    over the plane

    Vectorized with broadcasting instead of the original O(H*W*A) Python
    triple loop; output values and dtype are unchanged.
    """
    A = base_anchors.shape[0]
    # Per-cell pixel offsets in the original image.
    shift_x = np.arange(width, dtype=np.float32) * stride
    shift_y = np.arange(height, dtype=np.float32) * stride
    shifts = np.zeros((height, width, 1, 4), dtype=np.float32)
    # x1/x2 shift with the column, y1/y2 with the row.
    shifts[:, :, 0, 0] = shift_x[np.newaxis, :]
    shifts[:, :, 0, 2] = shift_x[np.newaxis, :]
    shifts[:, :, 0, 1] = shift_y[:, np.newaxis]
    shifts[:, :, 0, 3] = shift_y[:, np.newaxis]
    all_anchors = base_anchors.astype(np.float32)[np.newaxis, np.newaxis, :, :] + shifts
    return all_anchors
def generate_anchors(base_size=16, ratios=(0.5, 1, 2),
                     scales=2 ** np.arange(3, 6), stride=16):
    """
    Generate anchor (reference) windows by enumerating aspect ratios X
    scales wrt a reference (0, 0, base_size-1, base_size-1) window.

    `ratios` default changed from a mutable list to an equivalent tuple
    (never-mutate-default idiom); callers may still pass any sequence.
    `stride` is unused here but kept for signature compatibility.
    """
    base_anchor = np.array([1, 1, base_size, base_size]) - 1
    ratio_anchors = _ratio_enum(base_anchor, ratios)
    return np.vstack([_scale_enum(ratio_anchors[i, :], scales)
                      for i in range(ratio_anchors.shape[0])])
def generate_anchors_fpn(cfg):
    """Build one base-anchor array per FPN level, ordered by decreasing
    stride.  `cfg` maps stride strings to dicts with BASE_SIZE, RATIOS
    and SCALES entries."""
    strides = sorted((int(key) for key in cfg), reverse=True)
    anchors = []
    for stride in strides:
        settings = cfg[str(stride)]
        level_anchors = generate_anchors(settings['BASE_SIZE'],
                                         np.array(settings['RATIOS']),
                                         np.array(settings['SCALES']),
                                         stride)
        anchors.append(level_anchors)
    return anchors
def clip_pad(tensor, pad_shape):
    """
    Crop the padded area off an [n, c, H, W] tensor.

    :param tensor: [n, c, H, W]
    :param pad_shape: [h, w] target spatial size
    :return: [n, c, h, w]; the input is returned unchanged when it is
             already no larger than pad_shape.
    """
    h, w = pad_shape
    H, W = tensor.shape[2:]
    if h < H or w < W:
        return tensor[:, :, :h, :w].copy()
    return tensor
def bbox_pred(boxes, box_deltas):
    """
    Transform the set of class-agnostic boxes into class-specific boxes
    by applying the predicted offsets (box_deltas).

    :param boxes: [N, 4] anchors as (x1, y1, x2, y2)
    :param box_deltas: [N, 4 * num_classes] regression deltas
                       (dx, dy scale with box size; dw, dh are log-space)
    :return: [N, 4 * num_classes]; columns past 4 are copied through
    """
    if boxes.shape[0] == 0:
        return np.zeros((0, box_deltas.shape[1]))
    # BUGFIX: the `np.float` alias was removed in NumPy >= 1.24;
    # np.float64 is the exact equivalent.
    boxes = boxes.astype(np.float64, copy=False)
    widths = boxes[:, 2] - boxes[:, 0] + 1.0
    heights = boxes[:, 3] - boxes[:, 1] + 1.0
    ctr_x = boxes[:, 0] + 0.5 * (widths - 1.0)
    ctr_y = boxes[:, 1] + 0.5 * (heights - 1.0)
    dx = box_deltas[:, 0:1]
    dy = box_deltas[:, 1:2]
    dw = box_deltas[:, 2:3]
    dh = box_deltas[:, 3:4]
    pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis]
    pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis]
    pred_w = np.exp(dw) * widths[:, np.newaxis]
    pred_h = np.exp(dh) * heights[:, np.newaxis]
    pred_boxes = np.zeros(box_deltas.shape)
    pred_boxes[:, 0:1] = pred_ctr_x - 0.5 * (pred_w - 1.0)  # x1
    pred_boxes[:, 1:2] = pred_ctr_y - 0.5 * (pred_h - 1.0)  # y1
    pred_boxes[:, 2:3] = pred_ctr_x + 0.5 * (pred_w - 1.0)  # x2
    pred_boxes[:, 3:4] = pred_ctr_y + 0.5 * (pred_h - 1.0)  # y2
    if box_deltas.shape[1] > 4:
        # Extra columns (e.g. auxiliary predictions) pass through unchanged.
        pred_boxes[:, 4:] = box_deltas[:, 4:]
    return pred_boxes
def landmark_pred(boxes, landmark_deltas):
    """Apply landmark regression deltas to anchor boxes.

    :param boxes: [N, 4] anchors as (x1, y1, x2, y2)
    :param landmark_deltas: [N, 5, 2] per-point offsets, in units of
                            anchor width/height relative to the anchor center
    :return: [N, 5, 2] absolute landmark coordinates
    """
    if boxes.shape[0] == 0:
        return np.zeros((0, landmark_deltas.shape[1]))
    # BUGFIX: the `np.float` alias was removed in NumPy >= 1.24;
    # np.float64 is the exact equivalent.
    boxes = boxes.astype(np.float64, copy=False)
    widths = boxes[:, 2] - boxes[:, 0] + 1.0
    heights = boxes[:, 3] - boxes[:, 1] + 1.0
    ctr_x = boxes[:, 0] + 0.5 * (widths - 1.0)
    ctr_y = boxes[:, 1] + 0.5 * (heights - 1.0)
    pred = landmark_deltas.copy()
    for i in range(5):
        pred[:, i, 0] = landmark_deltas[:, i, 0] * widths + ctr_x
        pred[:, i, 1] = landmark_deltas[:, i, 1] * heights + ctr_y
    return pred
class FaceDetector:
    """RetinaFace-style anchor-based face detector producing bounding boxes
    and (when the network provides them) 5-point landmarks."""

    def __init__(self, param_file, rac):
        self.param_file = param_file
        # Anchor-configuration name: 'net3', 'net3l' or 'net5'.
        self.rac = rac
        self.default_image_size = (480, 640)

    def prepare(self, ctx_id, nms=0.4, fix_image_size=None):
        """Load the checkpoint, bind the network (ctx_id < 0 -> CPU) and
        build the per-stride anchor configuration."""
        # param_file looks like '<prefix>-<epoch>.params'.
        pos = self.param_file.rfind('-')
        prefix = self.param_file[0:pos]
        pos2 = self.param_file.rfind('.')
        epoch = int(self.param_file[pos+1:pos2])
        sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
        if ctx_id >= 0:
            ctx = mx.gpu(ctx_id)
        else:
            ctx = mx.cpu()
        model = mx.mod.Module(symbol=sym, context=ctx, label_names=None)
        if fix_image_size is not None:
            data_shape = (1, 3) + fix_image_size
        else:
            data_shape = (1, 3) + self.default_image_size
        model.bind(data_shapes=[('data', data_shape)])
        model.set_params(arg_params, aux_params)
        # Warmup forward pass so the first real detect() is not slowed by
        # lazy initialization; asnumpy() forces the computation to finish.
        data = mx.nd.zeros(shape=data_shape)
        db = mx.io.DataBatch(data=(data,))
        model.forward(db, is_train=False)
        model.get_outputs()[0].asnumpy()
        self.model = model
        self.nms_threshold = nms
        self.landmark_std = 1.0
        _ratio = (1.,)
        fmc = 3
        if self.rac == 'net3':
            _ratio = (1.,)
        elif self.rac == 'net3l':
            _ratio = (1.,)
            self.landmark_std = 0.2
        elif self.rac == 'net5':  # retinaface
            # BUGFIX: was `elif network=='net5'`, referencing an undefined
            # name and raising NameError before the assert could fire.
            fmc = 5
        else:
            assert False, 'rac setting error %s' % self.rac
        if fmc == 3:
            self._feat_stride_fpn = [32, 16, 8]
            self.anchor_cfg = {
                '32': {'SCALES': (32, 16), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999},
                '16': {'SCALES': (8, 4), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999},
                '8': {'SCALES': (2, 1), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999},
            }
        elif fmc == 5:
            self._feat_stride_fpn = [64, 32, 16, 8, 4]
            self.anchor_cfg = {}
            # Three scales per stride, geometrically spaced by 2^(1/3).
            _ass = 2.0**(1.0/3)
            _basescale = 1.0
            for _stride in [4, 8, 16, 32, 64]:
                key = str(_stride)
                value = {'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999}
                scales = []
                for _ in range(3):
                    scales.append(_basescale)
                    _basescale *= _ass
                value['SCALES'] = tuple(scales)
                self.anchor_cfg[key] = value
        print(self._feat_stride_fpn, self.anchor_cfg)
        # Three output heads per stride (scores, bboxes, landmarks) means
        # the network predicts landmarks; two heads means bboxes only.
        self.use_landmarks = False
        if len(sym) // len(self._feat_stride_fpn) == 3:
            self.use_landmarks = True
        print('use_landmarks', self.use_landmarks)
        self.fpn_keys = []
        for s in self._feat_stride_fpn:
            self.fpn_keys.append('stride%s' % s)
        self._anchors_fpn = dict(zip(self.fpn_keys, generate_anchors_fpn(cfg=self.anchor_cfg)))
        for k in self._anchors_fpn:
            v = self._anchors_fpn[k].astype(np.float32)
            self._anchors_fpn[k] = v
        # Cache of per-(height, width, stride) anchor planes, bounded below.
        self.anchor_plane_cache = {}
        self._num_anchors = dict(zip(self.fpn_keys, [anchors.shape[0] for anchors in self._anchors_fpn.values()]))

    def detect(self, img, threshold=0.5, scale=1.0):
        """Run detection on a BGR image.

        Returns (det, landmarks): det is [K, 5] boxes with score in column
        4; landmarks is [K, 5, 2] or None when the network has no landmark
        head.  `scale` resizes the image before inference; outputs are
        mapped back to original-image coordinates.
        """
        proposals_list = []
        scores_list = []
        landmarks_list = []
        if scale == 1.0:
            im = img
        else:
            # BUGFIX: cv2 is only needed here and was never imported in
            # this module; import locally so the no-resize path works
            # even without opencv installed.
            import cv2
            im = cv2.resize(img, None, None, fx=scale, fy=scale, interpolation=cv2.INTER_LINEAR)
        im_info = [im.shape[0], im.shape[1]]
        # BGR (H, W, 3) -> RGB (1, 3, H, W) float tensor.
        im_tensor = np.zeros((1, 3, im.shape[0], im.shape[1]))
        for i in range(3):
            im_tensor[0, i, :, :] = im[:, :, 2 - i]
        data = nd.array(im_tensor)
        db = mx.io.DataBatch(data=(data,), provide_data=[('data', data.shape)])
        self.model.forward(db, is_train=False)
        net_out = self.model.get_outputs()
        for _idx, s in enumerate(self._feat_stride_fpn):
            _key = 'stride%s' % s
            stride = int(s)
            # Outputs are grouped per stride: scores, bboxes[, landmarks].
            if self.use_landmarks:
                idx = _idx * 3
            else:
                idx = _idx * 2
            scores = net_out[idx].asnumpy()
            # Keep only the foreground half of the score channels.
            scores = scores[:, self._num_anchors['stride%s' % s]:, :, :]
            idx += 1
            bbox_deltas = net_out[idx].asnumpy()
            height, width = bbox_deltas.shape[2], bbox_deltas.shape[3]
            A = self._num_anchors['stride%s' % s]
            K = height * width
            key = (height, width, stride)
            if key in self.anchor_plane_cache:
                anchors = self.anchor_plane_cache[key]
            else:
                anchors_fpn = self._anchors_fpn['stride%s' % s]
                anchors = anchors_plane(height, width, stride, anchors_fpn)
                anchors = anchors.reshape((K * A, 4))
                if len(self.anchor_plane_cache) < 100:
                    self.anchor_plane_cache[key] = anchors
            scores = clip_pad(scores, (height, width))
            scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))
            bbox_deltas = clip_pad(bbox_deltas, (height, width))
            bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1))
            bbox_pred_len = bbox_deltas.shape[3] // A
            bbox_deltas = bbox_deltas.reshape((-1, bbox_pred_len))
            proposals = bbox_pred(anchors, bbox_deltas)
            #proposals = clip_boxes(proposals, im_info[:2])
            scores_ravel = scores.ravel()
            order = np.where(scores_ravel >= threshold)[0]
            proposals = proposals[order, :]
            scores = scores[order]
            # Map back to original-image coordinates.
            proposals[:, 0:4] /= scale
            proposals_list.append(proposals)
            scores_list.append(scores)
            if self.use_landmarks:
                idx += 1
                landmark_deltas = net_out[idx].asnumpy()
                landmark_deltas = clip_pad(landmark_deltas, (height, width))
                landmark_pred_len = landmark_deltas.shape[1] // A
                landmark_deltas = landmark_deltas.transpose((0, 2, 3, 1)).reshape((-1, 5, landmark_pred_len // 5))
                landmark_deltas *= self.landmark_std
                landmarks = landmark_pred(anchors, landmark_deltas)
                landmarks = landmarks[order, :]
                landmarks[:, :, 0:2] /= scale
                landmarks_list.append(landmarks)
        proposals = np.vstack(proposals_list)
        landmarks = None
        if proposals.shape[0] == 0:
            if self.use_landmarks:
                landmarks = np.zeros((0, 5, 2))
            return np.zeros((0, 5)), landmarks
        scores = np.vstack(scores_list)
        scores_ravel = scores.ravel()
        order = scores_ravel.argsort()[::-1]
        proposals = proposals[order, :]
        scores = scores[order]
        if self.use_landmarks:
            landmarks = np.vstack(landmarks_list)
            landmarks = landmarks[order].astype(np.float32, copy=False)
        pre_det = np.hstack((proposals[:, 0:4], scores)).astype(np.float32, copy=False)
        keep = self.nms(pre_det)
        det = np.hstack((pre_det, proposals[:, 4:]))
        det = det[keep, :]
        if self.use_landmarks:
            landmarks = landmarks[keep]
        return det, landmarks

    def nms(self, dets):
        """Greedy non-maximum suppression on [N, 5] (x1, y1, x2, y2, score)
        rows; returns the indices of the kept detections."""
        thresh = self.nms_threshold
        x1 = dets[:, 0]
        y1 = dets[:, 1]
        x2 = dets[:, 2]
        y2 = dets[:, 3]
        scores = dets[:, 4]
        areas = (x2 - x1 + 1) * (y2 - y1 + 1)
        order = scores.argsort()[::-1]
        keep = []
        while order.size > 0:
            i = order[0]
            keep.append(i)
            xx1 = np.maximum(x1[i], x1[order[1:]])
            yy1 = np.maximum(y1[i], y1[order[1:]])
            xx2 = np.minimum(x2[i], x2[order[1:]])
            yy2 = np.minimum(y2[i], y2[order[1:]])
            w = np.maximum(0.0, xx2 - xx1 + 1)
            h = np.maximum(0.0, yy2 - yy1 + 1)
            inter = w * h
            ovr = inter / (areas[i] + areas[order[1:]] - inter)
            # Drop everything overlapping the current best above threshold.
            inds = np.where(ovr <= thresh)[0]
            order = order[inds + 1]
        return keep
def get_retinaface(name, rac='net3',
                   root='~/.insightface/models', **kwargs):
    """Fetch the pretrained RetinaFace weights named `name` from the model
    store (downloading into `root` if needed) and wrap them in a
    FaceDetector configured for anchor scheme `rac`."""
    from .model_store import get_model_file
    params_path = get_model_file("retinaface_%s" % name, root=root)
    return FaceDetector(params_path, rac)
def retinaface_r50_v1(**kwargs):
    # ResNet-50 backbone, standard 'net3' anchor scheme.
    return get_retinaface("r50_v1", rac='net3', **kwargs)

def retinaface_mnet025_v1(**kwargs):
    # MobileNet-0.25 backbone, standard 'net3' anchor scheme.
    return get_retinaface("mnet025_v1", rac='net3', **kwargs)

def retinaface_mnet025_v2(**kwargs):
    # 'net3l' variant: landmark deltas are scaled by a 0.2 regression std.
    return get_retinaface("mnet025_v2", rac='net3l', **kwargs)

View File

@@ -0,0 +1,77 @@
from __future__ import division
import mxnet as mx
import numpy as np
import cv2
__all__ = ['FaceGenderage',
'genderage_v1',
'get_genderage']
class FaceGenderage:
    """Gender + age estimator operating on aligned 112x112 BGR face crops."""

    def __init__(self, name, download, param_file):
        self.name = name
        self.download = download
        self.param_file = param_file
        # Fixed input resolution expected by the pretrained network.
        self.image_size = (112, 112)
        if download:
            assert param_file

    def prepare(self, ctx_id):
        """Load the checkpoint and bind the network on GPU `ctx_id`
        (any negative value selects the CPU)."""
        if self.param_file:
            # param_file looks like '<prefix>-<epoch>.params'.
            pos = self.param_file.rfind('-')
            prefix = self.param_file[0:pos]
            pos2 = self.param_file.rfind('.')
            epoch = int(self.param_file[pos+1:pos2])
            sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
            all_layers = sym.get_internals()
            # Truncate the graph at the 'fc1' output head.
            sym = all_layers['fc1_output']
            if ctx_id>=0:
                ctx = mx.gpu(ctx_id)
            else:
                ctx = mx.cpu()
            model = mx.mod.Module(symbol=sym, context=ctx, label_names = None)
            data_shape = (1,3)+self.image_size
            model.bind(data_shapes=[('data', data_shape)])
            model.set_params(arg_params, aux_params)
            #warmup forward pass; asnumpy() forces the computation to finish
            data = mx.nd.zeros(shape=data_shape)
            db = mx.io.DataBatch(data=(data,))
            model.forward(db, is_train=False)
            embedding = model.get_outputs()[0].asnumpy()
            self.model = model
        else:
            # No parameter file; nothing to load.
            pass

    def get(self, img):
        """Predict (gender, age) for one aligned 112x112x3 BGR crop.

        gender is the argmax over the first two outputs (presumably
        0=female, 1=male — confirm against the training labels); age is the
        sum of 100 two-way age-bin decisions from outputs 2..201.
        """
        assert self.param_file and self.model
        assert img.shape[2]==3 and img.shape[0:2]==self.image_size
        data = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        data = np.transpose(data, (2,0,1))  # HWC -> CHW
        data = np.expand_dims(data, axis=0)
        data = mx.nd.array(data)
        db = mx.io.DataBatch(data=(data,))
        self.model.forward(db, is_train=False)
        ret = self.model.get_outputs()[0].asnumpy()
        g = ret[:,0:2].flatten()
        gender = np.argmax(g)
        # 100 binary age-bin classifiers; summing their argmaxes gives age.
        a = ret[:,2:202].reshape( (100,2) )
        a = np.argmax(a, axis=1)
        age = int(sum(a))
        return gender, age
def get_genderage(name, download=True,
                  root='~/.insightface/models', **kwargs):
    """Create a FaceGenderage model; when `download` is True the pretrained
    weights are fetched into `root` first."""
    if not download:
        return FaceGenderage(name, False, None)
    from .model_store import get_model_file
    params_path = get_model_file("genderage_%s" % name, root=root)
    return FaceGenderage(name, True, params_path)
def genderage_v1(**kwargs):
    # Pretrained gender+age model; weights are downloaded on first use.
    return get_genderage("v1", download=True, **kwargs)

View File

@@ -0,0 +1,83 @@
from __future__ import division
import mxnet as mx
import numpy as np
import cv2
__all__ = ['FaceRecognition',
'arcface_r100_v1', 'arcface_outofreach_v1', 'arcface_mfn_v1',
'get_arcface']
class FaceRecognition:
    """ArcFace-style embedding extractor operating on aligned 112x112 BGR
    face crops."""

    def __init__(self, name, download, param_file):
        self.name = name
        self.download = download
        self.param_file = param_file
        # Fixed input resolution expected by the pretrained network.
        self.image_size = (112, 112)
        if download:
            assert param_file

    def prepare(self, ctx_id):
        """Load the checkpoint and bind the network on GPU `ctx_id`
        (any negative value selects the CPU)."""
        if self.param_file:
            # param_file looks like '<prefix>-<epoch>.params'.
            pos = self.param_file.rfind('-')
            prefix = self.param_file[0:pos]
            pos2 = self.param_file.rfind('.')
            epoch = int(self.param_file[pos+1:pos2])
            sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
            all_layers = sym.get_internals()
            # Use the 'fc1' embedding layer as the network output.
            sym = all_layers['fc1_output']
            if ctx_id>=0:
                ctx = mx.gpu(ctx_id)
            else:
                ctx = mx.cpu()
            model = mx.mod.Module(symbol=sym, context=ctx, label_names = None)
            data_shape = (1,3)+self.image_size
            model.bind(data_shapes=[('data', data_shape)])
            model.set_params(arg_params, aux_params)
            #warmup forward pass; asnumpy() forces the computation to finish
            data = mx.nd.zeros(shape=data_shape)
            db = mx.io.DataBatch(data=(data,))
            model.forward(db, is_train=False)
            embedding = model.get_outputs()[0].asnumpy()
            self.model = model
        else:
            # No parameter file; nothing to load.
            pass

    def get_embedding(self, img):
        """Return the raw (un-normalized) embedding for one aligned
        112x112x3 BGR crop."""
        assert self.param_file and self.model
        assert img.shape[2]==3 and img.shape[0:2]==self.image_size
        data = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        data = np.transpose(data, (2,0,1))  # HWC -> CHW
        data = np.expand_dims(data, axis=0)
        data = mx.nd.array(data)
        db = mx.io.DataBatch(data=(data,))
        self.model.forward(db, is_train=False)
        embedding = self.model.get_outputs()[0].asnumpy()
        return embedding

    def compute_sim(self, img1, img2):
        """Cosine similarity between the embeddings of two aligned crops."""
        emb1 = self.get_embedding(img1).flatten()
        emb2 = self.get_embedding(img2).flatten()
        from numpy.linalg import norm
        sim = np.dot(emb1, emb2)/(norm(emb1)*norm(emb2))
        return sim
def get_arcface(name, download=True,
                root='~/.insightface/models', **kwargs):
    """Create a FaceRecognition model; when `download` is True the
    pretrained weights are fetched into `root` first."""
    if not download:
        return FaceRecognition(name, False, None)
    from .model_store import get_model_file
    params_path = get_model_file("arcface_%s" % name, root=root)
    return FaceRecognition(name, True, params_path)
def arcface_r100_v1(**kwargs):
    # ResNet-100 recognition model; weights are downloaded on first use.
    return get_arcface("r100_v1", download=True, **kwargs)

def arcface_mfn_v1(**kwargs):
    # MobileFaceNet recognition model; weights are downloaded on first use.
    return get_arcface("mfn_v1", download=True, **kwargs)

def arcface_outofreach_v1(**kwargs):
    # No published download for this model; weights must exist locally.
    return get_arcface("outofreach_v1", download=False, **kwargs)

View File

@@ -0,0 +1,97 @@
"""
This code file mainly comes from https://github.com/dmlc/gluon-cv/blob/master/gluoncv/model_zoo/model_store.py
"""
from __future__ import print_function
__all__ = ['get_model_file']
import os
import zipfile
import glob
from ..utils import download, check_sha1
# Registry mapping model name -> expected sha1 checksum of its .params file.
# An empty checksum means the entry is registered but not yet published.
_model_sha1 = {name: checksum for checksum, name in [
    ('95be21b58e29e9c1237f229dae534bd854009ce0', 'arcface_r100_v1'),
    ('', 'arcface_mfn_v1'),
    ('39fd1e087a2a2ed70a154ac01fecaa86c315d01b', 'retinaface_r50_v1'),
    ('2c9de8116d1f448fd1d4661f90308faae34c990a', 'retinaface_mnet025_v1'),
    ('0db1d07921d005e6c9a5b38e059452fc5645e5a4', 'retinaface_mnet025_v2'),
    ('7dd8111652b7aac2490c5dcddeb268e53ac643e6', 'genderage_v1'),
]}

# All model archives are served as zip files from the insightface file host.
base_repo_url = 'http://insightface.ai/files/'
_url_format = '{repo_url}models/{file_name}.zip'
def short_hash(name):
    """Return the first 8 hex characters of a model's registered sha1 sum,
    raising ValueError for unknown names."""
    if name not in _model_sha1:
        raise ValueError('Pretrained model for {name} is not available.'.format(name=name))
    return _model_sha1[name][:8]
def find_params_file(dir_path):
    """Return the lexicographically last *.params file inside `dir_path`,
    or None when the directory is missing or holds no such file."""
    if not os.path.exists(dir_path):
        return None
    candidates = sorted(glob.glob("%s/*.params" % dir_path))
    return candidates[-1] if candidates else None
def get_model_file(name, root=os.path.join('~', '.insightface', 'models')):
    r"""Return location for the pretrained on local file system.

    This function will download from online model zoo when model cannot be
    found or has mismatch.  The root directory will be created if it
    doesn't exist.

    Parameters
    ----------
    name : str
        Name of the model.
    root : str, default '~/.insightface/models'
        Location for keeping the model parameters.

    Returns
    -------
    file_path
        Path to the requested pretrained model file.
    """
    file_name = name
    root = os.path.expanduser(root)
    # Each model unpacks into its own subdirectory of `root`.
    dir_path = os.path.join(root, name)
    file_path = find_params_file(dir_path)
    #file_path = os.path.join(root, file_name + '.params')
    sha1_hash = _model_sha1[name]
    if file_path is not None:
        # Re-download when the cached file's checksum no longer matches.
        if check_sha1(file_path, sha1_hash):
            return file_path
        else:
            print('Mismatch in the content of model file detected. Downloading again.')
    else:
        print('Model file is not found. Downloading.')

    if not os.path.exists(root):
        os.makedirs(root)
    if not os.path.exists(dir_path):
        os.makedirs(dir_path)

    # Download the zip archive next to the model directory, extract, clean up.
    zip_file_path = os.path.join(root, file_name + '.zip')
    repo_url = base_repo_url
    if repo_url[-1] != '/':
        repo_url = repo_url + '/'
    download(_url_format.format(repo_url=repo_url, file_name=file_name),
             path=zip_file_path,
             overwrite=True)
    with zipfile.ZipFile(zip_file_path) as zf:
        zf.extractall(dir_path)
    os.remove(zip_file_path)
    file_path = find_params_file(dir_path)

    if check_sha1(file_path, sha1_hash):
        return file_path
    else:
        raise ValueError('Downloaded file has different hash. Please try again.')

View File

@@ -0,0 +1,57 @@
# pylint: disable=wildcard-import, unused-wildcard-import
"""
This code file mainly comes from https://github.com/dmlc/gluon-cv/blob/master/gluoncv/model_zoo/model_zoo.py
"""
from .face_recognition import *
from .face_detection import *
from .face_genderage import *
#from .face_alignment import *
__all__ = ['get_model', 'get_model_list']
# Registry of model factory functions; keys are the names accepted by
# get_model().  Entries without published weights stay commented out.
_models = {
    'arcface_r100_v1': arcface_r100_v1,
    #'arcface_mfn_v1': arcface_mfn_v1,
    #'arcface_outofreach_v1': arcface_outofreach_v1,
    'retinaface_r50_v1': retinaface_r50_v1,
    'retinaface_mnet025_v1': retinaface_mnet025_v1,
    'retinaface_mnet025_v2': retinaface_mnet025_v2,
    'genderage_v1': genderage_v1,
}
def get_model(name, **kwargs):
    """Returns a pre-defined model by name.

    Parameters
    ----------
    name : str
        Name of the model (case-insensitive).
    root : str, default '~/.insightface/models'
        Location for keeping the model parameters.

    Returns
    -------
    Model
        The model.

    Raises
    ------
    ValueError
        When `name` is not a registered model, listing the valid names.
    """
    key = name.lower()
    if key not in _models:
        available = '\n\t'.join(sorted(_models.keys()))
        raise ValueError('"%s" is not among the following model list:\n\t%s' % (key, available))
    return _models[key](**kwargs)
def get_model_list():
    """Get the entire list of model names in model_zoo.

    Returns
    -------
    list of str
        Entire list of model names in model_zoo, alphabetically sorted.
    """
    return sorted(_models)

View File

@@ -0,0 +1,17 @@
from __future__ import absolute_import
#from . import bbox
#from . import viz
#from . import random
#from . import metrics
#from . import parallel
from .download import download, check_sha1
from .filesystem import makedirs
from .filesystem import try_import_dali
#from .bbox import bbox_iou
#from .block import recursive_visit, set_lr_mult, freeze_bn
#from .lr_scheduler import LRSequential, LRScheduler
#from .plot_history import TrainingHistory
#from .export_helper import export_block
#from .sync_loader_helper import split_data, split_and_load

View File

@@ -0,0 +1,90 @@
"""
This code file mainly comes from https://github.com/dmlc/gluon-cv/blob/master/gluoncv/utils/download.py
"""
import os
import hashlib
import requests
from tqdm import tqdm
def check_sha1(filename, sha1_hash):
    """Check whether the sha1 hash of the file content matches the expected hash.

    Parameters
    ----------
    filename : str
        Path to the file.
    sha1_hash : str
        Expected sha1 hash in hexadecimal digits.  May be a prefix: only
        the common-length prefix of the two hashes is compared.

    Returns
    -------
    bool
        Whether the file content matches the expected hash.
    """
    sha1 = hashlib.sha1()
    with open(filename, 'rb') as f:
        # Stream in 1 MiB chunks so large files need not fit in memory.
        while True:
            data = f.read(1048576)
            if not data:
                break
            sha1.update(data)
    # Reuse the digest instead of recomputing it (original called
    # hexdigest() twice), and avoid the ambiguous single-letter name `l`.
    sha1_file = sha1.hexdigest()
    n = min(len(sha1_file), len(sha1_hash))
    return sha1_file[0:n] == sha1_hash[0:n]
def download(url, path=None, overwrite=False, sha1_hash=None):
    """Download an given URL

    Parameters
    ----------
    url : str
        URL to download
    path : str, optional
        Destination path to store downloaded file. By default stores to the
        current directory with same name as in url.
    overwrite : bool, optional
        Whether to overwrite destination file if already exists.
    sha1_hash : str, optional
        Expected sha1 hash in hexadecimal digits. Will ignore existing file when hash is specified
        but doesn't match.

    Returns
    -------
    str
        The file path of the downloaded file.
    """
    if path is None:
        fname = url.split('/')[-1]
    else:
        path = os.path.expanduser(path)
        if os.path.isdir(path):
            fname = os.path.join(path, url.split('/')[-1])
        else:
            fname = path

    # Skip the download entirely when a valid cached copy exists.
    if overwrite or not os.path.exists(fname) or (sha1_hash and not check_sha1(fname, sha1_hash)):
        dirname = os.path.dirname(os.path.abspath(os.path.expanduser(fname)))
        if not os.path.exists(dirname):
            os.makedirs(dirname)

        print('Downloading %s from %s...'%(fname, url))
        r = requests.get(url, stream=True)
        if r.status_code != 200:
            raise RuntimeError("Failed downloading url %s"%url)
        total_length = r.headers.get('content-length')
        with open(fname, 'wb') as f:
            if total_length is None: # no content length header
                for chunk in r.iter_content(chunk_size=1024):
                    if chunk: # filter out keep-alive new chunks
                        f.write(chunk)
            else:
                # Known size: stream with a tqdm progress bar in KB units.
                total_length = int(total_length)
                for chunk in tqdm(r.iter_content(chunk_size=1024),
                                  total=int(total_length / 1024. + 0.5),
                                  unit='KB', unit_scale=False, dynamic_ncols=True):
                    f.write(chunk)

        # NOTE(review): this *raises* UserWarning (it is an exception here,
        # not a warnings.warn call) — kept as-is for upstream compatibility.
        if sha1_hash and not check_sha1(fname, sha1_hash):
            raise UserWarning('File {} is downloaded but the content hash does not match. ' \
                              'The repo may be outdated or download may be incomplete. ' \
                              'If the "repo_url" is overridden, consider switching to ' \
                              'the default repo.'.format(fname))

    return fname

View File

@@ -0,0 +1,88 @@
import cv2
import numpy as np
from skimage import transform as trans
# Five canonical 5-point landmark templates (eye-L, eye-R, nose, mouth-L,
# mouth-R) for a 112x112 crop, covering head poses from left profile
# through frontal to right profile.
src1 = np.array([
    [51.642,50.115],
    [57.617,49.990],
    [35.740,69.007],
    [51.157,89.050],
    [57.025,89.702]], dtype=np.float32)
#<--left
src2 = np.array([
    [45.031,50.118],
    [65.568,50.872],
    [39.677,68.111],
    [45.177,86.190],
    [64.246,86.758]], dtype=np.float32)
#---frontal
src3 = np.array([
    [39.730,51.138],
    [72.270,51.138],
    [56.000,68.493],
    [42.463,87.010],
    [69.537,87.010]], dtype=np.float32)
#-->right
src4 = np.array([
    [46.845,50.872],
    [67.382,50.118],
    [72.737,68.111],
    [48.167,86.758],
    [67.236,86.190]], dtype=np.float32)
#-->right profile
src5 = np.array([
    [54.796,49.990],
    [60.771,50.115],
    [76.673,69.007],
    [55.388,89.702],
    [61.257,89.050]], dtype=np.float32)

# Stacked pose templates; index selects the pose bucket.
src = np.array([src1,src2,src3,src4,src5])
# Templates scale linearly with crop size.
src_map = {112 : src, 224 : src*2}

# Canonical ArcFace 5-point template (valid for 112x112 crops only),
# expanded to shape (1, 5, 2) so it iterates like the pose stacks above.
arcface_src = np.array([
    [38.2946, 51.6963],
    [73.5318, 51.5014],
    [56.0252, 71.7366],
    [41.5493, 92.3655],
    [70.7299, 92.2041] ], dtype=np.float32 )
arcface_src = np.expand_dims(arcface_src, axis=0)

# lmk is prediction; src is template
def estimate_norm(lmk, image_size = 112, mode='arcface'):
    """Estimate the best similarity transform mapping the 5 detected
    landmarks `lmk` onto a canonical template.

    Returns (M, index): M is the 2x3 affine matrix for cv2.warpAffine and
    index is the template that produced the smallest alignment error
    (always 0 in 'arcface' mode, which has a single template).
    """
    assert lmk.shape==(5,2)
    tform = trans.SimilarityTransform()
    # Homogeneous coordinates so the 2x3 matrix can be applied directly.
    lmk_tran = np.insert(lmk, 2, values=np.ones(5), axis=1)
    min_M = []
    min_index = []
    min_error = float('inf')
    if mode=='arcface':
        # The ArcFace template is defined for 112x112 crops only.
        assert image_size==112
        src = arcface_src
    else:
        src = src_map[image_size]
    for i in np.arange(src.shape[0]):
        tform.estimate(lmk, src[i])
        M = tform.params[0:2,:]
        results = np.dot(M, lmk_tran.T)
        results = results.T
        # Total Euclidean distance between warped landmarks and template.
        error = np.sum(np.sqrt(np.sum((results - src[i]) ** 2,axis=1)))
        if error< min_error:
            min_error = error
            min_M = M
            min_index = i
    return min_M, min_index
def norm_crop(img, landmark, image_size=112, mode='arcface'):
    """Align a face: fit the best similarity transform from the 5-point
    `landmark` to the template and warp `img` into an
    (image_size, image_size) crop with black borders."""
    M, pose_index = estimate_norm(landmark, image_size, mode)
    warped = cv2.warpAffine(img,M, (image_size, image_size), borderValue = 0.0)
    return warped

View File

@@ -0,0 +1,137 @@
"""
This code file mainly comes from https://github.com/dmlc/gluon-cv/blob/master/gluoncv/utils/filesystem.py
"""
import os
import errno
def makedirs(path):
    """Create directory recursively if not exists.

    Similar to `mkdir -p`; safe to call on an already-existing directory.

    Parameters
    ----------
    path : str
        Path of the desired dir
    """
    try:
        os.makedirs(path)
    except OSError as exc:
        if exc.errno == errno.EEXIST:
            return  # already there: that is fine
        raise
def try_import(package, message=None):
    """Try import specified package, with custom message support.

    Parameters
    ----------
    package : str
        The name of the targeting package.
    message : str, default is None
        If not None, raise an ImportError carrying this message instead of
        the original one when the import fails.

    Returns
    -------
    module if found, raise ImportError otherwise
    """
    try:
        return __import__(package)
    except ImportError as err:
        if message:
            raise ImportError(message)
        raise err
def try_import_cv2():
    """Try import cv2 at runtime.

    Returns
    -------
    cv2 module if found. Raise ImportError otherwise
    """
    msg = "cv2 is required, you can install by package manager, e.g. 'apt-get', \
or `pip install opencv-python --user` (note that this is unofficial PYPI package)."
    return try_import('cv2', msg)

def try_import_mmcv():
    """Try import mmcv at runtime.

    Returns
    -------
    mmcv module if found. Raise ImportError otherwise
    """
    msg = "mmcv is required, you can install by first `pip install Cython --user` \
and then `pip install mmcv --user` (note that this is unofficial PYPI package)."
    return try_import('mmcv', msg)

def try_import_rarfile():
    """Try import rarfile at runtime.

    Returns
    -------
    rarfile module if found. Raise ImportError otherwise
    """
    msg = "rarfile is required, you can install by first `sudo apt-get install unrar` \
and then `pip install rarfile --user` (note that this is unofficial PYPI package)."
    return try_import('rarfile', msg)
def import_try_install(package, extern_url=None):
    """Try import the specified package.
    If the package not installed, try use pip to install and import if success.

    Parameters
    ----------
    package : str
        The name of the package trying to import.
    extern_url : str or None, optional
        The external url if package is not hosted on PyPI.
        For example, you can install a package using:
        "pip install git+http://github.com/user/repo/tarball/master/egginfo=xxx".
        In this case, you can pass the url to the extern_url.

    Returns
    -------
    <class 'Module'>
        The imported python module.
    """
    try:
        return __import__(package)
    except ImportError:
        # pip moved its entry point between versions; try both locations.
        try:
            from pip import main as pipmain
        except ImportError:
            from pip._internal import main as pipmain

        # trying to install package
        url = package if extern_url is None else extern_url
        pipmain(['install', '--user', url])  # will raise SystemExit Error if fails

        # trying to load again
        try:
            return __import__(package)
        except ImportError:
            # A --user install may land in a site dir not yet on sys.path.
            import sys
            import site
            user_site = site.getusersitepackages()
            if user_site not in sys.path:
                sys.path.append(user_site)
            return __import__(package)
    # NOTE(review): unreachable — every path above returns or raises.
    return __import__(package)
def try_import_dali():
    """Try import NVIDIA DALI at runtime.

    Returns the real `nvidia.dali` module (with `Pipeline` hoisted onto it)
    when installed; otherwise a stub class whose Pipeline raises
    NotImplementedError on construction.
    """
    try:
        dali = __import__('nvidia.dali', fromlist=['pipeline', 'ops', 'types'])
        dali.Pipeline = dali.pipeline.Pipeline
    except ImportError:
        # Fallback stub keeps call sites importable without DALI.
        class dali:
            class Pipeline:
                def __init__(self):
                    raise NotImplementedError(
                        "DALI not found, please check if you installed it correctly.")
    return dali

61
python-package/setup.py Normal file
View File

@@ -0,0 +1,61 @@
#!/usr/bin/env python
import os
import io
import re
import shutil
import sys
from setuptools import setup, find_packages
def read(*names, **kwargs):
    """Read and return the text of a file located relative to this setup
    script.  Path segments are joined; `encoding` kwarg defaults to utf8."""
    target = os.path.join(os.path.dirname(__file__), *names)
    encoding = kwargs.get("encoding", "utf8")
    with io.open(target, encoding=encoding) as fp:
        return fp.read()
def find_version(*file_paths):
    """Extract the ``__version__`` string from a source file.

    Reads the file via :func:`read` and scans for a line of the form
    ``__version__ = '...'``. Raises ``RuntimeError`` when no such line
    exists, so packaging fails loudly instead of shipping an empty version.
    """
    contents = read(*file_paths)
    match = re.search(r"^__version__ = ['\"]([^'\"]*)['\"]",
                      contents, re.M)
    if match is None:
        raise RuntimeError("Unable to find version string.")
    return match.group(1)
# Prefer a reStructuredText long description (what older PyPI renders best)
# by converting README.md with pypandoc; fall back to the raw Markdown text
# when pypandoc is missing or the conversion fails.
try:
    import pypandoc
    long_description = pypandoc.convert('README.md', 'rst')
except(IOError, ImportError):
    long_description = open('README.md').read()
# Single-source the package version from insightface/__init__.py.
VERSION = find_version('insightface', '__init__.py')
# Runtime dependencies installed alongside the package.
requirements = [
    'numpy',
    'tqdm',
    'requests',
    'matplotlib',
    'Pillow',
    'scipy',
    'opencv-python',
    'scikit-learn',
    'scikit-image',
    'easydict',
]
# Package definition: metadata plus discovered packages and dependencies
# (VERSION, long_description and requirements are defined above).
setup(
    # Metadata
    name='insightface',
    version=VERSION,
    author='InsightFace Contributors',
    url='https://github.com/deepinsight/insightface',
    description='InsightFace Toolkit',
    long_description=long_description,
    license='Apache-2.0',
    # Package info
    packages=find_packages(exclude=('docs', 'tests', 'scripts')),
    zip_safe=True,
    include_package_data=True,
    install_requires=requirements,
)

View File

@@ -0,0 +1,124 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
#import mxnet as mx
#from mxnet import ndarray as nd
import argparse
import cv2
import pickle
import numpy as np
import sys
import os
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'common'))
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'RetinaFace'))
import face_align
from retinaface import RetinaFace
def to_rgb(img):
    """Replicate a 2-D (grayscale) image into a 3-channel uint8 image.

    The input is broadcast into every channel of a freshly allocated
    ``(rows, cols, 3)`` uint8 array (values are cast to uint8 on assignment,
    matching the original behavior).
    """
    rows, cols = img.shape
    out = np.empty((rows, cols, 3), dtype=np.uint8)
    # Broadcast the single plane across all three channels at once.
    out[:] = img[:, :, np.newaxis]
    return out
def IOU(Reframe, GTframe):
    """Intersection-over-union of two boxes given as [x1, y1, x2, y2].

    Returns 0 when the boxes do not overlap, otherwise
    ``area(intersection) / area(union)`` as a float.
    """
    ax, ay = Reframe[0], Reframe[1]
    aw = Reframe[2] - Reframe[0]
    ah = Reframe[3] - Reframe[1]

    bx, by = GTframe[0], GTframe[1]
    bw = GTframe[2] - GTframe[0]
    bh = GTframe[3] - GTframe[1]

    # Overlap extent along each axis: sum of the two extents minus the
    # span of their union; non-positive means the boxes are disjoint.
    inter_w = aw + bw - (max(ax + aw, bx + bw) - min(ax, bx))
    inter_h = ah + bh - (max(ay + ah, by + bh) - min(ay, by))

    if inter_w <= 0 or inter_h <= 0:
        ratio = 0
    else:
        inter_area = inter_w * inter_h
        union_area = aw * ah + bw * bh - inter_area
        ratio = inter_area * 1. / union_area
    return ratio
# Command-line options for packaging verification-pair images into a
# pickled binary set (crops + same/different labels).
parser = argparse.ArgumentParser(description='Package eval images')
# general
parser.add_argument('--data-dir', default='', help='')
parser.add_argument('--image-size', type=int, default=112, help='')
parser.add_argument('--gpu', type=int, default=0, help='')
parser.add_argument('--det-prefix', type=str, default='./model/R50', help='')
parser.add_argument('--output', default='./', help='path to save.')
parser.add_argument('--align-mode', default='arcface', help='align mode.')
args = parser.parse_args()
gpu_id = args.gpu
# RetinaFace detector loaded once at module level and reused for every image.
detector = RetinaFace(args.det_prefix, 0, gpu_id, network='net3')
# Detection-time rescaling bounds: short side is scaled toward target_size,
# but the long side is never allowed to exceed max_size.
target_size = 400
max_size = 800
def get_norm_crop(image_path):
    """Detect a face in the image at ``image_path`` and return an aligned crop.

    Uses the module-level ``detector``, ``args``, ``target_size`` and
    ``max_size`` globals. Returns the warped face image of side
    ``args.image_size``, or None when no face is found even after the retry.
    """
    im = cv2.imread(image_path)
    im_shape = im.shape
    im_size_min = np.min(im_shape[0:2])
    im_size_max = np.max(im_shape[0:2])
    # Scale the short side up/down to target_size ...
    im_scale = float(target_size) / float(im_size_min)
    # prevent bigger axis from being more than max_size:
    if np.round(im_scale * im_size_max) > max_size:
        im_scale = float(max_size) / float(im_size_max)
    bbox, landmark = detector.detect(im, threshold=0.5, scales=[im_scale])
    #print(im.shape, bbox.shape, landmark.shape)
    if bbox.shape[0]==0:
        # No detection: retry with a much lower threshold over several scales.
        bbox, landmark = detector.detect(im, threshold=0.05, scales=[im_scale*0.75, im_scale, im_scale*2.0])
        print('refine', im.shape, bbox.shape, landmark.shape)
    nrof_faces = bbox.shape[0]
    if nrof_faces>0:
        det = bbox[:,0:4]
        img_size = np.asarray(im.shape)[0:2]
        bindex = 0
        if nrof_faces>1:
            # Pick one face by trading off box area against squared distance
            # from the image center (bigger and more central wins).
            bounding_box_size = (det[:,2]-det[:,0])*(det[:,3]-det[:,1])
            img_center = img_size / 2
            offsets = np.vstack([ (det[:,0]+det[:,2])/2-img_center[1], (det[:,1]+det[:,3])/2-img_center[0] ])
            offset_dist_squared = np.sum(np.power(offsets,2.0),0)
            bindex = np.argmax(bounding_box_size-offset_dist_squared*2.0) # some extra weight on the centering
        #_bbox = bounding_boxes[bindex, 0:4]
        _landmark = landmark[bindex]
        warped = face_align.norm_crop(im, landmark = _landmark, image_size=args.image_size, mode=args.align_mode)
        return warped
    else:
        return None
# Walk pairs_label.txt (each line: path1 path2 label, label '0' = different
# person), align-crop both images, JPEG-encode them, and pickle the encoded
# crops together with the same/different flags into args.output.
bins = []
issame_list = []
pp = 0
for line in open(os.path.join(args.data_dir, 'pairs_label.txt'), 'r'):
    pp+=1
    if pp%100==0:
        print('processing', pp)
    line = line.strip().split()
    assert len(line)==3
    path1 = os.path.join(args.data_dir, line[0])
    path2 = os.path.join(args.data_dir, line[1])
    im1 = get_norm_crop(path1)
    im2 = get_norm_crop(path2)
    # NOTE(review): get_norm_crop may return None (no face found); cv2.imencode
    # below would then fail — presumably the dataset guarantees detections.
    issame = True
    if line[2]=='0':
        issame = False
    issame_list.append(issame)
    for im in [im1, im2]:
        _, s = cv2.imencode('.jpg', im)
        bins.append(s)
with open(args.output, 'wb') as f:
    pickle.dump((bins, issame_list), f, protocol=pickle.HIGHEST_PROTOCOL)

File diff suppressed because it is too large Load Diff

View File

@@ -221,7 +221,7 @@ def test(lfw_set, mx_model, batch_size):
issame_list = lfw_set[1]
model = mx_model
embeddings_list = []
for i in xrange( len(lfw_data_list) ):
for i in range( len(lfw_data_list) ):
lfw_data = lfw_data_list[i]
embeddings = None
ba = 0
@@ -256,7 +256,7 @@ def test(lfw_set, mx_model, batch_size):
_xnorm = 0.0
_xnorm_cnt = 0
for embed in embeddings_list:
for i in xrange(embed.shape[0]):
for i in range(embed.shape[0]):
_em = embed[i]
_norm=np.linalg.norm(_em)
#print(_em.shape, _norm)

View File

@@ -180,12 +180,17 @@ def evaluate(embeddings, actual_issame, nrof_folds=10, pca = 0):
return tpr, fpr, accuracy, val, val_std, far
def load_bin(path, image_size):
bins, issame_list = pickle.load(open(path, 'rb'))
try:
with open(path, 'rb') as f:
bins, issame_list = pickle.load(f) #py2
except UnicodeDecodeError as e:
with open(path, 'rb') as f:
bins, issame_list = pickle.load(f, encoding='bytes') #py3
data_list = []
for flip in [0,1]:
data = nd.empty((len(issame_list)*2, 3, image_size[0], image_size[1]))
data_list.append(data)
for i in xrange(len(issame_list)*2):
for i in range(len(issame_list)*2):
_bin = bins[i]
img = mx.image.imdecode(_bin)
if img.shape[1]!=image_size[0]:
@@ -213,7 +218,7 @@ def test(data_set, mx_model, batch_size, nfolds=10, data_extra = None, label_sha
_label = nd.ones( (batch_size,) )
else:
_label = nd.ones( label_shape )
for i in xrange( len(data_list) ):
for i in range( len(data_list) ):
data = data_list[i]
embeddings = None
ba = 0
@@ -255,7 +260,7 @@ def test(data_set, mx_model, batch_size, nfolds=10, data_extra = None, label_sha
_xnorm = 0.0
_xnorm_cnt = 0
for embed in embeddings_list:
for i in xrange(embed.shape[0]):
for i in range(embed.shape[0]):
_em = embed[i]
_norm=np.linalg.norm(_em)
#print(_em.shape, _norm)
@@ -293,7 +298,7 @@ def test_badcase(data_set, mx_model, batch_size, name='', data_extra = None, lab
_label = nd.ones( (batch_size,) )
else:
_label = nd.ones( label_shape )
for i in xrange( len(data_list) ):
for i in range( len(data_list) ):
data = data_list[i]
embeddings = None
ba = 0
@@ -438,7 +443,7 @@ def test_badcase(data_set, mx_model, batch_size, name='', data_extra = None, lab
# imgb = cv2.transpose(imgb)
# imgb = cv2.flip(imgb, 0)
#else:
# for ii in xrange(2):
# for ii in range(2):
# imgb = cv2.transpose(imgb)
# imgb = cv2.flip(imgb, 1)
dist = out[2]
@@ -469,7 +474,7 @@ def dumpR(data_set, mx_model, batch_size, name='', data_extra = None, label_shap
_label = nd.ones( (batch_size,) )
else:
_label = nd.ones( label_shape )
for i in xrange( len(data_list) ):
for i in range( len(data_list) ):
data = data_list[i]
embeddings = None
ba = 0
@@ -571,7 +576,7 @@ if __name__ == '__main__':
ver_name_list.append(name)
if args.mode==0:
for i in xrange(len(ver_list)):
for i in range(len(ver_list)):
results = []
for model in nets:
acc1, std1, acc2, std2, xnorm, embeddings_list = test(ver_list[i], model, args.batch_size, args.nfolds)

View File

@@ -12,8 +12,6 @@ import sklearn
import datetime
import numpy as np
import cv2
from PIL import Image
from io import BytesIO
import mxnet as mx
from mxnet import ndarray as nd
@@ -166,11 +164,13 @@ class FaceImageIter(io.DataIter):
def mirror_aug(self, img):
_rd = random.randint(0,1)
if _rd==1:
for c in xrange(img.shape[2]):
for c in range(img.shape[2]):
img[:,:,c] = np.fliplr(img[:,:,c])
return img
def compress_aug(self, img):
from PIL import Image
from io import BytesIO
buf = BytesIO()
img = Image.fromarray(img.asnumpy(), 'RGB')
q = random.randint(2, 20)

View File

@@ -122,7 +122,8 @@ class ParallModule(BaseModule):
#ag = {}
#ax = {}
rk = []
for k,v in g.iteritems():
for k in g:
v = g[k]
if k.startswith('fc7'):
p1 = k.find('_')
p2 = k.rfind('_')
@@ -131,10 +132,6 @@ class ParallModule(BaseModule):
rk.append(k)
for k in rk:
del g[k]
#for k,v in g.iteritems():
# print('g', k, v.shape)
#for k,v in ag.iteritems():
# print('ag', k, v.shape)
self._curr_module.set_params(g, x)
#self._arcface_module.set_params(ag, ax)

View File

@@ -606,6 +606,12 @@ def get_symbol():
units = [3, 8, 35, 3]
elif num_layers == 100:
units = [3, 13, 30, 3]
elif num_layers == 134:
units = [3, 10, 50, 3]
elif num_layers == 136:
units = [3, 13, 48, 3]
elif num_layers == 140:
units = [3, 15, 48, 3]
elif num_layers == 124:
units = [3, 13, 40, 5]
elif num_layers == 160:

View File

@@ -149,7 +149,7 @@ def train_net(args):
ctx = []
cvd = os.environ['CUDA_VISIBLE_DEVICES'].strip()
if len(cvd)>0:
for i in xrange(len(cvd.split(','))):
for i in range(len(cvd.split(','))):
ctx.append(mx.gpu(i))
if len(ctx)==0:
ctx = [mx.cpu()]
@@ -270,7 +270,7 @@ def train_net(args):
def ver_test(nbatch):
results = []
for i in xrange(len(ver_list)):
for i in range(len(ver_list)):
acc1, std1, acc2, std2, xnorm, embeddings_list = verification.test(ver_list[i], model, args.batch_size, 10, None, None)
print('[%s][%d]XNorm: %f' % (ver_name_list[i], nbatch, xnorm))
#print('[%s][%d]Accuracy: %1.5f+-%1.5f' % (ver_name_list[i], nbatch, acc1, std1))
@@ -281,7 +281,7 @@ def train_net(args):
highest_acc = [0.0, 0.0] #lfw and target
#for i in xrange(len(ver_list)):
#for i in range(len(ver_list)):
# highest_acc.append(0.0)
global_step = [0]
save_step = [0]

View File

@@ -62,6 +62,7 @@ def parse_args():
parser.add_argument('--per-batch-size', type=int, default=default.per_batch_size, help='batch size in each context')
parser.add_argument('--kvstore', type=str, default=default.kvstore, help='kvstore setting')
parser.add_argument('--worker-id', type=int, default=0, help='worker id for dist training, starts from 0')
parser.add_argument('--extra-model-name', type=str, default='', help='extra model name')
args = parser.parse_args()
return args
@@ -126,14 +127,17 @@ def train_net(args):
ctx = []
cvd = os.environ['CUDA_VISIBLE_DEVICES'].strip()
if len(cvd)>0:
for i in xrange(len(cvd.split(','))):
for i in range(len(cvd.split(','))):
ctx.append(mx.gpu(i))
if len(ctx)==0:
ctx = [mx.cpu()]
print('use cpu')
else:
print('gpu num:', len(ctx))
prefix = os.path.join(args.models_root, '%s-%s-%s'%(args.network, args.loss, args.dataset), 'model')
if len(args.extra_model_name)==0:
prefix = os.path.join(args.models_root, '%s-%s-%s'%(args.network, args.loss, args.dataset), 'model')
else:
prefix = os.path.join(args.models_root, '%s-%s-%s-%s'%(args.network, args.loss, args.dataset, args.extra_model_name), 'model')
prefix_dir = os.path.dirname(prefix)
print('prefix', prefix)
if not os.path.exists(prefix_dir):
@@ -249,7 +253,7 @@ def train_net(args):
def ver_test(nbatch):
results = []
for i in xrange(len(ver_list)):
for i in range(len(ver_list)):
acc1, std1, acc2, std2, xnorm, embeddings_list = verification.test(ver_list[i], model, args.batch_size, 10, None, None)
print('[%s][%d]XNorm: %f' % (ver_name_list[i], nbatch, xnorm))
#print('[%s][%d]Accuracy: %1.5f+-%1.5f' % (ver_name_list[i], nbatch, acc1, std1))
@@ -259,7 +263,7 @@ def train_net(args):
highest_acc = [0.0, 0.0] #lfw and target
#for i in xrange(len(ver_list)):
#for i in range(len(ver_list)):
# highest_acc.append(0.0)
global_step = [0]
save_step = [0]

View File

@@ -111,12 +111,12 @@ class FaceImageIter(io.DataIter):
def pairwise_dists(self, embeddings):
nd_embedding_list = []
for i in xrange(self.ctx_num):
for i in range(self.ctx_num):
nd_embedding = mx.nd.array(embeddings, mx.gpu(i))
nd_embedding_list.append(nd_embedding)
nd_pdists = []
pdists = []
for idx in xrange(embeddings.shape[0]):
for idx in range(embeddings.shape[0]):
emb_idx = idx%self.ctx_num
nd_embedding = nd_embedding_list[emb_idx]
a_embedding = nd_embedding[idx]
@@ -138,16 +138,16 @@ class FaceImageIter(io.DataIter):
pdists = self.pairwise_dists(embeddings)
#self.times[3] += self.time_elapsed()
for i in xrange(people_per_batch):
for i in range(people_per_batch):
nrof_images = int(nrof_images_per_class[i])
for j in xrange(1,nrof_images):
for j in range(1,nrof_images):
#self.time_reset()
a_idx = emb_start_idx + j - 1
#neg_dists_sqr = np.sum(np.square(embeddings[a_idx] - embeddings), 1)
neg_dists_sqr = pdists[a_idx]
#self.times[3] += self.time_elapsed()
for pair in xrange(j, nrof_images): # For every possible positive pair.
for pair in range(j, nrof_images): # For every possible positive pair.
p_idx = emb_start_idx + pair
#self.time_reset()
pos_dist_sqr = np.sum(np.square(embeddings[a_idx]-embeddings[p_idx]))
@@ -234,7 +234,7 @@ class FaceImageIter(io.DataIter):
#_label = _batch.label[0].asnumpy()
#data[ba:bb,:,:,:] = _data
#label[ba:bb] = _label
for i in xrange(ba, bb):
for i in range(ba, bb):
#print(ba, bb, self.triplet_cur, i, len(self.triplet_seq))
_idx = self.triplet_seq[i+self.triplet_cur]
s = self.imgrec.read_idx(_idx)
@@ -269,7 +269,7 @@ class FaceImageIter(io.DataIter):
self.times[1] += self.time_elapsed()
self.time_reset()
nrof_images_per_class = [1]
for i in xrange(1, bag_size):
for i in range(1, bag_size):
if tag[i][0]==tag[i-1][0]:
nrof_images_per_class[-1]+=1
else:
@@ -283,7 +283,7 @@ class FaceImageIter(io.DataIter):
if bb>len(triplets):
break
_triplets = triplets[ba:bb]
for i in xrange(3):
for i in range(3):
for triplet in _triplets:
_pos = triplet[i]
_idx = tag[_pos][1]
@@ -306,7 +306,7 @@ class FaceImageIter(io.DataIter):
print('loading batch',batch_num, ba)
bb = min(ba+self.batch_size, len(self.oseq))
_count = bb-ba
for i in xrange(_count):
for i in range(_count):
idx = self.oseq[i+ba]
s = self.imgrec.read_idx(idx)
header, img = recordio.unpack(s)
@@ -323,7 +323,7 @@ class FaceImageIter(io.DataIter):
if X is None:
X = np.zeros( (len(self.id2range), nembedding.shape[1]), dtype=np.float32 )
nplabel = label.asnumpy()
for i in xrange(_count):
for i in range(_count):
ilabel = int(nplabel[i])
#print(ilabel, ilabel.__class__)
X[ilabel] += nembedding[i]
@@ -331,14 +331,14 @@ class FaceImageIter(io.DataIter):
X = sklearn.preprocessing.normalize(X)
d = X.shape[1]
t = AnnoyIndex(d, metric='euclidean')
for i in xrange(X.shape[0]):
for i in range(X.shape[0]):
t.add_item(i, X[i])
print('start to build index')
t.build(20)
print(X.shape)
k = self.per_identities
self.seq = []
for i in xrange(X.shape[0]):
for i in range(X.shape[0]):
nnlist = t.get_nns_by_item(i, k)
assert nnlist[0]==i
for _label in nnlist:
@@ -350,7 +350,7 @@ class FaceImageIter(io.DataIter):
random.shuffle(_list)
else:
_list = np.random.choice(_list, self.images_per_identity, replace=False)
for i in xrange(self.images_per_identity):
for i in range(self.images_per_identity):
_idx = _list[i%len(_list)]
self.seq.append(_idx)
#faiss_params = [20,5]
@@ -365,9 +365,9 @@ class FaceImageIter(io.DataIter):
#D, I = index.search(X, k) # actual search
#print(I.shape)
#self.seq = []
#for i in xrange(I.shape[0]):
#for i in range(I.shape[0]):
# #assert I[i][0]==i
# for j in xrange(k):
# for j in range(k):
# _label = I[i][j]
# assert _label<len(self.id2range)
# _id = self.header0[0]+_label
@@ -377,7 +377,7 @@ class FaceImageIter(io.DataIter):
# random.shuffle(_list)
# else:
# _list = np.random.choice(_list, self.images_per_identity, replace=False)
# for i in xrange(self.images_per_identity):
# for i in range(self.images_per_identity):
# _idx = _list[i%len(_list)]
# self.seq.append(_idx)
@@ -391,9 +391,10 @@ class FaceImageIter(io.DataIter):
elif not self.hard_mining:
self.seq = []
idlist = []
for _id,v in self.id2range.iteritems():
for _id in self.id2range:
v = self.id2range[_id]
idlist.append((_id,range(*v)))
for r in xrange(self.repeat):
for r in range(self.repeat):
if r%10==0:
print('repeat', r)
if self.shuffle:
@@ -406,7 +407,7 @@ class FaceImageIter(io.DataIter):
random.shuffle(_list)
else:
_list = np.random.choice(_list, self.images_per_identity, replace=False)
for i in xrange(self.images_per_identity):
for i in range(self.images_per_identity):
_idx = _list[i%len(_list)]
self.seq.append(_idx)
else:
@@ -470,7 +471,7 @@ class FaceImageIter(io.DataIter):
def mirror_aug(self, img):
_rd = random.randint(0,1)
if _rd==1:
for c in xrange(img.shape[2]):
for c in range(img.shape[2]):
img[:,:,c] = np.fliplr(img[:,:,c])
return img

BIN
resources/lfr19_wechat1.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 50 KiB