mirror of
https://github.com/deepinsight/insightface.git
synced 2026-05-17 14:26:08 +00:00
Merge branch 'master' of https://github.com/deepinsight/insightface
This commit is contained in:
6
PRNet.mxnet/README.md
Normal file
6
PRNet.mxnet/README.md
Normal file
@@ -0,0 +1,6 @@
|
||||
MXNet implementation of [Joint 3D Face Reconstruction and Dense Alignment with Position Map Regression Network](http://openaccess.thecvf.com/content_ECCV_2018/papers/Yao_Feng_Joint_3D_Face_ECCV_2018_paper.pdf).
|
||||
|
||||
Original [PyTorch implementation](https://github.com/YadiraF/PRNet)
|
||||
|
||||
Pretrained Models and details coming soon.
|
||||
|
||||
89
PRNet.mxnet/config.py
Normal file
89
PRNet.mxnet/config.py
Normal file
@@ -0,0 +1,89 @@
|
||||
import numpy as np

from easydict import EasyDict as edict

# Global configuration object; generate_config() below merges the chosen
# network preset and dataset preset into it (and into `default`).
config = edict()

# Default training / dataset settings.
config.num_classes = 3          # UV position map channels
config.input_img_size = 256     # network input resolution
config.output_label_size = 64   # UV-map label resolution

# ---- network presets ----
network = edict()

network.hourglass = edict()
network.hourglass.net_sta = 0
network.hourglass.net_n = 4
network.hourglass.net_dcn = 0
network.hourglass.net_stacks = 1
network.hourglass.net_block = 'resnet'
network.hourglass.net_binarize = False
network.hourglass.losstype = 'heatmap'
network.hourglass.multiplier = 1.0

network.prnet = edict()
network.prnet.net_sta = 0
network.prnet.net_n = 5
network.prnet.net_dcn = 0
network.prnet.net_stacks = 1
network.prnet.net_modules = 2
network.prnet.net_block = 'hpm'
network.prnet.net_binarize = False
network.prnet.losstype = 'heatmap'
network.prnet.multiplier = 0.25

network.hpm = edict()
network.hpm.net_sta = 0
network.hpm.net_n = 4
network.hpm.net_dcn = 0
network.hpm.net_stacks = 1
network.hpm.net_block = 'hpm'
network.hpm.net_binarize = False
network.hpm.losstype = 'heatmap'
network.hpm.multiplier = 1.0

# ---- dataset presets ----
dataset = edict()

dataset.prnet = edict()
dataset.prnet.dataset = '3D'
dataset.prnet.landmark_type = 'dense'
dataset.prnet.dataset_path = './data64'
dataset.prnet.num_classes = 3
dataset.prnet.input_img_size = 256
dataset.prnet.output_label_size = 64
dataset.prnet.val_targets = ['']

# ---- default run settings ----
default = edict()

# default network
default.network = 'hpm'
default.pretrained = ''
default.pretrained_epoch = 0
# default dataset
default.dataset = 'prnet'
default.frequent = 20
default.verbose = 200
default.kvstore = 'device'

default.prefix = 'model/A'
default.end_epoch = 10000
default.lr = 0.00025
default.wd = 0.0
default.per_batch_size = 20
default.lr_step = '16000,24000,30000'
def generate_config(_network, _dataset):
    """Merge the selected network and dataset presets into config/default.

    Dataset keys are applied after network keys, so a key defined by both
    presets takes its value from the dataset (same order as before).
    """
    for preset in (network[_network], dataset[_dataset]):
        for key, value in preset.items():
            config[key] = value
            default[key] = value
    config.network = _network
    config.dataset = _dataset
164
PRNet.mxnet/data.py
Normal file
164
PRNet.mxnet/data.py
Normal file
@@ -0,0 +1,164 @@
|
||||
# pylint: skip-file
|
||||
import mxnet as mx
|
||||
import numpy as np
|
||||
import sys, os
|
||||
import random
|
||||
import glob
|
||||
import math
|
||||
import scipy.misc
|
||||
import cv2
|
||||
import logging
|
||||
import sklearn
|
||||
import datetime
|
||||
import img_helper
|
||||
from mxnet.io import DataIter
|
||||
from mxnet import ndarray as nd
|
||||
from mxnet import io
|
||||
from mxnet import recordio
|
||||
from PIL import Image
|
||||
from config import config
|
||||
from skimage import transform as tf
|
||||
|
||||
|
||||
class FaceSegIter(DataIter):
    """Data iterator for PRNet UV-position-map regression.

    Pairs every ``*.jpg`` under ``path`` with a same-named ``*.npy`` UV map
    and yields batches of ``data`` (3,S,S) and ``softmax_label`` (C,L,L),
    plus a fixed per-pixel ``mask_label`` weight shared by every batch.
    """

    def __init__(self, path, batch_size,
                 per_batch_size=0,
                 aug_level=0,
                 force_mirror=False,
                 exf=1,
                 args=None):
        self.aug_level = aug_level
        self.force_mirror = force_mirror
        self.exf = exf
        self.batch_size = batch_size
        self.per_batch_size = per_batch_size
        self.image_file_list = []
        self.uv_file_list = []
        for _file in glob.glob(os.path.join(path, '*.jpg')):
            self.image_file_list.append(_file)
        for img in self.image_file_list:
            # foo.jpg -> foo.npy (replace the 3-character extension)
            uv_file = img[0:-3] + "npy"
            self.uv_file_list.append(uv_file)
        # FIX: materialize as a list — random.shuffle() in reset() raises
        # TypeError on a bare range object under Python 3.
        self.seq = list(range(len(self.image_file_list)))
        print('train size', len(self.seq))
        self.cur = 0
        self.reset()
        self.data_shape = (3, config.input_img_size, config.input_img_size)
        self.num_classes = config.num_classes
        self.input_img_size = config.input_img_size
        self.output_label_size = config.output_label_size
        self.label_shape = (self.num_classes, self.output_label_size,
                            self.output_label_size)
        self.provide_data = [('data', (batch_size,) + self.data_shape)]
        self.provide_label = [('softmax_label', (batch_size,) + self.label_shape),
                              ('mask_label', (batch_size,) + self.label_shape)]
        # Per-pixel loss weights, rescaled to the label resolution and
        # restricted to the visible face region by the second mask.
        weight_mask = cv2.imread('./uv-data/uv_weight_mask.png')
        print('weight_mask', weight_mask.shape)
        if weight_mask.shape[0] != self.output_label_size:
            weight_mask = cv2.resize(
                weight_mask, (self.output_label_size, self.output_label_size))
        weight_mask = weight_mask.astype(np.float32)
        weight_mask /= 255.0

        vis_mask = cv2.imread('./uv-data/uv_face_mask.png')
        print('vis_mask', vis_mask.shape)
        if vis_mask.shape[0] != self.output_label_size:
            vis_mask = cv2.resize(
                vis_mask, (self.output_label_size, self.output_label_size))
        vis_mask = vis_mask.astype(np.float32)
        vis_mask /= 255.0
        weight_mask *= vis_mask
        print('weight_mask', weight_mask.shape)
        weight_mask = weight_mask.transpose((2, 0, 1))  # HWC -> CHW
        # Broadcast the single mask across the whole batch once, up front.
        weight_mask = weight_mask[np.newaxis, :, :, :]
        print('weight_mask', weight_mask.shape)
        weight_mask = np.tile(weight_mask, (batch_size, 1, 1, 1))
        print('weight_mask', weight_mask.shape)
        self.weight_mask = nd.array(weight_mask)
        self.img_num = 0
        self.invalid_num = 0
        self.mode = 1
        self.vis = 0
        self.stats = [0, 0]

    def get_data_shape(self):
        return self.data_shape

    def get_shape_dict(self):
        """Return {name: shape} for every provided data and label output."""
        D = {}
        for (k, v) in self.provide_data:
            D[k] = v
        for (k, v) in self.provide_label:
            D[k] = v
        return D

    def get_label_names(self):
        D = []
        for (k, v) in self.provide_label:
            D.append(k)
        return D

    def reset(self):
        self.cur = 0
        # Shuffle only in augmented (training) mode.
        if self.aug_level > 0:
            random.shuffle(self.seq)

    def next_sample(self):
        """Helper function for reading in next sample."""
        if self.cur >= len(self.seq):
            raise StopIteration
        idx = self.seq[self.cur]
        self.cur += 1
        uv_path = self.uv_file_list[idx]
        image_path = self.image_file_list[idx]
        uvmap = np.load(uv_path)
        img = cv2.imread(image_path)[:, :, ::-1]  # BGR -> RGB
        hlabel = uvmap
        # Normalize coordinates to [0,1] relative to the input resolution.
        # NOTE(review): assumes the .npy stores floats — an integer dtype
        # would truncate under in-place division; confirm data generation.
        hlabel /= self.input_img_size
        return img, hlabel

    def next(self):
        """Returns the next batch of data."""
        batch_size = self.batch_size
        batch_data = nd.empty((batch_size,) + self.data_shape)
        batch_label = nd.empty((batch_size,) + self.label_shape)
        i = 0
        try:
            while i < batch_size:
                data, label = self.next_sample()
                data = nd.array(data)
                data = nd.transpose(data, axes=(2, 0, 1))
                label = nd.array(label)
                label = nd.transpose(label, axes=(2, 0, 1))
                batch_data[i][:] = data
                batch_label[i][:] = label
                i += 1
        except StopIteration:
            # A trailing partial batch is dropped rather than padded.
            if i < batch_size:
                raise StopIteration
        return mx.io.DataBatch([batch_data], [batch_label, self.weight_mask],
                               batch_size - i)
99
PRNet.mxnet/metric.py
Normal file
99
PRNet.mxnet/metric.py
Normal file
@@ -0,0 +1,99 @@
|
||||
import mxnet as mx
|
||||
import numpy as np
|
||||
import math
|
||||
import cv2
|
||||
from config import config
|
||||
|
||||
class LossValueMetric(mx.metric.EvalMetric):
    """Tracks the running mean of the raw loss output of the network."""

    def __init__(self):
        self.axis = 1
        super(LossValueMetric, self).__init__(
            'lossvalue', axis=self.axis,
            output_names=None, label_names=None)
        self.losses = []

    def update(self, labels, preds):
        # preds[0] is the per-pixel loss map; reduce it to one scalar.
        batch_loss = preds[0].asnumpy()
        self.sum_metric += np.mean(batch_loss)
        self.num_inst += 1.0
class NMEMetric(mx.metric.EvalMetric):
    """Normalized Mean Error for facial-landmark heatmap predictions.

    Normalizes by the inter-ocular distance for 2D landmarks
    (config.landmark_type=='2d') and by the diagonal of the ground-truth
    bounding box otherwise.
    """

    def __init__(self):
        self.axis = 1
        super(NMEMetric, self).__init__(
            'NME', axis=self.axis,
            output_names=None, label_names=None)
        self.count = 0

    def cal_nme(self, label, pred_label):
        """Mean NME over one batch.

        label: (B,P,H,W) GT heatmaps when ndim==4, else (B,P,2) coordinates.
        pred_label: (B,P,h,w) predicted heatmaps.
        """
        nme = []
        # FIX: range instead of Python-2-only xrange (works on both).
        for b in range(pred_label.shape[0]):
            # record[0..3]: eye-corner points 36/39/42/45;
            # record[4]/record[5]: per-axis min/max over all GT points.
            record = [None] * 6
            item = []
            if label.ndim == 4:
                _heatmap = label[b][36]
                if np.count_nonzero(_heatmap) == 0:
                    continue  # landmark 36 absent -> sample not annotated
            else:  # ndim==3
                if np.count_nonzero(label[b]) == 0:
                    continue
            for p in range(pred_label.shape[1]):
                if label.ndim == 4:
                    heatmap_gt = label[b][p]
                    ind_gt = np.unravel_index(
                        np.argmax(heatmap_gt, axis=None), heatmap_gt.shape)
                    ind_gt = np.array(ind_gt)
                else:
                    ind_gt = label[b][p]
                heatmap_pred = pred_label[b][p]
                # Upscale the predicted heatmap so the argmax lives in
                # input-image coordinates, like the GT.
                heatmap_pred = cv2.resize(
                    heatmap_pred,
                    (config.input_img_size, config.input_img_size))
                ind_pred = np.unravel_index(
                    np.argmax(heatmap_pred, axis=None), heatmap_pred.shape)
                ind_pred = np.array(ind_pred)
                if p == 36:
                    record[0] = ind_gt
                elif p == 39:
                    record[1] = ind_gt
                elif p == 42:
                    record[2] = ind_gt
                elif p == 45:
                    record[3] = ind_gt
                if record[4] is None or record[5] is None:
                    record[4] = ind_gt
                    record[5] = ind_gt
                else:
                    record[4] = np.minimum(record[4], ind_gt)
                    record[5] = np.maximum(record[5], ind_gt)
                value = np.sqrt(np.sum(np.square(ind_gt - ind_pred)))
                item.append(value)
            _nme = np.mean(item)
            if config.landmark_type == '2d':
                # Inter-ocular normalization: distance between eye centers.
                left_eye = (record[0] + record[1]) / 2
                right_eye = (record[2] + record[3]) / 2
                _dist = np.sqrt(np.sum(np.square(left_eye - right_eye)))
                _nme /= _dist
            else:
                # Bounding-box-diagonal normalization.
                _dist = np.sqrt(np.sum(np.square(record[5] - record[4])))
                _nme /= _dist
            nme.append(_nme)
        if not nme:
            # FIX: no annotated sample in the batch — return 0 instead of
            # letting np.mean([]) produce NaN and poison sum_metric.
            return 0.0
        return np.mean(nme)

    def update(self, labels, preds):
        self.count += 1
        label = labels[0].asnumpy()
        pred_label = preds[-1].asnumpy()
        nme = self.cal_nme(label, pred_label)
        self.sum_metric += np.mean(nme)
        self.num_inst += 1.0
435
PRNet.mxnet/symbol/sym_heatmap.py
Normal file
435
PRNet.mxnet/symbol/sym_heatmap.py
Normal file
@@ -0,0 +1,435 @@
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
import mxnet as mx
|
||||
import numpy as np
|
||||
from config import config
|
||||
|
||||
|
||||
# Global symbol-building knobs shared by every builder below.
ACT_BIT = 1        # activation bit width for binarized variants
bn_mom = 0.9       # BatchNorm momentum
workspace = 256    # convolution workspace limit (MB)
memonger = False   # tag shortcuts with mirror_stage for memory optimization
def Conv(**kwargs):
    """Thin wrapper around mx.sym.Convolution (single point of indirection)."""
    return mx.sym.Convolution(**kwargs)
def Act(data, act_type, name):
    """Activation helper: 'prelu' maps to LeakyReLU, anything else to Activation."""
    if act_type == 'prelu':
        return mx.sym.LeakyReLU(data=data, act_type='prelu', name=name)
    return mx.symbol.Activation(data=data, act_type=act_type, name=name)
#def lin(data, num_filter, workspace, name, binarize, dcn):
|
||||
# bit = 1
|
||||
# if not binarize:
|
||||
# if not dcn:
|
||||
# conv1 = Conv(data=data, num_filter=num_filter, kernel=(1,1), stride=(1,1), pad=(0,0),
|
||||
# no_bias=True, workspace=workspace, name=name + '_conv')
|
||||
# bn1 = mx.sym.BatchNorm(data=conv1, fix_gamma=False, momentum=bn_mom, eps=2e-5, name=name + '_bn')
|
||||
# act1 = Act(data=bn1, act_type='relu', name=name + '_relu')
|
||||
# return act1
|
||||
# else:
|
||||
# bn1 = mx.sym.BatchNorm(data=data, fix_gamma=False, momentum=bn_mom, eps=2e-5, name=name + '_bn')
|
||||
# act1 = Act(data=bn1, act_type='relu', name=name + '_relu')
|
||||
# conv1_offset = mx.symbol.Convolution(name=name+'_conv_offset', data = act1,
|
||||
# num_filter=18, pad=(1, 1), kernel=(3, 3), stride=(1, 1))
|
||||
# conv1 = mx.contrib.symbol.DeformableConvolution(name=name+"_conv", data=act1, offset=conv1_offset,
|
||||
# num_filter=num_filter, pad=(1,1), kernel=(3, 3), num_deformable_group=1, stride=(1, 1), dilate=(1, 1), no_bias=False)
|
||||
# #conv1 = Conv(data=act1, num_filter=num_filter, kernel=(3,3), stride=(1,1), pad=(1,1),
|
||||
# # no_bias=False, workspace=workspace, name=name + '_conv')
|
||||
# return conv1
|
||||
# else:
|
||||
# bn1 = mx.sym.BatchNorm(data=data, fix_gamma=False, momentum=bn_mom, eps=2e-5, name=name + '_bn')
|
||||
# act1 = Act(data=bn1, act_type='relu', name=name + '_relu')
|
||||
# conv1 = mx.sym.QConvolution_v1(data=act1, num_filter=num_filter, kernel=(1,1), stride=(1,1), pad=(0,0),
|
||||
# no_bias=True, workspace=workspace, name=name + '_conv', act_bit=ACT_BIT, weight_bit=bit)
|
||||
# conv1 = mx.sym.BatchNorm(data=conv1, fix_gamma=False, momentum=bn_mom, eps=2e-5, name=name + '_bn2')
|
||||
# return conv1
|
||||
|
||||
def lin3(data, num_filter, workspace, name, k, g=1, d=1):
    """conv(k x k) -> BN -> ReLU; only 3x3 kernels honor the dilation d."""
    if k != 3:
        conv1 = Conv(data=data, num_filter=num_filter, kernel=(k, k),
                     stride=(1, 1), pad=((k - 1) // 2, (k - 1) // 2),
                     num_group=g, no_bias=True, workspace=workspace,
                     name=name + '_conv')
    else:
        conv1 = Conv(data=data, num_filter=num_filter, kernel=(k, k),
                     stride=(1, 1), pad=(d, d), num_group=g, dilate=(d, d),
                     no_bias=True, workspace=workspace, name=name + '_conv')
    bn1 = mx.sym.BatchNorm(data=conv1, fix_gamma=False, momentum=bn_mom,
                           eps=2e-5, name=name + '_bn')
    return Act(data=bn1, act_type='relu', name=name + '_relu')
def ConvFactory(data, num_filter, kernel, stride=(1, 1), pad=(0, 0),
                act_type="relu", mirror_attr={}, with_act=True, dcn=False,
                name=''):
    """Conv (or deformable conv) -> BN, optionally followed by an activation.

    NOTE(review): the mutable default ``mirror_attr={}`` is kept for
    interface compatibility; it is only read, never mutated here.
    """
    if not dcn:
        conv = mx.symbol.Convolution(
            data=data, num_filter=num_filter, kernel=kernel, stride=stride,
            pad=pad, no_bias=True, workspace=workspace, name=name + '_conv')
    else:
        # Deformable path: offsets come from a plain 3x3 conv on the input.
        conv_offset = mx.symbol.Convolution(
            name=name + '_conv_offset', data=data, num_filter=18,
            pad=(1, 1), kernel=(3, 3), stride=(1, 1))
        conv = mx.contrib.symbol.DeformableConvolution(
            name=name + "_conv", data=data, offset=conv_offset,
            num_filter=num_filter, pad=(1, 1), kernel=(3, 3),
            num_deformable_group=1, stride=stride, dilate=(1, 1),
            no_bias=False)
    bn = mx.symbol.BatchNorm(data=conv, fix_gamma=False, momentum=bn_mom,
                             eps=2e-5, name=name + '_bn')
    if not with_act:
        return bn
    return Act(bn, act_type, name=name + '_relu')
class CAB:
    """Context Aggregation Block: a triangular grid of nodes indexed (w, h).

    Nodes on the diagonal hold the input symbol; the bottom row halves the
    channel count one 3x3 unit at a time; every other node averages an
    x-path (from node (w+1, h+1)) with a channel-doubled y-path (from node
    (w, h+1)). The block's output is node (1, 1). Intermediate results are
    memoized in sym_map so the grid is built only once.
    """

    def __init__(self, data, nFilters, nModules, n, workspace, name, dilate, group):
        self.data = data
        self.nFilters = nFilters
        self.nModules = nModules
        self.n = n
        self.workspace = workspace
        self.name = name
        self.dilate = dilate
        self.group = group
        self.sym_map = {}

    def get_output(self, w, h):
        key = (w, h)
        if key in self.sym_map:
            return self.sym_map[key]
        ret = None
        if h == self.n:
            if w == self.n:
                # Diagonal corner: the raw block input.
                ret = (self.data, self.nFilters)
            else:
                # Bottom row: halve the channels of the node to the right;
                # the unit adjacent to the corner uses the configured dilation.
                x = self.get_output(w + 1, h)
                f = int(x[1] * 0.5)
                if w != self.n - 1:
                    body = lin3(x[0], f, self.workspace,
                                "%s_w%d_h%d_1" % (self.name, w, h),
                                3, self.group, 1)
                else:
                    body = lin3(x[0], f, self.workspace,
                                "%s_w%d_h%d_1" % (self.name, w, h),
                                3, self.group, self.dilate)
                ret = (body, f)
        else:
            x = self.get_output(w + 1, h + 1)
            y = self.get_output(w, h + 1)
            if h % 2 == 1 and h != w:
                # Depthwise 3x3 refinement on odd off-diagonal rows.
                xbody = lin3(x[0], x[1], self.workspace,
                             "%s_w%d_h%d_2" % (self.name, w, h), 3, x[1])
            else:
                xbody = x[0]
            if w == 0:
                ybody = lin3(y[0], y[1], self.workspace,
                             "%s_w%d_h%d_3" % (self.name, w, h),
                             3, self.group)
            else:
                ybody = y[0]
            # Concat doubles the y-path channels (y has half of x's),
            # so the two paths can be averaged element-wise.
            ybody = mx.sym.concat(y[0], ybody, dim=1)
            body = mx.sym.add_n(xbody, ybody,
                                name="%s_w%d_h%d_add" % (self.name, w, h))
            body = body / 2
            ret = (body, x[1])
        self.sym_map[key] = ret
        return ret

    def get(self):
        """Return the symbol at grid node (1, 1) — the block output."""
        return self.get_output(1, 1)[0]
def conv_resnet(data, num_filter, stride, dim_match, name, binarize, dcn, dilate, **kwargs):
    """Pre-activation bottleneck residual unit: (BN-ReLU-Conv) x3 + shortcut.

    Follows https://github.com/facebook/fb.resnet.torch#notes, which differs
    slightly from the original paper.
    """
    bn1 = mx.sym.BatchNorm(data=data, fix_gamma=False, eps=2e-5,
                           momentum=bn_mom, name=name + '_bn1')
    act1 = Act(data=bn1, act_type='relu', name=name + '_relu1')
    conv1 = Conv(data=act1, num_filter=int(num_filter * 0.5), kernel=(1, 1),
                 stride=(1, 1), pad=(0, 0), no_bias=True,
                 workspace=workspace, name=name + '_conv1')
    bn2 = mx.sym.BatchNorm(data=conv1, fix_gamma=False, eps=2e-5,
                           momentum=bn_mom, name=name + '_bn2')
    act2 = Act(data=bn2, act_type='relu', name=name + '_relu2')
    conv2 = Conv(data=act2, num_filter=int(num_filter * 0.5), kernel=(3, 3),
                 stride=(1, 1), pad=(1, 1), no_bias=True,
                 workspace=workspace, name=name + '_conv2')
    bn3 = mx.sym.BatchNorm(data=conv2, fix_gamma=False, eps=2e-5,
                           momentum=bn_mom, name=name + '_bn3')
    act3 = Act(data=bn3, act_type='relu', name=name + '_relu3')
    conv3 = Conv(data=act3, num_filter=num_filter, kernel=(1, 1),
                 stride=(1, 1), pad=(0, 0), no_bias=True,
                 workspace=workspace, name=name + '_conv3')
    if dim_match:
        shortcut = data
    else:
        # Project the pre-activated input to the new channel count.
        shortcut = Conv(data=act1, num_filter=num_filter, kernel=(1, 1),
                        stride=stride, no_bias=True, workspace=workspace,
                        name=name + '_sc')
    if memonger:
        shortcut._set_attr(mirror_stage='True')
    return conv3 + shortcut
def conv_prnet(data, num_filter, stride, dim_match, name, binarize, dcn, dilate, **kwargs):
    """PRNet residual unit — structurally identical to conv_resnet
    (pre-activation bottleneck), kept as a separate entry for the
    'prnet' net_block preset.
    """
    bn1 = mx.sym.BatchNorm(data=data, fix_gamma=False, eps=2e-5,
                           momentum=bn_mom, name=name + '_bn1')
    act1 = Act(data=bn1, act_type='relu', name=name + '_relu1')
    conv1 = Conv(data=act1, num_filter=int(num_filter * 0.5), kernel=(1, 1),
                 stride=(1, 1), pad=(0, 0), no_bias=True,
                 workspace=workspace, name=name + '_conv1')
    bn2 = mx.sym.BatchNorm(data=conv1, fix_gamma=False, eps=2e-5,
                           momentum=bn_mom, name=name + '_bn2')
    act2 = Act(data=bn2, act_type='relu', name=name + '_relu2')
    conv2 = Conv(data=act2, num_filter=int(num_filter * 0.5), kernel=(3, 3),
                 stride=(1, 1), pad=(1, 1), no_bias=True,
                 workspace=workspace, name=name + '_conv2')
    bn3 = mx.sym.BatchNorm(data=conv2, fix_gamma=False, eps=2e-5,
                           momentum=bn_mom, name=name + '_bn3')
    act3 = Act(data=bn3, act_type='relu', name=name + '_relu3')
    conv3 = Conv(data=act3, num_filter=num_filter, kernel=(1, 1),
                 stride=(1, 1), pad=(0, 0), no_bias=True,
                 workspace=workspace, name=name + '_conv3')
    if dim_match:
        shortcut = data
    else:
        shortcut = Conv(data=act1, num_filter=num_filter, kernel=(1, 1),
                        stride=stride, no_bias=True, workspace=workspace,
                        name=name + '_sc')
    if memonger:
        shortcut._set_attr(mirror_stage='True')
    return conv3 + shortcut
def conv_hpm(data, num_filter, stride, dim_match, name, binarize, dcn, dilation, **kwargs):
    """HPM residual unit: concat of three chained, successively narrower
    dilated 3x3 branches (1/2, 1/4, 1/4 of num_filter) plus a shortcut.
    """
    bn1 = mx.sym.BatchNorm(data=data, fix_gamma=False, eps=2e-5,
                           momentum=bn_mom, name=name + '_bn1')
    act1 = Act(data=bn1, act_type='relu', name=name + '_relu1')
    conv1 = Conv(data=act1, num_filter=int(num_filter * 0.5), kernel=(3, 3),
                 stride=(1, 1), pad=(dilation, dilation),
                 dilate=(dilation, dilation), no_bias=True,
                 workspace=workspace, name=name + '_conv1')
    bn2 = mx.sym.BatchNorm(data=conv1, fix_gamma=False, eps=2e-5,
                           momentum=bn_mom, name=name + '_bn2')
    act2 = Act(data=bn2, act_type='relu', name=name + '_relu2')
    conv2 = Conv(data=act2, num_filter=int(num_filter * 0.25), kernel=(3, 3),
                 stride=(1, 1), pad=(dilation, dilation),
                 dilate=(dilation, dilation), no_bias=True,
                 workspace=workspace, name=name + '_conv2')
    bn3 = mx.sym.BatchNorm(data=conv2, fix_gamma=False, eps=2e-5,
                           momentum=bn_mom, name=name + '_bn3')
    act3 = Act(data=bn3, act_type='relu', name=name + '_relu3')
    conv3 = Conv(data=act3, num_filter=int(num_filter * 0.25), kernel=(3, 3),
                 stride=(1, 1), pad=(dilation, dilation),
                 dilate=(dilation, dilation), no_bias=True,
                 workspace=workspace, name=name + '_conv3')
    # 1/2 + 1/4 + 1/4 branch widths concat back to num_filter channels.
    conv4 = mx.symbol.Concat(*[conv1, conv2, conv3])
    if dim_match:
        shortcut = data
    else:
        shortcut = Conv(data=act1, num_filter=num_filter, kernel=(1, 1),
                        stride=stride, no_bias=True, workspace=workspace,
                        name=name + '_sc')
    if memonger:
        shortcut._set_attr(mirror_stage='True')
    return conv4 + shortcut
def block17(net, input_num_channels, scale=1.0, with_act=True, act_type='relu', mirror_attr={}, name=''):
    """Inception-ResNet-B style residual block with factorized 1x7/7x1 convs.

    NOTE(review): 129 filters (not 128) and pads (1,2)/(2,1) for kernels
    (1,7)/(7,1) look unusual — the two towers may produce mismatched
    spatial sizes. This block appears unused by get_symbol; confirm before
    enabling it.
    """
    tower_conv = ConvFactory(net, 192, (1, 1), name=name + '_conv')
    tower_conv1_0 = ConvFactory(net, 129, (1, 1), name=name + '_conv1_0')
    tower_conv1_1 = ConvFactory(tower_conv1_0, 160, (1, 7), pad=(1, 2),
                                name=name + '_conv1_1')
    tower_conv1_2 = ConvFactory(tower_conv1_1, 192, (7, 1), pad=(2, 1),
                                name=name + '_conv1_2')
    tower_mixed = mx.symbol.Concat(*[tower_conv, tower_conv1_2])
    tower_out = ConvFactory(tower_mixed, input_num_channels, (1, 1),
                            with_act=False, name=name + '_conv_out')
    net = net + scale * tower_out
    if not with_act:
        return net
    return mx.symbol.Activation(data=net, act_type=act_type, attr=mirror_attr)
def block35(net, input_num_channels, scale=1.0, with_act=True, act_type='relu', mirror_attr={}, name=''):
    """Inception-ResNet-A style residual block; the residual is scaled by
    `scale` before being added back to the input."""
    M = 1.0  # width multiplier for all towers
    tower_conv = ConvFactory(net, int(input_num_channels * 0.25 * M), (1, 1),
                             name=name + '_conv')
    tower_conv1_0 = ConvFactory(net, int(input_num_channels * 0.25 * M), (1, 1),
                                name=name + '_conv1_0')
    tower_conv1_1 = ConvFactory(tower_conv1_0,
                                int(input_num_channels * 0.25 * M), (3, 3),
                                pad=(1, 1), name=name + '_conv1_1')
    tower_conv2_0 = ConvFactory(net, int(input_num_channels * 0.25 * M), (1, 1),
                                name=name + '_conv2_0')
    tower_conv2_1 = ConvFactory(tower_conv2_0,
                                int(input_num_channels * 0.375 * M), (3, 3),
                                pad=(1, 1), name=name + '_conv2_1')
    tower_conv2_2 = ConvFactory(tower_conv2_1,
                                int(input_num_channels * 0.5 * M), (3, 3),
                                pad=(1, 1), name=name + '_conv2_2')
    tower_mixed = mx.symbol.Concat(*[tower_conv, tower_conv1_1, tower_conv2_2])
    tower_out = ConvFactory(tower_mixed, input_num_channels, (1, 1),
                            with_act=False, name=name + '_conv_out')

    net = net + scale * tower_out
    if not with_act:
        return net
    return mx.symbol.Activation(data=net, act_type=act_type, attr=mirror_attr)
def conv_inception(data, num_filter, stride, dim_match, name, binarize, dcn, dilate, **kwargs):
    """block35 residual unit; falls back to conv_resnet whenever the unit
    must downsample or change the channel count (block35 needs matching
    dimensions for its skip connection)."""
    assert not binarize
    if stride[0] > 1 or not dim_match:
        return conv_resnet(data, num_filter, stride, dim_match, name,
                           binarize, dcn, dilate, **kwargs)
    return block35(data, num_filter, name=name + '_block35')
def conv_cab(data, num_filter, stride, dim_match, name, binarize, dcn, dilate, **kwargs):
    """CAB residual unit; falls back to conv_hpm whenever the unit must
    downsample or change the channel count."""
    if stride[0] > 1 or not dim_match:
        return conv_hpm(data, num_filter, stride, dim_match, name,
                        binarize, dcn, dilate, **kwargs)
    return CAB(data, num_filter, 1, 4, workspace, name, dilate, 1).get()
def conv_block(data, num_filter, stride, dim_match, name, binarize, dcn, dilate):
    """Dispatch to the residual-unit builder selected by config.net_block.

    FIX: the original if/elif chain had no else branch, so a typo in
    config.net_block silently returned None and produced a confusing
    failure later; now an unknown value raises ValueError immediately.
    """
    builders = {
        'resnet': conv_resnet,
        'inception': conv_inception,
        'hpm': conv_hpm,
        'cab': conv_cab,
        'prnet': conv_prnet,
    }
    try:
        builder = builders[config.net_block]
    except KeyError:
        raise ValueError('unknown config.net_block: %r' % (config.net_block,))
    return builder(data, num_filter, stride, dim_match, name, binarize, dcn, dilate)
def hourglass(data, nFilters, nModules, n, workspace, name, binarize, dcn):
    """Recursive hourglass module of depth n.

    NOTE(review): the `up1` branch is computed but never used — the skip
    connection (add_n of up1 and up2) is commented out in the original, so
    only the pooled/recursed/upsampled path is returned. Confirm intent.
    """
    s = 2  # pooling / upsampling factor per level
    _dcn = False
    up1 = data
    # FIX: range instead of Python-2-only xrange (works on both versions).
    for i in range(nModules):
        up1 = conv_block(up1, nFilters, (1, 1), True,
                         "%s_up1_%d" % (name, i), binarize, _dcn, 1)
    low1 = mx.sym.Pooling(data=data, kernel=(s, s), stride=(s, s),
                          pad=(0, 0), pool_type='max')
    for i in range(nModules):
        low1 = conv_block(low1, nFilters, (1, 1), True,
                          "%s_low1_%d" % (name, i), binarize, _dcn, 1)
    if n > 1:
        # Recurse one level deeper at half resolution.
        low2 = hourglass(low1, nFilters, nModules, n - 1, workspace,
                         "%s_%d" % (name, n - 1), binarize, dcn)
    else:
        low2 = low1
        for i in range(nModules):
            low2 = conv_block(low2, nFilters, (1, 1), True,
                              "%s_low2_%d" % (name, i), binarize, _dcn, 1)
    low3 = low2
    for i in range(nModules):
        low3 = conv_block(low3, nFilters, (1, 1), True,
                          "%s_low3_%d" % (name, i), binarize, _dcn, 1)
    up2 = mx.symbol.UpSampling(low3, scale=s, sample_type='nearest',
                               workspace=512,
                               name='%s_upsampling_%s' % (name, n),
                               num_args=1)
    return up2
def prnet_loss(pred, gt_label, mask_label):
    """Masked L1 loss between predicted and ground-truth UV position maps."""
    residual = mx.symbol.abs(pred - gt_label)
    return mx.symbol.broadcast_mul(residual, mask_label)
def ce_loss(x, y):
    """Spatial softmax cross-entropy: softmax over each (H, W) map,
    then the mean of -y * log(p)."""
    x_max = mx.sym.max(x, axis=[2, 3], keepdims=True)
    shifted = mx.sym.broadcast_minus(x, x_max)  # numerical stability
    exp = mx.sym.exp(shifted)
    sums = mx.sym.sum(exp, axis=[2, 3], keepdims=True)
    prob = mx.sym.broadcast_div(exp, sums)
    loss = mx.sym.log(prob) * y * -1.0
    return mx.symbol.mean(loss)
def get_symbol(num_classes):
    """Build the single-stack hourglass symbol with the masked-L1 PRNet loss.

    Returns a grouped symbol [MakeLoss(loss), BlockGrad(pred)] where pred is
    the raw num_classes-channel heatmap output.
    """
    m = config.multiplier
    sFilters = max(int(64 * m), 16)
    mFilters = max(int(128 * m), 32)
    nFilters = int(256 * m)

    nModules = config.net_modules
    nStacks = config.net_stacks
    binarize = config.net_binarize
    input_size = config.input_img_size
    label_size = config.output_label_size
    use_STA = config.net_sta
    N = config.net_n
    DCN = config.net_dcn
    per_batch_size = config.per_batch_size
    print('binarize', binarize)
    print('use_STA', use_STA)
    print('use_N', N)
    print('use_DCN', DCN)
    print('per_batch_size', per_batch_size)
    D = input_size // label_size  # input-to-label downscale factor
    print(input_size, label_size, D)
    data = mx.sym.Variable(name='data')
    # Normalize pixels to roughly [-1, 1): (x - 127.5) * 1/128.
    data = data - 127.5
    data = data * 0.0078125
    gt_label = mx.symbol.Variable(name='softmax_label')
    mask_label = mx.symbol.Variable(name='mask_label')
    # Stem: 7x7/s2 conv + BN + ReLU, one residual unit, 2x2 max-pool —
    # a 4x spatial reduction before the hourglass.
    body = Conv(data=data, num_filter=sFilters, kernel=(7, 7), stride=(2, 2),
                pad=(3, 3), no_bias=True, name="conv0", workspace=workspace)
    body = mx.sym.BatchNorm(data=body, fix_gamma=False, eps=2e-5,
                            momentum=bn_mom, name='bn0')
    body = Act(data=body, act_type='relu', name='relu0')

    dcn = False
    body = conv_block(body, mFilters, (1, 1), sFilters == mFilters, 'res0',
                      False, dcn, 1)
    body = mx.sym.Pooling(data=body, kernel=(2, 2), stride=(2, 2),
                          pad=(0, 0), pool_type='max')
    body = conv_block(body, nFilters, (1, 1), mFilters == nFilters, 'res2',
                      binarize, dcn, 1)

    outs = []
    body = hourglass(body, nFilters, nModules, config.net_n, workspace,
                     'stack0_hg', binarize, dcn)
    # FIX: range instead of Python-2-only xrange (works on both versions).
    for j in range(nModules):
        body = conv_block(body, nFilters, (1, 1), True,
                          'stack0_unit%d' % (j), binarize, dcn, 1)
    _dcn = False
    ll = ConvFactory(body, nFilters, (1, 1), dcn=_dcn, name='stack0_ll')
    pred = Conv(data=ll, num_filter=num_classes, kernel=(1, 1),
                stride=(1, 1), pad=(0, 0), name='heatmap',
                workspace=workspace)
    loss = prnet_loss(pred, gt_label, mask_label)
    outs.append(mx.sym.MakeLoss(loss))

    # Expose the raw prediction for evaluation without contributing to the
    # gradient a second time.
    pred = mx.symbol.BlockGrad(pred)
    outs.append(pred)
    return mx.symbol.Group(outs)
def init_weights(sym, data_shape_dict):
|
||||
#print('in hg')
|
||||
arg_name = sym.list_arguments()
|
||||
aux_name = sym.list_auxiliary_states()
|
||||
arg_shape, _, aux_shape = sym.infer_shape(**data_shape_dict)
|
||||
arg_shape_dict = dict(zip(arg_name, arg_shape))
|
||||
aux_shape_dict = dict(zip(aux_name, aux_shape))
|
||||
#print(aux_shape)
|
||||
#print(aux_params)
|
||||
#print(arg_shape_dict)
|
||||
arg_params = {}
|
||||
aux_params = {}
|
||||
for k,v in arg_shape_dict.iteritems():
|
||||
#print(k,v)
|
||||
if k.endswith('offset_weight') or k.endswith('offset_bias'):
|
||||
print('initializing',k)
|
||||
arg_params[k] = mx.nd.zeros(shape = v)
|
||||
elif k.startswith('fc6_'):
|
||||
if k.endswith('_weight'):
|
||||
print('initializing',k)
|
||||
arg_params[k] = mx.random.normal(0, 0.01, shape=v)
|
||||
elif k.endswith('_bias'):
|
||||
print('initializing',k)
|
||||
arg_params[k] = mx.nd.zeros(shape=v)
|
||||
elif k.find('upsampling')>=0:
|
||||
print('initializing upsampling_weight', k)
|
||||
arg_params[k] = mx.nd.zeros(shape=arg_shape_dict[k])
|
||||
init = mx.init.Initializer()
|
||||
init._init_bilinear(k, arg_params[k])
|
||||
return arg_params, aux_params
|
||||
|
||||
215
PRNet.mxnet/train.py
Normal file
215
PRNet.mxnet/train.py
Normal file
@@ -0,0 +1,215 @@
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import logging
|
||||
import argparse
|
||||
from data import FaceSegIter
|
||||
import mxnet as mx
|
||||
import mxnet.optimizer as optimizer
|
||||
import numpy as np
|
||||
import os
|
||||
import sys
|
||||
import math
|
||||
import random
|
||||
import cv2
|
||||
from config import config, default, generate_config
|
||||
from optimizer import ONadam
|
||||
from metric import LossValueMetric, NMEMetric
|
||||
sys.path.append(os.path.join(os.path.dirname(__file__), 'symbol'))
|
||||
import sym_heatmap
|
||||
#import sym_fc
|
||||
#from symbol import fc
|
||||
|
||||
|
||||
args = None
|
||||
logger = logging.getLogger()
|
||||
logger.setLevel(logging.INFO)
|
||||
|
||||
|
||||
def main(args):
|
||||
_seed = 727
|
||||
random.seed(_seed)
|
||||
np.random.seed(_seed)
|
||||
mx.random.seed(_seed)
|
||||
ctx = []
|
||||
cvd = os.environ['CUDA_VISIBLE_DEVICES'].strip()
|
||||
if len(cvd)>0:
|
||||
for i in xrange(len(cvd.split(','))):
|
||||
ctx.append(mx.gpu(i))
|
||||
if len(ctx)==0:
|
||||
ctx = [mx.cpu()]
|
||||
print('use cpu')
|
||||
else:
|
||||
print('gpu num:', len(ctx))
|
||||
#ctx = [mx.gpu(0)]
|
||||
args.ctx_num = len(ctx)
|
||||
|
||||
args.batch_size = args.per_batch_size*args.ctx_num
|
||||
config.per_batch_size = args.per_batch_size
|
||||
|
||||
|
||||
|
||||
print('Call with', args, config)
|
||||
train_iter = FaceSegIter(path = config.dataset_path,
|
||||
batch_size = args.batch_size,
|
||||
per_batch_size = args.per_batch_size,
|
||||
aug_level = 1,
|
||||
exf = args.exf,
|
||||
args = args,
|
||||
)
|
||||
|
||||
data_shape = train_iter.get_data_shape()
|
||||
#label_shape = train_iter.get_label_shape()
|
||||
sym = sym_heatmap.get_symbol(num_classes=config.num_classes)
|
||||
if len(args.pretrained)==0:
|
||||
#data_shape_dict = {'data' : (args.per_batch_size,)+data_shape, 'softmax_label' : (args.per_batch_size,)+label_shape}
|
||||
data_shape_dict = train_iter.get_shape_dict()
|
||||
arg_params, aux_params = sym_heatmap.init_weights(sym, data_shape_dict)
|
||||
else:
|
||||
vec = args.pretrained.split(',')
|
||||
print('loading', vec)
|
||||
_, arg_params, aux_params = mx.model.load_checkpoint(vec[0], int(vec[1]))
|
||||
#sym, arg_params, aux_params = get_symbol(args, arg_params, aux_params)
|
||||
|
||||
model = mx.mod.Module(
|
||||
context = ctx,
|
||||
symbol = sym,
|
||||
label_names = train_iter.get_label_names(),
|
||||
)
|
||||
#lr = 1.0e-3
|
||||
#lr = 2.5e-4
|
||||
#_rescale_grad = 1.0/args.ctx_num
|
||||
_rescale_grad = 1.0/args.batch_size
|
||||
#lr = args.lr
|
||||
#opt = optimizer.Nadam(learning_rate=args.lr, wd=args.wd, rescale_grad=_rescale_grad, clip_gradient=5.0)
|
||||
if args.optimizer=='onadam':
|
||||
opt = ONadam(learning_rate=args.lr, wd=args.wd, rescale_grad=_rescale_grad, clip_gradient=5.0)
|
||||
elif args.optimizer=='nadam':
|
||||
opt = optimizer.Nadam(learning_rate=args.lr, rescale_grad=_rescale_grad)
|
||||
elif args.optimizer=='rmsprop':
|
||||
opt = optimizer.RMSProp(learning_rate=args.lr, rescale_grad=_rescale_grad)
|
||||
elif args.optimizer=='adam':
|
||||
opt = optimizer.Adam(learning_rate=args.lr, rescale_grad=_rescale_grad)
|
||||
else:
|
||||
opt = optimizer.SGD(learning_rate=args.lr, momentum=0.9, wd=args.wd, rescale_grad=_rescale_grad)
|
||||
initializer = mx.init.Xavier(rnd_type='gaussian', factor_type="in", magnitude=2)
|
||||
_cb = mx.callback.Speedometer(args.batch_size, args.frequent)
|
||||
_metric = LossValueMetric()
|
||||
#_metric = NMEMetric()
|
||||
#_metric2 = AccMetric()
|
||||
#eval_metrics = [_metric, _metric2]
|
||||
eval_metrics = [_metric]
|
||||
lr_steps = [int(x) for x in args.lr_step.split(',')]
|
||||
print('lr-steps', lr_steps)
|
||||
global_step = [0]
|
||||
|
||||
def val_test():
|
||||
all_layers = sym.get_internals()
|
||||
vsym = all_layers['heatmap_output']
|
||||
vmodel = mx.mod.Module(symbol=vsym, context=ctx, label_names = None)
|
||||
#model.bind(data_shapes=[('data', (args.batch_size, 3, image_size[0], image_size[1]))], label_shapes=[('softmax_label', (args.batch_size,))])
|
||||
vmodel.bind(data_shapes=[('data', (args.batch_size,)+data_shape)])
|
||||
arg_params, aux_params = model.get_params()
|
||||
vmodel.set_params(arg_params, aux_params)
|
||||
for target in config.val_targets:
|
||||
_file = os.path.join(config.dataset_path, '%s.rec'%target)
|
||||
if not os.path.exists(_file):
|
||||
continue
|
||||
val_iter = FaceSegIter(path_imgrec = _file,
|
||||
batch_size = args.batch_size,
|
||||
#batch_size = 4,
|
||||
aug_level = 0,
|
||||
args = args,
|
||||
)
|
||||
_metric = LossValueMetric()
|
||||
val_metric = mx.metric.create(_metric)
|
||||
val_metric.reset()
|
||||
val_iter.reset()
|
||||
diffs = []
|
||||
for i, eval_batch in enumerate(val_iter):
|
||||
#print(eval_batch.data[0].shape, eval_batch.label[0].shape)
|
||||
batch_data = mx.io.DataBatch(eval_batch.data)
|
||||
model.forward(batch_data, is_train=False)
|
||||
_label = eval_batch.label[0].asnumpy()
|
||||
_pred = model.get_outputs()[-1].asnumpy()
|
||||
_diff = np.abs(_pred-_label)
|
||||
_diff = np.mean(_diff)*config.input_img_size
|
||||
#print('pred', _pred.shape, _label.shape)
|
||||
#print('diff', _diff)
|
||||
diffs.append(_diff)
|
||||
model.update_metric(val_metric, eval_batch.label)
|
||||
nme_value = val_metric.get_name_value()[0][1]
|
||||
print('[%d][%s]LOSS: %f'%(global_step[0], target, nme_value))
|
||||
print('avg diff', np.mean(diffs))
|
||||
|
||||
def _batch_callback(param):
|
||||
_cb(param)
|
||||
global_step[0]+=1
|
||||
mbatch = global_step[0]
|
||||
for _lr in lr_steps:
|
||||
if mbatch==_lr:
|
||||
if args.optimizer=='sgd':
|
||||
opt.lr *= 0.1
|
||||
else:
|
||||
opt.lr *= 0.5
|
||||
print('lr change to', opt.lr)
|
||||
break
|
||||
if mbatch%1000==0:
|
||||
print('lr-batch-epoch:',opt.lr,param.nbatch,param.epoch)
|
||||
if mbatch>0 and mbatch%args.verbose==0:
|
||||
val_test()
|
||||
if args.ckpt==1:
|
||||
msave = mbatch//args.verbose
|
||||
print('saving', msave)
|
||||
arg, aux = model.get_params()
|
||||
mx.model.save_checkpoint(args.prefix, msave, model.symbol, arg, aux)
|
||||
if mbatch==lr_steps[-1]:
|
||||
if args.ckpt==2:
|
||||
#msave = mbatch//args.verbose
|
||||
msave = 1
|
||||
print('saving', msave)
|
||||
arg, aux = model.get_params()
|
||||
mx.model.save_checkpoint(args.prefix, msave, model.symbol, arg, aux)
|
||||
sys.exit(0)
|
||||
|
||||
train_iter = mx.io.PrefetchingIter(train_iter)
|
||||
|
||||
model.fit(train_iter,
|
||||
begin_epoch = 0,
|
||||
num_epoch = 9999,
|
||||
#eval_data = val_iter,
|
||||
eval_data = None,
|
||||
eval_metric = eval_metrics,
|
||||
kvstore = 'device',
|
||||
optimizer = opt,
|
||||
initializer = initializer,
|
||||
arg_params = arg_params,
|
||||
aux_params = aux_params,
|
||||
allow_missing = True,
|
||||
batch_end_callback = _batch_callback,
|
||||
epoch_end_callback = None,
|
||||
)
|
||||
|
||||
if __name__ == '__main__':
|
||||
parser = argparse.ArgumentParser(description='Train face alignment')
|
||||
# general
|
||||
parser.add_argument('--network', help='network name', default=default.network, type=str)
|
||||
parser.add_argument('--dataset', help='dataset name', default=default.dataset, type=str)
|
||||
args, rest = parser.parse_known_args()
|
||||
generate_config(args.network, args.dataset)
|
||||
parser.add_argument('--prefix', default=default.prefix, help='directory to save model.')
|
||||
parser.add_argument('--pretrained', default=default.pretrained, help='')
|
||||
parser.add_argument('--optimizer', default='nadam', help='')
|
||||
parser.add_argument('--lr', type=float, default=default.lr, help='')
|
||||
parser.add_argument('--wd', type=float, default=default.wd, help='')
|
||||
parser.add_argument('--per-batch-size', type=int, default=default.per_batch_size, help='')
|
||||
parser.add_argument('--lr-step', help='learning rate steps (in epoch)', default=default.lr_step, type=str)
|
||||
parser.add_argument('--ckpt', type=int, default=1, help='')
|
||||
parser.add_argument('--norm', type=int, default=0, help='')
|
||||
parser.add_argument('--exf', type=int, default=1, help='')
|
||||
parser.add_argument('--frequent', type=int, default=default.frequent, help='')
|
||||
parser.add_argument('--verbose', type=int, default=default.verbose, help='')
|
||||
args = parser.parse_args()
|
||||
main(args)
|
||||
|
||||
36
README.md
36
README.md
@@ -5,8 +5,10 @@ By Jia Guo and [Jiankang Deng](https://jiankangdeng.github.io/)
|
||||
|
||||
## License
|
||||
|
||||
The code of InsightFace is released under the MIT License.
|
||||
The code of InsightFace is released under the MIT License. There is no limitation for both acadmic and commercial usage.
|
||||
|
||||
The training data containing the annotation (and the models trained with these data) are available for non-commercial research purposes only.
|
||||
|
||||
## ArcFace Video Demo
|
||||
|
||||
[](https://www.youtube.com/watch?v=y-D1tReryGA&t=81s)
|
||||
@@ -15,10 +17,16 @@ Please click the image to watch the Youtube video. For Bilibili users, click [he
|
||||
|
||||
## Recent Update
|
||||
|
||||
**`2019.04.14`**: We will launch a Light-weight Face Recognition challenge/workshop on ICCV 2019.
|
||||
**`2019.08.10`**: We achieved 2nd place at [WIDER Face Detection Challenge 2019](http://wider-challenge.org/2019.html).
|
||||
|
||||
**`2019.04.04`**: Arcface achieved state-of-the-art performance (5/109) on the NIST Face Recognition Vendor Test (FRVT) (1:1 verification)
|
||||
[report](https://www.nist.gov/sites/default/files/documents/2019/04/04/frvt_report_2019_04_04.pdf) (name: Imperial-000). Our solution is based on [MS1MV2+DeepGlintAsian, ResNet100, ArcFace loss].
|
||||
**`2019.05.30`**: [Presentation at cvmart](https://pan.baidu.com/s/1v9fFHBJ8Q9Kl9Z6GwhbY6A)
|
||||
|
||||
**`2019.04.30`**: Our Face detector ([RetinaFace](https://github.com/deepinsight/insightface/tree/master/RetinaFace)) obtains state-of-the-art results on [the WiderFace dataset](http://shuoyang1213.me/WIDERFACE/WiderFace_Results.html).
|
||||
|
||||
**`2019.04.14`**: We will launch a [Light-weight Face Recognition challenge/workshop](https://github.com/deepinsight/insightface/tree/master/iccv19-challenge) on ICCV 2019.
|
||||
|
||||
**`2019.04.04`**: Arcface achieved state-of-the-art performance (7/109) on the NIST Face Recognition Vendor Test (FRVT) (1:1 verification)
|
||||
[report](https://www.nist.gov/sites/default/files/documents/2019/04/04/frvt_report_2019_04_04.pdf) (name: Imperial-000 and Imperial-001). Our solution is based on [MS1MV2+DeepGlintAsian, ResNet100, ArcFace loss].
|
||||
|
||||
**`2019.02.08`**: Please check [https://github.com/deepinsight/insightface/tree/master/recognition](https://github.com/deepinsight/insightface/tree/master/recognition) for our parallel training code which can easily and efficiently support one million identities on a single machine (8* 1080ti).
|
||||
|
||||
@@ -203,6 +211,7 @@ For single cropped face image(112x112), total inference time is only 17ms on our
|
||||
|
||||
- TensorFlow: [InsightFace_TF](https://github.com/auroua/InsightFace_TF)
|
||||
- TensorFlow: [tf-insightface](https://github.com/AIInAi/tf-insightface)
|
||||
- TensorFlow:[insightface](https://github.com/Fei-Wang/insightface)
|
||||
- PyTorch: [InsightFace_Pytorch](https://github.com/TreB1eN/InsightFace_Pytorch)
|
||||
- PyTorch: [arcface-pytorch](https://github.com/ronghuaiyang/arcface-pytorch)
|
||||
- Caffe: [arcface-caffe](https://github.com/xialuxi/arcface-caffe)
|
||||
@@ -212,23 +221,38 @@ For single cropped face image(112x112), total inference time is only 17ms on our
|
||||
|
||||
## Face Alignment
|
||||
|
||||
Todo
|
||||
Please check the [Menpo](https://github.com/jiankangdeng/MenpoBenchmark) Benchmark and [Dense U-Net](https://github.com/deepinsight/insightface/tree/master/alignment) for more details.
|
||||
|
||||
## Face Detection
|
||||
|
||||
Todo
|
||||
Please check [RetinaFace](https://github.com/deepinsight/insightface/tree/master/RetinaFace) for more details.
|
||||
|
||||
## Citation
|
||||
|
||||
If you find *InsightFace* useful in your research, please consider to cite the following related papers:
|
||||
|
||||
```
|
||||
@inproceedings{deng2019retinaface,
|
||||
title={RetinaFace: Single-stage Dense Face Localisation in the Wild},
|
||||
author={Deng, Jiankang and Guo, Jia and Yuxiang, Zhou and Jinke Yu and Irene Kotsia and Zafeiriou, Stefanos},
|
||||
booktitle={arxiv},
|
||||
year={2019}
|
||||
}
|
||||
|
||||
@inproceedings{guo2018stacked,
|
||||
title={Stacked Dense U-Nets with Dual Transformers for Robust Face Alignment},
|
||||
author={Guo, Jia and Deng, Jiankang and Xue, Niannan and Zafeiriou, Stefanos},
|
||||
booktitle={BMVC},
|
||||
year={2018}
|
||||
}
|
||||
|
||||
@article{deng2018menpo,
|
||||
title={The Menpo benchmark for multi-pose 2D and 3D facial landmark localisation and tracking},
|
||||
author={Deng, Jiankang and Roussos, Anastasios and Chrysos, Grigorios and Ververas, Evangelos and Kotsia, Irene and Shen, Jie and Zafeiriou, Stefanos},
|
||||
journal={IJCV},
|
||||
year={2018}
|
||||
}
|
||||
|
||||
@inproceedings{deng2018arcface,
|
||||
title={ArcFace: Additive Angular Margin Loss for Deep Face Recognition},
|
||||
author={Deng, Jiankang and Guo, Jia and Niannan, Xue and Zafeiriou, Stefanos},
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
## Introduction
|
||||
|
||||
RetinaFace is a practical single-stage face detector which is initially described in [arXiv technical report](https://arxiv.org/abs/1905.00641)
|
||||
RetinaFace is a practical single-stage [SOTA](http://shuoyang1213.me/WIDERFACE/WiderFace_Results.html) face detector which is initially described in [arXiv technical report](https://arxiv.org/abs/1905.00641)
|
||||
|
||||

|
||||
|
||||
@@ -40,7 +40,7 @@ RetinaFace is a practical single-stage face detector which is initially describe
|
||||
Please check ``train.py`` for training.
|
||||
|
||||
1. Copy ``rcnn/sample_config.py`` to ``rcnn/config.py``
|
||||
2. Download pretrained models and put them into ``model/``.
|
||||
2. Download ImageNet pretrained models and put them into ``model/``(these models are not for detection testing/inferencing but training and parameters initialization).
|
||||
|
||||
ImageNet ResNet50 ([baidu cloud](https://pan.baidu.com/s/1WAkU9ZA_j-OmzO-sdk9whA) and [dropbox](https://www.dropbox.com/s/48b850vmnaaasfl/imagenet-resnet-50.zip?dl=0)).
|
||||
|
||||
@@ -54,7 +54,7 @@ Before training, you can check the ``resnet`` network configuration (e.g. pretra
|
||||
|
||||
Please check ``test.py`` for testing.
|
||||
|
||||
## Models
|
||||
## RetinaFace Pretrained Models
|
||||
|
||||
Pretrained Model: RetinaFace-R50 ([baidu cloud](https://pan.baidu.com/s/1C6nKq122gJxRhb37vK0_LQ) or [dropbox](https://www.dropbox.com/s/53ftnlarhyrpkg2/retinaface-R50.zip?dl=0)) is a medium size model with ResNet50 backbone.
|
||||
It can output face bounding boxes and five facial landmarks in a single forward pass.
|
||||
@@ -63,6 +63,13 @@ WiderFace validation mAP: Easy 96.5, Medium 95.6, Hard 90.4.
|
||||
|
||||
To avoid the confliction with the WiderFace Challenge (ICCV 2019), we postpone the release time of our best model.
|
||||
|
||||
## Third-party Models
|
||||
|
||||
[yangfly](https://github.com/yangfly): RetinaFace-MobileNet0.25 ([baidu cloud](https://pan.baidu.com/s/1P1ypO7VYUbNAezdvLm2m9w)).
|
||||
WiderFace validation mAP: Hard 82.5. (model size: 1.68Mb)
|
||||
|
||||
[clancylian](https://github.com/clancylian/retinaface): C++ version
|
||||
|
||||
## References
|
||||
|
||||
```
|
||||
|
||||
@@ -82,7 +82,7 @@ def get_image(roidb, scale=False):
|
||||
im = im.astype(np.float32)
|
||||
boxes_mask = roi_rec['boxes_mask'].copy() * im_scale
|
||||
boxes_mask = boxes_mask.astype(np.int)
|
||||
for j in xrange(boxes_mask.shape[0]):
|
||||
for j in range(boxes_mask.shape[0]):
|
||||
m = boxes_mask[j]
|
||||
im_tensor[:,:,m[1]:m[3],m[0]:m[2]] = 0.0
|
||||
#print('find mask', m, file=sys.stderr)
|
||||
@@ -131,7 +131,7 @@ def __get_crop_image(roidb):
|
||||
#im = im.astype(np.float32)
|
||||
boxes_mask = roi_rec['boxes_mask'].copy()
|
||||
boxes_mask = boxes_mask.astype(np.int)
|
||||
for j in xrange(boxes_mask.shape[0]):
|
||||
for j in range(boxes_mask.shape[0]):
|
||||
m = boxes_mask[j]
|
||||
im[m[1]:m[3],m[0]:m[2],:] = 0
|
||||
#print('find mask', m, file=sys.stderr)
|
||||
@@ -143,7 +143,7 @@ def __get_crop_image(roidb):
|
||||
TARGET_BOX_SCALES = np.array([16,32,64,128,256,512])
|
||||
assert roi_rec['boxes'].shape[0]>0
|
||||
candidates = []
|
||||
for i in xrange(roi_rec['boxes'].shape[0]):
|
||||
for i in range(roi_rec['boxes'].shape[0]):
|
||||
box = roi_rec['boxes'][i]
|
||||
box_size = max(box[2]-box[0], box[3]-box[1])
|
||||
if box_size<config.TRAIN.MIN_BOX_SIZE:
|
||||
@@ -181,7 +181,7 @@ def __get_crop_image(roidb):
|
||||
im = cv2.warpAffine(im, M, (SIZE, SIZE), borderValue = tuple(config.PIXEL_MEANS))
|
||||
#tbox = np.array([left, left+SIZE, up, up+SIZE], dtype=np.int)
|
||||
#im_new = np.zeros( (SIZE, SIZE,3), dtype=im.dtype)
|
||||
#for i in xrange(3):
|
||||
#for i in range(3):
|
||||
# im_new[:,:,i] = config.PIXEL_MEANS[i]
|
||||
new_rec['boxes'][:,0] -= left
|
||||
new_rec['boxes'][:,2] -= left
|
||||
@@ -192,7 +192,7 @@ def __get_crop_image(roidb):
|
||||
#print('before', new_rec['boxes'].shape[0])
|
||||
boxes_new = []
|
||||
classes_new = []
|
||||
for i in xrange(new_rec['boxes'].shape[0]):
|
||||
for i in range(new_rec['boxes'].shape[0]):
|
||||
box = new_rec['boxes'][i]
|
||||
box_size = max(box[2]-box[0], box[3]-box[1])
|
||||
center = np.array(([box[0], box[1]]+[box[2], box[3]]))/2
|
||||
@@ -211,7 +211,7 @@ def __get_crop_image(roidb):
|
||||
global TMP_ID
|
||||
if TMP_ID<10:
|
||||
tim = im.copy()
|
||||
for i in xrange(new_rec['boxes'].shape[0]):
|
||||
for i in range(new_rec['boxes'].shape[0]):
|
||||
box = new_rec['boxes'][i].copy().astype(np.int)
|
||||
cv2.rectangle(tim, (box[0], box[1]), (box[2], box[3]), (255, 0, 0), 1)
|
||||
filename = './trainimages/train%d.png' % TMP_ID
|
||||
@@ -279,7 +279,7 @@ def get_crop_image1(roidb):
|
||||
#im = im.astype(np.float32)
|
||||
boxes_mask = roi_rec['boxes_mask'].copy()
|
||||
boxes_mask = boxes_mask.astype(np.int)
|
||||
for j in xrange(boxes_mask.shape[0]):
|
||||
for j in range(boxes_mask.shape[0]):
|
||||
m = boxes_mask[j]
|
||||
im[m[1]:m[3],m[0]:m[2],:] = 127
|
||||
#print('find mask', m, file=sys.stderr)
|
||||
@@ -342,7 +342,7 @@ def get_crop_image1(roidb):
|
||||
#print(origin_shape, im_new.shape, im_scale)
|
||||
valid = []
|
||||
valid_boxes = []
|
||||
for i in xrange(boxes_new.shape[0]):
|
||||
for i in range(boxes_new.shape[0]):
|
||||
box = boxes_new[i]
|
||||
#center = np.array(([box[0], box[1]]+[box[2], box[3]]))/2
|
||||
centerx = (box[0]+box[2])/2
|
||||
@@ -385,12 +385,12 @@ def get_crop_image1(roidb):
|
||||
global TMP_ID
|
||||
if TMP_ID>=0 and TMP_ID<10:
|
||||
tim = im.copy().astype(np.uint8)
|
||||
for i in xrange(new_rec['boxes'].shape[0]):
|
||||
for i in range(new_rec['boxes'].shape[0]):
|
||||
box = new_rec['boxes'][i].copy().astype(np.int)
|
||||
cv2.rectangle(tim, (box[0], box[1]), (box[2], box[3]), (255, 0, 0), 1)
|
||||
print('draw box:', box)
|
||||
if config.FACE_LANDMARK:
|
||||
for i in xrange(new_rec['landmarks'].shape[0]):
|
||||
for i in range(new_rec['landmarks'].shape[0]):
|
||||
landmark = new_rec['landmarks'][i].copy()
|
||||
if landmark[0][2]<0:
|
||||
print('zero', landmark)
|
||||
@@ -444,14 +444,14 @@ def get_crop_image2(roidb):
|
||||
#im = im.astype(np.float32)
|
||||
boxes_mask = roi_rec['boxes_mask'].copy()
|
||||
boxes_mask = boxes_mask.astype(np.int)
|
||||
for j in xrange(boxes_mask.shape[0]):
|
||||
for j in range(boxes_mask.shape[0]):
|
||||
m = boxes_mask[j]
|
||||
im[m[1]:m[3],m[0]:m[2],:] = 0
|
||||
#print('find mask', m, file=sys.stderr)
|
||||
SIZE = config.SCALES[0][0]
|
||||
scale_array = np.array([16,32,64,128,256,512], dtype=np.float32)
|
||||
candidates = []
|
||||
for i in xrange(roi_rec['boxes'].shape[0]):
|
||||
for i in range(roi_rec['boxes'].shape[0]):
|
||||
box = roi_rec['boxes'][i]
|
||||
box_size = max(box[2]-box[0], box[3]-box[1])
|
||||
if box_size<config.TRAIN.MIN_BOX_SIZE:
|
||||
@@ -594,7 +594,7 @@ def get_crop_image2(roidb):
|
||||
#print(origin_shape, im_new.shape, im_scale)
|
||||
valid = []
|
||||
valid_boxes = []
|
||||
for i in xrange(boxes_new.shape[0]):
|
||||
for i in range(boxes_new.shape[0]):
|
||||
box = boxes_new[i]
|
||||
#center = np.array(([box[0], box[1]]+[box[2], box[3]]))/2
|
||||
centerx = (box[0]+box[2])/2
|
||||
@@ -633,12 +633,12 @@ def get_crop_image2(roidb):
|
||||
global TMP_ID
|
||||
if TMP_ID>=0 and TMP_ID<10:
|
||||
tim = im.copy().astype(np.uint8)
|
||||
for i in xrange(new_rec['boxes'].shape[0]):
|
||||
for i in range(new_rec['boxes'].shape[0]):
|
||||
box = new_rec['boxes'][i].copy().astype(np.int)
|
||||
cv2.rectangle(tim, (box[0], box[1]), (box[2], box[3]), (255, 0, 0), 1)
|
||||
print('draw box:', box)
|
||||
if config.FACE_LANDMARK:
|
||||
for i in xrange(new_rec['landmarks'].shape[0]):
|
||||
for i in range(new_rec['landmarks'].shape[0]):
|
||||
landmark = new_rec['landmarks'][i].copy()
|
||||
if landmark[10]==0.0:
|
||||
print('zero', landmark)
|
||||
|
||||
@@ -6,7 +6,7 @@ import mxnet.autograd as ag
|
||||
import numpy as np
|
||||
from rcnn.config import config
|
||||
from rcnn.PY_OP import rpn_fpn_ohem3
|
||||
from symbol_common import get_sym_train
|
||||
from rcnn.symbol.symbol_common import get_sym_train
|
||||
|
||||
|
||||
def conv_only(from_layer, name, num_filter, kernel=(1,1), pad=(0,0), \
|
||||
|
||||
@@ -6,7 +6,7 @@ import mxnet.autograd as ag
|
||||
import numpy as np
|
||||
from rcnn.config import config
|
||||
from rcnn.PY_OP import rpn_fpn_ohem3
|
||||
from symbol_common import get_sym_train
|
||||
from rcnn.symbol.symbol_common import get_sym_train
|
||||
|
||||
def conv_only(from_layer, name, num_filter, kernel=(1,1), pad=(0,0), \
|
||||
stride=(1,1), bias_wd_mult=0.0):
|
||||
|
||||
@@ -159,7 +159,7 @@ class FaceSegIter(DataIter):
|
||||
|
||||
#ul = np.array( (50000,50000), dtype=np.int32)
|
||||
#br = np.array( (0,0), dtype=np.int32)
|
||||
#for i in xrange(hlabel.shape[0]):
|
||||
#for i in range(hlabel.shape[0]):
|
||||
# h = int(hlabel[i][0])
|
||||
# w = int(hlabel[i][1])
|
||||
# key = np.array((h,w))
|
||||
@@ -171,9 +171,9 @@ class FaceSegIter(DataIter):
|
||||
def get_flip(self, data, label):
|
||||
data_flip = np.zeros_like(data)
|
||||
label_flip = np.zeros_like(label)
|
||||
for k in xrange(data_flip.shape[2]):
|
||||
for k in range(data_flip.shape[2]):
|
||||
data_flip[:,:,k] = np.fliplr(data[:,:,k])
|
||||
for k in xrange(label_flip.shape[0]):
|
||||
for k in range(label_flip.shape[0]):
|
||||
label_flip[k,:] = np.fliplr(label[k,:])
|
||||
#print(label[0,:].shape)
|
||||
label_flip = label_flip[self.flip_order,:]
|
||||
@@ -186,7 +186,7 @@ class FaceSegIter(DataIter):
|
||||
# filename = './vis/raw_%d.jpg' % (self.img_num)
|
||||
# print('save', filename)
|
||||
# draw = data.copy()
|
||||
# for i in xrange(label.shape[0]):
|
||||
# for i in range(label.shape[0]):
|
||||
# cv2.circle(draw, (label[i][1], label[i][0]), 1, (0, 0, 255), 2)
|
||||
# scipy.misc.imsave(filename, draw)
|
||||
|
||||
@@ -223,7 +223,7 @@ class FaceSegIter(DataIter):
|
||||
#data_out = img_helper.crop2(data, center, _scale, (self.input_img_size, self.input_img_size), rot=rotate)
|
||||
label_out = np.zeros(self.label_shape, dtype=np.float32)
|
||||
#print('out shapes', data_out.shape, label_out.shape)
|
||||
for i in xrange(label.shape[0]):
|
||||
for i in range(label.shape[0]):
|
||||
pt = label[i].copy()
|
||||
#pt = pt[::-1]
|
||||
npt = img_helper.transform_pt(pt, trans)
|
||||
@@ -277,7 +277,7 @@ class FaceSegIter(DataIter):
|
||||
print('save', filename)
|
||||
draw = data_out.copy()
|
||||
alabel = label_out.copy()
|
||||
for i in xrange(label.shape[0]):
|
||||
for i in range(label.shape[0]):
|
||||
a = cv2.resize(alabel[i], (self.input_img_size, self.input_img_size))
|
||||
ind = np.unravel_index(np.argmax(a, axis=None), a.shape)
|
||||
cv2.circle(draw, (ind[1], ind[0]), 1, (0, 0, 255), 2)
|
||||
|
||||
@@ -28,7 +28,7 @@ class NMEMetric(mx.metric.EvalMetric):
|
||||
|
||||
def cal_nme(self, label, pred_label):
|
||||
nme = []
|
||||
for b in xrange(pred_label.shape[0]):
|
||||
for b in range(pred_label.shape[0]):
|
||||
record = [None]*6
|
||||
item = []
|
||||
if label.ndim==4:
|
||||
@@ -39,7 +39,7 @@ class NMEMetric(mx.metric.EvalMetric):
|
||||
#print(label[b])
|
||||
if np.count_nonzero(label[b])==0:
|
||||
continue
|
||||
for p in xrange(pred_label.shape[1]):
|
||||
for p in range(pred_label.shape[1]):
|
||||
if label.ndim==4:
|
||||
heatmap_gt = label[b][p]
|
||||
ind_gt = np.unravel_index(np.argmax(heatmap_gt, axis=None), heatmap_gt.shape)
|
||||
|
||||
@@ -313,19 +313,19 @@ def hourglass(data, nFilters, nModules, n, workspace, name, binarize, dcn):
|
||||
s = 2
|
||||
_dcn = False
|
||||
up1 = data
|
||||
for i in xrange(nModules):
|
||||
for i in range(nModules):
|
||||
up1 = conv_block(up1, nFilters, (1,1), True, "%s_up1_%d"%(name,i), binarize, _dcn, 1)
|
||||
low1 = mx.sym.Pooling(data=data, kernel=(s, s), stride=(s,s), pad=(0,0), pool_type='max')
|
||||
for i in xrange(nModules):
|
||||
for i in range(nModules):
|
||||
low1 = conv_block(low1, nFilters, (1,1), True, "%s_low1_%d"%(name,i), binarize, _dcn, 1)
|
||||
if n>1:
|
||||
low2 = hourglass(low1, nFilters, nModules, n-1, workspace, "%s_%d"%(name, n-1), binarize, dcn)
|
||||
else:
|
||||
low2 = low1
|
||||
for i in xrange(nModules):
|
||||
for i in range(nModules):
|
||||
low2 = conv_block(low2, nFilters, (1,1), True, "%s_low2_%d"%(name,i), binarize, _dcn, 1) #TODO
|
||||
low3 = low2
|
||||
for i in xrange(nModules):
|
||||
for i in range(nModules):
|
||||
low3 = conv_block(low3, nFilters, (1,1), True, "%s_low3_%d"%(name,i), binarize, _dcn, 1)
|
||||
up2 = mx.symbol.UpSampling(low3, scale=s, sample_type='nearest', workspace=512, name='%s_upsampling_%s'%(name,n), num_args=1)
|
||||
return mx.symbol.add_n(up1, up2)
|
||||
@@ -517,14 +517,14 @@ def get_symbol(num_classes):
|
||||
|
||||
heatmap = None
|
||||
|
||||
for i in xrange(nStacks):
|
||||
for i in range(nStacks):
|
||||
shortcut = body
|
||||
if config.net_sta>0:
|
||||
sta = STA(body, nFilters, nModules, config.net_n+1, workspace, 'sta%d'%(i))
|
||||
body = sta.get()
|
||||
else:
|
||||
body = hourglass(body, nFilters, nModules, config.net_n, workspace, 'stack%d_hg'%(i), binarize, dcn)
|
||||
for j in xrange(nModules):
|
||||
for j in range(nModules):
|
||||
body = conv_block(body, nFilters, (1,1), True, 'stack%d_unit%d'%(i,j), binarize, dcn, 1)
|
||||
_dcn = True if config.net_dcn>=2 else False
|
||||
ll = ConvFactory(body, nFilters, (1,1), dcn = _dcn, name='stack%d_ll'%(i))
|
||||
@@ -596,7 +596,8 @@ def init_weights(sym, data_shape_dict):
|
||||
#print(arg_shape_dict)
|
||||
arg_params = {}
|
||||
aux_params = {}
|
||||
for k,v in arg_shape_dict.iteritems():
|
||||
for k in arg_shape_dict:
|
||||
v = arg_shape_dict[k]
|
||||
#print(k,v)
|
||||
if k.endswith('offset_weight') or k.endswith('offset_bias'):
|
||||
print('initializing',k)
|
||||
|
||||
@@ -54,7 +54,7 @@ class Handler:
|
||||
tb = datetime.datetime.now()
|
||||
print('module time cost', (tb-ta).total_seconds())
|
||||
ret = np.zeros( (alabel.shape[0], 2), dtype=np.float32)
|
||||
for i in xrange(alabel.shape[0]):
|
||||
for i in range(alabel.shape[0]):
|
||||
a = cv2.resize(alabel[i], (self.image_size[1], self.image_size[0]))
|
||||
ind = np.unravel_index(np.argmax(a, axis=None), a.shape)
|
||||
#ret[i] = (ind[0], ind[1]) #h, w
|
||||
|
||||
@@ -35,7 +35,7 @@ def main(args):
|
||||
ctx = []
|
||||
cvd = os.environ['CUDA_VISIBLE_DEVICES'].strip()
|
||||
if len(cvd)>0:
|
||||
for i in xrange(len(cvd.split(','))):
|
||||
for i in range(len(cvd.split(','))):
|
||||
ctx.append(mx.gpu(i))
|
||||
if len(ctx)==0:
|
||||
ctx = [mx.cpu()]
|
||||
|
||||
@@ -19,7 +19,7 @@ model = face_embedding.FaceModel(args)
|
||||
img = cv2.imread('/raid5data/dplearn/megaface/facescrubr/112x112/Tom_Hanks/Tom_Hanks_54745.png')
|
||||
|
||||
time_now = datetime.datetime.now()
|
||||
for i in xrange(3000):
|
||||
for i in range(3000):
|
||||
f1 = model.get_feature(img)
|
||||
time_now2 = datetime.datetime.now()
|
||||
diff = time_now2 - time_now
|
||||
|
||||
@@ -22,7 +22,7 @@ import face_preprocess
|
||||
|
||||
|
||||
def do_flip(data):
|
||||
for idx in xrange(data.shape[0]):
|
||||
for idx in range(data.shape[0]):
|
||||
data[idx,:,:] = np.fliplr(data[idx,:,:])
|
||||
|
||||
class FaceModel:
|
||||
|
||||
@@ -22,7 +22,7 @@ import face_preprocess
|
||||
|
||||
|
||||
def do_flip(data):
|
||||
for idx in xrange(data.shape[0]):
|
||||
for idx in range(data.shape[0]):
|
||||
data[idx,:,:] = np.fliplr(data[idx,:,:])
|
||||
|
||||
def get_model(ctx, image_size, model_str, layer):
|
||||
|
||||
@@ -1,52 +0,0 @@
|
||||
import mxnet as mx
|
||||
import numpy as np
|
||||
import sys, os
|
||||
|
||||
source_dir = sys.argv[1]
|
||||
input_dir = sys.argv[2]
|
||||
idx_file = os.path.join(source_dir, 'traino.idx')
|
||||
rec_file = os.path.join(source_dir, 'traino.rec')
|
||||
writer = mx.recordio.MXIndexedRecordIO(os.path.join(source_dir,'train.idx'), os.path.join(source_dir,'train.rec'), 'w') # pylint: disable=redefined-variable-type
|
||||
imgrec = mx.recordio.MXIndexedRecordIO(idx_file, rec_file, 'r') # pylint: disable=redefined-variable-type
|
||||
seq = list(imgrec.keys)
|
||||
widx = 0
|
||||
for img_idx in seq:
|
||||
s = imgrec.read_idx(img_idx)
|
||||
assert widx==img_idx
|
||||
writer.write_idx(widx, s)
|
||||
widx+=1
|
||||
|
||||
|
||||
stat = {}
|
||||
|
||||
for _file in os.listdir(input_dir):
|
||||
if not _file.endswith('.rec'):
|
||||
continue
|
||||
rec_file = os.path.join(input_dir, _file)
|
||||
print(rec_file)
|
||||
idx_file = rec_file[:-4]+'.idx'
|
||||
imgrec = mx.recordio.MXIndexedRecordIO(idx_file, rec_file, 'r') # pylint: disable=redefined-variable-type
|
||||
seq = list(imgrec.keys)
|
||||
for img_idx in seq:
|
||||
if img_idx%100==0:
|
||||
print(img_idx, stat)
|
||||
s = imgrec.read_idx(img_idx)
|
||||
header, img = mx.recordio.unpack(s)
|
||||
try:
|
||||
image = mx.image.imdecode(img).asnumpy()
|
||||
except:
|
||||
continue
|
||||
age = int(header.label[0])
|
||||
if age>=20:
|
||||
continue
|
||||
age_group = age//10
|
||||
#if not age in stat:
|
||||
stat[age_group] = 0
|
||||
stat[age_group]+=1
|
||||
label = [9999, age]
|
||||
nheader = mx.recordio.IRHeader(0, label, widx, 0)
|
||||
bgr = image[:,:,::-1]
|
||||
s = mx.recordio.pack_img(nheader, bgr, quality=95, img_fmt='.jpg')
|
||||
writer.write_idx(widx, s)
|
||||
widx+=1
|
||||
|
||||
@@ -124,7 +124,7 @@ class FaceImageIter(io.DataIter):
|
||||
def mirror_aug(self, img):
|
||||
_rd = random.randint(0,1)
|
||||
if _rd==1:
|
||||
for c in xrange(img.shape[2]):
|
||||
for c in range(img.shape[2]):
|
||||
img[:,:,c] = np.fliplr(img[:,:,c])
|
||||
return img
|
||||
|
||||
|
||||
@@ -22,7 +22,7 @@ import face_preprocess
|
||||
|
||||
|
||||
def do_flip(data):
|
||||
for idx in xrange(data.shape[0]):
|
||||
for idx in range(data.shape[0]):
|
||||
data[idx,:,:] = np.fliplr(data[idx,:,:])
|
||||
|
||||
def get_model(ctx, image_size, model_str, layer):
|
||||
|
||||
@@ -81,7 +81,7 @@ class MAEMetric(mx.metric.EvalMetric):
|
||||
pred_age = np.zeros( label_age.shape, dtype=np.int)
|
||||
#pred_age = np.zeros( label_age.shape, dtype=np.float32)
|
||||
pred = preds[-1].asnumpy()
|
||||
for i in xrange(AGE):
|
||||
for i in range(AGE):
|
||||
_pred = pred[:,2+i*2:4+i*2]
|
||||
_pred = np.argmax(_pred, axis=1)
|
||||
#pred = pred[:,1]
|
||||
@@ -107,7 +107,7 @@ class CUMMetric(mx.metric.EvalMetric):
|
||||
label_age = np.count_nonzero(label[:,1:], axis=1)
|
||||
pred_age = np.zeros( label_age.shape, dtype=np.int)
|
||||
pred = preds[-1].asnumpy()
|
||||
for i in xrange(AGE):
|
||||
for i in range(AGE):
|
||||
_pred = pred[:,2+i*2:4+i*2]
|
||||
_pred = np.argmax(_pred, axis=1)
|
||||
#pred = pred[:,1]
|
||||
@@ -184,7 +184,7 @@ def train_net(args):
|
||||
ctx = []
|
||||
cvd = os.environ['CUDA_VISIBLE_DEVICES'].strip()
|
||||
if len(cvd)>0:
|
||||
for i in xrange(len(cvd.split(','))):
|
||||
for i in range(len(cvd.split(','))):
|
||||
ctx.append(mx.gpu(i))
|
||||
if len(ctx)==0:
|
||||
ctx = [mx.cpu()]
|
||||
|
||||
@@ -1 +0,0 @@
|
||||
Gluon interface, not totally working.
|
||||
@@ -1,271 +0,0 @@
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import os
|
||||
import random
|
||||
import logging
|
||||
import sys
|
||||
import numbers
|
||||
import math
|
||||
import sklearn
|
||||
import datetime
|
||||
import numpy as np
|
||||
import cv2
|
||||
|
||||
import mxnet as mx
|
||||
from mxnet import ndarray as nd
|
||||
from mxnet import io
|
||||
from mxnet import recordio
|
||||
sys.path.append(os.path.join(os.path.dirname(__file__), 'common'))
|
||||
import face_preprocess
|
||||
import multiprocessing
|
||||
|
||||
logger = logging.getLogger()
|
||||
|
||||
|
||||
class FaceImageIter(io.DataIter):
|
||||
|
||||
def __init__(self, batch_size, data_shape,
|
||||
path_imgrec = None, task = 'age',
|
||||
shuffle=False, aug_list=None, mean = None,
|
||||
rand_mirror = False, cutoff = 0,
|
||||
data_name='data', label_name='softmax_label', **kwargs):
|
||||
super(FaceImageIter, self).__init__()
|
||||
assert path_imgrec
|
||||
if path_imgrec:
|
||||
logging.info('loading recordio %s...',
|
||||
path_imgrec)
|
||||
path_imgidx = path_imgrec[0:-4]+".idx"
|
||||
self.imgrec = recordio.MXIndexedRecordIO(path_imgidx, path_imgrec, 'r') # pylint: disable=redefined-variable-type
|
||||
s = self.imgrec.read_idx(0)
|
||||
header, _ = recordio.unpack(s)
|
||||
self.imgidx = list(self.imgrec.keys)
|
||||
if shuffle:
|
||||
self.seq = self.imgidx
|
||||
self.oseq = self.imgidx
|
||||
print(len(self.seq))
|
||||
else:
|
||||
self.seq = None
|
||||
|
||||
self.mean = mean
|
||||
self.nd_mean = None
|
||||
if self.mean:
|
||||
self.mean = np.array(self.mean, dtype=np.float32).reshape(1,1,3)
|
||||
self.nd_mean = mx.nd.array(self.mean).reshape((1,1,3))
|
||||
|
||||
self.check_data_shape(data_shape)
|
||||
self.provide_data = [(data_name, (batch_size,) + data_shape)]
|
||||
self.batch_size = batch_size
|
||||
self.data_shape = data_shape
|
||||
self.shuffle = shuffle
|
||||
self.image_size = '%d,%d'%(data_shape[1],data_shape[2])
|
||||
self.rand_mirror = rand_mirror
|
||||
print('rand_mirror', rand_mirror)
|
||||
self.cutoff = cutoff
|
||||
if task=='age':
|
||||
self.provide_label = [(label_name, (batch_size,100))]
|
||||
else:
|
||||
self.provide_label = [(label_name, (batch_size,))]
|
||||
#print(self.provide_label[0][1])
|
||||
self.cur = 0
|
||||
self.nbatch = 0
|
||||
self.is_init = False
|
||||
|
||||
|
||||
def reset(self):
|
||||
"""Resets the iterator to the beginning of the data."""
|
||||
print('call reset()')
|
||||
self.cur = 0
|
||||
if self.shuffle:
|
||||
random.shuffle(self.seq)
|
||||
if self.seq is None and self.imgrec is not None:
|
||||
self.imgrec.reset()
|
||||
|
||||
def num_samples(self):
|
||||
return len(self.seq)
|
||||
|
||||
def next_sample(self):
|
||||
"""Helper function for reading in next sample."""
|
||||
#set total batch size, for example, 1800, and maximum size for each people, for example 45
|
||||
if self.seq is not None:
|
||||
while True:
|
||||
if self.cur >= len(self.seq):
|
||||
raise StopIteration
|
||||
idx = self.seq[self.cur]
|
||||
self.cur += 1
|
||||
if self.imgrec is not None:
|
||||
s = self.imgrec.read_idx(idx)
|
||||
header, img = recordio.unpack(s)
|
||||
label = header.label
|
||||
return label, img, None, None
|
||||
else:
|
||||
label, fname, bbox, landmark = self.imglist[idx]
|
||||
return label, self.read_image(fname), bbox, landmark
|
||||
else:
|
||||
s = self.imgrec.read()
|
||||
if s is None:
|
||||
raise StopIteration
|
||||
header, img = recordio.unpack(s)
|
||||
return header.label, img, None, None
|
||||
|
||||
def brightness_aug(self, src, x):
|
||||
alpha = 1.0 + random.uniform(-x, x)
|
||||
src *= alpha
|
||||
return src
|
||||
|
||||
def contrast_aug(self, src, x):
|
||||
alpha = 1.0 + random.uniform(-x, x)
|
||||
coef = np.array([[[0.299, 0.587, 0.114]]])
|
||||
gray = src * coef
|
||||
gray = (3.0 * (1.0 - alpha) / gray.size) * np.sum(gray)
|
||||
src *= alpha
|
||||
src += gray
|
||||
return src
|
||||
|
||||
def saturation_aug(self, src, x):
|
||||
alpha = 1.0 + random.uniform(-x, x)
|
||||
coef = np.array([[[0.299, 0.587, 0.114]]])
|
||||
gray = src * coef
|
||||
gray = np.sum(gray, axis=2, keepdims=True)
|
||||
gray *= (1.0 - alpha)
|
||||
src *= alpha
|
||||
src += gray
|
||||
return src
|
||||
|
||||
def color_aug(self, img, x):
|
||||
augs = [self.brightness_aug, self.contrast_aug, self.saturation_aug]
|
||||
random.shuffle(augs)
|
||||
for aug in augs:
|
||||
#print(img.shape)
|
||||
img = aug(img, x)
|
||||
#print(img.shape)
|
||||
return img
|
||||
|
||||
def mirror_aug(self, img):
|
||||
_rd = random.randint(0,1)
|
||||
if _rd==1:
|
||||
for c in xrange(img.shape[2]):
|
||||
img[:,:,c] = np.fliplr(img[:,:,c])
|
||||
return img
|
||||
|
||||
|
||||
def next(self):
|
||||
if not self.is_init:
|
||||
self.reset()
|
||||
self.is_init = True
|
||||
"""Returns the next batch of data."""
|
||||
#print('in next', self.cur, self.labelcur)
|
||||
self.nbatch+=1
|
||||
batch_size = self.batch_size
|
||||
c, h, w = self.data_shape
|
||||
batch_data = nd.empty((batch_size, c, h, w))
|
||||
if self.provide_label is not None:
|
||||
batch_label = nd.empty(self.provide_label[0][1])
|
||||
i = 0
|
||||
try:
|
||||
while i < batch_size:
|
||||
label, s, bbox, landmark = self.next_sample()
|
||||
#if label[1]>=0.0 or label[2]>=0.0:
|
||||
# print(label[0:10])
|
||||
_data = self.imdecode(s)
|
||||
if self.rand_mirror:
|
||||
_rd = random.randint(0,1)
|
||||
if _rd==1:
|
||||
_data = mx.ndarray.flip(data=_data, axis=1)
|
||||
#_data = _data.astype('float32')
|
||||
#_data -= 127.5
|
||||
#_data *= 0.0078125
|
||||
if self.cutoff>0:
|
||||
centerh = random.randint(0, _data.shape[0]-1)
|
||||
centerw = random.randint(0, _data.shape[1]-1)
|
||||
half = self.cutoff//2
|
||||
starth = max(0, centerh-half)
|
||||
endh = min(_data.shape[0], centerh+half)
|
||||
startw = max(0, centerw-half)
|
||||
endw = min(_data.shape[1], centerw+half)
|
||||
_data = _data.astype('float32')
|
||||
#print(starth, endh, startw, endw, _data.shape)
|
||||
_data[starth:endh, startw:endw, :] = 127.5
|
||||
data = [_data]
|
||||
try:
|
||||
self.check_valid_image(data)
|
||||
except RuntimeError as e:
|
||||
logging.debug('Invalid image, skipping: %s', str(e))
|
||||
continue
|
||||
#print('aa',data[0].shape)
|
||||
#data = self.augmentation_transform(data)
|
||||
#print('bb',data[0].shape)
|
||||
for datum in data:
|
||||
assert i < batch_size, 'Batch size must be multiples of augmenter output length'
|
||||
#print(datum.shape)
|
||||
batch_data[i][:] = self.postprocess_data(datum)
|
||||
batch_label[i][:] = label
|
||||
i += 1
|
||||
except StopIteration:
|
||||
if i<batch_size:
|
||||
raise StopIteration
|
||||
|
||||
return io.DataBatch([batch_data], [batch_label], batch_size - i)
|
||||
|
||||
def check_data_shape(self, data_shape):
|
||||
"""Checks if the input data shape is valid"""
|
||||
if not len(data_shape) == 3:
|
||||
raise ValueError('data_shape should have length 3, with dimensions CxHxW')
|
||||
if not data_shape[0] == 3:
|
||||
raise ValueError('This iterator expects inputs to have 3 channels.')
|
||||
|
||||
def check_valid_image(self, data):
|
||||
"""Checks if the input data is valid"""
|
||||
if len(data[0].shape) == 0:
|
||||
raise RuntimeError('Data shape is wrong')
|
||||
|
||||
def imdecode(self, s):
|
||||
"""Decodes a string or byte string to an NDArray.
|
||||
See mx.img.imdecode for more details."""
|
||||
img = mx.image.imdecode(s) #mx.ndarray
|
||||
return img
|
||||
|
||||
def read_image(self, fname):
|
||||
"""Reads an input image `fname` and returns the decoded raw bytes.
|
||||
|
||||
Example usage:
|
||||
----------
|
||||
>>> dataIter.read_image('Face.jpg') # returns decoded raw bytes.
|
||||
"""
|
||||
with open(os.path.join(self.path_root, fname), 'rb') as fin:
|
||||
img = fin.read()
|
||||
return img
|
||||
|
||||
def augmentation_transform(self, data):
|
||||
"""Transforms input data with specified augmentation."""
|
||||
for aug in self.auglist:
|
||||
data = [ret for src in data for ret in aug(src)]
|
||||
return data
|
||||
|
||||
def postprocess_data(self, datum):
|
||||
"""Final postprocessing step before image is loaded into the batch."""
|
||||
return nd.transpose(datum, axes=(2, 0, 1))
|
||||
|
||||
class FaceImageIterList(io.DataIter):
|
||||
def __init__(self, iter_list):
|
||||
assert len(iter_list)>0
|
||||
self.provide_data = iter_list[0].provide_data
|
||||
self.provide_label = iter_list[0].provide_label
|
||||
self.iter_list = iter_list
|
||||
self.cur_iter = None
|
||||
|
||||
def reset(self):
|
||||
self.cur_iter.reset()
|
||||
|
||||
def next(self):
|
||||
self.cur_iter = random.choice(self.iter_list)
|
||||
while True:
|
||||
try:
|
||||
ret = self.cur_iter.next()
|
||||
except StopIteration:
|
||||
self.cur_iter.reset()
|
||||
continue
|
||||
return ret
|
||||
|
||||
|
||||
@@ -1,195 +0,0 @@
|
||||
import mxnet as mx
|
||||
from mxnet import gluon
|
||||
from mxnet import profiler
|
||||
from mxnet.gluon import nn
|
||||
from mxnet import ndarray as nd
|
||||
import fresnet
|
||||
|
||||
class EmbeddingBlock(gluon.HybridBlock):
|
||||
def __init__(self, emb_size = 512, mode='E', **kwargs):
|
||||
super(EmbeddingBlock, self).__init__(**kwargs)
|
||||
self.emb_size = emb_size
|
||||
print('mode', mode)
|
||||
with self.name_scope():
|
||||
self.body = nn.HybridSequential(prefix='')
|
||||
if mode=='D':
|
||||
self.body.add(nn.BatchNorm())
|
||||
self.body.add(nn.Activation('relu'))
|
||||
self.body.add(nn.GlobalAvgPool2D())
|
||||
self.body.add(nn.Flatten())
|
||||
self.body.add(nn.Dense(emb_size))
|
||||
self.body.add(nn.BatchNorm(scale=False, prefix='fc1'))
|
||||
elif mode=='E':
|
||||
self.body.add(nn.BatchNorm(epsilon=2e-5))
|
||||
self.body.add(nn.Dropout(0.4))
|
||||
#self.body.add(nn.Flatten())
|
||||
self.body.add(nn.Dense(emb_size))
|
||||
self.body.add(nn.BatchNorm(scale=False, epsilon=2e-5, prefix='fc1'))
|
||||
elif mode=='Z':
|
||||
#self.body.add(nn.BatchNorm(epsilon=2e-5))
|
||||
#self.body.add(nn.Activation('relu'))
|
||||
#self.body.add(nn.GlobalAvgPool2D())
|
||||
#self.body.add(nn.Flatten())
|
||||
self.body.add(nn.BatchNorm(epsilon=2e-5))
|
||||
self.body.add(nn.Dropout(0.4))
|
||||
#self.body.add(nn.Flatten())
|
||||
self.body.add(nn.Dense(emb_size))
|
||||
#self.body.add(nn.BatchNorm(scale=False, epsilon=2e-5, prefix='fc1'))
|
||||
else:
|
||||
self.body.add(nn.BatchNorm(epsilon=2e-5))
|
||||
self.body.add(nn.Activation('relu'))
|
||||
self.body.add(nn.GlobalAvgPool2D())
|
||||
self.body.add(nn.Flatten())
|
||||
|
||||
def hybrid_forward(self, F, x):
|
||||
x = self.body(x)
|
||||
#bn_mom = 0.9
|
||||
#x = F.BatchNorm(data=x, fix_gamma=True, eps=2e-5, momentum=bn_mom)
|
||||
return x
|
||||
#return x
|
||||
|
||||
class ArcMarginBlock(gluon.HybridBlock):
|
||||
def __init__(self, args, **kwargs):
|
||||
super(ArcMarginBlock, self).__init__(**kwargs)
|
||||
self.margin_s = args.margin_s
|
||||
self.margin_m = args.margin_m
|
||||
self.margin_a = args.margin_a
|
||||
self.margin_b = args.margin_b
|
||||
self.num_classes = args.num_classes
|
||||
self.emb_size = args.emb_size
|
||||
#self.weight = gluon.Parameter(name = 'fc7_weight', shape = (self.num_classes, self.emb_size))
|
||||
#self.weight.initialize()
|
||||
#self._weight = nd.empty(shape = (self.num_classes, self.emb_size))
|
||||
#if self.margin_a>0.0:
|
||||
with self.name_scope():
|
||||
self.fc7_weight = self.params.get('fc7_weight', shape=(self.num_classes, self.emb_size))
|
||||
#else:
|
||||
# self.dense = nn.Dense(self.num_classes, prefix='fc7')
|
||||
self.body = nn.HybridSequential(prefix='')
|
||||
feat = fresnet.get(args.num_layers,
|
||||
version_unit=args.version_unit,
|
||||
version_act=args.version_act)
|
||||
self.body.add(feat)
|
||||
self.body.add(EmbeddingBlock(args.emb_size, args.version_output, prefix=''))
|
||||
|
||||
def feature(self, x):
|
||||
feat = self.body(x)
|
||||
return feat
|
||||
|
||||
def hybrid_forward(self, F, x, label, fc7_weight):
|
||||
feat = self.body(x)
|
||||
if self.margin_a==0.0:
|
||||
fc7 = F.FullyConnected(feat, fc7_weight, no_bias = True, num_hidden=self.num_classes, name='fc7')
|
||||
#fc7 = self.dense(feat)
|
||||
#with x.context:
|
||||
# _w = self._weight.data()
|
||||
#_b = self._bias.data()
|
||||
#fc7 = nd.FullyConnected(data=feat, weight=_w, bias = _b, num_hidden=self.num_classes, name='fc7')
|
||||
#fc7 = F.softmax_cross_entropy(data = fc7, label=label)
|
||||
return fc7
|
||||
|
||||
nx = F.L2Normalization(feat, mode='instance', name='fc1n')*self.margin_s
|
||||
w = F.L2Normalization(fc7_weight, mode='instance')
|
||||
fc7 = F.FullyConnected(nx, w, no_bias = True, num_hidden=self.num_classes, name='fc7')
|
||||
#fc7 = self.dense(nx)
|
||||
if self.margin_a!=1.0 or self.margin_m!=0.0 or self.margin_b!=0.0:
|
||||
if self.margin_a==1.0 and self.margin_m==0.0:
|
||||
s_m = s*self.margin_b
|
||||
gt_one_hot = F.one_hot(label, depth = self.num_classes, on_value = s_m, off_value = 0.0)
|
||||
fc7 = fc7-gt_one_hot
|
||||
else:
|
||||
zy = F.pick(fc7, label, axis=1)
|
||||
cos_t = zy/self.margin_s
|
||||
t = F.arccos(cos_t)
|
||||
if self.margin_a!=1.0:
|
||||
t = t*self.margin_a
|
||||
if self.margin_m>0.0:
|
||||
t = t+self.margin_m
|
||||
body = F.cos(t)
|
||||
if self.margin_b>0.0:
|
||||
body = body - self.margin_b
|
||||
new_zy = body*self.margin_s
|
||||
diff = new_zy - zy
|
||||
diff = F.expand_dims(diff, 1)
|
||||
gt_one_hot = F.one_hot(label, depth = self.num_classes, on_value = 1.0, off_value = 0.0)
|
||||
body = F.broadcast_mul(gt_one_hot, diff)
|
||||
fc7 = fc7+body
|
||||
return fc7
|
||||
|
||||
#def hybrid_forward(self, F, x):
|
||||
# feat = self.body(x)
|
||||
# return feat
|
||||
|
||||
class DenseBlock(gluon.HybridBlock):
|
||||
def __init__(self, args, **kwargs):
|
||||
super(DenseBlock, self).__init__(**kwargs)
|
||||
self.num_classes = args.num_classes
|
||||
self.emb_size = args.emb_size
|
||||
self.body = nn.HybridSequential(prefix='')
|
||||
feat = fresnet.get(args.num_layers,
|
||||
version_unit=args.version_unit,
|
||||
version_act=args.version_act)
|
||||
self.body.add(feat)
|
||||
self.body.add(EmbeddingBlock(args.emb_size, args.version_output, prefix=''))
|
||||
self.dense = nn.Dense(self.num_classes, prefix='fc7')
|
||||
|
||||
def feature(self, x):
|
||||
feat = self.body(x)
|
||||
return feat
|
||||
|
||||
def hybrid_forward(self, F, x):
|
||||
feat = self.body(x)
|
||||
fc7 = self.dense(feat)
|
||||
return fc7
|
||||
|
||||
class ArcMarginTestBlock(gluon.Block):
|
||||
def __init__(self, args, **kwargs):
|
||||
super(ArcMarginTestBlock, self).__init__(**kwargs)
|
||||
|
||||
self.body = nn.HybridSequential(prefix='')
|
||||
feat = fresnet.get(args.num_layers,
|
||||
version_unit=args.version_unit,
|
||||
version_act=args.version_act)
|
||||
self.body.add(feat)
|
||||
self.body.add(EmbeddingBlock(args.emb_size, args.version_output))
|
||||
|
||||
def forward(self, x):
|
||||
feat = self.body(x)
|
||||
return feat
|
||||
|
||||
class _GABlock(gluon.HybridBlock):
|
||||
def __init__(self, args, num_classes, **kwargs):
|
||||
super(_GABlock, self).__init__(**kwargs)
|
||||
with self.name_scope():
|
||||
self.body = nn.HybridSequential(prefix='')
|
||||
feat = fresnet.get(args.num_layers,
|
||||
version_unit=args.version_unit,
|
||||
version_act=args.version_act)
|
||||
self.body.add(feat)
|
||||
self.body.add(EmbeddingBlock(mode=args.version_output))
|
||||
self.body.add(nn.Dense(num_classes))
|
||||
|
||||
def hybrid_forward(self, F, x):
|
||||
return self.body(x)
|
||||
|
||||
|
||||
class GABlock(gluon.HybridBlock):
|
||||
def __init__(self, args, **kwargs):
|
||||
super(GABlock, self).__init__(**kwargs)
|
||||
with self.name_scope():
|
||||
#args.num_classes = 2
|
||||
self.bodyg = _GABlock(args, 2, prefix='gender_')
|
||||
#args.num_classes = 200
|
||||
self.bodya = _GABlock(args, 200, prefix='age_')
|
||||
#if args.task=='age':
|
||||
# self.bodyg.collect_params().setattr('grad_req', 'null')
|
||||
#elif args.task=='gender':
|
||||
# self.bodya.collect_params().setattr('grad_req', 'null')
|
||||
#self.body = nn.HybridSequential(prefix='')
|
||||
|
||||
def hybrid_forward(self, F, x):
|
||||
g = self.bodyg(x)
|
||||
a = self.bodya(x)
|
||||
f = F.concat(g,a,dim=1, name='fc1')
|
||||
return [f,g,a]
|
||||
|
||||
@@ -1,29 +0,0 @@
|
||||
|
||||
class EmbeddingBlock(HybridBlock):
|
||||
def __init__(self, emb_size = 512, mode='E', **kwargs):
|
||||
super(EmbeddingBlock, self).__init__(**kwargs)
|
||||
self.body = nn.HybridSequential(prefix='')
|
||||
if mode=='D':
|
||||
self.body.add(nn.BatchNorm())
|
||||
self.body.add(nn.Activation('relu'))
|
||||
self.body.add(nn.GlobalAvgPool2D())
|
||||
self.body.add(nn.Flatten())
|
||||
self.body.add(nn.Dense(emb_size))
|
||||
self.body.add(nn.BatchNorm(scale=False, prefix='fc1'))
|
||||
elif mode=='E':
|
||||
self.body.add(nn.BatchNorm())
|
||||
self.body.add(nn.Dropout(0.4))
|
||||
self.body.add(nn.Dense(emb_size))
|
||||
self.body.add(nn.BatchNorm(scale=False, prefix='fc1'))
|
||||
else:
|
||||
self.body.add(nn.BatchNorm())
|
||||
self.body.add(nn.Activation('relu'))
|
||||
self.body.add(nn.GlobalAvgPool2D())
|
||||
self.body.add(nn.Flatten())
|
||||
|
||||
def hybrid_forward(self, F, x):
|
||||
x = self.body(x)
|
||||
return x
|
||||
|
||||
class MarginBlock(HybridBlock):
|
||||
def __init__(self, args, **kwargs):
|
||||
@@ -1,232 +0,0 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
# coding: utf-8
|
||||
# pylint: disable= arguments-differ
|
||||
"""ResNets, implemented in Gluon."""
|
||||
from __future__ import division
|
||||
|
||||
#__all__ = ['ResNetV1', 'ResNetV2',
|
||||
# 'BasicBlockV1', 'BasicBlockV2',
|
||||
# 'BottleneckV1', 'BottleneckV2',
|
||||
# 'resnet18_v1', 'resnet34_v1', 'resnet50_v1', 'resnet101_v1', 'resnet152_v1',
|
||||
# 'resnet18_v2', 'resnet34_v2', 'resnet50_v2', 'resnet101_v2', 'resnet152_v2',
|
||||
# 'get_resnet']
|
||||
|
||||
import os
|
||||
|
||||
#from ....context import cpu
|
||||
from mxnet import gluon
|
||||
from mxnet import profiler
|
||||
from mxnet.gluon import nn
|
||||
from mxnet.gluon.block import HybridBlock
|
||||
|
||||
# Helpers
|
||||
def _conv3x3(channels, stride, in_channels):
|
||||
return nn.Conv2D(channels, kernel_size=3, strides=stride, padding=1,
|
||||
use_bias=False, in_channels=in_channels)
|
||||
|
||||
def _act(act_type):
|
||||
if act_type=='prelu':
|
||||
return nn.PReLU()
|
||||
else:
|
||||
return nn.Activation(act_type)
|
||||
|
||||
# Blocks
|
||||
class BasicBlockV1(HybridBlock):
|
||||
r"""BasicBlock V1 from `"Deep Residual Learning for Image Recognition"
|
||||
<http://arxiv.org/abs/1512.03385>`_ paper.
|
||||
This is used for ResNet V1 for 18, 34 layers.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
channels : int
|
||||
Number of output channels.
|
||||
stride : int
|
||||
Stride size.
|
||||
downsample : bool, default False
|
||||
Whether to downsample the input.
|
||||
in_channels : int, default 0
|
||||
Number of input channels. Default is 0, to infer from the graph.
|
||||
"""
|
||||
def __init__(self, channels, stride, downsample=False, in_channels=0, act_type = 'relu', **kwargs):
|
||||
super(BasicBlockV1, self).__init__(**kwargs)
|
||||
self.act_type = act_type
|
||||
self.body = nn.HybridSequential(prefix='')
|
||||
self.body.add(_conv3x3(channels, 1, in_channels))
|
||||
self.body.add(nn.BatchNorm(epsilon=2e-5))
|
||||
self.body.add(_act(act_type))
|
||||
self.body.add(_conv3x3(channels, stride, channels))
|
||||
self.body.add(nn.BatchNorm(epsilon=2e-5))
|
||||
if self.act_type=='prelu':
|
||||
self.prelu = nn.PReLU()
|
||||
if downsample:
|
||||
self.downsample = nn.HybridSequential(prefix='')
|
||||
self.downsample.add(nn.Conv2D(channels, kernel_size=1, strides=stride,
|
||||
use_bias=False, in_channels=in_channels))
|
||||
self.downsample.add(nn.BatchNorm(epsilon=2e-5))
|
||||
else:
|
||||
self.downsample = None
|
||||
|
||||
def hybrid_forward(self, F, x):
|
||||
residual = x
|
||||
|
||||
x = self.body(x)
|
||||
|
||||
if self.downsample:
|
||||
residual = self.downsample(residual)
|
||||
|
||||
if self.act_type=='prelu':
|
||||
x = self.prelu(x+residual)
|
||||
#x = F.LeakyReLU(residual+x, act_type = self.act_type)
|
||||
else:
|
||||
x = F.Activation(x+residual, act_type=self.act_type)
|
||||
|
||||
return x
|
||||
|
||||
|
||||
class BasicBlockV2(HybridBlock):
|
||||
r"""BasicBlock V2 from
|
||||
`"Identity Mappings in Deep Residual Networks"
|
||||
<https://arxiv.org/abs/1603.05027>`_ paper.
|
||||
This is used for ResNet V2 for 18, 34 layers.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
channels : int
|
||||
Number of output channels.
|
||||
stride : int
|
||||
Stride size.
|
||||
downsample : bool, default False
|
||||
Whether to downsample the input.
|
||||
in_channels : int, default 0
|
||||
Number of input channels. Default is 0, to infer from the graph.
|
||||
"""
|
||||
def __init__(self, channels, stride, downsample=False, in_channels=0, **kwargs):
|
||||
super(BasicBlockV2, self).__init__(**kwargs)
|
||||
self.bn1 = nn.BatchNorm()
|
||||
self.conv1 = _conv3x3(channels, stride, in_channels)
|
||||
self.bn2 = nn.BatchNorm()
|
||||
self.conv2 = _conv3x3(channels, 1, channels)
|
||||
if downsample:
|
||||
self.downsample = nn.Conv2D(channels, 1, stride, use_bias=False,
|
||||
in_channels=in_channels)
|
||||
else:
|
||||
self.downsample = None
|
||||
|
||||
def hybrid_forward(self, F, x):
|
||||
residual = x
|
||||
x = self.bn1(x)
|
||||
x = F.Activation(x, act_type='relu')
|
||||
if self.downsample:
|
||||
residual = self.downsample(x)
|
||||
x = self.conv1(x)
|
||||
|
||||
x = self.bn2(x)
|
||||
x = F.Activation(x, act_type='relu')
|
||||
x = self.conv2(x)
|
||||
|
||||
return x + residual
|
||||
|
||||
|
||||
class ResNet(HybridBlock):
|
||||
r"""ResNet V2 model from
|
||||
`"Identity Mappings in Deep Residual Networks"
|
||||
<https://arxiv.org/abs/1603.05027>`_ paper.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
block : HybridBlock
|
||||
Class for the residual block. Options are BasicBlockV1, BottleneckV1.
|
||||
layers : list of int
|
||||
Numbers of layers in each block
|
||||
channels : list of int
|
||||
Numbers of channels in each block. Length should be one larger than layers list.
|
||||
classes : int, default 1000
|
||||
Number of classification classes.
|
||||
thumbnail : bool, default False
|
||||
Enable thumbnail.
|
||||
"""
|
||||
def __init__(self, layers, channels, **kwargs):
|
||||
version_unit = kwargs.get('version_unit', 1)
|
||||
act_type = kwargs.get('version_act', 'prelu')
|
||||
self.act_type = act_type
|
||||
del kwargs['version_unit']
|
||||
del kwargs['version_act']
|
||||
super(ResNet, self).__init__(**kwargs)
|
||||
assert len(layers) == len(channels) - 1
|
||||
print(version_unit, act_type)
|
||||
if version_unit==1:
|
||||
block = BasicBlockV1
|
||||
elif version_unit==2:
|
||||
block = BasicBlockV2
|
||||
with self.name_scope():
|
||||
self.features = nn.HybridSequential(prefix='')
|
||||
#self.features.add(nn.BatchNorm(scale=False, center=False))
|
||||
#self.features.add(nn.BatchNorm())
|
||||
self.features.add(_conv3x3(channels[0], 1, 0))
|
||||
self.features.add(nn.BatchNorm(epsilon=2e-5))
|
||||
self.features.add(_act(act_type))
|
||||
|
||||
in_channels = channels[0]
|
||||
for i, num_layer in enumerate(layers):
|
||||
#stride = 1 if i == 0 else 2
|
||||
stride = 2
|
||||
self.features.add(self._make_layer(block, num_layer, channels[i+1],
|
||||
stride, i+1, in_channels=in_channels))
|
||||
in_channels = channels[i+1]
|
||||
#self.features.add(nn.BatchNorm())
|
||||
#self.features.add(nn.Activation('relu'))
|
||||
#self.features.add(nn.GlobalAvgPool2D())
|
||||
#self.features.add(nn.Flatten())
|
||||
|
||||
#self.output = nn.Dense(classes, in_units=in_channels)
|
||||
|
||||
def _make_layer(self, block, layers, channels, stride, stage_index, in_channels=0):
|
||||
layer = nn.HybridSequential(prefix='stage%d_'%stage_index)
|
||||
with layer.name_scope():
|
||||
#print(channels, in_channels)
|
||||
layer.add(block(channels, stride, True, in_channels=in_channels, act_type = self.act_type,
|
||||
prefix=''))
|
||||
for _ in range(layers-1):
|
||||
layer.add(block(channels, 1, False, in_channels=channels, act_type = self.act_type, prefix=''))
|
||||
return layer
|
||||
|
||||
def hybrid_forward(self, F, x):
|
||||
x = x-127.5
|
||||
x = x*0.0078125
|
||||
x = self.features(x)
|
||||
return x
|
||||
|
||||
|
||||
# Specification
|
||||
resnet_spec = {18: ('basic_block', [2, 2, 2, 2], [64, 64, 128, 256, 512]),
|
||||
34: ('basic_block', [3, 4, 6, 3], [64, 64, 128, 256, 512]),
|
||||
50: ('basic_block', [3, 4, 14, 3], [64, 64, 128, 256, 512]),
|
||||
100: ('basic_block', [3, 13, 30, 3], [64, 64, 128, 256, 512]),
|
||||
152: ('bottle_neck', [3, 8, 36, 3], [64, 256, 512, 1024, 2048])}
|
||||
|
||||
|
||||
# Constructor
|
||||
def get(num_layers, **kwargs):
|
||||
assert num_layers in resnet_spec, \
|
||||
"Invalid number of layers: %d. Options are %s"%(
|
||||
num_layers, str(resnet_spec.keys()))
|
||||
block_type, layers, channels = resnet_spec[num_layers]
|
||||
net = ResNet(layers, channels, **kwargs)
|
||||
return net
|
||||
|
||||
@@ -1,285 +0,0 @@
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import os
|
||||
import random
|
||||
import logging
|
||||
import sys
|
||||
import numbers
|
||||
import math
|
||||
import sklearn
|
||||
import datetime
|
||||
import numpy as np
|
||||
import cv2
|
||||
|
||||
import mxnet as mx
|
||||
from mxnet import ndarray as nd
|
||||
from mxnet import io
|
||||
from mxnet import recordio
|
||||
#sys.path.append(os.path.join(os.path.dirname(__file__), 'common'))
|
||||
sys.path.append(os.path.join(os.path.dirname(os.path.dirname(__file__)), 'src', 'common'))
|
||||
import face_preprocess
|
||||
import multiprocessing
|
||||
|
||||
logger = logging.getLogger()
|
||||
|
||||
|
||||
class FaceImageIter(io.DataIter):
|
||||
|
||||
def __init__(self, batch_size, data_shape,
|
||||
path_imgrec = None,
|
||||
shuffle=False, aug_list=None, mean = None,
|
||||
rand_mirror = False, cutoff = 0,
|
||||
data_name='data', label_name='softmax_label', **kwargs):
|
||||
super(FaceImageIter, self).__init__()
|
||||
assert path_imgrec
|
||||
if path_imgrec:
|
||||
logging.info('loading recordio %s...',
|
||||
path_imgrec)
|
||||
path_imgidx = path_imgrec[0:-4]+".idx"
|
||||
self.imgrec = recordio.MXIndexedRecordIO(path_imgidx, path_imgrec, 'r') # pylint: disable=redefined-variable-type
|
||||
s = self.imgrec.read_idx(0)
|
||||
header, _ = recordio.unpack(s)
|
||||
if header.flag>0:
|
||||
print('header0 label', header.label)
|
||||
self.header0 = (int(header.label[0]), int(header.label[1]))
|
||||
#assert(header.flag==1)
|
||||
self.imgidx = range(1, int(header.label[0]))
|
||||
self.id2range = {}
|
||||
self.seq_identity = range(int(header.label[0]), int(header.label[1]))
|
||||
for identity in self.seq_identity:
|
||||
s = self.imgrec.read_idx(identity)
|
||||
header, _ = recordio.unpack(s)
|
||||
a,b = int(header.label[0]), int(header.label[1])
|
||||
self.id2range[identity] = (a,b)
|
||||
count = b-a
|
||||
print('id2range', len(self.id2range))
|
||||
else:
|
||||
self.imgidx = list(self.imgrec.keys)
|
||||
if shuffle:
|
||||
self.seq = self.imgidx
|
||||
self.oseq = self.imgidx
|
||||
print(len(self.seq))
|
||||
else:
|
||||
self.seq = None
|
||||
|
||||
self.mean = mean
|
||||
self.nd_mean = None
|
||||
if self.mean:
|
||||
self.mean = np.array(self.mean, dtype=np.float32).reshape(1,1,3)
|
||||
self.nd_mean = mx.nd.array(self.mean).reshape((1,1,3))
|
||||
|
||||
self.check_data_shape(data_shape)
|
||||
self.provide_data = [(data_name, (batch_size,) + data_shape)]
|
||||
self.batch_size = batch_size
|
||||
self.data_shape = data_shape
|
||||
self.shuffle = shuffle
|
||||
self.image_size = '%d,%d'%(data_shape[1],data_shape[2])
|
||||
self.rand_mirror = rand_mirror
|
||||
print('rand_mirror', rand_mirror)
|
||||
self.cutoff = cutoff
|
||||
self.provide_label = [(label_name, (batch_size,))]
|
||||
#print(self.provide_label[0][1])
|
||||
self.cur = 0
|
||||
self.nbatch = 0
|
||||
self.is_init = False
|
||||
|
||||
|
||||
def reset(self):
    """Rewind the iterator to the first sample, reshuffling when enabled."""
    print('call reset()')
    self.cur = 0
    if self.shuffle:
        # Randomize sample order in place for the next epoch.
        random.shuffle(self.seq)
    if self.seq is None and self.imgrec is not None:
        # Sequential record mode: rewind the underlying record file.
        self.imgrec.reset()
def num_samples(self):
    """Return the total number of samples this iterator will yield per epoch."""
    return len(self.seq)
def next_sample(self):
    """Helper function for reading in next sample.

    Returns a ``(label, img, bbox, landmark)`` tuple, where ``img`` is the
    still-encoded image payload (decoded later by ``imdecode``).  Raises
    StopIteration when the epoch is exhausted.
    """
    #set total batch size, for example, 1800, and maximum size for each people, for example 45
    if self.seq is not None:
        # Indexed mode: walk self.seq (possibly shuffled) via self.cur.
        while True:
            if self.cur >= len(self.seq):
                raise StopIteration
            idx = self.seq[self.cur]
            self.cur += 1
            if self.imgrec is not None:
                # Read one packed record and unpack header + raw image bytes.
                s = self.imgrec.read_idx(idx)
                header, img = recordio.unpack(s)
                label = header.label
                if not isinstance(label, numbers.Number):
                    # Multi-field label: only the first entry is the class id.
                    label = label[0]
                # bbox/landmark are unavailable in record mode.
                return label, img, None, None
            else:
                # List-file mode: load the image bytes from disk.
                # NOTE(review): self.imglist is not set in the visible
                # __init__ path — confirm this branch is reachable.
                label, fname, bbox, landmark = self.imglist[idx]
                return label, self.read_image(fname), bbox, landmark
    else:
        # Sequential mode: stream records in file order until EOF.
        s = self.imgrec.read()
        if s is None:
            raise StopIteration
        header, img = recordio.unpack(s)
        return header.label, img, None, None
def brightness_aug(self, src, x):
    """Scale pixel values of ``src`` by a random factor in [1-x, 1+x].

    Mutates array inputs in place and returns the result.
    """
    scale = 1.0 + random.uniform(-x, x)
    src *= scale
    return src
def contrast_aug(self, src, x):
    """Randomly adjust contrast by blending ``src`` with its mean luminance.

    Mutates array inputs in place and returns the result.
    """
    factor = 1.0 + random.uniform(-x, x)
    # ITU-R BT.601 luma weights for RGB channels.
    luma_coef = np.array([[[0.299, 0.587, 0.114]]])
    weighted = src * luma_coef
    # Scalar offset: (1 - factor) times the mean luma of the image.
    mean_luma = (3.0 * (1.0 - factor) / weighted.size) * np.sum(weighted)
    src *= factor
    src += mean_luma
    return src
def saturation_aug(self, src, x):
    """Randomly adjust saturation by blending each pixel with its own grayscale value.

    Mutates array inputs in place and returns the result.
    """
    factor = 1.0 + random.uniform(-x, x)
    # ITU-R BT.601 luma weights for RGB channels.
    luma_coef = np.array([[[0.299, 0.587, 0.114]]])
    # Per-pixel grayscale, kept as (H, W, 1) to broadcast over channels.
    gray = np.sum(src * luma_coef, axis=2, keepdims=True)
    gray *= (1.0 - factor)
    src *= factor
    src += gray
    return src
def color_aug(self, img, x):
    """Apply brightness, contrast and saturation jitter in a random order."""
    ops = [self.brightness_aug, self.contrast_aug, self.saturation_aug]
    random.shuffle(ops)
    for op in ops:
        img = op(img, x)
    return img
def mirror_aug(self, img):
    """Horizontally flip ``img`` with probability 0.5.

    Mutates ``img`` (H, W, C ndarray) in place and returns it.
    """
    _rd = random.randint(0, 1)
    if _rd == 1:
        # BUG FIX: the original used xrange, which raises NameError on
        # Python 3; range is equivalent here (and fine on Python 2 too).
        for c in range(img.shape[2]):
            # fliplr on the (H, W) channel slice reverses the width axis.
            img[:, :, c] = np.fliplr(img[:, :, c])
    return img
def next(self):
    """Returns the next batch of data as an ``io.DataBatch``.

    Decodes, optionally mirrors/normalizes/cuts out each sample, and packs
    ``batch_size`` samples into preallocated NDArrays.  Raises StopIteration
    when the epoch ends before a full batch is assembled.
    """
    if not self.is_init:
        # Lazy first-use initialization so construction stays cheap.
        self.reset()
        self.is_init = True
    #print('in next', self.cur, self.labelcur)
    self.nbatch += 1
    batch_size = self.batch_size
    c, h, w = self.data_shape
    batch_data = nd.empty((batch_size, c, h, w))
    if self.provide_label is not None:
        # NOTE(review): batch_label is unbound when provide_label is None,
        # yet it is written to unconditionally below — confirm provide_label
        # is always set (it is in the visible __init__).
        batch_label = nd.empty(self.provide_label[0][1])
    i = 0
    try:
        while i < batch_size:
            label, s, bbox, landmark = self.next_sample()
            _data = self.imdecode(s)
            if self.rand_mirror:
                # Coin-flip horizontal mirror (flip along the width axis).
                _rd = random.randint(0, 1)
                if _rd == 1:
                    _data = mx.ndarray.flip(data=_data, axis=1)
            if self.nd_mean is not None:
                # Normalize uint8 pixels to roughly [-1, 1]:
                # (x - 127.5) * 0.0078125 == (x - 127.5) / 128.
                _data = _data.astype('float32')
                _data -= 127.5
                _data *= 0.0078125
            if self.cutoff > 0:
                # Cutout augmentation: gray out a random square patch.
                centerh = random.randint(0, _data.shape[0] - 1)
                centerw = random.randint(0, _data.shape[1] - 1)
                half = self.cutoff // 2
                starth = max(0, centerh - half)
                endh = min(_data.shape[0], centerh + half)
                startw = max(0, centerw - half)
                endw = min(_data.shape[1], centerw + half)
                _data = _data.astype('float32')
                #print(starth, endh, startw, endw, _data.shape)
                _data[starth:endh, startw:endw, :] = 127.5
            data = [_data]
            try:
                self.check_valid_image(data)
            except RuntimeError as e:
                # Skip undecodable/degenerate images instead of aborting.
                logging.debug('Invalid image, skipping: %s', str(e))
                continue
            #print('aa',data[0].shape)
            #data = self.augmentation_transform(data)
            #print('bb',data[0].shape)
            for datum in data:
                assert i < batch_size, 'Batch size must be multiples of augmenter output length'
                #print(datum.shape)
                # HWC -> CHW before storing into the batch tensor.
                batch_data[i][:] = self.postprocess_data(datum)
                batch_label[i][:] = label
                i += 1
    except StopIteration:
        # A partially filled final batch is discarded, not padded.
        if i < batch_size:
            raise StopIteration

    return io.DataBatch([batch_data], [batch_label], batch_size - i)
def check_data_shape(self, data_shape):
    """Validate that ``data_shape`` is a 3-channel CxHxW triple.

    Raises ValueError when the shape is malformed.
    """
    if len(data_shape) != 3:
        raise ValueError('data_shape should have length 3, with dimensions CxHxW')
    if data_shape[0] != 3:
        raise ValueError('This iterator expects inputs to have 3 channels.')
def check_valid_image(self, data):
    """Raise RuntimeError when the first datum decoded to a shapeless scalar."""
    if not data[0].shape:
        raise RuntimeError('Data shape is wrong')
def imdecode(self, s):
    """Decode an encoded image string/bytes into an NDArray.

    See mx.img.imdecode for more details.
    """
    return mx.image.imdecode(s)  # mx.ndarray
def read_image(self, fname):
    """Read image file ``fname`` (relative to ``self.path_root``) as raw bytes.

    Example usage:
    ----------
    >>> dataIter.read_image('Face.jpg') # returns decoded raw bytes.
    """
    with open(os.path.join(self.path_root, fname), 'rb') as fin:
        return fin.read()
def augmentation_transform(self, data):
    """Run each augmenter in ``self.auglist`` over every datum.

    Each augmenter maps one datum to a list, so the output list can grow.
    """
    out = data
    for aug in self.auglist:
        expanded = []
        for src in out:
            expanded.extend(aug(src))
        out = expanded
    return out
def postprocess_data(self, datum):
    """Final postprocessing step before image is loaded into the batch."""
    # HWC (height, width, channel) -> CHW, the layout the batch tensor expects.
    return nd.transpose(datum, axes=(2, 0, 1))
class FaceImageIterList(io.DataIter):
    """Data iterator that draws each batch from a randomly chosen member iterator."""

    def __init__(self, iter_list):
        assert len(iter_list) > 0
        # Advertise the first member's shapes; all members are assumed alike.
        self.provide_data = iter_list[0].provide_data
        self.provide_label = iter_list[0].provide_label
        self.iter_list = iter_list
        self.cur_iter = None  # chosen lazily on each next() call

    def reset(self):
        # NOTE(review): cur_iter is None until next() has run at least once —
        # confirm callers never reset() a fresh instance.
        self.cur_iter.reset()

    def next(self):
        self.cur_iter = random.choice(self.iter_list)
        while True:
            try:
                batch = self.cur_iter.next()
            except StopIteration:
                # Exhausted: rewind this member and retry (iterates forever).
                self.cur_iter.reset()
                continue
            return batch
747
gluon/train.py
747
gluon/train.py
@@ -1,747 +0,0 @@
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import os
|
||||
import sys
|
||||
import math
|
||||
import random
|
||||
import logging
|
||||
import time
|
||||
import pickle
|
||||
import numpy as np
|
||||
import sklearn
|
||||
from image_iter import FaceImageIter
|
||||
from age_iter import FaceImageIter as FaceImageIterAge
|
||||
#from image_iter import FaceImageIterList
|
||||
import mxnet as mx
|
||||
from mxnet import gluon
|
||||
from mxnet import profiler
|
||||
from mxnet.gluon import nn
|
||||
from mxnet import ndarray as nd
|
||||
from mxnet import autograd as ag
|
||||
from mxnet.test_utils import get_mnist_iterator
|
||||
from mxnet.metric import Accuracy, TopKAccuracy, CompositeEvalMetric
|
||||
import argparse
|
||||
import mxnet.optimizer as optimizer
|
||||
#sys.path.append(os.path.join(os.path.dirname(os.path.dirname(__file__)), 'src', 'eval'))
|
||||
import verification
|
||||
#sys.path.append(os.path.join(os.path.dirname(__file__), 'common'))
|
||||
sys.path.append(os.path.join(os.path.dirname(os.path.dirname(__file__)), 'src', 'common'))
|
||||
import face_image
|
||||
#sys.path.append(os.path.join(os.path.dirname(__file__), 'eval'))
|
||||
sys.path.append(os.path.join(os.path.dirname(__file__), 'blocks'))
|
||||
import fresnet
|
||||
from UDD import *
|
||||
#import finception_resnet_v2
|
||||
#import fmobilenet
|
||||
#import fmobilenetv2
|
||||
#import fmobilefacenet
|
||||
#import fxception
|
||||
#import fdensenet
|
||||
#import fdpn
|
||||
#import fnasnet
|
||||
#import spherenet
|
||||
#sys.path.append(os.path.join(os.path.dirname(__file__), 'losses'))
|
||||
#import center_loss
|
||||
|
||||
|
||||
# Root logger for training progress output.
logger = logging.getLogger()
logger.setLevel(logging.INFO)

# Number of age buckets for the age head; predictions carry 2 logits per bucket.
AGE = 100

# Parsed command-line options; populated by parse_args() before training starts.
args = None
class AccMetric(mx.metric.EvalMetric):
    """Classification accuracy over (possibly multi-column) integer labels."""

    def __init__(self):
        # Axis along which class scores lie in each prediction tensor.
        self.axis = 1
        super(AccMetric, self).__init__(
            'acc', axis=self.axis,
            output_names=None, label_names=None)
        self.losses = []
        # Number of update() calls seen (diagnostic only).
        self.count = 0

    def update(self, labels, preds):
        """Accumulate correct-prediction counts from one batch."""
        self.count += 1
        #preds = [preds[1]] #use softmax output
        for label, pred_label in zip(labels, preds):
            if pred_label.shape != label.shape:
                # Raw scores: reduce to the argmax class index.
                pred_label = mx.ndarray.argmax(pred_label, axis=self.axis)
            pred_label = pred_label.asnumpy().astype('int32').flatten()
            label = label.asnumpy()
            if label.ndim == 2:
                # Multi-field labels: the first column is the class id.
                label = label[:, 0]
            label = label.astype('int32').flatten()
            assert label.shape == pred_label.shape
            self.sum_metric += (pred_label.flat == label.flat).sum()
            self.num_inst += len(pred_label.flat)
class LossValueMetric(mx.metric.EvalMetric):
    """Tracks the running mean of a scalar loss emitted as the last network output."""

    def __init__(self):
        self.axis = 1
        super(LossValueMetric, self).__init__(
            'lossvalue', axis=self.axis,
            output_names=None, label_names=None)
        self.losses = []

    def update(self, labels, preds):
        """Accumulate the scalar loss from the last prediction output."""
        value = preds[-1].asnumpy()[0]
        self.sum_metric += value
        self.num_inst += 1.0
        # Value unused, but asnumpy() forces a device sync — kept deliberately.
        gt_label = preds[-2].asnumpy()
        #print(gt_label)
class MAEMetric(mx.metric.EvalMetric):
    """Mean absolute error (in years) for the binned age-estimation head.

    The age label is cumulatively encoded: the ground-truth age equals the
    number of non-zero label entries; the prediction sums AGE per-bin binary
    argmax decisions (2 logits per bin).
    """

    def __init__(self):
        self.axis = 1
        super(MAEMetric, self).__init__(
            'MAE', axis=self.axis,
            output_names=None, label_names=None)
        self.losses = []
        self.count = 0

    def update(self, labels, preds):
        """Accumulate one batch's mean absolute age error."""
        self.count += 1
        label = labels[0].asnumpy()
        # Ground-truth age = number of "on" bins in the cumulative encoding.
        label_age = np.count_nonzero(label, axis=1)
        # BUG FIX: np.int was deprecated in NumPy 1.20 and removed in 1.24;
        # the builtin int is what the alias always meant.
        pred_age = np.zeros(label_age.shape, dtype=int)
        #pred_age = np.zeros( label_age.shape, dtype=np.float32)
        pred = preds[0].asnumpy()
        # BUG FIX: xrange raises NameError on Python 3; range is equivalent.
        for i in range(AGE):
            _pred = pred[:, i * 2:(i * 2 + 2)]
            _pred = np.argmax(_pred, axis=1)
            #pred = pred[:,1]
            pred_age += _pred
        mae = np.mean(np.abs(label_age - pred_age))
        self.sum_metric += mae
        self.num_inst += 1.0
class CUMMetric(mx.metric.EvalMetric):
    """Cumulative score: fraction of samples whose absolute age error is < n years.

    Uses the same cumulative label encoding as MAEMetric (2 logits per age bin).
    """

    def __init__(self, n=5):
        self.axis = 1
        # Error threshold (years) for counting a prediction as a hit.
        self.n = n
        super(CUMMetric, self).__init__(
            'CUM_%d' % n, axis=self.axis,
            output_names=None, label_names=None)
        self.losses = []
        self.count = 0

    def update(self, labels, preds):
        """Accumulate one batch's within-threshold hit count."""
        self.count += 1
        label = labels[0].asnumpy()
        # Ground-truth age = number of non-zero entries in the cumulative encoding.
        label_age = np.count_nonzero(label, axis=1)
        # BUG FIX: np.int was deprecated in NumPy 1.20 and removed in 1.24;
        # the builtin int is what the alias always meant.
        pred_age = np.zeros(label_age.shape, dtype=int)
        pred = preds[0].asnumpy()
        # BUG FIX: xrange raises NameError on Python 3; range is equivalent.
        for i in range(AGE):
            _pred = pred[:, i * 2:(i * 2 + 2)]
            _pred = np.argmax(_pred, axis=1)
            #pred = pred[:,1]
            pred_age += _pred
        diff = np.abs(label_age - pred_age)
        cum = np.sum(diff < self.n)
        self.sum_metric += cum
        self.num_inst += len(label_age)
def parse_args():
    """Parse command-line options into the module-level ``args`` and return them.

    Mutates the global ``args`` as a side effect; every other function in this
    module reads configuration from it.
    """
    global args
    parser = argparse.ArgumentParser(description='Train face network')
    # general
    parser.add_argument('--data-dir', default='', help='training set directory')
    parser.add_argument('--gender-data-dir', default='', help='training set directory')
    parser.add_argument('--age-data-dir', default='', help='training set directory')
    parser.add_argument('--prefix', default='../model/model', help='directory to save model.')
    parser.add_argument('--pretrained', default='', help='pretrained model to load')
    parser.add_argument('--ckpt', type=int, default=1, help='checkpoint saving option. 0: discard saving. 1: save when necessary. 2: always save')
    parser.add_argument('--loss-type', type=int, default=4, help='loss type')
    parser.add_argument('--verbose', type=int, default=2000, help='do verification testing and model saving every verbose batches')
    parser.add_argument('--max-steps', type=int, default=0, help='max training batches')
    parser.add_argument('--end-epoch', type=int, default=100000, help='training epoch size.')
    # network architecture selection, e.g. 'r50' = resnet with 50 layers
    parser.add_argument('--network', default='r50', help='specify network')
    parser.add_argument('--version-output', type=str, default='E', help='network embedding output config')
    parser.add_argument('--version-unit', type=int, default=1, help='resnet unit config')
    parser.add_argument('--version-act', type=str, default='relu', help='network activation config')
    # optimizer hyperparameters
    parser.add_argument('--lr', type=float, default=0.1, help='start learning rate')
    parser.add_argument('--lr-steps', type=str, default='', help='steps of lr changing')
    parser.add_argument('--wd', type=float, default=0.0005, help='weight decay')
    parser.add_argument('--fc7-wd-mult', type=float, default=1.0, help='weight decay mult for fc7')
    parser.add_argument('--bn-mom', type=float, default=0.9, help='bn mom')
    parser.add_argument('--mom', type=float, default=0.9, help='momentum')
    parser.add_argument('--emb-size', type=int, default=512, help='embedding length')
    parser.add_argument('--per-batch-size', type=int, default=128, help='batch size in each context')
    # margin-loss hyperparameters (arcface/cosface-style heads)
    parser.add_argument('--margin-m', type=float, default=0.5, help='margin for loss')
    parser.add_argument('--margin-s', type=float, default=64.0, help='scale for feature')
    parser.add_argument('--margin-a', type=float, default=1.0, help='')
    parser.add_argument('--margin-b', type=float, default=0.0, help='')
    # data augmentation
    parser.add_argument('--rand-mirror', type=int, default=1, help='if do random mirror in training')
    parser.add_argument('--cutoff', type=int, default=0, help='cut off aug')
    parser.add_argument('--eval', type=str, default='lfw,cfp_fp,agedb_30', help='verification targets')
    # '' = face recognition; 'age' / 'gender' select the attribute heads
    parser.add_argument('--task', type=str, default='', help='')
    parser.add_argument('--mode', type=str, default='gluon', help='')
    args = parser.parse_args()
    return args
def get_model():
    """Build the network block matching the configured task.

    Face recognition (empty task) gets a margin-softmax or plain dense head;
    'age'/'gender' tasks get the attribute-estimation block.
    """
    #print('init resnet', args.num_layers)
    if args.task == '':
        # Margin hyperparameter > 0 selects the arcface-style head.
        if args.margin_a > 0.0:
            return ArcMarginBlock(args, prefix='')
        return DenseBlock(args, prefix='')
    # AGE or GENDER
    return GABlock(args, prefix='')
||||
#def get_symbol(args, arg_params, aux_params):
|
||||
# data_shape = (args.image_channel,args.image_h,args.image_w)
|
||||
# image_shape = ",".join([str(x) for x in data_shape])
|
||||
# margin_symbols = []
|
||||
# if args.network[0]=='d':
|
||||
# embedding = fdensenet.get_symbol(args.emb_size, args.num_layers,
|
||||
# version_se=args.version_se, version_input=args.version_input,
|
||||
# version_output=args.version_output, version_unit=args.version_unit)
|
||||
# elif args.network[0]=='m':
|
||||
# print('init mobilenet', args.num_layers)
|
||||
# if args.num_layers==1:
|
||||
# embedding = fmobilenet.get_symbol(args.emb_size,
|
||||
# version_se=args.version_se, version_input=args.version_input,
|
||||
# version_output=args.version_output, version_unit=args.version_unit)
|
||||
# else:
|
||||
# embedding = fmobilenetv2.get_symbol(args.emb_size)
|
||||
# elif args.network[0]=='i':
|
||||
# print('init inception-resnet-v2', args.num_layers)
|
||||
# embedding = finception_resnet_v2.get_symbol(args.emb_size,
|
||||
# version_se=args.version_se, version_input=args.version_input,
|
||||
# version_output=args.version_output, version_unit=args.version_unit)
|
||||
# elif args.network[0]=='x':
|
||||
# print('init xception', args.num_layers)
|
||||
# embedding = fxception.get_symbol(args.emb_size,
|
||||
# version_se=args.version_se, version_input=args.version_input,
|
||||
# version_output=args.version_output, version_unit=args.version_unit)
|
||||
# elif args.network[0]=='p':
|
||||
# print('init dpn', args.num_layers)
|
||||
# embedding = fdpn.get_symbol(args.emb_size, args.num_layers,
|
||||
# version_se=args.version_se, version_input=args.version_input,
|
||||
# version_output=args.version_output, version_unit=args.version_unit)
|
||||
# elif args.network[0]=='n':
|
||||
# print('init nasnet', args.num_layers)
|
||||
# embedding = fnasnet.get_symbol(args.emb_size)
|
||||
# elif args.network[0]=='s':
|
||||
# print('init spherenet', args.num_layers)
|
||||
# embedding = spherenet.get_symbol(args.emb_size, args.num_layers)
|
||||
# elif args.network[0]=='y':
|
||||
# print('init mobilefacenet', args.num_layers)
|
||||
# embedding = fmobilefacenet.get_symbol(args.emb_size, bn_mom = args.bn_mom, wd_mult = args.fc7_wd_mult)
|
||||
# else:
|
||||
# print('init resnet', args.num_layers)
|
||||
# embedding = fresnet.get_symbol(args.emb_size, args.num_layers,
|
||||
# version_se=args.version_se, version_input=args.version_input,
|
||||
# version_output=args.version_output, version_unit=args.version_unit,
|
||||
# version_act=args.version_act)
|
||||
# all_label = mx.symbol.Variable('softmax_label')
|
||||
# gt_label = all_label
|
||||
# extra_loss = None
|
||||
# _weight = mx.symbol.Variable("fc7_weight", shape=(args.num_classes, args.emb_size), lr_mult=1.0, wd_mult=args.fc7_wd_mult)
|
||||
# if args.loss_type==0: #softmax
|
||||
# _bias = mx.symbol.Variable('fc7_bias', lr_mult=2.0, wd_mult=0.0)
|
||||
# fc7 = mx.sym.FullyConnected(data=embedding, weight = _weight, bias = _bias, num_hidden=args.num_classes, name='fc7')
|
||||
# elif args.loss_type==1: #sphere
|
||||
# _weight = mx.symbol.L2Normalization(_weight, mode='instance')
|
||||
# fc7 = mx.sym.LSoftmax(data=embedding, label=gt_label, num_hidden=args.num_classes,
|
||||
# weight = _weight,
|
||||
# beta=args.beta, margin=args.margin, scale=args.scale,
|
||||
# beta_min=args.beta_min, verbose=1000, name='fc7')
|
||||
# elif args.loss_type==2:
|
||||
# s = args.margin_s
|
||||
# m = args.margin_m
|
||||
# assert(s>0.0)
|
||||
# assert(m>0.0)
|
||||
# _weight = mx.symbol.L2Normalization(_weight, mode='instance')
|
||||
# nembedding = mx.symbol.L2Normalization(embedding, mode='instance', name='fc1n')*s
|
||||
# fc7 = mx.sym.FullyConnected(data=nembedding, weight = _weight, no_bias = True, num_hidden=args.num_classes, name='fc7')
|
||||
# s_m = s*m
|
||||
# gt_one_hot = mx.sym.one_hot(gt_label, depth = args.num_classes, on_value = s_m, off_value = 0.0)
|
||||
# fc7 = fc7-gt_one_hot
|
||||
# elif args.loss_type==4:
|
||||
# s = args.margin_s
|
||||
# m = args.margin_m
|
||||
# assert s>0.0
|
||||
# assert m>=0.0
|
||||
# assert m<(math.pi/2)
|
||||
# _weight = mx.symbol.L2Normalization(_weight, mode='instance')
|
||||
# nembedding = mx.symbol.L2Normalization(embedding, mode='instance', name='fc1n')*s
|
||||
# fc7 = mx.sym.FullyConnected(data=nembedding, weight = _weight, no_bias = True, num_hidden=args.num_classes, name='fc7')
|
||||
# zy = mx.sym.pick(fc7, gt_label, axis=1)
|
||||
# cos_t = zy/s
|
||||
# cos_m = math.cos(m)
|
||||
# sin_m = math.sin(m)
|
||||
# mm = math.sin(math.pi-m)*m
|
||||
# #threshold = 0.0
|
||||
# threshold = math.cos(math.pi-m)
|
||||
# if args.easy_margin:
|
||||
# cond = mx.symbol.Activation(data=cos_t, act_type='relu')
|
||||
# else:
|
||||
# cond_v = cos_t - threshold
|
||||
# cond = mx.symbol.Activation(data=cond_v, act_type='relu')
|
||||
# body = cos_t*cos_t
|
||||
# body = 1.0-body
|
||||
# sin_t = mx.sym.sqrt(body)
|
||||
# new_zy = cos_t*cos_m
|
||||
# b = sin_t*sin_m
|
||||
# new_zy = new_zy - b
|
||||
# new_zy = new_zy*s
|
||||
# if args.easy_margin:
|
||||
# zy_keep = zy
|
||||
# else:
|
||||
# zy_keep = zy - s*mm
|
||||
# new_zy = mx.sym.where(cond, new_zy, zy_keep)
|
||||
#
|
||||
# diff = new_zy - zy
|
||||
# diff = mx.sym.expand_dims(diff, 1)
|
||||
# gt_one_hot = mx.sym.one_hot(gt_label, depth = args.num_classes, on_value = 1.0, off_value = 0.0)
|
||||
# body = mx.sym.broadcast_mul(gt_one_hot, diff)
|
||||
# fc7 = fc7+body
|
||||
# elif args.loss_type==5:
|
||||
# s = args.margin_s
|
||||
# m = args.margin_m
|
||||
# assert s>0.0
|
||||
# _weight = mx.symbol.L2Normalization(_weight, mode='instance')
|
||||
# nembedding = mx.symbol.L2Normalization(embedding, mode='instance', name='fc1n')*s
|
||||
# fc7 = mx.sym.FullyConnected(data=nembedding, weight = _weight, no_bias = True, num_hidden=args.num_classes, name='fc7')
|
||||
# if args.margin_a!=1.0 or args.margin_m!=0.0 or args.margin_b!=0.0:
|
||||
# if args.margin_a==1.0 and args.margin_m==0.0:
|
||||
# s_m = s*args.margin_b
|
||||
# gt_one_hot = mx.sym.one_hot(gt_label, depth = args.num_classes, on_value = s_m, off_value = 0.0)
|
||||
# fc7 = fc7-gt_one_hot
|
||||
# else:
|
||||
# zy = mx.sym.pick(fc7, gt_label, axis=1)
|
||||
# cos_t = zy/s
|
||||
# t = mx.sym.arccos(cos_t)
|
||||
# if args.margin_a!=1.0:
|
||||
# t = t*args.margin_a
|
||||
# if args.margin_m>0.0:
|
||||
# t = t+args.margin_m
|
||||
# body = mx.sym.cos(t)
|
||||
# if args.margin_b>0.0:
|
||||
# body = body - args.margin_b
|
||||
# new_zy = body*s
|
||||
# diff = new_zy - zy
|
||||
# diff = mx.sym.expand_dims(diff, 1)
|
||||
# gt_one_hot = mx.sym.one_hot(gt_label, depth = args.num_classes, on_value = 1.0, off_value = 0.0)
|
||||
# body = mx.sym.broadcast_mul(gt_one_hot, diff)
|
||||
# fc7 = fc7+body
|
||||
# out_list = [mx.symbol.BlockGrad(embedding)]
|
||||
# softmax = mx.symbol.SoftmaxOutput(data=fc7, label = gt_label, name='softmax', normalization='valid')
|
||||
# out_list.append(softmax)
|
||||
# out = mx.symbol.Group(out_list)
|
||||
# return (out, arg_params, aux_params)
|
||||
#
|
||||
def train_net(args):
|
||||
ctx = []
|
||||
cvd = os.environ['CUDA_VISIBLE_DEVICES'].strip()
|
||||
if len(cvd)>0:
|
||||
for i in xrange(len(cvd.split(','))):
|
||||
ctx.append(mx.gpu(i))
|
||||
if len(ctx)==0:
|
||||
ctx = [mx.cpu()]
|
||||
print('use cpu')
|
||||
else:
|
||||
print('gpu num:', len(ctx))
|
||||
prefix = args.prefix
|
||||
prefix_dir = os.path.dirname(prefix)
|
||||
if not os.path.exists(prefix_dir):
|
||||
os.makedirs(prefix_dir)
|
||||
end_epoch = args.end_epoch
|
||||
args.ctx_num = len(ctx)
|
||||
args.num_layers = int(args.network[1:])
|
||||
print('num_layers', args.num_layers)
|
||||
if args.per_batch_size==0:
|
||||
args.per_batch_size = 128
|
||||
args.batch_size = args.per_batch_size*args.ctx_num
|
||||
args.image_channel = 3
|
||||
|
||||
data_dir = args.data_dir
|
||||
if args.task=='gender':
|
||||
data_dir = args.gender_data_dir
|
||||
elif args.task=='age':
|
||||
data_dir = args.age_data_dir
|
||||
print('data dir', data_dir)
|
||||
path_imgrec = None
|
||||
path_imglist = None
|
||||
prop = face_image.load_property(data_dir)
|
||||
args.num_classes = prop.num_classes
|
||||
image_size = prop.image_size
|
||||
args.image_h = image_size[0]
|
||||
args.image_w = image_size[1]
|
||||
print('image_size', image_size)
|
||||
assert(args.num_classes>0)
|
||||
print('num_classes', args.num_classes)
|
||||
path_imgrec = os.path.join(data_dir, "train.rec")
|
||||
|
||||
|
||||
print('Called with argument:', args)
|
||||
data_shape = (args.image_channel,image_size[0],image_size[1])
|
||||
mean = None
|
||||
|
||||
begin_epoch = 0
|
||||
net = get_model()
|
||||
#if args.task=='':
|
||||
# test_net = get_model_test(net)
|
||||
#print(net.__class__)
|
||||
#net = net0[0]
|
||||
if args.network[0]=='r' or args.network[0]=='y':
|
||||
initializer = mx.init.Xavier(rnd_type='gaussian', factor_type="out", magnitude=2) #resnet style
|
||||
elif args.network[0]=='i' or args.network[0]=='x':
|
||||
initializer = mx.init.Xavier(rnd_type='gaussian', factor_type="in", magnitude=2) #inception
|
||||
else:
|
||||
initializer = mx.init.Xavier(rnd_type='uniform', factor_type="in", magnitude=2)
|
||||
net.hybridize()
|
||||
if args.mode=='gluon':
|
||||
if len(args.pretrained)==0:
|
||||
pass
|
||||
else:
|
||||
net.load_params(args.pretrained, allow_missing=True, ignore_extra = True)
|
||||
net.initialize(initializer)
|
||||
net.collect_params().reset_ctx(ctx)
|
||||
|
||||
val_iter = None
|
||||
if args.task=='':
|
||||
train_iter = FaceImageIter(
|
||||
batch_size = args.batch_size,
|
||||
data_shape = data_shape,
|
||||
path_imgrec = path_imgrec,
|
||||
shuffle = True,
|
||||
rand_mirror = args.rand_mirror,
|
||||
mean = mean,
|
||||
cutoff = args.cutoff,
|
||||
)
|
||||
else:
|
||||
train_iter = FaceImageIterAge(
|
||||
batch_size = args.batch_size,
|
||||
data_shape = data_shape,
|
||||
path_imgrec = path_imgrec,
|
||||
task = args.task,
|
||||
shuffle = True,
|
||||
rand_mirror = args.rand_mirror,
|
||||
mean = mean,
|
||||
cutoff = args.cutoff,
|
||||
)
|
||||
|
||||
if args.task=='age':
|
||||
metric = CompositeEvalMetric([MAEMetric(), CUMMetric()])
|
||||
elif args.task=='gender':
|
||||
metric = CompositeEvalMetric([AccMetric()])
|
||||
else:
|
||||
metric = CompositeEvalMetric([AccMetric()])
|
||||
|
||||
ver_list = []
|
||||
ver_name_list = []
|
||||
if args.task=='':
|
||||
for name in args.eval.split(','):
|
||||
path = os.path.join(data_dir,name+".bin")
|
||||
if os.path.exists(path):
|
||||
data_set = verification.load_bin(path, image_size)
|
||||
ver_list.append(data_set)
|
||||
ver_name_list.append(name)
|
||||
print('ver', name)
|
||||
|
||||
def ver_test(nbatch):
|
||||
results = []
|
||||
for i in xrange(len(ver_list)):
|
||||
acc1, std1, acc2, std2, xnorm, embeddings_list = verification.test(ver_list[i], net, ctx, batch_size = args.batch_size)
|
||||
print('[%s][%d]XNorm: %f' % (ver_name_list[i], nbatch, xnorm))
|
||||
#print('[%s][%d]Accuracy: %1.5f+-%1.5f' % (ver_name_list[i], nbatch, acc1, std1))
|
||||
print('[%s][%d]Accuracy-Flip: %1.5f+-%1.5f' % (ver_name_list[i], nbatch, acc2, std2))
|
||||
results.append(acc2)
|
||||
return results
|
||||
|
||||
def val_test(nbatch=0):
|
||||
acc = 0.0
|
||||
#if args.task=='age':
|
||||
if len(args.age_data_dir)>0:
|
||||
val_iter = FaceImageIterAge(
|
||||
batch_size = args.batch_size,
|
||||
data_shape = data_shape,
|
||||
path_imgrec = os.path.join(args.age_data_dir, 'val.rec'),
|
||||
task = args.task,
|
||||
shuffle = False,
|
||||
rand_mirror = False,
|
||||
mean = mean,
|
||||
)
|
||||
_metric = MAEMetric()
|
||||
val_metric = mx.metric.create(_metric)
|
||||
val_metric.reset()
|
||||
_metric2 = CUMMetric()
|
||||
val_metric2 = mx.metric.create(_metric2)
|
||||
val_metric2.reset()
|
||||
val_iter.reset()
|
||||
for batch in val_iter:
|
||||
data = gluon.utils.split_and_load(batch.data[0], ctx_list=ctx, batch_axis=0)
|
||||
label = gluon.utils.split_and_load(batch.label[0], ctx_list=ctx, batch_axis=0)
|
||||
outputs = []
|
||||
for x in data:
|
||||
outputs.append(net(x)[2])
|
||||
val_metric.update(label, outputs)
|
||||
val_metric2.update(label, outputs)
|
||||
_value = val_metric.get_name_value()[0][1]
|
||||
print('[%d][VMAE]: %f'%(nbatch, _value))
|
||||
_value = val_metric2.get_name_value()[0][1]
|
||||
if args.task=='age':
|
||||
acc = _value
|
||||
print('[%d][VCUM]: %f'%(nbatch, _value))
|
||||
if len(args.gender_data_dir)>0:
|
||||
val_iter = FaceImageIterAge(
|
||||
batch_size = args.batch_size,
|
||||
data_shape = data_shape,
|
||||
path_imgrec = os.path.join(args.gender_data_dir, 'val.rec'),
|
||||
task = args.task,
|
||||
shuffle = False,
|
||||
rand_mirror = False,
|
||||
mean = mean,
|
||||
)
|
||||
_metric = AccMetric()
|
||||
val_metric = mx.metric.create(_metric)
|
||||
val_metric.reset()
|
||||
val_iter.reset()
|
||||
for batch in val_iter:
|
||||
data = gluon.utils.split_and_load(batch.data[0], ctx_list=ctx, batch_axis=0)
|
||||
label = gluon.utils.split_and_load(batch.label[0], ctx_list=ctx, batch_axis=0)
|
||||
outputs = []
|
||||
for x in data:
|
||||
outputs.append(net(x)[1])
|
||||
val_metric.update(label, outputs)
|
||||
_value = val_metric.get_name_value()[0][1]
|
||||
if args.task=='gender':
|
||||
acc = _value
|
||||
print('[%d][VACC]: %f'%(nbatch, _value))
|
||||
return acc
|
||||
|
||||
|
||||
total_time = 0
|
||||
num_epochs = 0
|
||||
best_acc = [0]
|
||||
highest_acc = [0.0, 0.0] #lfw and target
|
||||
global_step = [0]
|
||||
save_step = [0]
|
||||
if len(args.lr_steps)==0:
|
||||
lr_steps = [100000, 140000, 160000]
|
||||
p = 512.0/args.batch_size
|
||||
for l in xrange(len(lr_steps)):
|
||||
lr_steps[l] = int(lr_steps[l]*p)
|
||||
else:
|
||||
lr_steps = [int(x) for x in args.lr_steps.split(',')]
|
||||
print('lr_steps', lr_steps)
|
||||
|
||||
kv = mx.kv.create('device')
|
||||
#kv = mx.kv.create('local')
|
||||
#_rescale = 1.0/args.ctx_num
|
||||
#opt = optimizer.SGD(learning_rate=args.lr, momentum=args.mom, wd=args.wd, rescale_grad=_rescale)
|
||||
#opt = optimizer.SGD(learning_rate=args.lr, momentum=args.mom, wd=args.wd)
|
||||
if args.mode=='gluon':
|
||||
trainer = gluon.Trainer(net.collect_params(), 'sgd',
|
||||
{'learning_rate': args.lr, 'wd': args.wd, 'momentum': args.mom, 'multi_precision': True},
|
||||
kvstore=kv)
|
||||
else:
|
||||
_rescale = 1.0/args.ctx_num
|
||||
opt = optimizer.SGD(learning_rate=args.lr, momentum=args.mom, wd=args.wd, rescale_grad=_rescale)
|
||||
_cb = mx.callback.Speedometer(args.batch_size, 20)
|
||||
arg_params = None
|
||||
aux_params = None
|
||||
data = mx.sym.var('data')
|
||||
label = mx.sym.var('softmax_label')
|
||||
if args.margin_a>0.0:
|
||||
fc7 = net(data, label)
|
||||
else:
|
||||
fc7 = net(data)
|
||||
#sym = mx.symbol.SoftmaxOutput(data=fc7, label = label, name='softmax', normalization='valid')
|
||||
ceop = gluon.loss.SoftmaxCrossEntropyLoss()
|
||||
loss = ceop(fc7, label)
|
||||
#loss = loss/args.per_batch_size
|
||||
loss = mx.sym.mean(loss)
|
||||
sym = mx.sym.Group( [mx.symbol.BlockGrad(fc7), mx.symbol.MakeLoss(loss, name='softmax')] )
|
||||
|
||||
def _batch_callback():
|
||||
mbatch = global_step[0]
|
||||
global_step[0]+=1
|
||||
for _lr in lr_steps:
|
||||
if mbatch==_lr:
|
||||
args.lr *= 0.1
|
||||
if args.mode=='gluon':
|
||||
trainer.set_learning_rate(args.lr)
|
||||
else:
|
||||
opt.lr = args.lr
|
||||
print('lr change to', args.lr)
|
||||
break
|
||||
|
||||
#_cb(param)
|
||||
if mbatch%1000==0:
|
||||
print('lr-batch-epoch:',args.lr, mbatch)
|
||||
|
||||
if mbatch>0 and mbatch%args.verbose==0:
|
||||
save_step[0]+=1
|
||||
msave = save_step[0]
|
||||
do_save = False
|
||||
is_highest = False
|
||||
if args.task=='age' or args.task=='gender':
|
||||
acc = val_test(mbatch)
|
||||
if acc>=highest_acc[-1]:
|
||||
highest_acc[-1] = acc
|
||||
is_highest = True
|
||||
do_save = True
|
||||
else:
|
||||
acc_list = ver_test(mbatch)
|
||||
if len(acc_list)>0:
|
||||
lfw_score = acc_list[0]
|
||||
if lfw_score>highest_acc[0]:
|
||||
highest_acc[0] = lfw_score
|
||||
if lfw_score>=0.998:
|
||||
do_save = True
|
||||
if acc_list[-1]>=highest_acc[-1]:
|
||||
highest_acc[-1] = acc_list[-1]
|
||||
if lfw_score>=0.99:
|
||||
do_save = True
|
||||
is_highest = True
|
||||
if args.ckpt==0:
|
||||
do_save = False
|
||||
elif args.ckpt>1:
|
||||
do_save = True
|
||||
if do_save:
|
||||
print('saving', msave)
|
||||
#print('saving gluon params')
|
||||
fname = os.path.join(args.prefix, 'model-gluon.params')
|
||||
net.save_params(fname)
|
||||
fname = os.path.join(args.prefix, 'model')
|
||||
net.export(fname, msave)
|
||||
#arg, aux = model.get_params()
|
||||
#mx.model.save_checkpoint(prefix, msave, model.symbol, arg, aux)
|
||||
print('[%d]Accuracy-Highest: %1.5f'%(mbatch, highest_acc[-1]))
|
||||
if args.max_steps>0 and mbatch>args.max_steps:
|
||||
sys.exit(0)
|
||||
|
||||
def _batch_callback_sym(param):
|
||||
_cb(param)
|
||||
_batch_callback()
|
||||
|
||||
|
||||
if args.mode!='gluon':
|
||||
model = mx.mod.Module(
|
||||
context = ctx,
|
||||
symbol = sym,
|
||||
)
|
||||
model.fit(train_iter,
|
||||
begin_epoch = 0,
|
||||
num_epoch = args.end_epoch,
|
||||
eval_data = None,
|
||||
eval_metric = metric,
|
||||
kvstore = 'device',
|
||||
optimizer = opt,
|
||||
initializer = initializer,
|
||||
arg_params = arg_params,
|
||||
aux_params = aux_params,
|
||||
allow_missing = True,
|
||||
batch_end_callback = _batch_callback_sym,
|
||||
epoch_end_callback = None )
|
||||
else:
|
||||
loss_weight = 1.0
|
||||
if args.task=='age':
|
||||
loss_weight = 1.0/AGE
|
||||
#loss = gluon.loss.SoftmaxCrossEntropyLoss(weight = loss_weight)
|
||||
loss = nd.SoftmaxOutput
|
||||
#loss = gluon.loss.SoftmaxCrossEntropyLoss()
|
||||
while True:
|
||||
#trainer = update_learning_rate(opt.lr, trainer, epoch, opt.lr_factor, lr_steps)
|
||||
tic = time.time()
|
||||
train_iter.reset()
|
||||
metric.reset()
|
||||
btic = time.time()
|
||||
for i, batch in enumerate(train_iter):
|
||||
_batch_callback()
|
||||
#data = gluon.utils.split_and_load(batch.data[0].astype(opt.dtype), ctx_list=ctx, batch_axis=0)
|
||||
#label = gluon.utils.split_and_load(batch.label[0].astype(opt.dtype), ctx_list=ctx, batch_axis=0)
|
||||
data = gluon.utils.split_and_load(batch.data[0], ctx_list=ctx, batch_axis=0)
|
||||
label = gluon.utils.split_and_load(batch.label[0], ctx_list=ctx, batch_axis=0)
|
||||
outputs = []
|
||||
Ls = []
|
||||
with ag.record():
|
||||
for x, y in zip(data, label):
|
||||
#print(y.asnumpy())
|
||||
if args.task=='':
|
||||
if args.margin_a>0.0:
|
||||
z = net(x,y)
|
||||
else:
|
||||
z = net(x)
|
||||
#print(z[0].shape, z[1].shape)
|
||||
else:
|
||||
z = net(x)
|
||||
if args.task=='gender':
|
||||
L = loss(z[1], y)
|
||||
#L = L/args.per_batch_size
|
||||
Ls.append(L)
|
||||
outputs.append(z[1])
|
||||
elif args.task=='age':
|
||||
for k in xrange(AGE):
|
||||
_z = nd.slice_axis(z[2], axis=1, begin=k*2, end=k*2+2)
|
||||
_y = nd.slice_axis(y, axis=1, begin=k, end=k+1)
|
||||
_y = nd.flatten(_y)
|
||||
L = loss(_z, _y)
|
||||
#L = L/args.per_batch_size
|
||||
#L /= AGE
|
||||
Ls.append(L)
|
||||
outputs.append(z[2])
|
||||
else:
|
||||
L = loss(z, y)
|
||||
#L = L/args.per_batch_size
|
||||
Ls.append(L)
|
||||
outputs.append(z)
|
||||
# store the loss and do backward after we have done forward
|
||||
# on all GPUs for better speed on multiple GPUs.
|
||||
ag.backward(Ls)
|
||||
#trainer.step(batch.data[0].shape[0], ignore_stale_grad=True)
|
||||
#trainer.step(args.ctx_num)
|
||||
n = batch.data[0].shape[0]
|
||||
#print(n,n)
|
||||
trainer.step(n)
|
||||
metric.update(label, outputs)
|
||||
if i>0 and i%20==0:
|
||||
name, acc = metric.get()
|
||||
if len(name)==2:
|
||||
logger.info('Epoch[%d] Batch [%d]\tSpeed: %f samples/sec\t%s=%f, %s=%f'%(
|
||||
num_epochs, i, args.batch_size/(time.time()-btic), name[0], acc[0], name[1], acc[1]))
|
||||
else:
|
||||
logger.info('Epoch[%d] Batch [%d]\tSpeed: %f samples/sec\t%s=%f'%(
|
||||
num_epochs, i, args.batch_size/(time.time()-btic), name[0], acc[0]))
|
||||
#metric.reset()
|
||||
btic = time.time()
|
||||
|
||||
epoch_time = time.time()-tic
|
||||
|
||||
# First epoch will usually be much slower than the subsequent epics,
|
||||
# so don't factor into the average
|
||||
if num_epochs > 0:
|
||||
total_time = total_time + epoch_time
|
||||
|
||||
#name, acc = metric.get()
|
||||
#logger.info('[Epoch %d] training: %s=%f, %s=%f'%(num_epochs, name[0], acc[0], name[1], acc[1]))
|
||||
logger.info('[Epoch %d] time cost: %f'%(num_epochs, epoch_time))
|
||||
num_epochs = num_epochs + 1
|
||||
#name, val_acc = test(ctx, val_data)
|
||||
#logger.info('[Epoch %d] validation: %s=%f, %s=%f'%(epoch, name[0], val_acc[0], name[1], val_acc[1]))
|
||||
|
||||
# save model if meet requirements
|
||||
#save_checkpoint(epoch, val_acc[0], best_acc)
|
||||
if num_epochs > 1:
|
||||
print('Average epoch time: {}'.format(float(total_time)/(num_epochs - 1)))
|
||||
|
||||
|
||||
|
||||
def main():
    """CLI entry point: parse command-line arguments into the module-level
    ``args`` and launch training."""
    global args
    args = parse_args()
    train_net(args)


if __name__ == '__main__':
    main()
|
||||
|
||||
@@ -1,369 +0,0 @@
|
||||
"""Helper for evaluation on the Labeled Faces in the Wild dataset
|
||||
"""
|
||||
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2016 David Sandberg
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import os
|
||||
import argparse
|
||||
import sys
|
||||
import numpy as np
|
||||
from scipy import misc
|
||||
from sklearn.model_selection import KFold
|
||||
from scipy import interpolate
|
||||
import sklearn
|
||||
import cv2
|
||||
import math
|
||||
import datetime
|
||||
import pickle
|
||||
from sklearn.decomposition import PCA
|
||||
import mxnet as mx
|
||||
from mxnet import gluon
|
||||
from mxnet import ndarray as nd
|
||||
sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'common'))
|
||||
import face_image
|
||||
|
||||
|
||||
class LFold:
    """K-fold splitter that degenerates gracefully for a single split.

    For ``n_splits > 1`` this is a thin wrapper around sklearn's ``KFold``;
    for ``n_splits == 1`` the whole index set is used as both the train and
    the test fold.
    """

    def __init__(self, n_splits=2, shuffle=False):
        self.n_splits = n_splits
        if n_splits > 1:
            self.k_fold = KFold(n_splits=n_splits, shuffle=shuffle)

    def split(self, indices):
        """Yield (train, test) index pairs; see class docstring for n_splits==1."""
        if self.n_splits > 1:
            return self.k_fold.split(indices)
        return [(indices, indices)]
|
||||
|
||||
|
||||
def calculate_roc(thresholds, embeddings1, embeddings2, actual_issame, nrof_folds=10, pca=0):
    """Compute cross-validated TPR/FPR curves and per-fold accuracy.

    The best threshold is selected on each training fold and evaluated on the
    held-out fold.  When ``pca > 0`` a PCA model is fitted per fold, and the
    embeddings are re-projected and re-normalized before computing distances.

    Returns (tpr, fpr, accuracy): mean TPR/FPR per threshold and an array of
    per-fold accuracies.
    """
    assert embeddings1.shape[0] == embeddings2.shape[0]
    assert embeddings1.shape[1] == embeddings2.shape[1]

    n_pairs = min(len(actual_issame), embeddings1.shape[0])
    n_thresh = len(thresholds)
    folds = LFold(n_splits=nrof_folds, shuffle=False)

    tprs = np.zeros((nrof_folds, n_thresh))
    fprs = np.zeros((nrof_folds, n_thresh))
    accuracy = np.zeros(nrof_folds)
    indices = np.arange(n_pairs)

    if pca == 0:
        # No per-fold projection: squared L2 distances can be computed once.
        dist = np.sum(np.square(embeddings1 - embeddings2), 1)

    for fold_idx, (train_set, test_set) in enumerate(folds.split(indices)):
        if pca > 0:
            print('doing pca on', fold_idx)
            train_stack = np.concatenate(
                (embeddings1[train_set], embeddings2[train_set]), axis=0)
            pca_model = PCA(n_components=pca)
            pca_model.fit(train_stack)
            embed1 = sklearn.preprocessing.normalize(pca_model.transform(embeddings1))
            embed2 = sklearn.preprocessing.normalize(pca_model.transform(embeddings2))
            dist = np.sum(np.square(embed1 - embed2), 1)

        # Pick the threshold that maximizes accuracy on the training fold.
        acc_train = np.zeros(n_thresh)
        for t_idx, threshold in enumerate(thresholds):
            _, _, acc_train[t_idx] = calculate_accuracy(
                threshold, dist[train_set], actual_issame[train_set])
        best = np.argmax(acc_train)

        # Evaluate every threshold (for the ROC curve) plus the chosen one.
        for t_idx, threshold in enumerate(thresholds):
            tprs[fold_idx, t_idx], fprs[fold_idx, t_idx], _ = calculate_accuracy(
                threshold, dist[test_set], actual_issame[test_set])
        _, _, accuracy[fold_idx] = calculate_accuracy(
            thresholds[best], dist[test_set], actual_issame[test_set])

    return np.mean(tprs, 0), np.mean(fprs, 0), accuracy
|
||||
|
||||
def calculate_accuracy(threshold, dist, actual_issame):
    """Return (tpr, fpr, accuracy) when pairs with dist < threshold are
    predicted to be the same identity."""
    predicted = np.less(dist, threshold)
    not_predicted = np.logical_not(predicted)
    not_actual = np.logical_not(actual_issame)

    tp = np.sum(np.logical_and(predicted, actual_issame))
    fp = np.sum(np.logical_and(predicted, not_actual))
    tn = np.sum(np.logical_and(not_predicted, not_actual))
    fn = np.sum(np.logical_and(not_predicted, actual_issame))

    # Guard against empty positive/negative sets (would divide by zero).
    tpr = 0 if tp + fn == 0 else float(tp) / float(tp + fn)
    fpr = 0 if fp + tn == 0 else float(fp) / float(fp + tn)
    acc = float(tp + tn) / dist.size
    return tpr, fpr, acc
|
||||
|
||||
|
||||
|
||||
def calculate_val(thresholds, embeddings1, embeddings2, actual_issame, far_target, nrof_folds=10):
    """Cross-validated VAL/FAR evaluation.

    On each training fold, the threshold giving FAR == far_target is located
    by linear interpolation over the candidate ``thresholds``; VAL/FAR are
    then measured on the held-out fold.

    Returns (val_mean, val_std, far_mean).
    """
    assert embeddings1.shape[0] == embeddings2.shape[0]
    assert embeddings1.shape[1] == embeddings2.shape[1]

    n_pairs = min(len(actual_issame), embeddings1.shape[0])
    n_thresh = len(thresholds)
    folds = LFold(n_splits=nrof_folds, shuffle=False)

    val = np.zeros(nrof_folds)
    far = np.zeros(nrof_folds)

    dist = np.sum(np.square(embeddings1 - embeddings2), 1)
    indices = np.arange(n_pairs)

    for fold_idx, (train_set, test_set) in enumerate(folds.split(indices)):
        # FAR for every candidate threshold on the training fold.
        far_train = np.zeros(n_thresh)
        for t_idx, threshold in enumerate(thresholds):
            _, far_train[t_idx] = calculate_val_far(
                threshold, dist[train_set], actual_issame[train_set])

        if np.max(far_train) >= far_target:
            # Invert the FAR(threshold) curve to hit the target FAR exactly.
            f = interpolate.interp1d(far_train, thresholds, kind='slinear')
            threshold = f(far_target)
        else:
            threshold = 0.0

        val[fold_idx], far[fold_idx] = calculate_val_far(
            threshold, dist[test_set], actual_issame[test_set])

    return np.mean(val), np.std(val), np.mean(far)
|
||||
|
||||
|
||||
def calculate_val_far(threshold, dist, actual_issame):
    """Return (val, far) when pairs with dist < threshold are accepted.

    val: fraction of genuine pairs accepted (true accept rate).
    far: fraction of impostor pairs accepted (false accept rate).
    """
    predict_issame = np.less(dist, threshold)
    true_accept = np.sum(np.logical_and(predict_issame, actual_issame))
    false_accept = np.sum(np.logical_and(predict_issame, np.logical_not(actual_issame)))
    n_same = np.sum(actual_issame)
    n_diff = np.sum(np.logical_not(actual_issame))
    # Guard against folds containing no genuine (or no impostor) pairs,
    # which previously raised ZeroDivisionError.  Mirrors the guarded
    # divisions in calculate_accuracy.
    val = float(true_accept) / float(n_same) if n_same > 0 else 0.0
    far = float(false_accept) / float(n_diff) if n_diff > 0 else 0.0
    return val, far
|
||||
|
||||
def evaluate(embeddings, actual_issame, nrof_folds=10, pca=0):
    """Standard verification evaluation over interleaved pairs.

    ``embeddings`` holds pairs interleaved row-wise (even rows vs. odd rows).
    Returns (tpr, fpr, accuracy, val, val_std, far) combining the ROC sweep
    with VAL/FAR at a target FAR of 1e-3.
    """
    embeddings1 = embeddings[0::2]
    embeddings2 = embeddings[1::2]
    issame = np.asarray(actual_issame)

    roc_thresholds = np.arange(0, 4, 0.01)
    tpr, fpr, accuracy = calculate_roc(
        roc_thresholds, embeddings1, embeddings2, issame,
        nrof_folds=nrof_folds, pca=pca)

    val_thresholds = np.arange(0, 4, 0.001)
    val, val_std, far = calculate_val(
        val_thresholds, embeddings1, embeddings2, issame,
        1e-3, nrof_folds=nrof_folds)

    return tpr, fpr, accuracy, val, val_std, far
|
||||
|
||||
def load_bin(path, image_size):
    """Load a pickled verification set (encoded images + same/diff flags).

    Returns (data_list, issame_list) where data_list holds two NDArray
    batches: index 0 the decoded images, index 1 their horizontal flips.
    """
    # Context manager releases the file handle (the original leaked it via
    # pickle.load(open(path, 'rb'))).
    with open(path, 'rb') as f:
        bins, issame_list = pickle.load(f)
    data_list = []
    for _ in range(2):
        data = nd.empty((len(issame_list) * 2, 3, image_size[0], image_size[1]))
        data_list.append(data)
    # xrange is Python 2 only; range works on both 2 and 3.
    for i in range(len(issame_list) * 2):
        _bin = bins[i]
        img = mx.image.imdecode(_bin)
        img = nd.transpose(img, axes=(2, 0, 1))  # HWC -> CHW
        for flip in [0, 1]:
            if flip == 1:
                img = mx.ndarray.flip(data=img, axis=2)
            data_list[flip][i][:] = img
        if i % 1000 == 0:
            print('loading bin', i)
    print(data_list[0].shape)
    return (data_list, issame_list)
|
||||
|
||||
def test(data_set, net, ctx, batch_size, nfolds=10):
    """Run verification on a loaded .bin dataset using a gluon feature net.

    data_set: (data_list, issame_list) as returned by load_bin; data_list
    holds the original and horizontally flipped image batches.
    ctx: list of mxnet contexts used by split_and_load.

    Returns (acc1, std1, acc2, std2, xnorm, embeddings_list).  acc1/std1 are
    kept at 0.0 for interface compatibility (the non-flip evaluation is
    disabled); acc2/std2 are the mean/std accuracy of the flip-augmented
    embeddings; xnorm is the mean L2 norm of the raw embeddings.
    """
    print('testing verification..')
    data_list = data_set[0]
    issame_list = data_set[1]
    embeddings_list = []
    time_consumed = 0.0
    # xrange is Python 2 only; use range (and direct iteration) throughout.
    for data in data_list:
        embeddings = None
        ba = 0
        while ba < data.shape[0]:
            bb = min(ba + batch_size, data.shape[0])
            count = bb - ba
            # Always feed a full batch: the leading (batch_size - count) rows
            # overlap the previous batch and are discarded below.
            x = nd.slice_axis(data, axis=0, begin=bb - batch_size, end=bb)
            time0 = datetime.datetime.now()
            xs = gluon.utils.split_and_load(x, ctx_list=ctx, batch_axis=0)
            zs = []
            for xx in xs:
                with mx.autograd.predict_mode():
                    zs.append(net.feature(xx))
            _embeddings = np.concatenate([z.asnumpy() for z in zs], axis=0)
            diff = datetime.datetime.now() - time0
            time_consumed += diff.total_seconds()
            if embeddings is None:
                embeddings = np.zeros((data.shape[0], _embeddings.shape[1]))
            embeddings[ba:bb, :] = _embeddings[(batch_size - count):, :]
            ba = bb
        embeddings_list.append(embeddings)

    # Mean L2 norm of all raw embeddings: a rough feature-scale sanity check.
    _xnorm = 0.0
    _xnorm_cnt = 0
    for embed in embeddings_list:
        for i in range(embed.shape[0]):
            _xnorm += np.linalg.norm(embed[i])
            _xnorm_cnt += 1
    _xnorm /= _xnorm_cnt

    # Non-flip accuracy evaluation is disabled; placeholders keep the
    # return signature stable for callers.
    acc1 = 0.0
    std1 = 0.0

    # Flip augmentation: sum original and flipped embeddings, then normalize.
    embeddings = embeddings_list[0] + embeddings_list[1]
    embeddings = sklearn.preprocessing.normalize(embeddings)
    print(embeddings.shape)
    print('infer time', time_consumed)
    _, _, accuracy, val, val_std, far = evaluate(embeddings, issame_list, nrof_folds=nfolds)
    acc2, std2 = np.mean(accuracy), np.std(accuracy)
    return acc1, std1, acc2, std2, _xnorm, embeddings_list
|
||||
|
||||
if __name__ == '__main__':

    parser = argparse.ArgumentParser(description='do verification')
    # general
    parser.add_argument('--data-dir', default='', help='')
    parser.add_argument('--model', default='../model/softmax,50', help='path to load model.')
    parser.add_argument('--target', default='lfw,cfp_ff,cfp_fp,agedb_30', help='test targets.')
    parser.add_argument('--gpu', default=0, type=int, help='gpu id')
    parser.add_argument('--batch-size', default=32, type=int, help='')
    parser.add_argument('--max', default='', type=str, help='')
    parser.add_argument('--mode', default=0, type=int, help='')
    parser.add_argument('--nfolds', default=10, type=int, help='')
    args = parser.parse_args()

    prop = face_image.load_property(args.data_dir)
    image_size = prop.image_size
    print('image_size', image_size)
    ctx = mx.gpu(args.gpu)
    nets = []
    vec = args.model.split(',')
    prefix = vec[0]
    epochs = []
    if len(vec) == 1:
        # No explicit epoch list given: scan the checkpoint directory.
        pdir = os.path.dirname(prefix)
        for fname in os.listdir(pdir):
            if not fname.endswith('.params'):
                continue
            _file = os.path.join(pdir, fname)
            if _file.startswith(prefix):
                epoch = int(fname.split('.')[0].split('-')[1])
                epochs.append(epoch)
        epochs = sorted(epochs, reverse=True)
        if len(args.max) > 0:
            _max = [int(x) for x in args.max.split(',')]
            assert len(_max) == 2
            if len(epochs) > _max[1]:
                epochs = epochs[_max[0]:_max[1]]
    else:
        epochs = [int(x) for x in vec[1].split('|')]
    print('model number', len(epochs))
    time0 = datetime.datetime.now()
    for epoch in epochs:
        print('loading', prefix, epoch)
        sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
        all_layers = sym.get_internals()
        sym = all_layers['fc1_output']
        model = mx.mod.Module(symbol=sym, context=ctx, label_names=None)
        model.bind(data_shapes=[('data', (args.batch_size, 3, image_size[0], image_size[1]))])
        model.set_params(arg_params, aux_params)
        nets.append(model)
    time_now = datetime.datetime.now()
    diff = time_now - time0
    print('model loading time', diff.total_seconds())

    ver_list = []
    ver_name_list = []
    for name in args.target.split(','):
        path = os.path.join(args.data_dir, name + ".bin")
        if os.path.exists(path):
            print('loading.. ', name)
            data_set = load_bin(path, image_size)
            ver_list.append(data_set)
            ver_name_list.append(name)

    if args.mode == 0:
        # xrange is Python 2 only; range works on both.
        for i in range(len(ver_list)):
            results = []
            for model in nets:
                # test() takes (data_set, net, ctx, batch_size, nfolds); the
                # original call omitted ctx, shifting every later argument by
                # one.  split_and_load needs a list of contexts, hence [ctx].
                acc1, std1, acc2, std2, xnorm, embeddings_list = test(
                    ver_list[i], model, [ctx], args.batch_size, args.nfolds)
                print('[%s]XNorm: %f' % (ver_name_list[i], xnorm))
                print('[%s]Accuracy: %1.5f+-%1.5f' % (ver_name_list[i], acc1, std1))
                print('[%s]Accuracy-Flip: %1.5f+-%1.5f' % (ver_name_list[i], acc2, std2))
                results.append(acc2)
            print('Max of [%s] is %1.5f' % (ver_name_list[i], np.max(results)))
    elif args.mode == 1:
        model = nets[0]
        test_badcase(ver_list[0], model, args.batch_size, args.target)
    else:
        model = nets[0]
        dumpR(ver_list[0], model, args.batch_size, args.target)
|
||||
|
||||
|
||||
@@ -1,6 +1,53 @@
|
||||
[The Lightweight Face Recognition Challenge & Workshop](https://ibug.doc.ic.ac.uk/resources/lightweight-face-recognition-challenge-workshop/) will be held in conjunction with the International Conference on Computer Vision (ICCV) 2019, Seoul Korea.
|
||||
|
||||
[Test Server](http://39.104.128.76/overview)
|
||||
Please strictly follow the rules. For example, please use the same [method](https://github.com/deepinsight/insightface/blob/master/common/flops_counter.py) for the FLOPs calculation regardless of your training framework is insightface or not.
|
||||
|
||||
[Test Server](http://www.insightface-challenge.com/overview)
|
||||
|
||||
**Sponsors:**
|
||||
|
||||
The Lightweight Face Recognition Challenge has been supported by
|
||||
|
||||
EPSRC project FACER2VM (EP/N007743/1)
|
||||
|
||||
Huawei (5000$)
|
||||
|
||||
DeepGlint (3000$)
|
||||
|
||||
iQIYI (3000$)
|
||||
|
||||
Kingsoft Cloud (3000$)
|
||||
|
||||
Pensees (3000$)
|
||||
|
||||
Dynamic funding pool: (17000$)
|
||||
|
||||
Cash sponsors and gift donations are welcome.
|
||||
|
||||
Contact:
|
||||
insightface.challenge@gmail.com
|
||||
|
||||
**Discussion Group**
|
||||
|
||||
*For Chinese:*
|
||||
|
||||

|
||||
|
||||
*For English:*
|
||||
|
||||
(in #lfr2019 channel)
|
||||
https://join.slack.com/t/insightface/shared_invite/enQtNjU0NDk2MjYyMTMzLTIzNDEwNmIxMjU5OGYzYzFhMjlkNjlhMTBkNWFiNjU4MTVhNTgzYjQ5ZTZiMGM3MzUyNzQ3OTBhZTg3MzM5M2I
|
||||
|
||||
|
||||
**NEWS**
|
||||
|
||||
``2019.06.21`` We updated the groundtruth of Glint test dataset.
|
||||
|
||||
``2019.06.04`` We will clean the groundtruth on deepglint testset.
|
||||
|
||||
``2019.05.21`` Baseline models and training logs available.
|
||||
|
||||
``2019.05.16`` The four tracks (deepglint-light, deepglint-large, iQIYI-light, iQIYI-large) will equally share the dynamic funding pool (14000$). From each track, the top 3 players will share the funding pool for 50%, 30% and 20% respectively.
|
||||
|
||||
==================
|
||||
|
||||
@@ -10,20 +57,21 @@
|
||||
|
||||
1. Download ms1m-retinaface from [baiducloud](https://pan.baidu.com/s/1rQxJ3drqm_071vpxBtp98A) or [dropbox](https://www.dropbox.com/s/ev5ezzcz79p2hge/ms1m-retinaface-t1.zip?dl=0) and unzip it to `$INSIGHTFACE_ROOT/datasets/`
|
||||
2. Go into `$INSIGHTFACE_ROOT/recognition/`
|
||||
3. Refer to the `retina` dataset config section in `sample_config.py` and copy it to your own`config.py`.
|
||||
3. Refer to the `retina` dataset configuration section in `sample_config.py` and copy it as your own configuration file `config.py`.
|
||||
4. Start training with `CUDA_VISIBLE_DEVICES='0,1,2,3' python -u train.py --dataset retina --network [your-network] --loss arcface`. It will output the accuracy of lfw, cfp_fp and agedb_30 every 2000 batches by default.
|
||||
5. Putting the training dataset on SSD hard disk will achieve better training efficiency.
|
||||
|
||||
------------------
|
||||
|
||||
**Testing:**
|
||||
|
||||
1. testdata-image from [baiducloud](https://pan.baidu.com/s/1UKUYsRfVTSzj1tfU3BVFrw) or [dropbox](https://www.dropbox.com/s/r5y6xt754m36rh8/iccv19-challenge-data-v1.zip?dl=0). These face images are all pre-processed and aligned so no need to do further modification.
|
||||
2. To download testdata-video from iQIYI, please visit <http://challenge.ai.iqiyi.com/data-cluster>. You must download iQIYI-VID-FACE.z01, iQIYI-VID-FACE.z02 and iQIYI-VID-FACE.zip after signin. These face images are all pre-processed and aligned so no need to do further modification.
|
||||
1. To unzip: ``zip iQIYI_VID_FACE.zip -s=0 --out iQIYI_VID_FACE_ALL.zip; unzip iQIYI_VID_FACE_ALL.zip``
|
||||
2. We can get a directory named ``iQIYI_VID_FACE`` after decompression. Then we have to move ``video_filelist.txt`` in testdata-image package to ``iQIYI_VID_FACE/filelist.txt``, to indicate the order of videos in our submission feature file.
|
||||
1. Download testdata-image from [baiducloud](https://pan.baidu.com/s/1UKUYsRfVTSzj1tfU3BVFrw) or [dropbox](https://www.dropbox.com/s/r5y6xt754m36rh8/iccv19-challenge-data-v1.zip?dl=0). These face images are all pre-processed and aligned.
|
||||
2. To download testdata-video from iQIYI, please visit <http://challenge.ai.iqiyi.com/data-cluster>. You need to download iQIYI-VID-FACE.z01, iQIYI-VID-FACE.z02 and iQIYI-VID-FACE.zip after registration. These face frames are also pre-processed and aligned.
|
||||
1. Unzip: ``zip iQIYI_VID_FACE.zip -s=0 --out iQIYI_VID_FACE_ALL.zip; unzip iQIYI_VID_FACE_ALL.zip``
|
||||
2. We can get a directory named ``iQIYI_VID_FACE`` after decompression. Then, we have to move ``video_filelist.txt`` in testdata-image package to ``iQIYI_VID_FACE/filelist.txt``, to indicate the order of videos in our submission feature file.
|
||||
3. To generate image feature submission file: check ``gen_image_feature.py``
|
||||
4. To generate video feature submission file: check ``gen_video_feature.py``
|
||||
5. Submit binary feature to the right section on test server.
|
||||
5. Submit binary feature to the right track of the test server.
|
||||
|
||||
You can also check the verification performance during training time on LFW,CFP_FP,AgeDB_30 datasets.
|
||||
|
||||
@@ -35,10 +83,16 @@ Final ranking is determined by the TAR under 1:1 protocal only, for all valid su
|
||||
|
||||
For image testset, we evaluate the TAR under FAR@e-8 while we choose the TAR under FAR@e-4 for video testset.
|
||||
|
||||
For track-1, we will rank all players according to the following formula: ``TAR(glint-light)+TAR(iqiyi-light)``
|
||||
------------------
|
||||
|
||||
For track-2, we will rank all players according to the following formula: ``TAR(glint-large)+TAR(iqiyi-large)``
|
||||
**Baseline:**
|
||||
|
||||
1. Network y2(a deeper mobilefacenet): 933M FLOPs. TAR_image: 0.64691, TAR_video: 0.47191
|
||||
2. Network r100fc(ResNet100FC-IR): 24G FLOPs. TAR_image: 0.80312, TAR_video: 0.64894
|
||||
|
||||
Baseline models download link: [baidu cloud](https://pan.baidu.com/s/1Em0ZFnefSoTsZoTd-9m8Nw) [dropbox](https://www.dropbox.com/s/yqaziktiv38ehrv/iccv19-baseline-models.zip?dl=0)
|
||||
|
||||
Training logs: [baidu cloud](https://pan.baidu.com/s/12rsp-oMzsjTeU6nugEvA9g) [dropbox](https://www.dropbox.com/s/4ufb9g7n76rfav5/iccv-baseline-log.zip?dl=0)
|
||||
|
||||
------------------
|
||||
|
||||
@@ -48,17 +102,10 @@ For track-2, we will rank all players as following formula: ``TAR(glint-large)+T
|
||||
|
||||
------------------
|
||||
|
||||
**Baseline:**
|
||||
|
||||
1. Network y2(a deeper mobilefacenet): 933M FLOPs. TAR_image: 0.64691, TAR_video: [TODO]
|
||||
2. Network r100fc(ResNet100FC-IR): 24G FLOPs. TAR_image: 0.80312, TAR_video: [TODO]
|
||||
|
||||
------------------
|
||||
|
||||
**Candidate solutions:**
|
||||
|
||||
1. Use slightly deeper or wider mobile-level networks.
|
||||
2. Try different training methods/losses than straightforward arcface.
|
||||
1. Manually design or automatically search different networks/losses.
|
||||
2. Use slightly deeper or wider mobile-level networks.
|
||||
3. [OctConv](https://arxiv.org/abs/1904.05049), to reduce FLOPs.
|
||||
4. [HRNet](https://arxiv.org/abs/1904.04514), for large FLOPs track.
|
||||
and so on
|
||||
|
||||
@@ -30,7 +30,7 @@ use_flip = True
|
||||
|
||||
|
||||
def do_flip(data):
|
||||
for idx in xrange(data.shape[0]):
|
||||
for idx in range(data.shape[0]):
|
||||
data[idx,:,:] = np.fliplr(data[idx,:,:])
|
||||
|
||||
def get_feature(buffer):
|
||||
@@ -83,7 +83,7 @@ def main(args):
|
||||
ctx = []
|
||||
cvd = os.environ['CUDA_VISIBLE_DEVICES'].strip()
|
||||
if len(cvd)>0:
|
||||
for i in xrange(len(cvd.split(','))):
|
||||
for i in range(len(cvd.split(','))):
|
||||
ctx.append(mx.gpu(i))
|
||||
if len(ctx)==0:
|
||||
ctx = [mx.cpu()]
|
||||
|
||||
@@ -32,7 +32,7 @@ ctx_num = 0
|
||||
|
||||
|
||||
def do_flip(data):
|
||||
for idx in xrange(data.shape[0]):
|
||||
for idx in range(data.shape[0]):
|
||||
data[idx,:,:] = np.fliplr(data[idx,:,:])
|
||||
|
||||
def get_feature(buffer):
|
||||
@@ -89,7 +89,7 @@ def main(args):
|
||||
ctx = []
|
||||
cvd = os.environ['CUDA_VISIBLE_DEVICES'].strip()
|
||||
if len(cvd)>0:
|
||||
for i in xrange(len(cvd.split(','))):
|
||||
for i in range(len(cvd.split(','))):
|
||||
ctx.append(mx.gpu(i))
|
||||
if len(ctx)==0:
|
||||
ctx = [mx.cpu()]
|
||||
|
||||
3
python-package/README.md
Normal file
3
python-package/README.md
Normal file
@@ -0,0 +1,3 @@
|
||||
InsightFace.ai README
|
||||
|
||||
|
||||
28
python-package/insightface/__init__.py
Normal file
28
python-package/insightface/__init__.py
Normal file
@@ -0,0 +1,28 @@
|
||||
# coding: utf-8
|
||||
# pylint: disable=wrong-import-position
|
||||
"""InsightFace: A Face Analysis Toolkit."""
|
||||
from __future__ import absolute_import
|
||||
|
||||
# mxnet version check
|
||||
#mx_version = '1.4.0'
|
||||
try:
|
||||
import mxnet as mx
|
||||
#from distutils.version import LooseVersion
|
||||
#if LooseVersion(mx.__version__) < LooseVersion(mx_version):
|
||||
# msg = (
|
||||
# "Legacy mxnet-mkl=={} detected, some new modules may not work properly. "
|
||||
# "mxnet-mkl>={} is required. You can use pip to upgrade mxnet "
|
||||
# "`pip install mxnet-mkl --pre --upgrade` "
|
||||
# "or `pip install mxnet-cu90mkl --pre --upgrade`").format(mx.__version__, mx_version)
|
||||
# raise ImportError(msg)
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"Unable to import dependency mxnet. "
|
||||
"A quick tip is to install via `pip install mxnet-mkl/mxnet-cu90mkl --pre`. ")
|
||||
|
||||
__version__ = '0.1.3'
|
||||
|
||||
from . import model_zoo
|
||||
from . import utils
|
||||
from . import app
|
||||
|
||||
1
python-package/insightface/app/__init__.py
Normal file
1
python-package/insightface/app/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
from .face_analysis import *
|
||||
72
python-package/insightface/app/face_analysis.py
Normal file
72
python-package/insightface/app/face_analysis.py
Normal file
@@ -0,0 +1,72 @@
|
||||
from __future__ import division
|
||||
import collections
|
||||
import mxnet as mx
|
||||
import numpy as np
|
||||
from numpy.linalg import norm
|
||||
import mxnet.ndarray as nd
|
||||
from ..model_zoo import model_zoo
|
||||
from ..utils import face_align
|
||||
|
||||
__all__ = ['FaceAnalysis',
|
||||
'Face']
|
||||
|
||||
Face = collections.namedtuple('Face', [
|
||||
'bbox', 'landmark', 'det_score', 'embedding', 'gender', 'age', 'embedding_norm', 'normed_embedding'])
|
||||
|
||||
Face.__new__.__defaults__ = (None,) * len(Face._fields)
|
||||
|
||||
class FaceAnalysis:
    """Bundles a face detector with optional recognition and gender/age
    models into a single per-image pipeline."""

    def __init__(self, det_name='retinaface_r50_v1', rec_name='arcface_r100_v1', ga_name='genderage_v1'):
        assert det_name is not None
        self.det_model = model_zoo.get_model(det_name)
        # rec/ga models are optional; pass None to skip them.
        self.rec_model = model_zoo.get_model(rec_name) if rec_name is not None else None
        self.ga_model = model_zoo.get_model(ga_name) if ga_name is not None else None

    def prepare(self, ctx_id, nms=0.4):
        """Bind every loaded model to a device and set the detector's NMS
        threshold."""
        self.det_model.prepare(ctx_id, nms)
        if self.rec_model is not None:
            self.rec_model.prepare(ctx_id)
        if self.ga_model is not None:
            self.ga_model.prepare(ctx_id)

    def get(self, img, det_thresh=0.8, det_scale=1.0, max_num=0):
        """Detect faces in ``img`` and, when the corresponding models are
        loaded, attach embeddings and gender/age to each detection.

        Returns a (possibly empty) list of Face records.  When max_num > 0,
        only the max_num largest / most centered faces are kept.
        """
        bboxes, landmarks = self.det_model.detect(img, threshold=det_thresh, scale=det_scale)
        if bboxes.shape[0] == 0:
            return []
        if max_num > 0 and bboxes.shape[0] > max_num:
            area = (bboxes[:, 2] - bboxes[:, 0]) * (bboxes[:, 3] - bboxes[:, 1])
            img_center = img.shape[0] // 2, img.shape[1] // 2
            offsets = np.vstack([
                (bboxes[:, 0] + bboxes[:, 2]) / 2 - img_center[1],
                (bboxes[:, 1] + bboxes[:, 3]) / 2 - img_center[0]])
            offset_dist_squared = np.sum(np.power(offsets, 2.0), 0)
            # Rank faces by area with some extra weight on centering.  The
            # original used np.argmax here, which returns a scalar index and
            # crashes on the slice below; argsort (descending) correctly
            # yields the top max_num faces.
            values = area - offset_dist_squared * 2.0
            bindex = np.argsort(values)[::-1]
            bindex = bindex[0:max_num]
            bboxes = bboxes[bindex, :]
            landmarks = landmarks[bindex, :]
        ret = []
        for i in range(bboxes.shape[0]):
            bbox = bboxes[i, 0:4]
            det_score = bboxes[i, 4]
            landmark = landmarks[i]
            _img = face_align.norm_crop(img, landmark=landmark)
            embedding = None
            embedding_norm = None
            normed_embedding = None
            gender = None
            age = None
            if self.rec_model is not None:
                embedding = self.rec_model.get_embedding(_img).flatten()
                embedding_norm = norm(embedding)
                normed_embedding = embedding / embedding_norm
            if self.ga_model is not None:
                gender, age = self.ga_model.get(_img)
            face = Face(bbox=bbox, landmark=landmark, det_score=det_score,
                        embedding=embedding, gender=gender, age=age,
                        normed_embedding=normed_embedding,
                        embedding_norm=embedding_norm)
            ret.append(face)
        return ret
|
||||
|
||||
1
python-package/insightface/model_zoo/__init__.py
Normal file
1
python-package/insightface/model_zoo/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
from .model_zoo import get_model, get_model_list
|
||||
425
python-package/insightface/model_zoo/face_detection.py
Normal file
425
python-package/insightface/model_zoo/face_detection.py
Normal file
@@ -0,0 +1,425 @@
|
||||
from __future__ import division
|
||||
import mxnet as mx
|
||||
import numpy as np
|
||||
import mxnet.ndarray as nd
|
||||
|
||||
__all__ = ['FaceDetector',
|
||||
'retinaface_r50_v1',
|
||||
'retinaface_mnet025_v1',
|
||||
'retinaface_mnet025_v2',
|
||||
'get_retinaface']
|
||||
|
||||
def _whctrs(anchor):
|
||||
"""
|
||||
Return width, height, x center, and y center for an anchor (window).
|
||||
"""
|
||||
|
||||
w = anchor[2] - anchor[0] + 1
|
||||
h = anchor[3] - anchor[1] + 1
|
||||
x_ctr = anchor[0] + 0.5 * (w - 1)
|
||||
y_ctr = anchor[1] + 0.5 * (h - 1)
|
||||
return w, h, x_ctr, y_ctr
|
||||
|
||||
|
||||
def _mkanchors(ws, hs, x_ctr, y_ctr):
|
||||
"""
|
||||
Given a vector of widths (ws) and heights (hs) around a center
|
||||
(x_ctr, y_ctr), output a set of anchors (windows).
|
||||
"""
|
||||
|
||||
ws = ws[:, np.newaxis]
|
||||
hs = hs[:, np.newaxis]
|
||||
anchors = np.hstack((x_ctr - 0.5 * (ws - 1),
|
||||
y_ctr - 0.5 * (hs - 1),
|
||||
x_ctr + 0.5 * (ws - 1),
|
||||
y_ctr + 0.5 * (hs - 1)))
|
||||
return anchors
|
||||
|
||||
def _ratio_enum(anchor, ratios):
    """Enumerate anchors with (roughly) the same area as `anchor`, one per aspect ratio."""
    w, h, x_ctr, y_ctr = _whctrs(anchor)
    area = w * h
    # width/height rounded to whole pixels, preserving area per ratio
    ws = np.round(np.sqrt(area / ratios))
    hs = np.round(ws * ratios)
    return _mkanchors(ws, hs, x_ctr, y_ctr)
|
||||
|
||||
|
||||
def _scale_enum(anchor, scales):
    """Enumerate anchors scaling `anchor`'s width and height by each factor in `scales`."""
    w, h, x_ctr, y_ctr = _whctrs(anchor)
    return _mkanchors(w * scales, h * scales, x_ctr, y_ctr)
|
||||
|
||||
def anchors_plane(height, width, stride, base_anchors):
    """
    Tile a base anchor set over every cell of a feature plane.

    Parameters
    ----------
    height: height of plane
    width: width of plane
    stride: stride of the original image
    base_anchors: (A, 4) a base set of anchors

    Returns
    -------
    all_anchors: (height, width, A, 4) float32 ndarray of anchors spreading
        over the plane
    """
    # Vectorized replacement for the original triple Python loop: the
    # shift of cell (ih, iw) is (iw*stride, ih*stride) applied to both
    # corners of every base anchor.  Output values are identical.
    shift_x = (np.arange(width) * stride).astype(np.float32)
    shift_y = (np.arange(height) * stride).astype(np.float32)
    shifts = np.zeros((height, width, 1, 4), dtype=np.float32)
    shifts[:, :, 0, 0] = shift_x[np.newaxis, :]
    shifts[:, :, 0, 2] = shift_x[np.newaxis, :]
    shifts[:, :, 0, 1] = shift_y[:, np.newaxis]
    shifts[:, :, 0, 3] = shift_y[:, np.newaxis]
    return base_anchors.astype(np.float32)[np.newaxis, np.newaxis, :, :] + shifts
|
||||
|
||||
def generate_anchors(base_size=16, ratios=[0.5, 1, 2],
                     scales=2 ** np.arange(3, 6), stride=16):
    """
    Enumerate base anchors around a reference (0, 0, base_size-1, base_size-1)
    window: one anchor per (aspect ratio, scale) combination.
    """
    reference = np.array([1, 1, base_size, base_size]) - 1
    per_ratio = _ratio_enum(reference, ratios)
    return np.vstack([_scale_enum(per_ratio[i, :], scales)
                      for i in range(per_ratio.shape[0])])
|
||||
|
||||
def generate_anchors_fpn(cfg):
    """
    Build one base-anchor array per FPN level, ordered by descending stride.

    cfg maps each stride (as a string key) to a dict carrying BASE_SIZE,
    RATIOS and SCALES for that level.
    """
    strides = sorted((int(k) for k in cfg), reverse=True)
    anchors = []
    for stride in strides:
        level = cfg[str(stride)]
        level_anchors = generate_anchors(level['BASE_SIZE'],
                                         np.array(level['RATIOS']),
                                         np.array(level['SCALES']),
                                         stride)
        anchors.append(level_anchors)
    return anchors
|
||||
|
||||
def clip_pad(tensor, pad_shape):
    """
    Crop the spatial dims of a [n, c, H, W] tensor down to pad_shape [h, w].

    Returns the tensor unchanged (same object) when it is already small
    enough; otherwise returns a copy of the cropped view.
    """
    h, w = pad_shape
    if h < tensor.shape[2] or w < tensor.shape[3]:
        tensor = tensor[:, :, :h, :w].copy()
    return tensor
|
||||
|
||||
def bbox_pred(boxes, box_deltas):
    """
    Transform the set of class-agnostic boxes into class-specific boxes
    by applying the predicted offsets (box_deltas).

    :param boxes: [N, 4] anchor/proposal boxes as (x1, y1, x2, y2)
    :param box_deltas: [N, >=4] predicted (dx, dy, dw, dh[, extras])
    :return: [N, box_deltas.shape[1]] decoded boxes; any columns beyond
        the first four are passed through unchanged.
    """
    if boxes.shape[0] == 0:
        return np.zeros((0, box_deltas.shape[1]))

    # BUGFIX: np.float was removed in NumPy 1.24; it was an alias for the
    # builtin float (i.e. float64), so np.float64 is the exact equivalent.
    boxes = boxes.astype(np.float64, copy=False)
    widths = boxes[:, 2] - boxes[:, 0] + 1.0
    heights = boxes[:, 3] - boxes[:, 1] + 1.0
    ctr_x = boxes[:, 0] + 0.5 * (widths - 1.0)
    ctr_y = boxes[:, 1] + 0.5 * (heights - 1.0)

    dx = box_deltas[:, 0:1]
    dy = box_deltas[:, 1:2]
    dw = box_deltas[:, 2:3]
    dh = box_deltas[:, 3:4]

    # deltas are relative: (dx, dy) shift the center by a fraction of the
    # box size; (dw, dh) scale the size in log-space
    pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis]
    pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis]
    pred_w = np.exp(dw) * widths[:, np.newaxis]
    pred_h = np.exp(dh) * heights[:, np.newaxis]

    pred_boxes = np.zeros(box_deltas.shape)
    # x1
    pred_boxes[:, 0:1] = pred_ctr_x - 0.5 * (pred_w - 1.0)
    # y1
    pred_boxes[:, 1:2] = pred_ctr_y - 0.5 * (pred_h - 1.0)
    # x2
    pred_boxes[:, 2:3] = pred_ctr_x + 0.5 * (pred_w - 1.0)
    # y2
    pred_boxes[:, 3:4] = pred_ctr_y + 0.5 * (pred_h - 1.0)

    if box_deltas.shape[1] > 4:
        pred_boxes[:, 4:] = box_deltas[:, 4:]

    return pred_boxes
|
||||
|
||||
def landmark_pred(boxes, landmark_deltas):
    """Decode per-box 5-point landmark deltas into absolute coordinates.

    :param boxes: [N, 4] boxes as (x1, y1, x2, y2)
    :param landmark_deltas: [N, 5, 2] offsets relative to the box center,
        scaled by box width/height
    :return: [N, 5, 2] absolute landmark positions
    """
    if boxes.shape[0] == 0:
        return np.zeros((0, landmark_deltas.shape[1]))
    # BUGFIX: np.float was removed in NumPy 1.24; np.float64 is the exact
    # equivalent of the old alias.
    boxes = boxes.astype(np.float64, copy=False)
    widths = boxes[:, 2] - boxes[:, 0] + 1.0
    heights = boxes[:, 3] - boxes[:, 1] + 1.0
    ctr_x = boxes[:, 0] + 0.5 * (widths - 1.0)
    ctr_y = boxes[:, 1] + 0.5 * (heights - 1.0)
    pred = landmark_deltas.copy()
    for i in range(5):
        pred[:, i, 0] = landmark_deltas[:, i, 0] * widths + ctr_x
        pred[:, i, 1] = landmark_deltas[:, i, 1] * heights + ctr_y
    return pred
|
||||
|
||||
class FaceDetector:
    """RetinaFace-style MXNet face detector.

    Wraps a symbol/params checkpoint, builds FPN base anchors for the
    configured network variant, and decodes per-level scores and box
    deltas (plus 5-point landmark deltas when the model provides them)
    into detections.
    """

    def __init__(self, param_file, rac):
        # `rac` selects the anchor configuration:
        #   'net3'  - 3-level FPN, landmark std 1.0
        #   'net3l' - 3-level FPN, landmark std 0.2
        #   'net5'  - 5-level FPN with dense scales
        self.param_file = param_file
        self.rac = rac
        # (height, width) used to bind the module when no fixed size is given
        self.default_image_size = (480, 640)

    def prepare(self, ctx_id, nms=0.4, fix_image_size=None):
        """Load the checkpoint, bind the module and precompute anchor tables.

        Parameters
        ----------
        ctx_id : int
            GPU device id; a negative value selects CPU.
        nms : float
            IoU threshold used by :meth:`nms`.
        fix_image_size : tuple or None
            Optional (height, width) to bind instead of the default.
        """
        # param file is named '<prefix>-<epoch>.params'
        pos = self.param_file.rfind('-')
        prefix = self.param_file[0:pos]
        pos2 = self.param_file.rfind('.')
        epoch = int(self.param_file[pos+1:pos2])
        sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
        if ctx_id >= 0:
            ctx = mx.gpu(ctx_id)
        else:
            ctx = mx.cpu()
        model = mx.mod.Module(symbol=sym, context=ctx, label_names=None)
        if fix_image_size is not None:
            data_shape = (1, 3) + fix_image_size
        else:
            data_shape = (1, 3) + self.default_image_size
        model.bind(data_shapes=[('data', data_shape)])
        model.set_params(arg_params, aux_params)
        # warm-up forward pass so the first real detect() call is not slow
        data = mx.nd.zeros(shape=data_shape)
        db = mx.io.DataBatch(data=(data,))
        model.forward(db, is_train=False)
        out = model.get_outputs()[0].asnumpy()
        self.model = model
        self.nms_threshold = nms

        self.landmark_std = 1.0
        _ratio = (1.,)
        fmc = 3
        if self.rac == 'net3':
            _ratio = (1.,)
        elif self.rac == 'net3l':
            _ratio = (1.,)
            self.landmark_std = 0.2
        elif self.rac == 'net5':  # retinaface
            # BUGFIX: this branch previously compared an undefined name
            # `network`, raising NameError whenever rac was not
            # net3/net3l; it must inspect self.rac like the other branches.
            fmc = 5
        else:
            assert False, 'rac setting error %s' % self.rac

        if fmc == 3:
            self._feat_stride_fpn = [32, 16, 8]
            self.anchor_cfg = {
                '32': {'SCALES': (32, 16), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999},
                '16': {'SCALES': (8, 4), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999},
                '8': {'SCALES': (2, 1), 'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999},
            }
        elif fmc == 5:
            self._feat_stride_fpn = [64, 32, 16, 8, 4]
            self.anchor_cfg = {}
            # three scales per level, geometrically spaced by 2^(1/3);
            # _basescale keeps growing across strides (15 scales total)
            _ass = 2.0**(1.0/3)
            _basescale = 1.0
            for _stride in [4, 8, 16, 32, 64]:
                key = str(_stride)
                value = {'BASE_SIZE': 16, 'RATIOS': _ratio, 'ALLOWED_BORDER': 9999}
                scales = []
                for _ in range(3):
                    scales.append(_basescale)
                    _basescale *= _ass
                value['SCALES'] = tuple(scales)
                self.anchor_cfg[key] = value

        print(self._feat_stride_fpn, self.anchor_cfg)
        # models with a landmark branch emit 3 outputs per FPN level
        # (scores, bbox deltas, landmark deltas) instead of 2
        self.use_landmarks = False
        if len(sym) // len(self._feat_stride_fpn) == 3:
            self.use_landmarks = True
        print('use_landmarks', self.use_landmarks)
        self.fpn_keys = []
        for s in self._feat_stride_fpn:
            self.fpn_keys.append('stride%s' % s)

        self._anchors_fpn = dict(zip(self.fpn_keys, generate_anchors_fpn(cfg=self.anchor_cfg)))
        for k in self._anchors_fpn:
            v = self._anchors_fpn[k].astype(np.float32)
            self._anchors_fpn[k] = v
        # cache of tiled anchor planes keyed by (height, width, stride)
        self.anchor_plane_cache = {}

        self._num_anchors = dict(zip(self.fpn_keys, [anchors.shape[0] for anchors in self._anchors_fpn.values()]))

    def detect(self, img, threshold=0.5, scale=1.0):
        """Detect faces in a BGR image.

        Returns
        -------
        det : (N, 5) ndarray of [x1, y1, x2, y2, score] in original
            (pre-scaling) image coordinates.
        landmarks : (N, 5, 2) ndarray, or None when the model has no
            landmark branch.
        """
        # BUGFIX: cv2 was used below without ever being imported in this
        # module; import it locally to avoid a NameError when scale != 1.
        import cv2
        proposals_list = []
        scores_list = []
        landmarks_list = []
        if scale == 1.0:
            im = img
        else:
            im = cv2.resize(img, None, None, fx=scale, fy=scale, interpolation=cv2.INTER_LINEAR)
        im_info = [im.shape[0], im.shape[1]]
        # BGR -> RGB, HWC -> NCHW
        im_tensor = np.zeros((1, 3, im.shape[0], im.shape[1]))
        for i in range(3):
            im_tensor[0, i, :, :] = im[:, :, 2 - i]
        data = nd.array(im_tensor)
        db = mx.io.DataBatch(data=(data,), provide_data=[('data', data.shape)])
        self.model.forward(db, is_train=False)
        net_out = self.model.get_outputs()
        for _idx, s in enumerate(self._feat_stride_fpn):
            _key = 'stride%s' % s
            stride = int(s)
            # outputs are grouped per level: 3 with landmarks, else 2
            if self.use_landmarks:
                idx = _idx * 3
            else:
                idx = _idx * 2
            scores = net_out[idx].asnumpy()
            # keep only the foreground-class score channels
            scores = scores[:, self._num_anchors['stride%s' % s]:, :, :]
            idx += 1
            bbox_deltas = net_out[idx].asnumpy()

            height, width = bbox_deltas.shape[2], bbox_deltas.shape[3]
            A = self._num_anchors['stride%s' % s]
            K = height * width
            key = (height, width, stride)
            if key in self.anchor_plane_cache:
                anchors = self.anchor_plane_cache[key]
            else:
                anchors_fpn = self._anchors_fpn['stride%s' % s]
                anchors = anchors_plane(height, width, stride, anchors_fpn)
                anchors = anchors.reshape((K * A, 4))
                # bound the cache so unusual input sizes cannot grow it forever
                if len(self.anchor_plane_cache) < 100:
                    self.anchor_plane_cache[key] = anchors

            scores = clip_pad(scores, (height, width))
            scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

            bbox_deltas = clip_pad(bbox_deltas, (height, width))
            bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1))
            bbox_pred_len = bbox_deltas.shape[3] // A
            bbox_deltas = bbox_deltas.reshape((-1, bbox_pred_len))

            proposals = bbox_pred(anchors, bbox_deltas)
            #proposals = clip_boxes(proposals, im_info[:2])

            scores_ravel = scores.ravel()
            order = np.where(scores_ravel >= threshold)[0]
            proposals = proposals[order, :]
            scores = scores[order]

            # map back to the original (un-scaled) image coordinates
            proposals[:, 0:4] /= scale

            proposals_list.append(proposals)
            scores_list.append(scores)

            if self.use_landmarks:
                idx += 1
                landmark_deltas = net_out[idx].asnumpy()
                landmark_deltas = clip_pad(landmark_deltas, (height, width))
                landmark_pred_len = landmark_deltas.shape[1] // A
                landmark_deltas = landmark_deltas.transpose((0, 2, 3, 1)).reshape((-1, 5, landmark_pred_len // 5))
                landmark_deltas *= self.landmark_std
                landmarks = landmark_pred(anchors, landmark_deltas)
                landmarks = landmarks[order, :]

                landmarks[:, :, 0:2] /= scale
                landmarks_list.append(landmarks)

        proposals = np.vstack(proposals_list)
        landmarks = None
        if proposals.shape[0] == 0:
            if self.use_landmarks:
                landmarks = np.zeros((0, 5, 2))
            return np.zeros((0, 5)), landmarks
        scores = np.vstack(scores_list)
        scores_ravel = scores.ravel()
        # sort all surviving proposals by descending score before NMS
        order = scores_ravel.argsort()[::-1]
        proposals = proposals[order, :]
        scores = scores[order]
        if self.use_landmarks:
            landmarks = np.vstack(landmarks_list)
            landmarks = landmarks[order].astype(np.float32, copy=False)

        pre_det = np.hstack((proposals[:, 0:4], scores)).astype(np.float32, copy=False)
        keep = self.nms(pre_det)
        det = np.hstack((pre_det, proposals[:, 4:]))
        det = det[keep, :]
        if self.use_landmarks:
            landmarks = landmarks[keep]

        return det, landmarks

    def nms(self, dets):
        """Greedy non-maximum suppression.

        dets is (N, 5) rows of [x1, y1, x2, y2, score]; returns the list
        of row indices to keep, using self.nms_threshold as IoU cutoff.
        """
        thresh = self.nms_threshold
        x1 = dets[:, 0]
        y1 = dets[:, 1]
        x2 = dets[:, 2]
        y2 = dets[:, 3]
        scores = dets[:, 4]

        areas = (x2 - x1 + 1) * (y2 - y1 + 1)
        order = scores.argsort()[::-1]

        keep = []
        while order.size > 0:
            i = order[0]
            keep.append(i)
            # intersection of the current best box with all remaining boxes
            xx1 = np.maximum(x1[i], x1[order[1:]])
            yy1 = np.maximum(y1[i], y1[order[1:]])
            xx2 = np.minimum(x2[i], x2[order[1:]])
            yy2 = np.minimum(y2[i], y2[order[1:]])

            w = np.maximum(0.0, xx2 - xx1 + 1)
            h = np.maximum(0.0, yy2 - yy1 + 1)
            inter = w * h
            ovr = inter / (areas[i] + areas[order[1:]] - inter)

            # keep only boxes whose overlap with the chosen box is small
            inds = np.where(ovr <= thresh)[0]
            order = order[inds + 1]

        return keep
|
||||
|
||||
|
||||
def get_retinaface(name, rac='net3',
                   root='~/.insightface/models', **kwargs):
    """Fetch the pretrained 'retinaface_<name>' checkpoint (downloading it
    into `root` if needed) and wrap it in a FaceDetector."""
    from .model_store import get_model_file
    param_path = get_model_file("retinaface_%s" % name, root=root)
    return FaceDetector(param_path, rac)
|
||||
|
||||
def retinaface_r50_v1(**kwargs):
    """RetinaFace detector with a ResNet-50 backbone (anchor config 'net3')."""
    return get_retinaface("r50_v1", rac='net3', **kwargs)
|
||||
|
||||
def retinaface_mnet025_v1(**kwargs):
    """RetinaFace detector with a MobileNet-0.25 backbone, v1 (anchor config 'net3')."""
    return get_retinaface("mnet025_v1", rac='net3', **kwargs)
|
||||
|
||||
def retinaface_mnet025_v2(**kwargs):
    """RetinaFace detector with a MobileNet-0.25 backbone, v2.

    Uses anchor config 'net3l' (landmark deltas scaled by std 0.2).
    """
    return get_retinaface("mnet025_v2", rac='net3l', **kwargs)
|
||||
|
||||
77
python-package/insightface/model_zoo/face_genderage.py
Normal file
77
python-package/insightface/model_zoo/face_genderage.py
Normal file
@@ -0,0 +1,77 @@
|
||||
from __future__ import division
|
||||
import mxnet as mx
|
||||
import numpy as np
|
||||
import cv2
|
||||
|
||||
__all__ = ['FaceGenderage',
|
||||
'genderage_v1',
|
||||
'get_genderage']
|
||||
|
||||
|
||||
class FaceGenderage:
    """Combined gender + age estimator running on aligned 112x112 face crops."""

    def __init__(self, name, download, param_file):
        self.name = name
        self.download = download
        self.param_file = param_file
        self.image_size = (112, 112)
        # a downloadable model must come with its checkpoint path
        if download:
            assert param_file

    def prepare(self, ctx_id):
        """Load the checkpoint up to 'fc1_output' and bind an inference module.

        ctx_id >= 0 selects that GPU; any negative value selects CPU.
        No-op when no param_file was supplied.
        """
        if not self.param_file:
            return
        # param file is named '<prefix>-<epoch>.params'
        dash = self.param_file.rfind('-')
        dot = self.param_file.rfind('.')
        prefix = self.param_file[:dash]
        epoch = int(self.param_file[dash + 1:dot])
        sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
        sym = sym.get_internals()['fc1_output']
        ctx = mx.gpu(ctx_id) if ctx_id >= 0 else mx.cpu()
        model = mx.mod.Module(symbol=sym, context=ctx, label_names=None)
        shape = (1, 3) + self.image_size
        model.bind(data_shapes=[('data', shape)])
        model.set_params(arg_params, aux_params)
        # warm-up forward pass so the first real get() call is not slow
        warm = mx.io.DataBatch(data=(mx.nd.zeros(shape=shape),))
        model.forward(warm, is_train=False)
        model.get_outputs()[0].asnumpy()
        self.model = model

    def get(self, img):
        """Return (gender, age) for a BGR 112x112 crop; gender is 0 or 1."""
        assert self.param_file and self.model
        assert img.shape[2] == 3 and img.shape[0:2] == self.image_size
        rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        blob = mx.nd.array(np.expand_dims(np.transpose(rgb, (2, 0, 1)), axis=0))
        self.model.forward(mx.io.DataBatch(data=(blob,)), is_train=False)
        out = self.model.get_outputs()[0].asnumpy()
        # first two outputs encode gender
        gender = np.argmax(out[:, 0:2].flatten())
        # next 200 outputs form 100 binary age classifiers; the predicted
        # age is the number of positive votes
        votes = np.argmax(out[:, 2:202].reshape((100, 2)), axis=1)
        return gender, int(sum(votes))
|
||||
|
||||
def get_genderage(name, download=True,
                  root='~/.insightface/models', **kwargs):
    """Build a FaceGenderage, fetching the pretrained checkpoint unless
    download is False (in which case no params are attached)."""
    if not download:
        return FaceGenderage(name, False, None)
    from .model_store import get_model_file
    param_path = get_model_file("genderage_%s" % name, root=root)
    return FaceGenderage(name, True, param_path)
|
||||
|
||||
def genderage_v1(**kwargs):
    """Pretrained gender/age model, version 1 (downloads the checkpoint)."""
    return get_genderage("v1", download=True, **kwargs)
|
||||
|
||||
|
||||
|
||||
|
||||
83
python-package/insightface/model_zoo/face_recognition.py
Normal file
83
python-package/insightface/model_zoo/face_recognition.py
Normal file
@@ -0,0 +1,83 @@
|
||||
from __future__ import division
|
||||
import mxnet as mx
|
||||
import numpy as np
|
||||
import cv2
|
||||
|
||||
__all__ = ['FaceRecognition',
|
||||
'arcface_r100_v1', 'arcface_outofreach_v1', 'arcface_mfn_v1',
|
||||
'get_arcface']
|
||||
|
||||
|
||||
class FaceRecognition:
    """ArcFace-style embedding extractor for aligned 112x112 face crops."""

    def __init__(self, name, download, param_file):
        self.name = name
        self.download = download
        self.param_file = param_file
        self.image_size = (112, 112)
        # a downloadable model must come with its checkpoint path
        if download:
            assert param_file

    def prepare(self, ctx_id):
        """Load the checkpoint up to 'fc1_output' and bind an inference module.

        ctx_id >= 0 selects that GPU; any negative value selects CPU.
        No-op when no param_file was supplied.
        """
        if not self.param_file:
            return
        # param file is named '<prefix>-<epoch>.params'
        dash = self.param_file.rfind('-')
        dot = self.param_file.rfind('.')
        prefix = self.param_file[:dash]
        epoch = int(self.param_file[dash + 1:dot])
        sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
        # keep only the embedding layer, discarding the training head
        sym = sym.get_internals()['fc1_output']
        ctx = mx.gpu(ctx_id) if ctx_id >= 0 else mx.cpu()
        model = mx.mod.Module(symbol=sym, context=ctx, label_names=None)
        shape = (1, 3) + self.image_size
        model.bind(data_shapes=[('data', shape)])
        model.set_params(arg_params, aux_params)
        # warm-up forward pass so the first real call is not slow
        warm = mx.io.DataBatch(data=(mx.nd.zeros(shape=shape),))
        model.forward(warm, is_train=False)
        model.get_outputs()[0].asnumpy()
        self.model = model

    def get_embedding(self, img):
        """Return the raw (un-normalized) embedding for a BGR 112x112 crop."""
        assert self.param_file and self.model
        assert img.shape[2] == 3 and img.shape[0:2] == self.image_size
        rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        blob = mx.nd.array(np.expand_dims(np.transpose(rgb, (2, 0, 1)), axis=0))
        self.model.forward(mx.io.DataBatch(data=(blob,)), is_train=False)
        return self.model.get_outputs()[0].asnumpy()

    def compute_sim(self, img1, img2):
        """Cosine similarity between the embeddings of two aligned crops."""
        from numpy.linalg import norm
        emb1 = self.get_embedding(img1).flatten()
        emb2 = self.get_embedding(img2).flatten()
        return np.dot(emb1, emb2) / (norm(emb1) * norm(emb2))
|
||||
|
||||
def get_arcface(name, download=True,
                root='~/.insightface/models', **kwargs):
    """Build a FaceRecognition, fetching the pretrained checkpoint unless
    download is False (in which case no params are attached)."""
    if not download:
        return FaceRecognition(name, False, None)
    from .model_store import get_model_file
    param_path = get_model_file("arcface_%s" % name, root=root)
    return FaceRecognition(name, True, param_path)
|
||||
|
||||
def arcface_r100_v1(**kwargs):
    """ArcFace embedding model with a ResNet-100 backbone (downloads weights)."""
    return get_arcface("r100_v1", download=True, **kwargs)
|
||||
|
||||
|
||||
def arcface_mfn_v1(**kwargs):
    """ArcFace embedding model with a MobileFaceNet backbone (downloads weights).

    NOTE(review): its entry in _model_sha1 has an empty checksum, so the
    downloaded file is effectively not integrity-checked — confirm intended.
    """
    return get_arcface("mfn_v1", download=True, **kwargs)
|
||||
|
||||
def arcface_outofreach_v1(**kwargs):
    """ArcFace variant with no downloadable weights (download=False): the
    returned model carries no param file until one is supplied manually."""
    return get_arcface("outofreach_v1", download=False, **kwargs)
|
||||
|
||||
97
python-package/insightface/model_zoo/model_store.py
Normal file
97
python-package/insightface/model_zoo/model_store.py
Normal file
@@ -0,0 +1,97 @@
|
||||
|
||||
"""
|
||||
This code file mainly comes from https://github.com/dmlc/gluon-cv/blob/master/gluoncv/model_zoo/model_store.py
|
||||
"""
|
||||
from __future__ import print_function
|
||||
|
||||
__all__ = ['get_model_file']
|
||||
import os
|
||||
import zipfile
|
||||
import glob
|
||||
|
||||
from ..utils import download, check_sha1
|
||||
|
||||
# Known-good SHA-1 digests for each downloadable model archive, keyed by
# model name.  NOTE(review): 'arcface_mfn_v1' has an empty checksum, and
# check_sha1 compares only min(len) leading digits, so any file passes
# for it — confirm this is intentional.
_model_sha1 = {name: checksum for checksum, name in [
    ('95be21b58e29e9c1237f229dae534bd854009ce0', 'arcface_r100_v1'),
    ('', 'arcface_mfn_v1'),
    ('39fd1e087a2a2ed70a154ac01fecaa86c315d01b', 'retinaface_r50_v1'),
    ('2c9de8116d1f448fd1d4661f90308faae34c990a', 'retinaface_mnet025_v1'),
    ('0db1d07921d005e6c9a5b38e059452fc5645e5a4', 'retinaface_mnet025_v2'),
    ('7dd8111652b7aac2490c5dcddeb268e53ac643e6', 'genderage_v1'),
]}

# Archives are downloaded from '<base_repo_url>models/<file_name>.zip'.
base_repo_url = 'http://insightface.ai/files/'
_url_format = '{repo_url}models/{file_name}.zip'
|
||||
|
||||
|
||||
def short_hash(name):
    """Return the first 8 hex digits of the registered checksum for `name`.

    Raises ValueError for names without a pretrained model entry.
    """
    try:
        return _model_sha1[name][:8]
    except KeyError:
        raise ValueError('Pretrained model for {name} is not available.'.format(name=name))
|
||||
|
||||
|
||||
def find_params_file(dir_path):
    """Return the lexicographically last *.params file in dir_path, or None.

    None is returned both when the directory does not exist and when it
    contains no .params files.
    """
    if not os.path.exists(dir_path):
        return None
    candidates = sorted(glob.glob("%s/*.params" % dir_path))
    return candidates[-1] if candidates else None
|
||||
|
||||
def get_model_file(name, root=os.path.join('~', '.insightface', 'models')):
    r"""Return the local path of a pretrained model's .params file.

    This function will download from the online model zoo when the model
    cannot be found locally or its checksum does not match.  The root
    directory will be created if it doesn't exist.

    Parameters
    ----------
    name : str
        Name of the model (a key of ``_model_sha1``).
    root : str, default '~/.insightface/models'
        Location for keeping the model parameters.

    Returns
    -------
    file_path
        Path to the requested pretrained model file.

    Raises
    ------
    ValueError
        If the freshly downloaded file still fails the checksum check.
    """

    file_name = name
    root = os.path.expanduser(root)
    # each model lives in its own subdirectory <root>/<name>/
    dir_path = os.path.join(root, name)
    file_path = find_params_file(dir_path)
    #file_path = os.path.join(root, file_name + '.params')
    sha1_hash = _model_sha1[name]
    # reuse the cached file only when its checksum still matches
    if file_path is not None:
        if check_sha1(file_path, sha1_hash):
            return file_path
        else:
            print('Mismatch in the content of model file detected. Downloading again.')
    else:
        print('Model file is not found. Downloading.')

    if not os.path.exists(root):
        os.makedirs(root)
    if not os.path.exists(dir_path):
        os.makedirs(dir_path)

    # fetch the zip archive next to the model directory, extract, delete
    zip_file_path = os.path.join(root, file_name + '.zip')
    repo_url = base_repo_url
    if repo_url[-1] != '/':
        repo_url = repo_url + '/'
    download(_url_format.format(repo_url=repo_url, file_name=file_name),
             path=zip_file_path,
             overwrite=True)
    with zipfile.ZipFile(zip_file_path) as zf:
        zf.extractall(dir_path)
    os.remove(zip_file_path)
    file_path = find_params_file(dir_path)

    # verify what we just extracted before handing it back
    if check_sha1(file_path, sha1_hash):
        return file_path
    else:
        raise ValueError('Downloaded file has different hash. Please try again.')
|
||||
|
||||
|
||||
57
python-package/insightface/model_zoo/model_zoo.py
Normal file
57
python-package/insightface/model_zoo/model_zoo.py
Normal file
@@ -0,0 +1,57 @@
|
||||
# pylint: disable=wildcard-import, unused-wildcard-import
|
||||
"""
|
||||
This code file mainly comes from https://github.com/dmlc/gluon-cv/blob/master/gluoncv/model_zoo/model_zoo.py
|
||||
"""
|
||||
from .face_recognition import *
|
||||
from .face_detection import *
|
||||
from .face_genderage import *
|
||||
#from .face_alignment import *
|
||||
|
||||
__all__ = ['get_model', 'get_model_list']
|
||||
|
||||
# Registry mapping lowercase model names to their factory functions.
# Commented-out entries are presumably disabled pending released or
# verified weights — confirm before re-enabling.
_models = {
    'arcface_r100_v1': arcface_r100_v1,
    #'arcface_mfn_v1': arcface_mfn_v1,
    #'arcface_outofreach_v1': arcface_outofreach_v1,
    'retinaface_r50_v1': retinaface_r50_v1,
    'retinaface_mnet025_v1': retinaface_mnet025_v1,
    'retinaface_mnet025_v2': retinaface_mnet025_v2,
    'genderage_v1': genderage_v1,
}
|
||||
|
||||
|
||||
def get_model(name, **kwargs):
    """Returns a pre-defined model by name.

    Parameters
    ----------
    name : str
        Name of the model (case-insensitive; see get_model_list()).
    root : str, default '~/.insightface/models'
        Location for keeping the model parameters.

    Returns
    -------
    Model
        The model.

    Raises
    ------
    ValueError
        If `name` is not a registered model.
    """
    key = name.lower()
    if key not in _models:
        choices = '\n\t'.join(sorted(_models.keys()))
        raise ValueError('"%s" is not among the following model list:\n\t%s' % (key, choices))
    return _models[key](**kwargs)
|
||||
|
||||
|
||||
def get_model_list():
    """Get the entire list of model names in model_zoo.

    Returns
    -------
    list of str
        Sorted list of every registered model name.
    """
    return sorted(_models)
|
||||
|
||||
17
python-package/insightface/utils/__init__.py
Normal file
17
python-package/insightface/utils/__init__.py
Normal file
@@ -0,0 +1,17 @@
|
||||
from __future__ import absolute_import
|
||||
|
||||
#from . import bbox
|
||||
#from . import viz
|
||||
#from . import random
|
||||
#from . import metrics
|
||||
#from . import parallel
|
||||
|
||||
from .download import download, check_sha1
|
||||
from .filesystem import makedirs
|
||||
from .filesystem import try_import_dali
|
||||
#from .bbox import bbox_iou
|
||||
#from .block import recursive_visit, set_lr_mult, freeze_bn
|
||||
#from .lr_scheduler import LRSequential, LRScheduler
|
||||
#from .plot_history import TrainingHistory
|
||||
#from .export_helper import export_block
|
||||
#from .sync_loader_helper import split_data, split_and_load
|
||||
90
python-package/insightface/utils/download.py
Normal file
90
python-package/insightface/utils/download.py
Normal file
@@ -0,0 +1,90 @@
|
||||
"""
|
||||
This code file mainly comes from https://github.com/dmlc/gluon-cv/blob/master/gluoncv/utils/download.py
|
||||
"""
|
||||
import os
|
||||
import hashlib
|
||||
import requests
|
||||
from tqdm import tqdm
|
||||
|
||||
def check_sha1(filename, sha1_hash):
    """Check whether the SHA-1 of the file content matches the expected hash.

    Only the first min(len(actual), len(expected)) hex digits are compared,
    so truncated expected hashes are accepted (and an empty expected hash
    matches anything).

    Parameters
    ----------
    filename : str
        Path to the file.
    sha1_hash : str
        Expected sha1 hash (possibly truncated) in hexadecimal digits.

    Returns
    -------
    bool
        Whether the file content matches the expected hash.
    """
    digest = hashlib.sha1()
    # hash in 1 MiB chunks so large files don't need to fit in memory
    with open(filename, 'rb') as f:
        for chunk in iter(lambda: f.read(1048576), b''):
            digest.update(chunk)

    actual = digest.hexdigest()
    n = min(len(actual), len(sha1_hash))
    return actual[0:n] == sha1_hash[0:n]
|
||||
|
||||
def download(url, path=None, overwrite=False, sha1_hash=None):
    """Download a given URL to the local filesystem.

    Parameters
    ----------
    url : str
        URL to download.
    path : str, optional
        Destination path to store the downloaded file. By default stores to the
        current directory with the same name as in the url; a directory path
        keeps the url's file name.
    overwrite : bool, optional
        Whether to overwrite the destination file if it already exists.
    sha1_hash : str, optional
        Expected sha1 hash in hexadecimal digits. Will ignore the existing file
        when the hash is specified but doesn't match (forcing a re-download).

    Returns
    -------
    str
        The file path of the downloaded file.

    Raises
    ------
    RuntimeError
        If the HTTP request does not return status 200.
    UserWarning
        If the downloaded content fails the sha1 check.
    """
    if path is None:
        fname = url.split('/')[-1]
    else:
        path = os.path.expanduser(path)
        if os.path.isdir(path):
            fname = os.path.join(path, url.split('/')[-1])
        else:
            fname = path

    # skip the download entirely when a valid file already exists
    if overwrite or not os.path.exists(fname) or (sha1_hash and not check_sha1(fname, sha1_hash)):
        dirname = os.path.dirname(os.path.abspath(os.path.expanduser(fname)))
        if not os.path.exists(dirname):
            os.makedirs(dirname)

        print('Downloading %s from %s...'%(fname, url))
        # stream so large files are written in 1 KiB chunks, not buffered whole
        r = requests.get(url, stream=True)
        if r.status_code != 200:
            raise RuntimeError("Failed downloading url %s"%url)
        total_length = r.headers.get('content-length')
        with open(fname, 'wb') as f:
            if total_length is None: # no content length header
                for chunk in r.iter_content(chunk_size=1024):
                    if chunk: # filter out keep-alive new chunks
                        f.write(chunk)
            else:
                # known size: show a KB-granularity progress bar
                total_length = int(total_length)
                for chunk in tqdm(r.iter_content(chunk_size=1024),
                                  total=int(total_length / 1024. + 0.5),
                                  unit='KB', unit_scale=False, dynamic_ncols=True):
                    f.write(chunk)

        if sha1_hash and not check_sha1(fname, sha1_hash):
            raise UserWarning('File {} is downloaded but the content hash does not match. ' \
                              'The repo may be outdated or download may be incomplete. ' \
                              'If the "repo_url" is overridden, consider switching to ' \
                              'the default repo.'.format(fname))

    return fname
|
||||
88
python-package/insightface/utils/face_align.py
Normal file
88
python-package/insightface/utils/face_align.py
Normal file
@@ -0,0 +1,88 @@
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
from skimage import transform as trans
|
||||
|
||||
# Five 5-point (x, y) landmark templates inside a 112x112 aligned crop,
# one per head pose, ordered left profile -> frontal -> right profile.
# Point order is presumably left eye, right eye, nose, left/right mouth
# corner (matching the detector's landmark output) — TODO confirm.
src1 = np.array([
    [51.642,50.115],
    [57.617,49.990],
    [35.740,69.007],
    [51.157,89.050],
    [57.025,89.702]], dtype=np.float32)
#<--left
src2 = np.array([
    [45.031,50.118],
    [65.568,50.872],
    [39.677,68.111],
    [45.177,86.190],
    [64.246,86.758]], dtype=np.float32)

#---frontal
src3 = np.array([
    [39.730,51.138],
    [72.270,51.138],
    [56.000,68.493],
    [42.463,87.010],
    [69.537,87.010]], dtype=np.float32)

#-->right
src4 = np.array([
    [46.845,50.872],
    [67.382,50.118],
    [72.737,68.111],
    [48.167,86.758],
    [67.236,86.190]], dtype=np.float32)

#-->right profile
src5 = np.array([
    [54.796,49.990],
    [60.771,50.115],
    [76.673,69.007],
    [55.388,89.702],
    [61.257,89.050]], dtype=np.float32)

# stacked (5, 5, 2) template bank; the 224 entry simply doubles all
# coordinates for 224x224 crops
src = np.array([src1,src2,src3,src4,src5])
src_map = {112 : src, 224 : src*2}

# Single frontal template used by ArcFace alignment (112x112 only),
# expanded to shape (1, 5, 2) so estimate_norm can iterate templates.
arcface_src = np.array([
  [38.2946, 51.6963],
  [73.5318, 51.5014],
  [56.0252, 71.7366],
  [41.5493, 92.3655],
  [70.7299, 92.2041] ], dtype=np.float32 )

arcface_src = np.expand_dims(arcface_src, axis=0)
|
||||
|
||||
# In[66]:
|
||||
|
||||
# lmk is prediction; src is template
|
||||
def estimate_norm(lmk, image_size = 112, mode='arcface'):
|
||||
assert lmk.shape==(5,2)
|
||||
tform = trans.SimilarityTransform()
|
||||
lmk_tran = np.insert(lmk, 2, values=np.ones(5), axis=1)
|
||||
min_M = []
|
||||
min_index = []
|
||||
min_error = float('inf')
|
||||
if mode=='arcface':
|
||||
assert image_size==112
|
||||
src = arcface_src
|
||||
else:
|
||||
src = src_map[image_size]
|
||||
for i in np.arange(src.shape[0]):
|
||||
tform.estimate(lmk, src[i])
|
||||
M = tform.params[0:2,:]
|
||||
results = np.dot(M, lmk_tran.T)
|
||||
results = results.T
|
||||
error = np.sum(np.sqrt(np.sum((results - src[i]) ** 2,axis=1)))
|
||||
# print(error)
|
||||
if error< min_error:
|
||||
min_error = error
|
||||
min_M = M
|
||||
min_index = i
|
||||
return min_M, min_index
|
||||
|
||||
def norm_crop(img, landmark, image_size=112, mode='arcface'):
|
||||
M, pose_index = estimate_norm(landmark, image_size, mode)
|
||||
warped = cv2.warpAffine(img,M, (image_size, image_size), borderValue = 0.0)
|
||||
return warped
|
||||
|
||||
137
python-package/insightface/utils/filesystem.py
Normal file
137
python-package/insightface/utils/filesystem.py
Normal file
@@ -0,0 +1,137 @@
|
||||
"""
|
||||
This code file mainly comes from https://github.com/dmlc/gluon-cv/blob/master/gluoncv/utils/filesystem.py
|
||||
"""
|
||||
import os
|
||||
import errno
|
||||
|
||||
def makedirs(path):
|
||||
"""Create directory recursively if not exists.
|
||||
Similar to `makedir -p`, you can skip checking existence before this function.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
path : str
|
||||
Path of the desired dir
|
||||
"""
|
||||
try:
|
||||
os.makedirs(path)
|
||||
except OSError as exc:
|
||||
if exc.errno != errno.EEXIST:
|
||||
raise
|
||||
|
||||
def try_import(package, message=None):
|
||||
"""Try import specified package, with custom message support.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
package : str
|
||||
The name of the targeting package.
|
||||
message : str, default is None
|
||||
If not None, this function will raise customized error message when import error is found.
|
||||
|
||||
|
||||
Returns
|
||||
-------
|
||||
module if found, raise ImportError otherwise
|
||||
|
||||
"""
|
||||
try:
|
||||
return __import__(package)
|
||||
except ImportError as e:
|
||||
if not message:
|
||||
raise e
|
||||
raise ImportError(message)
|
||||
|
||||
def try_import_cv2():
|
||||
"""Try import cv2 at runtime.
|
||||
|
||||
Returns
|
||||
-------
|
||||
cv2 module if found. Raise ImportError otherwise
|
||||
|
||||
"""
|
||||
msg = "cv2 is required, you can install by package manager, e.g. 'apt-get', \
|
||||
or `pip install opencv-python --user` (note that this is unofficial PYPI package)."
|
||||
return try_import('cv2', msg)
|
||||
|
||||
def try_import_mmcv():
|
||||
"""Try import mmcv at runtime.
|
||||
|
||||
Returns
|
||||
-------
|
||||
mmcv module if found. Raise ImportError otherwise
|
||||
|
||||
"""
|
||||
msg = "mmcv is required, you can install by first `pip install Cython --user` \
|
||||
and then `pip install mmcv --user` (note that this is unofficial PYPI package)."
|
||||
return try_import('mmcv', msg)
|
||||
|
||||
def try_import_rarfile():
|
||||
"""Try import rarfile at runtime.
|
||||
|
||||
Returns
|
||||
-------
|
||||
rarfile module if found. Raise ImportError otherwise
|
||||
|
||||
"""
|
||||
msg = "rarfile is required, you can install by first `sudo apt-get install unrar` \
|
||||
and then `pip install rarfile --user` (note that this is unofficial PYPI package)."
|
||||
return try_import('rarfile', msg)
|
||||
|
||||
def import_try_install(package, extern_url=None):
|
||||
"""Try import the specified package.
|
||||
If the package not installed, try use pip to install and import if success.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
package : str
|
||||
The name of the package trying to import.
|
||||
extern_url : str or None, optional
|
||||
The external url if package is not hosted on PyPI.
|
||||
For example, you can install a package using:
|
||||
"pip install git+http://github.com/user/repo/tarball/master/egginfo=xxx".
|
||||
In this case, you can pass the url to the extern_url.
|
||||
|
||||
Returns
|
||||
-------
|
||||
<class 'Module'>
|
||||
The imported python module.
|
||||
|
||||
"""
|
||||
try:
|
||||
return __import__(package)
|
||||
except ImportError:
|
||||
try:
|
||||
from pip import main as pipmain
|
||||
except ImportError:
|
||||
from pip._internal import main as pipmain
|
||||
|
||||
# trying to install package
|
||||
url = package if extern_url is None else extern_url
|
||||
pipmain(['install', '--user', url]) # will raise SystemExit Error if fails
|
||||
|
||||
# trying to load again
|
||||
try:
|
||||
return __import__(package)
|
||||
except ImportError:
|
||||
import sys
|
||||
import site
|
||||
user_site = site.getusersitepackages()
|
||||
if user_site not in sys.path:
|
||||
sys.path.append(user_site)
|
||||
return __import__(package)
|
||||
return __import__(package)
|
||||
|
||||
def try_import_dali():
|
||||
"""Try import NVIDIA DALI at runtime.
|
||||
"""
|
||||
try:
|
||||
dali = __import__('nvidia.dali', fromlist=['pipeline', 'ops', 'types'])
|
||||
dali.Pipeline = dali.pipeline.Pipeline
|
||||
except ImportError:
|
||||
class dali:
|
||||
class Pipeline:
|
||||
def __init__(self):
|
||||
raise NotImplementedError(
|
||||
"DALI not found, please check if you installed it correctly.")
|
||||
return dali
|
||||
61
python-package/setup.py
Normal file
61
python-package/setup.py
Normal file
@@ -0,0 +1,61 @@
|
||||
#!/usr/bin/env python
|
||||
import os
|
||||
import io
|
||||
import re
|
||||
import shutil
|
||||
import sys
|
||||
from setuptools import setup, find_packages
|
||||
|
||||
def read(*names, **kwargs):
|
||||
with io.open(
|
||||
os.path.join(os.path.dirname(__file__), *names),
|
||||
encoding=kwargs.get("encoding", "utf8")
|
||||
) as fp:
|
||||
return fp.read()
|
||||
|
||||
|
||||
def find_version(*file_paths):
|
||||
version_file = read(*file_paths)
|
||||
version_match = re.search(r"^__version__ = ['\"]([^'\"]*)['\"]",
|
||||
version_file, re.M)
|
||||
if version_match:
|
||||
return version_match.group(1)
|
||||
raise RuntimeError("Unable to find version string.")
|
||||
|
||||
try:
|
||||
import pypandoc
|
||||
long_description = pypandoc.convert('README.md', 'rst')
|
||||
except(IOError, ImportError):
|
||||
long_description = open('README.md').read()
|
||||
|
||||
VERSION = find_version('insightface', '__init__.py')
|
||||
|
||||
requirements = [
|
||||
'numpy',
|
||||
'tqdm',
|
||||
'requests',
|
||||
'matplotlib',
|
||||
'Pillow',
|
||||
'scipy',
|
||||
'opencv-python',
|
||||
'scikit-learn',
|
||||
'scikit-image',
|
||||
'easydict',
|
||||
]
|
||||
|
||||
setup(
|
||||
# Metadata
|
||||
name='insightface',
|
||||
version=VERSION,
|
||||
author='InsightFace Contributors',
|
||||
url='https://github.com/deepinsight/insightface',
|
||||
description='InsightFace Toolkit',
|
||||
long_description=long_description,
|
||||
license='Apache-2.0',
|
||||
# Package info
|
||||
packages=find_packages(exclude=('docs', 'tests', 'scripts')),
|
||||
zip_safe=True,
|
||||
include_package_data=True,
|
||||
install_requires=requirements,
|
||||
)
|
||||
|
||||
124
recognition/data/build_eval_pack.py
Normal file
124
recognition/data/build_eval_pack.py
Normal file
@@ -0,0 +1,124 @@
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
#import mxnet as mx
|
||||
#from mxnet import ndarray as nd
|
||||
import argparse
|
||||
import cv2
|
||||
import pickle
|
||||
import numpy as np
|
||||
import sys
|
||||
import os
|
||||
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'common'))
|
||||
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'RetinaFace'))
|
||||
import face_align
|
||||
from retinaface import RetinaFace
|
||||
|
||||
def to_rgb(img):
|
||||
w, h = img.shape
|
||||
ret = np.empty((w, h, 3), dtype=np.uint8)
|
||||
ret[:, :, 0] = ret[:, :, 1] = ret[:, :, 2] = img
|
||||
return ret
|
||||
|
||||
|
||||
def IOU(Reframe,GTframe):
|
||||
x1 = Reframe[0];
|
||||
y1 = Reframe[1];
|
||||
width1 = Reframe[2]-Reframe[0];
|
||||
height1 = Reframe[3]-Reframe[1];
|
||||
|
||||
x2 = GTframe[0]
|
||||
y2 = GTframe[1]
|
||||
width2 = GTframe[2]-GTframe[0]
|
||||
height2 = GTframe[3]-GTframe[1]
|
||||
|
||||
endx = max(x1+width1,x2+width2)
|
||||
startx = min(x1,x2)
|
||||
width = width1+width2-(endx-startx)
|
||||
|
||||
endy = max(y1+height1,y2+height2)
|
||||
starty = min(y1,y2)
|
||||
height = height1+height2-(endy-starty)
|
||||
|
||||
if width <=0 or height <= 0:
|
||||
ratio = 0
|
||||
else:
|
||||
Area = width*height
|
||||
Area1 = width1*height1
|
||||
Area2 = width2*height2
|
||||
ratio = Area*1./(Area1+Area2-Area)
|
||||
return ratio
|
||||
|
||||
parser = argparse.ArgumentParser(description='Package eval images')
|
||||
# general
|
||||
parser.add_argument('--data-dir', default='', help='')
|
||||
parser.add_argument('--image-size', type=int, default=112, help='')
|
||||
parser.add_argument('--gpu', type=int, default=0, help='')
|
||||
parser.add_argument('--det-prefix', type=str, default='./model/R50', help='')
|
||||
parser.add_argument('--output', default='./', help='path to save.')
|
||||
parser.add_argument('--align-mode', default='arcface', help='align mode.')
|
||||
args = parser.parse_args()
|
||||
|
||||
gpu_id = args.gpu
|
||||
|
||||
detector = RetinaFace(args.det_prefix, 0, gpu_id, network='net3')
|
||||
target_size = 400
|
||||
max_size = 800
|
||||
|
||||
def get_norm_crop(image_path):
|
||||
im = cv2.imread(image_path)
|
||||
im_shape = im.shape
|
||||
im_size_min = np.min(im_shape[0:2])
|
||||
im_size_max = np.max(im_shape[0:2])
|
||||
im_scale = float(target_size) / float(im_size_min)
|
||||
# prevent bigger axis from being more than max_size:
|
||||
if np.round(im_scale * im_size_max) > max_size:
|
||||
im_scale = float(max_size) / float(im_size_max)
|
||||
bbox, landmark = detector.detect(im, threshold=0.5, scales=[im_scale])
|
||||
#print(im.shape, bbox.shape, landmark.shape)
|
||||
if bbox.shape[0]==0:
|
||||
bbox, landmark = detector.detect(im, threshold=0.05, scales=[im_scale*0.75, im_scale, im_scale*2.0])
|
||||
print('refine', im.shape, bbox.shape, landmark.shape)
|
||||
nrof_faces = bbox.shape[0]
|
||||
if nrof_faces>0:
|
||||
det = bbox[:,0:4]
|
||||
img_size = np.asarray(im.shape)[0:2]
|
||||
bindex = 0
|
||||
if nrof_faces>1:
|
||||
bounding_box_size = (det[:,2]-det[:,0])*(det[:,3]-det[:,1])
|
||||
img_center = img_size / 2
|
||||
offsets = np.vstack([ (det[:,0]+det[:,2])/2-img_center[1], (det[:,1]+det[:,3])/2-img_center[0] ])
|
||||
offset_dist_squared = np.sum(np.power(offsets,2.0),0)
|
||||
bindex = np.argmax(bounding_box_size-offset_dist_squared*2.0) # some extra weight on the centering
|
||||
#_bbox = bounding_boxes[bindex, 0:4]
|
||||
_landmark = landmark[bindex]
|
||||
warped = face_align.norm_crop(im, landmark = _landmark, image_size=args.image_size, mode=args.align_mode)
|
||||
return warped
|
||||
else:
|
||||
return None
|
||||
|
||||
|
||||
bins = []
|
||||
issame_list = []
|
||||
pp = 0
|
||||
for line in open(os.path.join(args.data_dir, 'pairs_label.txt'), 'r'):
|
||||
pp+=1
|
||||
if pp%100==0:
|
||||
print('processing', pp)
|
||||
line = line.strip().split()
|
||||
assert len(line)==3
|
||||
path1 = os.path.join(args.data_dir, line[0])
|
||||
path2 = os.path.join(args.data_dir, line[1])
|
||||
im1 = get_norm_crop(path1)
|
||||
im2 = get_norm_crop(path2)
|
||||
issame = True
|
||||
if line[2]=='0':
|
||||
issame = False
|
||||
issame_list.append(issame)
|
||||
for im in [im1, im2]:
|
||||
_, s = cv2.imencode('.jpg', im)
|
||||
bins.append(s)
|
||||
|
||||
with open(args.output, 'wb') as f:
|
||||
pickle.dump((bins, issame_list), f, protocol=pickle.HIGHEST_PROTOCOL)
|
||||
|
||||
6000
recognition/data/lfw/pairs_label.txt
Normal file
6000
recognition/data/lfw/pairs_label.txt
Normal file
File diff suppressed because it is too large
Load Diff
@@ -221,7 +221,7 @@ def test(lfw_set, mx_model, batch_size):
|
||||
issame_list = lfw_set[1]
|
||||
model = mx_model
|
||||
embeddings_list = []
|
||||
for i in xrange( len(lfw_data_list) ):
|
||||
for i in range( len(lfw_data_list) ):
|
||||
lfw_data = lfw_data_list[i]
|
||||
embeddings = None
|
||||
ba = 0
|
||||
@@ -256,7 +256,7 @@ def test(lfw_set, mx_model, batch_size):
|
||||
_xnorm = 0.0
|
||||
_xnorm_cnt = 0
|
||||
for embed in embeddings_list:
|
||||
for i in xrange(embed.shape[0]):
|
||||
for i in range(embed.shape[0]):
|
||||
_em = embed[i]
|
||||
_norm=np.linalg.norm(_em)
|
||||
#print(_em.shape, _norm)
|
||||
|
||||
@@ -180,12 +180,17 @@ def evaluate(embeddings, actual_issame, nrof_folds=10, pca = 0):
|
||||
return tpr, fpr, accuracy, val, val_std, far
|
||||
|
||||
def load_bin(path, image_size):
|
||||
bins, issame_list = pickle.load(open(path, 'rb'))
|
||||
try:
|
||||
with open(path, 'rb') as f:
|
||||
bins, issame_list = pickle.load(f) #py2
|
||||
except UnicodeDecodeError as e:
|
||||
with open(path, 'rb') as f:
|
||||
bins, issame_list = pickle.load(f, encoding='bytes') #py3
|
||||
data_list = []
|
||||
for flip in [0,1]:
|
||||
data = nd.empty((len(issame_list)*2, 3, image_size[0], image_size[1]))
|
||||
data_list.append(data)
|
||||
for i in xrange(len(issame_list)*2):
|
||||
for i in range(len(issame_list)*2):
|
||||
_bin = bins[i]
|
||||
img = mx.image.imdecode(_bin)
|
||||
if img.shape[1]!=image_size[0]:
|
||||
@@ -213,7 +218,7 @@ def test(data_set, mx_model, batch_size, nfolds=10, data_extra = None, label_sha
|
||||
_label = nd.ones( (batch_size,) )
|
||||
else:
|
||||
_label = nd.ones( label_shape )
|
||||
for i in xrange( len(data_list) ):
|
||||
for i in range( len(data_list) ):
|
||||
data = data_list[i]
|
||||
embeddings = None
|
||||
ba = 0
|
||||
@@ -255,7 +260,7 @@ def test(data_set, mx_model, batch_size, nfolds=10, data_extra = None, label_sha
|
||||
_xnorm = 0.0
|
||||
_xnorm_cnt = 0
|
||||
for embed in embeddings_list:
|
||||
for i in xrange(embed.shape[0]):
|
||||
for i in range(embed.shape[0]):
|
||||
_em = embed[i]
|
||||
_norm=np.linalg.norm(_em)
|
||||
#print(_em.shape, _norm)
|
||||
@@ -293,7 +298,7 @@ def test_badcase(data_set, mx_model, batch_size, name='', data_extra = None, lab
|
||||
_label = nd.ones( (batch_size,) )
|
||||
else:
|
||||
_label = nd.ones( label_shape )
|
||||
for i in xrange( len(data_list) ):
|
||||
for i in range( len(data_list) ):
|
||||
data = data_list[i]
|
||||
embeddings = None
|
||||
ba = 0
|
||||
@@ -438,7 +443,7 @@ def test_badcase(data_set, mx_model, batch_size, name='', data_extra = None, lab
|
||||
# imgb = cv2.transpose(imgb)
|
||||
# imgb = cv2.flip(imgb, 0)
|
||||
#else:
|
||||
# for ii in xrange(2):
|
||||
# for ii in range(2):
|
||||
# imgb = cv2.transpose(imgb)
|
||||
# imgb = cv2.flip(imgb, 1)
|
||||
dist = out[2]
|
||||
@@ -469,7 +474,7 @@ def dumpR(data_set, mx_model, batch_size, name='', data_extra = None, label_shap
|
||||
_label = nd.ones( (batch_size,) )
|
||||
else:
|
||||
_label = nd.ones( label_shape )
|
||||
for i in xrange( len(data_list) ):
|
||||
for i in range( len(data_list) ):
|
||||
data = data_list[i]
|
||||
embeddings = None
|
||||
ba = 0
|
||||
@@ -571,7 +576,7 @@ if __name__ == '__main__':
|
||||
ver_name_list.append(name)
|
||||
|
||||
if args.mode==0:
|
||||
for i in xrange(len(ver_list)):
|
||||
for i in range(len(ver_list)):
|
||||
results = []
|
||||
for model in nets:
|
||||
acc1, std1, acc2, std2, xnorm, embeddings_list = test(ver_list[i], model, args.batch_size, args.nfolds)
|
||||
|
||||
@@ -12,8 +12,6 @@ import sklearn
|
||||
import datetime
|
||||
import numpy as np
|
||||
import cv2
|
||||
from PIL import Image
|
||||
from io import BytesIO
|
||||
|
||||
import mxnet as mx
|
||||
from mxnet import ndarray as nd
|
||||
@@ -166,11 +164,13 @@ class FaceImageIter(io.DataIter):
|
||||
def mirror_aug(self, img):
|
||||
_rd = random.randint(0,1)
|
||||
if _rd==1:
|
||||
for c in xrange(img.shape[2]):
|
||||
for c in range(img.shape[2]):
|
||||
img[:,:,c] = np.fliplr(img[:,:,c])
|
||||
return img
|
||||
|
||||
def compress_aug(self, img):
|
||||
from PIL import Image
|
||||
from io import BytesIO
|
||||
buf = BytesIO()
|
||||
img = Image.fromarray(img.asnumpy(), 'RGB')
|
||||
q = random.randint(2, 20)
|
||||
|
||||
@@ -122,7 +122,8 @@ class ParallModule(BaseModule):
|
||||
#ag = {}
|
||||
#ax = {}
|
||||
rk = []
|
||||
for k,v in g.iteritems():
|
||||
for k in g:
|
||||
v = g[k]
|
||||
if k.startswith('fc7'):
|
||||
p1 = k.find('_')
|
||||
p2 = k.rfind('_')
|
||||
@@ -131,10 +132,6 @@ class ParallModule(BaseModule):
|
||||
rk.append(k)
|
||||
for k in rk:
|
||||
del g[k]
|
||||
#for k,v in g.iteritems():
|
||||
# print('g', k, v.shape)
|
||||
#for k,v in ag.iteritems():
|
||||
# print('ag', k, v.shape)
|
||||
self._curr_module.set_params(g, x)
|
||||
#self._arcface_module.set_params(ag, ax)
|
||||
|
||||
|
||||
@@ -606,6 +606,12 @@ def get_symbol():
|
||||
units = [3, 8, 35, 3]
|
||||
elif num_layers == 100:
|
||||
units = [3, 13, 30, 3]
|
||||
elif num_layers == 134:
|
||||
units = [3, 10, 50, 3]
|
||||
elif num_layers == 136:
|
||||
units = [3, 13, 48, 3]
|
||||
elif num_layers == 140:
|
||||
units = [3, 15, 48, 3]
|
||||
elif num_layers == 124:
|
||||
units = [3, 13, 40, 5]
|
||||
elif num_layers == 160:
|
||||
|
||||
@@ -149,7 +149,7 @@ def train_net(args):
|
||||
ctx = []
|
||||
cvd = os.environ['CUDA_VISIBLE_DEVICES'].strip()
|
||||
if len(cvd)>0:
|
||||
for i in xrange(len(cvd.split(','))):
|
||||
for i in range(len(cvd.split(','))):
|
||||
ctx.append(mx.gpu(i))
|
||||
if len(ctx)==0:
|
||||
ctx = [mx.cpu()]
|
||||
@@ -270,7 +270,7 @@ def train_net(args):
|
||||
|
||||
def ver_test(nbatch):
|
||||
results = []
|
||||
for i in xrange(len(ver_list)):
|
||||
for i in range(len(ver_list)):
|
||||
acc1, std1, acc2, std2, xnorm, embeddings_list = verification.test(ver_list[i], model, args.batch_size, 10, None, None)
|
||||
print('[%s][%d]XNorm: %f' % (ver_name_list[i], nbatch, xnorm))
|
||||
#print('[%s][%d]Accuracy: %1.5f+-%1.5f' % (ver_name_list[i], nbatch, acc1, std1))
|
||||
@@ -281,7 +281,7 @@ def train_net(args):
|
||||
|
||||
|
||||
highest_acc = [0.0, 0.0] #lfw and target
|
||||
#for i in xrange(len(ver_list)):
|
||||
#for i in range(len(ver_list)):
|
||||
# highest_acc.append(0.0)
|
||||
global_step = [0]
|
||||
save_step = [0]
|
||||
|
||||
@@ -62,6 +62,7 @@ def parse_args():
|
||||
parser.add_argument('--per-batch-size', type=int, default=default.per_batch_size, help='batch size in each context')
|
||||
parser.add_argument('--kvstore', type=str, default=default.kvstore, help='kvstore setting')
|
||||
parser.add_argument('--worker-id', type=int, default=0, help='worker id for dist training, starts from 0')
|
||||
parser.add_argument('--extra-model-name', type=str, default='', help='extra model name')
|
||||
args = parser.parse_args()
|
||||
return args
|
||||
|
||||
@@ -126,14 +127,17 @@ def train_net(args):
|
||||
ctx = []
|
||||
cvd = os.environ['CUDA_VISIBLE_DEVICES'].strip()
|
||||
if len(cvd)>0:
|
||||
for i in xrange(len(cvd.split(','))):
|
||||
for i in range(len(cvd.split(','))):
|
||||
ctx.append(mx.gpu(i))
|
||||
if len(ctx)==0:
|
||||
ctx = [mx.cpu()]
|
||||
print('use cpu')
|
||||
else:
|
||||
print('gpu num:', len(ctx))
|
||||
prefix = os.path.join(args.models_root, '%s-%s-%s'%(args.network, args.loss, args.dataset), 'model')
|
||||
if len(args.extra_model_name)==0:
|
||||
prefix = os.path.join(args.models_root, '%s-%s-%s'%(args.network, args.loss, args.dataset), 'model')
|
||||
else:
|
||||
prefix = os.path.join(args.models_root, '%s-%s-%s-%s'%(args.network, args.loss, args.dataset, args.extra_model_name), 'model')
|
||||
prefix_dir = os.path.dirname(prefix)
|
||||
print('prefix', prefix)
|
||||
if not os.path.exists(prefix_dir):
|
||||
@@ -249,7 +253,7 @@ def train_net(args):
|
||||
|
||||
def ver_test(nbatch):
|
||||
results = []
|
||||
for i in xrange(len(ver_list)):
|
||||
for i in range(len(ver_list)):
|
||||
acc1, std1, acc2, std2, xnorm, embeddings_list = verification.test(ver_list[i], model, args.batch_size, 10, None, None)
|
||||
print('[%s][%d]XNorm: %f' % (ver_name_list[i], nbatch, xnorm))
|
||||
#print('[%s][%d]Accuracy: %1.5f+-%1.5f' % (ver_name_list[i], nbatch, acc1, std1))
|
||||
@@ -259,7 +263,7 @@ def train_net(args):
|
||||
|
||||
|
||||
highest_acc = [0.0, 0.0] #lfw and target
|
||||
#for i in xrange(len(ver_list)):
|
||||
#for i in range(len(ver_list)):
|
||||
# highest_acc.append(0.0)
|
||||
global_step = [0]
|
||||
save_step = [0]
|
||||
|
||||
@@ -111,12 +111,12 @@ class FaceImageIter(io.DataIter):
|
||||
|
||||
def pairwise_dists(self, embeddings):
|
||||
nd_embedding_list = []
|
||||
for i in xrange(self.ctx_num):
|
||||
for i in range(self.ctx_num):
|
||||
nd_embedding = mx.nd.array(embeddings, mx.gpu(i))
|
||||
nd_embedding_list.append(nd_embedding)
|
||||
nd_pdists = []
|
||||
pdists = []
|
||||
for idx in xrange(embeddings.shape[0]):
|
||||
for idx in range(embeddings.shape[0]):
|
||||
emb_idx = idx%self.ctx_num
|
||||
nd_embedding = nd_embedding_list[emb_idx]
|
||||
a_embedding = nd_embedding[idx]
|
||||
@@ -138,16 +138,16 @@ class FaceImageIter(io.DataIter):
|
||||
pdists = self.pairwise_dists(embeddings)
|
||||
#self.times[3] += self.time_elapsed()
|
||||
|
||||
for i in xrange(people_per_batch):
|
||||
for i in range(people_per_batch):
|
||||
nrof_images = int(nrof_images_per_class[i])
|
||||
for j in xrange(1,nrof_images):
|
||||
for j in range(1,nrof_images):
|
||||
#self.time_reset()
|
||||
a_idx = emb_start_idx + j - 1
|
||||
#neg_dists_sqr = np.sum(np.square(embeddings[a_idx] - embeddings), 1)
|
||||
neg_dists_sqr = pdists[a_idx]
|
||||
#self.times[3] += self.time_elapsed()
|
||||
|
||||
for pair in xrange(j, nrof_images): # For every possible positive pair.
|
||||
for pair in range(j, nrof_images): # For every possible positive pair.
|
||||
p_idx = emb_start_idx + pair
|
||||
#self.time_reset()
|
||||
pos_dist_sqr = np.sum(np.square(embeddings[a_idx]-embeddings[p_idx]))
|
||||
@@ -234,7 +234,7 @@ class FaceImageIter(io.DataIter):
|
||||
#_label = _batch.label[0].asnumpy()
|
||||
#data[ba:bb,:,:,:] = _data
|
||||
#label[ba:bb] = _label
|
||||
for i in xrange(ba, bb):
|
||||
for i in range(ba, bb):
|
||||
#print(ba, bb, self.triplet_cur, i, len(self.triplet_seq))
|
||||
_idx = self.triplet_seq[i+self.triplet_cur]
|
||||
s = self.imgrec.read_idx(_idx)
|
||||
@@ -269,7 +269,7 @@ class FaceImageIter(io.DataIter):
|
||||
self.times[1] += self.time_elapsed()
|
||||
self.time_reset()
|
||||
nrof_images_per_class = [1]
|
||||
for i in xrange(1, bag_size):
|
||||
for i in range(1, bag_size):
|
||||
if tag[i][0]==tag[i-1][0]:
|
||||
nrof_images_per_class[-1]+=1
|
||||
else:
|
||||
@@ -283,7 +283,7 @@ class FaceImageIter(io.DataIter):
|
||||
if bb>len(triplets):
|
||||
break
|
||||
_triplets = triplets[ba:bb]
|
||||
for i in xrange(3):
|
||||
for i in range(3):
|
||||
for triplet in _triplets:
|
||||
_pos = triplet[i]
|
||||
_idx = tag[_pos][1]
|
||||
@@ -306,7 +306,7 @@ class FaceImageIter(io.DataIter):
|
||||
print('loading batch',batch_num, ba)
|
||||
bb = min(ba+self.batch_size, len(self.oseq))
|
||||
_count = bb-ba
|
||||
for i in xrange(_count):
|
||||
for i in range(_count):
|
||||
idx = self.oseq[i+ba]
|
||||
s = self.imgrec.read_idx(idx)
|
||||
header, img = recordio.unpack(s)
|
||||
@@ -323,7 +323,7 @@ class FaceImageIter(io.DataIter):
|
||||
if X is None:
|
||||
X = np.zeros( (len(self.id2range), nembedding.shape[1]), dtype=np.float32 )
|
||||
nplabel = label.asnumpy()
|
||||
for i in xrange(_count):
|
||||
for i in range(_count):
|
||||
ilabel = int(nplabel[i])
|
||||
#print(ilabel, ilabel.__class__)
|
||||
X[ilabel] += nembedding[i]
|
||||
@@ -331,14 +331,14 @@ class FaceImageIter(io.DataIter):
|
||||
X = sklearn.preprocessing.normalize(X)
|
||||
d = X.shape[1]
|
||||
t = AnnoyIndex(d, metric='euclidean')
|
||||
for i in xrange(X.shape[0]):
|
||||
for i in range(X.shape[0]):
|
||||
t.add_item(i, X[i])
|
||||
print('start to build index')
|
||||
t.build(20)
|
||||
print(X.shape)
|
||||
k = self.per_identities
|
||||
self.seq = []
|
||||
for i in xrange(X.shape[0]):
|
||||
for i in range(X.shape[0]):
|
||||
nnlist = t.get_nns_by_item(i, k)
|
||||
assert nnlist[0]==i
|
||||
for _label in nnlist:
|
||||
@@ -350,7 +350,7 @@ class FaceImageIter(io.DataIter):
|
||||
random.shuffle(_list)
|
||||
else:
|
||||
_list = np.random.choice(_list, self.images_per_identity, replace=False)
|
||||
for i in xrange(self.images_per_identity):
|
||||
for i in range(self.images_per_identity):
|
||||
_idx = _list[i%len(_list)]
|
||||
self.seq.append(_idx)
|
||||
#faiss_params = [20,5]
|
||||
@@ -365,9 +365,9 @@ class FaceImageIter(io.DataIter):
|
||||
#D, I = index.search(X, k) # actual search
|
||||
#print(I.shape)
|
||||
#self.seq = []
|
||||
#for i in xrange(I.shape[0]):
|
||||
#for i in range(I.shape[0]):
|
||||
# #assert I[i][0]==i
|
||||
# for j in xrange(k):
|
||||
# for j in range(k):
|
||||
# _label = I[i][j]
|
||||
# assert _label<len(self.id2range)
|
||||
# _id = self.header0[0]+_label
|
||||
@@ -377,7 +377,7 @@ class FaceImageIter(io.DataIter):
|
||||
# random.shuffle(_list)
|
||||
# else:
|
||||
# _list = np.random.choice(_list, self.images_per_identity, replace=False)
|
||||
# for i in xrange(self.images_per_identity):
|
||||
# for i in range(self.images_per_identity):
|
||||
# _idx = _list[i%len(_list)]
|
||||
# self.seq.append(_idx)
|
||||
|
||||
@@ -391,9 +391,10 @@ class FaceImageIter(io.DataIter):
|
||||
elif not self.hard_mining:
|
||||
self.seq = []
|
||||
idlist = []
|
||||
for _id,v in self.id2range.iteritems():
|
||||
for _id in self.id2range:
|
||||
v = self.id2range[_id]
|
||||
idlist.append((_id,range(*v)))
|
||||
for r in xrange(self.repeat):
|
||||
for r in range(self.repeat):
|
||||
if r%10==0:
|
||||
print('repeat', r)
|
||||
if self.shuffle:
|
||||
@@ -406,7 +407,7 @@ class FaceImageIter(io.DataIter):
|
||||
random.shuffle(_list)
|
||||
else:
|
||||
_list = np.random.choice(_list, self.images_per_identity, replace=False)
|
||||
for i in xrange(self.images_per_identity):
|
||||
for i in range(self.images_per_identity):
|
||||
_idx = _list[i%len(_list)]
|
||||
self.seq.append(_idx)
|
||||
else:
|
||||
@@ -470,7 +471,7 @@ class FaceImageIter(io.DataIter):
|
||||
def mirror_aug(self, img):
|
||||
_rd = random.randint(0,1)
|
||||
if _rd==1:
|
||||
for c in xrange(img.shape[2]):
|
||||
for c in range(img.shape[2]):
|
||||
img[:,:,c] = np.fliplr(img[:,:,c])
|
||||
return img
|
||||
|
||||
|
||||
BIN
resources/lfr19_wechat1.jpg
Normal file
BIN
resources/lfr19_wechat1.jpg
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 50 KiB |
Reference in New Issue
Block a user