# Mirror of https://github.com/deepinsight/insightface.git
# (synced 2026-05-14 12:17:55 +00:00; 352 lines, 9.8 KiB, Python)
import numpy as np
|
|
from easydict import EasyDict as edict
|
|
|
|
# Root settings object: every option below is reachable as an attribute
# (config.NAME) as well as by key (config['NAME']).
config = edict(
    # network related params
    PIXEL_MEANS=np.array([103.939, 116.779, 123.68]),
    PIXEL_STDS=np.array([1.0, 1.0, 1.0]),
    PIXEL_SCALE=1.0,
    IMAGE_STRIDE=0,
    # dataset related params
    NUM_CLASSES=2,
    # Each scale entry is a pair: first is scale (the shorter side),
    # second is max size.
    PRE_SCALES=[(1200, 1600)],
    SCALES=[(640, 640)],  # commented-out alternative: [(800, 800)]
    ORIGIN_SCALE=False,
)
|
|
|
|
# Anchor aspect ratios shared by every level of RAC_SSH.
_ratio = (1., )

# Anchor configuration keyed by feature stride (as a string). All levels
# share the same base size, ratios and allowed border; only the pair of
# anchor scales differs from one stride to the next.
RAC_SSH = {
    stride: {
        'SCALES': scales,
        'BASE_SIZE': 16,
        'RATIOS': _ratio,
        'ALLOWED_BORDER': 9999,
    }
    for stride, scales in (
        ('32', (32, 16)),
        ('16', (8, 4)),
        ('8', (2, 1)),
    )
}
|
|
|
|
# Anchor aspect ratios shared by every level of RAC_SSH2 (two ratios
# instead of the single one used by RAC_SSH).
_ratio = (1., 1.5)

# Same strides and scales as RAC_SSH; only the ratio tuple differs.
RAC_SSH2 = {
    stride: {
        'SCALES': scales,
        'BASE_SIZE': 16,
        'RATIOS': _ratio,
        'ALLOWED_BORDER': 9999,
    }
    for stride, scales in (
        ('32', (32, 16)),
        ('16', (8, 4)),
        ('8', (2, 1)),
    )
}
|
|
|
|
# Anchor aspect ratios shared by every level of RAC_SSH3.
_ratio = (1., 1.5)

# Four-level variant: adds a stride-4 level that reuses the (2, 1) scales
# of the stride-8 level.
RAC_SSH3 = {
    stride: {
        'SCALES': scales,
        'BASE_SIZE': 16,
        'RATIOS': _ratio,
        'ALLOWED_BORDER': 9999,
    }
    for stride, scales in (
        ('32', (32, 16)),
        ('16', (8, 4)),
        ('8', (2, 1)),
        ('4', (2, 1)),
    )
}
|
|
|
|
# Anchor configuration for strides 4..64, three scales per stride.
RAC_RETINA = {}
_ratios = (1.0, )
# Cube root of two: three successive multiplications double the scale.
_ass = 2.0**(1.0 / 3)
# Running scale. It is never reset inside the loop, so each stride
# continues from where the previous stride stopped.
_basescale = 1.0
for _stride in (4, 8, 16, 32, 64):
    scales = []
    for _ in range(3):
        scales.append(_basescale)
        _basescale *= _ass
    key = str(_stride)
    value = {
        'BASE_SIZE': 16,
        'RATIOS': _ratios,
        'ALLOWED_BORDER': 9999,
        'SCALES': tuple(scales),
    }
    RAC_RETINA[key] = value
|
|
|
|
# --- anchors -------------------------------------------------------------
# Default anchor layout; a network entry may replace it later.
config.RPN_ANCHOR_CFG = RAC_SSH

# --- head / optimisation -------------------------------------------------
config.NET_MODE = 2
config.HEAD_MODULE = 'SSH'  # commented-out alternative: 'RF'
config.LR_MODE = 0
config.LANDMARK_LR_MULT = 2.0
config.HEAD_FILTER_NUM = 256
config.CONTEXT_FILTER_RATIO = 1
config.max_feat_channel = 9999

# --- feature switches ----------------------------------------------------
config.USE_CROP = True
config.USE_FPN = True
config.USE_DCN = 0
config.FACE_LANDMARK = True
config.USE_OCCLUSION = False
config.USE_BLUR = False
config.MORE_SMALL_BOX = True

config.LAYER_FIX = False

# --- cascade -------------------------------------------------------------
config.CASCADE = 0
config.CASCADE_MODE = 1
# Commented-out alternatives:
#   CASCADE_CLS_STRIDES = [16, 8, 4]
#   CASCADE_BBOX_STRIDES = [64, 32] or [64, 32, 16, 8]
config.CASCADE_CLS_STRIDES = [64, 32, 16, 8, 4]
config.CASCADE_BBOX_STRIDES = [64, 32, 16, 8, 4]

# --- head variants -------------------------------------------------------
config.HEAD_BOX = False
config.DENSE_ANCHOR = False
config.USE_MAXOUT = 0
config.SHARE_WEIGHT_BBOX = False
config.SHARE_WEIGHT_LANDMARK = False

# --- data loading / augmentation -----------------------------------------
config.RANDOM_FEAT_STRIDE = False
config.NUM_CPU = 4
config.MIXUP = 0.0
config.USE_3D = False

# BBOX_MASK_THRESH is not set here (a literal 0 is commented out).
config.COLOR_MODE = 2
config.COLOR_JITTERING = 0.125  # commented-out alternatives: 0, 0.2
|
|
|
|
# Training-only settings, grouped in their own nested EasyDict.
config.TRAIN = edict(
    IMAGE_ALIGN=0,
    MIN_BOX_SIZE=0,
    # R-CNN and RPN
    # size of images for each device, 2 for rcnn, 1 for rpn and e2e
    BATCH_IMAGES=8,
    # e2e changes behavior of anchor loader and metric
    END2END=True,
    # group images with similar aspect ratio
    ASPECT_GROUPING=False,
    # RPN anchor loader
    # rpn anchors batch size
    RPN_ENABLE_OHEM=2,
    OHEM_MODE=1,
    RPN_BATCH_SIZE=256,
    # rpn anchors sampling params
    RPN_FG_FRACTION=0.25,
    # The positive-overlap threshold is raised when cascading is enabled
    # at import time (config.CASCADE > 0).
    RPN_POSITIVE_OVERLAP=0.7 if config.CASCADE > 0 else 0.5,
    RPN_NEGATIVE_OVERLAP=0.3,
    CASCADE_OVERLAP=[0.4, 0.5],
    RPN_CLOBBER_POSITIVES=False,
    RPN_FORCE_POSITIVE=False,
    # rpn bounding box regression params
    BBOX_STDS=(1.0, 1.0, 1.0, 1.0),
    LANDMARK_STD=1.0,
)
# The mask threshold mirrors the minimum training box size.
config.BBOX_MASK_THRESH = config.TRAIN.MIN_BOX_SIZE
|
|
|
|
# Test-time settings, grouped in their own nested EasyDict.
config.TEST = edict(
    # R-CNN testing
    # use rpn to generate proposal
    HAS_RPN=False,
    # size of images for each device
    BATCH_IMAGES=1,
    # RPN proposal
    CXX_PROPOSAL=True,
    RPN_NMS_THRESH=0.3,
    RPN_PRE_NMS_TOP_N=1000,
    RPN_POST_NMS_TOP_N=3000,
    # RPN_MIN_SIZE is left unset; commented-out candidates were
    # config.RPN_FEAT_STRIDE and [0, 0, 0].
    # RCNN nms
    NMS=0.3,
    SCORE_THRESH=0.05,
    IOU_THRESH=0.5,
)
|
|
|
|
# network settings
# One entry per backbone name; generate_config() copies the selected
# entry's values over the matching global settings.
network = edict()

# 'ssh' defines no overrides of its own.
network.ssh = edict()

# Commented-out mnet alternatives kept for reference:
#   pretrained = 'model/mnasnet'
#   pretrained = 'model/mobilenetv2_0_5'
#   pretrained = 'model/mobilenet_0_5'    (with MULTIPLIER = 0.5)
#   pretrained = 'model/mobilenet_0_25'   (with pretrained_epoch = 0)
#   PIXEL_MEANS = np.array([0.406, 0.456, 0.485])
#   PIXEL_STDS = np.array([0.225, 0.224, 0.229])
#   PIXEL_SCALE = 255.0
network.mnet = edict(
    FIXED_PARAMS=['^stage1', '^.*upsampling'],
    BATCH_IMAGES=16,
    HEAD_FILTER_NUM=64,
    CONTEXT_FILTER_RATIO=1,
    PIXEL_MEANS=np.array([0.0, 0.0, 0.0]),
    PIXEL_STDS=np.array([1.0, 1.0, 1.0]),
    PIXEL_SCALE=1.0,
    # Other checkpoints that were commented out:
    #   'model/mobilenetfd_0_25'  #78
    #   'model/mobilenetfd2'      #75
    #   'model/mobilenet025fd1'   #75
    #   'model/mobilenet025fd2'
    pretrained='model/mobilenet025fd0',  #78
    pretrained_epoch=0,
    max_feat_channel=8888,
    COLOR_MODE=1,
    USE_CROP=True,
    RPN_ANCHOR_CFG=RAC_SSH,
    LAYER_FIX=True,
    LANDMARK_LR_MULT=2.5,
)
|
|
|
|
# Commented-out resnet alternatives kept for reference:
#   pretrained = 'model/ResNet50_v1d' | 'model/resnet-50' |
#                'model/senet154' | 'model/densenet161'
#   PIXEL_MEANS = np.array([0.406, 0.456, 0.485])
#   PIXEL_STDS = np.array([0.225, 0.224, 0.229])
#   PIXEL_SCALE = 255.0
#   (two more commented lines set network.mnet.PIXEL_MEANS /
#    network.mnet.PIXEL_STDS to [103.939, 116.779, 123.68] /
#    [57.375, 57.12, 58.393])
network.resnet = edict(
    pretrained='model/resnet-152',
    pretrained_epoch=0,
    lr_step='1,2,3,4,5,55,68,80',
    lr=0.001,
    PIXEL_MEANS=np.array([0.0, 0.0, 0.0]),
    PIXEL_STDS=np.array([1.0, 1.0, 1.0]),
    PIXEL_SCALE=1.0,
    FIXED_PARAMS=['^stage1', '^.*upsampling'],
    BATCH_IMAGES=8,
    HEAD_FILTER_NUM=256,
    CONTEXT_FILTER_RATIO=1,
    USE_DCN=2,
    RPN_BATCH_SIZE=256,
    RPN_ANCHOR_CFG=RAC_RETINA,
)

# Second stage: these assignments replace three of the values given above,
# so they are the ones actually in effect.
network.resnet.USE_DCN = 0
network.resnet.pretrained = 'model/resnet-50'
network.resnet.RPN_ANCHOR_CFG = RAC_SSH
|
|
|
|
# dataset settings
# One entry per dataset name; generate_config() copies the selected
# entry's values over the matching global settings.
dataset = edict()

dataset.widerface = edict(
    dataset='widerface',
    image_set='train',
    test_image_set='val',
    root_path='data',
    dataset_path='data/widerface',
    NUM_CLASSES=2,
)

dataset.retinaface = edict(
    dataset='retinaface',
    image_set='train',
    test_image_set='val',
    root_path='data',
    dataset_path='data/retinaface',
    NUM_CLASSES=2,
)
|
|
|
|
# Parameter-name patterns stored as the global FIXED_PARAMS setting.
# Commented-out alternatives:
#   ['^.*upsampling']
#   ['^conv1', '^conv2', '^conv3']
#   ['^conv0', '^stage1', 'gamma', 'beta']  (noted "for resnet")
config.FIXED_PARAMS = ['^conv1', '^conv2', '^conv3', '^.*upsampling']

# default settings
default = edict(
    # default network (commented-out alternative: 'resnetssh')
    network='resnet',
    pretrained='model/resnet-152',
    pretrained_epoch=0,
    # default dataset
    dataset='retinaface',
    image_set='train',
    test_image_set='val',
    root_path='data',
    dataset_path='data/retinaface',
    # default training
    frequent=20,
    kvstore='device',
    # default e2e
    prefix='model/retinaface',
    end_epoch=10000,
    lr_step='55,68,80',
    lr=0.01,
    wd=0.0005,
)
|
|
|
|
|
|
def _apply_overrides(overrides):
    """Copy the entries of *overrides* onto the module-level settings.

    A key already present in ``config`` is written there; otherwise, a key
    present in ``default`` is written there. Keys found in neither are
    ignored by this step.
    """
    for name, setting in overrides.items():
        if name in config:
            config[name] = setting
        elif name in default:
            default[name] = setting
        # Checked independently of the branch above: a key that exists in
        # config.TRAIN is updated there as well (or only there).
        if name in config.TRAIN:
            config.TRAIN[name] = setting


def generate_config(_network, _dataset):
    """Specialise the global settings for one network and one dataset.

    Applies ``network[_network]`` and then ``dataset[_dataset]`` as
    overrides, records both names on ``config``, and derives the anchor
    bookkeeping from ``config.RPN_ANCHOR_CFG``:

    * ``config.RPN_FEAT_STRIDE`` - the strides as ints, largest first;
    * ``NUM_ANCHORS`` on every stride entry - ``len(SCALES) * len(RATIOS)``,
      doubled when ``config.DENSE_ANCHOR`` is set;
    * ``config.NUM_ANCHORS`` - the anchor count common to all strides.

    Args:
        _network: key into the module-level ``network`` table.
        _dataset: key into the module-level ``dataset`` table.

    Raises:
        ValueError: if the anchor configuration is empty, or if the
            strides do not all have the same number of anchors.
    """
    _apply_overrides(network[_network])
    _apply_overrides(dataset[_dataset])
    config.network = _network
    config.dataset = _dataset

    anchor_cfg = config.RPN_ANCHOR_CFG
    if not anchor_cfg:
        raise ValueError(
            'RPN_ANCHOR_CFG is empty for network %r; at least one feature '
            'stride is required' % (_network, ))

    dense = config.DENSE_ANCHOR
    anchors_by_stride = {}
    for stride_key in anchor_cfg:
        level = anchor_cfg[stride_key]
        count = len(level['SCALES']) * len(level['RATIOS'])
        if dense:
            count *= 2
        level['NUM_ANCHORS'] = count
        anchors_by_stride[stride_key] = count

    config.RPN_FEAT_STRIDE = sorted((int(key) for key in anchor_cfg),
                                    reverse=True)

    # Explicit check rather than `assert`, so it is still enforced when
    # Python runs with -O.
    if len(set(anchors_by_stride.values())) != 1:
        raise ValueError(
            'every stride in RPN_ANCHOR_CFG must define the same number of '
            'anchors, got %r' % (anchors_by_stride, ))
    config.NUM_ANCHORS = next(iter(anchors_by_stride.values()))
|