# Files
# insightface/detection/RetinaFace/rcnn/sample_config.py
# 2020-11-06 13:59:21 +08:00
#
# 352 lines
# 9.8 KiB
# Python

import numpy as np
from easydict import EasyDict as edict
# Shared configuration object. Later sections of this file attach further
# settings (and the nested TRAIN / TEST groups) to it.
config = edict({
    # network related params
    'PIXEL_MEANS': np.array([103.939, 116.779, 123.68]),
    'PIXEL_STDS': np.array([1.0, 1.0, 1.0]),
    'PIXEL_SCALE': 1.0,
    'IMAGE_STRIDE': 0,
    # dataset related params
    'NUM_CLASSES': 2,
    # For both scale lists: first is scale (the shorter side); second is max size.
    'PRE_SCALES': [(1200, 1600)],
    # Disabled alternative kept from the original file: [(800, 800)]
    'SCALES': [(640, 640)],
    'ORIGIN_SCALE': False,
})
# Anchor settings for three feature strides. Keys are the strides written as
# strings (generate_config converts them with int()). Every stride shares the
# same single aspect ratio and base size; only the pair of scales differs.
_ratio = (1., )
RAC_SSH = {
    stride: {
        'SCALES': scales,
        'BASE_SIZE': 16,
        'RATIOS': _ratio,
        'ALLOWED_BORDER': 9999,
    }
    for stride, scales in (
        ('32', (32, 16)),
        ('16', (8, 4)),
        ('8', (2, 1)),
    )
}
# Same three strides and scales as RAC_SSH, but with two aspect ratios per
# scale (1.0 and 1.5), i.e. four anchors per stride instead of two.
_ratio = (1., 1.5)
RAC_SSH2 = {
    stride: {
        'SCALES': scales,
        'BASE_SIZE': 16,
        'RATIOS': _ratio,
        'ALLOWED_BORDER': 9999,
    }
    for stride, scales in (
        ('32', (32, 16)),
        ('16', (8, 4)),
        ('8', (2, 1)),
    )
}
# Four-stride variant: the three RAC_SSH2 strides plus stride '4', which
# reuses the (2, 1) scales of stride '8'. Two aspect ratios per scale.
_ratio = (1., 1.5)
RAC_SSH3 = {
    stride: {
        'SCALES': scales,
        'BASE_SIZE': 16,
        'RATIOS': _ratio,
        'ALLOWED_BORDER': 9999,
    }
    for stride, scales in (
        ('32', (32, 16)),
        ('16', (8, 4)),
        ('8', (2, 1)),
        ('4', (2, 1)),
    )
}
# Anchor settings with three scales per stride for strides 4, 8, 16, 32, 64.
# The running scale is multiplied by 2 ** (1/3) after every anchor and is NOT
# reset between strides, so each stride starts at (about) twice the first
# scale of the previous stride: 1, 2, 4, 8, 16.
RAC_RETINA = {}
_ratios = (1.0, )
_ass = 2.0**(1.0 / 3)  # scale step; three steps double the scale
_basescale = 1.0
for _stride in [4, 8, 16, 32, 64]:
    key = str(_stride)
    value = {'BASE_SIZE': 16, 'RATIOS': _ratios, 'ALLOWED_BORDER': 9999}
    scales = []
    for _ in range(3):
        scales.append(_basescale)
        # Carry the grown scale over to the next anchor (and the next stride).
        _basescale *= _ass
    value['SCALES'] = tuple(scales)
    RAC_RETINA[key] = value
# Model / feature switches attached to the shared config object.
# The consumers of most of these flags are outside this file, so their exact
# meaning cannot be confirmed from here -- check the reader before changing one.
#
# Anchor layout used unless the selected network entry supplies its own
# RPN_ANCHOR_CFG (both network.mnet and network.resnet do), in which case
# generate_config replaces this value.
config.RPN_ANCHOR_CFG = RAC_SSH #default
config.NET_MODE = 2
config.HEAD_MODULE = 'SSH'
#config.HEAD_MODULE = 'RF'
config.LR_MODE = 0
# The next few keys (LANDMARK_LR_MULT, HEAD_FILTER_NUM, CONTEXT_FILTER_RATIO,
# max_feat_channel, USE_CROP, USE_DCN, LAYER_FIX) also appear in the
# per-network tables below; generate_config overwrites them with the selected
# network's values when present there.
config.LANDMARK_LR_MULT = 2.0
config.HEAD_FILTER_NUM = 256
config.CONTEXT_FILTER_RATIO = 1
config.max_feat_channel = 9999
config.USE_CROP = True
config.USE_FPN = True
config.USE_DCN = 0
config.FACE_LANDMARK = True
config.USE_OCCLUSION = False
config.USE_BLUR = False
config.MORE_SMALL_BOX = True
config.LAYER_FIX = False
# CASCADE is read once, at import time, by the TRAIN section further down:
# a value > 0 raises TRAIN.RPN_POSITIVE_OVERLAP from 0.5 to 0.7.
config.CASCADE = 0
config.CASCADE_MODE = 1
#config.CASCADE_CLS_STRIDES = [16,8,4]
#config.CASCADE_BBOX_STRIDES = [64,32]
config.CASCADE_CLS_STRIDES = [64, 32, 16, 8, 4]
config.CASCADE_BBOX_STRIDES = [64, 32, 16, 8, 4]
#config.CASCADE_BBOX_STRIDES = [64,32,16,8]
config.HEAD_BOX = False
# When true, generate_config doubles the per-stride anchor count.
config.DENSE_ANCHOR = False
config.USE_MAXOUT = 0
config.SHARE_WEIGHT_BBOX = False
config.SHARE_WEIGHT_LANDMARK = False
config.RANDOM_FEAT_STRIDE = False
config.NUM_CPU = 4
config.MIXUP = 0.0
config.USE_3D = False
# BBOX_MASK_THRESH is not set here; it is assigned later from
# config.TRAIN.MIN_BOX_SIZE.
#config.BBOX_MASK_THRESH = 0
# COLOR_MODE is overridden to 1 by network.mnet.
config.COLOR_MODE = 2
config.COLOR_JITTERING = 0.125
#config.COLOR_JITTERING = 0
#config.COLOR_JITTERING = 0.2
# Training-time settings, grouped under config.TRAIN. generate_config copies a
# network/dataset entry into this group whenever its key already exists here
# (e.g. BATCH_IMAGES, RPN_BATCH_SIZE).
config.TRAIN = edict()
config.TRAIN.IMAGE_ALIGN = 0
config.TRAIN.MIN_BOX_SIZE = 0
# Plain copy of the value taken at import time: later changes to
# config.TRAIN.MIN_BOX_SIZE are not reflected in BBOX_MASK_THRESH.
config.BBOX_MASK_THRESH = config.TRAIN.MIN_BOX_SIZE
# R-CNN and RPN
# size of images for each device, 2 for rcnn, 1 for rpn and e2e
config.TRAIN.BATCH_IMAGES = 8
# e2e changes behavior of anchor loader and metric
config.TRAIN.END2END = True
# group images with similar aspect ratio
config.TRAIN.ASPECT_GROUPING = False
# RPN anchor loader
config.TRAIN.RPN_ENABLE_OHEM = 2
config.TRAIN.OHEM_MODE = 1
# rpn anchors batch size
config.TRAIN.RPN_BATCH_SIZE = 256
# rpn anchors sampling params
config.TRAIN.RPN_FG_FRACTION = 0.25
config.TRAIN.RPN_POSITIVE_OVERLAP = 0.5
config.TRAIN.RPN_NEGATIVE_OVERLAP = 0.3
# Evaluated once at import time with the config.CASCADE value set above; a
# CASCADE value changed afterwards does not re-run this check.
if config.CASCADE > 0:
    config.TRAIN.RPN_POSITIVE_OVERLAP = 0.7
# NOTE(review): the source this was recovered from had lost its indentation.
# Only the 0.7 override is treated as the body of the `if`; CASCADE_OVERLAP is
# set unconditionally so the key always exists -- confirm against upstream.
config.TRAIN.CASCADE_OVERLAP = [0.4, 0.5]
config.TRAIN.RPN_CLOBBER_POSITIVES = False
config.TRAIN.RPN_FORCE_POSITIVE = False
# rpn bounding box regression params
config.TRAIN.BBOX_STDS = (1.0, 1.0, 1.0, 1.0)
config.TRAIN.LANDMARK_STD = 1.0
# Inference-time settings, grouped under config.TEST.
config.TEST = edict({
    # R-CNN testing
    # use rpn to generate proposal
    'HAS_RPN': False,
    # size of images for each device
    'BATCH_IMAGES': 1,
    # RPN proposal
    'CXX_PROPOSAL': True,
    'RPN_NMS_THRESH': 0.3,
    'RPN_PRE_NMS_TOP_N': 1000,
    'RPN_POST_NMS_TOP_N': 3000,
    # RPN_MIN_SIZE is not set. Disabled candidates kept from the original file:
    #   config.TEST.RPN_MIN_SIZE = config.RPN_FEAT_STRIDE
    #   config.TEST.RPN_MIN_SIZE = [0,0,0]
    # RCNN nms
    'NMS': 0.3,
    'SCORE_THRESH': 0.05,
    'IOU_THRESH': 0.5,
})
# network settings
# Per-network override tables, selected by name in generate_config. For each
# key: if config already has it the value goes to config, otherwise to default
# if default has it; additionally, keys present in config.TRAIN (for example
# BATCH_IMAGES, RPN_BATCH_SIZE) are written there. Keys known to none of them
# are ignored.
network = edict()
# 'ssh' carries no overrides: selecting it leaves config/default untouched.
network.ssh = edict()
network.mnet = edict()
#network.mnet.pretrained = 'model/mnasnet'
#network.mnet.pretrained = 'model/mobilenetv2_0_5'
#network.mnet.pretrained = 'model/mobilenet_0_5'
#network.mnet.MULTIPLIER = 0.5
#network.mnet.pretrained = 'model/mobilenet_0_25'
#network.mnet.pretrained_epoch = 0
#network.mnet.PIXEL_MEANS = np.array([0.406, 0.456, 0.485])
#network.mnet.PIXEL_STDS = np.array([0.225, 0.224, 0.229])
#network.mnet.PIXEL_SCALE = 255.0
network.mnet.FIXED_PARAMS = ['^stage1', '^.*upsampling']
network.mnet.BATCH_IMAGES = 16
network.mnet.HEAD_FILTER_NUM = 64
network.mnet.CONTEXT_FILTER_RATIO = 1
# Zero means / unit stds / scale 1.0 replace the config-level PIXEL_MEANS
# values when this network is selected.
network.mnet.PIXEL_MEANS = np.array([0.0, 0.0, 0.0])
network.mnet.PIXEL_STDS = np.array([1.0, 1.0, 1.0])
network.mnet.PIXEL_SCALE = 1.0
#network.mnet.pretrained = 'model/mobilenetfd_0_25' #78
#network.mnet.pretrained = 'model/mobilenetfd2' #75
network.mnet.pretrained = 'model/mobilenet025fd0' #78
#network.mnet.pretrained = 'model/mobilenet025fd1' #75
#network.mnet.pretrained = 'model/mobilenet025fd2' #
network.mnet.pretrained_epoch = 0
network.mnet.max_feat_channel = 8888
network.mnet.COLOR_MODE = 1
network.mnet.USE_CROP = True
network.mnet.RPN_ANCHOR_CFG = RAC_SSH
network.mnet.LAYER_FIX = True
network.mnet.LANDMARK_LR_MULT = 2.5
network.resnet = edict()
#network.resnet.pretrained = 'model/ResNet50_v1d'
#network.resnet.pretrained = 'model/resnet-50'
# NOTE: replaced by 'model/resnet-50' at the end of this section.
network.resnet.pretrained = 'model/resnet-152'
#network.resnet.pretrained = 'model/senet154'
#network.resnet.pretrained = 'model/densenet161'
network.resnet.pretrained_epoch = 0
#network.mnet.PIXEL_MEANS = np.array([103.939, 116.779, 123.68])
#network.mnet.PIXEL_STDS = np.array([57.375, 57.12, 58.393])
#network.resnet.PIXEL_MEANS = np.array([0.406, 0.456, 0.485])
#network.resnet.PIXEL_STDS = np.array([0.225, 0.224, 0.229])
#network.resnet.PIXEL_SCALE = 255.0
# lr_step / lr replace the values in `default` when resnet is selected.
network.resnet.lr_step = '1,2,3,4,5,55,68,80'
network.resnet.lr = 0.001
network.resnet.PIXEL_MEANS = np.array([0.0, 0.0, 0.0])
network.resnet.PIXEL_STDS = np.array([1.0, 1.0, 1.0])
network.resnet.PIXEL_SCALE = 1.0
network.resnet.FIXED_PARAMS = ['^stage1', '^.*upsampling']
network.resnet.BATCH_IMAGES = 8
network.resnet.HEAD_FILTER_NUM = 256
network.resnet.CONTEXT_FILTER_RATIO = 1
# NOTE: USE_DCN = 2 and RPN_ANCHOR_CFG = RAC_RETINA below are both replaced by
# the final three assignments of this section; the last assignment wins.
network.resnet.USE_DCN = 2
network.resnet.RPN_BATCH_SIZE = 256
network.resnet.RPN_ANCHOR_CFG = RAC_RETINA
# Effective resnet values: no DCN, resnet-50 weights, RAC_SSH anchors.
network.resnet.USE_DCN = 0
network.resnet.pretrained = 'model/resnet-50'
network.resnet.RPN_ANCHOR_CFG = RAC_SSH
# dataset settings
# Per-dataset override tables, selected by name in generate_config. The two
# entries differ only in their name and dataset_path.
dataset = edict()
dataset.widerface = edict({
    'dataset': 'widerface',
    'image_set': 'train',
    'test_image_set': 'val',
    'root_path': 'data',
    'dataset_path': 'data/widerface',
    'NUM_CLASSES': 2,
})
dataset.retinaface = edict({
    'dataset': 'retinaface',
    'image_set': 'train',
    'test_image_set': 'val',
    'root_path': 'data',
    'dataset_path': 'data/retinaface',
    'NUM_CLASSES': 2,
})
# default settings
# Fallback values; generate_config overwrites an entry here when the selected
# network/dataset table has the same key and config does not.
default = edict({
    # default network (disabled alternative from the original: 'resnetssh')
    'network': 'resnet',
    'pretrained': 'model/resnet-152',
    'pretrained_epoch': 0,
    # default dataset
    'dataset': 'retinaface',
    'image_set': 'train',
    'test_image_set': 'val',
    'root_path': 'data',
    'dataset_path': 'data/retinaface',
    # default training
    'frequent': 20,
    'kvstore': 'device',
    # default e2e
    'prefix': 'model/retinaface',
    'end_epoch': 10000,
    'lr_step': '55,68,80',
    'lr': 0.01,
    'wd': 0.0005,
})
# Parameter-name patterns stored on config (not on default); both network
# tables that define FIXED_PARAMS replace this list when selected.
config.FIXED_PARAMS = ['^conv1', '^conv2', '^conv3', '^.*upsampling']
#config.FIXED_PARAMS = ['^.*upsampling']
#config.FIXED_PARAMS = ['^conv1', '^conv2', '^conv3']
#config.FIXED_PARAMS = ['^conv0', '^stage1', 'gamma', 'beta'] #for resnet
def _apply_overrides(overrides):
    """Copy entries of *overrides* onto the matching existing settings.

    A key is written to ``config`` when ``config`` already has it, otherwise
    to ``default`` when ``default`` has it. Independently of that, a key that
    exists in ``config.TRAIN`` is also written there. Keys known to none of
    the three are ignored.
    """
    for k, v in overrides.items():
        if k in config:
            config[k] = v
        elif k in default:
            default[k] = v
        if k in config.TRAIN:
            config.TRAIN[k] = v


def generate_config(_network, _dataset):
    """Specialise the module-level ``config`` / ``default`` in place.

    Args:
        _network: key into the module-level ``network`` table.
        _dataset: key into the module-level ``dataset`` table.

    Side effects:
        * applies the network overrides, then the dataset overrides
          (dataset values win when both tables define the same key);
        * sets ``config.network`` and ``config.dataset`` to the given names;
        * stores ``NUM_ANCHORS`` (len(SCALES) * len(RATIOS), doubled when
          ``config.DENSE_ANCHOR`` is truthy) in every ``RPN_ANCHOR_CFG`` entry;
        * sets ``config.RPN_FEAT_STRIDE`` to the anchor keys as ints, largest
          first, and ``config.NUM_ANCHORS`` to the shared per-stride count.

    Returns:
        None.

    Raises:
        AssertionError: if the strides do not all have the same anchor count.
        IndexError: if ``config.RPN_ANCHOR_CFG`` is empty.
    """
    _apply_overrides(network[_network])
    _apply_overrides(dataset[_dataset])
    config.network = _network
    config.dataset = _dataset

    strides = []
    num_anchors = []
    for k in config.RPN_ANCHOR_CFG:
        anchor_cfg = config.RPN_ANCHOR_CFG[k]
        strides.append(int(k))
        _num_anchors = len(anchor_cfg['SCALES']) * len(anchor_cfg['RATIOS'])
        if config.DENSE_ANCHOR:
            _num_anchors *= 2
        anchor_cfg['NUM_ANCHORS'] = _num_anchors
        num_anchors.append(_num_anchors)
    config.RPN_FEAT_STRIDE = sorted(strides, reverse=True)

    # A single NUM_ANCHORS value is published below, so every stride must
    # agree on it. Raised explicitly (same exception type as the former
    # `assert`) so the check still runs under `python -O`.
    if any(n != num_anchors[0] for n in num_anchors[1:]):
        raise AssertionError(
            'all strides must define the same number of anchors, got %s' %
            (num_anchors, ))
    config.NUM_ANCHORS = num_anchors[0]