diff --git a/.gitignore b/.gitignore
index 7bbc71c..4f61b8c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -99,3 +99,5 @@ ENV/
# mypy
.mypy_cache/
+
+.DS_Store
diff --git a/README.md b/README.md
index 89751e1..8930133 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,10 @@
# InsightFace: 2D and 3D Face Analysis Project
+
-
+
+
+In this module, we provide datasets and training/inference pipelines for face alignment.
-
-
-
+Supported methods:
+
+- [x] [SDUNets (BMVC'2018)](alignment/heatmap)
+- [x] [SimpleRegression](alignment/coordinate_reg)
+
+
+[SDUNets](alignment/heatmap) is a heatmap-based method that was accepted at [BMVC](http://bmvc2018.org/contents/papers/0051.pdf).
+
+[SimpleRegression](alignment/coordinate_reg) provides very lightweight facial landmark models with fast coordinate regression. The input of these models is a loosely cropped face image, while the output is the landmark coordinates directly.
## Citation
@@ -312,11 +138,34 @@ For single cropped face image(112x112), total inference time is only 17ms on our
If you find *InsightFace* useful in your research, please consider to cite the following related papers:
```
-@inproceedings{deng2019retinaface,
-title={RetinaFace: Single-stage Dense Face Localisation in the Wild},
-author={Deng, Jiankang and Guo, Jia and Yuxiang, Zhou and Jinke Yu and Irene Kotsia and Zafeiriou, Stefanos},
-booktitle={arxiv},
-year={2019}
+
+@article{guo2021sample,
+ title={Sample and Computation Redistribution for Efficient Face Detection},
+ author={Guo, Jia and Deng, Jiankang and Lattas, Alexandros and Zafeiriou, Stefanos},
+ journal={arXiv preprint arXiv:2105.04714},
+ year={2021}
+}
+
+@inproceedings{an2020partical_fc,
+ title={Partial FC: Training 10 Million Identities on a Single Machine},
+ author={An, Xiang and Zhu, Xuhan and Xiao, Yang and Wu, Lan and Zhang, Ming and Gao, Yuan and Qin, Bin and
+ Zhang, Debing and Fu, Ying},
+ booktitle={Arxiv 2010.05222},
+ year={2020}
+}
+
+@inproceedings{deng2020subcenter,
+ title={Sub-center ArcFace: Boosting Face Recognition by Large-scale Noisy Web Faces},
+ author={Deng, Jiankang and Guo, Jia and Liu, Tongliang and Gong, Mingming and Zafeiriou, Stefanos},
+ booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},
+ year={2020}
+}
+
+@inproceedings{Deng2020CVPR,
+title = {RetinaFace: Single-Shot Multi-Level Face Localisation in the Wild},
+author = {Deng, Jiankang and Guo, Jia and Ververas, Evangelos and Kotsia, Irene and Zafeiriou, Stefanos},
+booktitle = {CVPR},
+year = {2020}
}
@inproceedings{guo2018stacked,
diff --git a/alignment/README.md b/alignment/README.md
index 9656d13..7b3706b 100644
--- a/alignment/README.md
+++ b/alignment/README.md
@@ -1,4 +1,42 @@
-You can now find heatmap based approaches under ``heatmapReg`` directory.
+## Face Alignment
+
+
+
+
+
+
+
+## Introduction
+
+These are the face alignment methods of [InsightFace](https://insightface.ai)
+
+
+
+
+
+
+
+### Datasets
+
+ Please refer to the [datasets](_datasets_) page for details of the face alignment datasets used for training and evaluation.
+
+### Evaluation
+
+ Please refer to the [evaluation](_evaluation_) page for details of face alignment evaluation.
+
+
+## Methods
+
+
+Supported methods:
+
+- [x] [SDUNets (BMVC'2018)](heatmap)
+- [x] [SimpleRegression](coordinate_reg)
+
+
+
+## Contributing
+
+We appreciate all contributions to improve the face alignment model zoo of InsightFace.
-You can now find coordinate regression approaches under ``coordinateReg`` directory.
diff --git a/alignment/coordinateReg/README.md b/alignment/coordinate_reg/README.md
similarity index 100%
rename from alignment/coordinateReg/README.md
rename to alignment/coordinate_reg/README.md
diff --git a/alignment/coordinateReg/image_infer.py b/alignment/coordinate_reg/image_infer.py
similarity index 100%
rename from alignment/coordinateReg/image_infer.py
rename to alignment/coordinate_reg/image_infer.py
diff --git a/alignment/heatmapReg/README.md b/alignment/heatmap/README.md
similarity index 100%
rename from alignment/heatmapReg/README.md
rename to alignment/heatmap/README.md
diff --git a/alignment/heatmapReg/data.py b/alignment/heatmap/data.py
similarity index 100%
rename from alignment/heatmapReg/data.py
rename to alignment/heatmap/data.py
diff --git a/alignment/heatmapReg/img_helper.py b/alignment/heatmap/img_helper.py
similarity index 100%
rename from alignment/heatmapReg/img_helper.py
rename to alignment/heatmap/img_helper.py
diff --git a/alignment/heatmapReg/metric.py b/alignment/heatmap/metric.py
similarity index 100%
rename from alignment/heatmapReg/metric.py
rename to alignment/heatmap/metric.py
diff --git a/alignment/heatmapReg/optimizer.py b/alignment/heatmap/optimizer.py
similarity index 100%
rename from alignment/heatmapReg/optimizer.py
rename to alignment/heatmap/optimizer.py
diff --git a/alignment/heatmapReg/sample_config.py b/alignment/heatmap/sample_config.py
similarity index 100%
rename from alignment/heatmapReg/sample_config.py
rename to alignment/heatmap/sample_config.py
diff --git a/alignment/heatmapReg/symbol/sym_heatmap.py b/alignment/heatmap/symbol/sym_heatmap.py
similarity index 100%
rename from alignment/heatmapReg/symbol/sym_heatmap.py
rename to alignment/heatmap/symbol/sym_heatmap.py
diff --git a/alignment/heatmapReg/test.py b/alignment/heatmap/test.py
similarity index 100%
rename from alignment/heatmapReg/test.py
rename to alignment/heatmap/test.py
diff --git a/alignment/heatmapReg/test_rec_nme.py b/alignment/heatmap/test_rec_nme.py
similarity index 100%
rename from alignment/heatmapReg/test_rec_nme.py
rename to alignment/heatmap/test_rec_nme.py
diff --git a/alignment/heatmapReg/train.py b/alignment/heatmap/train.py
similarity index 100%
rename from alignment/heatmapReg/train.py
rename to alignment/heatmap/train.py
diff --git a/attribute/README.md b/attribute/README.md
new file mode 100644
index 0000000..1a8379c
--- /dev/null
+++ b/attribute/README.md
@@ -0,0 +1,41 @@
+## Face Attribute
+
+
+
+
+
+
+
+## Introduction
+
+These are the face attribute methods of [InsightFace](https://insightface.ai)
+
+
+
+
+
+
+
+### Datasets
+
+ Please refer to the [datasets](_datasets_) page for details of the face attribute datasets used for training and evaluation.
+
+### Evaluation
+
+ Please refer to the [evaluation](_evaluation_) page for details of face attribute evaluation.
+
+
+## Methods
+
+
+Supported methods:
+
+- [x] [Gender_Age](gender_age)
+
+
+
+## Contributing
+
+We appreciate all contributions to improve the face attribute model zoo of InsightFace.
+
+
diff --git a/deploy/test.py b/attribute/gender_age/test.py
similarity index 53%
rename from deploy/test.py
rename to attribute/gender_age/test.py
index f5ecdf2..a92b216 100644
--- a/deploy/test.py
+++ b/attribute/gender_age/test.py
@@ -4,25 +4,21 @@ import sys
import numpy as np
import insightface
from insightface.app import FaceAnalysis
+from insightface.data import get_image as ins_get_image
-assert insightface.__version__>='0.2'
-parser = argparse.ArgumentParser(description='insightface test')
+parser = argparse.ArgumentParser(description='insightface gender-age test')
# general
parser.add_argument('--ctx', default=0, type=int, help='ctx id, <0 means using cpu')
args = parser.parse_args()
-app = FaceAnalysis(name='antelope')
+app = FaceAnalysis(allowed_modules=['detection', 'genderage'])
app.prepare(ctx_id=args.ctx, det_size=(640,640))
-img = cv2.imread('../sample-images/t1.jpg')
+img = ins_get_image('t1')
faces = app.get(img)
assert len(faces)==6
-rimg = app.draw_on(img, faces)
-cv2.imwrite("./t1_output.jpg", rimg)
-print(len(faces))
for face in faces:
print(face.bbox)
- print(face.kps)
- print(face.embedding.shape)
+ print(face.sex, face.age)
diff --git a/challenges/README.md b/challenges/README.md
new file mode 100644
index 0000000..b2422cc
--- /dev/null
+++ b/challenges/README.md
@@ -0,0 +1,31 @@
+## Challenges
+
+
+
+
+
+
+
+## Introduction
+
+These are the challenges held by [InsightFace](https://insightface.ai).
+
+
+
+
+
+
+
+
+## List
+
+
+Supported challenges:
+
+- [LFR19 (ICCVW'2019)](iccv19-lfr)
+- [MFR21 (ICCVW'2021)](iccv21-mfr)
+- [IFRT](ifrt)
+
+
+
+
diff --git a/challenges/iccv19-lfr/README.md b/challenges/iccv19-lfr/README.md
index f9ae0e0..12aa535 100644
--- a/challenges/iccv19-lfr/README.md
+++ b/challenges/iccv19-lfr/README.md
@@ -31,7 +31,7 @@ insightface.challenge@gmail.com
*For Chinese:*
-
+
*For English:*
diff --git a/deploy/README.md b/deploy/README.md
deleted file mode 100644
index e65643d..0000000
--- a/deploy/README.md
+++ /dev/null
@@ -1,8 +0,0 @@
-InsightFace deployment README
----
-
-For insightface pip-package <= 0.1.5, we use MXNet as inference backend, please download all models from [onedrive](https://1drv.ms/u/s!AswpsDO2toNKrUy0VktHTWgIQ0bn?e=UEF7C4), and put them all under `~/.insightface/models/` directory.
-
-Starting from insightface>=0.2, we use onnxruntime as inference backend, please download our **antelope** model release from [onedrive](https://1drv.ms/u/s!AswpsDO2toNKrU0ydGgDkrHPdJ3m?e=iVgZox), and put it under `~/.insightface/models/`, so there're onnx models at `~/.insightface/models/antelope/*.onnx`.
-
-The **antelope** model release contains `ResNet100@Glint360K recognition model` and `SCRFD-10GF detection model`. Please check `test.py` for detail.
diff --git a/deploy/convert_onnx.py b/deploy/convert_onnx.py
deleted file mode 100644
index 3ed583d..0000000
--- a/deploy/convert_onnx.py
+++ /dev/null
@@ -1,40 +0,0 @@
-import sys
-import os
-import argparse
-import onnx
-import mxnet as mx
-
-print('mxnet version:', mx.__version__)
-print('onnx version:', onnx.__version__)
-#make sure to install onnx-1.2.1
-#pip uninstall onnx
-#pip install onnx==1.2.1
-assert onnx.__version__ == '1.2.1'
-import numpy as np
-from mxnet.contrib import onnx as onnx_mxnet
-
-parser = argparse.ArgumentParser(
- description='convert insightface models to onnx')
-# general
-parser.add_argument('--prefix',
- default='./r100-arcface/model',
- help='prefix to load model.')
-parser.add_argument('--epoch',
- default=0,
- type=int,
- help='epoch number to load model.')
-parser.add_argument('--input-shape', default='3,112,112', help='input shape.')
-parser.add_argument('--output-onnx',
- default='./r100.onnx',
- help='path to write onnx model.')
-args = parser.parse_args()
-input_shape = (1, ) + tuple([int(x) for x in args.input_shape.split(',')])
-print('input-shape:', input_shape)
-
-sym_file = "%s-symbol.json" % args.prefix
-params_file = "%s-%04d.params" % (args.prefix, args.epoch)
-assert os.path.exists(sym_file)
-assert os.path.exists(params_file)
-converted_model_path = onnx_mxnet.export_model(sym_file, params_file,
- [input_shape], np.float32,
- args.output_onnx)
diff --git a/deploy/face_model.py b/deploy/face_model.py
deleted file mode 100644
index 2e4a361..0000000
--- a/deploy/face_model.py
+++ /dev/null
@@ -1,67 +0,0 @@
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import sys
-import os
-import argparse
-import numpy as np
-import mxnet as mx
-import cv2
-import insightface
-from insightface.utils import face_align
-
-
-def do_flip(data):
- for idx in range(data.shape[0]):
- data[idx, :, :] = np.fliplr(data[idx, :, :])
-
-
-def get_model(ctx, image_size, prefix, epoch, layer):
- print('loading', prefix, epoch)
- sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
- all_layers = sym.get_internals()
- sym = all_layers[layer + '_output']
- model = mx.mod.Module(symbol=sym, context=ctx, label_names=None)
- #model.bind(data_shapes=[('data', (args.batch_size, 3, image_size[0], image_size[1]))], label_shapes=[('softmax_label', (args.batch_size,))])
- model.bind(data_shapes=[('data', (1, 3, image_size[0], image_size[1]))])
- model.set_params(arg_params, aux_params)
- return model
-
-
-class FaceModel:
- def __init__(self, ctx_id, model_prefix, model_epoch, use_large_detector=False):
- if use_large_detector:
- self.detector = insightface.model_zoo.get_model('retinaface_r50_v1')
- else:
- self.detector = insightface.model_zoo.get_model('retinaface_mnet025_v2')
- self.detector.prepare(ctx_id=ctx_id)
- if ctx_id>=0:
- ctx = mx.gpu(ctx_id)
- else:
- ctx = mx.cpu()
- image_size = (112,112)
- self.model = get_model(ctx, image_size, model_prefix, model_epoch, 'fc1')
- self.image_size = image_size
-
- def get_input(self, face_img):
- bbox, pts5 = self.detector.detect(face_img, threshold=0.8)
- if bbox.shape[0]==0:
- return None
- bbox = bbox[0, 0:4]
- pts5 = pts5[0, :]
- nimg = face_align.norm_crop(face_img, pts5)
- return nimg
-
- def get_feature(self, aligned):
- a = cv2.cvtColor(aligned, cv2.COLOR_BGR2RGB)
- a = np.transpose(a, (2, 0, 1))
- input_blob = np.expand_dims(a, axis=0)
- data = mx.nd.array(input_blob)
- db = mx.io.DataBatch(data=(data, ))
- self.model.forward(db, is_train=False)
- emb = self.model.get_outputs()[0].asnumpy()[0]
- norm = np.sqrt(np.sum(emb*emb)+0.00001)
- emb /= norm
- return emb
-
diff --git a/deploy/model_slim.py b/deploy/model_slim.py
deleted file mode 100644
index 421b0cd..0000000
--- a/deploy/model_slim.py
+++ /dev/null
@@ -1,32 +0,0 @@
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import sys
-import os
-import argparse
-import numpy as np
-import mxnet as mx
-
-parser = argparse.ArgumentParser(description='face model slim')
-# general
-parser.add_argument('--model',
- default='../models/model-r34-amf/model,60',
- help='path to load model.')
-args = parser.parse_args()
-
-_vec = args.model.split(',')
-assert len(_vec) == 2
-prefix = _vec[0]
-epoch = int(_vec[1])
-print('loading', prefix, epoch)
-sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
-all_layers = sym.get_internals()
-sym = all_layers['fc1_output']
-dellist = []
-for k, v in arg_params.iteritems():
- if k.startswith('fc7'):
- dellist.append(k)
-for d in dellist:
- del arg_params[d]
-mx.model.save_checkpoint(prefix + "s", 0, sym, arg_params, aux_params)
diff --git a/deploy/mtcnn-model/det1-0001.params b/deploy/mtcnn-model/det1-0001.params
deleted file mode 100644
index e4b04aa..0000000
Binary files a/deploy/mtcnn-model/det1-0001.params and /dev/null differ
diff --git a/deploy/mtcnn-model/det1-symbol.json b/deploy/mtcnn-model/det1-symbol.json
deleted file mode 100644
index bd9b772..0000000
--- a/deploy/mtcnn-model/det1-symbol.json
+++ /dev/null
@@ -1,266 +0,0 @@
-{
- "nodes": [
- {
- "op": "null",
- "param": {},
- "name": "data",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv1_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv1_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "Convolution",
- "param": {
- "cudnn_off": "False",
- "cudnn_tune": "off",
- "dilate": "(1,1)",
- "kernel": "(3,3)",
- "no_bias": "False",
- "num_filter": "10",
- "num_group": "1",
- "pad": "(0,0)",
- "stride": "(1,1)",
- "workspace": "1024"
- },
- "name": "conv1",
- "inputs": [[0, 0], [1, 0], [2, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "prelu1_gamma",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "LeakyReLU",
- "param": {
- "act_type": "prelu",
- "lower_bound": "0.125",
- "slope": "0.25",
- "upper_bound": "0.334"
- },
- "name": "prelu1",
- "inputs": [[3, 0], [4, 0]],
- "backward_source_id": -1
- },
- {
- "op": "Pooling",
- "param": {
- "global_pool": "False",
- "kernel": "(2,2)",
- "pad": "(0,0)",
- "pool_type": "max",
- "pooling_convention": "full",
- "stride": "(2,2)"
- },
- "name": "pool1",
- "inputs": [[5, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv2_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv2_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "Convolution",
- "param": {
- "cudnn_off": "False",
- "cudnn_tune": "off",
- "dilate": "(1,1)",
- "kernel": "(3,3)",
- "no_bias": "False",
- "num_filter": "16",
- "num_group": "1",
- "pad": "(0,0)",
- "stride": "(1,1)",
- "workspace": "1024"
- },
- "name": "conv2",
- "inputs": [[6, 0], [7, 0], [8, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "prelu2_gamma",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "LeakyReLU",
- "param": {
- "act_type": "prelu",
- "lower_bound": "0.125",
- "slope": "0.25",
- "upper_bound": "0.334"
- },
- "name": "prelu2",
- "inputs": [[9, 0], [10, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv3_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv3_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "Convolution",
- "param": {
- "cudnn_off": "False",
- "cudnn_tune": "off",
- "dilate": "(1,1)",
- "kernel": "(3,3)",
- "no_bias": "False",
- "num_filter": "32",
- "num_group": "1",
- "pad": "(0,0)",
- "stride": "(1,1)",
- "workspace": "1024"
- },
- "name": "conv3",
- "inputs": [[11, 0], [12, 0], [13, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "prelu3_gamma",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "LeakyReLU",
- "param": {
- "act_type": "prelu",
- "lower_bound": "0.125",
- "slope": "0.25",
- "upper_bound": "0.334"
- },
- "name": "prelu3",
- "inputs": [[14, 0], [15, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv4_2_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv4_2_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "Convolution",
- "param": {
- "cudnn_off": "False",
- "cudnn_tune": "off",
- "dilate": "(1,1)",
- "kernel": "(1,1)",
- "no_bias": "False",
- "num_filter": "4",
- "num_group": "1",
- "pad": "(0,0)",
- "stride": "(1,1)",
- "workspace": "1024"
- },
- "name": "conv4_2",
- "inputs": [[16, 0], [17, 0], [18, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv4_1_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv4_1_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "Convolution",
- "param": {
- "cudnn_off": "False",
- "cudnn_tune": "off",
- "dilate": "(1,1)",
- "kernel": "(1,1)",
- "no_bias": "False",
- "num_filter": "2",
- "num_group": "1",
- "pad": "(0,0)",
- "stride": "(1,1)",
- "workspace": "1024"
- },
- "name": "conv4_1",
- "inputs": [[16, 0], [20, 0], [21, 0]],
- "backward_source_id": -1
- },
- {
- "op": "SoftmaxActivation",
- "param": {"mode": "channel"},
- "name": "prob1",
- "inputs": [[22, 0]],
- "backward_source_id": -1
- }
- ],
- "arg_nodes": [
- 0,
- 1,
- 2,
- 4,
- 7,
- 8,
- 10,
- 12,
- 13,
- 15,
- 17,
- 18,
- 20,
- 21
- ],
- "heads": [[19, 0], [23, 0]]
-}
\ No newline at end of file
diff --git a/deploy/mtcnn-model/det1.caffemodel b/deploy/mtcnn-model/det1.caffemodel
deleted file mode 100644
index 79e93b4..0000000
Binary files a/deploy/mtcnn-model/det1.caffemodel and /dev/null differ
diff --git a/deploy/mtcnn-model/det1.prototxt b/deploy/mtcnn-model/det1.prototxt
deleted file mode 100644
index c5c1657..0000000
--- a/deploy/mtcnn-model/det1.prototxt
+++ /dev/null
@@ -1,177 +0,0 @@
-name: "PNet"
-input: "data"
-input_dim: 1
-input_dim: 3
-input_dim: 12
-input_dim: 12
-
-layer {
- name: "conv1"
- type: "Convolution"
- bottom: "data"
- top: "conv1"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 0
- }
- convolution_param {
- num_output: 10
- kernel_size: 3
- stride: 1
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-}
-layer {
- name: "PReLU1"
- type: "PReLU"
- bottom: "conv1"
- top: "conv1"
-}
-layer {
- name: "pool1"
- type: "Pooling"
- bottom: "conv1"
- top: "pool1"
- pooling_param {
- pool: MAX
- kernel_size: 2
- stride: 2
- }
-}
-
-layer {
- name: "conv2"
- type: "Convolution"
- bottom: "pool1"
- top: "conv2"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 0
- }
- convolution_param {
- num_output: 16
- kernel_size: 3
- stride: 1
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-}
-layer {
- name: "PReLU2"
- type: "PReLU"
- bottom: "conv2"
- top: "conv2"
-}
-
-layer {
- name: "conv3"
- type: "Convolution"
- bottom: "conv2"
- top: "conv3"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 0
- }
- convolution_param {
- num_output: 32
- kernel_size: 3
- stride: 1
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-}
-layer {
- name: "PReLU3"
- type: "PReLU"
- bottom: "conv3"
- top: "conv3"
-}
-
-
-layer {
- name: "conv4-1"
- type: "Convolution"
- bottom: "conv3"
- top: "conv4-1"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 0
- }
- convolution_param {
- num_output: 2
- kernel_size: 1
- stride: 1
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-}
-
-layer {
- name: "conv4-2"
- type: "Convolution"
- bottom: "conv3"
- top: "conv4-2"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 0
- }
- convolution_param {
- num_output: 4
- kernel_size: 1
- stride: 1
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-}
-layer {
- name: "prob1"
- type: "Softmax"
- bottom: "conv4-1"
- top: "prob1"
-}
diff --git a/deploy/mtcnn-model/det2-0001.params b/deploy/mtcnn-model/det2-0001.params
deleted file mode 100644
index a14a478..0000000
Binary files a/deploy/mtcnn-model/det2-0001.params and /dev/null differ
diff --git a/deploy/mtcnn-model/det2-symbol.json b/deploy/mtcnn-model/det2-symbol.json
deleted file mode 100644
index a13246a..0000000
--- a/deploy/mtcnn-model/det2-symbol.json
+++ /dev/null
@@ -1,324 +0,0 @@
-{
- "nodes": [
- {
- "op": "null",
- "param": {},
- "name": "data",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv1_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv1_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "Convolution",
- "param": {
- "cudnn_off": "False",
- "cudnn_tune": "off",
- "dilate": "(1,1)",
- "kernel": "(3,3)",
- "no_bias": "False",
- "num_filter": "28",
- "num_group": "1",
- "pad": "(0,0)",
- "stride": "(1,1)",
- "workspace": "1024"
- },
- "name": "conv1",
- "inputs": [[0, 0], [1, 0], [2, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "prelu1_gamma",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "LeakyReLU",
- "param": {
- "act_type": "prelu",
- "lower_bound": "0.125",
- "slope": "0.25",
- "upper_bound": "0.334"
- },
- "name": "prelu1",
- "inputs": [[3, 0], [4, 0]],
- "backward_source_id": -1
- },
- {
- "op": "Pooling",
- "param": {
- "global_pool": "False",
- "kernel": "(3,3)",
- "pad": "(0,0)",
- "pool_type": "max",
- "pooling_convention": "full",
- "stride": "(2,2)"
- },
- "name": "pool1",
- "inputs": [[5, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv2_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv2_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "Convolution",
- "param": {
- "cudnn_off": "False",
- "cudnn_tune": "off",
- "dilate": "(1,1)",
- "kernel": "(3,3)",
- "no_bias": "False",
- "num_filter": "48",
- "num_group": "1",
- "pad": "(0,0)",
- "stride": "(1,1)",
- "workspace": "1024"
- },
- "name": "conv2",
- "inputs": [[6, 0], [7, 0], [8, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "prelu2_gamma",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "LeakyReLU",
- "param": {
- "act_type": "prelu",
- "lower_bound": "0.125",
- "slope": "0.25",
- "upper_bound": "0.334"
- },
- "name": "prelu2",
- "inputs": [[9, 0], [10, 0]],
- "backward_source_id": -1
- },
- {
- "op": "Pooling",
- "param": {
- "global_pool": "False",
- "kernel": "(3,3)",
- "pad": "(0,0)",
- "pool_type": "max",
- "pooling_convention": "full",
- "stride": "(2,2)"
- },
- "name": "pool2",
- "inputs": [[11, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv3_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv3_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "Convolution",
- "param": {
- "cudnn_off": "False",
- "cudnn_tune": "off",
- "dilate": "(1,1)",
- "kernel": "(2,2)",
- "no_bias": "False",
- "num_filter": "64",
- "num_group": "1",
- "pad": "(0,0)",
- "stride": "(1,1)",
- "workspace": "1024"
- },
- "name": "conv3",
- "inputs": [[12, 0], [13, 0], [14, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "prelu3_gamma",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "LeakyReLU",
- "param": {
- "act_type": "prelu",
- "lower_bound": "0.125",
- "slope": "0.25",
- "upper_bound": "0.334"
- },
- "name": "prelu3",
- "inputs": [[15, 0], [16, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv4_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv4_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "FullyConnected",
- "param": {
- "no_bias": "False",
- "num_hidden": "128"
- },
- "name": "conv4",
- "inputs": [[17, 0], [18, 0], [19, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "prelu4_gamma",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "LeakyReLU",
- "param": {
- "act_type": "prelu",
- "lower_bound": "0.125",
- "slope": "0.25",
- "upper_bound": "0.334"
- },
- "name": "prelu4",
- "inputs": [[20, 0], [21, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv5_2_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv5_2_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "FullyConnected",
- "param": {
- "no_bias": "False",
- "num_hidden": "4"
- },
- "name": "conv5_2",
- "inputs": [[22, 0], [23, 0], [24, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv5_1_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv5_1_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "FullyConnected",
- "param": {
- "no_bias": "False",
- "num_hidden": "2"
- },
- "name": "conv5_1",
- "inputs": [[22, 0], [26, 0], [27, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "prob1_label",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "SoftmaxOutput",
- "param": {
- "grad_scale": "1",
- "ignore_label": "-1",
- "multi_output": "False",
- "normalization": "null",
- "use_ignore": "False"
- },
- "name": "prob1",
- "inputs": [[28, 0], [29, 0]],
- "backward_source_id": -1
- }
- ],
- "arg_nodes": [
- 0,
- 1,
- 2,
- 4,
- 7,
- 8,
- 10,
- 13,
- 14,
- 16,
- 18,
- 19,
- 21,
- 23,
- 24,
- 26,
- 27,
- 29
- ],
- "heads": [[25, 0], [30, 0]]
-}
\ No newline at end of file
diff --git a/deploy/mtcnn-model/det2.caffemodel b/deploy/mtcnn-model/det2.caffemodel
deleted file mode 100644
index a5a540c..0000000
Binary files a/deploy/mtcnn-model/det2.caffemodel and /dev/null differ
diff --git a/deploy/mtcnn-model/det2.prototxt b/deploy/mtcnn-model/det2.prototxt
deleted file mode 100644
index 51093e6..0000000
--- a/deploy/mtcnn-model/det2.prototxt
+++ /dev/null
@@ -1,228 +0,0 @@
-name: "RNet"
-input: "data"
-input_dim: 1
-input_dim: 3
-input_dim: 24
-input_dim: 24
-
-
-##########################
-######################
-layer {
- name: "conv1"
- type: "Convolution"
- bottom: "data"
- top: "conv1"
- param {
- lr_mult: 0
- decay_mult: 0
- }
- param {
- lr_mult: 0
- decay_mult: 0
- }
- convolution_param {
- num_output: 28
- kernel_size: 3
- stride: 1
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-}
-layer {
- name: "prelu1"
- type: "PReLU"
- bottom: "conv1"
- top: "conv1"
- propagate_down: true
-}
-layer {
- name: "pool1"
- type: "Pooling"
- bottom: "conv1"
- top: "pool1"
- pooling_param {
- pool: MAX
- kernel_size: 3
- stride: 2
- }
-}
-
-layer {
- name: "conv2"
- type: "Convolution"
- bottom: "pool1"
- top: "conv2"
- param {
- lr_mult: 0
- decay_mult: 0
- }
- param {
- lr_mult: 0
- decay_mult: 0
- }
- convolution_param {
- num_output: 48
- kernel_size: 3
- stride: 1
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-}
-layer {
- name: "prelu2"
- type: "PReLU"
- bottom: "conv2"
- top: "conv2"
- propagate_down: true
-}
-layer {
- name: "pool2"
- type: "Pooling"
- bottom: "conv2"
- top: "pool2"
- pooling_param {
- pool: MAX
- kernel_size: 3
- stride: 2
- }
-}
-####################################
-
-##################################
-layer {
- name: "conv3"
- type: "Convolution"
- bottom: "pool2"
- top: "conv3"
- param {
- lr_mult: 0
- decay_mult: 0
- }
- param {
- lr_mult: 0
- decay_mult: 0
- }
- convolution_param {
- num_output: 64
- kernel_size: 2
- stride: 1
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-}
-layer {
- name: "prelu3"
- type: "PReLU"
- bottom: "conv3"
- top: "conv3"
- propagate_down: true
-}
-###############################
-
-###############################
-
-layer {
- name: "conv4"
- type: "InnerProduct"
- bottom: "conv3"
- top: "conv4"
- param {
- lr_mult: 0
- decay_mult: 0
- }
- param {
- lr_mult: 0
- decay_mult: 0
- }
- inner_product_param {
- num_output: 128
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-}
-layer {
- name: "prelu4"
- type: "PReLU"
- bottom: "conv4"
- top: "conv4"
-}
-
-layer {
- name: "conv5-1"
- type: "InnerProduct"
- bottom: "conv4"
- top: "conv5-1"
- param {
- lr_mult: 0
- decay_mult: 0
- }
- param {
- lr_mult: 0
- decay_mult: 0
- }
- inner_product_param {
- num_output: 2
- #kernel_size: 1
- #stride: 1
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-}
-layer {
- name: "conv5-2"
- type: "InnerProduct"
- bottom: "conv4"
- top: "conv5-2"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 1
- }
- inner_product_param {
- num_output: 4
- #kernel_size: 1
- #stride: 1
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-}
-layer {
- name: "prob1"
- type: "Softmax"
- bottom: "conv5-1"
- top: "prob1"
-}
\ No newline at end of file
diff --git a/deploy/mtcnn-model/det3-0001.params b/deploy/mtcnn-model/det3-0001.params
deleted file mode 100644
index cae898b..0000000
Binary files a/deploy/mtcnn-model/det3-0001.params and /dev/null differ
diff --git a/deploy/mtcnn-model/det3-symbol.json b/deploy/mtcnn-model/det3-symbol.json
deleted file mode 100644
index 00061ed..0000000
--- a/deploy/mtcnn-model/det3-symbol.json
+++ /dev/null
@@ -1,418 +0,0 @@
-{
- "nodes": [
- {
- "op": "null",
- "param": {},
- "name": "data",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv1_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv1_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "Convolution",
- "param": {
- "cudnn_off": "False",
- "cudnn_tune": "off",
- "dilate": "(1,1)",
- "kernel": "(3,3)",
- "no_bias": "False",
- "num_filter": "32",
- "num_group": "1",
- "pad": "(0,0)",
- "stride": "(1,1)",
- "workspace": "1024"
- },
- "name": "conv1",
- "inputs": [[0, 0], [1, 0], [2, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "prelu1_gamma",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "LeakyReLU",
- "param": {
- "act_type": "prelu",
- "lower_bound": "0.125",
- "slope": "0.25",
- "upper_bound": "0.334"
- },
- "name": "prelu1",
- "inputs": [[3, 0], [4, 0]],
- "backward_source_id": -1
- },
- {
- "op": "Pooling",
- "param": {
- "global_pool": "False",
- "kernel": "(3,3)",
- "pad": "(0,0)",
- "pool_type": "max",
- "pooling_convention": "full",
- "stride": "(2,2)"
- },
- "name": "pool1",
- "inputs": [[5, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv2_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv2_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "Convolution",
- "param": {
- "cudnn_off": "False",
- "cudnn_tune": "off",
- "dilate": "(1,1)",
- "kernel": "(3,3)",
- "no_bias": "False",
- "num_filter": "64",
- "num_group": "1",
- "pad": "(0,0)",
- "stride": "(1,1)",
- "workspace": "1024"
- },
- "name": "conv2",
- "inputs": [[6, 0], [7, 0], [8, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "prelu2_gamma",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "LeakyReLU",
- "param": {
- "act_type": "prelu",
- "lower_bound": "0.125",
- "slope": "0.25",
- "upper_bound": "0.334"
- },
- "name": "prelu2",
- "inputs": [[9, 0], [10, 0]],
- "backward_source_id": -1
- },
- {
- "op": "Pooling",
- "param": {
- "global_pool": "False",
- "kernel": "(3,3)",
- "pad": "(0,0)",
- "pool_type": "max",
- "pooling_convention": "full",
- "stride": "(2,2)"
- },
- "name": "pool2",
- "inputs": [[11, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv3_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv3_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "Convolution",
- "param": {
- "cudnn_off": "False",
- "cudnn_tune": "off",
- "dilate": "(1,1)",
- "kernel": "(3,3)",
- "no_bias": "False",
- "num_filter": "64",
- "num_group": "1",
- "pad": "(0,0)",
- "stride": "(1,1)",
- "workspace": "1024"
- },
- "name": "conv3",
- "inputs": [[12, 0], [13, 0], [14, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "prelu3_gamma",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "LeakyReLU",
- "param": {
- "act_type": "prelu",
- "lower_bound": "0.125",
- "slope": "0.25",
- "upper_bound": "0.334"
- },
- "name": "prelu3",
- "inputs": [[15, 0], [16, 0]],
- "backward_source_id": -1
- },
- {
- "op": "Pooling",
- "param": {
- "global_pool": "False",
- "kernel": "(2,2)",
- "pad": "(0,0)",
- "pool_type": "max",
- "pooling_convention": "full",
- "stride": "(2,2)"
- },
- "name": "pool3",
- "inputs": [[17, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv4_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv4_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "Convolution",
- "param": {
- "cudnn_off": "False",
- "cudnn_tune": "off",
- "dilate": "(1,1)",
- "kernel": "(2,2)",
- "no_bias": "False",
- "num_filter": "128",
- "num_group": "1",
- "pad": "(0,0)",
- "stride": "(1,1)",
- "workspace": "1024"
- },
- "name": "conv4",
- "inputs": [[18, 0], [19, 0], [20, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "prelu4_gamma",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "LeakyReLU",
- "param": {
- "act_type": "prelu",
- "lower_bound": "0.125",
- "slope": "0.25",
- "upper_bound": "0.334"
- },
- "name": "prelu4",
- "inputs": [[21, 0], [22, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv5_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv5_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "FullyConnected",
- "param": {
- "no_bias": "False",
- "num_hidden": "256"
- },
- "name": "conv5",
- "inputs": [[23, 0], [24, 0], [25, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "prelu5_gamma",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "LeakyReLU",
- "param": {
- "act_type": "prelu",
- "lower_bound": "0.125",
- "slope": "0.25",
- "upper_bound": "0.334"
- },
- "name": "prelu5",
- "inputs": [[26, 0], [27, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv6_3_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv6_3_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "FullyConnected",
- "param": {
- "no_bias": "False",
- "num_hidden": "10"
- },
- "name": "conv6_3",
- "inputs": [[28, 0], [29, 0], [30, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv6_2_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv6_2_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "FullyConnected",
- "param": {
- "no_bias": "False",
- "num_hidden": "4"
- },
- "name": "conv6_2",
- "inputs": [[28, 0], [32, 0], [33, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv6_1_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv6_1_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "FullyConnected",
- "param": {
- "no_bias": "False",
- "num_hidden": "2"
- },
- "name": "conv6_1",
- "inputs": [[28, 0], [35, 0], [36, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "prob1_label",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "SoftmaxOutput",
- "param": {
- "grad_scale": "1",
- "ignore_label": "-1",
- "multi_output": "False",
- "normalization": "null",
- "use_ignore": "False"
- },
- "name": "prob1",
- "inputs": [[37, 0], [38, 0]],
- "backward_source_id": -1
- }
- ],
- "arg_nodes": [
- 0,
- 1,
- 2,
- 4,
- 7,
- 8,
- 10,
- 13,
- 14,
- 16,
- 19,
- 20,
- 22,
- 24,
- 25,
- 27,
- 29,
- 30,
- 32,
- 33,
- 35,
- 36,
- 38
- ],
- "heads": [[31, 0], [34, 0], [39, 0]]
-}
\ No newline at end of file
diff --git a/deploy/mtcnn-model/det3.caffemodel b/deploy/mtcnn-model/det3.caffemodel
deleted file mode 100644
index 7b4b8a4..0000000
Binary files a/deploy/mtcnn-model/det3.caffemodel and /dev/null differ
diff --git a/deploy/mtcnn-model/det3.prototxt b/deploy/mtcnn-model/det3.prototxt
deleted file mode 100644
index a192307..0000000
--- a/deploy/mtcnn-model/det3.prototxt
+++ /dev/null
@@ -1,294 +0,0 @@
-name: "ONet"
-input: "data"
-input_dim: 1
-input_dim: 3
-input_dim: 48
-input_dim: 48
-##################################
-layer {
- name: "conv1"
- type: "Convolution"
- bottom: "data"
- top: "conv1"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 1
- }
- convolution_param {
- num_output: 32
- kernel_size: 3
- stride: 1
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-}
-layer {
- name: "prelu1"
- type: "PReLU"
- bottom: "conv1"
- top: "conv1"
-}
-layer {
- name: "pool1"
- type: "Pooling"
- bottom: "conv1"
- top: "pool1"
- pooling_param {
- pool: MAX
- kernel_size: 3
- stride: 2
- }
-}
-layer {
- name: "conv2"
- type: "Convolution"
- bottom: "pool1"
- top: "conv2"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 1
- }
- convolution_param {
- num_output: 64
- kernel_size: 3
- stride: 1
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-}
-
-layer {
- name: "prelu2"
- type: "PReLU"
- bottom: "conv2"
- top: "conv2"
-}
-layer {
- name: "pool2"
- type: "Pooling"
- bottom: "conv2"
- top: "pool2"
- pooling_param {
- pool: MAX
- kernel_size: 3
- stride: 2
- }
-}
-
-layer {
- name: "conv3"
- type: "Convolution"
- bottom: "pool2"
- top: "conv3"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 1
- }
- convolution_param {
- num_output: 64
- kernel_size: 3
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-}
-layer {
- name: "prelu3"
- type: "PReLU"
- bottom: "conv3"
- top: "conv3"
-}
-layer {
- name: "pool3"
- type: "Pooling"
- bottom: "conv3"
- top: "pool3"
- pooling_param {
- pool: MAX
- kernel_size: 2
- stride: 2
- }
-}
-layer {
- name: "conv4"
- type: "Convolution"
- bottom: "pool3"
- top: "conv4"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 1
- }
- convolution_param {
- num_output: 128
- kernel_size: 2
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-}
-layer {
- name: "prelu4"
- type: "PReLU"
- bottom: "conv4"
- top: "conv4"
-}
-
-
-layer {
- name: "conv5"
- type: "InnerProduct"
- bottom: "conv4"
- top: "conv5"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 1
- }
- inner_product_param {
- #kernel_size: 3
- num_output: 256
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-}
-
-layer {
- name: "drop5"
- type: "Dropout"
- bottom: "conv5"
- top: "conv5"
- dropout_param {
- dropout_ratio: 0.25
- }
-}
-layer {
- name: "prelu5"
- type: "PReLU"
- bottom: "conv5"
- top: "conv5"
-}
-
-
-layer {
- name: "conv6-1"
- type: "InnerProduct"
- bottom: "conv5"
- top: "conv6-1"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 1
- }
- inner_product_param {
- #kernel_size: 1
- num_output: 2
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-}
-layer {
- name: "conv6-2"
- type: "InnerProduct"
- bottom: "conv5"
- top: "conv6-2"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 1
- }
- inner_product_param {
- #kernel_size: 1
- num_output: 4
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-}
-layer {
- name: "conv6-3"
- type: "InnerProduct"
- bottom: "conv5"
- top: "conv6-3"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 1
- }
- inner_product_param {
- #kernel_size: 1
- num_output: 10
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-}
-layer {
- name: "prob1"
- type: "Softmax"
- bottom: "conv6-1"
- top: "prob1"
-}
diff --git a/deploy/mtcnn-model/det4-0001.params b/deploy/mtcnn-model/det4-0001.params
deleted file mode 100644
index efca9a9..0000000
Binary files a/deploy/mtcnn-model/det4-0001.params and /dev/null differ
diff --git a/deploy/mtcnn-model/det4-symbol.json b/deploy/mtcnn-model/det4-symbol.json
deleted file mode 100644
index aa90e2a..0000000
--- a/deploy/mtcnn-model/det4-symbol.json
+++ /dev/null
@@ -1,1392 +0,0 @@
-{
- "nodes": [
- {
- "op": "null",
- "param": {},
- "name": "data",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "SliceChannel",
- "param": {
- "axis": "1",
- "num_outputs": "5",
- "squeeze_axis": "False"
- },
- "name": "slice",
- "inputs": [[0, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv1_1_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv1_1_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "Convolution",
- "param": {
- "cudnn_off": "False",
- "cudnn_tune": "off",
- "dilate": "(1,1)",
- "kernel": "(3,3)",
- "no_bias": "False",
- "num_filter": "28",
- "num_group": "1",
- "pad": "(0,0)",
- "stride": "(1,1)",
- "workspace": "1024"
- },
- "name": "conv1_1",
- "inputs": [[1, 0], [2, 0], [3, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "prelu1_1_gamma",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "LeakyReLU",
- "param": {
- "act_type": "prelu",
- "lower_bound": "0.125",
- "slope": "0.25",
- "upper_bound": "0.334"
- },
- "name": "prelu1_1",
- "inputs": [[4, 0], [5, 0]],
- "backward_source_id": -1
- },
- {
- "op": "Pooling",
- "param": {
- "global_pool": "False",
- "kernel": "(3,3)",
- "pad": "(0,0)",
- "pool_type": "max",
- "pooling_convention": "full",
- "stride": "(2,2)"
- },
- "name": "pool1_1",
- "inputs": [[6, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv2_1_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv2_1_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "Convolution",
- "param": {
- "cudnn_off": "False",
- "cudnn_tune": "off",
- "dilate": "(1,1)",
- "kernel": "(3,3)",
- "no_bias": "False",
- "num_filter": "48",
- "num_group": "1",
- "pad": "(0,0)",
- "stride": "(1,1)",
- "workspace": "1024"
- },
- "name": "conv2_1",
- "inputs": [[7, 0], [8, 0], [9, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "prelu2_1_gamma",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "LeakyReLU",
- "param": {
- "act_type": "prelu",
- "lower_bound": "0.125",
- "slope": "0.25",
- "upper_bound": "0.334"
- },
- "name": "prelu2_1",
- "inputs": [[10, 0], [11, 0]],
- "backward_source_id": -1
- },
- {
- "op": "Pooling",
- "param": {
- "global_pool": "False",
- "kernel": "(3,3)",
- "pad": "(0,0)",
- "pool_type": "max",
- "pooling_convention": "full",
- "stride": "(2,2)"
- },
- "name": "pool2_1",
- "inputs": [[12, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv3_1_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv3_1_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "Convolution",
- "param": {
- "cudnn_off": "False",
- "cudnn_tune": "off",
- "dilate": "(1,1)",
- "kernel": "(2,2)",
- "no_bias": "False",
- "num_filter": "64",
- "num_group": "1",
- "pad": "(0,0)",
- "stride": "(1,1)",
- "workspace": "1024"
- },
- "name": "conv3_1",
- "inputs": [[13, 0], [14, 0], [15, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "prelu3_1_gamma",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "LeakyReLU",
- "param": {
- "act_type": "prelu",
- "lower_bound": "0.125",
- "slope": "0.25",
- "upper_bound": "0.334"
- },
- "name": "prelu3_1",
- "inputs": [[16, 0], [17, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv1_2_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv1_2_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "Convolution",
- "param": {
- "cudnn_off": "False",
- "cudnn_tune": "off",
- "dilate": "(1,1)",
- "kernel": "(3,3)",
- "no_bias": "False",
- "num_filter": "28",
- "num_group": "1",
- "pad": "(0,0)",
- "stride": "(1,1)",
- "workspace": "1024"
- },
- "name": "conv1_2",
- "inputs": [[1, 1], [19, 0], [20, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "prelu1_2_gamma",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "LeakyReLU",
- "param": {
- "act_type": "prelu",
- "lower_bound": "0.125",
- "slope": "0.25",
- "upper_bound": "0.334"
- },
- "name": "prelu1_2",
- "inputs": [[21, 0], [22, 0]],
- "backward_source_id": -1
- },
- {
- "op": "Pooling",
- "param": {
- "global_pool": "False",
- "kernel": "(3,3)",
- "pad": "(0,0)",
- "pool_type": "max",
- "pooling_convention": "full",
- "stride": "(2,2)"
- },
- "name": "pool1_2",
- "inputs": [[23, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv2_2_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv2_2_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "Convolution",
- "param": {
- "cudnn_off": "False",
- "cudnn_tune": "off",
- "dilate": "(1,1)",
- "kernel": "(3,3)",
- "no_bias": "False",
- "num_filter": "48",
- "num_group": "1",
- "pad": "(0,0)",
- "stride": "(1,1)",
- "workspace": "1024"
- },
- "name": "conv2_2",
- "inputs": [[24, 0], [25, 0], [26, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "prelu2_2_gamma",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "LeakyReLU",
- "param": {
- "act_type": "prelu",
- "lower_bound": "0.125",
- "slope": "0.25",
- "upper_bound": "0.334"
- },
- "name": "prelu2_2",
- "inputs": [[27, 0], [28, 0]],
- "backward_source_id": -1
- },
- {
- "op": "Pooling",
- "param": {
- "global_pool": "False",
- "kernel": "(3,3)",
- "pad": "(0,0)",
- "pool_type": "max",
- "pooling_convention": "full",
- "stride": "(2,2)"
- },
- "name": "pool2_2",
- "inputs": [[29, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv3_2_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv3_2_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "Convolution",
- "param": {
- "cudnn_off": "False",
- "cudnn_tune": "off",
- "dilate": "(1,1)",
- "kernel": "(2,2)",
- "no_bias": "False",
- "num_filter": "64",
- "num_group": "1",
- "pad": "(0,0)",
- "stride": "(1,1)",
- "workspace": "1024"
- },
- "name": "conv3_2",
- "inputs": [[30, 0], [31, 0], [32, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "prelu3_2_gamma",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "LeakyReLU",
- "param": {
- "act_type": "prelu",
- "lower_bound": "0.125",
- "slope": "0.25",
- "upper_bound": "0.334"
- },
- "name": "prelu3_2",
- "inputs": [[33, 0], [34, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv1_3_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv1_3_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "Convolution",
- "param": {
- "cudnn_off": "False",
- "cudnn_tune": "off",
- "dilate": "(1,1)",
- "kernel": "(3,3)",
- "no_bias": "False",
- "num_filter": "28",
- "num_group": "1",
- "pad": "(0,0)",
- "stride": "(1,1)",
- "workspace": "1024"
- },
- "name": "conv1_3",
- "inputs": [[1, 2], [36, 0], [37, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "prelu1_3_gamma",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "LeakyReLU",
- "param": {
- "act_type": "prelu",
- "lower_bound": "0.125",
- "slope": "0.25",
- "upper_bound": "0.334"
- },
- "name": "prelu1_3",
- "inputs": [[38, 0], [39, 0]],
- "backward_source_id": -1
- },
- {
- "op": "Pooling",
- "param": {
- "global_pool": "False",
- "kernel": "(3,3)",
- "pad": "(0,0)",
- "pool_type": "max",
- "pooling_convention": "full",
- "stride": "(2,2)"
- },
- "name": "pool1_3",
- "inputs": [[40, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv2_3_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv2_3_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "Convolution",
- "param": {
- "cudnn_off": "False",
- "cudnn_tune": "off",
- "dilate": "(1,1)",
- "kernel": "(3,3)",
- "no_bias": "False",
- "num_filter": "48",
- "num_group": "1",
- "pad": "(0,0)",
- "stride": "(1,1)",
- "workspace": "1024"
- },
- "name": "conv2_3",
- "inputs": [[41, 0], [42, 0], [43, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "prelu2_3_gamma",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "LeakyReLU",
- "param": {
- "act_type": "prelu",
- "lower_bound": "0.125",
- "slope": "0.25",
- "upper_bound": "0.334"
- },
- "name": "prelu2_3",
- "inputs": [[44, 0], [45, 0]],
- "backward_source_id": -1
- },
- {
- "op": "Pooling",
- "param": {
- "global_pool": "False",
- "kernel": "(3,3)",
- "pad": "(0,0)",
- "pool_type": "max",
- "pooling_convention": "full",
- "stride": "(2,2)"
- },
- "name": "pool2_3",
- "inputs": [[46, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv3_3_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv3_3_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "Convolution",
- "param": {
- "cudnn_off": "False",
- "cudnn_tune": "off",
- "dilate": "(1,1)",
- "kernel": "(2,2)",
- "no_bias": "False",
- "num_filter": "64",
- "num_group": "1",
- "pad": "(0,0)",
- "stride": "(1,1)",
- "workspace": "1024"
- },
- "name": "conv3_3",
- "inputs": [[47, 0], [48, 0], [49, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "prelu3_3_gamma",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "LeakyReLU",
- "param": {
- "act_type": "prelu",
- "lower_bound": "0.125",
- "slope": "0.25",
- "upper_bound": "0.334"
- },
- "name": "prelu3_3",
- "inputs": [[50, 0], [51, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv1_4_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv1_4_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "Convolution",
- "param": {
- "cudnn_off": "False",
- "cudnn_tune": "off",
- "dilate": "(1,1)",
- "kernel": "(3,3)",
- "no_bias": "False",
- "num_filter": "28",
- "num_group": "1",
- "pad": "(0,0)",
- "stride": "(1,1)",
- "workspace": "1024"
- },
- "name": "conv1_4",
- "inputs": [[1, 3], [53, 0], [54, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "prelu1_4_gamma",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "LeakyReLU",
- "param": {
- "act_type": "prelu",
- "lower_bound": "0.125",
- "slope": "0.25",
- "upper_bound": "0.334"
- },
- "name": "prelu1_4",
- "inputs": [[55, 0], [56, 0]],
- "backward_source_id": -1
- },
- {
- "op": "Pooling",
- "param": {
- "global_pool": "False",
- "kernel": "(3,3)",
- "pad": "(0,0)",
- "pool_type": "max",
- "pooling_convention": "full",
- "stride": "(2,2)"
- },
- "name": "pool1_4",
- "inputs": [[57, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv2_4_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv2_4_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "Convolution",
- "param": {
- "cudnn_off": "False",
- "cudnn_tune": "off",
- "dilate": "(1,1)",
- "kernel": "(3,3)",
- "no_bias": "False",
- "num_filter": "48",
- "num_group": "1",
- "pad": "(0,0)",
- "stride": "(1,1)",
- "workspace": "1024"
- },
- "name": "conv2_4",
- "inputs": [[58, 0], [59, 0], [60, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "prelu2_4_gamma",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "LeakyReLU",
- "param": {
- "act_type": "prelu",
- "lower_bound": "0.125",
- "slope": "0.25",
- "upper_bound": "0.334"
- },
- "name": "prelu2_4",
- "inputs": [[61, 0], [62, 0]],
- "backward_source_id": -1
- },
- {
- "op": "Pooling",
- "param": {
- "global_pool": "False",
- "kernel": "(3,3)",
- "pad": "(0,0)",
- "pool_type": "max",
- "pooling_convention": "full",
- "stride": "(2,2)"
- },
- "name": "pool2_4",
- "inputs": [[63, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv3_4_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv3_4_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "Convolution",
- "param": {
- "cudnn_off": "False",
- "cudnn_tune": "off",
- "dilate": "(1,1)",
- "kernel": "(2,2)",
- "no_bias": "False",
- "num_filter": "64",
- "num_group": "1",
- "pad": "(0,0)",
- "stride": "(1,1)",
- "workspace": "1024"
- },
- "name": "conv3_4",
- "inputs": [[64, 0], [65, 0], [66, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "prelu3_4_gamma",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "LeakyReLU",
- "param": {
- "act_type": "prelu",
- "lower_bound": "0.125",
- "slope": "0.25",
- "upper_bound": "0.334"
- },
- "name": "prelu3_4",
- "inputs": [[67, 0], [68, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv1_5_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv1_5_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "Convolution",
- "param": {
- "cudnn_off": "False",
- "cudnn_tune": "off",
- "dilate": "(1,1)",
- "kernel": "(3,3)",
- "no_bias": "False",
- "num_filter": "28",
- "num_group": "1",
- "pad": "(0,0)",
- "stride": "(1,1)",
- "workspace": "1024"
- },
- "name": "conv1_5",
- "inputs": [[1, 4], [70, 0], [71, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "prelu1_5_gamma",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "LeakyReLU",
- "param": {
- "act_type": "prelu",
- "lower_bound": "0.125",
- "slope": "0.25",
- "upper_bound": "0.334"
- },
- "name": "prelu1_5",
- "inputs": [[72, 0], [73, 0]],
- "backward_source_id": -1
- },
- {
- "op": "Pooling",
- "param": {
- "global_pool": "False",
- "kernel": "(3,3)",
- "pad": "(0,0)",
- "pool_type": "max",
- "pooling_convention": "full",
- "stride": "(2,2)"
- },
- "name": "pool1_5",
- "inputs": [[74, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv2_5_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv2_5_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "Convolution",
- "param": {
- "cudnn_off": "False",
- "cudnn_tune": "off",
- "dilate": "(1,1)",
- "kernel": "(3,3)",
- "no_bias": "False",
- "num_filter": "48",
- "num_group": "1",
- "pad": "(0,0)",
- "stride": "(1,1)",
- "workspace": "1024"
- },
- "name": "conv2_5",
- "inputs": [[75, 0], [76, 0], [77, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "prelu2_5_gamma",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "LeakyReLU",
- "param": {
- "act_type": "prelu",
- "lower_bound": "0.125",
- "slope": "0.25",
- "upper_bound": "0.334"
- },
- "name": "prelu2_5",
- "inputs": [[78, 0], [79, 0]],
- "backward_source_id": -1
- },
- {
- "op": "Pooling",
- "param": {
- "global_pool": "False",
- "kernel": "(3,3)",
- "pad": "(0,0)",
- "pool_type": "max",
- "pooling_convention": "full",
- "stride": "(2,2)"
- },
- "name": "pool2_5",
- "inputs": [[80, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv3_5_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv3_5_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "Convolution",
- "param": {
- "cudnn_off": "False",
- "cudnn_tune": "off",
- "dilate": "(1,1)",
- "kernel": "(2,2)",
- "no_bias": "False",
- "num_filter": "64",
- "num_group": "1",
- "pad": "(0,0)",
- "stride": "(1,1)",
- "workspace": "1024"
- },
- "name": "conv3_5",
- "inputs": [[81, 0], [82, 0], [83, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "prelu3_5_gamma",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "LeakyReLU",
- "param": {
- "act_type": "prelu",
- "lower_bound": "0.125",
- "slope": "0.25",
- "upper_bound": "0.334"
- },
- "name": "prelu3_5",
- "inputs": [[84, 0], [85, 0]],
- "backward_source_id": -1
- },
- {
- "op": "Concat",
- "param": {
- "dim": "1",
- "num_args": "5"
- },
- "name": "concat",
- "inputs": [[18, 0], [35, 0], [52, 0], [69, 0], [86, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "fc4_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "fc4_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "FullyConnected",
- "param": {
- "no_bias": "False",
- "num_hidden": "256"
- },
- "name": "fc4",
- "inputs": [[87, 0], [88, 0], [89, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "prelu4_gamma",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "LeakyReLU",
- "param": {
- "act_type": "prelu",
- "lower_bound": "0.125",
- "slope": "0.25",
- "upper_bound": "0.334"
- },
- "name": "prelu4",
- "inputs": [[90, 0], [91, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "fc4_1_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "fc4_1_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "FullyConnected",
- "param": {
- "no_bias": "False",
- "num_hidden": "64"
- },
- "name": "fc4_1",
- "inputs": [[92, 0], [93, 0], [94, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "prelu4_1_gamma",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "LeakyReLU",
- "param": {
- "act_type": "prelu",
- "lower_bound": "0.125",
- "slope": "0.25",
- "upper_bound": "0.334"
- },
- "name": "prelu4_1",
- "inputs": [[95, 0], [96, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "fc5_1_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "fc5_1_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "FullyConnected",
- "param": {
- "no_bias": "False",
- "num_hidden": "2"
- },
- "name": "fc5_1",
- "inputs": [[97, 0], [98, 0], [99, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "fc4_2_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "fc4_2_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "FullyConnected",
- "param": {
- "no_bias": "False",
- "num_hidden": "64"
- },
- "name": "fc4_2",
- "inputs": [[92, 0], [101, 0], [102, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "prelu4_2_gamma",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "LeakyReLU",
- "param": {
- "act_type": "prelu",
- "lower_bound": "0.125",
- "slope": "0.25",
- "upper_bound": "0.334"
- },
- "name": "prelu4_2",
- "inputs": [[103, 0], [104, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "fc5_2_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "fc5_2_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "FullyConnected",
- "param": {
- "no_bias": "False",
- "num_hidden": "2"
- },
- "name": "fc5_2",
- "inputs": [[105, 0], [106, 0], [107, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "fc4_3_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "fc4_3_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "FullyConnected",
- "param": {
- "no_bias": "False",
- "num_hidden": "64"
- },
- "name": "fc4_3",
- "inputs": [[92, 0], [109, 0], [110, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "prelu4_3_gamma",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "LeakyReLU",
- "param": {
- "act_type": "prelu",
- "lower_bound": "0.125",
- "slope": "0.25",
- "upper_bound": "0.334"
- },
- "name": "prelu4_3",
- "inputs": [[111, 0], [112, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "fc5_3_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "fc5_3_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "FullyConnected",
- "param": {
- "no_bias": "False",
- "num_hidden": "2"
- },
- "name": "fc5_3",
- "inputs": [[113, 0], [114, 0], [115, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "fc4_4_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "fc4_4_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "FullyConnected",
- "param": {
- "no_bias": "False",
- "num_hidden": "64"
- },
- "name": "fc4_4",
- "inputs": [[92, 0], [117, 0], [118, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "prelu4_4_gamma",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "LeakyReLU",
- "param": {
- "act_type": "prelu",
- "lower_bound": "0.125",
- "slope": "0.25",
- "upper_bound": "0.334"
- },
- "name": "prelu4_4",
- "inputs": [[119, 0], [120, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "fc5_4_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "fc5_4_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "FullyConnected",
- "param": {
- "no_bias": "False",
- "num_hidden": "2"
- },
- "name": "fc5_4",
- "inputs": [[121, 0], [122, 0], [123, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "fc4_5_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "fc4_5_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "FullyConnected",
- "param": {
- "no_bias": "False",
- "num_hidden": "64"
- },
- "name": "fc4_5",
- "inputs": [[92, 0], [125, 0], [126, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "prelu4_5_gamma",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "LeakyReLU",
- "param": {
- "act_type": "prelu",
- "lower_bound": "0.125",
- "slope": "0.25",
- "upper_bound": "0.334"
- },
- "name": "prelu4_5",
- "inputs": [[127, 0], [128, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "fc5_5_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "fc5_5_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "FullyConnected",
- "param": {
- "no_bias": "False",
- "num_hidden": "2"
- },
- "name": "fc5_5",
- "inputs": [[129, 0], [130, 0], [131, 0]],
- "backward_source_id": -1
- }
- ],
- "arg_nodes": [
- 0,
- 2,
- 3,
- 5,
- 8,
- 9,
- 11,
- 14,
- 15,
- 17,
- 19,
- 20,
- 22,
- 25,
- 26,
- 28,
- 31,
- 32,
- 34,
- 36,
- 37,
- 39,
- 42,
- 43,
- 45,
- 48,
- 49,
- 51,
- 53,
- 54,
- 56,
- 59,
- 60,
- 62,
- 65,
- 66,
- 68,
- 70,
- 71,
- 73,
- 76,
- 77,
- 79,
- 82,
- 83,
- 85,
- 88,
- 89,
- 91,
- 93,
- 94,
- 96,
- 98,
- 99,
- 101,
- 102,
- 104,
- 106,
- 107,
- 109,
- 110,
- 112,
- 114,
- 115,
- 117,
- 118,
- 120,
- 122,
- 123,
- 125,
- 126,
- 128,
- 130,
- 131
- ],
- "heads": [[100, 0], [108, 0], [116, 0], [124, 0], [132, 0]]
-}
\ No newline at end of file
diff --git a/deploy/mtcnn-model/det4.caffemodel b/deploy/mtcnn-model/det4.caffemodel
deleted file mode 100644
index 38353c4..0000000
Binary files a/deploy/mtcnn-model/det4.caffemodel and /dev/null differ
diff --git a/deploy/mtcnn-model/det4.prototxt b/deploy/mtcnn-model/det4.prototxt
deleted file mode 100644
index 4cdc329..0000000
--- a/deploy/mtcnn-model/det4.prototxt
+++ /dev/null
@@ -1,995 +0,0 @@
-name: "LNet"
-input: "data"
-input_dim: 1
-input_dim: 15
-input_dim: 24
-input_dim: 24
-
-layer {
- name: "slicer_data"
- type: "Slice"
- bottom: "data"
- top: "data241"
- top: "data242"
- top: "data243"
- top: "data244"
- top: "data245"
- slice_param {
- axis: 1
- slice_point: 3
- slice_point: 6
- slice_point: 9
- slice_point: 12
- }
-}
-layer {
- name: "conv1_1"
- type: "Convolution"
- bottom: "data241"
- top: "conv1_1"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 1
- }
- convolution_param {
- num_output: 28
- kernel_size: 3
- stride: 1
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-
-}
-layer {
- name: "prelu1_1"
- type: "PReLU"
- bottom: "conv1_1"
- top: "conv1_1"
-
-}
-layer {
- name: "pool1_1"
- type: "Pooling"
- bottom: "conv1_1"
- top: "pool1_1"
- pooling_param {
- pool: MAX
- kernel_size: 3
- stride: 2
- }
-}
-
-layer {
- name: "conv2_1"
- type: "Convolution"
- bottom: "pool1_1"
- top: "conv2_1"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 1
- }
- convolution_param {
- num_output: 48
- kernel_size: 3
- stride: 1
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-
-}
-layer {
- name: "prelu2_1"
- type: "PReLU"
- bottom: "conv2_1"
- top: "conv2_1"
-}
-layer {
- name: "pool2_1"
- type: "Pooling"
- bottom: "conv2_1"
- top: "pool2_1"
- pooling_param {
- pool: MAX
- kernel_size: 3
- stride: 2
- }
-
-}
-layer {
- name: "conv3_1"
- type: "Convolution"
- bottom: "pool2_1"
- top: "conv3_1"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 1
- }
- convolution_param {
- num_output: 64
- kernel_size: 2
- stride: 1
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-
-}
-layer {
- name: "prelu3_1"
- type: "PReLU"
- bottom: "conv3_1"
- top: "conv3_1"
-}
-##########################
-layer {
- name: "conv1_2"
- type: "Convolution"
- bottom: "data242"
- top: "conv1_2"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 1
- }
- convolution_param {
- num_output: 28
- kernel_size: 3
- stride: 1
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-
-}
-layer {
- name: "prelu1_2"
- type: "PReLU"
- bottom: "conv1_2"
- top: "conv1_2"
-
-}
-layer {
- name: "pool1_2"
- type: "Pooling"
- bottom: "conv1_2"
- top: "pool1_2"
- pooling_param {
- pool: MAX
- kernel_size: 3
- stride: 2
- }
-}
-
-layer {
- name: "conv2_2"
- type: "Convolution"
- bottom: "pool1_2"
- top: "conv2_2"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 1
- }
- convolution_param {
- num_output: 48
- kernel_size: 3
- stride: 1
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-
-}
-layer {
- name: "prelu2_2"
- type: "PReLU"
- bottom: "conv2_2"
- top: "conv2_2"
-}
-layer {
- name: "pool2_2"
- type: "Pooling"
- bottom: "conv2_2"
- top: "pool2_2"
- pooling_param {
- pool: MAX
- kernel_size: 3
- stride: 2
- }
-
-}
-layer {
- name: "conv3_2"
- type: "Convolution"
- bottom: "pool2_2"
- top: "conv3_2"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 1
- }
- convolution_param {
- num_output: 64
- kernel_size: 2
- stride: 1
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-
-}
-layer {
- name: "prelu3_2"
- type: "PReLU"
- bottom: "conv3_2"
- top: "conv3_2"
-}
-##########################
-##########################
-layer {
- name: "conv1_3"
- type: "Convolution"
- bottom: "data243"
- top: "conv1_3"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 1
- }
- convolution_param {
- num_output: 28
- kernel_size: 3
- stride: 1
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-
-}
-layer {
- name: "prelu1_3"
- type: "PReLU"
- bottom: "conv1_3"
- top: "conv1_3"
-
-}
-layer {
- name: "pool1_3"
- type: "Pooling"
- bottom: "conv1_3"
- top: "pool1_3"
- pooling_param {
- pool: MAX
- kernel_size: 3
- stride: 2
- }
-}
-
-layer {
- name: "conv2_3"
- type: "Convolution"
- bottom: "pool1_3"
- top: "conv2_3"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 1
- }
- convolution_param {
- num_output: 48
- kernel_size: 3
- stride: 1
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-
-}
-layer {
- name: "prelu2_3"
- type: "PReLU"
- bottom: "conv2_3"
- top: "conv2_3"
-}
-layer {
- name: "pool2_3"
- type: "Pooling"
- bottom: "conv2_3"
- top: "pool2_3"
- pooling_param {
- pool: MAX
- kernel_size: 3
- stride: 2
- }
-
-}
-layer {
- name: "conv3_3"
- type: "Convolution"
- bottom: "pool2_3"
- top: "conv3_3"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 1
- }
- convolution_param {
- num_output: 64
- kernel_size: 2
- stride: 1
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-
-}
-layer {
- name: "prelu3_3"
- type: "PReLU"
- bottom: "conv3_3"
- top: "conv3_3"
-}
-##########################
-##########################
-layer {
- name: "conv1_4"
- type: "Convolution"
- bottom: "data244"
- top: "conv1_4"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 1
- }
- convolution_param {
- num_output: 28
- kernel_size: 3
- stride: 1
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-
-}
-layer {
- name: "prelu1_4"
- type: "PReLU"
- bottom: "conv1_4"
- top: "conv1_4"
-
-}
-layer {
- name: "pool1_4"
- type: "Pooling"
- bottom: "conv1_4"
- top: "pool1_4"
- pooling_param {
- pool: MAX
- kernel_size: 3
- stride: 2
- }
-}
-
-layer {
- name: "conv2_4"
- type: "Convolution"
- bottom: "pool1_4"
- top: "conv2_4"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 1
- }
- convolution_param {
- num_output: 48
- kernel_size: 3
- stride: 1
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-
-}
-layer {
- name: "prelu2_4"
- type: "PReLU"
- bottom: "conv2_4"
- top: "conv2_4"
-}
-layer {
- name: "pool2_4"
- type: "Pooling"
- bottom: "conv2_4"
- top: "pool2_4"
- pooling_param {
- pool: MAX
- kernel_size: 3
- stride: 2
- }
-
-}
-layer {
- name: "conv3_4"
- type: "Convolution"
- bottom: "pool2_4"
- top: "conv3_4"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 1
- }
- convolution_param {
- num_output: 64
- kernel_size: 2
- stride: 1
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-
-}
-layer {
- name: "prelu3_4"
- type: "PReLU"
- bottom: "conv3_4"
- top: "conv3_4"
-}
-##########################
-##########################
-layer {
- name: "conv1_5"
- type: "Convolution"
- bottom: "data245"
- top: "conv1_5"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 1
- }
- convolution_param {
- num_output: 28
- kernel_size: 3
- stride: 1
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-
-}
-layer {
- name: "prelu1_5"
- type: "PReLU"
- bottom: "conv1_5"
- top: "conv1_5"
-
-}
-layer {
- name: "pool1_5"
- type: "Pooling"
- bottom: "conv1_5"
- top: "pool1_5"
- pooling_param {
- pool: MAX
- kernel_size: 3
- stride: 2
- }
-}
-
-layer {
- name: "conv2_5"
- type: "Convolution"
- bottom: "pool1_5"
- top: "conv2_5"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 1
- }
- convolution_param {
- num_output: 48
- kernel_size: 3
- stride: 1
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-
-}
-layer {
- name: "prelu2_5"
- type: "PReLU"
- bottom: "conv2_5"
- top: "conv2_5"
-}
-layer {
- name: "pool2_5"
- type: "Pooling"
- bottom: "conv2_5"
- top: "pool2_5"
- pooling_param {
- pool: MAX
- kernel_size: 3
- stride: 2
- }
-
-}
-layer {
- name: "conv3_5"
- type: "Convolution"
- bottom: "pool2_5"
- top: "conv3_5"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 1
- }
- convolution_param {
- num_output: 64
- kernel_size: 2
- stride: 1
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-
-}
-layer {
- name: "prelu3_5"
- type: "PReLU"
- bottom: "conv3_5"
- top: "conv3_5"
-}
-##########################
-layer {
- name: "concat"
- bottom: "conv3_1"
- bottom: "conv3_2"
- bottom: "conv3_3"
- bottom: "conv3_4"
- bottom: "conv3_5"
- top: "conv3"
- type: "Concat"
- concat_param {
- axis: 1
- }
-}
-##########################
-layer {
- name: "fc4"
- type: "InnerProduct"
- bottom: "conv3"
- top: "fc4"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 1
- }
- inner_product_param {
- num_output: 256
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-
-}
-layer {
- name: "prelu4"
- type: "PReLU"
- bottom: "fc4"
- top: "fc4"
-}
-############################
-layer {
- name: "fc4_1"
- type: "InnerProduct"
- bottom: "fc4"
- top: "fc4_1"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 1
- }
- inner_product_param {
- num_output: 64
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-
-}
-layer {
- name: "prelu4_1"
- type: "PReLU"
- bottom: "fc4_1"
- top: "fc4_1"
-}
-layer {
- name: "fc5_1"
- type: "InnerProduct"
- bottom: "fc4_1"
- top: "fc5_1"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 1
- }
- inner_product_param {
- num_output: 2
- weight_filler {
- type: "xavier"
- #type: "constant"
- #value: 0
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-}
-
-
-#########################
-layer {
- name: "fc4_2"
- type: "InnerProduct"
- bottom: "fc4"
- top: "fc4_2"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 1
- }
- inner_product_param {
- num_output: 64
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-
-}
-layer {
- name: "prelu4_2"
- type: "PReLU"
- bottom: "fc4_2"
- top: "fc4_2"
-}
-layer {
- name: "fc5_2"
- type: "InnerProduct"
- bottom: "fc4_2"
- top: "fc5_2"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 1
- }
- inner_product_param {
- num_output: 2
- weight_filler {
- type: "xavier"
- #type: "constant"
- #value: 0
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-}
-
-#########################
-layer {
- name: "fc4_3"
- type: "InnerProduct"
- bottom: "fc4"
- top: "fc4_3"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 1
- }
- inner_product_param {
- num_output: 64
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-
-}
-layer {
- name: "prelu4_3"
- type: "PReLU"
- bottom: "fc4_3"
- top: "fc4_3"
-}
-layer {
- name: "fc5_3"
- type: "InnerProduct"
- bottom: "fc4_3"
- top: "fc5_3"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 1
- }
- inner_product_param {
- num_output: 2
- weight_filler {
- type: "xavier"
- #type: "constant"
- #value: 0
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-}
-
-#########################
-layer {
- name: "fc4_4"
- type: "InnerProduct"
- bottom: "fc4"
- top: "fc4_4"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 1
- }
- inner_product_param {
- num_output: 64
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-
-}
-layer {
- name: "prelu4_4"
- type: "PReLU"
- bottom: "fc4_4"
- top: "fc4_4"
-}
-layer {
- name: "fc5_4"
- type: "InnerProduct"
- bottom: "fc4_4"
- top: "fc5_4"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 1
- }
- inner_product_param {
- num_output: 2
- weight_filler {
- type: "xavier"
- #type: "constant"
- #value: 0
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-}
-
-#########################
-layer {
- name: "fc4_5"
- type: "InnerProduct"
- bottom: "fc4"
- top: "fc4_5"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 1
- }
- inner_product_param {
- num_output: 64
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-
-}
-layer {
- name: "prelu4_5"
- type: "PReLU"
- bottom: "fc4_5"
- top: "fc4_5"
-}
-layer {
- name: "fc5_5"
- type: "InnerProduct"
- bottom: "fc4_5"
- top: "fc5_5"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 1
- }
- inner_product_param {
- num_output: 2
- weight_filler {
- type: "xavier"
- #type: "constant"
- #value: 0
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-}
-
-#########################
-
diff --git a/deploy/mtcnn_detector.py b/deploy/mtcnn_detector.py
deleted file mode 100644
index 8fa22f2..0000000
--- a/deploy/mtcnn_detector.py
+++ /dev/null
@@ -1,864 +0,0 @@
-# coding: utf-8
-import os
-import mxnet as mx
-import numpy as np
-import math
-import cv2
-from multiprocessing import Pool
-from itertools import repeat
-try:
- from itertools import izip
-except ImportError:
- izip = zip
-
-def nms(boxes, overlap_threshold, mode='Union'):
- """
- non max suppression
-
- Parameters:
- ----------
- box: numpy array n x 5
- input bbox array
- overlap_threshold: float number
- threshold of overlap
- mode: float number
- how to compute overlap ratio, 'Union' or 'Min'
- Returns:
- -------
- index array of the selected bbox
- """
- # if there are no boxes, return an empty list
- if len(boxes) == 0:
- return []
-
- # if the bounding boxes integers, convert them to floats
- if boxes.dtype.kind == "i":
- boxes = boxes.astype("float")
-
- # initialize the list of picked indexes
- pick = []
-
- # grab the coordinates of the bounding boxes
- x1, y1, x2, y2, score = [boxes[:, i] for i in range(5)]
-
- area = (x2 - x1 + 1) * (y2 - y1 + 1)
- idxs = np.argsort(score)
-
- # keep looping while some indexes still remain in the indexes list
- while len(idxs) > 0:
- # grab the last index in the indexes list and add the index value to the list of picked indexes
- last = len(idxs) - 1
- i = idxs[last]
- pick.append(i)
-
- xx1 = np.maximum(x1[i], x1[idxs[:last]])
- yy1 = np.maximum(y1[i], y1[idxs[:last]])
- xx2 = np.minimum(x2[i], x2[idxs[:last]])
- yy2 = np.minimum(y2[i], y2[idxs[:last]])
-
- # compute the width and height of the bounding box
- w = np.maximum(0, xx2 - xx1 + 1)
- h = np.maximum(0, yy2 - yy1 + 1)
-
- inter = w * h
- if mode == 'Min':
- overlap = inter / np.minimum(area[i], area[idxs[:last]])
- else:
- overlap = inter / (area[i] + area[idxs[:last]] - inter)
-
- # delete all indexes from the index list that have
- idxs = np.delete(
- idxs,
- np.concatenate(([last], np.where(overlap > overlap_threshold)[0])))
-
- return pick
-
-
-def adjust_input(in_data):
- """
- adjust the input from (h, w, c) to ( 1, c, h, w) for network input
-
- Parameters:
- ----------
- in_data: numpy array of shape (h, w, c)
- input data
- Returns:
- -------
- out_data: numpy array of shape (1, c, h, w)
- reshaped array
- """
- if in_data.dtype is not np.dtype('float32'):
- out_data = in_data.astype(np.float32)
- else:
- out_data = in_data
-
- out_data = out_data.transpose((2, 0, 1))
- out_data = np.expand_dims(out_data, 0)
- out_data = (out_data - 127.5) * 0.0078125
- return out_data
-
-
-def generate_bbox(map, reg, scale, threshold):
- """
- generate bbox from feature map
- Parameters:
- ----------
- map: numpy array , n x m x 1
- detect score for each position
- reg: numpy array , n x m x 4
- bbox
- scale: float number
- scale of this detection
- threshold: float number
- detect threshold
- Returns:
- -------
- bbox array
- """
- stride = 2
- cellsize = 12
-
- t_index = np.where(map > threshold)
-
- # find nothing
- if t_index[0].size == 0:
- return np.array([])
-
- dx1, dy1, dx2, dy2 = [reg[0, i, t_index[0], t_index[1]] for i in range(4)]
-
- reg = np.array([dx1, dy1, dx2, dy2])
- score = map[t_index[0], t_index[1]]
- boundingbox = np.vstack([
- np.round((stride * t_index[1] + 1) / scale),
- np.round((stride * t_index[0] + 1) / scale),
- np.round((stride * t_index[1] + 1 + cellsize) / scale),
- np.round((stride * t_index[0] + 1 + cellsize) / scale), score, reg
- ])
-
- return boundingbox.T
-
-
-def detect_first_stage(img, net, scale, threshold):
- """
- run PNet for first stage
-
- Parameters:
- ----------
- img: numpy array, bgr order
- input image
- scale: float number
- how much should the input image scale
- net: PNet
- worker
- Returns:
- -------
- total_boxes : bboxes
- """
- height, width, _ = img.shape
- hs = int(math.ceil(height * scale))
- ws = int(math.ceil(width * scale))
-
- im_data = cv2.resize(img, (ws, hs))
-
- # adjust for the network input
- input_buf = adjust_input(im_data)
- output = net.predict(input_buf)
- boxes = generate_bbox(output[1][0, 1, :, :], output[0], scale, threshold)
-
- if boxes.size == 0:
- return None
-
- # nms
- pick = nms(boxes[:, 0:5], 0.5, mode='Union')
- boxes = boxes[pick]
- return boxes
-
-
-def detect_first_stage_warpper(args):
- return detect_first_stage(*args)
-
-class MtcnnDetector(object):
- """
- Joint Face Detection and Alignment using Multi-task Cascaded Convolutional Neural Networks
- see https://github.com/kpzhang93/MTCNN_face_detection_alignment
- this is a mxnet version
- """
- def __init__(self,
- model_folder='.',
- minsize=20,
- threshold=[0.6, 0.7, 0.8],
- factor=0.709,
- num_worker=1,
- accurate_landmark=False,
- ctx=mx.cpu()):
- """
- Initialize the detector
-
- Parameters:
- ----------
- model_folder : string
- path for the models
- minsize : float number
- minimal face to detect
- threshold : float number
- detect threshold for 3 stages
- factor: float number
- scale factor for image pyramid
- num_worker: int number
- number of processes we use for first stage
- accurate_landmark: bool
- use accurate landmark localization or not
-
- """
- self.num_worker = num_worker
- self.accurate_landmark = accurate_landmark
-
- # load 4 models from folder
- models = ['det1', 'det2', 'det3', 'det4']
- models = [os.path.join(model_folder, f) for f in models]
-
- self.PNets = []
- for i in range(num_worker):
- workner_net = mx.model.FeedForward.load(models[0], 1, ctx=ctx)
- self.PNets.append(workner_net)
-
- #self.Pool = Pool(num_worker)
-
- self.RNet = mx.model.FeedForward.load(models[1], 1, ctx=ctx)
- self.ONet = mx.model.FeedForward.load(models[2], 1, ctx=ctx)
- self.LNet = mx.model.FeedForward.load(models[3], 1, ctx=ctx)
-
- self.minsize = float(minsize)
- self.factor = float(factor)
- self.threshold = threshold
-
- def convert_to_square(self, bbox):
- """
- convert bbox to square
-
- Parameters:
- ----------
- bbox: numpy array , shape n x 5
- input bbox
-
- Returns:
- -------
- square bbox
- """
- square_bbox = bbox.copy()
-
- h = bbox[:, 3] - bbox[:, 1] + 1
- w = bbox[:, 2] - bbox[:, 0] + 1
- max_side = np.maximum(h, w)
- square_bbox[:, 0] = bbox[:, 0] + w * 0.5 - max_side * 0.5
- square_bbox[:, 1] = bbox[:, 1] + h * 0.5 - max_side * 0.5
- square_bbox[:, 2] = square_bbox[:, 0] + max_side - 1
- square_bbox[:, 3] = square_bbox[:, 1] + max_side - 1
- return square_bbox
-
- def calibrate_box(self, bbox, reg):
- """
- calibrate bboxes
-
- Parameters:
- ----------
- bbox: numpy array, shape n x 5
- input bboxes
- reg: numpy array, shape n x 4
- bboxex adjustment
-
- Returns:
- -------
- bboxes after refinement
-
- """
- w = bbox[:, 2] - bbox[:, 0] + 1
- w = np.expand_dims(w, 1)
- h = bbox[:, 3] - bbox[:, 1] + 1
- h = np.expand_dims(h, 1)
- reg_m = np.hstack([w, h, w, h])
- aug = reg_m * reg
- bbox[:, 0:4] = bbox[:, 0:4] + aug
- return bbox
-
- def pad(self, bboxes, w, h):
- """
- pad the the bboxes, alse restrict the size of it
-
- Parameters:
- ----------
- bboxes: numpy array, n x 5
- input bboxes
- w: float number
- width of the input image
- h: float number
- height of the input image
- Returns :
- ------s
- dy, dx : numpy array, n x 1
- start point of the bbox in target image
- edy, edx : numpy array, n x 1
- end point of the bbox in target image
- y, x : numpy array, n x 1
- start point of the bbox in original image
- ex, ex : numpy array, n x 1
- end point of the bbox in original image
- tmph, tmpw: numpy array, n x 1
- height and width of the bbox
-
- """
- tmpw, tmph = bboxes[:, 2] - bboxes[:, 0] + 1, bboxes[:,
- 3] - bboxes[:,
- 1] + 1
- num_box = bboxes.shape[0]
-
- dx, dy = np.zeros((num_box, )), np.zeros((num_box, ))
- edx, edy = tmpw.copy() - 1, tmph.copy() - 1
-
- x, y, ex, ey = bboxes[:, 0], bboxes[:, 1], bboxes[:, 2], bboxes[:, 3]
-
- tmp_index = np.where(ex > w - 1)
- edx[tmp_index] = tmpw[tmp_index] + w - 2 - ex[tmp_index]
- ex[tmp_index] = w - 1
-
- tmp_index = np.where(ey > h - 1)
- edy[tmp_index] = tmph[tmp_index] + h - 2 - ey[tmp_index]
- ey[tmp_index] = h - 1
-
- tmp_index = np.where(x < 0)
- dx[tmp_index] = 0 - x[tmp_index]
- x[tmp_index] = 0
-
- tmp_index = np.where(y < 0)
- dy[tmp_index] = 0 - y[tmp_index]
- y[tmp_index] = 0
-
- return_list = [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph]
- return_list = [item.astype(np.int32) for item in return_list]
-
- return return_list
-
- def slice_index(self, number):
- """
- slice the index into (n,n,m), m < n
- Parameters:
- ----------
- number: int number
- number
- """
- def chunks(l, n):
- """Yield successive n-sized chunks from l."""
- for i in range(0, len(l), n):
- yield l[i:i + n]
-
- num_list = range(number)
- return list(chunks(num_list, self.num_worker))
-
- def detect_face_limited(self, img, det_type=2):
- height, width, _ = img.shape
- if det_type >= 2:
- total_boxes = np.array(
- [[0.0, 0.0, img.shape[1], img.shape[0], 0.9]],
- dtype=np.float32)
- num_box = total_boxes.shape[0]
-
- # pad the bbox
- [dy, edy, dx, edx, y, ey, x, ex, tmpw,
- tmph] = self.pad(total_boxes, width, height)
- # (3, 24, 24) is the input shape for RNet
- input_buf = np.zeros((num_box, 3, 24, 24), dtype=np.float32)
-
- for i in range(num_box):
- tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
- tmp[dy[i]:edy[i] + 1,
- dx[i]:edx[i] + 1, :] = img[y[i]:ey[i] + 1,
- x[i]:ex[i] + 1, :]
- input_buf[i, :, :, :] = adjust_input(cv2.resize(tmp, (24, 24)))
-
- output = self.RNet.predict(input_buf)
-
- # filter the total_boxes with threshold
- passed = np.where(output[1][:, 1] > self.threshold[1])
- total_boxes = total_boxes[passed]
-
- if total_boxes.size == 0:
- return None
-
- total_boxes[:, 4] = output[1][passed, 1].reshape((-1, ))
- reg = output[0][passed]
-
- # nms
- pick = nms(total_boxes, 0.7, 'Union')
- total_boxes = total_boxes[pick]
- total_boxes = self.calibrate_box(total_boxes, reg[pick])
- total_boxes = self.convert_to_square(total_boxes)
- total_boxes[:, 0:4] = np.round(total_boxes[:, 0:4])
- else:
- total_boxes = np.array(
- [[0.0, 0.0, img.shape[1], img.shape[0], 0.9]],
- dtype=np.float32)
- num_box = total_boxes.shape[0]
- [dy, edy, dx, edx, y, ey, x, ex, tmpw,
- tmph] = self.pad(total_boxes, width, height)
- # (3, 48, 48) is the input shape for ONet
- input_buf = np.zeros((num_box, 3, 48, 48), dtype=np.float32)
-
- for i in range(num_box):
- tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.float32)
- tmp[dy[i]:edy[i] + 1, dx[i]:edx[i] + 1, :] = img[y[i]:ey[i] + 1,
- x[i]:ex[i] + 1, :]
- input_buf[i, :, :, :] = adjust_input(cv2.resize(tmp, (48, 48)))
-
- output = self.ONet.predict(input_buf)
- #print(output[2])
-
- # filter the total_boxes with threshold
- passed = np.where(output[2][:, 1] > self.threshold[2])
- total_boxes = total_boxes[passed]
-
- if total_boxes.size == 0:
- return None
-
- total_boxes[:, 4] = output[2][passed, 1].reshape((-1, ))
- reg = output[1][passed]
- points = output[0][passed]
-
- # compute landmark points
- bbw = total_boxes[:, 2] - total_boxes[:, 0] + 1
- bbh = total_boxes[:, 3] - total_boxes[:, 1] + 1
- points[:, 0:5] = np.expand_dims(
- total_boxes[:, 0], 1) + np.expand_dims(bbw, 1) * points[:, 0:5]
- points[:, 5:10] = np.expand_dims(
- total_boxes[:, 1], 1) + np.expand_dims(bbh, 1) * points[:, 5:10]
-
- # nms
- total_boxes = self.calibrate_box(total_boxes, reg)
- pick = nms(total_boxes, 0.7, 'Min')
- total_boxes = total_boxes[pick]
- points = points[pick]
-
- if not self.accurate_landmark:
- return total_boxes, points
-
- #############################################
- # extended stage
- #############################################
- num_box = total_boxes.shape[0]
- patchw = np.maximum(total_boxes[:, 2] - total_boxes[:, 0] + 1,
- total_boxes[:, 3] - total_boxes[:, 1] + 1)
- patchw = np.round(patchw * 0.25)
-
- # make it even
- patchw[np.where(np.mod(patchw, 2) == 1)] += 1
-
- input_buf = np.zeros((num_box, 15, 24, 24), dtype=np.float32)
- for i in range(5):
- x, y = points[:, i], points[:, i + 5]
- x, y = np.round(x - 0.5 * patchw), np.round(y - 0.5 * patchw)
- [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(
- np.vstack([x, y, x + patchw - 1, y + patchw - 1]).T, width,
- height)
- for j in range(num_box):
- tmpim = np.zeros((tmpw[j], tmpw[j], 3), dtype=np.float32)
- tmpim[dy[j]:edy[j] + 1,
- dx[j]:edx[j] + 1, :] = img[y[j]:ey[j] + 1,
- x[j]:ex[j] + 1, :]
- input_buf[j, i * 3:i * 3 + 3, :, :] = adjust_input(
- cv2.resize(tmpim, (24, 24)))
-
- output = self.LNet.predict(input_buf)
-
- pointx = np.zeros((num_box, 5))
- pointy = np.zeros((num_box, 5))
-
- for k in range(5):
- # do not make a large movement
- tmp_index = np.where(np.abs(output[k] - 0.5) > 0.35)
- output[k][tmp_index[0]] = 0.5
-
- pointx[:, k] = np.round(points[:, k] -
- 0.5 * patchw) + output[k][:, 0] * patchw
- pointy[:, k] = np.round(points[:, k + 5] -
- 0.5 * patchw) + output[k][:, 1] * patchw
-
- points = np.hstack([pointx, pointy])
- points = points.astype(np.int32)
-
- return total_boxes, points
-
- def detect_face(self, img, det_type=0):
- """
- detect face over img
- Parameters:
- ----------
- img: numpy array, bgr order of shape (1, 3, n, m)
- input image
- Retures:
- -------
- bboxes: numpy array, n x 5 (x1,y2,x2,y2,score)
- bboxes
- points: numpy array, n x 10 (x1, x2 ... x5, y1, y2 ..y5)
- landmarks
- """
-
- # check input
- height, width, _ = img.shape
- if det_type == 0:
- MIN_DET_SIZE = 12
-
- if img is None:
- return None
-
- # only works for color image
- if len(img.shape) != 3:
- return None
-
- # detected boxes
- total_boxes = []
-
- minl = min(height, width)
-
- # get all the valid scales
- scales = []
- m = MIN_DET_SIZE / self.minsize
- minl *= m
- factor_count = 0
- while minl > MIN_DET_SIZE:
- scales.append(m * self.factor**factor_count)
- minl *= self.factor
- factor_count += 1
-
- #############################################
- # first stage
- #############################################
- #for scale in scales:
- # return_boxes = self.detect_first_stage(img, scale, 0)
- # if return_boxes is not None:
- # total_boxes.append(return_boxes)
-
- sliced_index = self.slice_index(len(scales))
- total_boxes = []
- for batch in sliced_index:
- #local_boxes = self.Pool.map( detect_first_stage_warpper, \
- # izip(repeat(img), self.PNets[:len(batch)], [scales[i] for i in batch], repeat(self.threshold[0])) )
- local_boxes = map( detect_first_stage_warpper, \
- izip(repeat(img), self.PNets[:len(batch)], [scales[i] for i in batch], repeat(self.threshold[0])) )
- total_boxes.extend(local_boxes)
-
- # remove the Nones
- total_boxes = [i for i in total_boxes if i is not None]
-
- if len(total_boxes) == 0:
- return None
-
- total_boxes = np.vstack(total_boxes)
-
- if total_boxes.size == 0:
- return None
-
- # merge the detection from first stage
- pick = nms(total_boxes[:, 0:5], 0.7, 'Union')
- total_boxes = total_boxes[pick]
-
- bbw = total_boxes[:, 2] - total_boxes[:, 0] + 1
- bbh = total_boxes[:, 3] - total_boxes[:, 1] + 1
-
- # refine the bboxes
- total_boxes = np.vstack([
- total_boxes[:, 0] + total_boxes[:, 5] * bbw,
- total_boxes[:, 1] + total_boxes[:, 6] * bbh,
- total_boxes[:, 2] + total_boxes[:, 7] * bbw,
- total_boxes[:, 3] + total_boxes[:, 8] * bbh, total_boxes[:, 4]
- ])
-
- total_boxes = total_boxes.T
- total_boxes = self.convert_to_square(total_boxes)
- total_boxes[:, 0:4] = np.round(total_boxes[:, 0:4])
- else:
- total_boxes = np.array(
- [[0.0, 0.0, img.shape[1], img.shape[0], 0.9]],
- dtype=np.float32)
-
- #############################################
- # second stage
- #############################################
- num_box = total_boxes.shape[0]
-
- # pad the bbox
- [dy, edy, dx, edx, y, ey, x, ex, tmpw,
- tmph] = self.pad(total_boxes, width, height)
- # (3, 24, 24) is the input shape for RNet
- input_buf = np.zeros((num_box, 3, 24, 24), dtype=np.float32)
-
- for i in range(num_box):
- tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
- tmp[dy[i]:edy[i] + 1, dx[i]:edx[i] + 1, :] = img[y[i]:ey[i] + 1,
- x[i]:ex[i] + 1, :]
- input_buf[i, :, :, :] = adjust_input(cv2.resize(tmp, (24, 24)))
-
- output = self.RNet.predict(input_buf)
-
- # filter the total_boxes with threshold
- passed = np.where(output[1][:, 1] > self.threshold[1])
- total_boxes = total_boxes[passed]
-
- if total_boxes.size == 0:
- return None
-
- total_boxes[:, 4] = output[1][passed, 1].reshape((-1, ))
- reg = output[0][passed]
-
- # nms
- pick = nms(total_boxes, 0.7, 'Union')
- total_boxes = total_boxes[pick]
- total_boxes = self.calibrate_box(total_boxes, reg[pick])
- total_boxes = self.convert_to_square(total_boxes)
- total_boxes[:, 0:4] = np.round(total_boxes[:, 0:4])
-
- #############################################
- # third stage
- #############################################
- num_box = total_boxes.shape[0]
-
- # pad the bbox
- [dy, edy, dx, edx, y, ey, x, ex, tmpw,
- tmph] = self.pad(total_boxes, width, height)
- # (3, 48, 48) is the input shape for ONet
- input_buf = np.zeros((num_box, 3, 48, 48), dtype=np.float32)
-
- for i in range(num_box):
- tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.float32)
- tmp[dy[i]:edy[i] + 1, dx[i]:edx[i] + 1, :] = img[y[i]:ey[i] + 1,
- x[i]:ex[i] + 1, :]
- input_buf[i, :, :, :] = adjust_input(cv2.resize(tmp, (48, 48)))
-
- output = self.ONet.predict(input_buf)
-
- # filter the total_boxes with threshold
- passed = np.where(output[2][:, 1] > self.threshold[2])
- total_boxes = total_boxes[passed]
-
- if total_boxes.size == 0:
- return None
-
- total_boxes[:, 4] = output[2][passed, 1].reshape((-1, ))
- reg = output[1][passed]
- points = output[0][passed]
-
- # compute landmark points
- bbw = total_boxes[:, 2] - total_boxes[:, 0] + 1
- bbh = total_boxes[:, 3] - total_boxes[:, 1] + 1
- points[:, 0:5] = np.expand_dims(
- total_boxes[:, 0], 1) + np.expand_dims(bbw, 1) * points[:, 0:5]
- points[:, 5:10] = np.expand_dims(
- total_boxes[:, 1], 1) + np.expand_dims(bbh, 1) * points[:, 5:10]
-
- # nms
- total_boxes = self.calibrate_box(total_boxes, reg)
- pick = nms(total_boxes, 0.7, 'Min')
- total_boxes = total_boxes[pick]
- points = points[pick]
-
- if not self.accurate_landmark:
- return total_boxes, points
-
- #############################################
- # extended stage
- #############################################
- num_box = total_boxes.shape[0]
- patchw = np.maximum(total_boxes[:, 2] - total_boxes[:, 0] + 1,
- total_boxes[:, 3] - total_boxes[:, 1] + 1)
- patchw = np.round(patchw * 0.25)
-
- # make it even
- patchw[np.where(np.mod(patchw, 2) == 1)] += 1
-
- input_buf = np.zeros((num_box, 15, 24, 24), dtype=np.float32)
- for i in range(5):
- x, y = points[:, i], points[:, i + 5]
- x, y = np.round(x - 0.5 * patchw), np.round(y - 0.5 * patchw)
- [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(
- np.vstack([x, y, x + patchw - 1, y + patchw - 1]).T, width,
- height)
- for j in range(num_box):
- tmpim = np.zeros((tmpw[j], tmpw[j], 3), dtype=np.float32)
- tmpim[dy[j]:edy[j] + 1,
- dx[j]:edx[j] + 1, :] = img[y[j]:ey[j] + 1,
- x[j]:ex[j] + 1, :]
- input_buf[j, i * 3:i * 3 + 3, :, :] = adjust_input(
- cv2.resize(tmpim, (24, 24)))
-
- output = self.LNet.predict(input_buf)
-
- pointx = np.zeros((num_box, 5))
- pointy = np.zeros((num_box, 5))
-
- for k in range(5):
- # do not make a large movement
- tmp_index = np.where(np.abs(output[k] - 0.5) > 0.35)
- output[k][tmp_index[0]] = 0.5
-
- pointx[:, k] = np.round(points[:, k] -
- 0.5 * patchw) + output[k][:, 0] * patchw
- pointy[:, k] = np.round(points[:, k + 5] -
- 0.5 * patchw) + output[k][:, 1] * patchw
-
- points = np.hstack([pointx, pointy])
- points = points.astype(np.int32)
-
- return total_boxes, points
-
- def list2colmatrix(self, pts_list):
- """
- convert list to column matrix
- Parameters:
- ----------
- pts_list:
- input list
- Retures:
- -------
- colMat:
-
- """
- assert len(pts_list) > 0
- colMat = []
- for i in range(len(pts_list)):
- colMat.append(pts_list[i][0])
- colMat.append(pts_list[i][1])
- colMat = np.matrix(colMat).transpose()
- return colMat
-
- def find_tfrom_between_shapes(self, from_shape, to_shape):
- """
- find transform between shapes
- Parameters:
- ----------
- from_shape:
- to_shape:
- Retures:
- -------
- tran_m:
- tran_b:
- """
- assert from_shape.shape[0] == to_shape.shape[
- 0] and from_shape.shape[0] % 2 == 0
-
- sigma_from = 0.0
- sigma_to = 0.0
- cov = np.matrix([[0.0, 0.0], [0.0, 0.0]])
-
- # compute the mean and cov
- from_shape_points = from_shape.reshape(from_shape.shape[0] / 2, 2)
- to_shape_points = to_shape.reshape(to_shape.shape[0] / 2, 2)
- mean_from = from_shape_points.mean(axis=0)
- mean_to = to_shape_points.mean(axis=0)
-
- for i in range(from_shape_points.shape[0]):
- temp_dis = np.linalg.norm(from_shape_points[i] - mean_from)
- sigma_from += temp_dis * temp_dis
- temp_dis = np.linalg.norm(to_shape_points[i] - mean_to)
- sigma_to += temp_dis * temp_dis
- cov += (to_shape_points[i].transpose() -
- mean_to.transpose()) * (from_shape_points[i] - mean_from)
-
- sigma_from = sigma_from / to_shape_points.shape[0]
- sigma_to = sigma_to / to_shape_points.shape[0]
- cov = cov / to_shape_points.shape[0]
-
- # compute the affine matrix
- s = np.matrix([[1.0, 0.0], [0.0, 1.0]])
- u, d, vt = np.linalg.svd(cov)
-
- if np.linalg.det(cov) < 0:
- if d[1] < d[0]:
- s[1, 1] = -1
- else:
- s[0, 0] = -1
- r = u * s * vt
- c = 1.0
- if sigma_from != 0:
- c = 1.0 / sigma_from * np.trace(np.diag(d) * s)
-
- tran_b = mean_to.transpose() - c * r * mean_from.transpose()
- tran_m = c * r
-
- return tran_m, tran_b
-
- def extract_image_chips(self, img, points, desired_size=256, padding=0):
- """
- crop and align face
- Parameters:
- ----------
- img: numpy array, bgr order of shape (1, 3, n, m)
- input image
- points: numpy array, n x 10 (x1, x2 ... x5, y1, y2 ..y5)
- desired_size: default 256
- padding: default 0
- Retures:
- -------
- crop_imgs: list, n
- cropped and aligned faces
- """
- crop_imgs = []
- for p in points:
- shape = []
- for k in range(len(p) / 2):
- shape.append(p[k])
- shape.append(p[k + 5])
-
- if padding > 0:
- padding = padding
- else:
- padding = 0
- # average positions of face points
- mean_face_shape_x = [
- 0.224152, 0.75610125, 0.490127, 0.254149, 0.726104
- ]
- mean_face_shape_y = [
- 0.2119465, 0.2119465, 0.628106, 0.780233, 0.780233
- ]
-
- from_points = []
- to_points = []
-
- for i in range(len(shape) / 2):
- x = (padding + mean_face_shape_x[i]) / (2 * padding +
- 1) * desired_size
- y = (padding + mean_face_shape_y[i]) / (2 * padding +
- 1) * desired_size
- to_points.append([x, y])
- from_points.append([shape[2 * i], shape[2 * i + 1]])
-
- # convert the points to Mat
- from_mat = self.list2colmatrix(from_points)
- to_mat = self.list2colmatrix(to_points)
-
- # compute the similar transfrom
- tran_m, tran_b = self.find_tfrom_between_shapes(from_mat, to_mat)
-
- probe_vec = np.matrix([1.0, 0.0]).transpose()
- probe_vec = tran_m * probe_vec
-
- scale = np.linalg.norm(probe_vec)
- angle = 180.0 / math.pi * math.atan2(probe_vec[1, 0], probe_vec[0,
- 0])
-
- from_center = [(shape[0] + shape[2]) / 2.0,
- (shape[1] + shape[3]) / 2.0]
- to_center = [0, 0]
- to_center[1] = desired_size * 0.4
- to_center[0] = desired_size * 0.5
-
- ex = to_center[0] - from_center[0]
- ey = to_center[1] - from_center[1]
-
- rot_mat = cv2.getRotationMatrix2D((from_center[0], from_center[1]),
- -1 * angle, scale)
- rot_mat[0][2] += ex
- rot_mat[1][2] += ey
-
- chips = cv2.warpAffine(img, rot_mat, (desired_size, desired_size))
- crop_imgs.append(chips)
-
- return crop_imgs
-
diff --git a/detection/README.md b/detection/README.md
new file mode 100644
index 0000000..229814e
--- /dev/null
+++ b/detection/README.md
@@ -0,0 +1,42 @@
+## Face Detection
+
+
+
+
+
+
+
+## Introduction
+
+These are the face detection methods of [InsightFace](https://insightface.ai).
+
+
+
+
+
+
+
+### Datasets
+
+ Please refer to the [datasets](_datasets_) page for the details of face detection datasets used for training and evaluation.
+
+### Evaluation
+
+ Please refer to the [evaluation](_evaluation_) page for the details of face detection evaluation.
+
+
+## Methods
+
+
+Supported methods:
+
+- [x] [RetinaFace (CVPR'2020)](retinaface)
+- [x] [SCRFD (Arxiv'2021)](scrfd)
+
+
+
+## Contributing
+
+We appreciate all contributions to improve the face detection model zoo of InsightFace.
+
+
diff --git a/detection/RetinaFace/README.md b/detection/RetinaFace/README.md
index 1e1f07f..521d8d9 100644
--- a/detection/RetinaFace/README.md
+++ b/detection/RetinaFace/README.md
@@ -4,9 +4,9 @@
RetinaFace is a practical single-stage [SOTA](http://shuoyang1213.me/WIDERFACE/WiderFace_Results.html) face detector which is initially introduced in [arXiv technical report](https://arxiv.org/abs/1905.00641) and then accepted by [CVPR 2020](https://openaccess.thecvf.com/content_CVPR_2020/html/Deng_RetinaFace_Single-Shot_Multi-Level_Face_Localisation_in_the_Wild_CVPR_2020_paper.html).
-
+
-
+
## Data
diff --git a/detection/RetinaFaceAntiCov/README.md b/detection/retinaface_anticov/README.md
similarity index 90%
rename from detection/RetinaFaceAntiCov/README.md
rename to detection/retinaface_anticov/README.md
index ae6c222..97b3b24 100644
--- a/detection/RetinaFaceAntiCov/README.md
+++ b/detection/retinaface_anticov/README.md
@@ -4,7 +4,7 @@
RetinaFace-Anti-Cov is a customized one stage face detector to help people protect themselves from CovID-19.
-
+
## Testing
diff --git a/detection/RetinaFaceAntiCov/rcnn/processing/__init__.py b/detection/retinaface_anticov/rcnn/processing/__init__.py
similarity index 100%
rename from detection/RetinaFaceAntiCov/rcnn/processing/__init__.py
rename to detection/retinaface_anticov/rcnn/processing/__init__.py
diff --git a/detection/RetinaFaceAntiCov/rcnn/processing/assign_levels.py b/detection/retinaface_anticov/rcnn/processing/assign_levels.py
similarity index 100%
rename from detection/RetinaFaceAntiCov/rcnn/processing/assign_levels.py
rename to detection/retinaface_anticov/rcnn/processing/assign_levels.py
diff --git a/detection/RetinaFaceAntiCov/rcnn/processing/bbox_regression.py b/detection/retinaface_anticov/rcnn/processing/bbox_regression.py
similarity index 100%
rename from detection/RetinaFaceAntiCov/rcnn/processing/bbox_regression.py
rename to detection/retinaface_anticov/rcnn/processing/bbox_regression.py
diff --git a/detection/RetinaFaceAntiCov/rcnn/processing/bbox_transform.py b/detection/retinaface_anticov/rcnn/processing/bbox_transform.py
similarity index 100%
rename from detection/RetinaFaceAntiCov/rcnn/processing/bbox_transform.py
rename to detection/retinaface_anticov/rcnn/processing/bbox_transform.py
diff --git a/detection/RetinaFaceAntiCov/rcnn/processing/bbox_transform.py.orig b/detection/retinaface_anticov/rcnn/processing/bbox_transform.py.orig
similarity index 100%
rename from detection/RetinaFaceAntiCov/rcnn/processing/bbox_transform.py.orig
rename to detection/retinaface_anticov/rcnn/processing/bbox_transform.py.orig
diff --git a/detection/RetinaFaceAntiCov/rcnn/processing/generate_anchor.py b/detection/retinaface_anticov/rcnn/processing/generate_anchor.py
similarity index 100%
rename from detection/RetinaFaceAntiCov/rcnn/processing/generate_anchor.py
rename to detection/retinaface_anticov/rcnn/processing/generate_anchor.py
diff --git a/detection/RetinaFaceAntiCov/rcnn/processing/nms.py b/detection/retinaface_anticov/rcnn/processing/nms.py
similarity index 100%
rename from detection/RetinaFaceAntiCov/rcnn/processing/nms.py
rename to detection/retinaface_anticov/rcnn/processing/nms.py
diff --git a/detection/RetinaFaceAntiCov/retinaface_cov.py b/detection/retinaface_anticov/retinaface_cov.py
similarity index 100%
rename from detection/RetinaFaceAntiCov/retinaface_cov.py
rename to detection/retinaface_anticov/retinaface_cov.py
diff --git a/detection/RetinaFaceAntiCov/test.py b/detection/retinaface_anticov/test.py
similarity index 100%
rename from detection/RetinaFaceAntiCov/test.py
rename to detection/retinaface_anticov/test.py
diff --git a/evaluation/IJB/IJBB_Evaluation_MS1MV2.ipynb b/evaluation/IJB/IJBB_Evaluation_MS1MV2.ipynb
deleted file mode 100644
index c18234e..0000000
--- a/evaluation/IJB/IJBB_Evaluation_MS1MV2.ipynb
+++ /dev/null
@@ -1,520 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/home/jd4615/miniconda3/envs/insightface/lib/python2.7/site-packages/sklearn/utils/fixes.py:313: FutureWarning: numpy not_equal will not check object identity in the future. The comparison did not return the same result as suggested by the identity (`is`)) and will change.\n",
- " _nan_object_mask = _nan_object_array != _nan_object_array\n"
- ]
- }
- ],
- "source": [
- "import os\n",
- "import numpy as np\n",
- "import cPickle\n",
- "from sklearn.metrics import roc_curve, auc\n",
- "import matplotlib.pyplot as plt\n",
- "import timeit\n",
- "import sklearn\n",
- "import cv2\n",
- "import sys\n",
- "import glob\n",
- "sys.path.append('./recognition')\n",
- "from embedding import Embedding\n",
- "from menpo.visualize import print_progress\n",
- "from menpo.visualize.viewmatplotlib import sample_colours_from_colourmap\n",
- "from prettytable import PrettyTable\n",
- "from pathlib import Path\n",
- "import warnings \n",
- "warnings.filterwarnings(\"ignore\") "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {},
- "outputs": [],
- "source": [
- "def read_template_media_list(path):\n",
- " ijb_meta = np.loadtxt(path, dtype=str)\n",
- " templates = ijb_meta[:,1].astype(np.int)\n",
- " medias = ijb_meta[:,2].astype(np.int)\n",
- " return templates, medias"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {},
- "outputs": [],
- "source": [
- "def read_template_pair_list(path):\n",
- " pairs = np.loadtxt(path, dtype=str)\n",
- " t1 = pairs[:,0].astype(np.int)\n",
- " t2 = pairs[:,1].astype(np.int)\n",
- " label = pairs[:,2].astype(np.int)\n",
- " return t1, t2, label"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {},
- "outputs": [],
- "source": [
- "def read_image_feature(path):\n",
- " with open(path, 'rb') as fid:\n",
- " img_feats = cPickle.load(fid)\n",
- " return img_feats"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "metadata": {},
- "outputs": [],
- "source": [
- "def get_image_feature(img_path, img_list_path, model_path, gpu_id):\n",
- " img_list = open(img_list_path)\n",
- " embedding = Embedding(model_path, 0, gpu_id)\n",
- " files = img_list.readlines()\n",
- " img_feats = []\n",
- " faceness_scores = []\n",
- " for img_index, each_line in enumerate(print_progress(files)):\n",
- " name_lmk_score = each_line.strip().split(' ')\n",
- " img_name = os.path.join(img_path, name_lmk_score[0])\n",
- " img = cv2.imread(img_name)\n",
- " lmk = np.array([float(x) for x in name_lmk_score[1:-1]], dtype=np.float32)\n",
- " lmk = lmk.reshape( (5,2) )\n",
- " img_feats.append(embedding.get(img,lmk))\n",
- " faceness_scores.append(name_lmk_score[-1])\n",
- " img_feats = np.array(img_feats).astype(np.float32)\n",
- " faceness_scores = np.array(faceness_scores).astype(np.float32)\n",
- " return img_feats, faceness_scores"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 6,
- "metadata": {},
- "outputs": [],
- "source": [
- "def image2template_feature(img_feats = None, templates = None, medias = None):\n",
- " # ==========================================================\n",
- " # 1. face image feature l2 normalization. img_feats:[number_image x feats_dim]\n",
- " # 2. compute media feature.\n",
- " # 3. compute template feature.\n",
- " # ========================================================== \n",
- " unique_templates = np.unique(templates)\n",
- " template_feats = np.zeros((len(unique_templates), img_feats.shape[1]))\n",
- "\n",
- " for count_template, uqt in enumerate(unique_templates):\n",
- " (ind_t,) = np.where(templates == uqt)\n",
- " face_norm_feats = img_feats[ind_t]\n",
- " face_medias = medias[ind_t]\n",
- " unique_medias, unique_media_counts = np.unique(face_medias, return_counts=True)\n",
- " media_norm_feats = []\n",
- " for u,ct in zip(unique_medias, unique_media_counts):\n",
- " (ind_m,) = np.where(face_medias == u)\n",
- " if ct == 1:\n",
- " media_norm_feats += [face_norm_feats[ind_m]]\n",
- " else: # image features from the same video will be aggregated into one feature\n",
- " media_norm_feats += [np.mean(face_norm_feats[ind_m], 0, keepdims=True)]\n",
- " media_norm_feats = np.array(media_norm_feats)\n",
- " # media_norm_feats = media_norm_feats / np.sqrt(np.sum(media_norm_feats ** 2, -1, keepdims=True))\n",
- " template_feats[count_template] = np.sum(media_norm_feats, 0)\n",
- " if count_template % 2000 == 0: \n",
- " print('Finish Calculating {} template features.'.format(count_template))\n",
- " template_norm_feats = template_feats / np.sqrt(np.sum(template_feats ** 2, -1, keepdims=True))\n",
- " return template_norm_feats, unique_templates"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 7,
- "metadata": {},
- "outputs": [],
- "source": [
- "def verification(template_norm_feats = None, unique_templates = None, p1 = None, p2 = None):\n",
- " # ==========================================================\n",
- " # Compute set-to-set Similarity Score.\n",
- " # ==========================================================\n",
- " template2id = np.zeros((max(unique_templates)+1,1),dtype=int)\n",
- " for count_template, uqt in enumerate(unique_templates):\n",
- " template2id[uqt] = count_template\n",
- " \n",
- " score = np.zeros((len(p1),)) # save cosine distance between pairs \n",
- "\n",
- " total_pairs = np.array(range(len(p1)))\n",
- " batchsize = 100000 # small batchsize instead of all pairs in one batch due to the memory limiation\n",
- " sublists = [total_pairs[i:i + batchsize] for i in range(0, len(p1), batchsize)]\n",
- " total_sublists = len(sublists)\n",
- " for c, s in enumerate(sublists):\n",
- " feat1 = template_norm_feats[template2id[p1[s]]]\n",
- " feat2 = template_norm_feats[template2id[p2[s]]]\n",
- " similarity_score = np.sum(feat1 * feat2, -1)\n",
- " score[s] = similarity_score.flatten()\n",
- " if c % 10 == 0:\n",
- " print('Finish {}/{} pairs.'.format(c, total_sublists))\n",
- " return score"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 8,
- "metadata": {},
- "outputs": [],
- "source": [
- "def read_score(path):\n",
- " with open(path, 'rb') as fid:\n",
- " img_feats = cPickle.load(fid)\n",
- " return img_feats"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Step1: Load Meta Data"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 9,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Time: 0.83 s. \n"
- ]
- }
- ],
- "source": [
- "# =============================================================\n",
- "# load image and template relationships for template feature embedding\n",
- "# tid --> template id, mid --> media id \n",
- "# format:\n",
- "# image_name tid mid\n",
- "# =============================================================\n",
- "start = timeit.default_timer()\n",
- "templates, medias = read_template_media_list(os.path.join('IJBB/meta', 'ijbb_face_tid_mid.txt'))\n",
- "stop = timeit.default_timer()\n",
- "print('Time: %.2f s. ' % (stop - start))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 10,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Time: 31.88 s. \n"
- ]
- }
- ],
- "source": [
- "# =============================================================\n",
- "# load template pairs for template-to-template verification\n",
- "# tid : template id, label : 1/0\n",
- "# format:\n",
- "# tid_1 tid_2 label\n",
- "# =============================================================\n",
- "start = timeit.default_timer()\n",
- "p1, p2, label = read_template_pair_list(os.path.join('IJBB/meta', 'ijbb_template_pair_label.txt'))\n",
- "stop = timeit.default_timer()\n",
- "print('Time: %.2f s. ' % (stop - start))"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Step 2: Get Image Features"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 12,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "('loading', './pretrained_models/MS1MV2-ResNet100-Arcface/model', 0)\n",
- "[====================] 100% (227630/227630) - done. \n",
- "Time: 3279.69 s. \n",
- "Feature Shape: (227630 , 1024) .\n"
- ]
- }
- ],
- "source": [
- "# =============================================================\n",
- "# load image features \n",
- "# format:\n",
- "# img_feats: [image_num x feats_dim] (227630, 512)\n",
- "# =============================================================\n",
- "start = timeit.default_timer()\n",
- "#img_feats = read_image_feature('./MS1MV2/IJBB_MS1MV2_r100_arcface.pkl')\n",
- "img_path = './IJBB/loose_crop'\n",
- "img_list_path = './IJBB/meta/ijbb_name_5pts_score.txt'\n",
- "model_path = './pretrained_models/MS1MV2-ResNet100-Arcface/model'\n",
- "gpu_id = 1\n",
- "img_feats, faceness_scores = get_image_feature(img_path, img_list_path, model_path, gpu_id)\n",
- "stop = timeit.default_timer()\n",
- "print('Time: %.2f s. ' % (stop - start))\n",
- "print('Feature Shape: ({} , {}) .'.format(img_feats.shape[0], img_feats.shape[1]))"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Step3: Get Template Features"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 45,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Finish Calculating 0 template features.\n",
- "Finish Calculating 2000 template features.\n",
- "Finish Calculating 4000 template features.\n",
- "Finish Calculating 6000 template features.\n",
- "Finish Calculating 8000 template features.\n",
- "Finish Calculating 10000 template features.\n",
- "Finish Calculating 12000 template features.\n",
- "Time: 3.65 s. \n"
- ]
- }
- ],
- "source": [
- "# =============================================================\n",
- "# compute template features from image features.\n",
- "# =============================================================\n",
- "start = timeit.default_timer()\n",
- "# ========================================================== \n",
- "# Norm feature before aggregation into template feature?\n",
- "# Feature norm from embedding network and faceness score are able to decrease weights for noise samples (not face).\n",
- "# ========================================================== \n",
- "# 1. FaceScore (Feature Norm)\n",
- "# 2. FaceScore (Detector)\n",
- "\n",
- "use_norm_score = True # if True, TestMode(N1) \n",
- "use_detector_score = True # if True, TestMode(D1)\n",
- "use_flip_test = True # if True, TestMode(F2)\n",
- "\n",
- "if use_flip_test:\n",
- " # concat --- F1\n",
- " # img_input_feats = img_feats \n",
- " # add --- F2\n",
- " img_input_feats = img_feats[:,0:img_feats.shape[1]/2] + img_feats[:,img_feats.shape[1]/2:]\n",
- "else:\n",
- " img_input_feats = img_feats[:,0:img_feats.shape[1]/2]\n",
- " \n",
- "if use_norm_score:\n",
- " img_input_feats = img_input_feats\n",
- "else:\n",
- " # normalise features to remove norm information\n",
- " img_input_feats = img_input_feats / np.sqrt(np.sum(img_input_feats ** 2, -1, keepdims=True)) \n",
- " \n",
- "if use_detector_score:\n",
- " img_input_feats = img_input_feats * np.matlib.repmat(faceness_scores[:,np.newaxis], 1, img_input_feats.shape[1])\n",
- "else:\n",
- " img_input_feats = img_input_feats\n",
- "\n",
- "template_norm_feats, unique_templates = image2template_feature(img_input_feats, templates, medias)\n",
- "stop = timeit.default_timer()\n",
- "print('Time: %.2f s. ' % (stop - start))"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Step 4: Get Template Similarity Scores"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 46,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Finish 0/81 pairs.\n",
- "Finish 10/81 pairs.\n",
- "Finish 20/81 pairs.\n",
- "Finish 30/81 pairs.\n",
- "Finish 40/81 pairs.\n",
- "Finish 50/81 pairs.\n",
- "Finish 60/81 pairs.\n",
- "Finish 70/81 pairs.\n",
- "Finish 80/81 pairs.\n",
- "Time: 77.30 s. \n"
- ]
- }
- ],
- "source": [
- "# =============================================================\n",
- "# compute verification scores between template pairs.\n",
- "# =============================================================\n",
- "start = timeit.default_timer()\n",
- "score = verification(template_norm_feats, unique_templates, p1, p2)\n",
- "stop = timeit.default_timer()\n",
- "print('Time: %.2f s. ' % (stop - start))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 47,
- "metadata": {},
- "outputs": [],
- "source": [
- "score_save_name = './IJBB/result/MS1MV2-ResNet100-ArcFace-TestMode(N1D1F2).npy'\n",
- "np.save(score_save_name, score)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Step 5: Get ROC Curves and TPR@FPR Table"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 48,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "image/png": "\n",
- "text/plain": [
- "
"
- ]
- },
- "metadata": {
- "needs_background": "light"
- },
- "output_type": "display_data"
- }
- ],
- "source": [
- "score_save_path = './IJBB/result'\n",
- "files = glob.glob(score_save_path + '/MS1MV2*.npy') \n",
- "methods = []\n",
- "scores = []\n",
- "for file in files:\n",
- " methods.append(Path(file).stem)\n",
- " scores.append(np.load(file)) \n",
- "methods = np.array(methods)\n",
- "scores = dict(zip(methods,scores))\n",
- "colours = dict(zip(methods, sample_colours_from_colourmap(methods.shape[0], 'Set2')))\n",
- "#x_labels = [1/(10**x) for x in np.linspace(6, 0, 6)]\n",
- "x_labels = [10**-6, 10**-5, 10**-4,10**-3, 10**-2, 10**-1]\n",
- "tpr_fpr_table = PrettyTable(['Methods'] + map(str, x_labels))\n",
- "fig = plt.figure()\n",
- "for method in methods:\n",
- " fpr, tpr, _ = roc_curve(label, scores[method])\n",
- " roc_auc = auc(fpr, tpr)\n",
- " fpr = np.flipud(fpr)\n",
- " tpr = np.flipud(tpr) # select largest tpr at same fpr\n",
- " plt.plot(fpr, tpr, color=colours[method], lw=1, label=('[%s (AUC = %0.4f %%)]' % (method.split('-')[-1], roc_auc*100)))\n",
- " tpr_fpr_row = []\n",
- " tpr_fpr_row.append(method)\n",
- " for fpr_iter in np.arange(len(x_labels)):\n",
- " _, min_index = min(list(zip(abs(fpr-x_labels[fpr_iter]), range(len(fpr)))))\n",
- " tpr_fpr_row.append('%.4f' % tpr[min_index])\n",
- " tpr_fpr_table.add_row(tpr_fpr_row)\n",
- "plt.xlim([10**-6, 0.1])\n",
- "plt.ylim([0.3, 1.0])\n",
- "plt.grid(linestyle='--', linewidth=1)\n",
- "plt.xticks(x_labels) \n",
- "plt.yticks(np.linspace(0.3, 1.0, 8, endpoint=True)) \n",
- "plt.xscale('log')\n",
- "plt.xlabel('False Positive Rate')\n",
- "plt.ylabel('True Positive Rate')\n",
- "plt.title('ROC on IJB-B')\n",
- "plt.legend(loc=\"lower right\")\n",
- "plt.show()\n",
- "#fig.savefig('IJB-B.pdf')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 49,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "+-------------------------------------------+--------+--------+--------+--------+--------+--------+\n",
- "| Methods | 1e-06 | 1e-05 | 0.0001 | 0.001 | 0.01 | 0.1 |\n",
- "+-------------------------------------------+--------+--------+--------+--------+--------+--------+\n",
- "| MS1MV2-ResNet100-ArcFace-TestMode(N1D1F1) | 0.4091 | 0.9081 | 0.9477 | 0.9636 | 0.9755 | 0.9863 |\n",
- "| MS1MV2-ResNet100-ArcFace-TestMode(N0D1F2) | 0.4089 | 0.8995 | 0.9463 | 0.9642 | 0.9761 | 0.9867 |\n",
- "| MS1MV2-ResNet100-ArcFace-TestMode(N1D1F2) | 0.4281 | 0.9082 | 0.9490 | 0.9647 | 0.9767 | 0.9866 |\n",
- "| MS1MV2-ResNet100-ArcFace-TestMode(N1D0F0) | 0.3900 | 0.9042 | 0.9467 | 0.9620 | 0.9761 | 0.9860 |\n",
- "| MS1MV2-ResNet100-ArcFace-TestMode(N0D0F0) | 0.3828 | 0.8933 | 0.9425 | 0.9615 | 0.9751 | 0.9856 |\n",
- "| MS1MV2-ResNet100-ArcFace-TestMode(N1D1F0) | 0.3930 | 0.9039 | 0.9476 | 0.9630 | 0.9758 | 0.9861 |\n",
- "| MS1MV2-ResNet100-ArcFace-TestMode(N0D1F0) | 0.3892 | 0.8984 | 0.9456 | 0.9626 | 0.9753 | 0.9861 |\n",
- "+-------------------------------------------+--------+--------+--------+--------+--------+--------+\n"
- ]
- }
- ],
- "source": [
- "print(tpr_fpr_table)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# setting N1D1F2 is the best"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 2",
- "language": "python",
- "name": "python2"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 2
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython2",
- "version": "2.7.15"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/evaluation/IJB/IJBB_Evaluation_VGG2.ipynb b/evaluation/IJB/IJBB_Evaluation_VGG2.ipynb
deleted file mode 100644
index 0ed5fa7..0000000
--- a/evaluation/IJB/IJBB_Evaluation_VGG2.ipynb
+++ /dev/null
@@ -1,535 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/home/jd4615/miniconda3/envs/insightface/lib/python2.7/site-packages/sklearn/utils/fixes.py:313: FutureWarning: numpy not_equal will not check object identity in the future. The comparison did not return the same result as suggested by the identity (`is`)) and will change.\n",
- " _nan_object_mask = _nan_object_array != _nan_object_array\n"
- ]
- }
- ],
- "source": [
- "import os\n",
- "import numpy as np\n",
- "import cPickle\n",
- "from sklearn.metrics import roc_curve, auc\n",
- "import matplotlib.pyplot as plt\n",
- "import timeit\n",
- "import sklearn\n",
- "import cv2\n",
- "import sys\n",
- "import glob\n",
- "sys.path.append('./recognition')\n",
- "from embedding import Embedding\n",
- "from menpo.visualize import print_progress\n",
- "from menpo.visualize.viewmatplotlib import sample_colours_from_colourmap\n",
- "from prettytable import PrettyTable\n",
- "from pathlib import Path\n",
- "import warnings \n",
- "warnings.filterwarnings(\"ignore\") "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {},
- "outputs": [],
- "source": [
- "def read_template_media_list(path):\n",
- " ijb_meta = np.loadtxt(path, dtype=str)\n",
- " templates = ijb_meta[:,1].astype(np.int)\n",
- " medias = ijb_meta[:,2].astype(np.int)\n",
- " return templates, medias"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {},
- "outputs": [],
- "source": [
- "def read_template_pair_list(path):\n",
- " pairs = np.loadtxt(path, dtype=str)\n",
- " t1 = pairs[:,0].astype(np.int)\n",
- " t2 = pairs[:,1].astype(np.int)\n",
- " label = pairs[:,2].astype(np.int)\n",
- " return t1, t2, label"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {},
- "outputs": [],
- "source": [
- "def read_image_feature(path):\n",
- " with open(path, 'rb') as fid:\n",
- " img_feats = cPickle.load(fid)\n",
- " return img_feats"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "metadata": {},
- "outputs": [],
- "source": [
- "def get_image_feature(img_path, img_list_path, model_path, gpu_id):\n",
- " img_list = open(img_list_path)\n",
- " embedding = Embedding(model_path, 0, gpu_id)\n",
- " files = img_list.readlines()\n",
- " img_feats = []\n",
- " faceness_scores = []\n",
- " for img_index, each_line in enumerate(print_progress(files)):\n",
- " name_lmk_score = each_line.strip().split(' ')\n",
- " img_name = os.path.join(img_path, name_lmk_score[0])\n",
- " img = cv2.imread(img_name)\n",
- " lmk = np.array([float(x) for x in name_lmk_score[1:-1]], dtype=np.float32)\n",
- " lmk = lmk.reshape( (5,2) )\n",
- " img_feats.append(embedding.get(img,lmk))\n",
- " faceness_scores.append(name_lmk_score[-1])\n",
- " img_feats = np.array(img_feats).astype(np.float32)\n",
- " faceness_scores = np.array(faceness_scores).astype(np.float32)\n",
- " return img_feats, faceness_scores"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 6,
- "metadata": {},
- "outputs": [],
- "source": [
- "def image2template_feature(img_feats = None, templates = None, medias = None):\n",
- " # ==========================================================\n",
- " # 1. face image feature l2 normalization. img_feats:[number_image x feats_dim]\n",
- " # 2. compute media feature.\n",
- " # 3. compute template feature.\n",
- " # ========================================================== \n",
- " unique_templates = np.unique(templates)\n",
- " template_feats = np.zeros((len(unique_templates), img_feats.shape[1]))\n",
- "\n",
- " for count_template, uqt in enumerate(unique_templates):\n",
- " (ind_t,) = np.where(templates == uqt)\n",
- " face_norm_feats = img_feats[ind_t]\n",
- " face_medias = medias[ind_t]\n",
- " unique_medias, unique_media_counts = np.unique(face_medias, return_counts=True)\n",
- " media_norm_feats = []\n",
- " for u,ct in zip(unique_medias, unique_media_counts):\n",
- " (ind_m,) = np.where(face_medias == u)\n",
- " if ct == 1:\n",
- " media_norm_feats += [face_norm_feats[ind_m]]\n",
- " else: # image features from the same video will be aggregated into one feature\n",
- " media_norm_feats += [np.mean(face_norm_feats[ind_m], 0, keepdims=True)]\n",
- " media_norm_feats = np.array(media_norm_feats)\n",
- " # media_norm_feats = media_norm_feats / np.sqrt(np.sum(media_norm_feats ** 2, -1, keepdims=True))\n",
- " template_feats[count_template] = np.sum(media_norm_feats, 0)\n",
- " if count_template % 2000 == 0: \n",
- " print('Finish Calculating {} template features.'.format(count_template))\n",
- " template_norm_feats = template_feats / np.sqrt(np.sum(template_feats ** 2, -1, keepdims=True))\n",
- " return template_norm_feats, unique_templates"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 7,
- "metadata": {},
- "outputs": [],
- "source": [
- "def verification(template_norm_feats = None, unique_templates = None, p1 = None, p2 = None):\n",
- " # ==========================================================\n",
- " # Compute set-to-set Similarity Score.\n",
- " # ==========================================================\n",
- " template2id = np.zeros((max(unique_templates)+1,1),dtype=int)\n",
- " for count_template, uqt in enumerate(unique_templates):\n",
- " template2id[uqt] = count_template\n",
- " \n",
- " score = np.zeros((len(p1),)) # save cosine distance between pairs \n",
- "\n",
- " total_pairs = np.array(range(len(p1)))\n",
- " batchsize = 100000 # small batchsize instead of all pairs in one batch due to the memory limiation\n",
- " sublists = [total_pairs[i:i + batchsize] for i in range(0, len(p1), batchsize)]\n",
- " total_sublists = len(sublists)\n",
- " for c, s in enumerate(sublists):\n",
- " feat1 = template_norm_feats[template2id[p1[s]]]\n",
- " feat2 = template_norm_feats[template2id[p2[s]]]\n",
- " similarity_score = np.sum(feat1 * feat2, -1)\n",
- " score[s] = similarity_score.flatten()\n",
- " if c % 10 == 0:\n",
- " print('Finish {}/{} pairs.'.format(c, total_sublists))\n",
- " return score"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 8,
- "metadata": {},
- "outputs": [],
- "source": [
- "def read_score(path):\n",
- " with open(path, 'rb') as fid:\n",
- " img_feats = cPickle.load(fid)\n",
- " return img_feats"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Step1: Load Meta Data"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 9,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Time: 0.83 s. \n"
- ]
- }
- ],
- "source": [
- "# =============================================================\n",
- "# load image and template relationships for template feature embedding\n",
- "# tid --> template id, mid --> media id \n",
- "# format:\n",
- "# image_name tid mid\n",
- "# =============================================================\n",
- "start = timeit.default_timer()\n",
- "templates, medias = read_template_media_list(os.path.join('IJBB/meta', 'ijbb_face_tid_mid.txt'))\n",
- "stop = timeit.default_timer()\n",
- "print('Time: %.2f s. ' % (stop - start))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Time: 31.75 s. \n"
- ]
- }
- ],
- "source": [
- "# =============================================================\n",
- "# load template pairs for template-to-template verification\n",
- "# tid : template id, label : 1/0\n",
- "# format:\n",
- "# tid_1 tid_2 label\n",
- "# =============================================================\n",
- "start = timeit.default_timer()\n",
- "p1, p2, label = read_template_pair_list(os.path.join('IJBB/meta', 'ijbb_template_pair_label.txt'))\n",
- "stop = timeit.default_timer()\n",
- "print('Time: %.2f s. ' % (stop - start))"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Step 2: Get Image Features"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 11,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "('loading', './pretrained_models/VGG2-ResNet50-Arcface/model', 0)\n",
- "[====================] 100% (227630/227630) - done. \n",
- "Time: 2386.28 s. \n",
- "Feature Shape: (227630 , 1024) .\n"
- ]
- }
- ],
- "source": [
- "# =============================================================\n",
- "# load image features \n",
- "# format:\n",
- "# img_feats: [image_num x feats_dim] (227630, 512)\n",
- "# =============================================================\n",
- "start = timeit.default_timer()\n",
- "#img_feats = read_image_feature('./MS1MV2/IJBB_MS1MV2_r100_arcface.pkl')\n",
- "img_path = './IJBB/loose_crop'\n",
- "img_list_path = './IJBB/meta/ijbb_name_5pts_score.txt'\n",
- "model_path = './pretrained_models/VGG2-ResNet50-Arcface/model'\n",
- "gpu_id = 0\n",
- "img_feats, faceness_scores = get_image_feature(img_path, img_list_path, model_path, gpu_id)\n",
- "stop = timeit.default_timer()\n",
- "print('Time: %.2f s. ' % (stop - start))\n",
- "print('Feature Shape: ({} , {}) .'.format(img_feats.shape[0], img_feats.shape[1]))"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Step3: Get Template Features"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 12,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Finish Calculating 0 template features.\n",
- "Finish Calculating 2000 template features.\n",
- "Finish Calculating 4000 template features.\n",
- "Finish Calculating 6000 template features.\n",
- "Finish Calculating 8000 template features.\n",
- "Finish Calculating 10000 template features.\n",
- "Finish Calculating 12000 template features.\n",
- "Time: 3.41 s. \n"
- ]
- }
- ],
- "source": [
- "# =============================================================\n",
- "# compute template features from image features.\n",
- "# =============================================================\n",
- "start = timeit.default_timer()\n",
- "# ========================================================== \n",
- "# Norm feature before aggregation into template feature?\n",
- "# Feature norm from embedding network and faceness score are able to decrease weights for noise samples (not face).\n",
- "# ========================================================== \n",
- "# 1. FaceScore (Feature Norm)\n",
- "# 2. FaceScore (Detector)\n",
- "\n",
- "use_norm_score = False # if True, TestMode(N1) \n",
- "use_detector_score = True # if True, TestMode(D1)\n",
- "use_flip_test = True # if True, TestMode(F1)\n",
- "\n",
- "if use_flip_test:\n",
- " # concat --- F1\n",
- " #img_input_feats = img_feats \n",
- " # add --- F2\n",
- " img_input_feats = img_feats[:,0:img_feats.shape[1]/2] + img_feats[:,img_feats.shape[1]/2:]\n",
- "else:\n",
- " img_input_feats = img_feats[:,0:img_feats.shape[1]/2]\n",
- " \n",
- "if use_norm_score:\n",
- " img_input_feats = img_input_feats\n",
- "else:\n",
- " # normalise features to remove norm information\n",
- " img_input_feats = img_input_feats / np.sqrt(np.sum(img_input_feats ** 2, -1, keepdims=True)) \n",
- " \n",
- "if use_detector_score:\n",
- " img_input_feats = img_input_feats * np.matlib.repmat(faceness_scores[:,np.newaxis], 1, img_input_feats.shape[1])\n",
- "else:\n",
- " img_input_feats = img_input_feats\n",
- "\n",
- "template_norm_feats, unique_templates = image2template_feature(img_input_feats, templates, medias)\n",
- "stop = timeit.default_timer()\n",
- "print('Time: %.2f s. ' % (stop - start))"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Step 4: Get Template Similarity Scores"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 13,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Finish 0/81 pairs.\n",
- "Finish 10/81 pairs.\n",
- "Finish 20/81 pairs.\n",
- "Finish 30/81 pairs.\n",
- "Finish 40/81 pairs.\n",
- "Finish 50/81 pairs.\n",
- "Finish 60/81 pairs.\n",
- "Finish 70/81 pairs.\n",
- "Finish 80/81 pairs.\n",
- "Time: 38.38 s. \n"
- ]
- }
- ],
- "source": [
- "# =============================================================\n",
- "# compute verification scores between template pairs.\n",
- "# =============================================================\n",
- "start = timeit.default_timer()\n",
- "score = verification(template_norm_feats, unique_templates, p1, p2)\n",
- "stop = timeit.default_timer()\n",
- "print('Time: %.2f s. ' % (stop - start))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 14,
- "metadata": {},
- "outputs": [],
- "source": [
- "score_save_name = './IJBB/result/VGG2-ResNet50-ArcFace-TestMode(N0D1F2).npy'\n",
- "np.save(score_save_name, score)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Step 5: Get ROC Curves and TPR@FPR Table"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 6,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "image/png": "\n",
- "text/plain": [
- ""
- ]
- },
- "metadata": {
- "needs_background": "light"
- },
- "output_type": "display_data"
- }
- ],
- "source": [
- "score_save_path = './IJBB/result'\n",
- "files = glob.glob(score_save_path + '/VGG2*.npy') \n",
- "methods = []\n",
- "scores = []\n",
- "for file in files:\n",
- " methods.append(Path(file).stem)\n",
- " scores.append(np.load(file)) \n",
- "methods = np.array(methods)\n",
- "scores = dict(zip(methods,scores))\n",
- "colours = dict(zip(methods, sample_colours_from_colourmap(methods.shape[0], 'Set2')))\n",
- "#x_labels = [1/(10**x) for x in np.linspace(6, 0, 6)]\n",
- "x_labels = [10**-6, 10**-5, 10**-4,10**-3, 10**-2, 10**-1]\n",
- "tpr_fpr_table = PrettyTable(['Methods'] + map(str, x_labels))\n",
- "fig = plt.figure()\n",
- "for method in methods:\n",
- " fpr, tpr, _ = roc_curve(label, scores[method])\n",
- " roc_auc = auc(fpr, tpr)\n",
- " fpr = np.flipud(fpr)\n",
- " tpr = np.flipud(tpr) # select largest tpr at same fpr\n",
- " plt.plot(fpr, tpr, color=colours[method], lw=1, label=('[%s (AUC = %0.4f %%)]' % (method.split('-')[-1], roc_auc*100)))\n",
- " tpr_fpr_row = []\n",
- " tpr_fpr_row.append(method)\n",
- " for fpr_iter in np.arange(len(x_labels)):\n",
- " _, min_index = min(list(zip(abs(fpr-x_labels[fpr_iter]), range(len(fpr)))))\n",
- " tpr_fpr_row.append('%.4f' % tpr[min_index])\n",
- " tpr_fpr_table.add_row(tpr_fpr_row)\n",
- "plt.xlim([10**-6, 0.1])\n",
- "plt.ylim([0.3, 1.0])\n",
- "plt.grid(linestyle='--', linewidth=1)\n",
- "plt.xticks(x_labels) \n",
- "plt.yticks(np.linspace(0.3, 1.0, 8, endpoint=True)) \n",
- "plt.xscale('log')\n",
- "plt.xlabel('False Positive Rate')\n",
- "plt.ylabel('True Positive Rate')\n",
- "plt.title('ROC on IJB-B')\n",
- "plt.legend(loc=\"lower right\")\n",
- "plt.show()\n",
- "#fig.savefig('IJB-B.pdf')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 7,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "+----------------------------------------+--------+--------+--------+--------+--------+--------+\n",
- "| Methods | 1e-06 | 1e-05 | 0.0001 | 0.001 | 0.01 | 0.1 |\n",
- "+----------------------------------------+--------+--------+--------+--------+--------+--------+\n",
- "| VGG2-ResNet50-ArcFace-TestMode(N1D1F2) | 0.4044 | 0.8145 | 0.9056 | 0.9497 | 0.9779 | 0.9922 |\n",
- "| VGG2-ResNet50-ArcFace-TestMode(N1D0F0) | 0.4035 | 0.8038 | 0.8976 | 0.9437 | 0.9755 | 0.9914 |\n",
- "| VGG2-ResNet50-ArcFace-TestMode(N1D1F1) | 0.3940 | 0.8124 | 0.9028 | 0.9479 | 0.9770 | 0.9919 |\n",
- "| VGG2-ResNet50-ArcFace-TestMode(N0D0F0) | 0.3893 | 0.8050 | 0.8990 | 0.9448 | 0.9759 | 0.9918 |\n",
- "| VGG2-ResNet50-ArcFace-TestMode(N1D1F0) | 0.4098 | 0.8123 | 0.9022 | 0.9463 | 0.9766 | 0.9918 |\n",
- "| VGG2-ResNet50-ArcFace-TestMode(N0D1F0) | 0.3949 | 0.8130 | 0.9036 | 0.9471 | 0.9767 | 0.9919 |\n",
- "| VGG2-ResNet50-ArcFace-TestMode(N0D1F2) | 0.4011 | 0.8210 | 0.9069 | 0.9500 | 0.9779 | 0.9924 |\n",
- "+----------------------------------------+--------+--------+--------+--------+--------+--------+\n"
- ]
- }
- ],
- "source": [
- "print(tpr_fpr_table)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# setting N0D1F2 is the best"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Test Setting Conclusions"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "#### (1) add is better than concat for the flip test (N1D1F2 v.s. N1D1F1)\n",
- "#### (2) detection score contains some faceness information to decrease weights of noise samples within the template (N0D1F0 v.s. N0D0F0)"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 2",
- "language": "python",
- "name": "python2"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 2
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython2",
- "version": "2.7.15"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/evaluation/IJB/IJBC_Evaluation_MS1MV2.ipynb b/evaluation/IJB/IJBC_Evaluation_MS1MV2.ipynb
deleted file mode 100644
index e364be8..0000000
--- a/evaluation/IJB/IJBC_Evaluation_MS1MV2.ipynb
+++ /dev/null
@@ -1,532 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/home/jd4615/miniconda3/envs/insightface/lib/python2.7/site-packages/sklearn/utils/fixes.py:313: FutureWarning: numpy not_equal will not check object identity in the future. The comparison did not return the same result as suggested by the identity (`is`)) and will change.\n",
- " _nan_object_mask = _nan_object_array != _nan_object_array\n"
- ]
- }
- ],
- "source": [
- "import os\n",
- "import numpy as np\n",
- "import cPickle\n",
- "from sklearn.metrics import roc_curve, auc\n",
- "import matplotlib.pyplot as plt\n",
- "import timeit\n",
- "import sklearn\n",
- "import cv2\n",
- "import sys\n",
- "import glob\n",
- "sys.path.append('./recognition')\n",
- "from embedding import Embedding\n",
- "from menpo.visualize import print_progress\n",
- "from menpo.visualize.viewmatplotlib import sample_colours_from_colourmap\n",
- "from prettytable import PrettyTable\n",
- "from pathlib import Path\n",
- "import warnings \n",
- "warnings.filterwarnings(\"ignore\") "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {},
- "outputs": [],
- "source": [
- "def read_template_media_list(path):\n",
- " ijb_meta = np.loadtxt(path, dtype=str)\n",
- " templates = ijb_meta[:,1].astype(np.int)\n",
- " medias = ijb_meta[:,2].astype(np.int)\n",
- " return templates, medias"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {},
- "outputs": [],
- "source": [
- "def read_template_pair_list(path):\n",
- " pairs = np.loadtxt(path, dtype=str)\n",
- " t1 = pairs[:,0].astype(np.int)\n",
- " t2 = pairs[:,1].astype(np.int)\n",
- " label = pairs[:,2].astype(np.int)\n",
- " return t1, t2, label"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {},
- "outputs": [],
- "source": [
- "def read_image_feature(path):\n",
- " with open(path, 'rb') as fid:\n",
- " img_feats = cPickle.load(fid)\n",
- " return img_feats"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "metadata": {},
- "outputs": [],
- "source": [
- "def get_image_feature(img_path, img_list_path, model_path, gpu_id):\n",
- " img_list = open(img_list_path)\n",
- " embedding = Embedding(model_path, 0, gpu_id)\n",
- " files = img_list.readlines()\n",
- " img_feats = []\n",
- " faceness_scores = []\n",
- " for img_index, each_line in enumerate(print_progress(files)):\n",
- " name_lmk_score = each_line.strip().split(' ')\n",
- " img_name = os.path.join(img_path, name_lmk_score[0])\n",
- " img = cv2.imread(img_name)\n",
- " lmk = np.array([float(x) for x in name_lmk_score[1:-1]], dtype=np.float32)\n",
- " lmk = lmk.reshape( (5,2) )\n",
- " img_feats.append(embedding.get(img,lmk))\n",
- " faceness_scores.append(name_lmk_score[-1])\n",
- " img_feats = np.array(img_feats).astype(np.float32)\n",
- " faceness_scores = np.array(faceness_scores).astype(np.float32)\n",
- " return img_feats, faceness_scores"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 6,
- "metadata": {},
- "outputs": [],
- "source": [
- "def image2template_feature(img_feats = None, templates = None, medias = None):\n",
- " # ==========================================================\n",
- " # 1. face image feature l2 normalization. img_feats:[number_image x feats_dim]\n",
- " # 2. compute media feature.\n",
- " # 3. compute template feature.\n",
- " # ========================================================== \n",
- " unique_templates = np.unique(templates)\n",
- " template_feats = np.zeros((len(unique_templates), img_feats.shape[1]))\n",
- "\n",
- " for count_template, uqt in enumerate(unique_templates):\n",
- " (ind_t,) = np.where(templates == uqt)\n",
- " face_norm_feats = img_feats[ind_t]\n",
- " face_medias = medias[ind_t]\n",
- " unique_medias, unique_media_counts = np.unique(face_medias, return_counts=True)\n",
- " media_norm_feats = []\n",
- " for u,ct in zip(unique_medias, unique_media_counts):\n",
- " (ind_m,) = np.where(face_medias == u)\n",
- " if ct == 1:\n",
- " media_norm_feats += [face_norm_feats[ind_m]]\n",
- " else: # image features from the same video will be aggregated into one feature\n",
- " media_norm_feats += [np.mean(face_norm_feats[ind_m], 0, keepdims=True)]\n",
- " media_norm_feats = np.array(media_norm_feats)\n",
- " # media_norm_feats = media_norm_feats / np.sqrt(np.sum(media_norm_feats ** 2, -1, keepdims=True))\n",
- " template_feats[count_template] = np.sum(media_norm_feats, 0)\n",
- " if count_template % 2000 == 0: \n",
- " print('Finish Calculating {} template features.'.format(count_template))\n",
- " template_norm_feats = template_feats / np.sqrt(np.sum(template_feats ** 2, -1, keepdims=True))\n",
- " return template_norm_feats, unique_templates"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 19,
- "metadata": {},
- "outputs": [],
- "source": [
- "def verification(template_norm_feats = None, unique_templates = None, p1 = None, p2 = None):\n",
- " # ==========================================================\n",
- " # Compute set-to-set Similarity Score.\n",
- " # ==========================================================\n",
- " template2id = np.zeros((max(unique_templates)+1,1),dtype=int)\n",
- " for count_template, uqt in enumerate(unique_templates):\n",
- " template2id[uqt] = count_template\n",
- " \n",
- " score = np.zeros((len(p1),)) # save cosine distance between pairs \n",
- "\n",
- " total_pairs = np.array(range(len(p1)))\n",
- " batchsize = 100000 # small batchsize instead of all pairs in one batch due to the memory limiation\n",
- " sublists = [total_pairs[i:i + batchsize] for i in range(0, len(p1), batchsize)]\n",
- " total_sublists = len(sublists)\n",
- " for c, s in enumerate(sublists):\n",
- " feat1 = template_norm_feats[template2id[p1[s]]]\n",
- " feat2 = template_norm_feats[template2id[p2[s]]]\n",
- " similarity_score = np.sum(feat1 * feat2, -1)\n",
- " score[s] = similarity_score.flatten()\n",
- " if c % 10 == 0:\n",
- " print('Finish {}/{} pairs.'.format(c, total_sublists))\n",
- " return score"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 8,
- "metadata": {},
- "outputs": [],
- "source": [
- "def read_score(path):\n",
- " with open(path, 'rb') as fid:\n",
- " img_feats = cPickle.load(fid)\n",
- " return img_feats"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Step1: Load Meta Data"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 9,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Time: 1.73 s. \n"
- ]
- }
- ],
- "source": [
- "# =============================================================\n",
- "# load image and template relationships for template feature embedding\n",
- "# tid --> template id, mid --> media id \n",
- "# format:\n",
- "# image_name tid mid\n",
- "# =============================================================\n",
- "start = timeit.default_timer()\n",
- "templates, medias = read_template_media_list(os.path.join('IJBC/meta', 'ijbc_face_tid_mid.txt'))\n",
- "stop = timeit.default_timer()\n",
- "print('Time: %.2f s. ' % (stop - start))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 10,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Time: 63.98 s. \n"
- ]
- }
- ],
- "source": [
- "# =============================================================\n",
- "# load template pairs for template-to-template verification\n",
- "# tid : template id, label : 1/0\n",
- "# format:\n",
- "# tid_1 tid_2 label\n",
- "# =============================================================\n",
- "start = timeit.default_timer()\n",
- "p1, p2, label = read_template_pair_list(os.path.join('IJBC/meta', 'ijbc_template_pair_label.txt'))\n",
- "stop = timeit.default_timer()\n",
- "print('Time: %.2f s. ' % (stop - start))"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Step 2: Get Image Features"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 12,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "('loading', './pretrained_models/MS1MV2-ResNet100-Arcface/model', 0)\n",
- "[====================] 100% (469375/469375) - done. \n",
- "Time: 6806.24 s. \n",
- "Feature Shape: (469375 , 1024) .\n"
- ]
- }
- ],
- "source": [
- "# =============================================================\n",
- "# load image features \n",
- "# format:\n",
- "# img_feats: [image_num x feats_dim] (227630, 512)\n",
- "# =============================================================\n",
- "start = timeit.default_timer()\n",
- "#img_feats = read_image_feature('./MS1MV2/IJBB_MS1MV2_r100_arcface.pkl')\n",
- "img_path = './IJBC/loose_crop'\n",
- "img_list_path = './IJBC/meta/ijbc_name_5pts_score.txt'\n",
- "model_path = './pretrained_models/MS1MV2-ResNet100-Arcface/model'\n",
- "gpu_id = 1\n",
- "img_feats, faceness_scores = get_image_feature(img_path, img_list_path, model_path, gpu_id)\n",
- "stop = timeit.default_timer()\n",
- "print('Time: %.2f s. ' % (stop - start))\n",
- "print('Feature Shape: ({} , {}) .'.format(img_feats.shape[0], img_feats.shape[1]))"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Step3: Get Template Features"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 34,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Finish Calculating 0 template features.\n",
- "Finish Calculating 2000 template features.\n",
- "Finish Calculating 4000 template features.\n",
- "Finish Calculating 6000 template features.\n",
- "Finish Calculating 8000 template features.\n",
- "Finish Calculating 10000 template features.\n",
- "Finish Calculating 12000 template features.\n",
- "Finish Calculating 14000 template features.\n",
- "Finish Calculating 16000 template features.\n",
- "Finish Calculating 18000 template features.\n",
- "Finish Calculating 20000 template features.\n",
- "Finish Calculating 22000 template features.\n",
- "Time: 7.85 s. \n"
- ]
- }
- ],
- "source": [
- "# =============================================================\n",
- "# compute template features from image features.\n",
- "# =============================================================\n",
- "start = timeit.default_timer()\n",
- "# ========================================================== \n",
- "# Norm feature before aggregation into template feature?\n",
- "# Feature norm from embedding network and faceness score are able to decrease weights for noise samples (not face).\n",
- "# ========================================================== \n",
- "# 1. FaceScore (Feature Norm)\n",
- "# 2. FaceScore (Detector)\n",
- "\n",
- "use_norm_score = False # if Ture, TestMode(N1) \n",
- "use_detector_score = False # if Ture, TestMode(D1)\n",
- "use_flip_test = False # if Ture, TestMode(F1)\n",
- "\n",
- "if use_flip_test:\n",
- " # concat --- F1\n",
- " #img_input_feats = img_feats \n",
- " # add --- F2\n",
- " img_input_feats = img_feats[:,0:img_feats.shape[1]/2] + img_feats[:,img_feats.shape[1]/2:]\n",
- "else:\n",
- " img_input_feats = img_feats[:,0:img_feats.shape[1]/2]\n",
- " \n",
- "if use_norm_score:\n",
- " img_input_feats = img_input_feats\n",
- "else:\n",
- " # normalise features to remove norm information\n",
- " img_input_feats = img_input_feats / np.sqrt(np.sum(img_input_feats ** 2, -1, keepdims=True)) \n",
- " \n",
- "if use_detector_score:\n",
- " img_input_feats = img_input_feats * np.matlib.repmat(faceness_scores[:,np.newaxis], 1, img_input_feats.shape[1])\n",
- "else:\n",
- " img_input_feats = img_input_feats\n",
- "\n",
- "template_norm_feats, unique_templates = image2template_feature(img_input_feats, templates, medias)\n",
- "stop = timeit.default_timer()\n",
- "print('Time: %.2f s. ' % (stop - start))"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Step 4: Get Template Similarity Scores"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 35,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Finish 0/157 pairs.\n",
- "Finish 10/157 pairs.\n",
- "Finish 20/157 pairs.\n",
- "Finish 30/157 pairs.\n",
- "Finish 40/157 pairs.\n",
- "Finish 50/157 pairs.\n",
- "Finish 60/157 pairs.\n",
- "Finish 70/157 pairs.\n",
- "Finish 80/157 pairs.\n",
- "Finish 90/157 pairs.\n",
- "Finish 100/157 pairs.\n",
- "Finish 110/157 pairs.\n",
- "Finish 120/157 pairs.\n",
- "Finish 130/157 pairs.\n",
- "Finish 140/157 pairs.\n",
- "Finish 150/157 pairs.\n",
- "Time: 67.17 s. \n"
- ]
- }
- ],
- "source": [
- "# =============================================================\n",
- "# compute verification scores between template pairs.\n",
- "# =============================================================\n",
- "start = timeit.default_timer()\n",
- "score = verification(template_norm_feats, unique_templates, p1, p2)\n",
- "stop = timeit.default_timer()\n",
- "print('Time: %.2f s. ' % (stop - start))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 36,
- "metadata": {},
- "outputs": [],
- "source": [
- "score_save_name = './IJBC/result/MS1MV2-ResNet100-ArcFace-TestMode(N0D0F0).npy'\n",
- "np.save(score_save_name, score)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Step 5: Get ROC Curves and TPR@FPR Table"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 38,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "image/png": "\n",
- "text/plain": [
- ""
- ]
- },
- "metadata": {
- "needs_background": "light"
- },
- "output_type": "display_data"
- }
- ],
- "source": [
- "score_save_path = './IJBC/result'\n",
- "files = glob.glob(score_save_path + '/MS1MV2*.npy') \n",
- "methods = []\n",
- "scores = []\n",
- "for file in files:\n",
- " methods.append(Path(file).stem)\n",
- " scores.append(np.load(file)) \n",
- "methods = np.array(methods)\n",
- "scores = dict(zip(methods,scores))\n",
- "colours = dict(zip(methods, sample_colours_from_colourmap(methods.shape[0], 'Set2')))\n",
- "#x_labels = [1/(10**x) for x in np.linspace(6, 0, 6)]\n",
- "x_labels = [10**-6, 10**-5, 10**-4,10**-3, 10**-2, 10**-1]\n",
- "tpr_fpr_table = PrettyTable(['Methods'] + map(str, x_labels))\n",
- "fig = plt.figure()\n",
- "for method in methods:\n",
- " fpr, tpr, _ = roc_curve(label, scores[method])\n",
- " roc_auc = auc(fpr, tpr)\n",
- " fpr = np.flipud(fpr)\n",
- " tpr = np.flipud(tpr) # select largest tpr at same fpr\n",
- " plt.plot(fpr, tpr, color=colours[method], lw=1, label=('[%s (AUC = %0.4f %%)]' % (method.split('-')[-1], roc_auc*100)))\n",
- " tpr_fpr_row = []\n",
- " tpr_fpr_row.append(method)\n",
- " for fpr_iter in np.arange(len(x_labels)):\n",
- " _, min_index = min(list(zip(abs(fpr-x_labels[fpr_iter]), range(len(fpr)))))\n",
- " tpr_fpr_row.append('%.4f' % tpr[min_index])\n",
- " tpr_fpr_table.add_row(tpr_fpr_row)\n",
- "plt.xlim([10**-6, 0.1])\n",
- "plt.ylim([0.3, 1.0])\n",
- "plt.grid(linestyle='--', linewidth=1)\n",
- "plt.xticks(x_labels) \n",
- "plt.yticks(np.linspace(0.3, 1.0, 8, endpoint=True)) \n",
- "plt.xscale('log')\n",
- "plt.xlabel('False Positive Rate')\n",
- "plt.ylabel('True Positive Rate')\n",
- "plt.title('ROC on IJB-C')\n",
- "plt.legend(loc=\"lower right\")\n",
- "plt.show()\n",
- "#fig.savefig('IJB-B.pdf')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 39,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "+-------------------------------------------+--------+--------+--------+--------+--------+--------+\n",
- "| Methods | 1e-06 | 1e-05 | 0.0001 | 0.001 | 0.01 | 0.1 |\n",
- "+-------------------------------------------+--------+--------+--------+--------+--------+--------+\n",
- "| MS1MV2-ResNet100-ArcFace-TestMode(N1D1F1) | 0.8997 | 0.9434 | 0.9618 | 0.9744 | 0.9832 | 0.9907 |\n",
- "| MS1MV2-ResNet100-ArcFace-TestMode(N0D1F2) | 0.8829 | 0.9400 | 0.9607 | 0.9746 | 0.9833 | 0.9910 |\n",
- "| MS1MV2-ResNet100-ArcFace-TestMode(N1D1F2) | 0.8985 | 0.9447 | 0.9628 | 0.9753 | 0.9836 | 0.9908 |\n",
- "| MS1MV2-ResNet100-ArcFace-TestMode(N1D0F0) | 0.8906 | 0.9394 | 0.9603 | 0.9731 | 0.9829 | 0.9904 |\n",
- "| MS1MV2-ResNet100-ArcFace-TestMode(N0D0F0) | 0.8625 | 0.9315 | 0.9565 | 0.9720 | 0.9818 | 0.9901 |\n",
- "| MS1MV2-ResNet100-ArcFace-TestMode(N1D1F0) | 0.8943 | 0.9413 | 0.9610 | 0.9735 | 0.9829 | 0.9905 |\n",
- "| MS1MV2-ResNet100-ArcFace-TestMode(N0D1F0) | 0.8795 | 0.9387 | 0.9591 | 0.9731 | 0.9824 | 0.9904 |\n",
- "+-------------------------------------------+--------+--------+--------+--------+--------+--------+\n"
- ]
- }
- ],
- "source": [
- "print(tpr_fpr_table)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# setting N1D1F2 is the best"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 2",
- "language": "python",
- "name": "python2"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 2
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython2",
- "version": "2.7.15"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/evaluation/IJB/IJBC_Evaluation_VGG2.ipynb b/evaluation/IJB/IJBC_Evaluation_VGG2.ipynb
deleted file mode 100644
index 8f32634..0000000
--- a/evaluation/IJB/IJBC_Evaluation_VGG2.ipynb
+++ /dev/null
@@ -1,532 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/home/jd4615/miniconda3/envs/insightface/lib/python2.7/site-packages/sklearn/utils/fixes.py:313: FutureWarning: numpy not_equal will not check object identity in the future. The comparison did not return the same result as suggested by the identity (`is`)) and will change.\n",
- " _nan_object_mask = _nan_object_array != _nan_object_array\n"
- ]
- }
- ],
- "source": [
- "import os\n",
- "import numpy as np\n",
- "import cPickle\n",
- "from sklearn.metrics import roc_curve, auc\n",
- "import matplotlib.pyplot as plt\n",
- "import timeit\n",
- "import sklearn\n",
- "import cv2\n",
- "import sys\n",
- "import glob\n",
- "sys.path.append('./recognition')\n",
- "from embedding import Embedding\n",
- "from menpo.visualize import print_progress\n",
- "from menpo.visualize.viewmatplotlib import sample_colours_from_colourmap\n",
- "from prettytable import PrettyTable\n",
- "from pathlib import Path\n",
- "import warnings \n",
- "warnings.filterwarnings(\"ignore\") "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {},
- "outputs": [],
- "source": [
- "def read_template_media_list(path):\n",
- " ijb_meta = np.loadtxt(path, dtype=str)\n",
- " templates = ijb_meta[:,1].astype(np.int)\n",
- " medias = ijb_meta[:,2].astype(np.int)\n",
- " return templates, medias"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {},
- "outputs": [],
- "source": [
- "def read_template_pair_list(path):\n",
- " pairs = np.loadtxt(path, dtype=str)\n",
- " t1 = pairs[:,0].astype(np.int)\n",
- " t2 = pairs[:,1].astype(np.int)\n",
- " label = pairs[:,2].astype(np.int)\n",
- " return t1, t2, label"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {},
- "outputs": [],
- "source": [
- "def read_image_feature(path):\n",
- " with open(path, 'rb') as fid:\n",
- " img_feats = cPickle.load(fid)\n",
- " return img_feats"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "metadata": {},
- "outputs": [],
- "source": [
- "def get_image_feature(img_path, img_list_path, model_path, gpu_id):\n",
- " img_list = open(img_list_path)\n",
- " embedding = Embedding(model_path, 0, gpu_id)\n",
- " files = img_list.readlines()\n",
- " img_feats = []\n",
- " faceness_scores = []\n",
- " for img_index, each_line in enumerate(print_progress(files)):\n",
- " name_lmk_score = each_line.strip().split(' ')\n",
- " img_name = os.path.join(img_path, name_lmk_score[0])\n",
- " img = cv2.imread(img_name)\n",
- " lmk = np.array([float(x) for x in name_lmk_score[1:-1]], dtype=np.float32)\n",
- " lmk = lmk.reshape( (5,2) )\n",
- " img_feats.append(embedding.get(img,lmk))\n",
- " faceness_scores.append(name_lmk_score[-1])\n",
- " img_feats = np.array(img_feats).astype(np.float32)\n",
- " faceness_scores = np.array(faceness_scores).astype(np.float32)\n",
- " return img_feats, faceness_scores"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 6,
- "metadata": {},
- "outputs": [],
- "source": [
- "def image2template_feature(img_feats = None, templates = None, medias = None):\n",
- " # ==========================================================\n",
- " # 1. face image feature l2 normalization. img_feats:[number_image x feats_dim]\n",
- " # 2. compute media feature.\n",
- " # 3. compute template feature.\n",
- " # ========================================================== \n",
- " unique_templates = np.unique(templates)\n",
- " template_feats = np.zeros((len(unique_templates), img_feats.shape[1]))\n",
- "\n",
- " for count_template, uqt in enumerate(unique_templates):\n",
- " (ind_t,) = np.where(templates == uqt)\n",
- " face_norm_feats = img_feats[ind_t]\n",
- " face_medias = medias[ind_t]\n",
- " unique_medias, unique_media_counts = np.unique(face_medias, return_counts=True)\n",
- " media_norm_feats = []\n",
- " for u,ct in zip(unique_medias, unique_media_counts):\n",
- " (ind_m,) = np.where(face_medias == u)\n",
- " if ct == 1:\n",
- " media_norm_feats += [face_norm_feats[ind_m]]\n",
- " else: # image features from the same video will be aggregated into one feature\n",
- " media_norm_feats += [np.mean(face_norm_feats[ind_m], 0, keepdims=True)]\n",
- " media_norm_feats = np.array(media_norm_feats)\n",
- " # media_norm_feats = media_norm_feats / np.sqrt(np.sum(media_norm_feats ** 2, -1, keepdims=True))\n",
- " template_feats[count_template] = np.sum(media_norm_feats, 0)\n",
- " if count_template % 2000 == 0: \n",
- " print('Finish Calculating {} template features.'.format(count_template))\n",
- " template_norm_feats = template_feats / np.sqrt(np.sum(template_feats ** 2, -1, keepdims=True))\n",
- " return template_norm_feats, unique_templates"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 7,
- "metadata": {},
- "outputs": [],
- "source": [
- "def verification(template_norm_feats = None, unique_templates = None, p1 = None, p2 = None):\n",
- " # ==========================================================\n",
- " # Compute set-to-set Similarity Score.\n",
- " # ==========================================================\n",
- " template2id = np.zeros((max(unique_templates)+1,1),dtype=int)\n",
- " for count_template, uqt in enumerate(unique_templates):\n",
- " template2id[uqt] = count_template\n",
- " \n",
- " score = np.zeros((len(p1),)) # save cosine distance between pairs \n",
- "\n",
- " total_pairs = np.array(range(len(p1)))\n",
- " batchsize = 100000 # small batchsize instead of all pairs in one batch due to the memory limiation\n",
- " sublists = [total_pairs[i:i + batchsize] for i in range(0, len(p1), batchsize)]\n",
- " total_sublists = len(sublists)\n",
- " for c, s in enumerate(sublists):\n",
- " feat1 = template_norm_feats[template2id[p1[s]]]\n",
- " feat2 = template_norm_feats[template2id[p2[s]]]\n",
- " similarity_score = np.sum(feat1 * feat2, -1)\n",
- " score[s] = similarity_score.flatten()\n",
- " if c % 10 == 0:\n",
- " print('Finish {}/{} pairs.'.format(c, total_sublists))\n",
- " return score"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 8,
- "metadata": {},
- "outputs": [],
- "source": [
- "def read_score(path):\n",
- " with open(path, 'rb') as fid:\n",
- " img_feats = cPickle.load(fid)\n",
- " return img_feats"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Step1: Load Meta Data"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 9,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Time: 1.76 s. \n"
- ]
- }
- ],
- "source": [
- "# =============================================================\n",
- "# load image and template relationships for template feature embedding\n",
- "# tid --> template id, mid --> media id \n",
- "# format:\n",
- "# image_name tid mid\n",
- "# =============================================================\n",
- "start = timeit.default_timer()\n",
- "templates, medias = read_template_media_list(os.path.join('IJBC/meta', 'ijbc_face_tid_mid.txt'))\n",
- "stop = timeit.default_timer()\n",
- "print('Time: %.2f s. ' % (stop - start))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 10,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Time: 63.31 s. \n"
- ]
- }
- ],
- "source": [
- "# =============================================================\n",
- "# load template pairs for template-to-template verification\n",
- "# tid : template id, label : 1/0\n",
- "# format:\n",
- "# tid_1 tid_2 label\n",
- "# =============================================================\n",
- "start = timeit.default_timer()\n",
- "p1, p2, label = read_template_pair_list(os.path.join('IJBC/meta', 'ijbc_template_pair_label.txt'))\n",
- "stop = timeit.default_timer()\n",
- "print('Time: %.2f s. ' % (stop - start))"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Step 2: Get Image Features"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 11,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "('loading', './pretrained_models/VGG2-ResNet50-Arcface/model', 0)\n",
- "[====================] 100% (469375/469375) - done. \n",
- "Time: 5087.25 s. \n",
- "Feature Shape: (469375 , 1024) .\n"
- ]
- }
- ],
- "source": [
- "# =============================================================\n",
- "# load image features \n",
- "# format:\n",
- "# img_feats: [image_num x feats_dim] (227630, 512)\n",
- "# =============================================================\n",
- "start = timeit.default_timer()\n",
- "#img_feats = read_image_feature('./MS1MV2/IJBB_MS1MV2_r100_arcface.pkl')\n",
- "img_path = './IJBC/loose_crop'\n",
- "img_list_path = './IJBC/meta/ijbc_name_5pts_score.txt'\n",
- "model_path = './pretrained_models/VGG2-ResNet50-Arcface/model'\n",
- "gpu_id = 0\n",
- "img_feats, faceness_scores = get_image_feature(img_path, img_list_path, model_path, gpu_id)\n",
- "stop = timeit.default_timer()\n",
- "print('Time: %.2f s. ' % (stop - start))\n",
- "print('Feature Shape: ({} , {}) .'.format(img_feats.shape[0], img_feats.shape[1]))"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Step3: Get Template Features"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 34,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Finish Calculating 0 template features.\n",
- "Finish Calculating 2000 template features.\n",
- "Finish Calculating 4000 template features.\n",
- "Finish Calculating 6000 template features.\n",
- "Finish Calculating 8000 template features.\n",
- "Finish Calculating 10000 template features.\n",
- "Finish Calculating 12000 template features.\n",
- "Finish Calculating 14000 template features.\n",
- "Finish Calculating 16000 template features.\n",
- "Finish Calculating 18000 template features.\n",
- "Finish Calculating 20000 template features.\n",
- "Finish Calculating 22000 template features.\n",
- "Time: 9.98 s. \n"
- ]
- }
- ],
- "source": [
- "# =============================================================\n",
- "# compute template features from image features.\n",
- "# =============================================================\n",
- "start = timeit.default_timer()\n",
- "# ========================================================== \n",
- "# Norm feature before aggregation into template feature?\n",
- "# Feature norm from embedding network and faceness score are able to decrease weights for noise samples (not face).\n",
- "# ========================================================== \n",
- "# 1. FaceScore (Feature Norm)\n",
- "# 2. FaceScore (Detector)\n",
- "\n",
- "use_norm_score = True # if True, TestMode(N1) \n",
- "use_detector_score = True # if True, TestMode(D1)\n",
- "use_flip_test = True # if True, TestMode(F1)\n",
- "\n",
- "if use_flip_test:\n",
- " # concat --- F1\n",
- " img_input_feats = img_feats \n",
- " # add --- F2\n",
- " # img_input_feats = img_feats[:,0:img_feats.shape[1]/2] + img_feats[:,img_feats.shape[1]/2:]\n",
- "else:\n",
- " img_input_feats = img_feats[:,0:img_feats.shape[1]/2]\n",
- " \n",
- "if use_norm_score:\n",
- " img_input_feats = img_input_feats\n",
- "else:\n",
- " # normalise features to remove norm information\n",
- " img_input_feats = img_input_feats / np.sqrt(np.sum(img_input_feats ** 2, -1, keepdims=True)) \n",
- " \n",
- "if use_detector_score:\n",
- " img_input_feats = img_input_feats * np.matlib.repmat(faceness_scores[:,np.newaxis], 1, img_input_feats.shape[1])\n",
- "else:\n",
- " img_input_feats = img_input_feats\n",
- "\n",
- "template_norm_feats, unique_templates = image2template_feature(img_input_feats, templates, medias)\n",
- "stop = timeit.default_timer()\n",
- "print('Time: %.2f s. ' % (stop - start))"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Step 4: Get Template Similarity Scores"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 35,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Finish 0/157 pairs.\n",
- "Finish 10/157 pairs.\n",
- "Finish 20/157 pairs.\n",
- "Finish 30/157 pairs.\n",
- "Finish 40/157 pairs.\n",
- "Finish 50/157 pairs.\n",
- "Finish 60/157 pairs.\n",
- "Finish 70/157 pairs.\n",
- "Finish 80/157 pairs.\n",
- "Finish 90/157 pairs.\n",
- "Finish 100/157 pairs.\n",
- "Finish 110/157 pairs.\n",
- "Finish 120/157 pairs.\n",
- "Finish 130/157 pairs.\n",
- "Finish 140/157 pairs.\n",
- "Finish 150/157 pairs.\n",
- "Time: 146.08 s. \n"
- ]
- }
- ],
- "source": [
- "# =============================================================\n",
- "# compute verification scores between template pairs.\n",
- "# =============================================================\n",
- "start = timeit.default_timer()\n",
- "score = verification(template_norm_feats, unique_templates, p1, p2)\n",
- "stop = timeit.default_timer()\n",
- "print('Time: %.2f s. ' % (stop - start))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 36,
- "metadata": {},
- "outputs": [],
- "source": [
- "score_save_name = './IJBC/result/VGG2-ResNet50-ArcFace-TestMode(N1D1F1).npy'\n",
- "np.save(score_save_name, score)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Step 5: Get ROC Curves and TPR@FPR Table"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 39,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "image/png": "\n",
- "text/plain": [
- ""
- ]
- },
- "metadata": {
- "needs_background": "light"
- },
- "output_type": "display_data"
- }
- ],
- "source": [
- "score_save_path = './IJBC/result'\n",
- "files = glob.glob(score_save_path + '/VGG2*.npy') \n",
- "methods = []\n",
- "scores = []\n",
- "for file in files:\n",
- " methods.append(Path(file).stem)\n",
- " scores.append(np.load(file)) \n",
- "methods = np.array(methods)\n",
- "scores = dict(zip(methods,scores))\n",
- "colours = dict(zip(methods, sample_colours_from_colourmap(methods.shape[0], 'Set2')))\n",
- "#x_labels = [1/(10**x) for x in np.linspace(6, 0, 6)]\n",
- "x_labels = [10**-6, 10**-5, 10**-4,10**-3, 10**-2, 10**-1]\n",
- "tpr_fpr_table = PrettyTable(['Methods'] + map(str, x_labels))\n",
- "fig = plt.figure()\n",
- "for method in methods:\n",
- " fpr, tpr, _ = roc_curve(label, scores[method])\n",
- " roc_auc = auc(fpr, tpr)\n",
- " fpr = np.flipud(fpr)\n",
- " tpr = np.flipud(tpr) # select largest tpr at same fpr\n",
- " plt.plot(fpr, tpr, color=colours[method], lw=1, label=('[%s (AUC = %0.4f %%)]' % (method.split('-')[-1], roc_auc*100)))\n",
- " tpr_fpr_row = []\n",
- " tpr_fpr_row.append(method)\n",
- " for fpr_iter in np.arange(len(x_labels)):\n",
- " _, min_index = min(list(zip(abs(fpr-x_labels[fpr_iter]), range(len(fpr)))))\n",
- " tpr_fpr_row.append('%.4f' % tpr[min_index])\n",
- " tpr_fpr_table.add_row(tpr_fpr_row)\n",
- "plt.xlim([10**-6, 0.1])\n",
- "plt.ylim([0.3, 1.0])\n",
- "plt.grid(linestyle='--', linewidth=1)\n",
- "plt.xticks(x_labels) \n",
- "plt.yticks(np.linspace(0.3, 1.0, 8, endpoint=True)) \n",
- "plt.xscale('log')\n",
- "plt.xlabel('False Positive Rate')\n",
- "plt.ylabel('True Positive Rate')\n",
- "plt.title('ROC on IJB-C')\n",
- "plt.legend(loc=\"lower right\")\n",
- "plt.show()\n",
- "#fig.savefig('IJB-C.pdf')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 38,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "+----------------------------------------+--------+--------+--------+--------+--------+--------+\n",
- "| Methods | 1e-06 | 1e-05 | 0.0001 | 0.001 | 0.01 | 0.1 |\n",
- "+----------------------------------------+--------+--------+--------+--------+--------+--------+\n",
- "| VGG2-ResNet50-ArcFace-TestMode(N1D1F2) | 0.7444 | 0.8751 | 0.9279 | 0.9635 | 0.9841 | 0.9939 |\n",
- "| VGG2-ResNet50-ArcFace-TestMode(N1D0F0) | 0.6863 | 0.8554 | 0.9199 | 0.9586 | 0.9820 | 0.9934 |\n",
- "| VGG2-ResNet50-ArcFace-TestMode(N1D1F1) | 0.7586 | 0.8717 | 0.9253 | 0.9620 | 0.9836 | 0.9937 |\n",
- "| VGG2-ResNet50-ArcFace-TestMode(N0D0F0) | 0.7081 | 0.8612 | 0.9214 | 0.9595 | 0.9823 | 0.9934 |\n",
- "| VGG2-ResNet50-ArcFace-TestMode(N1D1F0) | 0.7470 | 0.8675 | 0.9245 | 0.9610 | 0.9830 | 0.9935 |\n",
- "| VGG2-ResNet50-ArcFace-TestMode(N0D1F0) | 0.7637 | 0.8733 | 0.9258 | 0.9617 | 0.9831 | 0.9936 |\n",
- "| VGG2-ResNet50-ArcFace-TestMode(N0D1F2) | 0.7668 | 0.8796 | 0.9289 | 0.9636 | 0.9840 | 0.9941 |\n",
- "+----------------------------------------+--------+--------+--------+--------+--------+--------+\n"
- ]
- }
- ],
- "source": [
- "print(tpr_fpr_table)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# setting N0D1F2 is the best"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 2",
- "language": "python",
- "name": "python2"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 2
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython2",
- "version": "2.7.15"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/evaluation/IJB/readme.txt b/evaluation/IJB/readme.txt
deleted file mode 100644
index f6027e1..0000000
--- a/evaluation/IJB/readme.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-To reproduce the figures and tables in the notebook, please download everything (model, code, data and meta info) from here:
-[Dropbox] https://www.dropbox.com/s/33a6haw7v79e5qe/IJB_release.tar?dl=0
-or
-[Baidu Cloud] https://pan.baidu.com/s/1oer0p4_mcOrs4cfdeWfbFg
-
-Please apply for the IJB-B and IJB-C by yourself and strictly follow their distribution licenses.
-
-Aknowledgement
-Great thanks for Weidi Xie's instruction [2,3,4,5] to evaluate ArcFace [1] on IJB-B[6] and IJB-C[7].
-
-[1] Jiankang Deng, Jia Guo, Niannan Xue, Stefanos Zafeiriou. Arcface: Additive angular margin loss for deep face recognition[J]. arXiv:1801.07698, 2018.
-[2] https://github.com/ox-vgg/vgg_face2.
-[3] Qiong Cao, Li Shen, Weidi Xie, Omkar M Parkhi, Andrew Zisserman. VGGFace2: A dataset for recognising faces across pose and age. FG, 2018.
-[4] Weidi Xie, Andrew Zisserman. Multicolumn Networks for Face Recognition. BMVC 2018.
-[5] Weidi Xie, Li Shen, Andrew Zisserman. Comparator Networks. ECCV, 2018.
-[6] Whitelam, Cameron, Emma Taborsky, Austin Blanton, Brianna Maze, Jocelyn C. Adams, Tim Miller, Nathan D. Kalka et al. IARPA Janus Benchmark-B Face Dataset. CVPR Workshops, 2017.
-[7] Maze, Brianna, Jocelyn Adams, James A. Duncan, Nathan Kalka, Tim Miller, Charles Otto, Anil K. Jain et al. IARPA Janus Benchmark–C: Face Dataset and Protocol. ICB, 2018.
-
-
diff --git a/examples/README.md b/examples/README.md
new file mode 100644
index 0000000..a414efb
--- /dev/null
+++ b/examples/README.md
@@ -0,0 +1,4 @@
+InsightFace Example
+---
+
+Before running the examples, please install the insightface package via `pip install -U insightface`.
diff --git a/examples/demo_analysis.py b/examples/demo_analysis.py
new file mode 100644
index 0000000..28045da
--- /dev/null
+++ b/examples/demo_analysis.py
@@ -0,0 +1,34 @@
+"""Demo: run FaceAnalysis on the 't1' sample image, save the drawn result, print face similarities."""
+import argparse
+import cv2
+import sys
+import numpy as np
+import insightface
+from insightface.app import FaceAnalysis
+from insightface.data import get_image as ins_get_image
+
+# compare major/minor numerically: as plain strings, '0.10' would sort before '0.3'
+assert tuple(int(p) for p in insightface.__version__.split('.')[:2]) >= (0, 3)
+
+parser = argparse.ArgumentParser(description='insightface app test')
+# general
+parser.add_argument('--ctx', default=0, type=int, help='ctx id, <0 means using cpu')
+parser.add_argument('--det-size', default=640, type=int, help='detection size')
+args = parser.parse_args()
+
+app = FaceAnalysis()
+app.prepare(ctx_id=args.ctx, det_size=(args.det_size,args.det_size))
+
+img = ins_get_image('t1')
+faces = app.get(img)
+assert len(faces)==6  # sanity check: this demo expects 6 detections on 't1'
+rimg = app.draw_on(img, faces)
+cv2.imwrite("./t1_output.jpg", rimg)
+
+# then print all-to-all face similarity
+feats = []
+for face in faces:
+    feats.append(face.normed_embedding)
+feats = np.array(feats, dtype=np.float32)
+sims = np.dot(feats, feats.T)  # pairwise dot products of the normed embeddings
+print(sims)
diff --git a/examples/mask_renderer.py b/examples/mask_renderer.py
new file mode 100644
index 0000000..5b04742
--- /dev/null
+++ b/examples/mask_renderer.py
@@ -0,0 +1,22 @@
+import os, sys, datetime
+import numpy as np
+import os.path as osp
+import cv2
+import insightface
+from insightface.app import MaskRenderer
+from insightface.data import get_image as ins_get_image
+
+
+if __name__ == "__main__":
+ #make sure that you have downloaded the correct insightface model pack.
+ #make sure that BFM.mat and BFM_UV.mat have been generated
+ tool = MaskRenderer()
+ tool.prepare(ctx_id=0, det_size=(128,128))
+ image = ins_get_image('Tom_Hanks_54745')
+ mask_image = "mask_blue"
+ params = tool.build_params(image)
+ mask_out = tool.render_mask(image, mask_image, params)
+
+ cv2.imwrite('output_mask.jpg', mask_out)
+
+
diff --git a/deploy/mxnet_to_ort.py b/examples/mxnet_to_onnx.py
similarity index 56%
rename from deploy/mxnet_to_ort.py
rename to examples/mxnet_to_onnx.py
index 16f9f2f..5a9c73a 100644
--- a/deploy/mxnet_to_ort.py
+++ b/examples/mxnet_to_onnx.py
@@ -2,10 +2,13 @@ import sys
import os
import argparse
import onnx
+import json
import mxnet as mx
from onnx import helper
from onnx import TensorProto
from onnx import numpy_helper
+import onnxruntime
+import cv2
print('mxnet version:', mx.__version__)
print('onnx version:', onnx.__version__)
@@ -23,12 +26,15 @@ def create_map(graph_member_list):
return member_map
-parser = argparse.ArgumentParser(description='convert arcface models to onnx')
+parser = argparse.ArgumentParser(description='convert mxnet model to onnx')
# general
parser.add_argument('params', default='./r100a/model-0000.params', help='mxnet params to load.')
parser.add_argument('output', default='./r100a.onnx', help='path to write onnx model.')
parser.add_argument('--eps', default=1.0e-8, type=float, help='eps for weights.')
parser.add_argument('--input-shape', default='3,112,112', help='input shape.')
+parser.add_argument('--check', action='store_true')
+parser.add_argument('--input-mean', default=0.0, type=float, help='input mean for checking.')
+parser.add_argument('--input-std', default=1.0, type=float, help='input std for checking.')
args = parser.parse_args()
input_shape = (1,) + tuple( [int(x) for x in args.input_shape.split(',')] )
@@ -41,6 +47,29 @@ assert os.path.exists(sym_file)
assert os.path.exists(params_file)
sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
+
+# Collect the names of BatchNorm nodes whose gamma must be exported as 1 (fix_gamma set).
+nodes = json.loads(sym.tojson())['nodes']
+bn_fixgamma_list = []
+for node in nodes:
+    if node['op'] != 'BatchNorm':
+        continue
+    # the attribute dict may be missing entirely ('attr' in older symbol JSON); avoid a KeyError
+    attr = node.get('attrs') or node.get('attr') or {}
+    # MXNet's BatchNorm defaults to fix_gamma=True, so an absent attribute means "fixed"
+    if str(attr.get('fix_gamma', 'True')).lower() in ('true', '1'):
+        bn_fixgamma_list.append(node['name'])
+    #print(node)
+
+print('fixgamma list:', bn_fixgamma_list)
+layer = None
+#layer = 'conv_2_dw_relu' #for debug
+
+if layer is not None:
+ all_layers = sym.get_internals()
+ sym = all_layers[layer + '_output']
+
+
eps = args.eps
arg = {}
@@ -50,24 +79,27 @@ ac = 0
for k in arg_params:
v = arg_params[k]
nv = v.asnumpy()
- #print(k, nv.dtype)
nv = nv.astype(np.float32)
+ #print(k, nv.shape)
+ if k.endswith('_gamma'):
+ bnname = k[:-6]
+ if bnname in bn_fixgamma_list:
+ nv[:] = 1.0
ac += nv.size
invalid += np.count_nonzero(np.abs(nv)= len(self.seq):
- raise StopIteration
- idx = self.seq[self.cur]
- self.cur += 1
- s = self.imgrec.read_idx(idx)
- header, img = recordio.unpack(s)
- label = header.label
- return label, img, None, None
-
- def brightness_aug(self, src, x):
- alpha = 1.0 + random.uniform(-x, x)
- src *= alpha
- return src
-
- def contrast_aug(self, src, x):
- alpha = 1.0 + random.uniform(-x, x)
- coef = nd.array([[[0.299, 0.587, 0.114]]])
- gray = src * coef
- gray = (3.0 * (1.0 - alpha) / gray.size) * nd.sum(gray)
- src *= alpha
- src += gray
- return src
-
- def saturation_aug(self, src, x):
- alpha = 1.0 + random.uniform(-x, x)
- coef = nd.array([[[0.299, 0.587, 0.114]]])
- gray = src * coef
- gray = nd.sum(gray, axis=2, keepdims=True)
- gray *= (1.0 - alpha)
- src *= alpha
- src += gray
- return src
-
- def color_aug(self, img, x):
- #augs = [self.brightness_aug, self.contrast_aug, self.saturation_aug]
- #random.shuffle(augs)
- #for aug in augs:
- # #print(img.shape)
- # img = aug(img, x)
- # #print(img.shape)
- #return img
- return self.CJA(img)
-
- def mirror_aug(self, img):
- _rd = random.randint(0, 1)
- if _rd == 1:
- for c in range(img.shape[2]):
- img[:, :, c] = np.fliplr(img[:, :, c])
- return img
-
- def compress_aug(self, img):
- buf = BytesIO()
- img = Image.fromarray(img.asnumpy(), 'RGB')
- q = random.randint(2, 20)
- img.save(buf, format='JPEG', quality=q)
- buf = buf.getvalue()
- img = Image.open(BytesIO(buf))
- return nd.array(np.asarray(img, 'float32'))
-
- def next(self):
- if not self.is_init:
- self.reset()
- self.is_init = True
- """Returns the next batch of data."""
- #print('in next', self.cur, self.labelcur)
- self.nbatch += 1
- batch_size = self.batch_size
- c, h, w = self.data_shape
- batch_data = nd.empty((batch_size, c, h, w))
- if self.provide_label is not None:
- batch_label = nd.empty(self.provide_label[0][1])
- i = 0
- try:
- while i < batch_size:
- #print('XXXX', i)
- label, s, bbox, landmark = self.next_sample()
- gender = int(label[0])
- age = int(label[1])
- assert age >= 0
- #assert gender==0 or gender==1
- plabel = np.zeros(shape=(101, ), dtype=np.float32)
- plabel[0] = gender
- if age == 0:
- age = 1
- if age > 100:
- age = 100
- plabel[1:age + 1] = 1
- label = plabel
- _data = self.imdecode(s)
- if _data.shape[0] != self.data_shape[1]:
- _data = mx.image.resize_short(_data, self.data_shape[1])
- if self.rand_mirror:
- _rd = random.randint(0, 1)
- if _rd == 1:
- _data = mx.ndarray.flip(data=_data, axis=1)
- if self.color_jittering > 0:
- if self.color_jittering > 1:
- _rd = random.randint(0, 1)
- if _rd == 1:
- _data = self.compress_aug(_data)
- #print('do color aug')
- _data = _data.astype('float32', copy=False)
- #print(_data.__class__)
- _data = self.color_aug(_data, 0.125)
- if self.nd_mean is not None:
- _data = _data.astype('float32', copy=False)
- _data -= self.nd_mean
- _data *= 0.0078125
- if self.cutoff > 0:
- _rd = random.randint(0, 1)
- if _rd == 1:
- #print('do cutoff aug', self.cutoff)
- centerh = random.randint(0, _data.shape[0] - 1)
- centerw = random.randint(0, _data.shape[1] - 1)
- half = self.cutoff // 2
- starth = max(0, centerh - half)
- endh = min(_data.shape[0], centerh + half)
- startw = max(0, centerw - half)
- endw = min(_data.shape[1], centerw + half)
- #print(starth, endh, startw, endw, _data.shape)
- _data[starth:endh, startw:endw, :] = 128
- data = [_data]
- for datum in data:
- assert i < batch_size, 'Batch size must be multiples of augmenter output length'
- #print(datum.shape)
- batch_data[i][:] = self.postprocess_data(datum)
- batch_label[i][:] = label
- i += 1
- except StopIteration:
- if i < batch_size:
- raise StopIteration
-
- return io.DataBatch([batch_data], [batch_label], batch_size - i)
-
- def check_data_shape(self, data_shape):
- """Checks if the input data shape is valid"""
- if not len(data_shape) == 3:
- raise ValueError(
- 'data_shape should have length 3, with dimensions CxHxW')
- if not data_shape[0] == 3:
- raise ValueError(
- 'This iterator expects inputs to have 3 channels.')
-
- def check_valid_image(self, data):
- """Checks if the input data is valid"""
- if len(data[0].shape) == 0:
- raise RuntimeError('Data shape is wrong')
-
- def imdecode(self, s):
- """Decodes a string or byte string to an NDArray.
- See mx.img.imdecode for more details."""
- img = mx.image.imdecode(s) #mx.ndarray
- return img
-
- def read_image(self, fname):
- """Reads an input image `fname` and returns the decoded raw bytes.
-
- Example usage:
- ----------
- >>> dataIter.read_image('Face.jpg') # returns decoded raw bytes.
- """
- with open(os.path.join(self.path_root, fname), 'rb') as fin:
- img = fin.read()
- return img
-
- def augmentation_transform(self, data):
- """Transforms input data with specified augmentation."""
- for aug in self.auglist:
- data = [ret for src in data for ret in aug(src)]
- return data
-
- def postprocess_data(self, datum):
- """Final postprocessing step before image is loaded into the batch."""
- return nd.transpose(datum, axes=(2, 0, 1))
-
-
-class FaceImageIterList(io.DataIter):
- def __init__(self, iter_list):
- assert len(iter_list) > 0
- self.provide_data = iter_list[0].provide_data
- self.provide_label = iter_list[0].provide_label
- self.iter_list = iter_list
- self.cur_iter = None
-
- def reset(self):
- self.cur_iter.reset()
-
- def next(self):
- self.cur_iter = random.choice(self.iter_list)
- while True:
- try:
- ret = self.cur_iter.next()
- except StopIteration:
- self.cur_iter.reset()
- continue
- return ret
diff --git a/gender-age/face_model.py b/gender-age/face_model.py
deleted file mode 100644
index 1ce54bb..0000000
--- a/gender-age/face_model.py
+++ /dev/null
@@ -1,109 +0,0 @@
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from scipy import misc
-import sys
-import os
-import argparse
-#import tensorflow as tf
-import numpy as np
-import mxnet as mx
-import random
-import cv2
-import sklearn
-from sklearn.decomposition import PCA
-from time import sleep
-from easydict import EasyDict as edict
-from mtcnn_detector import MtcnnDetector
-sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'src', 'common'))
-import face_image
-import face_preprocess
-
-
-def do_flip(data):
- for idx in range(data.shape[0]):
- data[idx, :, :] = np.fliplr(data[idx, :, :])
-
-
-def get_model(ctx, image_size, model_str, layer):
- _vec = model_str.split(',')
- assert len(_vec) == 2
- prefix = _vec[0]
- epoch = int(_vec[1])
- print('loading', prefix, epoch)
- sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
- all_layers = sym.get_internals()
- sym = all_layers[layer + '_output']
- model = mx.mod.Module(symbol=sym, context=ctx, label_names=None)
- #model.bind(data_shapes=[('data', (args.batch_size, 3, image_size[0], image_size[1]))], label_shapes=[('softmax_label', (args.batch_size,))])
- model.bind(data_shapes=[('data', (1, 3, image_size[0], image_size[1]))])
- model.set_params(arg_params, aux_params)
- return model
-
-
-class FaceModel:
- def __init__(self, args):
- self.args = args
- if args.gpu >= 0:
- ctx = mx.gpu(args.gpu)
- else:
- ctx = mx.cpu()
- _vec = args.image_size.split(',')
- assert len(_vec) == 2
- image_size = (int(_vec[0]), int(_vec[1]))
- self.model = None
- if len(args.model) > 0:
- self.model = get_model(ctx, image_size, args.model, 'fc1')
-
- self.det_minsize = 50
- self.det_threshold = [0.6, 0.7, 0.8]
- #self.det_factor = 0.9
- self.image_size = image_size
- mtcnn_path = os.path.join(os.path.dirname(__file__), 'mtcnn-model')
- if args.det == 0:
- detector = MtcnnDetector(model_folder=mtcnn_path,
- ctx=ctx,
- num_worker=1,
- accurate_landmark=True,
- threshold=self.det_threshold)
- else:
- detector = MtcnnDetector(model_folder=mtcnn_path,
- ctx=ctx,
- num_worker=1,
- accurate_landmark=True,
- threshold=[0.0, 0.0, 0.2])
- self.detector = detector
-
- def get_input(self, face_img):
- ret = self.detector.detect_face(face_img, det_type=self.args.det)
- if ret is None:
- return None
- bbox, points = ret
- if bbox.shape[0] == 0:
- return None
- bbox = bbox[0, 0:4]
- points = points[0, :].reshape((2, 5)).T
- #print(bbox)
- #print(points)
- nimg = face_preprocess.preprocess(face_img,
- bbox,
- points,
- image_size='112,112')
- nimg = cv2.cvtColor(nimg, cv2.COLOR_BGR2RGB)
- aligned = np.transpose(nimg, (2, 0, 1))
- input_blob = np.expand_dims(aligned, axis=0)
- data = mx.nd.array(input_blob)
- db = mx.io.DataBatch(data=(data, ))
- return db
-
- def get_ga(self, data):
- self.model.forward(data, is_train=False)
- ret = self.model.get_outputs()[0].asnumpy()
- g = ret[:, 0:2].flatten()
- gender = np.argmax(g)
- a = ret[:, 2:202].reshape((100, 2))
- a = np.argmax(a, axis=1)
- age = int(sum(a))
-
- return gender, age
diff --git a/gender-age/helper.py b/gender-age/helper.py
deleted file mode 100644
index 38f2c9c..0000000
--- a/gender-age/helper.py
+++ /dev/null
@@ -1,172 +0,0 @@
-# coding: utf-8
-# YuanYang
-import math
-import cv2
-import numpy as np
-
-
-def nms(boxes, overlap_threshold, mode='Union'):
- """
- non max suppression
-
- Parameters:
- ----------
- box: numpy array n x 5
- input bbox array
- overlap_threshold: float number
- threshold of overlap
- mode: float number
- how to compute overlap ratio, 'Union' or 'Min'
- Returns:
- -------
- index array of the selected bbox
- """
- # if there are no boxes, return an empty list
- if len(boxes) == 0:
- return []
-
- # if the bounding boxes integers, convert them to floats
- if boxes.dtype.kind == "i":
- boxes = boxes.astype("float")
-
- # initialize the list of picked indexes
- pick = []
-
- # grab the coordinates of the bounding boxes
- x1, y1, x2, y2, score = [boxes[:, i] for i in range(5)]
-
- area = (x2 - x1 + 1) * (y2 - y1 + 1)
- idxs = np.argsort(score)
-
- # keep looping while some indexes still remain in the indexes list
- while len(idxs) > 0:
- # grab the last index in the indexes list and add the index value to the list of picked indexes
- last = len(idxs) - 1
- i = idxs[last]
- pick.append(i)
-
- xx1 = np.maximum(x1[i], x1[idxs[:last]])
- yy1 = np.maximum(y1[i], y1[idxs[:last]])
- xx2 = np.minimum(x2[i], x2[idxs[:last]])
- yy2 = np.minimum(y2[i], y2[idxs[:last]])
-
- # compute the width and height of the bounding box
- w = np.maximum(0, xx2 - xx1 + 1)
- h = np.maximum(0, yy2 - yy1 + 1)
-
- inter = w * h
- if mode == 'Min':
- overlap = inter / np.minimum(area[i], area[idxs[:last]])
- else:
- overlap = inter / (area[i] + area[idxs[:last]] - inter)
-
- # delete all indexes from the index list that have
- idxs = np.delete(
- idxs,
- np.concatenate(([last], np.where(overlap > overlap_threshold)[0])))
-
- return pick
-
-
-def adjust_input(in_data):
- """
- adjust the input from (h, w, c) to ( 1, c, h, w) for network input
-
- Parameters:
- ----------
- in_data: numpy array of shape (h, w, c)
- input data
- Returns:
- -------
- out_data: numpy array of shape (1, c, h, w)
- reshaped array
- """
- if in_data.dtype is not np.dtype('float32'):
- out_data = in_data.astype(np.float32)
- else:
- out_data = in_data
-
- out_data = out_data.transpose((2, 0, 1))
- out_data = np.expand_dims(out_data, 0)
- out_data = (out_data - 127.5) * 0.0078125
- return out_data
-
-
-def generate_bbox(map, reg, scale, threshold):
- """
- generate bbox from feature map
- Parameters:
- ----------
- map: numpy array , n x m x 1
- detect score for each position
- reg: numpy array , n x m x 4
- bbox
- scale: float number
- scale of this detection
- threshold: float number
- detect threshold
- Returns:
- -------
- bbox array
- """
- stride = 2
- cellsize = 12
-
- t_index = np.where(map > threshold)
-
- # find nothing
- if t_index[0].size == 0:
- return np.array([])
-
- dx1, dy1, dx2, dy2 = [reg[0, i, t_index[0], t_index[1]] for i in range(4)]
-
- reg = np.array([dx1, dy1, dx2, dy2])
- score = map[t_index[0], t_index[1]]
- boundingbox = np.vstack([
- np.round((stride * t_index[1] + 1) / scale),
- np.round((stride * t_index[0] + 1) / scale),
- np.round((stride * t_index[1] + 1 + cellsize) / scale),
- np.round((stride * t_index[0] + 1 + cellsize) / scale), score, reg
- ])
-
- return boundingbox.T
-
-
-def detect_first_stage(img, net, scale, threshold):
- """
- run PNet for first stage
-
- Parameters:
- ----------
- img: numpy array, bgr order
- input image
- scale: float number
- how much should the input image scale
- net: PNet
- worker
- Returns:
- -------
- total_boxes : bboxes
- """
- height, width, _ = img.shape
- hs = int(math.ceil(height * scale))
- ws = int(math.ceil(width * scale))
-
- im_data = cv2.resize(img, (ws, hs))
-
- # adjust for the network input
- input_buf = adjust_input(im_data)
- output = net.predict(input_buf)
- boxes = generate_bbox(output[1][0, 1, :, :], output[0], scale, threshold)
-
- if boxes.size == 0:
- return None
-
- # nms
- pick = nms(boxes[:, 0:5], 0.5, mode='Union')
- boxes = boxes[pick]
- return boxes
-
-
-def detect_first_stage_warpper(args):
- return detect_first_stage(*args)
diff --git a/gender-age/model/model-0000.params b/gender-age/model/model-0000.params
deleted file mode 100644
index 35118ad..0000000
Binary files a/gender-age/model/model-0000.params and /dev/null differ
diff --git a/gender-age/model/model-symbol.json b/gender-age/model/model-symbol.json
deleted file mode 100644
index cea9abc..0000000
--- a/gender-age/model/model-symbol.json
+++ /dev/null
@@ -1,2399 +0,0 @@
-{
- "nodes": [
- {
- "op": "null",
- "name": "data",
- "inputs": []
- },
- {
- "op": "_minus_scalar",
- "name": "_minusscalar0",
- "attrs": {"scalar": "127.5"},
- "inputs": [[0, 0, 0]]
- },
- {
- "op": "_mul_scalar",
- "name": "_mulscalar0",
- "attrs": {"scalar": "0.0078125"},
- "inputs": [[1, 0, 0]]
- },
- {
- "op": "null",
- "name": "conv_1_conv2d_weight",
- "attrs": {
- "kernel": "(3, 3)",
- "no_bias": "True",
- "num_filter": "8",
- "num_group": "1",
- "pad": "(1, 1)",
- "stride": "(1, 1)"
- },
- "inputs": []
- },
- {
- "op": "Convolution",
- "name": "conv_1_conv2d",
- "attrs": {
- "kernel": "(3, 3)",
- "no_bias": "True",
- "num_filter": "8",
- "num_group": "1",
- "pad": "(1, 1)",
- "stride": "(1, 1)"
- },
- "inputs": [[2, 0, 0], [3, 0, 0]]
- },
- {
- "op": "null",
- "name": "conv_1_batchnorm_gamma",
- "attrs": {"fix_gamma": "True"},
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_1_batchnorm_beta",
- "attrs": {"fix_gamma": "True"},
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_1_batchnorm_moving_mean",
- "attrs": {
- "__init__": "[\"zero\", {}]",
- "fix_gamma": "True"
- },
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_1_batchnorm_moving_var",
- "attrs": {
- "__init__": "[\"one\", {}]",
- "fix_gamma": "True"
- },
- "inputs": []
- },
- {
- "op": "BatchNorm",
- "name": "conv_1_batchnorm",
- "attrs": {"fix_gamma": "True"},
- "inputs": [[4, 0, 0], [5, 0, 0], [6, 0, 0], [7, 0, 1], [8, 0, 1]]
- },
- {
- "op": "Activation",
- "name": "conv_1_relu",
- "attrs": {"act_type": "relu"},
- "inputs": [[9, 0, 0]]
- },
- {
- "op": "null",
- "name": "conv_2_dw_conv2d_weight",
- "attrs": {
- "kernel": "(3, 3)",
- "no_bias": "True",
- "num_filter": "8",
- "num_group": "8",
- "pad": "(1, 1)",
- "stride": "(1, 1)"
- },
- "inputs": []
- },
- {
- "op": "Convolution",
- "name": "conv_2_dw_conv2d",
- "attrs": {
- "kernel": "(3, 3)",
- "no_bias": "True",
- "num_filter": "8",
- "num_group": "8",
- "pad": "(1, 1)",
- "stride": "(1, 1)"
- },
- "inputs": [[10, 0, 0], [11, 0, 0]]
- },
- {
- "op": "null",
- "name": "conv_2_dw_batchnorm_gamma",
- "attrs": {"fix_gamma": "True"},
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_2_dw_batchnorm_beta",
- "attrs": {"fix_gamma": "True"},
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_2_dw_batchnorm_moving_mean",
- "attrs": {
- "__init__": "[\"zero\", {}]",
- "fix_gamma": "True"
- },
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_2_dw_batchnorm_moving_var",
- "attrs": {
- "__init__": "[\"one\", {}]",
- "fix_gamma": "True"
- },
- "inputs": []
- },
- {
- "op": "BatchNorm",
- "name": "conv_2_dw_batchnorm",
- "attrs": {"fix_gamma": "True"},
- "inputs": [[12, 0, 0], [13, 0, 0], [14, 0, 0], [15, 0, 1], [16, 0, 1]]
- },
- {
- "op": "Activation",
- "name": "conv_2_dw_relu",
- "attrs": {"act_type": "relu"},
- "inputs": [[17, 0, 0]]
- },
- {
- "op": "null",
- "name": "conv_2_conv2d_weight",
- "attrs": {
- "kernel": "(1, 1)",
- "no_bias": "True",
- "num_filter": "16",
- "num_group": "1",
- "pad": "(0, 0)",
- "stride": "(1, 1)"
- },
- "inputs": []
- },
- {
- "op": "Convolution",
- "name": "conv_2_conv2d",
- "attrs": {
- "kernel": "(1, 1)",
- "no_bias": "True",
- "num_filter": "16",
- "num_group": "1",
- "pad": "(0, 0)",
- "stride": "(1, 1)"
- },
- "inputs": [[18, 0, 0], [19, 0, 0]]
- },
- {
- "op": "null",
- "name": "conv_2_batchnorm_gamma",
- "attrs": {"fix_gamma": "True"},
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_2_batchnorm_beta",
- "attrs": {"fix_gamma": "True"},
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_2_batchnorm_moving_mean",
- "attrs": {
- "__init__": "[\"zero\", {}]",
- "fix_gamma": "True"
- },
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_2_batchnorm_moving_var",
- "attrs": {
- "__init__": "[\"one\", {}]",
- "fix_gamma": "True"
- },
- "inputs": []
- },
- {
- "op": "BatchNorm",
- "name": "conv_2_batchnorm",
- "attrs": {"fix_gamma": "True"},
- "inputs": [[20, 0, 0], [21, 0, 0], [22, 0, 0], [23, 0, 1], [24, 0, 1]]
- },
- {
- "op": "Activation",
- "name": "conv_2_relu",
- "attrs": {"act_type": "relu"},
- "inputs": [[25, 0, 0]]
- },
- {
- "op": "null",
- "name": "conv_3_dw_conv2d_weight",
- "attrs": {
- "kernel": "(3, 3)",
- "no_bias": "True",
- "num_filter": "16",
- "num_group": "16",
- "pad": "(1, 1)",
- "stride": "(2, 2)"
- },
- "inputs": []
- },
- {
- "op": "Convolution",
- "name": "conv_3_dw_conv2d",
- "attrs": {
- "kernel": "(3, 3)",
- "no_bias": "True",
- "num_filter": "16",
- "num_group": "16",
- "pad": "(1, 1)",
- "stride": "(2, 2)"
- },
- "inputs": [[26, 0, 0], [27, 0, 0]]
- },
- {
- "op": "null",
- "name": "conv_3_dw_batchnorm_gamma",
- "attrs": {"fix_gamma": "True"},
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_3_dw_batchnorm_beta",
- "attrs": {"fix_gamma": "True"},
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_3_dw_batchnorm_moving_mean",
- "attrs": {
- "__init__": "[\"zero\", {}]",
- "fix_gamma": "True"
- },
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_3_dw_batchnorm_moving_var",
- "attrs": {
- "__init__": "[\"one\", {}]",
- "fix_gamma": "True"
- },
- "inputs": []
- },
- {
- "op": "BatchNorm",
- "name": "conv_3_dw_batchnorm",
- "attrs": {"fix_gamma": "True"},
- "inputs": [[28, 0, 0], [29, 0, 0], [30, 0, 0], [31, 0, 1], [32, 0, 1]]
- },
- {
- "op": "Activation",
- "name": "conv_3_dw_relu",
- "attrs": {"act_type": "relu"},
- "inputs": [[33, 0, 0]]
- },
- {
- "op": "null",
- "name": "conv_3_conv2d_weight",
- "attrs": {
- "kernel": "(1, 1)",
- "no_bias": "True",
- "num_filter": "32",
- "num_group": "1",
- "pad": "(0, 0)",
- "stride": "(1, 1)"
- },
- "inputs": []
- },
- {
- "op": "Convolution",
- "name": "conv_3_conv2d",
- "attrs": {
- "kernel": "(1, 1)",
- "no_bias": "True",
- "num_filter": "32",
- "num_group": "1",
- "pad": "(0, 0)",
- "stride": "(1, 1)"
- },
- "inputs": [[34, 0, 0], [35, 0, 0]]
- },
- {
- "op": "null",
- "name": "conv_3_batchnorm_gamma",
- "attrs": {"fix_gamma": "True"},
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_3_batchnorm_beta",
- "attrs": {"fix_gamma": "True"},
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_3_batchnorm_moving_mean",
- "attrs": {
- "__init__": "[\"zero\", {}]",
- "fix_gamma": "True"
- },
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_3_batchnorm_moving_var",
- "attrs": {
- "__init__": "[\"one\", {}]",
- "fix_gamma": "True"
- },
- "inputs": []
- },
- {
- "op": "BatchNorm",
- "name": "conv_3_batchnorm",
- "attrs": {"fix_gamma": "True"},
- "inputs": [[36, 0, 0], [37, 0, 0], [38, 0, 0], [39, 0, 1], [40, 0, 1]]
- },
- {
- "op": "Activation",
- "name": "conv_3_relu",
- "attrs": {"act_type": "relu"},
- "inputs": [[41, 0, 0]]
- },
- {
- "op": "null",
- "name": "conv_4_dw_conv2d_weight",
- "attrs": {
- "kernel": "(3, 3)",
- "no_bias": "True",
- "num_filter": "32",
- "num_group": "32",
- "pad": "(1, 1)",
- "stride": "(1, 1)"
- },
- "inputs": []
- },
- {
- "op": "Convolution",
- "name": "conv_4_dw_conv2d",
- "attrs": {
- "kernel": "(3, 3)",
- "no_bias": "True",
- "num_filter": "32",
- "num_group": "32",
- "pad": "(1, 1)",
- "stride": "(1, 1)"
- },
- "inputs": [[42, 0, 0], [43, 0, 0]]
- },
- {
- "op": "null",
- "name": "conv_4_dw_batchnorm_gamma",
- "attrs": {"fix_gamma": "True"},
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_4_dw_batchnorm_beta",
- "attrs": {"fix_gamma": "True"},
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_4_dw_batchnorm_moving_mean",
- "attrs": {
- "__init__": "[\"zero\", {}]",
- "fix_gamma": "True"
- },
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_4_dw_batchnorm_moving_var",
- "attrs": {
- "__init__": "[\"one\", {}]",
- "fix_gamma": "True"
- },
- "inputs": []
- },
- {
- "op": "BatchNorm",
- "name": "conv_4_dw_batchnorm",
- "attrs": {"fix_gamma": "True"},
- "inputs": [[44, 0, 0], [45, 0, 0], [46, 0, 0], [47, 0, 1], [48, 0, 1]]
- },
- {
- "op": "Activation",
- "name": "conv_4_dw_relu",
- "attrs": {"act_type": "relu"},
- "inputs": [[49, 0, 0]]
- },
- {
- "op": "null",
- "name": "conv_4_conv2d_weight",
- "attrs": {
- "kernel": "(1, 1)",
- "no_bias": "True",
- "num_filter": "32",
- "num_group": "1",
- "pad": "(0, 0)",
- "stride": "(1, 1)"
- },
- "inputs": []
- },
- {
- "op": "Convolution",
- "name": "conv_4_conv2d",
- "attrs": {
- "kernel": "(1, 1)",
- "no_bias": "True",
- "num_filter": "32",
- "num_group": "1",
- "pad": "(0, 0)",
- "stride": "(1, 1)"
- },
- "inputs": [[50, 0, 0], [51, 0, 0]]
- },
- {
- "op": "null",
- "name": "conv_4_batchnorm_gamma",
- "attrs": {"fix_gamma": "True"},
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_4_batchnorm_beta",
- "attrs": {"fix_gamma": "True"},
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_4_batchnorm_moving_mean",
- "attrs": {
- "__init__": "[\"zero\", {}]",
- "fix_gamma": "True"
- },
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_4_batchnorm_moving_var",
- "attrs": {
- "__init__": "[\"one\", {}]",
- "fix_gamma": "True"
- },
- "inputs": []
- },
- {
- "op": "BatchNorm",
- "name": "conv_4_batchnorm",
- "attrs": {"fix_gamma": "True"},
- "inputs": [[52, 0, 0], [53, 0, 0], [54, 0, 0], [55, 0, 1], [56, 0, 1]]
- },
- {
- "op": "Activation",
- "name": "conv_4_relu",
- "attrs": {"act_type": "relu"},
- "inputs": [[57, 0, 0]]
- },
- {
- "op": "null",
- "name": "conv_5_dw_conv2d_weight",
- "attrs": {
- "kernel": "(3, 3)",
- "no_bias": "True",
- "num_filter": "32",
- "num_group": "32",
- "pad": "(1, 1)",
- "stride": "(2, 2)"
- },
- "inputs": []
- },
- {
- "op": "Convolution",
- "name": "conv_5_dw_conv2d",
- "attrs": {
- "kernel": "(3, 3)",
- "no_bias": "True",
- "num_filter": "32",
- "num_group": "32",
- "pad": "(1, 1)",
- "stride": "(2, 2)"
- },
- "inputs": [[58, 0, 0], [59, 0, 0]]
- },
- {
- "op": "null",
- "name": "conv_5_dw_batchnorm_gamma",
- "attrs": {"fix_gamma": "True"},
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_5_dw_batchnorm_beta",
- "attrs": {"fix_gamma": "True"},
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_5_dw_batchnorm_moving_mean",
- "attrs": {
- "__init__": "[\"zero\", {}]",
- "fix_gamma": "True"
- },
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_5_dw_batchnorm_moving_var",
- "attrs": {
- "__init__": "[\"one\", {}]",
- "fix_gamma": "True"
- },
- "inputs": []
- },
- {
- "op": "BatchNorm",
- "name": "conv_5_dw_batchnorm",
- "attrs": {"fix_gamma": "True"},
- "inputs": [[60, 0, 0], [61, 0, 0], [62, 0, 0], [63, 0, 1], [64, 0, 1]]
- },
- {
- "op": "Activation",
- "name": "conv_5_dw_relu",
- "attrs": {"act_type": "relu"},
- "inputs": [[65, 0, 0]]
- },
- {
- "op": "null",
- "name": "conv_5_conv2d_weight",
- "attrs": {
- "kernel": "(1, 1)",
- "no_bias": "True",
- "num_filter": "64",
- "num_group": "1",
- "pad": "(0, 0)",
- "stride": "(1, 1)"
- },
- "inputs": []
- },
- {
- "op": "Convolution",
- "name": "conv_5_conv2d",
- "attrs": {
- "kernel": "(1, 1)",
- "no_bias": "True",
- "num_filter": "64",
- "num_group": "1",
- "pad": "(0, 0)",
- "stride": "(1, 1)"
- },
- "inputs": [[66, 0, 0], [67, 0, 0]]
- },
- {
- "op": "null",
- "name": "conv_5_batchnorm_gamma",
- "attrs": {"fix_gamma": "True"},
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_5_batchnorm_beta",
- "attrs": {"fix_gamma": "True"},
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_5_batchnorm_moving_mean",
- "attrs": {
- "__init__": "[\"zero\", {}]",
- "fix_gamma": "True"
- },
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_5_batchnorm_moving_var",
- "attrs": {
- "__init__": "[\"one\", {}]",
- "fix_gamma": "True"
- },
- "inputs": []
- },
- {
- "op": "BatchNorm",
- "name": "conv_5_batchnorm",
- "attrs": {"fix_gamma": "True"},
- "inputs": [[68, 0, 0], [69, 0, 0], [70, 0, 0], [71, 0, 1], [72, 0, 1]]
- },
- {
- "op": "Activation",
- "name": "conv_5_relu",
- "attrs": {"act_type": "relu"},
- "inputs": [[73, 0, 0]]
- },
- {
- "op": "null",
- "name": "conv_6_dw_conv2d_weight",
- "attrs": {
- "kernel": "(3, 3)",
- "no_bias": "True",
- "num_filter": "64",
- "num_group": "64",
- "pad": "(1, 1)",
- "stride": "(1, 1)"
- },
- "inputs": []
- },
- {
- "op": "Convolution",
- "name": "conv_6_dw_conv2d",
- "attrs": {
- "kernel": "(3, 3)",
- "no_bias": "True",
- "num_filter": "64",
- "num_group": "64",
- "pad": "(1, 1)",
- "stride": "(1, 1)"
- },
- "inputs": [[74, 0, 0], [75, 0, 0]]
- },
- {
- "op": "null",
- "name": "conv_6_dw_batchnorm_gamma",
- "attrs": {"fix_gamma": "True"},
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_6_dw_batchnorm_beta",
- "attrs": {"fix_gamma": "True"},
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_6_dw_batchnorm_moving_mean",
- "attrs": {
- "__init__": "[\"zero\", {}]",
- "fix_gamma": "True"
- },
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_6_dw_batchnorm_moving_var",
- "attrs": {
- "__init__": "[\"one\", {}]",
- "fix_gamma": "True"
- },
- "inputs": []
- },
- {
- "op": "BatchNorm",
- "name": "conv_6_dw_batchnorm",
- "attrs": {"fix_gamma": "True"},
- "inputs": [[76, 0, 0], [77, 0, 0], [78, 0, 0], [79, 0, 1], [80, 0, 1]]
- },
- {
- "op": "Activation",
- "name": "conv_6_dw_relu",
- "attrs": {"act_type": "relu"},
- "inputs": [[81, 0, 0]]
- },
- {
- "op": "null",
- "name": "conv_6_conv2d_weight",
- "attrs": {
- "kernel": "(1, 1)",
- "no_bias": "True",
- "num_filter": "64",
- "num_group": "1",
- "pad": "(0, 0)",
- "stride": "(1, 1)"
- },
- "inputs": []
- },
- {
- "op": "Convolution",
- "name": "conv_6_conv2d",
- "attrs": {
- "kernel": "(1, 1)",
- "no_bias": "True",
- "num_filter": "64",
- "num_group": "1",
- "pad": "(0, 0)",
- "stride": "(1, 1)"
- },
- "inputs": [[82, 0, 0], [83, 0, 0]]
- },
- {
- "op": "null",
- "name": "conv_6_batchnorm_gamma",
- "attrs": {"fix_gamma": "True"},
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_6_batchnorm_beta",
- "attrs": {"fix_gamma": "True"},
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_6_batchnorm_moving_mean",
- "attrs": {
- "__init__": "[\"zero\", {}]",
- "fix_gamma": "True"
- },
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_6_batchnorm_moving_var",
- "attrs": {
- "__init__": "[\"one\", {}]",
- "fix_gamma": "True"
- },
- "inputs": []
- },
- {
- "op": "BatchNorm",
- "name": "conv_6_batchnorm",
- "attrs": {"fix_gamma": "True"},
- "inputs": [[84, 0, 0], [85, 0, 0], [86, 0, 0], [87, 0, 1], [88, 0, 1]]
- },
- {
- "op": "Activation",
- "name": "conv_6_relu",
- "attrs": {"act_type": "relu"},
- "inputs": [[89, 0, 0]]
- },
- {
- "op": "null",
- "name": "conv_7_dw_conv2d_weight",
- "attrs": {
- "kernel": "(3, 3)",
- "no_bias": "True",
- "num_filter": "64",
- "num_group": "64",
- "pad": "(1, 1)",
- "stride": "(2, 2)"
- },
- "inputs": []
- },
- {
- "op": "Convolution",
- "name": "conv_7_dw_conv2d",
- "attrs": {
- "kernel": "(3, 3)",
- "no_bias": "True",
- "num_filter": "64",
- "num_group": "64",
- "pad": "(1, 1)",
- "stride": "(2, 2)"
- },
- "inputs": [[90, 0, 0], [91, 0, 0]]
- },
- {
- "op": "null",
- "name": "conv_7_dw_batchnorm_gamma",
- "attrs": {"fix_gamma": "True"},
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_7_dw_batchnorm_beta",
- "attrs": {"fix_gamma": "True"},
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_7_dw_batchnorm_moving_mean",
- "attrs": {
- "__init__": "[\"zero\", {}]",
- "fix_gamma": "True"
- },
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_7_dw_batchnorm_moving_var",
- "attrs": {
- "__init__": "[\"one\", {}]",
- "fix_gamma": "True"
- },
- "inputs": []
- },
- {
- "op": "BatchNorm",
- "name": "conv_7_dw_batchnorm",
- "attrs": {"fix_gamma": "True"},
- "inputs": [[92, 0, 0], [93, 0, 0], [94, 0, 0], [95, 0, 1], [96, 0, 1]]
- },
- {
- "op": "Activation",
- "name": "conv_7_dw_relu",
- "attrs": {"act_type": "relu"},
- "inputs": [[97, 0, 0]]
- },
- {
- "op": "null",
- "name": "conv_7_conv2d_weight",
- "attrs": {
- "kernel": "(1, 1)",
- "no_bias": "True",
- "num_filter": "128",
- "num_group": "1",
- "pad": "(0, 0)",
- "stride": "(1, 1)"
- },
- "inputs": []
- },
- {
- "op": "Convolution",
- "name": "conv_7_conv2d",
- "attrs": {
- "kernel": "(1, 1)",
- "no_bias": "True",
- "num_filter": "128",
- "num_group": "1",
- "pad": "(0, 0)",
- "stride": "(1, 1)"
- },
- "inputs": [[98, 0, 0], [99, 0, 0]]
- },
- {
- "op": "null",
- "name": "conv_7_batchnorm_gamma",
- "attrs": {"fix_gamma": "True"},
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_7_batchnorm_beta",
- "attrs": {"fix_gamma": "True"},
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_7_batchnorm_moving_mean",
- "attrs": {
- "__init__": "[\"zero\", {}]",
- "fix_gamma": "True"
- },
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_7_batchnorm_moving_var",
- "attrs": {
- "__init__": "[\"one\", {}]",
- "fix_gamma": "True"
- },
- "inputs": []
- },
- {
- "op": "BatchNorm",
- "name": "conv_7_batchnorm",
- "attrs": {"fix_gamma": "True"},
- "inputs": [[100, 0, 0], [101, 0, 0], [102, 0, 0], [103, 0, 1], [104, 0, 1]]
- },
- {
- "op": "Activation",
- "name": "conv_7_relu",
- "attrs": {"act_type": "relu"},
- "inputs": [[105, 0, 0]]
- },
- {
- "op": "null",
- "name": "conv_8_dw_conv2d_weight",
- "attrs": {
- "kernel": "(3, 3)",
- "no_bias": "True",
- "num_filter": "128",
- "num_group": "128",
- "pad": "(1, 1)",
- "stride": "(1, 1)"
- },
- "inputs": []
- },
- {
- "op": "Convolution",
- "name": "conv_8_dw_conv2d",
- "attrs": {
- "kernel": "(3, 3)",
- "no_bias": "True",
- "num_filter": "128",
- "num_group": "128",
- "pad": "(1, 1)",
- "stride": "(1, 1)"
- },
- "inputs": [[106, 0, 0], [107, 0, 0]]
- },
- {
- "op": "null",
- "name": "conv_8_dw_batchnorm_gamma",
- "attrs": {"fix_gamma": "True"},
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_8_dw_batchnorm_beta",
- "attrs": {"fix_gamma": "True"},
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_8_dw_batchnorm_moving_mean",
- "attrs": {
- "__init__": "[\"zero\", {}]",
- "fix_gamma": "True"
- },
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_8_dw_batchnorm_moving_var",
- "attrs": {
- "__init__": "[\"one\", {}]",
- "fix_gamma": "True"
- },
- "inputs": []
- },
- {
- "op": "BatchNorm",
- "name": "conv_8_dw_batchnorm",
- "attrs": {"fix_gamma": "True"},
- "inputs": [[108, 0, 0], [109, 0, 0], [110, 0, 0], [111, 0, 1], [112, 0, 1]]
- },
- {
- "op": "Activation",
- "name": "conv_8_dw_relu",
- "attrs": {"act_type": "relu"},
- "inputs": [[113, 0, 0]]
- },
- {
- "op": "null",
- "name": "conv_8_conv2d_weight",
- "attrs": {
- "kernel": "(1, 1)",
- "no_bias": "True",
- "num_filter": "128",
- "num_group": "1",
- "pad": "(0, 0)",
- "stride": "(1, 1)"
- },
- "inputs": []
- },
- {
- "op": "Convolution",
- "name": "conv_8_conv2d",
- "attrs": {
- "kernel": "(1, 1)",
- "no_bias": "True",
- "num_filter": "128",
- "num_group": "1",
- "pad": "(0, 0)",
- "stride": "(1, 1)"
- },
- "inputs": [[114, 0, 0], [115, 0, 0]]
- },
- {
- "op": "null",
- "name": "conv_8_batchnorm_gamma",
- "attrs": {"fix_gamma": "True"},
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_8_batchnorm_beta",
- "attrs": {"fix_gamma": "True"},
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_8_batchnorm_moving_mean",
- "attrs": {
- "__init__": "[\"zero\", {}]",
- "fix_gamma": "True"
- },
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_8_batchnorm_moving_var",
- "attrs": {
- "__init__": "[\"one\", {}]",
- "fix_gamma": "True"
- },
- "inputs": []
- },
- {
- "op": "BatchNorm",
- "name": "conv_8_batchnorm",
- "attrs": {"fix_gamma": "True"},
- "inputs": [[116, 0, 0], [117, 0, 0], [118, 0, 0], [119, 0, 1], [120, 0, 1]]
- },
- {
- "op": "Activation",
- "name": "conv_8_relu",
- "attrs": {"act_type": "relu"},
- "inputs": [[121, 0, 0]]
- },
- {
- "op": "null",
- "name": "conv_9_dw_conv2d_weight",
- "attrs": {
- "kernel": "(3, 3)",
- "no_bias": "True",
- "num_filter": "128",
- "num_group": "128",
- "pad": "(1, 1)",
- "stride": "(1, 1)"
- },
- "inputs": []
- },
- {
- "op": "Convolution",
- "name": "conv_9_dw_conv2d",
- "attrs": {
- "kernel": "(3, 3)",
- "no_bias": "True",
- "num_filter": "128",
- "num_group": "128",
- "pad": "(1, 1)",
- "stride": "(1, 1)"
- },
- "inputs": [[122, 0, 0], [123, 0, 0]]
- },
- {
- "op": "null",
- "name": "conv_9_dw_batchnorm_gamma",
- "attrs": {"fix_gamma": "True"},
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_9_dw_batchnorm_beta",
- "attrs": {"fix_gamma": "True"},
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_9_dw_batchnorm_moving_mean",
- "attrs": {
- "__init__": "[\"zero\", {}]",
- "fix_gamma": "True"
- },
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_9_dw_batchnorm_moving_var",
- "attrs": {
- "__init__": "[\"one\", {}]",
- "fix_gamma": "True"
- },
- "inputs": []
- },
- {
- "op": "BatchNorm",
- "name": "conv_9_dw_batchnorm",
- "attrs": {"fix_gamma": "True"},
- "inputs": [[124, 0, 0], [125, 0, 0], [126, 0, 0], [127, 0, 1], [128, 0, 1]]
- },
- {
- "op": "Activation",
- "name": "conv_9_dw_relu",
- "attrs": {"act_type": "relu"},
- "inputs": [[129, 0, 0]]
- },
- {
- "op": "null",
- "name": "conv_9_conv2d_weight",
- "attrs": {
- "kernel": "(1, 1)",
- "no_bias": "True",
- "num_filter": "128",
- "num_group": "1",
- "pad": "(0, 0)",
- "stride": "(1, 1)"
- },
- "inputs": []
- },
- {
- "op": "Convolution",
- "name": "conv_9_conv2d",
- "attrs": {
- "kernel": "(1, 1)",
- "no_bias": "True",
- "num_filter": "128",
- "num_group": "1",
- "pad": "(0, 0)",
- "stride": "(1, 1)"
- },
- "inputs": [[130, 0, 0], [131, 0, 0]]
- },
- {
- "op": "null",
- "name": "conv_9_batchnorm_gamma",
- "attrs": {"fix_gamma": "True"},
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_9_batchnorm_beta",
- "attrs": {"fix_gamma": "True"},
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_9_batchnorm_moving_mean",
- "attrs": {
- "__init__": "[\"zero\", {}]",
- "fix_gamma": "True"
- },
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_9_batchnorm_moving_var",
- "attrs": {
- "__init__": "[\"one\", {}]",
- "fix_gamma": "True"
- },
- "inputs": []
- },
- {
- "op": "BatchNorm",
- "name": "conv_9_batchnorm",
- "attrs": {"fix_gamma": "True"},
- "inputs": [[132, 0, 0], [133, 0, 0], [134, 0, 0], [135, 0, 1], [136, 0, 1]]
- },
- {
- "op": "Activation",
- "name": "conv_9_relu",
- "attrs": {"act_type": "relu"},
- "inputs": [[137, 0, 0]]
- },
- {
- "op": "null",
- "name": "conv_10_dw_conv2d_weight",
- "attrs": {
- "kernel": "(3, 3)",
- "no_bias": "True",
- "num_filter": "128",
- "num_group": "128",
- "pad": "(1, 1)",
- "stride": "(1, 1)"
- },
- "inputs": []
- },
- {
- "op": "Convolution",
- "name": "conv_10_dw_conv2d",
- "attrs": {
- "kernel": "(3, 3)",
- "no_bias": "True",
- "num_filter": "128",
- "num_group": "128",
- "pad": "(1, 1)",
- "stride": "(1, 1)"
- },
- "inputs": [[138, 0, 0], [139, 0, 0]]
- },
- {
- "op": "null",
- "name": "conv_10_dw_batchnorm_gamma",
- "attrs": {"fix_gamma": "True"},
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_10_dw_batchnorm_beta",
- "attrs": {"fix_gamma": "True"},
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_10_dw_batchnorm_moving_mean",
- "attrs": {
- "__init__": "[\"zero\", {}]",
- "fix_gamma": "True"
- },
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_10_dw_batchnorm_moving_var",
- "attrs": {
- "__init__": "[\"one\", {}]",
- "fix_gamma": "True"
- },
- "inputs": []
- },
- {
- "op": "BatchNorm",
- "name": "conv_10_dw_batchnorm",
- "attrs": {"fix_gamma": "True"},
- "inputs": [[140, 0, 0], [141, 0, 0], [142, 0, 0], [143, 0, 1], [144, 0, 1]]
- },
- {
- "op": "Activation",
- "name": "conv_10_dw_relu",
- "attrs": {"act_type": "relu"},
- "inputs": [[145, 0, 0]]
- },
- {
- "op": "null",
- "name": "conv_10_conv2d_weight",
- "attrs": {
- "kernel": "(1, 1)",
- "no_bias": "True",
- "num_filter": "128",
- "num_group": "1",
- "pad": "(0, 0)",
- "stride": "(1, 1)"
- },
- "inputs": []
- },
- {
- "op": "Convolution",
- "name": "conv_10_conv2d",
- "attrs": {
- "kernel": "(1, 1)",
- "no_bias": "True",
- "num_filter": "128",
- "num_group": "1",
- "pad": "(0, 0)",
- "stride": "(1, 1)"
- },
- "inputs": [[146, 0, 0], [147, 0, 0]]
- },
- {
- "op": "null",
- "name": "conv_10_batchnorm_gamma",
- "attrs": {"fix_gamma": "True"},
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_10_batchnorm_beta",
- "attrs": {"fix_gamma": "True"},
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_10_batchnorm_moving_mean",
- "attrs": {
- "__init__": "[\"zero\", {}]",
- "fix_gamma": "True"
- },
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_10_batchnorm_moving_var",
- "attrs": {
- "__init__": "[\"one\", {}]",
- "fix_gamma": "True"
- },
- "inputs": []
- },
- {
- "op": "BatchNorm",
- "name": "conv_10_batchnorm",
- "attrs": {"fix_gamma": "True"},
- "inputs": [[148, 0, 0], [149, 0, 0], [150, 0, 0], [151, 0, 1], [152, 0, 1]]
- },
- {
- "op": "Activation",
- "name": "conv_10_relu",
- "attrs": {"act_type": "relu"},
- "inputs": [[153, 0, 0]]
- },
- {
- "op": "null",
- "name": "conv_11_dw_conv2d_weight",
- "attrs": {
- "kernel": "(3, 3)",
- "no_bias": "True",
- "num_filter": "128",
- "num_group": "128",
- "pad": "(1, 1)",
- "stride": "(1, 1)"
- },
- "inputs": []
- },
- {
- "op": "Convolution",
- "name": "conv_11_dw_conv2d",
- "attrs": {
- "kernel": "(3, 3)",
- "no_bias": "True",
- "num_filter": "128",
- "num_group": "128",
- "pad": "(1, 1)",
- "stride": "(1, 1)"
- },
- "inputs": [[154, 0, 0], [155, 0, 0]]
- },
- {
- "op": "null",
- "name": "conv_11_dw_batchnorm_gamma",
- "attrs": {"fix_gamma": "True"},
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_11_dw_batchnorm_beta",
- "attrs": {"fix_gamma": "True"},
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_11_dw_batchnorm_moving_mean",
- "attrs": {
- "__init__": "[\"zero\", {}]",
- "fix_gamma": "True"
- },
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_11_dw_batchnorm_moving_var",
- "attrs": {
- "__init__": "[\"one\", {}]",
- "fix_gamma": "True"
- },
- "inputs": []
- },
- {
- "op": "BatchNorm",
- "name": "conv_11_dw_batchnorm",
- "attrs": {"fix_gamma": "True"},
- "inputs": [[156, 0, 0], [157, 0, 0], [158, 0, 0], [159, 0, 1], [160, 0, 1]]
- },
- {
- "op": "Activation",
- "name": "conv_11_dw_relu",
- "attrs": {"act_type": "relu"},
- "inputs": [[161, 0, 0]]
- },
- {
- "op": "null",
- "name": "conv_11_conv2d_weight",
- "attrs": {
- "kernel": "(1, 1)",
- "no_bias": "True",
- "num_filter": "128",
- "num_group": "1",
- "pad": "(0, 0)",
- "stride": "(1, 1)"
- },
- "inputs": []
- },
- {
- "op": "Convolution",
- "name": "conv_11_conv2d",
- "attrs": {
- "kernel": "(1, 1)",
- "no_bias": "True",
- "num_filter": "128",
- "num_group": "1",
- "pad": "(0, 0)",
- "stride": "(1, 1)"
- },
- "inputs": [[162, 0, 0], [163, 0, 0]]
- },
- {
- "op": "null",
- "name": "conv_11_batchnorm_gamma",
- "attrs": {"fix_gamma": "True"},
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_11_batchnorm_beta",
- "attrs": {"fix_gamma": "True"},
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_11_batchnorm_moving_mean",
- "attrs": {
- "__init__": "[\"zero\", {}]",
- "fix_gamma": "True"
- },
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_11_batchnorm_moving_var",
- "attrs": {
- "__init__": "[\"one\", {}]",
- "fix_gamma": "True"
- },
- "inputs": []
- },
- {
- "op": "BatchNorm",
- "name": "conv_11_batchnorm",
- "attrs": {"fix_gamma": "True"},
- "inputs": [[164, 0, 0], [165, 0, 0], [166, 0, 0], [167, 0, 1], [168, 0, 1]]
- },
- {
- "op": "Activation",
- "name": "conv_11_relu",
- "attrs": {"act_type": "relu"},
- "inputs": [[169, 0, 0]]
- },
- {
- "op": "null",
- "name": "conv_12_dw_conv2d_weight",
- "attrs": {
- "kernel": "(3, 3)",
- "no_bias": "True",
- "num_filter": "128",
- "num_group": "128",
- "pad": "(1, 1)",
- "stride": "(1, 1)"
- },
- "inputs": []
- },
- {
- "op": "Convolution",
- "name": "conv_12_dw_conv2d",
- "attrs": {
- "kernel": "(3, 3)",
- "no_bias": "True",
- "num_filter": "128",
- "num_group": "128",
- "pad": "(1, 1)",
- "stride": "(1, 1)"
- },
- "inputs": [[170, 0, 0], [171, 0, 0]]
- },
- {
- "op": "null",
- "name": "conv_12_dw_batchnorm_gamma",
- "attrs": {"fix_gamma": "True"},
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_12_dw_batchnorm_beta",
- "attrs": {"fix_gamma": "True"},
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_12_dw_batchnorm_moving_mean",
- "attrs": {
- "__init__": "[\"zero\", {}]",
- "fix_gamma": "True"
- },
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_12_dw_batchnorm_moving_var",
- "attrs": {
- "__init__": "[\"one\", {}]",
- "fix_gamma": "True"
- },
- "inputs": []
- },
- {
- "op": "BatchNorm",
- "name": "conv_12_dw_batchnorm",
- "attrs": {"fix_gamma": "True"},
- "inputs": [[172, 0, 0], [173, 0, 0], [174, 0, 0], [175, 0, 1], [176, 0, 1]]
- },
- {
- "op": "Activation",
- "name": "conv_12_dw_relu",
- "attrs": {"act_type": "relu"},
- "inputs": [[177, 0, 0]]
- },
- {
- "op": "null",
- "name": "conv_12_conv2d_weight",
- "attrs": {
- "kernel": "(1, 1)",
- "no_bias": "True",
- "num_filter": "128",
- "num_group": "1",
- "pad": "(0, 0)",
- "stride": "(1, 1)"
- },
- "inputs": []
- },
- {
- "op": "Convolution",
- "name": "conv_12_conv2d",
- "attrs": {
- "kernel": "(1, 1)",
- "no_bias": "True",
- "num_filter": "128",
- "num_group": "1",
- "pad": "(0, 0)",
- "stride": "(1, 1)"
- },
- "inputs": [[178, 0, 0], [179, 0, 0]]
- },
- {
- "op": "null",
- "name": "conv_12_batchnorm_gamma",
- "attrs": {"fix_gamma": "True"},
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_12_batchnorm_beta",
- "attrs": {"fix_gamma": "True"},
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_12_batchnorm_moving_mean",
- "attrs": {
- "__init__": "[\"zero\", {}]",
- "fix_gamma": "True"
- },
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_12_batchnorm_moving_var",
- "attrs": {
- "__init__": "[\"one\", {}]",
- "fix_gamma": "True"
- },
- "inputs": []
- },
- {
- "op": "BatchNorm",
- "name": "conv_12_batchnorm",
- "attrs": {"fix_gamma": "True"},
- "inputs": [[180, 0, 0], [181, 0, 0], [182, 0, 0], [183, 0, 1], [184, 0, 1]]
- },
- {
- "op": "Activation",
- "name": "conv_12_relu",
- "attrs": {"act_type": "relu"},
- "inputs": [[185, 0, 0]]
- },
- {
- "op": "null",
- "name": "conv_13_dw_conv2d_weight",
- "attrs": {
- "kernel": "(3, 3)",
- "no_bias": "True",
- "num_filter": "128",
- "num_group": "128",
- "pad": "(1, 1)",
- "stride": "(2, 2)"
- },
- "inputs": []
- },
- {
- "op": "Convolution",
- "name": "conv_13_dw_conv2d",
- "attrs": {
- "kernel": "(3, 3)",
- "no_bias": "True",
- "num_filter": "128",
- "num_group": "128",
- "pad": "(1, 1)",
- "stride": "(2, 2)"
- },
- "inputs": [[186, 0, 0], [187, 0, 0]]
- },
- {
- "op": "null",
- "name": "conv_13_dw_batchnorm_gamma",
- "attrs": {"fix_gamma": "True"},
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_13_dw_batchnorm_beta",
- "attrs": {"fix_gamma": "True"},
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_13_dw_batchnorm_moving_mean",
- "attrs": {
- "__init__": "[\"zero\", {}]",
- "fix_gamma": "True"
- },
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_13_dw_batchnorm_moving_var",
- "attrs": {
- "__init__": "[\"one\", {}]",
- "fix_gamma": "True"
- },
- "inputs": []
- },
- {
- "op": "BatchNorm",
- "name": "conv_13_dw_batchnorm",
- "attrs": {"fix_gamma": "True"},
- "inputs": [[188, 0, 0], [189, 0, 0], [190, 0, 0], [191, 0, 1], [192, 0, 1]]
- },
- {
- "op": "Activation",
- "name": "conv_13_dw_relu",
- "attrs": {"act_type": "relu"},
- "inputs": [[193, 0, 0]]
- },
- {
- "op": "null",
- "name": "conv_13_conv2d_weight",
- "attrs": {
- "kernel": "(1, 1)",
- "no_bias": "True",
- "num_filter": "256",
- "num_group": "1",
- "pad": "(0, 0)",
- "stride": "(1, 1)"
- },
- "inputs": []
- },
- {
- "op": "Convolution",
- "name": "conv_13_conv2d",
- "attrs": {
- "kernel": "(1, 1)",
- "no_bias": "True",
- "num_filter": "256",
- "num_group": "1",
- "pad": "(0, 0)",
- "stride": "(1, 1)"
- },
- "inputs": [[194, 0, 0], [195, 0, 0]]
- },
- {
- "op": "null",
- "name": "conv_13_batchnorm_gamma",
- "attrs": {"fix_gamma": "True"},
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_13_batchnorm_beta",
- "attrs": {"fix_gamma": "True"},
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_13_batchnorm_moving_mean",
- "attrs": {
- "__init__": "[\"zero\", {}]",
- "fix_gamma": "True"
- },
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_13_batchnorm_moving_var",
- "attrs": {
- "__init__": "[\"one\", {}]",
- "fix_gamma": "True"
- },
- "inputs": []
- },
- {
- "op": "BatchNorm",
- "name": "conv_13_batchnorm",
- "attrs": {"fix_gamma": "True"},
- "inputs": [[196, 0, 0], [197, 0, 0], [198, 0, 0], [199, 0, 1], [200, 0, 1]]
- },
- {
- "op": "Activation",
- "name": "conv_13_relu",
- "attrs": {"act_type": "relu"},
- "inputs": [[201, 0, 0]]
- },
- {
- "op": "null",
- "name": "conv_14_dw_conv2d_weight",
- "attrs": {
- "kernel": "(3, 3)",
- "no_bias": "True",
- "num_filter": "256",
- "num_group": "256",
- "pad": "(1, 1)",
- "stride": "(1, 1)"
- },
- "inputs": []
- },
- {
- "op": "Convolution",
- "name": "conv_14_dw_conv2d",
- "attrs": {
- "kernel": "(3, 3)",
- "no_bias": "True",
- "num_filter": "256",
- "num_group": "256",
- "pad": "(1, 1)",
- "stride": "(1, 1)"
- },
- "inputs": [[202, 0, 0], [203, 0, 0]]
- },
- {
- "op": "null",
- "name": "conv_14_dw_batchnorm_gamma",
- "attrs": {"fix_gamma": "True"},
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_14_dw_batchnorm_beta",
- "attrs": {"fix_gamma": "True"},
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_14_dw_batchnorm_moving_mean",
- "attrs": {
- "__init__": "[\"zero\", {}]",
- "fix_gamma": "True"
- },
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_14_dw_batchnorm_moving_var",
- "attrs": {
- "__init__": "[\"one\", {}]",
- "fix_gamma": "True"
- },
- "inputs": []
- },
- {
- "op": "BatchNorm",
- "name": "conv_14_dw_batchnorm",
- "attrs": {"fix_gamma": "True"},
- "inputs": [[204, 0, 0], [205, 0, 0], [206, 0, 0], [207, 0, 1], [208, 0, 1]]
- },
- {
- "op": "Activation",
- "name": "conv_14_dw_relu",
- "attrs": {"act_type": "relu"},
- "inputs": [[209, 0, 0]]
- },
- {
- "op": "null",
- "name": "conv_14_conv2d_weight",
- "attrs": {
- "kernel": "(1, 1)",
- "no_bias": "True",
- "num_filter": "256",
- "num_group": "1",
- "pad": "(0, 0)",
- "stride": "(1, 1)"
- },
- "inputs": []
- },
- {
- "op": "Convolution",
- "name": "conv_14_conv2d",
- "attrs": {
- "kernel": "(1, 1)",
- "no_bias": "True",
- "num_filter": "256",
- "num_group": "1",
- "pad": "(0, 0)",
- "stride": "(1, 1)"
- },
- "inputs": [[210, 0, 0], [211, 0, 0]]
- },
- {
- "op": "null",
- "name": "conv_14_batchnorm_gamma",
- "attrs": {"fix_gamma": "True"},
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_14_batchnorm_beta",
- "attrs": {"fix_gamma": "True"},
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_14_batchnorm_moving_mean",
- "attrs": {
- "__init__": "[\"zero\", {}]",
- "fix_gamma": "True"
- },
- "inputs": []
- },
- {
- "op": "null",
- "name": "conv_14_batchnorm_moving_var",
- "attrs": {
- "__init__": "[\"one\", {}]",
- "fix_gamma": "True"
- },
- "inputs": []
- },
- {
- "op": "BatchNorm",
- "name": "conv_14_batchnorm",
- "attrs": {"fix_gamma": "True"},
- "inputs": [[212, 0, 0], [213, 0, 0], [214, 0, 0], [215, 0, 1], [216, 0, 1]]
- },
- {
- "op": "Activation",
- "name": "conv_14_relu",
- "attrs": {"act_type": "relu"},
- "inputs": [[217, 0, 0]]
- },
- {
- "op": "null",
- "name": "bn1_gamma",
- "attrs": {
- "eps": "2e-05",
- "fix_gamma": "False",
- "momentum": "0.9"
- },
- "inputs": []
- },
- {
- "op": "null",
- "name": "bn1_beta",
- "attrs": {
- "eps": "2e-05",
- "fix_gamma": "False",
- "momentum": "0.9"
- },
- "inputs": []
- },
- {
- "op": "null",
- "name": "bn1_moving_mean",
- "attrs": {
- "__init__": "[\"zero\", {}]",
- "eps": "2e-05",
- "fix_gamma": "False",
- "momentum": "0.9"
- },
- "inputs": []
- },
- {
- "op": "null",
- "name": "bn1_moving_var",
- "attrs": {
- "__init__": "[\"one\", {}]",
- "eps": "2e-05",
- "fix_gamma": "False",
- "momentum": "0.9"
- },
- "inputs": []
- },
- {
- "op": "BatchNorm",
- "name": "bn1",
- "attrs": {
- "eps": "2e-05",
- "fix_gamma": "False",
- "momentum": "0.9"
- },
- "inputs": [[218, 0, 0], [219, 0, 0], [220, 0, 0], [221, 0, 1], [222, 0, 1]]
- },
- {
- "op": "null",
- "name": "relu1_gamma",
- "attrs": {
- "__init__": "[\"Constant\", {\"value\": 0.25}]",
- "act_type": "prelu"
- },
- "inputs": []
- },
- {
- "op": "LeakyReLU",
- "name": "relu1",
- "attrs": {"act_type": "prelu"},
- "inputs": [[223, 0, 0], [224, 0, 0]]
- },
- {
- "op": "Pooling",
- "name": "pool1",
- "attrs": {
- "global_pool": "True",
- "kernel": "(7, 7)",
- "pool_type": "avg"
- },
- "inputs": [[225, 0, 0]]
- },
- {
- "op": "Flatten",
- "name": "flatten0",
- "inputs": [[226, 0, 0]]
- },
- {
- "op": "null",
- "name": "pre_fc1_weight",
- "attrs": {"num_hidden": "202"},
- "inputs": []
- },
- {
- "op": "null",
- "name": "pre_fc1_bias",
- "attrs": {"num_hidden": "202"},
- "inputs": []
- },
- {
- "op": "FullyConnected",
- "name": "pre_fc1",
- "attrs": {"num_hidden": "202"},
- "inputs": [[227, 0, 0], [228, 0, 0], [229, 0, 0]]
- },
- {
- "op": "null",
- "name": "fc1_gamma",
- "attrs": {
- "eps": "2e-05",
- "fix_gamma": "True",
- "momentum": "0.9"
- },
- "inputs": []
- },
- {
- "op": "null",
- "name": "fc1_beta",
- "attrs": {
- "eps": "2e-05",
- "fix_gamma": "True",
- "momentum": "0.9"
- },
- "inputs": []
- },
- {
- "op": "null",
- "name": "fc1_moving_mean",
- "attrs": {
- "__init__": "[\"zero\", {}]",
- "eps": "2e-05",
- "fix_gamma": "True",
- "momentum": "0.9"
- },
- "inputs": []
- },
- {
- "op": "null",
- "name": "fc1_moving_var",
- "attrs": {
- "__init__": "[\"one\", {}]",
- "eps": "2e-05",
- "fix_gamma": "True",
- "momentum": "0.9"
- },
- "inputs": []
- },
- {
- "op": "BatchNorm",
- "name": "fc1",
- "attrs": {
- "eps": "2e-05",
- "fix_gamma": "True",
- "momentum": "0.9"
- },
- "inputs": [[230, 0, 0], [231, 0, 0], [232, 0, 0], [233, 0, 1], [234, 0, 1]]
- }
- ],
- "arg_nodes": [
- 0,
- 3,
- 5,
- 6,
- 7,
- 8,
- 11,
- 13,
- 14,
- 15,
- 16,
- 19,
- 21,
- 22,
- 23,
- 24,
- 27,
- 29,
- 30,
- 31,
- 32,
- 35,
- 37,
- 38,
- 39,
- 40,
- 43,
- 45,
- 46,
- 47,
- 48,
- 51,
- 53,
- 54,
- 55,
- 56,
- 59,
- 61,
- 62,
- 63,
- 64,
- 67,
- 69,
- 70,
- 71,
- 72,
- 75,
- 77,
- 78,
- 79,
- 80,
- 83,
- 85,
- 86,
- 87,
- 88,
- 91,
- 93,
- 94,
- 95,
- 96,
- 99,
- 101,
- 102,
- 103,
- 104,
- 107,
- 109,
- 110,
- 111,
- 112,
- 115,
- 117,
- 118,
- 119,
- 120,
- 123,
- 125,
- 126,
- 127,
- 128,
- 131,
- 133,
- 134,
- 135,
- 136,
- 139,
- 141,
- 142,
- 143,
- 144,
- 147,
- 149,
- 150,
- 151,
- 152,
- 155,
- 157,
- 158,
- 159,
- 160,
- 163,
- 165,
- 166,
- 167,
- 168,
- 171,
- 173,
- 174,
- 175,
- 176,
- 179,
- 181,
- 182,
- 183,
- 184,
- 187,
- 189,
- 190,
- 191,
- 192,
- 195,
- 197,
- 198,
- 199,
- 200,
- 203,
- 205,
- 206,
- 207,
- 208,
- 211,
- 213,
- 214,
- 215,
- 216,
- 219,
- 220,
- 221,
- 222,
- 224,
- 228,
- 229,
- 231,
- 232,
- 233,
- 234
- ],
- "node_row_ptr": [
- 0,
- 1,
- 2,
- 3,
- 4,
- 5,
- 6,
- 7,
- 8,
- 9,
- 12,
- 13,
- 14,
- 15,
- 16,
- 17,
- 18,
- 19,
- 22,
- 23,
- 24,
- 25,
- 26,
- 27,
- 28,
- 29,
- 32,
- 33,
- 34,
- 35,
- 36,
- 37,
- 38,
- 39,
- 42,
- 43,
- 44,
- 45,
- 46,
- 47,
- 48,
- 49,
- 52,
- 53,
- 54,
- 55,
- 56,
- 57,
- 58,
- 59,
- 62,
- 63,
- 64,
- 65,
- 66,
- 67,
- 68,
- 69,
- 72,
- 73,
- 74,
- 75,
- 76,
- 77,
- 78,
- 79,
- 82,
- 83,
- 84,
- 85,
- 86,
- 87,
- 88,
- 89,
- 92,
- 93,
- 94,
- 95,
- 96,
- 97,
- 98,
- 99,
- 102,
- 103,
- 104,
- 105,
- 106,
- 107,
- 108,
- 109,
- 112,
- 113,
- 114,
- 115,
- 116,
- 117,
- 118,
- 119,
- 122,
- 123,
- 124,
- 125,
- 126,
- 127,
- 128,
- 129,
- 132,
- 133,
- 134,
- 135,
- 136,
- 137,
- 138,
- 139,
- 142,
- 143,
- 144,
- 145,
- 146,
- 147,
- 148,
- 149,
- 152,
- 153,
- 154,
- 155,
- 156,
- 157,
- 158,
- 159,
- 162,
- 163,
- 164,
- 165,
- 166,
- 167,
- 168,
- 169,
- 172,
- 173,
- 174,
- 175,
- 176,
- 177,
- 178,
- 179,
- 182,
- 183,
- 184,
- 185,
- 186,
- 187,
- 188,
- 189,
- 192,
- 193,
- 194,
- 195,
- 196,
- 197,
- 198,
- 199,
- 202,
- 203,
- 204,
- 205,
- 206,
- 207,
- 208,
- 209,
- 212,
- 213,
- 214,
- 215,
- 216,
- 217,
- 218,
- 219,
- 222,
- 223,
- 224,
- 225,
- 226,
- 227,
- 228,
- 229,
- 232,
- 233,
- 234,
- 235,
- 236,
- 237,
- 238,
- 239,
- 242,
- 243,
- 244,
- 245,
- 246,
- 247,
- 248,
- 249,
- 252,
- 253,
- 254,
- 255,
- 256,
- 257,
- 258,
- 259,
- 262,
- 263,
- 264,
- 265,
- 266,
- 267,
- 268,
- 269,
- 272,
- 273,
- 274,
- 275,
- 276,
- 277,
- 280,
- 281,
- 282,
- 283,
- 284,
- 285,
- 286,
- 287,
- 288,
- 289,
- 290,
- 291,
- 294
- ],
- "heads": [[235, 0, 0]],
- "attrs": {"mxnet_version": ["int", 10300]}
-}
\ No newline at end of file
diff --git a/gender-age/mtcnn-model/det1-0001.params b/gender-age/mtcnn-model/det1-0001.params
deleted file mode 100644
index e4b04aa..0000000
Binary files a/gender-age/mtcnn-model/det1-0001.params and /dev/null differ
diff --git a/gender-age/mtcnn-model/det1-symbol.json b/gender-age/mtcnn-model/det1-symbol.json
deleted file mode 100644
index bd9b772..0000000
--- a/gender-age/mtcnn-model/det1-symbol.json
+++ /dev/null
@@ -1,266 +0,0 @@
-{
- "nodes": [
- {
- "op": "null",
- "param": {},
- "name": "data",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv1_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv1_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "Convolution",
- "param": {
- "cudnn_off": "False",
- "cudnn_tune": "off",
- "dilate": "(1,1)",
- "kernel": "(3,3)",
- "no_bias": "False",
- "num_filter": "10",
- "num_group": "1",
- "pad": "(0,0)",
- "stride": "(1,1)",
- "workspace": "1024"
- },
- "name": "conv1",
- "inputs": [[0, 0], [1, 0], [2, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "prelu1_gamma",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "LeakyReLU",
- "param": {
- "act_type": "prelu",
- "lower_bound": "0.125",
- "slope": "0.25",
- "upper_bound": "0.334"
- },
- "name": "prelu1",
- "inputs": [[3, 0], [4, 0]],
- "backward_source_id": -1
- },
- {
- "op": "Pooling",
- "param": {
- "global_pool": "False",
- "kernel": "(2,2)",
- "pad": "(0,0)",
- "pool_type": "max",
- "pooling_convention": "full",
- "stride": "(2,2)"
- },
- "name": "pool1",
- "inputs": [[5, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv2_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv2_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "Convolution",
- "param": {
- "cudnn_off": "False",
- "cudnn_tune": "off",
- "dilate": "(1,1)",
- "kernel": "(3,3)",
- "no_bias": "False",
- "num_filter": "16",
- "num_group": "1",
- "pad": "(0,0)",
- "stride": "(1,1)",
- "workspace": "1024"
- },
- "name": "conv2",
- "inputs": [[6, 0], [7, 0], [8, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "prelu2_gamma",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "LeakyReLU",
- "param": {
- "act_type": "prelu",
- "lower_bound": "0.125",
- "slope": "0.25",
- "upper_bound": "0.334"
- },
- "name": "prelu2",
- "inputs": [[9, 0], [10, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv3_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv3_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "Convolution",
- "param": {
- "cudnn_off": "False",
- "cudnn_tune": "off",
- "dilate": "(1,1)",
- "kernel": "(3,3)",
- "no_bias": "False",
- "num_filter": "32",
- "num_group": "1",
- "pad": "(0,0)",
- "stride": "(1,1)",
- "workspace": "1024"
- },
- "name": "conv3",
- "inputs": [[11, 0], [12, 0], [13, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "prelu3_gamma",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "LeakyReLU",
- "param": {
- "act_type": "prelu",
- "lower_bound": "0.125",
- "slope": "0.25",
- "upper_bound": "0.334"
- },
- "name": "prelu3",
- "inputs": [[14, 0], [15, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv4_2_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv4_2_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "Convolution",
- "param": {
- "cudnn_off": "False",
- "cudnn_tune": "off",
- "dilate": "(1,1)",
- "kernel": "(1,1)",
- "no_bias": "False",
- "num_filter": "4",
- "num_group": "1",
- "pad": "(0,0)",
- "stride": "(1,1)",
- "workspace": "1024"
- },
- "name": "conv4_2",
- "inputs": [[16, 0], [17, 0], [18, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv4_1_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv4_1_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "Convolution",
- "param": {
- "cudnn_off": "False",
- "cudnn_tune": "off",
- "dilate": "(1,1)",
- "kernel": "(1,1)",
- "no_bias": "False",
- "num_filter": "2",
- "num_group": "1",
- "pad": "(0,0)",
- "stride": "(1,1)",
- "workspace": "1024"
- },
- "name": "conv4_1",
- "inputs": [[16, 0], [20, 0], [21, 0]],
- "backward_source_id": -1
- },
- {
- "op": "SoftmaxActivation",
- "param": {"mode": "channel"},
- "name": "prob1",
- "inputs": [[22, 0]],
- "backward_source_id": -1
- }
- ],
- "arg_nodes": [
- 0,
- 1,
- 2,
- 4,
- 7,
- 8,
- 10,
- 12,
- 13,
- 15,
- 17,
- 18,
- 20,
- 21
- ],
- "heads": [[19, 0], [23, 0]]
-}
\ No newline at end of file
diff --git a/gender-age/mtcnn-model/det1.caffemodel b/gender-age/mtcnn-model/det1.caffemodel
deleted file mode 100644
index 79e93b4..0000000
Binary files a/gender-age/mtcnn-model/det1.caffemodel and /dev/null differ
diff --git a/gender-age/mtcnn-model/det1.prototxt b/gender-age/mtcnn-model/det1.prototxt
deleted file mode 100644
index c5c1657..0000000
--- a/gender-age/mtcnn-model/det1.prototxt
+++ /dev/null
@@ -1,177 +0,0 @@
-name: "PNet"
-input: "data"
-input_dim: 1
-input_dim: 3
-input_dim: 12
-input_dim: 12
-
-layer {
- name: "conv1"
- type: "Convolution"
- bottom: "data"
- top: "conv1"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 0
- }
- convolution_param {
- num_output: 10
- kernel_size: 3
- stride: 1
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-}
-layer {
- name: "PReLU1"
- type: "PReLU"
- bottom: "conv1"
- top: "conv1"
-}
-layer {
- name: "pool1"
- type: "Pooling"
- bottom: "conv1"
- top: "pool1"
- pooling_param {
- pool: MAX
- kernel_size: 2
- stride: 2
- }
-}
-
-layer {
- name: "conv2"
- type: "Convolution"
- bottom: "pool1"
- top: "conv2"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 0
- }
- convolution_param {
- num_output: 16
- kernel_size: 3
- stride: 1
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-}
-layer {
- name: "PReLU2"
- type: "PReLU"
- bottom: "conv2"
- top: "conv2"
-}
-
-layer {
- name: "conv3"
- type: "Convolution"
- bottom: "conv2"
- top: "conv3"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 0
- }
- convolution_param {
- num_output: 32
- kernel_size: 3
- stride: 1
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-}
-layer {
- name: "PReLU3"
- type: "PReLU"
- bottom: "conv3"
- top: "conv3"
-}
-
-
-layer {
- name: "conv4-1"
- type: "Convolution"
- bottom: "conv3"
- top: "conv4-1"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 0
- }
- convolution_param {
- num_output: 2
- kernel_size: 1
- stride: 1
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-}
-
-layer {
- name: "conv4-2"
- type: "Convolution"
- bottom: "conv3"
- top: "conv4-2"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 0
- }
- convolution_param {
- num_output: 4
- kernel_size: 1
- stride: 1
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-}
-layer {
- name: "prob1"
- type: "Softmax"
- bottom: "conv4-1"
- top: "prob1"
-}
diff --git a/gender-age/mtcnn-model/det2-0001.params b/gender-age/mtcnn-model/det2-0001.params
deleted file mode 100644
index a14a478..0000000
Binary files a/gender-age/mtcnn-model/det2-0001.params and /dev/null differ
diff --git a/gender-age/mtcnn-model/det2-symbol.json b/gender-age/mtcnn-model/det2-symbol.json
deleted file mode 100644
index a13246a..0000000
--- a/gender-age/mtcnn-model/det2-symbol.json
+++ /dev/null
@@ -1,324 +0,0 @@
-{
- "nodes": [
- {
- "op": "null",
- "param": {},
- "name": "data",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv1_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv1_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "Convolution",
- "param": {
- "cudnn_off": "False",
- "cudnn_tune": "off",
- "dilate": "(1,1)",
- "kernel": "(3,3)",
- "no_bias": "False",
- "num_filter": "28",
- "num_group": "1",
- "pad": "(0,0)",
- "stride": "(1,1)",
- "workspace": "1024"
- },
- "name": "conv1",
- "inputs": [[0, 0], [1, 0], [2, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "prelu1_gamma",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "LeakyReLU",
- "param": {
- "act_type": "prelu",
- "lower_bound": "0.125",
- "slope": "0.25",
- "upper_bound": "0.334"
- },
- "name": "prelu1",
- "inputs": [[3, 0], [4, 0]],
- "backward_source_id": -1
- },
- {
- "op": "Pooling",
- "param": {
- "global_pool": "False",
- "kernel": "(3,3)",
- "pad": "(0,0)",
- "pool_type": "max",
- "pooling_convention": "full",
- "stride": "(2,2)"
- },
- "name": "pool1",
- "inputs": [[5, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv2_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv2_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "Convolution",
- "param": {
- "cudnn_off": "False",
- "cudnn_tune": "off",
- "dilate": "(1,1)",
- "kernel": "(3,3)",
- "no_bias": "False",
- "num_filter": "48",
- "num_group": "1",
- "pad": "(0,0)",
- "stride": "(1,1)",
- "workspace": "1024"
- },
- "name": "conv2",
- "inputs": [[6, 0], [7, 0], [8, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "prelu2_gamma",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "LeakyReLU",
- "param": {
- "act_type": "prelu",
- "lower_bound": "0.125",
- "slope": "0.25",
- "upper_bound": "0.334"
- },
- "name": "prelu2",
- "inputs": [[9, 0], [10, 0]],
- "backward_source_id": -1
- },
- {
- "op": "Pooling",
- "param": {
- "global_pool": "False",
- "kernel": "(3,3)",
- "pad": "(0,0)",
- "pool_type": "max",
- "pooling_convention": "full",
- "stride": "(2,2)"
- },
- "name": "pool2",
- "inputs": [[11, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv3_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv3_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "Convolution",
- "param": {
- "cudnn_off": "False",
- "cudnn_tune": "off",
- "dilate": "(1,1)",
- "kernel": "(2,2)",
- "no_bias": "False",
- "num_filter": "64",
- "num_group": "1",
- "pad": "(0,0)",
- "stride": "(1,1)",
- "workspace": "1024"
- },
- "name": "conv3",
- "inputs": [[12, 0], [13, 0], [14, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "prelu3_gamma",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "LeakyReLU",
- "param": {
- "act_type": "prelu",
- "lower_bound": "0.125",
- "slope": "0.25",
- "upper_bound": "0.334"
- },
- "name": "prelu3",
- "inputs": [[15, 0], [16, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv4_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv4_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "FullyConnected",
- "param": {
- "no_bias": "False",
- "num_hidden": "128"
- },
- "name": "conv4",
- "inputs": [[17, 0], [18, 0], [19, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "prelu4_gamma",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "LeakyReLU",
- "param": {
- "act_type": "prelu",
- "lower_bound": "0.125",
- "slope": "0.25",
- "upper_bound": "0.334"
- },
- "name": "prelu4",
- "inputs": [[20, 0], [21, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv5_2_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv5_2_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "FullyConnected",
- "param": {
- "no_bias": "False",
- "num_hidden": "4"
- },
- "name": "conv5_2",
- "inputs": [[22, 0], [23, 0], [24, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv5_1_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv5_1_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "FullyConnected",
- "param": {
- "no_bias": "False",
- "num_hidden": "2"
- },
- "name": "conv5_1",
- "inputs": [[22, 0], [26, 0], [27, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "prob1_label",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "SoftmaxOutput",
- "param": {
- "grad_scale": "1",
- "ignore_label": "-1",
- "multi_output": "False",
- "normalization": "null",
- "use_ignore": "False"
- },
- "name": "prob1",
- "inputs": [[28, 0], [29, 0]],
- "backward_source_id": -1
- }
- ],
- "arg_nodes": [
- 0,
- 1,
- 2,
- 4,
- 7,
- 8,
- 10,
- 13,
- 14,
- 16,
- 18,
- 19,
- 21,
- 23,
- 24,
- 26,
- 27,
- 29
- ],
- "heads": [[25, 0], [30, 0]]
-}
\ No newline at end of file
diff --git a/gender-age/mtcnn-model/det2.caffemodel b/gender-age/mtcnn-model/det2.caffemodel
deleted file mode 100644
index a5a540c..0000000
Binary files a/gender-age/mtcnn-model/det2.caffemodel and /dev/null differ
diff --git a/gender-age/mtcnn-model/det2.prototxt b/gender-age/mtcnn-model/det2.prototxt
deleted file mode 100644
index 51093e6..0000000
--- a/gender-age/mtcnn-model/det2.prototxt
+++ /dev/null
@@ -1,228 +0,0 @@
-name: "RNet"
-input: "data"
-input_dim: 1
-input_dim: 3
-input_dim: 24
-input_dim: 24
-
-
-##########################
-######################
-layer {
- name: "conv1"
- type: "Convolution"
- bottom: "data"
- top: "conv1"
- param {
- lr_mult: 0
- decay_mult: 0
- }
- param {
- lr_mult: 0
- decay_mult: 0
- }
- convolution_param {
- num_output: 28
- kernel_size: 3
- stride: 1
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-}
-layer {
- name: "prelu1"
- type: "PReLU"
- bottom: "conv1"
- top: "conv1"
- propagate_down: true
-}
-layer {
- name: "pool1"
- type: "Pooling"
- bottom: "conv1"
- top: "pool1"
- pooling_param {
- pool: MAX
- kernel_size: 3
- stride: 2
- }
-}
-
-layer {
- name: "conv2"
- type: "Convolution"
- bottom: "pool1"
- top: "conv2"
- param {
- lr_mult: 0
- decay_mult: 0
- }
- param {
- lr_mult: 0
- decay_mult: 0
- }
- convolution_param {
- num_output: 48
- kernel_size: 3
- stride: 1
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-}
-layer {
- name: "prelu2"
- type: "PReLU"
- bottom: "conv2"
- top: "conv2"
- propagate_down: true
-}
-layer {
- name: "pool2"
- type: "Pooling"
- bottom: "conv2"
- top: "pool2"
- pooling_param {
- pool: MAX
- kernel_size: 3
- stride: 2
- }
-}
-####################################
-
-##################################
-layer {
- name: "conv3"
- type: "Convolution"
- bottom: "pool2"
- top: "conv3"
- param {
- lr_mult: 0
- decay_mult: 0
- }
- param {
- lr_mult: 0
- decay_mult: 0
- }
- convolution_param {
- num_output: 64
- kernel_size: 2
- stride: 1
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-}
-layer {
- name: "prelu3"
- type: "PReLU"
- bottom: "conv3"
- top: "conv3"
- propagate_down: true
-}
-###############################
-
-###############################
-
-layer {
- name: "conv4"
- type: "InnerProduct"
- bottom: "conv3"
- top: "conv4"
- param {
- lr_mult: 0
- decay_mult: 0
- }
- param {
- lr_mult: 0
- decay_mult: 0
- }
- inner_product_param {
- num_output: 128
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-}
-layer {
- name: "prelu4"
- type: "PReLU"
- bottom: "conv4"
- top: "conv4"
-}
-
-layer {
- name: "conv5-1"
- type: "InnerProduct"
- bottom: "conv4"
- top: "conv5-1"
- param {
- lr_mult: 0
- decay_mult: 0
- }
- param {
- lr_mult: 0
- decay_mult: 0
- }
- inner_product_param {
- num_output: 2
- #kernel_size: 1
- #stride: 1
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-}
-layer {
- name: "conv5-2"
- type: "InnerProduct"
- bottom: "conv4"
- top: "conv5-2"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 1
- }
- inner_product_param {
- num_output: 4
- #kernel_size: 1
- #stride: 1
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-}
-layer {
- name: "prob1"
- type: "Softmax"
- bottom: "conv5-1"
- top: "prob1"
-}
\ No newline at end of file
diff --git a/gender-age/mtcnn-model/det3-0001.params b/gender-age/mtcnn-model/det3-0001.params
deleted file mode 100644
index cae898b..0000000
Binary files a/gender-age/mtcnn-model/det3-0001.params and /dev/null differ
diff --git a/gender-age/mtcnn-model/det3-symbol.json b/gender-age/mtcnn-model/det3-symbol.json
deleted file mode 100644
index 00061ed..0000000
--- a/gender-age/mtcnn-model/det3-symbol.json
+++ /dev/null
@@ -1,418 +0,0 @@
-{
- "nodes": [
- {
- "op": "null",
- "param": {},
- "name": "data",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv1_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv1_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "Convolution",
- "param": {
- "cudnn_off": "False",
- "cudnn_tune": "off",
- "dilate": "(1,1)",
- "kernel": "(3,3)",
- "no_bias": "False",
- "num_filter": "32",
- "num_group": "1",
- "pad": "(0,0)",
- "stride": "(1,1)",
- "workspace": "1024"
- },
- "name": "conv1",
- "inputs": [[0, 0], [1, 0], [2, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "prelu1_gamma",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "LeakyReLU",
- "param": {
- "act_type": "prelu",
- "lower_bound": "0.125",
- "slope": "0.25",
- "upper_bound": "0.334"
- },
- "name": "prelu1",
- "inputs": [[3, 0], [4, 0]],
- "backward_source_id": -1
- },
- {
- "op": "Pooling",
- "param": {
- "global_pool": "False",
- "kernel": "(3,3)",
- "pad": "(0,0)",
- "pool_type": "max",
- "pooling_convention": "full",
- "stride": "(2,2)"
- },
- "name": "pool1",
- "inputs": [[5, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv2_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv2_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "Convolution",
- "param": {
- "cudnn_off": "False",
- "cudnn_tune": "off",
- "dilate": "(1,1)",
- "kernel": "(3,3)",
- "no_bias": "False",
- "num_filter": "64",
- "num_group": "1",
- "pad": "(0,0)",
- "stride": "(1,1)",
- "workspace": "1024"
- },
- "name": "conv2",
- "inputs": [[6, 0], [7, 0], [8, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "prelu2_gamma",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "LeakyReLU",
- "param": {
- "act_type": "prelu",
- "lower_bound": "0.125",
- "slope": "0.25",
- "upper_bound": "0.334"
- },
- "name": "prelu2",
- "inputs": [[9, 0], [10, 0]],
- "backward_source_id": -1
- },
- {
- "op": "Pooling",
- "param": {
- "global_pool": "False",
- "kernel": "(3,3)",
- "pad": "(0,0)",
- "pool_type": "max",
- "pooling_convention": "full",
- "stride": "(2,2)"
- },
- "name": "pool2",
- "inputs": [[11, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv3_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv3_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "Convolution",
- "param": {
- "cudnn_off": "False",
- "cudnn_tune": "off",
- "dilate": "(1,1)",
- "kernel": "(3,3)",
- "no_bias": "False",
- "num_filter": "64",
- "num_group": "1",
- "pad": "(0,0)",
- "stride": "(1,1)",
- "workspace": "1024"
- },
- "name": "conv3",
- "inputs": [[12, 0], [13, 0], [14, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "prelu3_gamma",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "LeakyReLU",
- "param": {
- "act_type": "prelu",
- "lower_bound": "0.125",
- "slope": "0.25",
- "upper_bound": "0.334"
- },
- "name": "prelu3",
- "inputs": [[15, 0], [16, 0]],
- "backward_source_id": -1
- },
- {
- "op": "Pooling",
- "param": {
- "global_pool": "False",
- "kernel": "(2,2)",
- "pad": "(0,0)",
- "pool_type": "max",
- "pooling_convention": "full",
- "stride": "(2,2)"
- },
- "name": "pool3",
- "inputs": [[17, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv4_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv4_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "Convolution",
- "param": {
- "cudnn_off": "False",
- "cudnn_tune": "off",
- "dilate": "(1,1)",
- "kernel": "(2,2)",
- "no_bias": "False",
- "num_filter": "128",
- "num_group": "1",
- "pad": "(0,0)",
- "stride": "(1,1)",
- "workspace": "1024"
- },
- "name": "conv4",
- "inputs": [[18, 0], [19, 0], [20, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "prelu4_gamma",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "LeakyReLU",
- "param": {
- "act_type": "prelu",
- "lower_bound": "0.125",
- "slope": "0.25",
- "upper_bound": "0.334"
- },
- "name": "prelu4",
- "inputs": [[21, 0], [22, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv5_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv5_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "FullyConnected",
- "param": {
- "no_bias": "False",
- "num_hidden": "256"
- },
- "name": "conv5",
- "inputs": [[23, 0], [24, 0], [25, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "prelu5_gamma",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "LeakyReLU",
- "param": {
- "act_type": "prelu",
- "lower_bound": "0.125",
- "slope": "0.25",
- "upper_bound": "0.334"
- },
- "name": "prelu5",
- "inputs": [[26, 0], [27, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv6_3_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv6_3_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "FullyConnected",
- "param": {
- "no_bias": "False",
- "num_hidden": "10"
- },
- "name": "conv6_3",
- "inputs": [[28, 0], [29, 0], [30, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv6_2_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv6_2_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "FullyConnected",
- "param": {
- "no_bias": "False",
- "num_hidden": "4"
- },
- "name": "conv6_2",
- "inputs": [[28, 0], [32, 0], [33, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv6_1_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv6_1_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "FullyConnected",
- "param": {
- "no_bias": "False",
- "num_hidden": "2"
- },
- "name": "conv6_1",
- "inputs": [[28, 0], [35, 0], [36, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "prob1_label",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "SoftmaxOutput",
- "param": {
- "grad_scale": "1",
- "ignore_label": "-1",
- "multi_output": "False",
- "normalization": "null",
- "use_ignore": "False"
- },
- "name": "prob1",
- "inputs": [[37, 0], [38, 0]],
- "backward_source_id": -1
- }
- ],
- "arg_nodes": [
- 0,
- 1,
- 2,
- 4,
- 7,
- 8,
- 10,
- 13,
- 14,
- 16,
- 19,
- 20,
- 22,
- 24,
- 25,
- 27,
- 29,
- 30,
- 32,
- 33,
- 35,
- 36,
- 38
- ],
- "heads": [[31, 0], [34, 0], [39, 0]]
-}
\ No newline at end of file
diff --git a/gender-age/mtcnn-model/det3.caffemodel b/gender-age/mtcnn-model/det3.caffemodel
deleted file mode 100644
index 7b4b8a4..0000000
Binary files a/gender-age/mtcnn-model/det3.caffemodel and /dev/null differ
diff --git a/gender-age/mtcnn-model/det3.prototxt b/gender-age/mtcnn-model/det3.prototxt
deleted file mode 100644
index a192307..0000000
--- a/gender-age/mtcnn-model/det3.prototxt
+++ /dev/null
@@ -1,294 +0,0 @@
-name: "ONet"
-input: "data"
-input_dim: 1
-input_dim: 3
-input_dim: 48
-input_dim: 48
-##################################
-layer {
- name: "conv1"
- type: "Convolution"
- bottom: "data"
- top: "conv1"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 1
- }
- convolution_param {
- num_output: 32
- kernel_size: 3
- stride: 1
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-}
-layer {
- name: "prelu1"
- type: "PReLU"
- bottom: "conv1"
- top: "conv1"
-}
-layer {
- name: "pool1"
- type: "Pooling"
- bottom: "conv1"
- top: "pool1"
- pooling_param {
- pool: MAX
- kernel_size: 3
- stride: 2
- }
-}
-layer {
- name: "conv2"
- type: "Convolution"
- bottom: "pool1"
- top: "conv2"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 1
- }
- convolution_param {
- num_output: 64
- kernel_size: 3
- stride: 1
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-}
-
-layer {
- name: "prelu2"
- type: "PReLU"
- bottom: "conv2"
- top: "conv2"
-}
-layer {
- name: "pool2"
- type: "Pooling"
- bottom: "conv2"
- top: "pool2"
- pooling_param {
- pool: MAX
- kernel_size: 3
- stride: 2
- }
-}
-
-layer {
- name: "conv3"
- type: "Convolution"
- bottom: "pool2"
- top: "conv3"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 1
- }
- convolution_param {
- num_output: 64
- kernel_size: 3
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-}
-layer {
- name: "prelu3"
- type: "PReLU"
- bottom: "conv3"
- top: "conv3"
-}
-layer {
- name: "pool3"
- type: "Pooling"
- bottom: "conv3"
- top: "pool3"
- pooling_param {
- pool: MAX
- kernel_size: 2
- stride: 2
- }
-}
-layer {
- name: "conv4"
- type: "Convolution"
- bottom: "pool3"
- top: "conv4"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 1
- }
- convolution_param {
- num_output: 128
- kernel_size: 2
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-}
-layer {
- name: "prelu4"
- type: "PReLU"
- bottom: "conv4"
- top: "conv4"
-}
-
-
-layer {
- name: "conv5"
- type: "InnerProduct"
- bottom: "conv4"
- top: "conv5"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 1
- }
- inner_product_param {
- #kernel_size: 3
- num_output: 256
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-}
-
-layer {
- name: "drop5"
- type: "Dropout"
- bottom: "conv5"
- top: "conv5"
- dropout_param {
- dropout_ratio: 0.25
- }
-}
-layer {
- name: "prelu5"
- type: "PReLU"
- bottom: "conv5"
- top: "conv5"
-}
-
-
-layer {
- name: "conv6-1"
- type: "InnerProduct"
- bottom: "conv5"
- top: "conv6-1"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 1
- }
- inner_product_param {
- #kernel_size: 1
- num_output: 2
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-}
-layer {
- name: "conv6-2"
- type: "InnerProduct"
- bottom: "conv5"
- top: "conv6-2"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 1
- }
- inner_product_param {
- #kernel_size: 1
- num_output: 4
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-}
-layer {
- name: "conv6-3"
- type: "InnerProduct"
- bottom: "conv5"
- top: "conv6-3"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 1
- }
- inner_product_param {
- #kernel_size: 1
- num_output: 10
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-}
-layer {
- name: "prob1"
- type: "Softmax"
- bottom: "conv6-1"
- top: "prob1"
-}
diff --git a/gender-age/mtcnn-model/det4-0001.params b/gender-age/mtcnn-model/det4-0001.params
deleted file mode 100644
index efca9a9..0000000
Binary files a/gender-age/mtcnn-model/det4-0001.params and /dev/null differ
diff --git a/gender-age/mtcnn-model/det4-symbol.json b/gender-age/mtcnn-model/det4-symbol.json
deleted file mode 100644
index aa90e2a..0000000
--- a/gender-age/mtcnn-model/det4-symbol.json
+++ /dev/null
@@ -1,1392 +0,0 @@
-{
- "nodes": [
- {
- "op": "null",
- "param": {},
- "name": "data",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "SliceChannel",
- "param": {
- "axis": "1",
- "num_outputs": "5",
- "squeeze_axis": "False"
- },
- "name": "slice",
- "inputs": [[0, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv1_1_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv1_1_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "Convolution",
- "param": {
- "cudnn_off": "False",
- "cudnn_tune": "off",
- "dilate": "(1,1)",
- "kernel": "(3,3)",
- "no_bias": "False",
- "num_filter": "28",
- "num_group": "1",
- "pad": "(0,0)",
- "stride": "(1,1)",
- "workspace": "1024"
- },
- "name": "conv1_1",
- "inputs": [[1, 0], [2, 0], [3, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "prelu1_1_gamma",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "LeakyReLU",
- "param": {
- "act_type": "prelu",
- "lower_bound": "0.125",
- "slope": "0.25",
- "upper_bound": "0.334"
- },
- "name": "prelu1_1",
- "inputs": [[4, 0], [5, 0]],
- "backward_source_id": -1
- },
- {
- "op": "Pooling",
- "param": {
- "global_pool": "False",
- "kernel": "(3,3)",
- "pad": "(0,0)",
- "pool_type": "max",
- "pooling_convention": "full",
- "stride": "(2,2)"
- },
- "name": "pool1_1",
- "inputs": [[6, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv2_1_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv2_1_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "Convolution",
- "param": {
- "cudnn_off": "False",
- "cudnn_tune": "off",
- "dilate": "(1,1)",
- "kernel": "(3,3)",
- "no_bias": "False",
- "num_filter": "48",
- "num_group": "1",
- "pad": "(0,0)",
- "stride": "(1,1)",
- "workspace": "1024"
- },
- "name": "conv2_1",
- "inputs": [[7, 0], [8, 0], [9, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "prelu2_1_gamma",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "LeakyReLU",
- "param": {
- "act_type": "prelu",
- "lower_bound": "0.125",
- "slope": "0.25",
- "upper_bound": "0.334"
- },
- "name": "prelu2_1",
- "inputs": [[10, 0], [11, 0]],
- "backward_source_id": -1
- },
- {
- "op": "Pooling",
- "param": {
- "global_pool": "False",
- "kernel": "(3,3)",
- "pad": "(0,0)",
- "pool_type": "max",
- "pooling_convention": "full",
- "stride": "(2,2)"
- },
- "name": "pool2_1",
- "inputs": [[12, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv3_1_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv3_1_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "Convolution",
- "param": {
- "cudnn_off": "False",
- "cudnn_tune": "off",
- "dilate": "(1,1)",
- "kernel": "(2,2)",
- "no_bias": "False",
- "num_filter": "64",
- "num_group": "1",
- "pad": "(0,0)",
- "stride": "(1,1)",
- "workspace": "1024"
- },
- "name": "conv3_1",
- "inputs": [[13, 0], [14, 0], [15, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "prelu3_1_gamma",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "LeakyReLU",
- "param": {
- "act_type": "prelu",
- "lower_bound": "0.125",
- "slope": "0.25",
- "upper_bound": "0.334"
- },
- "name": "prelu3_1",
- "inputs": [[16, 0], [17, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv1_2_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv1_2_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "Convolution",
- "param": {
- "cudnn_off": "False",
- "cudnn_tune": "off",
- "dilate": "(1,1)",
- "kernel": "(3,3)",
- "no_bias": "False",
- "num_filter": "28",
- "num_group": "1",
- "pad": "(0,0)",
- "stride": "(1,1)",
- "workspace": "1024"
- },
- "name": "conv1_2",
- "inputs": [[1, 1], [19, 0], [20, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "prelu1_2_gamma",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "LeakyReLU",
- "param": {
- "act_type": "prelu",
- "lower_bound": "0.125",
- "slope": "0.25",
- "upper_bound": "0.334"
- },
- "name": "prelu1_2",
- "inputs": [[21, 0], [22, 0]],
- "backward_source_id": -1
- },
- {
- "op": "Pooling",
- "param": {
- "global_pool": "False",
- "kernel": "(3,3)",
- "pad": "(0,0)",
- "pool_type": "max",
- "pooling_convention": "full",
- "stride": "(2,2)"
- },
- "name": "pool1_2",
- "inputs": [[23, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv2_2_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv2_2_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "Convolution",
- "param": {
- "cudnn_off": "False",
- "cudnn_tune": "off",
- "dilate": "(1,1)",
- "kernel": "(3,3)",
- "no_bias": "False",
- "num_filter": "48",
- "num_group": "1",
- "pad": "(0,0)",
- "stride": "(1,1)",
- "workspace": "1024"
- },
- "name": "conv2_2",
- "inputs": [[24, 0], [25, 0], [26, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "prelu2_2_gamma",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "LeakyReLU",
- "param": {
- "act_type": "prelu",
- "lower_bound": "0.125",
- "slope": "0.25",
- "upper_bound": "0.334"
- },
- "name": "prelu2_2",
- "inputs": [[27, 0], [28, 0]],
- "backward_source_id": -1
- },
- {
- "op": "Pooling",
- "param": {
- "global_pool": "False",
- "kernel": "(3,3)",
- "pad": "(0,0)",
- "pool_type": "max",
- "pooling_convention": "full",
- "stride": "(2,2)"
- },
- "name": "pool2_2",
- "inputs": [[29, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv3_2_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv3_2_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "Convolution",
- "param": {
- "cudnn_off": "False",
- "cudnn_tune": "off",
- "dilate": "(1,1)",
- "kernel": "(2,2)",
- "no_bias": "False",
- "num_filter": "64",
- "num_group": "1",
- "pad": "(0,0)",
- "stride": "(1,1)",
- "workspace": "1024"
- },
- "name": "conv3_2",
- "inputs": [[30, 0], [31, 0], [32, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "prelu3_2_gamma",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "LeakyReLU",
- "param": {
- "act_type": "prelu",
- "lower_bound": "0.125",
- "slope": "0.25",
- "upper_bound": "0.334"
- },
- "name": "prelu3_2",
- "inputs": [[33, 0], [34, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv1_3_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv1_3_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "Convolution",
- "param": {
- "cudnn_off": "False",
- "cudnn_tune": "off",
- "dilate": "(1,1)",
- "kernel": "(3,3)",
- "no_bias": "False",
- "num_filter": "28",
- "num_group": "1",
- "pad": "(0,0)",
- "stride": "(1,1)",
- "workspace": "1024"
- },
- "name": "conv1_3",
- "inputs": [[1, 2], [36, 0], [37, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "prelu1_3_gamma",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "LeakyReLU",
- "param": {
- "act_type": "prelu",
- "lower_bound": "0.125",
- "slope": "0.25",
- "upper_bound": "0.334"
- },
- "name": "prelu1_3",
- "inputs": [[38, 0], [39, 0]],
- "backward_source_id": -1
- },
- {
- "op": "Pooling",
- "param": {
- "global_pool": "False",
- "kernel": "(3,3)",
- "pad": "(0,0)",
- "pool_type": "max",
- "pooling_convention": "full",
- "stride": "(2,2)"
- },
- "name": "pool1_3",
- "inputs": [[40, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv2_3_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv2_3_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "Convolution",
- "param": {
- "cudnn_off": "False",
- "cudnn_tune": "off",
- "dilate": "(1,1)",
- "kernel": "(3,3)",
- "no_bias": "False",
- "num_filter": "48",
- "num_group": "1",
- "pad": "(0,0)",
- "stride": "(1,1)",
- "workspace": "1024"
- },
- "name": "conv2_3",
- "inputs": [[41, 0], [42, 0], [43, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "prelu2_3_gamma",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "LeakyReLU",
- "param": {
- "act_type": "prelu",
- "lower_bound": "0.125",
- "slope": "0.25",
- "upper_bound": "0.334"
- },
- "name": "prelu2_3",
- "inputs": [[44, 0], [45, 0]],
- "backward_source_id": -1
- },
- {
- "op": "Pooling",
- "param": {
- "global_pool": "False",
- "kernel": "(3,3)",
- "pad": "(0,0)",
- "pool_type": "max",
- "pooling_convention": "full",
- "stride": "(2,2)"
- },
- "name": "pool2_3",
- "inputs": [[46, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv3_3_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv3_3_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "Convolution",
- "param": {
- "cudnn_off": "False",
- "cudnn_tune": "off",
- "dilate": "(1,1)",
- "kernel": "(2,2)",
- "no_bias": "False",
- "num_filter": "64",
- "num_group": "1",
- "pad": "(0,0)",
- "stride": "(1,1)",
- "workspace": "1024"
- },
- "name": "conv3_3",
- "inputs": [[47, 0], [48, 0], [49, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "prelu3_3_gamma",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "LeakyReLU",
- "param": {
- "act_type": "prelu",
- "lower_bound": "0.125",
- "slope": "0.25",
- "upper_bound": "0.334"
- },
- "name": "prelu3_3",
- "inputs": [[50, 0], [51, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv1_4_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv1_4_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "Convolution",
- "param": {
- "cudnn_off": "False",
- "cudnn_tune": "off",
- "dilate": "(1,1)",
- "kernel": "(3,3)",
- "no_bias": "False",
- "num_filter": "28",
- "num_group": "1",
- "pad": "(0,0)",
- "stride": "(1,1)",
- "workspace": "1024"
- },
- "name": "conv1_4",
- "inputs": [[1, 3], [53, 0], [54, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "prelu1_4_gamma",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "LeakyReLU",
- "param": {
- "act_type": "prelu",
- "lower_bound": "0.125",
- "slope": "0.25",
- "upper_bound": "0.334"
- },
- "name": "prelu1_4",
- "inputs": [[55, 0], [56, 0]],
- "backward_source_id": -1
- },
- {
- "op": "Pooling",
- "param": {
- "global_pool": "False",
- "kernel": "(3,3)",
- "pad": "(0,0)",
- "pool_type": "max",
- "pooling_convention": "full",
- "stride": "(2,2)"
- },
- "name": "pool1_4",
- "inputs": [[57, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv2_4_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv2_4_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "Convolution",
- "param": {
- "cudnn_off": "False",
- "cudnn_tune": "off",
- "dilate": "(1,1)",
- "kernel": "(3,3)",
- "no_bias": "False",
- "num_filter": "48",
- "num_group": "1",
- "pad": "(0,0)",
- "stride": "(1,1)",
- "workspace": "1024"
- },
- "name": "conv2_4",
- "inputs": [[58, 0], [59, 0], [60, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "prelu2_4_gamma",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "LeakyReLU",
- "param": {
- "act_type": "prelu",
- "lower_bound": "0.125",
- "slope": "0.25",
- "upper_bound": "0.334"
- },
- "name": "prelu2_4",
- "inputs": [[61, 0], [62, 0]],
- "backward_source_id": -1
- },
- {
- "op": "Pooling",
- "param": {
- "global_pool": "False",
- "kernel": "(3,3)",
- "pad": "(0,0)",
- "pool_type": "max",
- "pooling_convention": "full",
- "stride": "(2,2)"
- },
- "name": "pool2_4",
- "inputs": [[63, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv3_4_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv3_4_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "Convolution",
- "param": {
- "cudnn_off": "False",
- "cudnn_tune": "off",
- "dilate": "(1,1)",
- "kernel": "(2,2)",
- "no_bias": "False",
- "num_filter": "64",
- "num_group": "1",
- "pad": "(0,0)",
- "stride": "(1,1)",
- "workspace": "1024"
- },
- "name": "conv3_4",
- "inputs": [[64, 0], [65, 0], [66, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "prelu3_4_gamma",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "LeakyReLU",
- "param": {
- "act_type": "prelu",
- "lower_bound": "0.125",
- "slope": "0.25",
- "upper_bound": "0.334"
- },
- "name": "prelu3_4",
- "inputs": [[67, 0], [68, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv1_5_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv1_5_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "Convolution",
- "param": {
- "cudnn_off": "False",
- "cudnn_tune": "off",
- "dilate": "(1,1)",
- "kernel": "(3,3)",
- "no_bias": "False",
- "num_filter": "28",
- "num_group": "1",
- "pad": "(0,0)",
- "stride": "(1,1)",
- "workspace": "1024"
- },
- "name": "conv1_5",
- "inputs": [[1, 4], [70, 0], [71, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "prelu1_5_gamma",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "LeakyReLU",
- "param": {
- "act_type": "prelu",
- "lower_bound": "0.125",
- "slope": "0.25",
- "upper_bound": "0.334"
- },
- "name": "prelu1_5",
- "inputs": [[72, 0], [73, 0]],
- "backward_source_id": -1
- },
- {
- "op": "Pooling",
- "param": {
- "global_pool": "False",
- "kernel": "(3,3)",
- "pad": "(0,0)",
- "pool_type": "max",
- "pooling_convention": "full",
- "stride": "(2,2)"
- },
- "name": "pool1_5",
- "inputs": [[74, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv2_5_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv2_5_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "Convolution",
- "param": {
- "cudnn_off": "False",
- "cudnn_tune": "off",
- "dilate": "(1,1)",
- "kernel": "(3,3)",
- "no_bias": "False",
- "num_filter": "48",
- "num_group": "1",
- "pad": "(0,0)",
- "stride": "(1,1)",
- "workspace": "1024"
- },
- "name": "conv2_5",
- "inputs": [[75, 0], [76, 0], [77, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "prelu2_5_gamma",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "LeakyReLU",
- "param": {
- "act_type": "prelu",
- "lower_bound": "0.125",
- "slope": "0.25",
- "upper_bound": "0.334"
- },
- "name": "prelu2_5",
- "inputs": [[78, 0], [79, 0]],
- "backward_source_id": -1
- },
- {
- "op": "Pooling",
- "param": {
- "global_pool": "False",
- "kernel": "(3,3)",
- "pad": "(0,0)",
- "pool_type": "max",
- "pooling_convention": "full",
- "stride": "(2,2)"
- },
- "name": "pool2_5",
- "inputs": [[80, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv3_5_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "conv3_5_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "Convolution",
- "param": {
- "cudnn_off": "False",
- "cudnn_tune": "off",
- "dilate": "(1,1)",
- "kernel": "(2,2)",
- "no_bias": "False",
- "num_filter": "64",
- "num_group": "1",
- "pad": "(0,0)",
- "stride": "(1,1)",
- "workspace": "1024"
- },
- "name": "conv3_5",
- "inputs": [[81, 0], [82, 0], [83, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "prelu3_5_gamma",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "LeakyReLU",
- "param": {
- "act_type": "prelu",
- "lower_bound": "0.125",
- "slope": "0.25",
- "upper_bound": "0.334"
- },
- "name": "prelu3_5",
- "inputs": [[84, 0], [85, 0]],
- "backward_source_id": -1
- },
- {
- "op": "Concat",
- "param": {
- "dim": "1",
- "num_args": "5"
- },
- "name": "concat",
- "inputs": [[18, 0], [35, 0], [52, 0], [69, 0], [86, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "fc4_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "fc4_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "FullyConnected",
- "param": {
- "no_bias": "False",
- "num_hidden": "256"
- },
- "name": "fc4",
- "inputs": [[87, 0], [88, 0], [89, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "prelu4_gamma",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "LeakyReLU",
- "param": {
- "act_type": "prelu",
- "lower_bound": "0.125",
- "slope": "0.25",
- "upper_bound": "0.334"
- },
- "name": "prelu4",
- "inputs": [[90, 0], [91, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "fc4_1_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "fc4_1_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "FullyConnected",
- "param": {
- "no_bias": "False",
- "num_hidden": "64"
- },
- "name": "fc4_1",
- "inputs": [[92, 0], [93, 0], [94, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "prelu4_1_gamma",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "LeakyReLU",
- "param": {
- "act_type": "prelu",
- "lower_bound": "0.125",
- "slope": "0.25",
- "upper_bound": "0.334"
- },
- "name": "prelu4_1",
- "inputs": [[95, 0], [96, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "fc5_1_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "fc5_1_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "FullyConnected",
- "param": {
- "no_bias": "False",
- "num_hidden": "2"
- },
- "name": "fc5_1",
- "inputs": [[97, 0], [98, 0], [99, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "fc4_2_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "fc4_2_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "FullyConnected",
- "param": {
- "no_bias": "False",
- "num_hidden": "64"
- },
- "name": "fc4_2",
- "inputs": [[92, 0], [101, 0], [102, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "prelu4_2_gamma",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "LeakyReLU",
- "param": {
- "act_type": "prelu",
- "lower_bound": "0.125",
- "slope": "0.25",
- "upper_bound": "0.334"
- },
- "name": "prelu4_2",
- "inputs": [[103, 0], [104, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "fc5_2_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "fc5_2_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "FullyConnected",
- "param": {
- "no_bias": "False",
- "num_hidden": "2"
- },
- "name": "fc5_2",
- "inputs": [[105, 0], [106, 0], [107, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "fc4_3_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "fc4_3_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "FullyConnected",
- "param": {
- "no_bias": "False",
- "num_hidden": "64"
- },
- "name": "fc4_3",
- "inputs": [[92, 0], [109, 0], [110, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "prelu4_3_gamma",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "LeakyReLU",
- "param": {
- "act_type": "prelu",
- "lower_bound": "0.125",
- "slope": "0.25",
- "upper_bound": "0.334"
- },
- "name": "prelu4_3",
- "inputs": [[111, 0], [112, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "fc5_3_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "fc5_3_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "FullyConnected",
- "param": {
- "no_bias": "False",
- "num_hidden": "2"
- },
- "name": "fc5_3",
- "inputs": [[113, 0], [114, 0], [115, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "fc4_4_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "fc4_4_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "FullyConnected",
- "param": {
- "no_bias": "False",
- "num_hidden": "64"
- },
- "name": "fc4_4",
- "inputs": [[92, 0], [117, 0], [118, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "prelu4_4_gamma",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "LeakyReLU",
- "param": {
- "act_type": "prelu",
- "lower_bound": "0.125",
- "slope": "0.25",
- "upper_bound": "0.334"
- },
- "name": "prelu4_4",
- "inputs": [[119, 0], [120, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "fc5_4_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "fc5_4_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "FullyConnected",
- "param": {
- "no_bias": "False",
- "num_hidden": "2"
- },
- "name": "fc5_4",
- "inputs": [[121, 0], [122, 0], [123, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "fc4_5_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "fc4_5_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "FullyConnected",
- "param": {
- "no_bias": "False",
- "num_hidden": "64"
- },
- "name": "fc4_5",
- "inputs": [[92, 0], [125, 0], [126, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "prelu4_5_gamma",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "LeakyReLU",
- "param": {
- "act_type": "prelu",
- "lower_bound": "0.125",
- "slope": "0.25",
- "upper_bound": "0.334"
- },
- "name": "prelu4_5",
- "inputs": [[127, 0], [128, 0]],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "fc5_5_weight",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "null",
- "param": {},
- "name": "fc5_5_bias",
- "inputs": [],
- "backward_source_id": -1
- },
- {
- "op": "FullyConnected",
- "param": {
- "no_bias": "False",
- "num_hidden": "2"
- },
- "name": "fc5_5",
- "inputs": [[129, 0], [130, 0], [131, 0]],
- "backward_source_id": -1
- }
- ],
- "arg_nodes": [
- 0,
- 2,
- 3,
- 5,
- 8,
- 9,
- 11,
- 14,
- 15,
- 17,
- 19,
- 20,
- 22,
- 25,
- 26,
- 28,
- 31,
- 32,
- 34,
- 36,
- 37,
- 39,
- 42,
- 43,
- 45,
- 48,
- 49,
- 51,
- 53,
- 54,
- 56,
- 59,
- 60,
- 62,
- 65,
- 66,
- 68,
- 70,
- 71,
- 73,
- 76,
- 77,
- 79,
- 82,
- 83,
- 85,
- 88,
- 89,
- 91,
- 93,
- 94,
- 96,
- 98,
- 99,
- 101,
- 102,
- 104,
- 106,
- 107,
- 109,
- 110,
- 112,
- 114,
- 115,
- 117,
- 118,
- 120,
- 122,
- 123,
- 125,
- 126,
- 128,
- 130,
- 131
- ],
- "heads": [[100, 0], [108, 0], [116, 0], [124, 0], [132, 0]]
-}
\ No newline at end of file
diff --git a/gender-age/mtcnn-model/det4.caffemodel b/gender-age/mtcnn-model/det4.caffemodel
deleted file mode 100644
index 38353c4..0000000
Binary files a/gender-age/mtcnn-model/det4.caffemodel and /dev/null differ
diff --git a/gender-age/mtcnn-model/det4.prototxt b/gender-age/mtcnn-model/det4.prototxt
deleted file mode 100644
index 4cdc329..0000000
--- a/gender-age/mtcnn-model/det4.prototxt
+++ /dev/null
@@ -1,995 +0,0 @@
-name: "LNet"
-input: "data"
-input_dim: 1
-input_dim: 15
-input_dim: 24
-input_dim: 24
-
-layer {
- name: "slicer_data"
- type: "Slice"
- bottom: "data"
- top: "data241"
- top: "data242"
- top: "data243"
- top: "data244"
- top: "data245"
- slice_param {
- axis: 1
- slice_point: 3
- slice_point: 6
- slice_point: 9
- slice_point: 12
- }
-}
-layer {
- name: "conv1_1"
- type: "Convolution"
- bottom: "data241"
- top: "conv1_1"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 1
- }
- convolution_param {
- num_output: 28
- kernel_size: 3
- stride: 1
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-
-}
-layer {
- name: "prelu1_1"
- type: "PReLU"
- bottom: "conv1_1"
- top: "conv1_1"
-
-}
-layer {
- name: "pool1_1"
- type: "Pooling"
- bottom: "conv1_1"
- top: "pool1_1"
- pooling_param {
- pool: MAX
- kernel_size: 3
- stride: 2
- }
-}
-
-layer {
- name: "conv2_1"
- type: "Convolution"
- bottom: "pool1_1"
- top: "conv2_1"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 1
- }
- convolution_param {
- num_output: 48
- kernel_size: 3
- stride: 1
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-
-}
-layer {
- name: "prelu2_1"
- type: "PReLU"
- bottom: "conv2_1"
- top: "conv2_1"
-}
-layer {
- name: "pool2_1"
- type: "Pooling"
- bottom: "conv2_1"
- top: "pool2_1"
- pooling_param {
- pool: MAX
- kernel_size: 3
- stride: 2
- }
-
-}
-layer {
- name: "conv3_1"
- type: "Convolution"
- bottom: "pool2_1"
- top: "conv3_1"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 1
- }
- convolution_param {
- num_output: 64
- kernel_size: 2
- stride: 1
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-
-}
-layer {
- name: "prelu3_1"
- type: "PReLU"
- bottom: "conv3_1"
- top: "conv3_1"
-}
-##########################
-layer {
- name: "conv1_2"
- type: "Convolution"
- bottom: "data242"
- top: "conv1_2"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 1
- }
- convolution_param {
- num_output: 28
- kernel_size: 3
- stride: 1
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-
-}
-layer {
- name: "prelu1_2"
- type: "PReLU"
- bottom: "conv1_2"
- top: "conv1_2"
-
-}
-layer {
- name: "pool1_2"
- type: "Pooling"
- bottom: "conv1_2"
- top: "pool1_2"
- pooling_param {
- pool: MAX
- kernel_size: 3
- stride: 2
- }
-}
-
-layer {
- name: "conv2_2"
- type: "Convolution"
- bottom: "pool1_2"
- top: "conv2_2"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 1
- }
- convolution_param {
- num_output: 48
- kernel_size: 3
- stride: 1
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-
-}
-layer {
- name: "prelu2_2"
- type: "PReLU"
- bottom: "conv2_2"
- top: "conv2_2"
-}
-layer {
- name: "pool2_2"
- type: "Pooling"
- bottom: "conv2_2"
- top: "pool2_2"
- pooling_param {
- pool: MAX
- kernel_size: 3
- stride: 2
- }
-
-}
-layer {
- name: "conv3_2"
- type: "Convolution"
- bottom: "pool2_2"
- top: "conv3_2"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 1
- }
- convolution_param {
- num_output: 64
- kernel_size: 2
- stride: 1
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-
-}
-layer {
- name: "prelu3_2"
- type: "PReLU"
- bottom: "conv3_2"
- top: "conv3_2"
-}
-##########################
-##########################
-layer {
- name: "conv1_3"
- type: "Convolution"
- bottom: "data243"
- top: "conv1_3"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 1
- }
- convolution_param {
- num_output: 28
- kernel_size: 3
- stride: 1
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-
-}
-layer {
- name: "prelu1_3"
- type: "PReLU"
- bottom: "conv1_3"
- top: "conv1_3"
-
-}
-layer {
- name: "pool1_3"
- type: "Pooling"
- bottom: "conv1_3"
- top: "pool1_3"
- pooling_param {
- pool: MAX
- kernel_size: 3
- stride: 2
- }
-}
-
-layer {
- name: "conv2_3"
- type: "Convolution"
- bottom: "pool1_3"
- top: "conv2_3"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 1
- }
- convolution_param {
- num_output: 48
- kernel_size: 3
- stride: 1
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-
-}
-layer {
- name: "prelu2_3"
- type: "PReLU"
- bottom: "conv2_3"
- top: "conv2_3"
-}
-layer {
- name: "pool2_3"
- type: "Pooling"
- bottom: "conv2_3"
- top: "pool2_3"
- pooling_param {
- pool: MAX
- kernel_size: 3
- stride: 2
- }
-
-}
-layer {
- name: "conv3_3"
- type: "Convolution"
- bottom: "pool2_3"
- top: "conv3_3"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 1
- }
- convolution_param {
- num_output: 64
- kernel_size: 2
- stride: 1
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-
-}
-layer {
- name: "prelu3_3"
- type: "PReLU"
- bottom: "conv3_3"
- top: "conv3_3"
-}
-##########################
-##########################
-layer {
- name: "conv1_4"
- type: "Convolution"
- bottom: "data244"
- top: "conv1_4"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 1
- }
- convolution_param {
- num_output: 28
- kernel_size: 3
- stride: 1
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-
-}
-layer {
- name: "prelu1_4"
- type: "PReLU"
- bottom: "conv1_4"
- top: "conv1_4"
-
-}
-layer {
- name: "pool1_4"
- type: "Pooling"
- bottom: "conv1_4"
- top: "pool1_4"
- pooling_param {
- pool: MAX
- kernel_size: 3
- stride: 2
- }
-}
-
-layer {
- name: "conv2_4"
- type: "Convolution"
- bottom: "pool1_4"
- top: "conv2_4"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 1
- }
- convolution_param {
- num_output: 48
- kernel_size: 3
- stride: 1
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-
-}
-layer {
- name: "prelu2_4"
- type: "PReLU"
- bottom: "conv2_4"
- top: "conv2_4"
-}
-layer {
- name: "pool2_4"
- type: "Pooling"
- bottom: "conv2_4"
- top: "pool2_4"
- pooling_param {
- pool: MAX
- kernel_size: 3
- stride: 2
- }
-
-}
-layer {
- name: "conv3_4"
- type: "Convolution"
- bottom: "pool2_4"
- top: "conv3_4"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 1
- }
- convolution_param {
- num_output: 64
- kernel_size: 2
- stride: 1
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-
-}
-layer {
- name: "prelu3_4"
- type: "PReLU"
- bottom: "conv3_4"
- top: "conv3_4"
-}
-##########################
-##########################
-layer {
- name: "conv1_5"
- type: "Convolution"
- bottom: "data245"
- top: "conv1_5"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 1
- }
- convolution_param {
- num_output: 28
- kernel_size: 3
- stride: 1
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-
-}
-layer {
- name: "prelu1_5"
- type: "PReLU"
- bottom: "conv1_5"
- top: "conv1_5"
-
-}
-layer {
- name: "pool1_5"
- type: "Pooling"
- bottom: "conv1_5"
- top: "pool1_5"
- pooling_param {
- pool: MAX
- kernel_size: 3
- stride: 2
- }
-}
-
-layer {
- name: "conv2_5"
- type: "Convolution"
- bottom: "pool1_5"
- top: "conv2_5"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 1
- }
- convolution_param {
- num_output: 48
- kernel_size: 3
- stride: 1
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-
-}
-layer {
- name: "prelu2_5"
- type: "PReLU"
- bottom: "conv2_5"
- top: "conv2_5"
-}
-layer {
- name: "pool2_5"
- type: "Pooling"
- bottom: "conv2_5"
- top: "pool2_5"
- pooling_param {
- pool: MAX
- kernel_size: 3
- stride: 2
- }
-
-}
-layer {
- name: "conv3_5"
- type: "Convolution"
- bottom: "pool2_5"
- top: "conv3_5"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 1
- }
- convolution_param {
- num_output: 64
- kernel_size: 2
- stride: 1
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-
-}
-layer {
- name: "prelu3_5"
- type: "PReLU"
- bottom: "conv3_5"
- top: "conv3_5"
-}
-##########################
-layer {
- name: "concat"
- bottom: "conv3_1"
- bottom: "conv3_2"
- bottom: "conv3_3"
- bottom: "conv3_4"
- bottom: "conv3_5"
- top: "conv3"
- type: "Concat"
- concat_param {
- axis: 1
- }
-}
-##########################
-layer {
- name: "fc4"
- type: "InnerProduct"
- bottom: "conv3"
- top: "fc4"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 1
- }
- inner_product_param {
- num_output: 256
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-
-}
-layer {
- name: "prelu4"
- type: "PReLU"
- bottom: "fc4"
- top: "fc4"
-}
-############################
-layer {
- name: "fc4_1"
- type: "InnerProduct"
- bottom: "fc4"
- top: "fc4_1"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 1
- }
- inner_product_param {
- num_output: 64
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-
-}
-layer {
- name: "prelu4_1"
- type: "PReLU"
- bottom: "fc4_1"
- top: "fc4_1"
-}
-layer {
- name: "fc5_1"
- type: "InnerProduct"
- bottom: "fc4_1"
- top: "fc5_1"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 1
- }
- inner_product_param {
- num_output: 2
- weight_filler {
- type: "xavier"
- #type: "constant"
- #value: 0
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-}
-
-
-#########################
-layer {
- name: "fc4_2"
- type: "InnerProduct"
- bottom: "fc4"
- top: "fc4_2"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 1
- }
- inner_product_param {
- num_output: 64
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-
-}
-layer {
- name: "prelu4_2"
- type: "PReLU"
- bottom: "fc4_2"
- top: "fc4_2"
-}
-layer {
- name: "fc5_2"
- type: "InnerProduct"
- bottom: "fc4_2"
- top: "fc5_2"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 1
- }
- inner_product_param {
- num_output: 2
- weight_filler {
- type: "xavier"
- #type: "constant"
- #value: 0
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-}
-
-#########################
-layer {
- name: "fc4_3"
- type: "InnerProduct"
- bottom: "fc4"
- top: "fc4_3"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 1
- }
- inner_product_param {
- num_output: 64
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-
-}
-layer {
- name: "prelu4_3"
- type: "PReLU"
- bottom: "fc4_3"
- top: "fc4_3"
-}
-layer {
- name: "fc5_3"
- type: "InnerProduct"
- bottom: "fc4_3"
- top: "fc5_3"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 1
- }
- inner_product_param {
- num_output: 2
- weight_filler {
- type: "xavier"
- #type: "constant"
- #value: 0
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-}
-
-#########################
-layer {
- name: "fc4_4"
- type: "InnerProduct"
- bottom: "fc4"
- top: "fc4_4"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 1
- }
- inner_product_param {
- num_output: 64
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-
-}
-layer {
- name: "prelu4_4"
- type: "PReLU"
- bottom: "fc4_4"
- top: "fc4_4"
-}
-layer {
- name: "fc5_4"
- type: "InnerProduct"
- bottom: "fc4_4"
- top: "fc5_4"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 1
- }
- inner_product_param {
- num_output: 2
- weight_filler {
- type: "xavier"
- #type: "constant"
- #value: 0
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-}
-
-#########################
-layer {
- name: "fc4_5"
- type: "InnerProduct"
- bottom: "fc4"
- top: "fc4_5"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 1
- }
- inner_product_param {
- num_output: 64
- weight_filler {
- type: "xavier"
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-
-}
-layer {
- name: "prelu4_5"
- type: "PReLU"
- bottom: "fc4_5"
- top: "fc4_5"
-}
-layer {
- name: "fc5_5"
- type: "InnerProduct"
- bottom: "fc4_5"
- top: "fc5_5"
- param {
- lr_mult: 1
- decay_mult: 1
- }
- param {
- lr_mult: 2
- decay_mult: 1
- }
- inner_product_param {
- num_output: 2
- weight_filler {
- type: "xavier"
- #type: "constant"
- #value: 0
- }
- bias_filler {
- type: "constant"
- value: 0
- }
- }
-}
-
-#########################
-
diff --git a/gender-age/mtcnn_detector.py b/gender-age/mtcnn_detector.py
deleted file mode 100644
index 1ce4146..0000000
--- a/gender-age/mtcnn_detector.py
+++ /dev/null
@@ -1,696 +0,0 @@
-# coding: utf-8
-import os
-import mxnet as mx
-import numpy as np
-import math
-import cv2
-from multiprocessing import Pool
-from itertools import repeat
-from itertools import izip
-from helper import nms, adjust_input, generate_bbox, detect_first_stage_warpper
-
-
-class MtcnnDetector(object):
- """
- Joint Face Detection and Alignment using Multi-task Cascaded Convolutional Neural Networks
- see https://github.com/kpzhang93/MTCNN_face_detection_alignment
- this is a mxnet version
- """
- def __init__(self,
- model_folder='.',
- minsize=20,
- threshold=[0.6, 0.7, 0.8],
- factor=0.709,
- num_worker=1,
- accurate_landmark=False,
- ctx=mx.cpu()):
- """
- Initialize the detector
-
- Parameters:
- ----------
- model_folder : string
- path for the models
- minsize : float number
- minimal face to detect
- threshold : float number
- detect threshold for 3 stages
- factor: float number
- scale factor for image pyramid
- num_worker: int number
- number of processes we use for first stage
- accurate_landmark: bool
- use accurate landmark localization or not
-
- """
- self.num_worker = num_worker
- self.accurate_landmark = accurate_landmark
-
- # load 4 models from folder
- models = ['det1', 'det2', 'det3', 'det4']
- models = [os.path.join(model_folder, f) for f in models]
-
- self.PNets = []
- for i in range(num_worker):
- workner_net = mx.model.FeedForward.load(models[0], 1, ctx=ctx)
- self.PNets.append(workner_net)
-
- #self.Pool = Pool(num_worker)
-
- self.RNet = mx.model.FeedForward.load(models[1], 1, ctx=ctx)
- self.ONet = mx.model.FeedForward.load(models[2], 1, ctx=ctx)
- self.LNet = mx.model.FeedForward.load(models[3], 1, ctx=ctx)
-
- self.minsize = float(minsize)
- self.factor = float(factor)
- self.threshold = threshold
-
- def convert_to_square(self, bbox):
- """
- convert bbox to square
-
- Parameters:
- ----------
- bbox: numpy array , shape n x 5
- input bbox
-
- Returns:
- -------
- square bbox
- """
- square_bbox = bbox.copy()
-
- h = bbox[:, 3] - bbox[:, 1] + 1
- w = bbox[:, 2] - bbox[:, 0] + 1
- max_side = np.maximum(h, w)
- square_bbox[:, 0] = bbox[:, 0] + w * 0.5 - max_side * 0.5
- square_bbox[:, 1] = bbox[:, 1] + h * 0.5 - max_side * 0.5
- square_bbox[:, 2] = square_bbox[:, 0] + max_side - 1
- square_bbox[:, 3] = square_bbox[:, 1] + max_side - 1
- return square_bbox
-
- def calibrate_box(self, bbox, reg):
- """
- calibrate bboxes
-
- Parameters:
- ----------
- bbox: numpy array, shape n x 5
- input bboxes
- reg: numpy array, shape n x 4
- bboxex adjustment
-
- Returns:
- -------
- bboxes after refinement
-
- """
- w = bbox[:, 2] - bbox[:, 0] + 1
- w = np.expand_dims(w, 1)
- h = bbox[:, 3] - bbox[:, 1] + 1
- h = np.expand_dims(h, 1)
- reg_m = np.hstack([w, h, w, h])
- aug = reg_m * reg
- bbox[:, 0:4] = bbox[:, 0:4] + aug
- return bbox
-
- def pad(self, bboxes, w, h):
- """
- pad the the bboxes, alse restrict the size of it
-
- Parameters:
- ----------
- bboxes: numpy array, n x 5
- input bboxes
- w: float number
- width of the input image
- h: float number
- height of the input image
- Returns :
- ------s
- dy, dx : numpy array, n x 1
- start point of the bbox in target image
- edy, edx : numpy array, n x 1
- end point of the bbox in target image
- y, x : numpy array, n x 1
- start point of the bbox in original image
- ex, ex : numpy array, n x 1
- end point of the bbox in original image
- tmph, tmpw: numpy array, n x 1
- height and width of the bbox
-
- """
- tmpw, tmph = bboxes[:, 2] - bboxes[:, 0] + 1, bboxes[:,
- 3] - bboxes[:,
- 1] + 1
- num_box = bboxes.shape[0]
-
- dx, dy = np.zeros((num_box, )), np.zeros((num_box, ))
- edx, edy = tmpw.copy() - 1, tmph.copy() - 1
-
- x, y, ex, ey = bboxes[:, 0], bboxes[:, 1], bboxes[:, 2], bboxes[:, 3]
-
- tmp_index = np.where(ex > w - 1)
- edx[tmp_index] = tmpw[tmp_index] + w - 2 - ex[tmp_index]
- ex[tmp_index] = w - 1
-
- tmp_index = np.where(ey > h - 1)
- edy[tmp_index] = tmph[tmp_index] + h - 2 - ey[tmp_index]
- ey[tmp_index] = h - 1
-
- tmp_index = np.where(x < 0)
- dx[tmp_index] = 0 - x[tmp_index]
- x[tmp_index] = 0
-
- tmp_index = np.where(y < 0)
- dy[tmp_index] = 0 - y[tmp_index]
- y[tmp_index] = 0
-
- return_list = [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph]
- return_list = [item.astype(np.int32) for item in return_list]
-
- return return_list
-
- def slice_index(self, number):
- """
- slice the index into (n,n,m), m < n
- Parameters:
- ----------
- number: int number
- number
- """
- def chunks(l, n):
- """Yield successive n-sized chunks from l."""
- for i in range(0, len(l), n):
- yield l[i:i + n]
-
- num_list = range(number)
- return list(chunks(num_list, self.num_worker))
-
- def detect_face_limited(self, img, det_type=2):
- height, width, _ = img.shape
- if det_type >= 2:
- total_boxes = np.array(
- [[0.0, 0.0, img.shape[1], img.shape[0], 0.9]],
- dtype=np.float32)
- num_box = total_boxes.shape[0]
-
- # pad the bbox
- [dy, edy, dx, edx, y, ey, x, ex, tmpw,
- tmph] = self.pad(total_boxes, width, height)
- # (3, 24, 24) is the input shape for RNet
- input_buf = np.zeros((num_box, 3, 24, 24), dtype=np.float32)
-
- for i in range(num_box):
- tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
- tmp[dy[i]:edy[i] + 1,
- dx[i]:edx[i] + 1, :] = img[y[i]:ey[i] + 1,
- x[i]:ex[i] + 1, :]
- input_buf[i, :, :, :] = adjust_input(cv2.resize(tmp, (24, 24)))
-
- output = self.RNet.predict(input_buf)
-
- # filter the total_boxes with threshold
- passed = np.where(output[1][:, 1] > self.threshold[1])
- total_boxes = total_boxes[passed]
-
- if total_boxes.size == 0:
- return None
-
- total_boxes[:, 4] = output[1][passed, 1].reshape((-1, ))
- reg = output[0][passed]
-
- # nms
- pick = nms(total_boxes, 0.7, 'Union')
- total_boxes = total_boxes[pick]
- total_boxes = self.calibrate_box(total_boxes, reg[pick])
- total_boxes = self.convert_to_square(total_boxes)
- total_boxes[:, 0:4] = np.round(total_boxes[:, 0:4])
- else:
- total_boxes = np.array(
- [[0.0, 0.0, img.shape[1], img.shape[0], 0.9]],
- dtype=np.float32)
- num_box = total_boxes.shape[0]
- [dy, edy, dx, edx, y, ey, x, ex, tmpw,
- tmph] = self.pad(total_boxes, width, height)
- # (3, 48, 48) is the input shape for ONet
- input_buf = np.zeros((num_box, 3, 48, 48), dtype=np.float32)
-
- for i in range(num_box):
- tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.float32)
- tmp[dy[i]:edy[i] + 1, dx[i]:edx[i] + 1, :] = img[y[i]:ey[i] + 1,
- x[i]:ex[i] + 1, :]
- input_buf[i, :, :, :] = adjust_input(cv2.resize(tmp, (48, 48)))
-
- output = self.ONet.predict(input_buf)
- #print(output[2])
-
- # filter the total_boxes with threshold
- passed = np.where(output[2][:, 1] > self.threshold[2])
- total_boxes = total_boxes[passed]
-
- if total_boxes.size == 0:
- return None
-
- total_boxes[:, 4] = output[2][passed, 1].reshape((-1, ))
- reg = output[1][passed]
- points = output[0][passed]
-
- # compute landmark points
- bbw = total_boxes[:, 2] - total_boxes[:, 0] + 1
- bbh = total_boxes[:, 3] - total_boxes[:, 1] + 1
- points[:, 0:5] = np.expand_dims(
- total_boxes[:, 0], 1) + np.expand_dims(bbw, 1) * points[:, 0:5]
- points[:, 5:10] = np.expand_dims(
- total_boxes[:, 1], 1) + np.expand_dims(bbh, 1) * points[:, 5:10]
-
- # nms
- total_boxes = self.calibrate_box(total_boxes, reg)
- pick = nms(total_boxes, 0.7, 'Min')
- total_boxes = total_boxes[pick]
- points = points[pick]
-
- if not self.accurate_landmark:
- return total_boxes, points
-
- #############################################
- # extended stage
- #############################################
- num_box = total_boxes.shape[0]
- patchw = np.maximum(total_boxes[:, 2] - total_boxes[:, 0] + 1,
- total_boxes[:, 3] - total_boxes[:, 1] + 1)
- patchw = np.round(patchw * 0.25)
-
- # make it even
- patchw[np.where(np.mod(patchw, 2) == 1)] += 1
-
- input_buf = np.zeros((num_box, 15, 24, 24), dtype=np.float32)
- for i in range(5):
- x, y = points[:, i], points[:, i + 5]
- x, y = np.round(x - 0.5 * patchw), np.round(y - 0.5 * patchw)
- [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(
- np.vstack([x, y, x + patchw - 1, y + patchw - 1]).T, width,
- height)
- for j in range(num_box):
- tmpim = np.zeros((tmpw[j], tmpw[j], 3), dtype=np.float32)
- tmpim[dy[j]:edy[j] + 1,
- dx[j]:edx[j] + 1, :] = img[y[j]:ey[j] + 1,
- x[j]:ex[j] + 1, :]
- input_buf[j, i * 3:i * 3 + 3, :, :] = adjust_input(
- cv2.resize(tmpim, (24, 24)))
-
- output = self.LNet.predict(input_buf)
-
- pointx = np.zeros((num_box, 5))
- pointy = np.zeros((num_box, 5))
-
- for k in range(5):
- # do not make a large movement
- tmp_index = np.where(np.abs(output[k] - 0.5) > 0.35)
- output[k][tmp_index[0]] = 0.5
-
- pointx[:, k] = np.round(points[:, k] -
- 0.5 * patchw) + output[k][:, 0] * patchw
- pointy[:, k] = np.round(points[:, k + 5] -
- 0.5 * patchw) + output[k][:, 1] * patchw
-
- points = np.hstack([pointx, pointy])
- points = points.astype(np.int32)
-
- return total_boxes, points
-
- def detect_face(self, img, det_type=0):
- """
- detect face over img
- Parameters:
- ----------
- img: numpy array, bgr order of shape (1, 3, n, m)
- input image
- Retures:
- -------
- bboxes: numpy array, n x 5 (x1,y2,x2,y2,score)
- bboxes
- points: numpy array, n x 10 (x1, x2 ... x5, y1, y2 ..y5)
- landmarks
- """
-
- # check input
- height, width, _ = img.shape
- if det_type == 0:
- MIN_DET_SIZE = 12
-
- if img is None:
- return None
-
- # only works for color image
- if len(img.shape) != 3:
- return None
-
- # detected boxes
- total_boxes = []
-
- minl = min(height, width)
-
- # get all the valid scales
- scales = []
- m = MIN_DET_SIZE / self.minsize
- minl *= m
- factor_count = 0
- while minl > MIN_DET_SIZE:
- scales.append(m * self.factor**factor_count)
- minl *= self.factor
- factor_count += 1
-
- #############################################
- # first stage
- #############################################
- #for scale in scales:
- # return_boxes = self.detect_first_stage(img, scale, 0)
- # if return_boxes is not None:
- # total_boxes.append(return_boxes)
-
- sliced_index = self.slice_index(len(scales))
- total_boxes = []
- for batch in sliced_index:
- #local_boxes = self.Pool.map( detect_first_stage_warpper, \
- # izip(repeat(img), self.PNets[:len(batch)], [scales[i] for i in batch], repeat(self.threshold[0])) )
- local_boxes = map( detect_first_stage_warpper, \
- izip(repeat(img), self.PNets[:len(batch)], [scales[i] for i in batch], repeat(self.threshold[0])) )
- total_boxes.extend(local_boxes)
-
- # remove the Nones
- total_boxes = [i for i in total_boxes if i is not None]
-
- if len(total_boxes) == 0:
- return None
-
- total_boxes = np.vstack(total_boxes)
-
- if total_boxes.size == 0:
- return None
-
- # merge the detection from first stage
- pick = nms(total_boxes[:, 0:5], 0.7, 'Union')
- total_boxes = total_boxes[pick]
-
- bbw = total_boxes[:, 2] - total_boxes[:, 0] + 1
- bbh = total_boxes[:, 3] - total_boxes[:, 1] + 1
-
- # refine the bboxes
- total_boxes = np.vstack([
- total_boxes[:, 0] + total_boxes[:, 5] * bbw,
- total_boxes[:, 1] + total_boxes[:, 6] * bbh,
- total_boxes[:, 2] + total_boxes[:, 7] * bbw,
- total_boxes[:, 3] + total_boxes[:, 8] * bbh, total_boxes[:, 4]
- ])
-
- total_boxes = total_boxes.T
- total_boxes = self.convert_to_square(total_boxes)
- total_boxes[:, 0:4] = np.round(total_boxes[:, 0:4])
- else:
- total_boxes = np.array(
- [[0.0, 0.0, img.shape[1], img.shape[0], 0.9]],
- dtype=np.float32)
-
- #############################################
- # second stage
- #############################################
- num_box = total_boxes.shape[0]
-
- # pad the bbox
- [dy, edy, dx, edx, y, ey, x, ex, tmpw,
- tmph] = self.pad(total_boxes, width, height)
- # (3, 24, 24) is the input shape for RNet
- input_buf = np.zeros((num_box, 3, 24, 24), dtype=np.float32)
-
- for i in range(num_box):
- tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
- tmp[dy[i]:edy[i] + 1, dx[i]:edx[i] + 1, :] = img[y[i]:ey[i] + 1,
- x[i]:ex[i] + 1, :]
- input_buf[i, :, :, :] = adjust_input(cv2.resize(tmp, (24, 24)))
-
- output = self.RNet.predict(input_buf)
-
- # filter the total_boxes with threshold
- passed = np.where(output[1][:, 1] > self.threshold[1])
- total_boxes = total_boxes[passed]
-
- if total_boxes.size == 0:
- return None
-
- total_boxes[:, 4] = output[1][passed, 1].reshape((-1, ))
- reg = output[0][passed]
-
- # nms
- pick = nms(total_boxes, 0.7, 'Union')
- total_boxes = total_boxes[pick]
- total_boxes = self.calibrate_box(total_boxes, reg[pick])
- total_boxes = self.convert_to_square(total_boxes)
- total_boxes[:, 0:4] = np.round(total_boxes[:, 0:4])
-
- #############################################
- # third stage
- #############################################
- num_box = total_boxes.shape[0]
-
- # pad the bbox
- [dy, edy, dx, edx, y, ey, x, ex, tmpw,
- tmph] = self.pad(total_boxes, width, height)
- # (3, 48, 48) is the input shape for ONet
- input_buf = np.zeros((num_box, 3, 48, 48), dtype=np.float32)
-
- for i in range(num_box):
- tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.float32)
- tmp[dy[i]:edy[i] + 1, dx[i]:edx[i] + 1, :] = img[y[i]:ey[i] + 1,
- x[i]:ex[i] + 1, :]
- input_buf[i, :, :, :] = adjust_input(cv2.resize(tmp, (48, 48)))
-
- output = self.ONet.predict(input_buf)
-
- # filter the total_boxes with threshold
- passed = np.where(output[2][:, 1] > self.threshold[2])
- total_boxes = total_boxes[passed]
-
- if total_boxes.size == 0:
- return None
-
- total_boxes[:, 4] = output[2][passed, 1].reshape((-1, ))
- reg = output[1][passed]
- points = output[0][passed]
-
- # compute landmark points
- bbw = total_boxes[:, 2] - total_boxes[:, 0] + 1
- bbh = total_boxes[:, 3] - total_boxes[:, 1] + 1
- points[:, 0:5] = np.expand_dims(
- total_boxes[:, 0], 1) + np.expand_dims(bbw, 1) * points[:, 0:5]
- points[:, 5:10] = np.expand_dims(
- total_boxes[:, 1], 1) + np.expand_dims(bbh, 1) * points[:, 5:10]
-
- # nms
- total_boxes = self.calibrate_box(total_boxes, reg)
- pick = nms(total_boxes, 0.7, 'Min')
- total_boxes = total_boxes[pick]
- points = points[pick]
-
- if not self.accurate_landmark:
- return total_boxes, points
-
- #############################################
- # extended stage
- #############################################
- num_box = total_boxes.shape[0]
- patchw = np.maximum(total_boxes[:, 2] - total_boxes[:, 0] + 1,
- total_boxes[:, 3] - total_boxes[:, 1] + 1)
- patchw = np.round(patchw * 0.25)
-
- # make it even
- patchw[np.where(np.mod(patchw, 2) == 1)] += 1
-
- input_buf = np.zeros((num_box, 15, 24, 24), dtype=np.float32)
- for i in range(5):
- x, y = points[:, i], points[:, i + 5]
- x, y = np.round(x - 0.5 * patchw), np.round(y - 0.5 * patchw)
- [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(
- np.vstack([x, y, x + patchw - 1, y + patchw - 1]).T, width,
- height)
- for j in range(num_box):
- tmpim = np.zeros((tmpw[j], tmpw[j], 3), dtype=np.float32)
- tmpim[dy[j]:edy[j] + 1,
- dx[j]:edx[j] + 1, :] = img[y[j]:ey[j] + 1,
- x[j]:ex[j] + 1, :]
- input_buf[j, i * 3:i * 3 + 3, :, :] = adjust_input(
- cv2.resize(tmpim, (24, 24)))
-
- output = self.LNet.predict(input_buf)
-
- pointx = np.zeros((num_box, 5))
- pointy = np.zeros((num_box, 5))
-
- for k in range(5):
- # do not make a large movement
- tmp_index = np.where(np.abs(output[k] - 0.5) > 0.35)
- output[k][tmp_index[0]] = 0.5
-
- pointx[:, k] = np.round(points[:, k] -
- 0.5 * patchw) + output[k][:, 0] * patchw
- pointy[:, k] = np.round(points[:, k + 5] -
- 0.5 * patchw) + output[k][:, 1] * patchw
-
- points = np.hstack([pointx, pointy])
- points = points.astype(np.int32)
-
- return total_boxes, points
-
- def list2colmatrix(self, pts_list):
- """
- convert list to column matrix
- Parameters:
- ----------
- pts_list:
- input list
- Retures:
- -------
- colMat:
-
- """
- assert len(pts_list) > 0
- colMat = []
- for i in range(len(pts_list)):
- colMat.append(pts_list[i][0])
- colMat.append(pts_list[i][1])
- colMat = np.matrix(colMat).transpose()
- return colMat
-
- def find_tfrom_between_shapes(self, from_shape, to_shape):
- """
- find transform between shapes
- Parameters:
- ----------
- from_shape:
- to_shape:
- Retures:
- -------
- tran_m:
- tran_b:
- """
- assert from_shape.shape[0] == to_shape.shape[
- 0] and from_shape.shape[0] % 2 == 0
-
- sigma_from = 0.0
- sigma_to = 0.0
- cov = np.matrix([[0.0, 0.0], [0.0, 0.0]])
-
- # compute the mean and cov
- from_shape_points = from_shape.reshape(from_shape.shape[0] / 2, 2)
- to_shape_points = to_shape.reshape(to_shape.shape[0] / 2, 2)
- mean_from = from_shape_points.mean(axis=0)
- mean_to = to_shape_points.mean(axis=0)
-
- for i in range(from_shape_points.shape[0]):
- temp_dis = np.linalg.norm(from_shape_points[i] - mean_from)
- sigma_from += temp_dis * temp_dis
- temp_dis = np.linalg.norm(to_shape_points[i] - mean_to)
- sigma_to += temp_dis * temp_dis
- cov += (to_shape_points[i].transpose() -
- mean_to.transpose()) * (from_shape_points[i] - mean_from)
-
- sigma_from = sigma_from / to_shape_points.shape[0]
- sigma_to = sigma_to / to_shape_points.shape[0]
- cov = cov / to_shape_points.shape[0]
-
- # compute the affine matrix
- s = np.matrix([[1.0, 0.0], [0.0, 1.0]])
- u, d, vt = np.linalg.svd(cov)
-
- if np.linalg.det(cov) < 0:
- if d[1] < d[0]:
- s[1, 1] = -1
- else:
- s[0, 0] = -1
- r = u * s * vt
- c = 1.0
- if sigma_from != 0:
- c = 1.0 / sigma_from * np.trace(np.diag(d) * s)
-
- tran_b = mean_to.transpose() - c * r * mean_from.transpose()
- tran_m = c * r
-
- return tran_m, tran_b
-
- def extract_image_chips(self, img, points, desired_size=256, padding=0):
- """
- crop and align face
- Parameters:
- ----------
- img: numpy array, bgr order of shape (1, 3, n, m)
- input image
- points: numpy array, n x 10 (x1, x2 ... x5, y1, y2 ..y5)
- desired_size: default 256
- padding: default 0
- Retures:
- -------
- crop_imgs: list, n
- cropped and aligned faces
- """
- crop_imgs = []
- for p in points:
- shape = []
- for k in range(len(p) / 2):
- shape.append(p[k])
- shape.append(p[k + 5])
-
- if padding > 0:
- padding = padding
- else:
- padding = 0
- # average positions of face points
- mean_face_shape_x = [
- 0.224152, 0.75610125, 0.490127, 0.254149, 0.726104
- ]
- mean_face_shape_y = [
- 0.2119465, 0.2119465, 0.628106, 0.780233, 0.780233
- ]
-
- from_points = []
- to_points = []
-
- for i in range(len(shape) / 2):
- x = (padding + mean_face_shape_x[i]) / (2 * padding +
- 1) * desired_size
- y = (padding + mean_face_shape_y[i]) / (2 * padding +
- 1) * desired_size
- to_points.append([x, y])
- from_points.append([shape[2 * i], shape[2 * i + 1]])
-
- # convert the points to Mat
- from_mat = self.list2colmatrix(from_points)
- to_mat = self.list2colmatrix(to_points)
-
- # compute the similar transfrom
- tran_m, tran_b = self.find_tfrom_between_shapes(from_mat, to_mat)
-
- probe_vec = np.matrix([1.0, 0.0]).transpose()
- probe_vec = tran_m * probe_vec
-
- scale = np.linalg.norm(probe_vec)
- angle = 180.0 / math.pi * math.atan2(probe_vec[1, 0], probe_vec[0,
- 0])
-
- from_center = [(shape[0] + shape[2]) / 2.0,
- (shape[1] + shape[3]) / 2.0]
- to_center = [0, 0]
- to_center[1] = desired_size * 0.4
- to_center[0] = desired_size * 0.5
-
- ex = to_center[0] - from_center[0]
- ey = to_center[1] - from_center[1]
-
- rot_mat = cv2.getRotationMatrix2D((from_center[0], from_center[1]),
- -1 * angle, scale)
- rot_mat[0][2] += ex
- rot_mat[1][2] += ey
-
- chips = cv2.warpAffine(img, rot_mat, (desired_size, desired_size))
- crop_imgs.append(chips)
-
- return crop_imgs
diff --git a/gender-age/test.py b/gender-age/test.py
deleted file mode 100644
index 78fc3b0..0000000
--- a/gender-age/test.py
+++ /dev/null
@@ -1,39 +0,0 @@
-import face_model
-import argparse
-import cv2
-import sys
-import numpy as np
-import datetime
-
-parser = argparse.ArgumentParser(description='face model test')
-# general
-parser.add_argument('--image-size', default='112,112', help='')
-parser.add_argument('--image', default='Tom_Hanks_54745.png', help='')
-parser.add_argument('--model',
- default='model/model,0',
- help='path to load model.')
-parser.add_argument('--gpu', default=0, type=int, help='gpu id')
-parser.add_argument(
- '--det',
- default=0,
- type=int,
- help='mtcnn option, 1 means using R+O, 0 means detect from begining')
-args = parser.parse_args()
-
-model = face_model.FaceModel(args)
-#img = cv2.imread('Tom_Hanks_54745.png')
-img = cv2.imread(args.image)
-img = model.get_input(img)
-#f1 = model.get_feature(img)
-#print(f1[0:10])
-for _ in range(5):
- gender, age = model.get_ga(img)
-time_now = datetime.datetime.now()
-count = 200
-for _ in range(count):
- gender, age = model.get_ga(img)
-time_now2 = datetime.datetime.now()
-diff = time_now2 - time_now
-print('time cost', diff.total_seconds() / count)
-print('gender is', gender)
-print('age is', age)
diff --git a/gender-age/train.py b/gender-age/train.py
deleted file mode 100644
index 4948d78..0000000
--- a/gender-age/train.py
+++ /dev/null
@@ -1,420 +0,0 @@
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import os
-import sys
-import math
-import random
-import logging
-import pickle
-import numpy as np
-import sklearn
-from data import FaceImageIter
-import mxnet as mx
-from mxnet import ndarray as nd
-import argparse
-import mxnet.optimizer as optimizer
-sys.path.append(os.path.join(os.path.dirname(__file__), 'common'))
-#import face_image
-import fresnet
-import fmobilenet
-
-logger = logging.getLogger()
-logger.setLevel(logging.INFO)
-
-AGE = 100
-
-args = None
-
-
-class AccMetric(mx.metric.EvalMetric):
- def __init__(self):
- self.axis = 1
- super(AccMetric, self).__init__('acc',
- axis=self.axis,
- output_names=None,
- label_names=None)
- self.losses = []
- self.count = 0
-
- def update(self, labels, preds):
- self.count += 1
- label = labels[0].asnumpy()[:, 0:1]
- pred_label = preds[-1].asnumpy()[:, 0:2]
- pred_label = np.argmax(pred_label, axis=self.axis)
- pred_label = pred_label.astype('int32').flatten()
- label = label.astype('int32').flatten()
- assert label.shape == pred_label.shape
- self.sum_metric += (pred_label.flat == label.flat).sum()
- self.num_inst += len(pred_label.flat)
-
-
-class LossValueMetric(mx.metric.EvalMetric):
- def __init__(self):
- self.axis = 1
- super(LossValueMetric, self).__init__('lossvalue',
- axis=self.axis,
- output_names=None,
- label_names=None)
- self.losses = []
-
- def update(self, labels, preds):
- loss = preds[-1].asnumpy()[0]
- self.sum_metric += loss
- self.num_inst += 1.0
- gt_label = preds[-2].asnumpy()
- #print(gt_label)
-
-
-class MAEMetric(mx.metric.EvalMetric):
- def __init__(self):
- self.axis = 1
- super(MAEMetric, self).__init__('MAE',
- axis=self.axis,
- output_names=None,
- label_names=None)
- self.losses = []
- self.count = 0
-
- def update(self, labels, preds):
- self.count += 1
- label = labels[0].asnumpy()
- label_age = np.count_nonzero(label[:, 1:], axis=1)
- pred_age = np.zeros(label_age.shape, dtype=np.int)
- #pred_age = np.zeros( label_age.shape, dtype=np.float32)
- pred = preds[-1].asnumpy()
- for i in range(AGE):
- _pred = pred[:, 2 + i * 2:4 + i * 2]
- _pred = np.argmax(_pred, axis=1)
- #pred = pred[:,1]
- pred_age += _pred
- #pred_age = pred_age.astype(np.int)
- mae = np.mean(np.abs(label_age - pred_age))
- self.sum_metric += mae
- self.num_inst += 1.0
-
-
-class CUMMetric(mx.metric.EvalMetric):
- def __init__(self, n=5):
- self.axis = 1
- self.n = n
- super(CUMMetric, self).__init__('CUM_%d' % n,
- axis=self.axis,
- output_names=None,
- label_names=None)
- self.losses = []
- self.count = 0
-
- def update(self, labels, preds):
- self.count += 1
- label = labels[0].asnumpy()
- label_age = np.count_nonzero(label[:, 1:], axis=1)
- pred_age = np.zeros(label_age.shape, dtype=np.int)
- pred = preds[-1].asnumpy()
- for i in range(AGE):
- _pred = pred[:, 2 + i * 2:4 + i * 2]
- _pred = np.argmax(_pred, axis=1)
- #pred = pred[:,1]
- pred_age += _pred
- diff = np.abs(label_age - pred_age)
- cum = np.sum((diff < self.n))
- self.sum_metric += cum
- self.num_inst += len(label_age)
-
-
-def parse_args():
- parser = argparse.ArgumentParser(description='Train face network')
- # general
- parser.add_argument('--data-dir',
- default='',
- help='training set directory')
- parser.add_argument('--prefix',
- default='../model/model',
- help='directory to save model.')
- parser.add_argument('--pretrained',
- default='',
- help='pretrained model to load')
- parser.add_argument(
- '--ckpt',
- type=int,
- default=1,
- help=
- 'checkpoint saving option. 0: discard saving. 1: save when necessary. 2: always save'
- )
- parser.add_argument('--loss-type', type=int, default=4, help='loss type')
- parser.add_argument(
- '--verbose',
- type=int,
- default=2000,
- help='do verification testing and model saving every verbose batches')
- parser.add_argument('--max-steps',
- type=int,
- default=0,
- help='max training batches')
- parser.add_argument('--end-epoch',
- type=int,
- default=100000,
- help='training epoch size.')
- parser.add_argument('--network', default='r50', help='specify network')
- parser.add_argument('--image-size',
- default='112,112',
- help='specify input image height and width')
- parser.add_argument('--version-input',
- type=int,
- default=1,
- help='network input config')
- parser.add_argument('--version-output',
- type=str,
- default='GAP',
- help='network embedding output config')
- parser.add_argument('--version-act',
- type=str,
- default='prelu',
- help='network activation config')
- parser.add_argument('--multiplier', type=float, default=1.0, help='')
- parser.add_argument('--lr',
- type=float,
- default=0.1,
- help='start learning rate')
- parser.add_argument('--lr-steps',
- type=str,
- default='',
- help='steps of lr changing')
- parser.add_argument('--wd',
- type=float,
- default=0.0005,
- help='weight decay')
- parser.add_argument('--bn-mom', type=float, default=0.9, help='bn mom')
- parser.add_argument('--mom', type=float, default=0.9, help='momentum')
- parser.add_argument('--per-batch-size',
- type=int,
- default=128,
- help='batch size in each context')
- parser.add_argument('--rand-mirror',
- type=int,
- default=1,
- help='if do random mirror in training')
- parser.add_argument('--cutoff', type=int, default=0, help='cut off aug')
- parser.add_argument('--color',
- type=int,
- default=0,
- help='color jittering aug')
- parser.add_argument('--ce-loss',
- default=False,
- action='store_true',
- help='if output ce loss')
- args = parser.parse_args()
- return args
-
-
-def get_symbol(args, arg_params, aux_params):
- data_shape = (args.image_channel, args.image_h, args.image_w)
- image_shape = ",".join([str(x) for x in data_shape])
- margin_symbols = []
- if args.network[0] == 'm':
- fc1 = fmobilenet.get_symbol(AGE * 2 + 2,
- multiplier=args.multiplier,
- version_input=args.version_input,
- version_output=args.version_output)
- else:
- fc1 = fresnet.get_symbol(AGE * 2 + 2,
- args.num_layers,
- version_input=args.version_input,
- version_output=args.version_output)
- label = mx.symbol.Variable('softmax_label')
- gender_label = mx.symbol.slice_axis(data=label, axis=1, begin=0, end=1)
- gender_label = mx.symbol.reshape(gender_label,
- shape=(args.per_batch_size, ))
- gender_fc1 = mx.symbol.slice_axis(data=fc1, axis=1, begin=0, end=2)
- #gender_fc7 = mx.sym.FullyConnected(data=gender_fc1, num_hidden=2, name='gender_fc7')
- gender_softmax = mx.symbol.SoftmaxOutput(data=gender_fc1,
- label=gender_label,
- name='gender_softmax',
- normalization='valid',
- use_ignore=True,
- ignore_label=9999)
- outs = [gender_softmax]
- for i in range(AGE):
- age_label = mx.symbol.slice_axis(data=label,
- axis=1,
- begin=i + 1,
- end=i + 2)
- age_label = mx.symbol.reshape(age_label, shape=(args.per_batch_size, ))
- age_fc1 = mx.symbol.slice_axis(data=fc1,
- axis=1,
- begin=2 + i * 2,
- end=4 + i * 2)
- #age_fc7 = mx.sym.FullyConnected(data=age_fc1, num_hidden=2, name='age_fc7_%i'%i)
- age_softmax = mx.symbol.SoftmaxOutput(data=age_fc1,
- label=age_label,
- name='age_softmax_%d' % i,
- normalization='valid',
- grad_scale=1)
- outs.append(age_softmax)
- outs.append(mx.sym.BlockGrad(fc1))
-
- out = mx.symbol.Group(outs)
- return (out, arg_params, aux_params)
-
-
-def train_net(args):
- ctx = []
- cvd = os.environ['CUDA_VISIBLE_DEVICES'].strip()
- if len(cvd) > 0:
- for i in range(len(cvd.split(','))):
- ctx.append(mx.gpu(i))
- if len(ctx) == 0:
- ctx = [mx.cpu()]
- print('use cpu')
- else:
- print('gpu num:', len(ctx))
- prefix = args.prefix
- prefix_dir = os.path.dirname(prefix)
- if not os.path.exists(prefix_dir):
- os.makedirs(prefix_dir)
- end_epoch = args.end_epoch
- args.ctx_num = len(ctx)
- args.num_layers = int(args.network[1:])
- print('num_layers', args.num_layers)
- if args.per_batch_size == 0:
- args.per_batch_size = 128
- args.batch_size = args.per_batch_size * args.ctx_num
- args.rescale_threshold = 0
- args.image_channel = 3
-
- data_dir_list = args.data_dir.split(',')
- assert len(data_dir_list) == 1
- data_dir = data_dir_list[0]
- path_imgrec = None
- path_imglist = None
- image_size = [int(x) for x in args.image_size.split(',')]
- assert len(image_size) == 2
- assert image_size[0] == image_size[1]
- args.image_h = image_size[0]
- args.image_w = image_size[1]
- print('image_size', image_size)
- path_imgrec = os.path.join(data_dir, "train.rec")
- path_imgrec_val = os.path.join(data_dir, "val.rec")
-
- print('Called with argument:', args)
- data_shape = (args.image_channel, image_size[0], image_size[1])
- mean = None
-
- begin_epoch = 0
- base_lr = args.lr
- base_wd = args.wd
- base_mom = args.mom
- if len(args.pretrained) == 0:
- arg_params = None
- aux_params = None
- sym, arg_params, aux_params = get_symbol(args, arg_params, aux_params)
- else:
- vec = args.pretrained.split(',')
- print('loading', vec)
- _, arg_params, aux_params = mx.model.load_checkpoint(
- vec[0], int(vec[1]))
- sym, arg_params, aux_params = get_symbol(args, arg_params, aux_params)
-
- #label_name = 'softmax_label'
- #label_shape = (args.batch_size,)
- model = mx.mod.Module(
- context=ctx,
- symbol=sym,
- )
- val_dataiter = None
-
- train_dataiter = FaceImageIter(
- batch_size=args.batch_size,
- data_shape=data_shape,
- path_imgrec=path_imgrec,
- shuffle=True,
- rand_mirror=args.rand_mirror,
- mean=mean,
- cutoff=args.cutoff,
- color_jittering=args.color,
- )
- val_dataiter = FaceImageIter(
- batch_size=args.batch_size,
- data_shape=data_shape,
- path_imgrec=path_imgrec_val,
- shuffle=False,
- rand_mirror=False,
- mean=mean,
- )
-
- metric = mx.metric.CompositeEvalMetric(
- [AccMetric(), MAEMetric(), CUMMetric()])
-
- if args.network[0] == 'r' or args.network[0] == 'y':
- initializer = mx.init.Xavier(rnd_type='gaussian',
- factor_type="out",
- magnitude=2) #resnet style
- elif args.network[0] == 'i' or args.network[0] == 'x':
- initializer = mx.init.Xavier(rnd_type='gaussian',
- factor_type="in",
- magnitude=2) #inception
- else:
- initializer = mx.init.Xavier(rnd_type='uniform',
- factor_type="in",
- magnitude=2)
- _rescale = 1.0 / args.ctx_num
- opt = optimizer.SGD(learning_rate=base_lr,
- momentum=base_mom,
- wd=base_wd,
- rescale_grad=_rescale)
- #opt = optimizer.Nadam(learning_rate=base_lr, wd=base_wd, rescale_grad=_rescale)
- som = 20
- _cb = mx.callback.Speedometer(args.batch_size, som)
- lr_steps = [int(x) for x in args.lr_steps.split(',')]
-
- global_step = [0]
-
- def _batch_callback(param):
- _cb(param)
- global_step[0] += 1
- mbatch = global_step[0]
- for _lr in lr_steps:
- if mbatch == _lr:
- opt.lr *= 0.1
- print('lr change to', opt.lr)
- break
- if mbatch % 1000 == 0:
- print('lr-batch-epoch:', opt.lr, param.nbatch, param.epoch)
- if mbatch == lr_steps[-1]:
- arg, aux = model.get_params()
- all_layers = model.symbol.get_internals()
- _sym = all_layers['fc1_output']
- mx.model.save_checkpoint(args.prefix, 0, _sym, arg, aux)
- sys.exit(0)
-
- epoch_cb = None
- train_dataiter = mx.io.PrefetchingIter(train_dataiter)
- print('start fitting')
-
- model.fit(
- train_dataiter,
- begin_epoch=begin_epoch,
- num_epoch=end_epoch,
- eval_data=val_dataiter,
- eval_metric=metric,
- kvstore='device',
- optimizer=opt,
- #optimizer_params = optimizer_params,
- initializer=initializer,
- arg_params=arg_params,
- aux_params=aux_params,
- allow_missing=True,
- batch_end_callback=_batch_callback,
- epoch_end_callback=epoch_cb)
-
-
-def main():
- #time.sleep(3600*6.5)
- global args
- args = parse_args()
- train_net(args)
-
-
-if __name__ == '__main__':
- main()
diff --git a/python-package/README.md b/python-package/README.md
index 10777df..df6eced 100644
--- a/python-package/README.md
+++ b/python-package/README.md
@@ -1,4 +1,4 @@
-## Python package of insightface README
+## Python package
For insightface pip-package <= 0.1.5, we use MXNet as inference backend, please download all models from [onedrive](https://1drv.ms/u/s!AswpsDO2toNKrUy0VktHTWgIQ0bn?e=UEF7C4), and put them all under `~/.insightface/models/` directory.
diff --git a/deploy/Tom_Hanks_54745.png b/python-package/insightface/data/images/Tom_Hanks_54745.png
similarity index 100%
rename from deploy/Tom_Hanks_54745.png
rename to python-package/insightface/data/images/Tom_Hanks_54745.png
diff --git a/sample-images/t1.jpg b/python-package/insightface/data/images/t1.jpg
similarity index 100%
rename from sample-images/t1.jpg
rename to python-package/insightface/data/images/t1.jpg
diff --git a/python-package/insightface/model_zoo/arcface_onnx.py b/python-package/insightface/model_zoo/arcface_onnx.py
index 02a6757..9c7de62 100644
--- a/python-package/insightface/model_zoo/arcface_onnx.py
+++ b/python-package/insightface/model_zoo/arcface_onnx.py
@@ -82,4 +82,14 @@ class ArcFaceONNX:
sim = np.dot(feat1, feat2) / (norm(feat1) * norm(feat2))
return sim
+    def forward(self, imgs):
+        """Run the ONNX session on one image or a list of images and return its first output."""
+        batch = imgs if isinstance(imgs, list) else [imgs]  # a bare image becomes a 1-item batch
+        mean = (self.input_mean,) * 3  # the same mean is used for all three channels
+        scale = 1.0 / self.input_std
+        # Per the OpenCV docs: resize to input_size, subtract mean, multiply by scale, swap R and B.
+        blob = cv2.dnn.blobFromImages(batch, scale, self.input_size, mean, swapRB=True)
+        outputs = self.session.run(self.output_names, {self.input_name: blob})
+        return outputs[0]
+
diff --git a/recognition/README.md b/recognition/README.md
index 5cda5a3..e494947 100644
--- a/recognition/README.md
+++ b/recognition/README.md
@@ -1,24 +1,46 @@
-## Angular Margin Loss for Deep Face Recognition
+## Face Recognition
-### Citation
-If you find this project useful in your research, please consider to cite the following related papers:
+
+
+
-```
-@inproceedings{deng2019arcface,
- title={Arcface: Additive angular margin loss for deep face recognition},
- author={Deng, Jiankang and Guo, Jia and Xue, Niannan and Zafeiriou, Stefanos},
- booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
- pages={4690--4699},
- year={2019}
-}
+## Introduction
+
+These are the face recognition methods of [InsightFace](https://insightface.ai).
+
+
+
+
+
+
+
+### Datasets
+
+ Please refer to the [datasets](_datasets_) page for the details of face recognition datasets used for training and evaluation.
+
+### Evaluation
+
+ Please refer to the [evaluation](_evaluation_) page for the details of face recognition evaluation.
+
+
+## Methods
+
+
+Supported methods:
+
+- [x] [ArcFace_mxnet (CVPR'2019)](arcface_mxnet)
+- [x] [ArcFace_torch (CVPR'2019)](arcface_torch)
+- [x] [SubCenter ArcFace (ECCV'2020)](subcenter_arcface)
+- [x] [PartialFC_mxnet (Arxiv'2020)](partial_fc)
+- [x] [PartialFC_torch (Arxiv'2020)](arcface_torch)
+- [x] [VPL (CVPR'2021)](vpl)
+- [x] [OneFlow_face](oneflow_face)
+
+
+## Contributing
+
+We appreciate all contributions to improve the face recognition model zoo of InsightFace.
-@inproceedings{deng2020subcenter,
- title={Sub-center ArcFace: Boosting Face Recognition by Large-scale Noisy Web Faces},
- author={Deng, Jiankang and Guo, Jia and Liu, Tongliang and Gong, Mingming and Zafeiriou, Stefanos},
- booktitle={Proceedings of the IEEE Conference on European Conference on Computer Vision},
- year={2020}
-}
-```
diff --git a/recognition/_datasets_/README.md b/recognition/_datasets_/README.md
new file mode 100644
index 0000000..5cda5a3
--- /dev/null
+++ b/recognition/_datasets_/README.md
@@ -0,0 +1,24 @@
+## Angular Margin Loss for Deep Face Recognition
+
+### Citation
+
+If you find this project useful in your research, please consider citing the following related papers:
+
+```
+
+@inproceedings{deng2019arcface,
+ title={Arcface: Additive angular margin loss for deep face recognition},
+ author={Deng, Jiankang and Guo, Jia and Xue, Niannan and Zafeiriou, Stefanos},
+ booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
+ pages={4690--4699},
+ year={2019}
+}
+
+@inproceedings{deng2020subcenter,
+ title={Sub-center ArcFace: Boosting Face Recognition by Large-scale Noisy Web Faces},
+ author={Deng, Jiankang and Guo, Jia and Liu, Tongliang and Gong, Mingming and Zafeiriou, Stefanos},
+ booktitle={Proceedings of the IEEE Conference on European Conference on Computer Vision},
+ year={2020}
+}
+
+```
diff --git a/evaluation/IJB/README.md b/recognition/_evaluation_/ijb/README.md
similarity index 100%
rename from evaluation/IJB/README.md
rename to recognition/_evaluation_/ijb/README.md
diff --git a/evaluation/IJB/example.sh b/recognition/_evaluation_/ijb/example.sh
similarity index 59%
rename from evaluation/IJB/example.sh
rename to recognition/_evaluation_/ijb/example.sh
index 9a386c2..63f8269 100755
--- a/evaluation/IJB/example.sh
+++ b/recognition/_evaluation_/ijb/example.sh
@@ -1,6 +1,6 @@
#!/usr/bin/env bash
-python -u IJB_11.py --model-prefix ./pretrained_models/r100-arcface/model --model-epoch 1 --gpu 0 --target IJBC --job arcface > ijbc_11.log 2>&1 &
+python -u ijb_11.py --model-prefix ./pretrained_models/r100-arcface/model --model-epoch 1 --gpu 0 --target IJBC --job arcface > ijbc_11.log 2>&1 &
-python -u IJB_1N.py --model-prefix ./pretrained_models/r100-arcface/model --model-epoch 1 --gpu 0 --target IJBB --job arcface > ijbb_1n.log 2>&1 &
+python -u ijb_1n.py --model-prefix ./pretrained_models/r100-arcface/model --model-epoch 1 --gpu 0 --target IJBB --job arcface > ijbb_1n.log 2>&1 &
diff --git a/evaluation/IJB/IJB_11.py b/recognition/_evaluation_/ijb/ijb_11.py
similarity index 100%
rename from evaluation/IJB/IJB_11.py
rename to recognition/_evaluation_/ijb/ijb_11.py
diff --git a/evaluation/IJB/IJB_1N.py b/recognition/_evaluation_/ijb/ijb_1n.py
similarity index 100%
rename from evaluation/IJB/IJB_1N.py
rename to recognition/_evaluation_/ijb/ijb_1n.py
diff --git a/evaluation/IJB/IJB_evals.py b/recognition/_evaluation_/ijb/ijb_evals.py
similarity index 100%
rename from evaluation/IJB/IJB_evals.py
rename to recognition/_evaluation_/ijb/ijb_evals.py
diff --git a/recognition/_evaluation_/ijb/ijb_onnx.py b/recognition/_evaluation_/ijb/ijb_onnx.py
new file mode 100644
index 0000000..eb2edbe
--- /dev/null
+++ b/recognition/_evaluation_/ijb/ijb_onnx.py
@@ -0,0 +1,267 @@
+import argparse
+import os
+import pickle
+import timeit
+
+import cv2
+import mxnet as mx
+import numpy as np
+import pandas as pd
+import prettytable
+import skimage.transform
+from sklearn.metrics import roc_curve
+from sklearn.preprocessing import normalize
+import insightface
+from insightface.model_zoo import ArcFaceONNX
+
+
+# Five reference landmark positions (x, y) that detected landmarks are aligned onto.
+SRC = np.array(
+    [[30.2946, 51.6963],
+     [65.5318, 51.5014],
+     [48.0252, 71.7366],
+     [33.5493, 92.3655],
+     [62.7299, 92.2041]],
+    dtype=np.float32)
+SRC[:, 0] += 8.0  # shift every x by 8; presumably recentres for the 112-wide crop - confirm
+
+
+class AlignedDataSet(mx.gluon.data.Dataset):
+    """Dataset yielding, for each list line, the aligned 112x112 face and its horizontal flip."""
+    def __init__(self, root, lines, align=True):
+        self.root = root
+        self.lines = lines
+        self.align = align  # stored only; nothing in this class reads it
+
+    def __len__(self):
+        return len(self.lines)
+
+    def __getitem__(self, idx):
+        # Fields per line: image path relative to root, ten landmark floats, trailing score.
+        fields = self.lines[idx].strip().split(' ')
+        img_file = os.path.join(self.root, fields[0])
+        rgb = cv2.cvtColor(cv2.imread(img_file), cv2.COLOR_BGR2RGB)
+        pts = np.array([float(v) for v in fields[1:-1]], dtype=np.float32)
+        # Estimate the similarity transform that maps the five landmarks onto SRC.
+        tform = skimage.transform.SimilarityTransform()
+        tform.estimate(pts.reshape((5, 2)), SRC)
+        face = cv2.warpAffine(rgb, tform.params[0:2, :], (112, 112), borderValue=0.0)
+        # Stack [face, mirrored face] into (2, H, W, C), then move channels to axis 1.
+        pair = np.stack((face, np.fliplr(face)), axis=0).astype(np.float32)
+        chw = np.transpose(pair, (0, 3, 1, 2))
+        return mx.nd.array(chw)
+
+
+def extract(model_file, dataset, batch_size=128):
+    """Return a (len(dataset), 2 * feat_dim) float matrix with one feature row per dataset item."""
+    model = ArcFaceONNX(model_file=model_file)
+    model.check()
+    feat_mat = np.zeros(shape=(len(dataset), 2 * model.feat_dim))
+
+    def batchify_fn(data):
+        return mx.nd.concat(*data, dim=0)  # join the per-item arrays along the first axis
+
+    data_loader = mx.gluon.data.DataLoader(
+        dataset, batch_size, last_batch='keep', num_workers=4,
+        thread_pool=True, prefetch=16, batchify_fn=batchify_fn)
+    row = 0  # next unwritten row of feat_mat; replaces the hard-coded 128 * num_iter offset
+    for num_iter, batch in enumerate(data_loader, start=1):
+        feat = model.forward(batch.asnumpy())
+        feat = np.reshape(feat, (-1, model.feat_dim * 2))  # two consecutive outputs -> one row
+        feat_mat[row: row + feat.shape[0], :] = feat
+        row += feat.shape[0]
+        if num_iter % 50 == 0:
+            print(num_iter)
+    return feat_mat
+
+
+def read_template_media_list(path):
+    """Read a space-separated meta file; return (templates, medias) int arrays from columns 1 and 2."""
+    ijb_meta = pd.read_csv(path, sep=' ', header=None).values
+    # Builtin int replaces the np.int alias, which NumPy 1.24 removed (AttributeError).
+    return ijb_meta[:, 1].astype(int), ijb_meta[:, 2].astype(int)
+
+
+def read_template_pair_list(path):
+    """Read a space-separated pair file; return (t1, t2, label) int arrays from columns 0, 1, 2."""
+    pairs = pd.read_csv(path, sep=' ', header=None).values
+    # Builtin int replaces the np.int alias, which NumPy 1.24 removed (AttributeError).
+    t1, t2, label = (pairs[:, col].astype(int) for col in range(3))
+    return t1, t2, label
+
+
+def read_image_feature(path):
+    """Unpickle and return the object stored at `path`."""
+    with open(path, 'rb') as fid:  # NOTE: pickle can run code on load; only read trusted files
+        return pickle.load(fid)
+
+
+def image2template_feature(img_feats=None,
+                           templates=None,
+                           medias=None):
+    """Pool per-image features into one row-normalised feature per unique template id."""
+    unique_templates = np.unique(templates)
+    template_feats = np.zeros((len(unique_templates), img_feats.shape[1]))
+    for row, template_id in enumerate(unique_templates):
+        (member_idx,) = np.where(templates == template_id)
+        feats = img_feats[member_idx]
+        media_ids = medias[member_idx]
+        pooled = []
+        for media_id, n_faces in zip(*np.unique(media_ids, return_counts=True)):
+            (group_idx,) = np.where(media_ids == media_id)
+            group = feats[group_idx]
+            # Several faces sharing a media id are averaged into one feature;
+            # a lone face is kept as is. Either way the entry has shape (1, dim).
+            pooled.append(group if n_faces == 1 else np.mean(group, axis=0, keepdims=True))
+        # The template feature is the sum of its per-media features.
+        template_feats[row] = np.sum(np.array(pooled), axis=0)
+        if row % 2000 == 0:
+            print('Finish Calculating {} template features.'.format(row))
+    # sklearn's normalize rescales each row (L2 norm by default).
+    template_norm_feats = normalize(template_feats)
+    return template_norm_feats, unique_templates
+
+
+def verification(template_norm_feats=None,
+                 unique_templates=None,
+                 p1=None,
+                 p2=None):
+    """Score each (p1[i], p2[i]) template pair by the dot product of their template features."""
+    template2id = np.zeros((max(unique_templates) + 1, 1), dtype=int)  # template id -> feature row
+    for position, template_id in enumerate(unique_templates):
+        template2id[template_id] = position
+    n_pairs = len(p1)
+    score = np.zeros((n_pairs,))
+    chunk = 100000  # pairs handled per step
+    starts = range(0, n_pairs, chunk)
+    for step, lo in enumerate(starts):
+        sel = np.arange(lo, min(lo + chunk, n_pairs))
+        left = template_norm_feats[template2id[p1[sel]]]
+        right = template_norm_feats[template2id[p2[sel]]]
+        score[sel] = np.sum(left * right, -1).flatten()
+        if step % 10 == 0:
+            print('Finish {}/{} pairs.'.format(step, len(starts)))
+    return score
+
+
+def verification2(template_norm_feats=None,
+                  unique_templates=None,
+                  p1=None,
+                  p2=None):
+    """Pairwise template similarity; the body duplicates `verification` in this file."""
+    template2id = np.zeros((max(unique_templates) + 1, 1), dtype=int)
+    for position, template_id in enumerate(unique_templates):
+        template2id[template_id] = position
+    score = np.zeros((len(p1),))  # dot product of the two template features of each pair
+    batchsize = 100000  # work on slices rather than all pairs at once to limit memory use
+    pair_ids = np.array(range(len(p1)))
+    batches = [pair_ids[k:k + batchsize] for k in range(0, len(p1), batchsize)]
+    for batch_no, ids in enumerate(batches):
+        rows1 = template2id[p1[ids]]
+        rows2 = template2id[p2[ids]]
+        products = template_norm_feats[rows1] * template_norm_feats[rows2]
+        score[ids] = np.sum(products, -1).flatten()
+        if batch_no % 10 == 0:
+            print('Finish {}/{} pairs.'.format(batch_no, len(batches)))
+    return score
+
+
+def main(args):
+    """Run IJB-B/IJB-C template verification with an ONNX model and print a TPR@FPR table."""
+    use_norm_score = True  # if True, TestMode(N1)
+    use_detector_score = True  # if True, TestMode(D1)
+    use_flip_test = True  # if True, TestMode(F1)
+    assert args.target == 'IJBC' or args.target == 'IJBB'
+
+    start = timeit.default_timer()
+    templates, medias = read_template_media_list(
+        os.path.join('%s/meta' % args.image_path, '%s_face_tid_mid.txt' % args.target.lower()))
+    stop = timeit.default_timer()
+    print('Time: %.2f s. ' % (stop - start))
+
+    start = timeit.default_timer()
+    p1, p2, label = read_template_pair_list(
+        os.path.join('%s/meta' % args.image_path, '%s_template_pair_label.txt' % args.target.lower()))
+    stop = timeit.default_timer()
+    print('Time: %.2f s. ' % (stop - start))
+
+    start = timeit.default_timer()
+    img_path = '%s/loose_crop' % args.image_path
+    img_list_path = '%s/meta/%s_name_5pts_score.txt' % (args.image_path, args.target.lower())
+    with open(img_list_path) as img_list:  # context manager closes the handle the original leaked
+        files = img_list.readlines()
+    dataset = AlignedDataSet(root=img_path, lines=files, align=True)
+    img_feats = extract(args.model_file, dataset)
+
+    faceness_scores = []
+    for each_line in files:
+        name_lmk_score = each_line.split()
+        faceness_scores.append(name_lmk_score[-1])  # last field of each line is the detector score
+    faceness_scores = np.array(faceness_scores).astype(np.float32)
+    stop = timeit.default_timer()
+    print('Time: %.2f s. ' % (stop - start))
+    print('Feature Shape: ({} , {}) .'.format(img_feats.shape[0], img_feats.shape[1]))
+    start = timeit.default_timer()
+
+    if use_flip_test:
+        img_input_feats = img_feats[:, 0:img_feats.shape[1] // 2] + img_feats[:, img_feats.shape[1] // 2:]
+    else:
+        img_input_feats = img_feats[:, 0:img_feats.shape[1] // 2]
+
+    if use_norm_score:
+        img_input_feats = img_input_feats  # no-op: features keep their original norm
+    else:
+        img_input_feats = img_input_feats / np.sqrt(np.sum(img_input_feats ** 2, -1, keepdims=True))
+
+    if use_detector_score:
+        print(img_input_feats.shape, faceness_scores.shape)
+        img_input_feats = img_input_feats * faceness_scores[:, np.newaxis]
+    else:
+        img_input_feats = img_input_feats
+
+    template_norm_feats, unique_templates = image2template_feature(
+        img_input_feats, templates, medias)
+    stop = timeit.default_timer()
+    print('Time: %.2f s. ' % (stop - start))
+
+    start = timeit.default_timer()
+    score = verification(template_norm_feats, unique_templates, p1, p2)
+    stop = timeit.default_timer()
+    print('Time: %.2f s. ' % (stop - start))
+    save_path = os.path.join(args.result_dir, "{}_result".format(args.target))
+    # exist_ok replaces the exists()/makedirs() pair, which could race with another process
+    os.makedirs(save_path, exist_ok=True)
+    score_save_file = os.path.join(save_path, "{}.npy".format(args.model_file.split('/')[-1]))
+    np.save(score_save_file, score)
+    files = [score_save_file]
+    methods = []
+    scores = []
+    for file in files:
+        methods.append(os.path.basename(file))
+        scores.append(np.load(file))
+    methods = np.array(methods)
+    scores = dict(zip(methods, scores))
+    x_labels = [10 ** -6, 10 ** -5, 10 ** -4, 10 ** -3, 10 ** -2, 10 ** -1]
+    tpr_fpr_table = prettytable.PrettyTable(['Methods'] + [str(x) for x in x_labels])
+    for method in methods:
+        fpr, tpr, _ = roc_curve(label, scores[method])
+        fpr = np.flipud(fpr)
+        tpr = np.flipud(tpr)
+        tpr_fpr_row = []
+        tpr_fpr_row.append("%s-%s" % (method, args.target))
+        for fpr_iter in np.arange(len(x_labels)):
+            _, min_index = min(
+                list(zip(abs(fpr - x_labels[fpr_iter]), range(len(fpr)))))
+            tpr_fpr_row.append('%.2f' % (tpr[min_index] * 100))
+        tpr_fpr_table.add_row(tpr_fpr_row)
+    print(tpr_fpr_table)
+
+
+if __name__ == '__main__':
+    cli = argparse.ArgumentParser(description='do onnx ijb test')
+    # Command-line options; the parsed namespace is passed straight to main().
+    cli.add_argument('--model-file', default='', help='path to onnx model.')
+    cli.add_argument('--image-path', default='', type=str, help='')
+    cli.add_argument('--result-dir', default='.', type=str, help='')
+    cli.add_argument('--target', default='IJBC', type=str, help='target, set to IJBC or IJBB')
+    main(cli.parse_args())
diff --git a/evaluation/Megaface/README.md b/recognition/_evaluation_/megaface/README.md
similarity index 100%
rename from evaluation/Megaface/README.md
rename to recognition/_evaluation_/megaface/README.md
diff --git a/evaluation/Megaface/gen_megaface.py b/recognition/_evaluation_/megaface/gen_megaface.py
similarity index 100%
rename from evaluation/Megaface/gen_megaface.py
rename to recognition/_evaluation_/megaface/gen_megaface.py
diff --git a/evaluation/Megaface/remove_noises.py b/recognition/_evaluation_/megaface/remove_noises.py
similarity index 100%
rename from evaluation/Megaface/remove_noises.py
rename to recognition/_evaluation_/megaface/remove_noises.py
diff --git a/evaluation/Megaface/run.sh b/recognition/_evaluation_/megaface/run.sh
similarity index 100%
rename from evaluation/Megaface/run.sh
rename to recognition/_evaluation_/megaface/run.sh
diff --git a/recognition/tools/README.md b/recognition/_tools_/README.md
similarity index 100%
rename from recognition/tools/README.md
rename to recognition/_tools_/README.md
diff --git a/recognition/tools/cpp-align/FacePreprocess.h b/recognition/_tools_/cpp_align/face_align.h
similarity index 100%
rename from recognition/tools/cpp-align/FacePreprocess.h
rename to recognition/_tools_/cpp_align/face_align.h
diff --git a/recognition/tools/mask_renderer.py b/recognition/_tools_/mask_renderer.py
similarity index 100%
rename from recognition/tools/mask_renderer.py
rename to recognition/_tools_/mask_renderer.py
diff --git a/recognition/ArcFace/README.md b/recognition/arcface_mxnet/README.md
similarity index 93%
rename from recognition/ArcFace/README.md
rename to recognition/arcface_mxnet/README.md
index 0fc1555..3d463f7 100644
--- a/recognition/ArcFace/README.md
+++ b/recognition/arcface_mxnet/README.md
@@ -2,13 +2,13 @@
### Memory Consumption and Training Speed
-
+
Parallel acceleration on both feature x and centre W. Setting: ResNet 50, batch size 8 * 64, feature dimension 512, float point 32, GPU 8 * P40 (24GB).
### Illustration of Main Steps
-
+
Parallel calculation by simple matrix partition. Setting: ResNet 50, batch size 8 * 64, feature dimension 512, float point 32, identity number 1 Million, GPU 8 * 1080ti (11GB). Communication cost: 1MB (feature x). Training speed: 800 samples/second.
diff --git a/recognition/common/build_eval_pack.py b/recognition/arcface_mxnet/common/build_eval_pack.py
similarity index 100%
rename from recognition/common/build_eval_pack.py
rename to recognition/arcface_mxnet/common/build_eval_pack.py
diff --git a/recognition/common/face_align.py b/recognition/arcface_mxnet/common/face_align.py
similarity index 100%
rename from recognition/common/face_align.py
rename to recognition/arcface_mxnet/common/face_align.py
diff --git a/recognition/common/flops_counter.py b/recognition/arcface_mxnet/common/flops_counter.py
similarity index 100%
rename from recognition/common/flops_counter.py
rename to recognition/arcface_mxnet/common/flops_counter.py
diff --git a/recognition/common/rec2image.py b/recognition/arcface_mxnet/common/rec2image.py
similarity index 100%
rename from recognition/common/rec2image.py
rename to recognition/arcface_mxnet/common/rec2image.py
diff --git a/recognition/common/rec2shufrec.py b/recognition/arcface_mxnet/common/rec2shufrec.py
similarity index 100%
rename from recognition/common/rec2shufrec.py
rename to recognition/arcface_mxnet/common/rec2shufrec.py
diff --git a/recognition/common/rec_builder.py b/recognition/arcface_mxnet/common/rec_builder.py
similarity index 100%
rename from recognition/common/rec_builder.py
rename to recognition/arcface_mxnet/common/rec_builder.py
diff --git a/recognition/common/verification.py b/recognition/arcface_mxnet/common/verification.py
similarity index 100%
rename from recognition/common/verification.py
rename to recognition/arcface_mxnet/common/verification.py
diff --git a/recognition/ArcFace/image_iter.py b/recognition/arcface_mxnet/image_iter.py
similarity index 100%
rename from recognition/ArcFace/image_iter.py
rename to recognition/arcface_mxnet/image_iter.py
diff --git a/recognition/ArcFace/metric.py b/recognition/arcface_mxnet/metric.py
similarity index 100%
rename from recognition/ArcFace/metric.py
rename to recognition/arcface_mxnet/metric.py
diff --git a/recognition/ArcFace/parall_module_local_v1.py b/recognition/arcface_mxnet/parall_module_local_v1.py
similarity index 100%
rename from recognition/ArcFace/parall_module_local_v1.py
rename to recognition/arcface_mxnet/parall_module_local_v1.py
diff --git a/recognition/ArcFace/sample_config.py b/recognition/arcface_mxnet/sample_config.py
similarity index 100%
rename from recognition/ArcFace/sample_config.py
rename to recognition/arcface_mxnet/sample_config.py
diff --git a/recognition/symbol/fdensenet.py b/recognition/arcface_mxnet/symbol/fdensenet.py
similarity index 100%
rename from recognition/symbol/fdensenet.py
rename to recognition/arcface_mxnet/symbol/fdensenet.py
diff --git a/recognition/symbol/fmnasnet.py b/recognition/arcface_mxnet/symbol/fmnasnet.py
similarity index 100%
rename from recognition/symbol/fmnasnet.py
rename to recognition/arcface_mxnet/symbol/fmnasnet.py
diff --git a/recognition/symbol/fmobilefacenet.py b/recognition/arcface_mxnet/symbol/fmobilefacenet.py
similarity index 100%
rename from recognition/symbol/fmobilefacenet.py
rename to recognition/arcface_mxnet/symbol/fmobilefacenet.py
diff --git a/recognition/symbol/fmobilenet.py b/recognition/arcface_mxnet/symbol/fmobilenet.py
similarity index 100%
rename from recognition/symbol/fmobilenet.py
rename to recognition/arcface_mxnet/symbol/fmobilenet.py
diff --git a/recognition/symbol/fresnet.py b/recognition/arcface_mxnet/symbol/fresnet.py
similarity index 100%
rename from recognition/symbol/fresnet.py
rename to recognition/arcface_mxnet/symbol/fresnet.py
diff --git a/recognition/symbol/memonger.py b/recognition/arcface_mxnet/symbol/memonger.py
similarity index 100%
rename from recognition/symbol/memonger.py
rename to recognition/arcface_mxnet/symbol/memonger.py
diff --git a/recognition/symbol/memonger_v2.py b/recognition/arcface_mxnet/symbol/memonger_v2.py
similarity index 100%
rename from recognition/symbol/memonger_v2.py
rename to recognition/arcface_mxnet/symbol/memonger_v2.py
diff --git a/recognition/symbol/symbol_utils.py b/recognition/arcface_mxnet/symbol/symbol_utils.py
similarity index 100%
rename from recognition/symbol/symbol_utils.py
rename to recognition/arcface_mxnet/symbol/symbol_utils.py
diff --git a/recognition/symbol/vargfacenet.py b/recognition/arcface_mxnet/symbol/vargfacenet.py
similarity index 100%
rename from recognition/symbol/vargfacenet.py
rename to recognition/arcface_mxnet/symbol/vargfacenet.py
diff --git a/recognition/ArcFace/train.py b/recognition/arcface_mxnet/train.py
similarity index 99%
rename from recognition/ArcFace/train.py
rename to recognition/arcface_mxnet/train.py
index 8cae863..2f2ebad 100644
--- a/recognition/ArcFace/train.py
+++ b/recognition/arcface_mxnet/train.py
@@ -16,10 +16,10 @@ import argparse
import mxnet.optimizer as optimizer
from config import config, default, generate_config
from metric import *
-sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'common'))
+sys.path.append(os.path.join(os.path.dirname(__file__), 'common'))
import flops_counter
import verification
-sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'symbol'))
+sys.path.append(os.path.join(os.path.dirname(__file__), 'symbol'))
import fresnet
import fmobilefacenet
import fmobilenet
diff --git a/recognition/ArcFace/train_parall.py b/recognition/arcface_mxnet/train_parall.py
similarity index 99%
rename from recognition/ArcFace/train_parall.py
rename to recognition/arcface_mxnet/train_parall.py
index 1351baa..ce6b916 100644
--- a/recognition/ArcFace/train_parall.py
+++ b/recognition/arcface_mxnet/train_parall.py
@@ -20,11 +20,11 @@ import mxnet as mx
from mxnet import ndarray as nd
import argparse
import mxnet.optimizer as optimizer
-sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'common'))
+sys.path.append(os.path.join(os.path.dirname(__file__), 'common'))
import flops_counter
from config import config, default, generate_config
import verification
-sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'symbol'))
+sys.path.append(os.path.join(os.path.dirname(__file__), 'symbol'))
import fresnet
import fmobilefacenet
import fmobilenet
diff --git a/recognition/ArcFace/triplet_image_iter.py b/recognition/arcface_mxnet/triplet_image_iter.py
similarity index 100%
rename from recognition/ArcFace/triplet_image_iter.py
rename to recognition/arcface_mxnet/triplet_image_iter.py
diff --git a/recognition/ArcFace/verification.py b/recognition/arcface_mxnet/verification.py
similarity index 100%
rename from recognition/ArcFace/verification.py
rename to recognition/arcface_mxnet/verification.py
diff --git a/recognition/SubCenter-ArcFace/README.md b/recognition/subcenter_arcface/README.md
similarity index 89%
rename from recognition/SubCenter-ArcFace/README.md
rename to recognition/subcenter_arcface/README.md
index 56a91b5..ed642fd 100644
--- a/recognition/SubCenter-ArcFace/README.md
+++ b/recognition/subcenter_arcface/README.md
@@ -5,14 +5,14 @@
We introduce one extra hyperparameter (subcenter number `loss_K`) to ArcFace to relax the intra-class compactness constraint. In our experiments, we find ``loss_K=3`` can achieve a good balance between accuracy and robustness.
-
+
### 2. Implementation
The training process of Subcenter ArcFace is almost same as [ArcFace](https://github.com/deepinsight/insightface/tree/master/recognition/ArcFace)
The increased GPU memory consumption can be easily alleviated by our parallel framework.
-
+
### 3. Training Dataset
diff --git a/recognition/subcenter_arcface/common/build_eval_pack.py b/recognition/subcenter_arcface/common/build_eval_pack.py
new file mode 100644
index 0000000..23208ce
--- /dev/null
+++ b/recognition/subcenter_arcface/common/build_eval_pack.py
@@ -0,0 +1,136 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+#import mxnet as mx
+#from mxnet import ndarray as nd
+import argparse
+import cv2
+import pickle
+import numpy as np
+import sys
+import os
+sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'common'))
+sys.path.append(
+ os.path.join(os.path.dirname(__file__), '..', '..', 'RetinaFace'))
+import face_align
+from retinaface import RetinaFace
+
+
+def to_rgb(img):
+    """Expand a 2-D grayscale array into a 3-channel uint8 image."""
+    rows, cols = img.shape  # unpacking fails unless img is exactly 2-D
+    stacked = np.repeat(img.reshape(rows, cols, 1), 3, axis=2)
+    return stacked.astype(np.uint8)
+
+
+def IOU(Reframe, GTframe):
+    """Intersection-over-union of two boxes given as (x1, y1, x2, y2, ...).
+
+    Only the first four entries of each box are read. Returns 0 when the
+    boxes do not overlap, otherwise intersection / union as a float.
+    """
+    left_a, top_a = Reframe[0], Reframe[1]
+    w_a = Reframe[2] - Reframe[0]
+    h_a = Reframe[3] - Reframe[1]
+
+    left_b, top_b = GTframe[0], GTframe[1]
+    w_b = GTframe[2] - GTframe[0]
+    h_b = GTframe[3] - GTframe[1]
+
+    # Overlap along an axis = sum of both extents minus the extent of their
+    # joint span; the value is <= 0 when the boxes are disjoint on that axis.
+    span_x = max(left_a + w_a, left_b + w_b) - min(left_a, left_b)
+    span_y = max(top_a + h_a, top_b + h_b) - min(top_a, top_b)
+    inter_w = w_a + w_b - span_x
+    inter_h = h_a + h_b - span_y
+
+    if inter_w <= 0 or inter_h <= 0:
+        return 0
+    inter = inter_w * inter_h
+    area_a, area_b = w_a * h_a, w_b * h_b
+    union = area_a + area_b - inter
+    return inter * 1. / union
+
+
+parser = argparse.ArgumentParser(description='Package eval images')
+# Command-line options; parsed at module level (there is no __main__ guard in this script).
+parser.add_argument('--data-dir', default='', help='')
+parser.add_argument('--image-size', type=int, default=112, help='')
+parser.add_argument('--gpu', type=int, default=0, help='')
+parser.add_argument('--det-prefix', type=str, default='./model/R50', help='')
+parser.add_argument('--output', default='./', help='path to save.')  # NOTE(review): later opened as a file for writing; the './' default is a directory - confirm
+parser.add_argument('--align-mode', default='arcface', help='align mode.')
+args = parser.parse_args()
+
+gpu_id = args.gpu
+# RetinaFace is the project-local detector imported above; the meaning of the positional 0 is not visible here - TODO confirm.
+detector = RetinaFace(args.det_prefix, 0, gpu_id, network='net3')
+target_size = 400  # get_norm_crop scales the shorter image side to this length
+max_size = 800  # get_norm_crop caps the longer image side at this length after scaling
+
+
+def get_norm_crop(image_path):
+    """Detect the most prominent face in an image file and return its aligned crop.
+
+    Returns the `face_align.norm_crop` result, or None when the file cannot be
+    read or no face is detected.
+    """
+    im = cv2.imread(image_path)
+    if im is None:  # cv2.imread signals an unreadable/missing file with None
+        print('cannot read image', image_path)
+        return None
+    im_size_min = np.min(im.shape[0:2])
+    im_size_max = np.max(im.shape[0:2])
+    im_scale = float(target_size) / float(im_size_min)
+    # prevent bigger axis from being more than max_size:
+    if np.round(im_scale * im_size_max) > max_size:
+        im_scale = float(max_size) / float(im_size_max)
+    bbox, landmark = detector.detect(im, threshold=0.5, scales=[im_scale])
+    if bbox.shape[0] == 0:
+        # Nothing found: retry with a much lower threshold at several scales.
+        bbox, landmark = detector.detect(
+            im,
+            threshold=0.05,
+            scales=[im_scale * 0.75, im_scale, im_scale * 2.0])
+        print('refine', im.shape, bbox.shape, landmark.shape)
+    if bbox.shape[0] == 0:
+        return None
+    bindex = 0
+    if bbox.shape[0] > 1:
+        # Several faces: prefer large boxes, penalising distance from centre.
+        det = bbox[:, 0:4]
+        img_center = np.asarray(im.shape)[0:2] / 2
+        box_area = (det[:, 2] - det[:, 0]) * (det[:, 3] - det[:, 1])
+        offsets = np.vstack([(det[:, 0] + det[:, 2]) / 2 - img_center[1],
+                             (det[:, 1] + det[:, 3]) / 2 - img_center[0]])
+        offset_dist_squared = np.sum(np.power(offsets, 2.0), 0)
+        bindex = np.argmax(box_area - offset_dist_squared * 2.0)
+    return face_align.norm_crop(im,
+                                landmark=landmark[bindex],
+                                image_size=args.image_size,
+                                mode=args.align_mode)
+
+
+bins = []
+issame_list = []
+# Each line of pairs_label.txt is "<image1> <image2> <label>"; label '0' marks
+# a different-identity pair, any other label a same-identity pair.
+with open(os.path.join(args.data_dir, 'pairs_label.txt'), 'r') as pairs_file:
+    for pp, line in enumerate(pairs_file, 1):
+        if pp % 100 == 0:
+            print('processing', pp)
+        line = line.strip().split()
+        assert len(line) == 3
+        crops = [get_norm_crop(os.path.join(args.data_dir, name))
+                 for name in line[0:2]]
+        if any(crop is None for crop in crops):
+            # Drop the whole pair so bins keeps two entries per issame_list entry.
+            print('skipping pair without usable face:', line[0], line[1])
+            continue
+        issame_list.append(line[2] != '0')
+        for crop in crops:
+            _, s = cv2.imencode('.jpg', crop)
+            bins.append(s)
+
+with open(args.output, 'wb') as f:
+    pickle.dump((bins, issame_list), f, protocol=pickle.HIGHEST_PROTOCOL)
diff --git a/recognition/subcenter_arcface/common/face_align.py b/recognition/subcenter_arcface/common/face_align.py
new file mode 100644
index 0000000..4f48a76
--- /dev/null
+++ b/recognition/subcenter_arcface/common/face_align.py
@@ -0,0 +1,71 @@
+import cv2
+import numpy as np
+from skimage import transform as trans
+
+src1 = np.array([[51.642, 50.115], [57.617, 49.990], [35.740, 69.007],
+ [51.157, 89.050], [57.025, 89.702]],
+ dtype=np.float32)  # NOTE(review): unlabelled; presumably the left-profile pose, by analogy with src5 - confirm
+#<--left (src2)
+src2 = np.array([[45.031, 50.118], [65.568, 50.872], [39.677, 68.111],
+ [45.177, 86.190], [64.246, 86.758]],
+ dtype=np.float32)
+
+#---frontal (src3)
+src3 = np.array([[39.730, 51.138], [72.270, 51.138], [56.000, 68.493],
+ [42.463, 87.010], [69.537, 87.010]],
+ dtype=np.float32)
+
+#-->right (src4)
+src4 = np.array([[46.845, 50.872], [67.382, 50.118], [72.737, 68.111],
+ [48.167, 86.758], [67.236, 86.190]],
+ dtype=np.float32)
+
+#-->right profile (src5)
+src5 = np.array([[54.796, 49.990], [60.771, 50.115], [76.673, 69.007],
+ [55.388, 89.702], [61.257, 89.050]],
+ dtype=np.float32)
+
+src = np.array([src1, src2, src3, src4, src5])  # the five pose templates stacked: shape (5, 5, 2)
+src_map = {112: src, 224: src * 2}  # templates keyed by output image size; the 224 set is the 112 set scaled by 2
+# Single template that estimate_norm selects when mode == 'arcface'.
+arcface_src = np.array(
+ [[38.2946, 51.6963], [73.5318, 51.5014], [56.0252, 71.7366],
+ [41.5493, 92.3655], [70.7299, 92.2041]],
+ dtype=np.float32)
+# Add a leading axis so estimate_norm can index it as src[i], like the pose set.
+arcface_src = np.expand_dims(arcface_src, axis=0)
+
+# In[66]:
+
+
+# lmk is prediction; src is template
+def estimate_norm(lmk, image_size=112, mode='arcface'):
+    """Fit a similarity transform from `lmk` to each template; keep the best.
+
+    Returns (M, index): the 2x3 matrix with the lowest summed point-to-point
+    distance to its template, and that template's index.
+    """
+    assert lmk.shape == (5, 2)
+    if mode == 'arcface':
+        assert image_size == 112
+        templates = arcface_src
+    else:
+        templates = src_map[image_size]
+    homogeneous = np.insert(lmk, 2, values=np.ones(5), axis=1)
+    solver = trans.SimilarityTransform()
+    best_M, best_index, best_error = [], [], float('inf')
+    for i in np.arange(templates.shape[0]):
+        solver.estimate(lmk, templates[i])
+        M = solver.params[0:2, :]
+        projected = np.dot(M, homogeneous.T).T
+        residual = np.sqrt(np.sum((projected - templates[i])**2, axis=1))
+        error = np.sum(residual)
+        if error < best_error:
+            best_M, best_index, best_error = M, i, error
+    return best_M, best_index
+
+
+def norm_crop(img, landmark, image_size=112, mode='arcface'):
+    """Warp `img` to image_size x image_size using the estimate_norm transform."""
+    matrix = estimate_norm(landmark, image_size, mode)[0]
+    return cv2.warpAffine(img, matrix, (image_size, image_size), borderValue=0.0)
diff --git a/recognition/subcenter_arcface/common/flops_counter.py b/recognition/subcenter_arcface/common/flops_counter.py
new file mode 100644
index 0000000..8094241
--- /dev/null
+++ b/recognition/subcenter_arcface/common/flops_counter.py
@@ -0,0 +1,120 @@
+'''
+@author: insightface
+'''
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import sys
+import os
+import json
+import argparse
+import numpy as np
+import mxnet as mx
+
+
+def is_no_bias(attr):
+    """Tell whether a layer's attribute dict disables its bias term.
+
+    True only when `no_bias` is present and equals True or the string 'True'.
+    """
+    return 'no_bias' in attr and attr['no_bias'] in (True, 'True')
+
+
+def count_fc_flops(input_filter, output_filter, attr):
+    """FLOPs of a fully connected layer: 2*in*out, minus `out` without bias."""
+    flops = 2 * input_filter * output_filter
+    if is_no_bias(attr):
+        flops -= output_filter  # no bias addition per output unit
+    return int(flops)
+
+
+def count_conv_flops(input_shape, output_shape, attr):
+    """Estimate the FLOPs of one Convolution node.
+
+    Reads input_shape[1], output_shape[1:4], the attr['kernel'] string
+    (e.g. '(3, 3)') and, when present, attr['num_group'] as a divisor.
+    """
+    kernel = [int(x) for x in attr['kernel'][1:-1].split(',')]
+    # Operations needed to produce a single output element.
+    per_output = 2 * input_shape[1] * kernel[0] * kernel[1]
+    if is_no_bias(attr):
+        per_output -= 1  # one addition fewer without a bias term
+    outputs = output_shape[2] * output_shape[3] * output_shape[1]
+    groups = 1
+    if 'num_group' in attr:
+        groups = int(attr['num_group'])
+    return int(per_output * outputs / groups)
+
+
+def count_flops(sym, **data_shapes):
+    """Sum the estimated FLOPs of every Convolution and FullyConnected node.
+
+    `sym` is an MXNet symbol; `data_shapes` are the input shapes forwarded to
+    `infer_shape` (e.g. data=(1, 3, 112, 112)). Nodes with any other operator
+    contribute nothing. Raises KeyError if a counted node, or its first
+    input, has no inferred "<name>_output" shape.
+    """
+    all_layers = sym.get_internals()
+    _, out_shapes, _ = all_layers.infer_shape(**data_shapes)
+    out_shape_dict = dict(zip(all_layers.list_outputs(), out_shapes))
+
+    nodes = json.loads(sym.tojson())['nodes']
+    # Map node id -> inferred output shape, for nodes that expose "<name>_output".
+    nodeid_shape = {}
+    for nodeid, node in enumerate(nodes):
+        layer_name = node['name'] + "_output"
+        if layer_name in out_shape_dict:
+            nodeid_shape[nodeid] = out_shape_dict[layer_name]
+
+    FLOPs = 0
+    for nodeid, node in enumerate(nodes):
+        if node['op'] not in ('Convolution', 'FullyConnected'):
+            continue
+        attr = node['attrs']
+        output_shape = nodeid_shape[nodeid]
+        # node['inputs'][0][0] is the id of the node's first input.
+        input_shape = nodeid_shape[node['inputs'][0][0]]
+        if node['op'] == 'Convolution':
+            FLOPs += count_conv_flops(input_shape, output_shape, attr)
+        else:
+            # Multiply every axis after the first, so both 4-D feature maps
+            # and already-flattened 2-D inputs are handled.
+            input_filter = int(np.prod(input_shape[1:]))
+            FLOPs += count_fc_flops(input_filter, output_shape[1], attr)
+
+    return FLOPs
+
+
+def flops_str(FLOPs):
+    """Render a FLOP count as text with one decimal place.
+
+    Values of at least 1e3 are scaled and suffixed with K, M, G or T.
+    """
+    units = ((1e12, 'T'), (1e9, 'G'), (1e6, 'M'), (1e3, 'K'))
+    for scale, suffix in units:  # largest first, so the first fit wins
+        if FLOPs // scale > 0:
+            return "%.1f%s" % (FLOPs / scale, suffix)
+    return "%.1f" % (FLOPs)
+
+
+if __name__ == '__main__':
+    # Usage: python flops_counter.py --model <checkpoint prefix>,<epoch>
+    parser = argparse.ArgumentParser(description='flops counter')
+    #parser.add_argument('--model', default='../models2/y2-arcface-retinat1/model,1', help='path to load model.')
+    #parser.add_argument('--model', default='../models2/r100fc-arcface-retinaa/model,1', help='path to load model.')
+    parser.add_argument('--model',
+                        default='../models2/r50fc-arcface-emore/model,1',
+                        help='path to load model.')
+    args = parser.parse_args()
+    model_spec = args.model.split(',')
+    assert len(model_spec) == 2
+    prefix, epoch = model_spec[0], int(model_spec[1])
+    print('loading', prefix, epoch)
+    # Only the symbol is needed; the loaded parameters are not used here.
+    sym, _, _ = mx.model.load_checkpoint(prefix, epoch)
+    # Count only the sub-graph that ends at the 'fc1_output' internal.
+    embedding_sym = sym.get_internals()['fc1_output']
+    FLOPs = count_flops(embedding_sym, data=(1, 3, 112, 112))
+    print('FLOPs:', FLOPs)
diff --git a/recognition/subcenter_arcface/common/rec2image.py b/recognition/subcenter_arcface/common/rec2image.py
new file mode 100644
index 0000000..21e5ec4
--- /dev/null
+++ b/recognition/subcenter_arcface/common/rec2image.py
@@ -0,0 +1,60 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import os
+import sys
+import mxnet as mx
+from mxnet import ndarray as nd
+import random
+import argparse
+import cv2
+import time
+import sklearn
+import numpy as np
+
+
+def main(args):
+    """Export every image of the given .rec datasets to per-identity folders.
+
+    `args.include` is a comma-separated list of dataset directories, each
+    holding train.rec/train.idx; images are written as
+    <args.output>/<dataset index>_<identity index>/<n>.jpg.
+    """
+    rec_list = []
+    for ds in args.include.split(','):
+        path_imgrec = os.path.join(ds, 'train.rec')
+        path_imgidx = os.path.join(ds, 'train.idx')
+        rec_list.append(
+            mx.recordio.MXIndexedRecordIO(path_imgidx, path_imgrec, 'r'))
+    if not os.path.exists(args.output):
+        os.makedirs(args.output)
+    for ds_id, imgrec in enumerate(rec_list):
+        # Record 0 is a header whose label holds the [start, end) index range
+        # of the per-identity records.
+        header, _ = mx.recordio.unpack(imgrec.read_idx(0))
+        assert header.flag > 0
+        print('header0 label', header.label)
+        seq_identity = range(int(header.label[0]), int(header.label[1]))
+        for pp, identity in enumerate(seq_identity, 1):
+            id_dir = os.path.join(args.output, "%d_%d" % (ds_id, identity))
+            if not os.path.exists(id_dir):  # tolerate re-runs into the same output
+                os.makedirs(id_dir)
+            if pp % 10 == 0:
+                print('processing id', pp)
+            # Each identity record's label holds that identity's image range.
+            header, _ = mx.recordio.unpack(imgrec.read_idx(identity))
+            image_range = range(int(header.label[0]), int(header.label[1]))
+            for imgid, _idx in enumerate(image_range):
+                _, _img = mx.recordio.unpack(imgrec.read_idx(_idx))
+                _img = mx.image.imdecode(_img).asnumpy()[:, :, ::-1]  # to bgr
+                image_path = os.path.join(id_dir, "%d.jpg" % imgid)
+                cv2.imwrite(image_path, _img)
+
+
+if __name__ == '__main__':
+    # Both options default to '': --include is split on ',' into dataset
+    # directories, --output is the directory that receives the images.
+    cli = argparse.ArgumentParser(description='do dataset merge')
+    cli.add_argument('--include', default='', type=str, help='')
+    cli.add_argument('--output', default='', type=str, help='')
+    main(cli.parse_args())
diff --git a/recognition/subcenter_arcface/common/rec2shufrec.py b/recognition/subcenter_arcface/common/rec2shufrec.py
new file mode 100644
index 0000000..cf916b4
--- /dev/null
+++ b/recognition/subcenter_arcface/common/rec2shufrec.py
@@ -0,0 +1,72 @@
+import os
+import os.path as osp
+import sys
+import datetime
+import glob
+import shutil
+import numbers
+import mxnet as mx
+from mxnet import ndarray as nd
+from mxnet import io
+from mxnet import recordio
+import random
+import argparse
+import cv2
+import time
+import numpy as np
+
+def main(args):
+    """Rewrite <input>/train.rec as a shuffled, non-indexed <output>/train.rec.
+
+    Also writes a `property` file and, with `args.copy_vers`, copies the
+    input's *.bin files into the output directory.
+    """
+    path_imgrec = osp.join(args.input, 'train.rec')
+    path_imgidx = osp.join(args.input, 'train.idx')
+    imgrec = mx.recordio.MXIndexedRecordIO(path_imgidx, path_imgrec, 'r')  # pylint: disable=redefined-variable-type
+    if not osp.exists(args.output):
+        os.makedirs(args.output)
+    writer = mx.recordio.MXRecordIO(osp.join(args.output, 'train.rec'), 'w')
+    header, _ = recordio.unpack(imgrec.read_idx(0))
+    if header.flag > 0:
+        # Record 0 is a header; images occupy indices 1 .. label[0] - 1.
+        print('header0 label', header.label)
+        imgidx = list(range(1, int(header.label[0])))
+    else:
+        imgidx = list(imgrec.keys)
+    random.shuffle(imgidx)
+    label_stat = None  # [min label, max label] over all written records
+    print('total images:', len(imgidx))
+    for i, idx in enumerate(imgidx):
+        if i % 10000 == 0:
+            print('processing', i, idx)
+        header, img = mx.recordio.unpack(imgrec.read_idx(idx))
+        label = header.label
+        if not isinstance(label, numbers.Number):
+            label = label[0]
+        if label_stat is None:
+            label_stat = [label, label]
+        else:
+            label_stat = [min(label, label_stat[0]), max(label, label_stat[1])]
+        writer.write(mx.recordio.pack(mx.recordio.IRHeader(0, label, i, 0), img))
+    print('label_stat:', label_stat)
+    writer.close()
+    if args.copy_vers:
+        for binfile in glob.glob(osp.join(args.input, '*.bin')):
+            # basename handles the platform's path separator, unlike split('/').
+            shutil.copyfile(binfile, osp.join(args.output, osp.basename(binfile)))
+    num_classes = 0 if label_stat is None else int(label_stat[1]) + 1  # empty input -> 0
+    with open(osp.join(args.output, 'property'), 'w') as f:
+        f.write("%d,112,112\n" % num_classes)
+        f.write("%d\n" % len(imgidx))
+        f.write("shuffled\n")
+        f.write("%s\n" % (datetime.datetime.now()))
+
+if __name__ == '__main__':
+    # --input must contain train.rec and train.idx; --output is created if missing.
+    cli = argparse.ArgumentParser(description='convert rec to shuffled rec')
+    cli.add_argument('--input', default='', type=str, help='')
+    cli.add_argument('--output', default='', type=str, help='')
+    cli.add_argument('--copy-vers', action='store_true',
+                     help='copy verification bins')
+    main(cli.parse_args())
diff --git a/recognition/subcenter_arcface/common/rec_builder.py b/recognition/subcenter_arcface/common/rec_builder.py
new file mode 100644
index 0000000..1d51715
--- /dev/null
+++ b/recognition/subcenter_arcface/common/rec_builder.py
@@ -0,0 +1,109 @@
+import os
+import sys
+import mxnet as mx
+from mxnet import ndarray as nd
+import random
+import argparse
+import cv2
+import time
+import sklearn
+import numpy as np
+
+
+class SeqRecBuilder():
+    """Writes samples one by one into <path>/train.rec with a train.idx index."""
+    def __init__(self, path, image_size=(112, 112)):
+        self.path = path
+        self.image_size = image_size
+        self.last_label = -1
+        self.widx = 0  # index assigned to the next record
+        if not os.path.exists(path):
+            os.makedirs(path)
+        self.writer = mx.recordio.MXIndexedRecordIO(
+            os.path.join(path, 'train.idx'), os.path.join(path, 'train.rec'),
+            'w')
+        self.label_stat = [-1, -1]  # [min, max] label seen; -1 until first add
+
+    def add(self, label, img, is_image=True):
+        """Append one sample: a BGR image to JPEG-encode, or raw data if not is_image."""
+        idx = self.widx
+        self.widx += 1
+        header = mx.recordio.IRHeader(0, label, idx, 0)
+        if is_image:
+            s = mx.recordio.pack_img(header, img, quality=95, img_fmt='.jpg')
+        else:
+            s = mx.recordio.pack(header, img)
+        self.writer.write_idx(idx, s)
+        if self.label_stat[0] < 0:
+            self.label_stat = [label, label]
+        else:
+            self.label_stat[0] = min(self.label_stat[0], label)
+            self.label_stat[1] = max(self.label_stat[1], label)
+
+    def close(self):
+        self.writer.close()  # close train.rec / train.idx before writing metadata
+        with open(os.path.join(self.path, 'property'), 'w') as f:
+            f.write("%d,%d,%d\n" % (self.label_stat[1] + 1, self.image_size[0],
+                                    self.image_size[1]))
+
+
+class RecBuilder():
+    """Builds <path>/train.rec + train.idx grouped by identity, with index records."""
+    def __init__(self, path, image_size=(112, 112)):
+        self.path = path
+        self.image_size = image_size
+        self.last_label = -1
+        self.widx = 1  # index 0 is reserved for the header written by close()
+        if not os.path.exists(path):
+            os.makedirs(path)
+        self.writer = mx.recordio.MXIndexedRecordIO(
+            os.path.join(path, 'train.idx'), os.path.join(path, 'train.rec'),
+            'w')
+        self.label_stat = [-1, -1]  # [min, max] label seen; -1 until first add
+        self.identities = []  # [first index, end index) of each identity's images
+
+    def add(self, label, imgs):
+        """Append all images of one identity; labels must be strictly increasing.
+
+        ndarray items (expected BGR) are JPEG-encoded; others are packed unchanged.
+        """
+        assert label >= 0
+        assert label > self.last_label
+        assert len(imgs) > 0
+        idflag = [self.widx, -1]
+        for img in imgs:
+            idx = self.widx
+            self.widx += 1
+            header = mx.recordio.IRHeader(0, label, idx, 0)
+            if isinstance(img, np.ndarray):
+                s = mx.recordio.pack_img(header, img, quality=95, img_fmt='.jpg')
+            else:
+                s = mx.recordio.pack(header, img)
+            self.writer.write_idx(idx, s)
+        idflag[1] = self.widx
+        self.identities.append(idflag)
+        if self.label_stat[0] < 0:
+            self.label_stat = [label, label]
+        else:
+            self.label_stat[0] = min(self.label_stat[0], label)
+            self.label_stat[1] = max(self.label_stat[1], label)
+        self.last_label = label
+
+    def close(self):
+        """Write identity records, the index-0 header and `property`; close the writer."""
+        id_idx = self.widx
+        # One empty-payload record per identity, labelled with its image range.
+        for id_flag in self.identities:
+            idx = self.widx
+            self.widx += 1
+            _header = mx.recordio.IRHeader(0, id_flag, idx, 0)
+            self.writer.write_idx(idx, mx.recordio.pack(_header, b''))
+        print('id0:', (id_idx, self.widx))
+        # Header at index 0 carries the [start, end) range of the identity records.
+        _header = mx.recordio.IRHeader(0, (id_idx, self.widx), 0, 1)
+        self.writer.write_idx(0, mx.recordio.pack(_header, b''))
+        print('label stat:', self.label_stat)
+        self.writer.close()  # close train.rec / train.idx before writing metadata
+        with open(os.path.join(self.path, 'property'), 'w') as f:
+            f.write("%d,%d,%d\n" % (self.label_stat[1] + 1, self.image_size[0],
+                                    self.image_size[1]))
diff --git a/recognition/subcenter_arcface/common/verification.py b/recognition/subcenter_arcface/common/verification.py
new file mode 100644
index 0000000..f46942a
--- /dev/null
+++ b/recognition/subcenter_arcface/common/verification.py
@@ -0,0 +1,423 @@
+"""Helper for evaluation on the Labeled Faces in the Wild dataset
+"""
+
+# MIT License
+#
+# Copyright (c) 2016 David Sandberg
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import argparse
+import sys
+import numpy as np
+from scipy import misc
+from sklearn.model_selection import KFold
+from scipy import interpolate
+import sklearn
+import cv2
+import math
+import datetime
+import pickle
+from sklearn.decomposition import PCA
+import mxnet as mx
+from mxnet import ndarray as nd
+
+
+class LFold:
+    """K-fold splitter that also accepts a single fold.
+
+    With ``n_splits > 1`` the work is delegated to ``KFold``. Otherwise
+    ``split`` yields one fold whose train and test sets are both the full
+    index collection.
+    """
+    def __init__(self, n_splits=2, shuffle=False):
+        self.n_splits = n_splits
+        if n_splits > 1:
+            self.k_fold = KFold(n_splits=n_splits, shuffle=shuffle)
+
+    def split(self, indices):
+        """Return an iterable of ``(train, test)`` pairs for ``indices``."""
+        if self.n_splits > 1:
+            return self.k_fold.split(indices)
+        return [(indices, indices)]
+
+
+def calculate_roc(thresholds,
+                  embeddings1,
+                  embeddings2,
+                  actual_issame,
+                  nrof_folds=10,
+                  pca=0):
+    """Cross-validated ROC points and accuracy for embedding pairs.
+
+    The distance of a pair is the sum of squared differences between its two
+    embeddings. For every fold, the threshold with the best training
+    accuracy is selected and scored on the test part; TPR/FPR are recorded on
+    the test part for every threshold.
+
+    When ``pca > 0`` a PCA with ``pca`` components is fitted per fold on the
+    training embeddings, all embeddings are projected and L2-normalised, and
+    distances are recomputed from the projections.
+
+    Returns
+    -------
+    tpr, fpr : per-threshold means over the folds.
+    accuracy : array with one test accuracy per fold.
+    """
+    assert (embeddings1.shape[0] == embeddings2.shape[0])
+    assert (embeddings1.shape[1] == embeddings2.shape[1])
+    n_pairs = min(len(actual_issame), embeddings1.shape[0])
+    n_thresholds = len(thresholds)
+    splitter = LFold(n_splits=nrof_folds, shuffle=False)
+
+    tprs = np.zeros((nrof_folds, n_thresholds))
+    fprs = np.zeros((nrof_folds, n_thresholds))
+    accuracy = np.zeros((nrof_folds))
+    indices = np.arange(n_pairs)
+
+    if pca == 0:
+        # Without PCA the distances do not depend on the fold.
+        dist = np.sum(np.square(np.subtract(embeddings1, embeddings2)), 1)
+
+    for fold, (train_idx, test_idx) in enumerate(splitter.split(indices)):
+        if pca > 0:
+            print('doing pca on', fold)
+            train_stack = np.concatenate(
+                (embeddings1[train_idx], embeddings2[train_idx]), axis=0)
+            pca_model = PCA(n_components=pca)
+            pca_model.fit(train_stack)
+            proj1 = sklearn.preprocessing.normalize(
+                pca_model.transform(embeddings1))
+            proj2 = sklearn.preprocessing.normalize(
+                pca_model.transform(embeddings2))
+            dist = np.sum(np.square(np.subtract(proj1, proj2)), 1)
+
+        train_dist, train_same = dist[train_idx], actual_issame[train_idx]
+        test_dist, test_same = dist[test_idx], actual_issame[test_idx]
+
+        # Pick the threshold that maximises accuracy on the training part.
+        acc_train = np.zeros((n_thresholds))
+        for t_idx, thr in enumerate(thresholds):
+            acc_train[t_idx] = calculate_accuracy(thr, train_dist,
+                                                  train_same)[2]
+        best = np.argmax(acc_train)
+
+        # ROC points on the held-out part, one per threshold.
+        for t_idx, thr in enumerate(thresholds):
+            tprs[fold, t_idx], fprs[fold, t_idx], _ = calculate_accuracy(
+                thr, test_dist, test_same)
+        accuracy[fold] = calculate_accuracy(thresholds[best], test_dist,
+                                            test_same)[2]
+
+    return np.mean(tprs, 0), np.mean(fprs, 0), accuracy
+
+
+def calculate_accuracy(threshold, dist, actual_issame):
+    """Score the rule "same if dist < threshold" against the ground truth.
+
+    Parameters
+    ----------
+    threshold : float
+        Pairs with a distance strictly below this value are predicted "same".
+    dist : numpy.ndarray
+        One distance per pair.
+    actual_issame : array-like of bool
+        Ground-truth flag per pair, aligned with ``dist``.
+
+    Returns
+    -------
+    (tpr, fpr, acc)
+        Each ratio is reported as 0 when its denominator is zero, so an
+        empty or single-class fold never raises ZeroDivisionError.
+    """
+    predict_issame = np.less(dist, threshold)
+    predict_notsame = np.logical_not(predict_issame)
+    actual_notsame = np.logical_not(actual_issame)
+    tp = np.sum(np.logical_and(predict_issame, actual_issame))
+    fp = np.sum(np.logical_and(predict_issame, actual_notsame))
+    tn = np.sum(np.logical_and(predict_notsame, actual_notsame))
+    fn = np.sum(np.logical_and(predict_notsame, actual_issame))
+
+    tpr = 0 if (tp + fn == 0) else float(tp) / float(tp + fn)
+    fpr = 0 if (fp + tn == 0) else float(fp) / float(fp + tn)
+    # Same zero-denominator guard as tpr/fpr: no pairs means nothing to score.
+    acc = 0.0 if dist.size == 0 else float(tp + tn) / dist.size
+    return tpr, fpr, acc
+
+
+def calculate_val(thresholds,
+                  embeddings1,
+                  embeddings2,
+                  actual_issame,
+                  far_target,
+                  nrof_folds=10):
+    """Cross-validated validation rate at a target false accept rate.
+
+    For every fold the threshold whose training FAR equals ``far_target`` is
+    found by linear interpolation over ``thresholds``; VAL and FAR are then
+    measured on the test part with that threshold. If the training FAR never
+    reaches ``far_target`` the threshold falls back to 0.0.
+
+    Returns
+    -------
+    (val_mean, val_std, far_mean) over the folds.
+    """
+    assert (embeddings1.shape[0] == embeddings2.shape[0])
+    assert (embeddings1.shape[1] == embeddings2.shape[1])
+    nrof_pairs = min(len(actual_issame), embeddings1.shape[0])
+    nrof_thresholds = len(thresholds)
+    k_fold = LFold(n_splits=nrof_folds, shuffle=False)
+
+    val = np.zeros(nrof_folds)
+    far = np.zeros(nrof_folds)
+
+    # Squared Euclidean distance of every pair.
+    diff = np.subtract(embeddings1, embeddings2)
+    dist = np.sum(np.square(diff), 1)
+    indices = np.arange(nrof_pairs)
+
+    for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)):
+
+        # Find the threshold that gives FAR = far_target
+        far_train = np.zeros(nrof_thresholds)
+        for threshold_idx, threshold in enumerate(thresholds):
+            _, far_train[threshold_idx] = calculate_val_far(
+                threshold, dist[train_set], actual_issame[train_set])
+        if np.max(far_train) >= far_target:
+            # FAR is flat over many neighbouring thresholds, and interp1d
+            # rejects repeated x values. Keep one point per distinct FAR:
+            # the first threshold that produced it (the smallest one,
+            # assuming thresholds are ascending).
+            far_unique, first_idx = np.unique(far_train, return_index=True)
+            thr_unique = np.asarray(thresholds)[first_idx]
+            if far_unique.size > 1:
+                f = interpolate.interp1d(far_unique,
+                                         thr_unique,
+                                         kind='slinear')
+                threshold = f(far_target)
+            else:
+                # A single distinct FAR leaves nothing to interpolate.
+                threshold = thr_unique[0]
+        else:
+            threshold = 0.0
+
+        val[fold_idx], far[fold_idx] = calculate_val_far(
+            threshold, dist[test_set], actual_issame[test_set])
+
+    val_mean = np.mean(val)
+    far_mean = np.mean(far)
+    val_std = np.std(val)
+    return val_mean, val_std, far_mean
+
+
+def calculate_val_far(threshold, dist, actual_issame):
+    """Validation rate and false accept rate at one threshold.
+
+    A pair is accepted when its distance is strictly below ``threshold``.
+
+    Returns
+    -------
+    (val, far)
+        ``val`` is accepted same-pairs / all same-pairs, ``far`` is accepted
+        different-pairs / all different-pairs. A rate whose denominator is
+        zero (the fold lacks that kind of pair) is reported as 0.0 instead of
+        raising ZeroDivisionError.
+    """
+    predict_issame = np.less(dist, threshold)
+    actual_notsame = np.logical_not(actual_issame)
+    true_accept = np.sum(np.logical_and(predict_issame, actual_issame))
+    false_accept = np.sum(np.logical_and(predict_issame, actual_notsame))
+    n_same = np.sum(actual_issame)
+    n_diff = np.sum(actual_notsame)
+    val = float(true_accept) / float(n_same) if n_same > 0 else 0.0
+    far = float(false_accept) / float(n_diff) if n_diff > 0 else 0.0
+    return val, far
+
+
+def evaluate(embeddings, actual_issame, nrof_folds=10, pca=0):
+    """Run the ROC and VAL@FAR=1e-3 evaluations on interleaved embeddings.
+
+    Even rows of ``embeddings`` are paired with the following odd rows.
+    ROC uses thresholds 0..4 in steps of 0.01; VAL uses steps of 0.001.
+
+    Returns ``(tpr, fpr, accuracy, val, val_std, far)``.
+    """
+    first, second = embeddings[0::2], embeddings[1::2]
+    issame = np.asarray(actual_issame)
+
+    roc_thresholds = np.arange(0, 4, 0.01)
+    tpr, fpr, accuracy = calculate_roc(roc_thresholds,
+                                       first,
+                                       second,
+                                       issame,
+                                       nrof_folds=nrof_folds,
+                                       pca=pca)
+
+    val_thresholds = np.arange(0, 4, 0.001)
+    val, val_std, far = calculate_val(val_thresholds,
+                                      first,
+                                      second,
+                                      issame,
+                                      1e-3,
+                                      nrof_folds=nrof_folds)
+    return tpr, fpr, accuracy, val, val_std, far
+
+
+def load_bin(path, image_size):
+    """Load a pickled verification set into two MXNet image tensors.
+
+    The file must unpickle to ``(bins, issame_list)`` where ``bins`` holds
+    two encoded images per entry of ``issame_list``. Each image is decoded,
+    resized with ``resize_short`` when ``img.shape[1]`` differs from
+    ``image_size[0]``, and transposed with axes ``(2, 0, 1)``.
+
+    Returns
+    -------
+    (data_list, issame_list)
+        ``data_list[0]`` holds the images as decoded and ``data_list[1]``
+        the same images flipped along axis 2.
+    """
+    # NOTE(review): pickle can execute arbitrary code while loading; only
+    # open .bin files that come from a trusted source.
+    try:
+        with open(path, 'rb') as f:
+            bins, issame_list = pickle.load(f)  #py2
+    except UnicodeDecodeError:
+        with open(path, 'rb') as f:
+            bins, issame_list = pickle.load(f, encoding='bytes')  #py3
+
+    n_images = len(issame_list) * 2
+    data_list = [
+        nd.empty((n_images, 3, image_size[0], image_size[1]))
+        for _ in [0, 1]
+    ]
+    for i in range(n_images):
+        img = mx.image.imdecode(bins[i])
+        if img.shape[1] != image_size[0]:
+            img = mx.image.resize_short(img, image_size[0])
+        img = nd.transpose(img, axes=(2, 0, 1))
+        data_list[0][i][:] = img
+        data_list[1][i][:] = mx.ndarray.flip(data=img, axis=2)
+        if i % 1000 == 0:
+            print('loading bin', i)
+    print(data_list[0].shape)
+    return (data_list, issame_list)
+
+
+def test(data_set,
+         mx_model,
+         batch_size,
+         nfolds=10,
+         data_extra=None,
+         label_shape=None):
+    """Embed a verification set with ``mx_model`` and evaluate it.
+
+    Parameters
+    ----------
+    data_set : tuple
+        ``(data_list, issame_list)`` as returned by ``load_bin``; the code
+        below adds ``data_list[0]`` and ``data_list[1]`` embeddings together.
+    mx_model : object
+        Model exposing ``forward(batch, is_train=False)`` and
+        ``get_outputs()``; output 0 is used as the embedding.
+    batch_size : int
+        Number of rows fed to the model per forward pass.
+    nfolds : int
+        Fold count forwarded to ``evaluate``.
+    data_extra : array-like, optional
+        Extra input appended to every batch when given.
+    label_shape : tuple, optional
+        Shape of the all-ones label; defaults to ``(batch_size,)``.
+
+    Returns
+    -------
+    (acc1, std1, acc2, std2, xnorm, embeddings_list)
+        ``acc1``/``std1`` are always 0.0 (the un-flipped evaluation is
+        disabled). ``acc2``/``std2`` are the mean and standard deviation of
+        the fold accuracies on the summed, L2-normalised embeddings, and
+        ``xnorm`` is the mean L2 norm of all raw embeddings.
+    """
+    print('testing verification..')
+    data_list = data_set[0]
+    issame_list = data_set[1]
+    model = mx_model
+    embeddings_list = []
+    if data_extra is not None:
+        _data_extra = nd.array(data_extra)
+    time_consumed = 0.0
+    if label_shape is None:
+        _label = nd.ones((batch_size, ))
+    else:
+        _label = nd.ones(label_shape)
+    for i in range(len(data_list)):
+        data = data_list[i]
+        embeddings = None
+        ba = 0
+        while ba < data.shape[0]:
+            bb = min(ba + batch_size, data.shape[0])
+            count = bb - ba
+            # Always feed exactly batch_size rows: the final batch reaches
+            # back into rows that were already processed, and only its last
+            # `count` outputs are kept further down.
+            # NOTE(review): begin is negative when the set has fewer rows
+            # than batch_size - confirm that case cannot occur.
+            _data = nd.slice_axis(data, axis=0, begin=bb - batch_size, end=bb)
+            time0 = datetime.datetime.now()
+            if data_extra is None:
+                db = mx.io.DataBatch(data=(_data, ), label=(_label, ))
+            else:
+                db = mx.io.DataBatch(data=(_data, _data_extra),
+                                     label=(_label, ))
+            model.forward(db, is_train=False)
+            net_out = model.get_outputs()
+            _embeddings = net_out[0].asnumpy()
+            time_now = datetime.datetime.now()
+            diff = time_now - time0
+            # Only forward pass plus host copy are counted as inference time.
+            time_consumed += diff.total_seconds()
+            if embeddings is None:
+                # Allocated lazily because the embedding width is only known
+                # after the first forward pass.
+                embeddings = np.zeros((data.shape[0], _embeddings.shape[1]))
+            # Drop the overlapping leading rows of a short final batch.
+            embeddings[ba:bb, :] = _embeddings[(batch_size - count):, :]
+            ba = bb
+        embeddings_list.append(embeddings)
+
+    # Mean L2 norm over every raw embedding of every entry in data_list.
+    _xnorm = 0.0
+    _xnorm_cnt = 0
+    for embed in embeddings_list:
+        for i in range(embed.shape[0]):
+            _em = embed[i]
+            _norm = np.linalg.norm(_em)
+            _xnorm += _norm
+            _xnorm_cnt += 1
+    _xnorm /= _xnorm_cnt
+
+    # The evaluation of the first embedding set alone is disabled, so this
+    # normalised copy is never used and acc1/std1 stay at 0.0.
+    embeddings = embeddings_list[0].copy()
+    embeddings = sklearn.preprocessing.normalize(embeddings)
+    acc1 = 0.0
+    std1 = 0.0
+
+    # Fuse the two embedding sets by element-wise sum, then L2-normalise.
+    embeddings = embeddings_list[0] + embeddings_list[1]
+    embeddings = sklearn.preprocessing.normalize(embeddings)
+    print(embeddings.shape)
+    print('infer time', time_consumed)
+    _, _, accuracy, val, val_std, far = evaluate(embeddings,
+                                                 issame_list,
+                                                 nrof_folds=nfolds)
+    acc2, std2 = np.mean(accuracy), np.std(accuracy)
+    return acc1, std1, acc2, std2, _xnorm, embeddings_list
+
+
+if __name__ == '__main__':
+    # Command-line entry point: load one or more checkpoints, load the
+    # requested verification sets and print flip-fused accuracy per model.
+
+    parser = argparse.ArgumentParser(description='do verification')
+    # general
+    parser.add_argument('--data-dir', default='', help='')
+    parser.add_argument('--model',
+                        default='../model/softmax,50',
+                        help='path to load model.')
+    parser.add_argument('--target',
+                        default='lfw,cfp_ff,cfp_fp,agedb_30',
+                        help='test targets.')
+    parser.add_argument('--gpu', default=0, type=int, help='gpu id')
+    parser.add_argument('--batch-size', default=32, type=int, help='')
+    parser.add_argument('--max', default='', type=str, help='')
+    parser.add_argument('--mode', default=0, type=int, help='')
+    parser.add_argument('--nfolds', default=10, type=int, help='')
+    args = parser.parse_args()
+    # Modes 1 and 2 used to call test_badcase() / dumpR(), which this module
+    # neither defines nor imports. Reject them up front instead of failing
+    # with NameError after every model and dataset has been loaded.
+    if args.mode != 0:
+        parser.error('--mode %d is not supported by this script; '
+                     'only --mode 0 is implemented' % args.mode)
+    image_size = [112, 112]
+    print('image_size', image_size)
+    ctx = mx.gpu(args.gpu)
+    nets = []
+    # --model is "prefix" (scan the directory for epochs) or
+    # "prefix,e1|e2|..." (explicit epoch list).
+    vec = args.model.split(',')
+    prefix = args.model.split(',')[0]
+    epochs = []
+    if len(vec) == 1:
+        pdir = os.path.dirname(prefix)
+        for fname in os.listdir(pdir):
+            if not fname.endswith('.params'):
+                continue
+            _file = os.path.join(pdir, fname)
+            if _file.startswith(prefix):
+                # Checkpoint files are named "<prefix>-<epoch>.params".
+                epoch = int(fname.split('.')[0].split('-')[1])
+                epochs.append(epoch)
+        epochs = sorted(epochs, reverse=True)
+        if len(args.max) > 0:
+            # --max "a,b" keeps the slice [a:b] of the newest-first list.
+            _max = [int(x) for x in args.max.split(',')]
+            assert len(_max) == 2
+            if len(epochs) > _max[1]:
+                epochs = epochs[_max[0]:_max[1]]
+
+    else:
+        epochs = [int(x) for x in vec[1].split('|')]
+    print('model number', len(epochs))
+    time0 = datetime.datetime.now()
+    for epoch in epochs:
+        print('loading', prefix, epoch)
+        sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
+        # Cut the graph at the embedding layer.
+        all_layers = sym.get_internals()
+        sym = all_layers['fc1_output']
+        model = mx.mod.Module(symbol=sym, context=ctx, label_names=None)
+        model.bind(data_shapes=[('data', (args.batch_size, 3, image_size[0],
+                                          image_size[1]))])
+        model.set_params(arg_params, aux_params)
+        nets.append(model)
+    time_now = datetime.datetime.now()
+    diff = time_now - time0
+    print('model loading time', diff.total_seconds())
+
+    ver_list = []
+    ver_name_list = []
+    for name in args.target.split(','):
+        path = os.path.join(args.data_dir, name + ".bin")
+        # Targets whose .bin file is missing are skipped silently.
+        if os.path.exists(path):
+            print('loading.. ', name)
+            data_set = load_bin(path, image_size)
+            ver_list.append(data_set)
+            ver_name_list.append(name)
+
+    for i in range(len(ver_list)):
+        results = []
+        for model in nets:
+            acc1, std1, acc2, std2, xnorm, embeddings_list = test(
+                ver_list[i], model, args.batch_size, args.nfolds)
+            print('[%s]XNorm: %f' % (ver_name_list[i], xnorm))
+            print('[%s]Accuracy: %1.5f+-%1.5f' %
+                  (ver_name_list[i], acc1, std1))
+            print('[%s]Accuracy-Flip: %1.5f+-%1.5f' %
+                  (ver_name_list[i], acc2, std2))
+            results.append(acc2)
+        # np.max raises on an empty list, which happens when no checkpoint
+        # matched the prefix.
+        if results:
+            print('Max of [%s] is %1.5f' %
+                  (ver_name_list[i], np.max(results)))
diff --git a/recognition/SubCenter-ArcFace/drop.py b/recognition/subcenter_arcface/drop.py
similarity index 100%
rename from recognition/SubCenter-ArcFace/drop.py
rename to recognition/subcenter_arcface/drop.py
diff --git a/recognition/SubCenter-ArcFace/image_iter.py b/recognition/subcenter_arcface/image_iter.py
similarity index 100%
rename from recognition/SubCenter-ArcFace/image_iter.py
rename to recognition/subcenter_arcface/image_iter.py
diff --git a/recognition/SubCenter-ArcFace/parall_module_local_v1.py b/recognition/subcenter_arcface/parall_module_local_v1.py
similarity index 100%
rename from recognition/SubCenter-ArcFace/parall_module_local_v1.py
rename to recognition/subcenter_arcface/parall_module_local_v1.py
diff --git a/recognition/SubCenter-ArcFace/sample_config.py b/recognition/subcenter_arcface/sample_config.py
similarity index 100%
rename from recognition/SubCenter-ArcFace/sample_config.py
rename to recognition/subcenter_arcface/sample_config.py
diff --git a/recognition/subcenter_arcface/symbol/fdensenet.py b/recognition/subcenter_arcface/symbol/fdensenet.py
new file mode 100644
index 0000000..b3d49ee
--- /dev/null
+++ b/recognition/subcenter_arcface/symbol/fdensenet.py
@@ -0,0 +1,169 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# coding: utf-8
+# pylint: disable= arguments-differ
+"""DenseNet, implemented in Gluon."""
+
+import sys
+import os
+import mxnet as mx
+import mxnet.ndarray as nd
+import mxnet.gluon as gluon
+import mxnet.gluon.nn as nn
+import mxnet.autograd as ag
+import symbol_utils
+sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
+from config import config
+
+
+def Act():
+    """Create the activation block selected by ``config.net_act``.
+
+    ``'prelu'`` yields ``nn.PReLU()``; any other value is passed to
+    ``nn.Activation``.
+    """
+    act_type = config.net_act
+    if act_type != 'prelu':
+        return nn.Activation(act_type)
+    return nn.PReLU()
+
+
+# Helpers
+def _make_dense_block(num_layers, bn_size, growth_rate, dropout, stage_index):
+    """Stack ``num_layers`` dense layers under the prefix ``stage<index>_``."""
+    block = nn.HybridSequential(prefix='stage%d_' % stage_index)
+    with block.name_scope():
+        # Layers are created one after another inside the scope so that
+        # their names follow creation order.
+        remaining = num_layers
+        while remaining > 0:
+            block.add(_make_dense_layer(growth_rate, bn_size, dropout))
+            remaining -= 1
+    return block
+
+
+def _make_dense_layer(growth_rate, bn_size, dropout):
+    """Build one dense layer.
+
+    The new-feature branch is BN -> act -> 1x1 conv with
+    ``bn_size * growth_rate`` filters -> BN -> act -> 3x3 conv with
+    ``growth_rate`` filters, followed by dropout when ``dropout`` is truthy.
+    The returned block concatenates the input (identity) with that branch
+    along axis 1.
+    """
+    branch = nn.HybridSequential(prefix='')
+    # Created in this exact order; the list is evaluated left to right.
+    stages = [
+        nn.BatchNorm(),
+        Act(),
+        nn.Conv2D(bn_size * growth_rate, kernel_size=1, use_bias=False),
+        nn.BatchNorm(),
+        Act(),
+        nn.Conv2D(growth_rate, kernel_size=3, padding=1, use_bias=False),
+    ]
+    if dropout:
+        stages.append(nn.Dropout(dropout))
+    for stage in stages:
+        branch.add(stage)
+
+    concat = gluon.contrib.nn.HybridConcurrent(axis=1, prefix='')
+    concat.add(gluon.contrib.nn.Identity())
+    concat.add(branch)
+    return concat
+
+
+def _make_transition(num_output_features):
+    """Transition between dense blocks.
+
+    BN -> act -> 1x1 conv to ``num_output_features`` channels -> 2x2 average
+    pooling with stride 2.
+    """
+    transition = nn.HybridSequential(prefix='')
+    # Created in this exact order; the tuple is evaluated left to right.
+    stages = (
+        nn.BatchNorm(),
+        Act(),
+        nn.Conv2D(num_output_features, kernel_size=1, use_bias=False),
+        nn.AvgPool2D(pool_size=2, strides=2),
+    )
+    for stage in stages:
+        transition.add(stage)
+    return transition
+
+
+# Net
+class DenseNet(nn.HybridBlock):
+    r"""Densenet-BC feature extractor from the
+    `"Densely Connected Convolutional Networks" <https://arxiv.org/pdf/1608.06993.pdf>`_ paper.
+
+    Only the convolutional feature stack is built: the pooling, flatten and
+    classifier layers are commented out, so the forward pass returns the
+    output of ``self.features``.
+
+    Parameters
+    ----------
+    num_init_features : int
+        Number of filters to learn in the first convolution layer.
+    growth_rate : int
+        Number of filters to add each layer (`k` in the paper).
+    block_config : list of int
+        List of integers for numbers of layers in each pooling block.
+    bn_size : int, default 4
+        Multiplicative factor for number of bottle neck layers.
+        (i.e. bn_size * k features in the bottleneck layer)
+    dropout : float, default 0
+        Rate of dropout after each dense layer.
+    classes : int, default 1000
+        Accepted for interface compatibility but unused here, because the
+        classifier layer is disabled.
+    """
+    def __init__(self,
+                 num_init_features,
+                 growth_rate,
+                 block_config,
+                 bn_size=4,
+                 dropout=0,
+                 classes=1000,
+                 **kwargs):
+
+        super(DenseNet, self).__init__(**kwargs)
+        with self.name_scope():
+            self.features = nn.HybridSequential(prefix='')
+            # Stem: 3x3 stride-1 convolution, BN, ReLU, then a 3x3 stride-2
+            # max pool. The stem uses a fixed ReLU, not config.net_act.
+            self.features.add(
+                nn.Conv2D(num_init_features,
+                          kernel_size=3,
+                          strides=1,
+                          padding=1,
+                          use_bias=False))
+            self.features.add(nn.BatchNorm())
+            self.features.add(nn.Activation('relu'))
+            self.features.add(nn.MaxPool2D(pool_size=3, strides=2, padding=1))
+            # Add dense blocks
+            num_features = num_init_features
+            for i, num_layers in enumerate(block_config):
+                self.features.add(
+                    _make_dense_block(num_layers, bn_size, growth_rate,
+                                      dropout, i + 1))
+                # Every dense layer concatenates growth_rate new channels.
+                num_features = num_features + num_layers * growth_rate
+                # A transition halving the channel count follows every block
+                # except the last one.
+                if i != len(block_config) - 1:
+                    self.features.add(_make_transition(num_features // 2))
+                    num_features = num_features // 2
+            self.features.add(nn.BatchNorm())
+            self.features.add(nn.Activation('relu'))
+            #self.features.add(nn.AvgPool2D(pool_size=7))
+            #self.features.add(nn.Flatten())
+
+            #self.output = nn.Dense(classes)
+
+    def hybrid_forward(self, F, x):
+        """Return the feature map produced by ``self.features``."""
+        x = self.features(x)
+        #x = self.output(x)
+        return x
+
+
+# Specification
+# Maps the network depth (config.num_layers) to
+# (num_init_features, growth_rate, block_config), as unpacked in get_symbol().
+densenet_spec = {
+    121: (64, 32, [6, 12, 24, 16]),
+    161: (96, 48, [6, 12, 36, 24]),
+    169: (64, 32, [6, 12, 32, 32]),
+    201: (64, 32, [6, 12, 48, 32])
+}
+
+
+# Constructor
+def get_symbol():
+    """Build the DenseNet embedding symbol described by ``config``.
+
+    The depth comes from ``config.num_layers``. The input variable ``data``
+    is shifted by 127.5 and scaled by 0.0078125 before entering the network,
+    and the feature map is turned into the embedding by
+    ``symbol_utils.get_fc1``.
+    """
+    init_features, growth, blocks = densenet_spec[config.num_layers]
+    net = DenseNet(init_features,
+                   growth,
+                   blocks,
+                   dropout=config.densenet_dropout)
+    data = mx.sym.Variable(name='data')
+    data = data - 127.5
+    data = data * 0.0078125
+    features = net(data)
+    return symbol_utils.get_fc1(features, config.emb_size, config.net_output)
diff --git a/recognition/subcenter_arcface/symbol/fmnasnet.py b/recognition/subcenter_arcface/symbol/fmnasnet.py
new file mode 100644
index 0000000..118beb9
--- /dev/null
+++ b/recognition/subcenter_arcface/symbol/fmnasnet.py
@@ -0,0 +1,213 @@
+import sys
+import os
+import mxnet as mx
+import mxnet.ndarray as nd
+import mxnet.gluon as gluon
+import mxnet.gluon.nn as nn
+import mxnet.autograd as ag
+import symbol_utils
+sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
+from config import config
+
+
+def Act():
+    """Create the activation block selected by ``config.net_act``.
+
+    ``'prelu'`` yields ``nn.PReLU()``; any other value is passed to
+    ``nn.Activation``.
+    """
+    act_type = config.net_act
+    if act_type != 'prelu':
+        return nn.Activation(act_type)
+    return nn.PReLU()
+
+
+def ConvBlock(channels, kernel_size, strides, **kwargs):
+    """Convolution (padding 1, no bias) -> BatchNorm -> activation.
+
+    Extra keyword arguments go to the enclosing ``nn.HybridSequential``.
+    """
+    block = nn.HybridSequential(**kwargs)
+    with block.name_scope():
+        conv = nn.Conv2D(channels,
+                         kernel_size,
+                         strides=strides,
+                         padding=1,
+                         use_bias=False)
+        norm = nn.BatchNorm(scale=True)
+        act = Act()
+        block.add(conv, norm, act)
+    return block
+
+
+def Conv1x1(channels, is_linear=False, **kwargs):
+    """1x1 convolution (no bias) -> BatchNorm, plus activation unless linear.
+
+    Extra keyword arguments go to the enclosing ``nn.HybridSequential``.
+    """
+    block = nn.HybridSequential(**kwargs)
+    with block.name_scope():
+        conv = nn.Conv2D(channels, 1, padding=0, use_bias=False)
+        norm = nn.BatchNorm(scale=True)
+        block.add(conv, norm)
+        if is_linear:
+            return block
+        block.add(Act())
+    return block
+
+
+def DWise(channels, strides, kernel_size=3, **kwargs):
+    """Depthwise convolution -> BatchNorm -> activation.
+
+    The convolution uses ``groups=channels``, padding ``kernel_size // 2``
+    and no bias. Extra keyword arguments go to the enclosing
+    ``nn.HybridSequential``.
+    """
+    block = nn.HybridSequential(**kwargs)
+    with block.name_scope():
+        conv = nn.Conv2D(channels,
+                         kernel_size,
+                         strides=strides,
+                         padding=kernel_size // 2,
+                         groups=channels,
+                         use_bias=False)
+        norm = nn.BatchNorm(scale=True)
+        act = Act()
+        block.add(conv, norm, act)
+    return block
+
+
+class SepCONV(nn.HybridBlock):
+    """Separable convolution block.
+
+    With ``output`` given: grouped convolution (``groups=inp``) -> BatchNorm
+    -> activation -> 1x1 convolution to ``output`` channels. With
+    ``output=None`` only the grouped convolution is built. In both cases the
+    forward pass then applies an optional BatchNorm (``with_bn``) and an
+    activation.
+
+    Parameters
+    ----------
+    inp : int
+        Input channel count; also the number of convolution groups.
+    output : int or None
+        Output channel count of the pointwise convolution, or None to skip it.
+    kernel_size : int
+        Kernel size of the grouped convolution; padding is kernel_size // 2.
+    depth_multiplier : number, default 1
+        The grouped convolution produces ``int(inp * depth_multiplier)``
+        channels.
+    with_bn : bool, default True
+        Apply a trailing BatchNorm; when True the last convolution has no
+        bias.
+    """
+    def __init__(self,
+                 inp,
+                 output,
+                 kernel_size,
+                 depth_multiplier=1,
+                 with_bn=True,
+                 **kwargs):
+        super(SepCONV, self).__init__(**kwargs)
+        with self.name_scope():
+            self.net = nn.HybridSequential()
+            cn = int(inp * depth_multiplier)
+
+            if output is None:
+                # Grouped convolution only; it carries a bias when no
+                # BatchNorm follows.
+                self.net.add(
+                    nn.Conv2D(in_channels=inp,
+                              channels=cn,
+                              groups=inp,
+                              kernel_size=kernel_size,
+                              strides=(1, 1),
+                              padding=kernel_size // 2,
+                              use_bias=not with_bn))
+            else:
+                self.net.add(
+                    nn.Conv2D(in_channels=inp,
+                              channels=cn,
+                              groups=inp,
+                              kernel_size=kernel_size,
+                              strides=(1, 1),
+                              padding=kernel_size // 2,
+                              use_bias=False),
+                    nn.BatchNorm(),
+                    Act(),
+                    #nn.Activation('relu'),
+                    nn.Conv2D(in_channels=cn,
+                              channels=output,
+                              kernel_size=(1, 1),
+                              strides=(1, 1),
+                              use_bias=not with_bn))
+
+            self.with_bn = with_bn
+            self.act = Act()
+            #self.act = nn.Activation('relu')
+            if with_bn:
+                self.bn = nn.BatchNorm()
+
+    def hybrid_forward(self, F, x):
+        """Apply the convolutions, then the optional BatchNorm, then the activation."""
+        x = self.net(x)
+        if self.with_bn:
+            x = self.bn(x)
+        # self.act is always assigned in __init__, so this guard always holds.
+        if self.act is not None:
+            x = self.act(x)
+        return x
+
+
+class ExpandedConv(nn.HybridBlock):
+    """Expand (1x1) -> depthwise -> linear 1x1 projection bottleneck.
+
+    The expansion width is ``inp * t``. The input is added back onto the
+    output only when ``strides == 1`` and ``same_shape`` is true.
+    """
+    def __init__(self,
+                 inp,
+                 oup,
+                 t,
+                 strides,
+                 kernel=3,
+                 same_shape=True,
+                 **kwargs):
+        super(ExpandedConv, self).__init__(**kwargs)
+
+        self.same_shape = same_shape
+        self.strides = strides
+        with self.name_scope():
+            self.bottleneck = nn.HybridSequential()
+            expand = Conv1x1(inp * t, prefix="expand_")
+            depthwise = DWise(inp * t, self.strides, kernel, prefix="dwise_")
+            project = Conv1x1(oup, is_linear=True, prefix="linear_")
+            self.bottleneck.add(expand, depthwise, project)
+
+    def hybrid_forward(self, F, x):
+        """Run the bottleneck and add the shortcut when shapes allow it."""
+        out = self.bottleneck(x)
+        if not (self.strides == 1 and self.same_shape):
+            return out
+        return F.elemwise_add(out, x)
+
+
+def ExpandedConvSequence(t, k, inp, oup, repeats, first_strides, **kwargs):
+    """Chain ``repeats`` ExpandedConv blocks that all output ``oup`` channels.
+
+    The first block maps ``inp`` to ``oup`` with stride ``first_strides``,
+    kernel ``k`` and no shortcut; the remaining blocks use stride 1.
+    """
+    seq = nn.HybridSequential(**kwargs)
+    with seq.name_scope():
+        seq.add(ExpandedConv(inp, oup, t, first_strides, k, same_shape=False))
+        # NOTE(review): `k` is not forwarded here, so the repeated blocks
+        # fall back to ExpandedConv's default kernel=3 - confirm this is
+        # intended before changing it (it would alter existing checkpoints).
+        for _ in range(repeats - 1):
+            seq.add(ExpandedConv(oup, oup, t, 1))
+    return seq
+
+
+class MNasNet(nn.HybridBlock):
+    """MNasNet-style feature extractor without pooling or classifier.
+
+    Parameters
+    ----------
+    m : float, default 1.0
+        Width multiplier applied to every channel count.
+    """
+    def __init__(self, m=1.0, **kwargs):
+        super(MNasNet, self).__init__(**kwargs)
+
+        self.first_oup = int(32 * m)
+        self.second_oup = int(16 * m)
+        #self.second_oup = int(32*m)
+        # One row per stage: expansion factor t, output channels c, number of
+        # blocks n, stride of the first block s, kernel size k, name prefix.
+        # NOTE(review): the "-> NxN" remarks presumably assume a 224x224
+        # input - confirm against the actual input size.
+        self.interverted_residual_setting = [
+            # t, c,  n, s, k
+            [3, int(24 * m), 3, 2, 3, "stage2_"],  # -> 56x56
+            [3, int(40 * m), 3, 2, 5, "stage3_"],  # -> 28x28
+            [6, int(80 * m), 3, 2, 5, "stage4_1_"],  # -> 14x14
+            [6, int(96 * m), 2, 1, 3, "stage4_2_"],  # -> 14x14
+            [6, int(192 * m), 4, 2, 5, "stage5_1_"],  # -> 7x7
+            [6, int(320 * m), 1, 1, 3, "stage5_2_"],  # -> 7x7
+        ]
+        self.last_channels = int(1024 * m)
+
+        with self.name_scope():
+            self.features = nn.HybridSequential()
+            # Stem: 3x3 stride-1 conv block followed by a separable conv.
+            self.features.add(
+                ConvBlock(self.first_oup, 3, 1, prefix="stage1_conv0_"))
+            self.features.add(
+                SepCONV(self.first_oup,
+                        self.second_oup,
+                        3,
+                        prefix="stage1_sepconv0_"))
+            inp = self.second_oup
+            for i, (t, c, n, s, k,
+                    prefix) in enumerate(self.interverted_residual_setting):
+                oup = c
+                self.features.add(
+                    ExpandedConvSequence(t, k, inp, oup, n, s, prefix=prefix))
+                inp = oup
+
+            # Final 1x1 convolution up to last_channels channels.
+            self.features.add(Conv1x1(self.last_channels, prefix="stage5_3_"))
+            #self.features.add(nn.GlobalAvgPool2D())
+            #self.features.add(nn.Flatten())
+            #self.output = nn.Dense(num_classes)
+    def hybrid_forward(self, F, x):
+        """Return the feature map produced by ``self.features``."""
+        x = self.features(x)
+        #x = self.output(x)
+        return x
+
+    def num_output_channel(self):
+        """Return the channel count of the final feature map."""
+        return self.last_channels
+
+
+def get_symbol():
+    """Build the MNasNet embedding symbol described by ``config``.
+
+    The width multiplier comes from ``config.net_multiplier``. The input
+    variable ``data`` is shifted by 127.5 and scaled by 0.0078125, and the
+    feature map is turned into the embedding by ``symbol_utils.get_fc1``.
+    """
+    net = MNasNet(config.net_multiplier)
+    data = mx.sym.Variable(name='data')
+    data = data - 127.5
+    data = data * 0.0078125
+    features = net(data)
+    return symbol_utils.get_fc1(features,
+                                config.emb_size,
+                                config.net_output,
+                                input_channel=net.num_output_channel())
diff --git a/recognition/subcenter_arcface/symbol/fmobilefacenet.py b/recognition/subcenter_arcface/symbol/fmobilefacenet.py
new file mode 100644
index 0000000..f498264
--- /dev/null
+++ b/recognition/subcenter_arcface/symbol/fmobilefacenet.py
@@ -0,0 +1,224 @@
+import sys
+import os
+import mxnet as mx
+import symbol_utils
+sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
+from config import config
+
+
+def Act(data, act_type, name):
+    """Apply the activation ``act_type`` to ``data``.
+
+    ``'prelu'`` is built with ``mx.sym.LeakyReLU``; every other type goes
+    through ``mx.sym.Activation``.
+    """
+    if act_type != 'prelu':
+        return mx.sym.Activation(data=data, act_type=act_type, name=name)
+    return mx.sym.LeakyReLU(data=data, act_type='prelu', name=name)
+
+
+def Conv(data,
+         num_filter=1,
+         kernel=(1, 1),
+         stride=(1, 1),
+         pad=(0, 0),
+         num_group=1,
+         name=None,
+         suffix=''):
+    """Bias-free convolution -> BatchNorm -> ``config.net_act`` activation.
+
+    Symbols are named ``<name><suffix>_conv2d``, ``_batchnorm`` and ``_relu``.
+    """
+    conv_args = dict(num_filter=num_filter,
+                     kernel=kernel,
+                     num_group=num_group,
+                     stride=stride,
+                     pad=pad,
+                     no_bias=True)
+    conv = mx.sym.Convolution(data=data,
+                              name='%s%s_conv2d' % (name, suffix),
+                              **conv_args)
+    bn = mx.sym.BatchNorm(data=conv,
+                          name='%s%s_batchnorm' % (name, suffix),
+                          fix_gamma=False,
+                          momentum=config.bn_mom)
+    return Act(data=bn,
+               act_type=config.net_act,
+               name='%s%s_relu' % (name, suffix))
+
+
+def Linear(data,
+           num_filter=1,
+           kernel=(1, 1),
+           stride=(1, 1),
+           pad=(0, 0),
+           num_group=1,
+           name=None,
+           suffix=''):
+    """Bias-free convolution -> BatchNorm, with no activation.
+
+    Symbols are named ``<name><suffix>_conv2d`` and ``_batchnorm``.
+    """
+    conv_args = dict(num_filter=num_filter,
+                     kernel=kernel,
+                     num_group=num_group,
+                     stride=stride,
+                     pad=pad,
+                     no_bias=True)
+    conv = mx.sym.Convolution(data=data,
+                              name='%s%s_conv2d' % (name, suffix),
+                              **conv_args)
+    return mx.sym.BatchNorm(data=conv,
+                            name='%s%s_batchnorm' % (name, suffix),
+                            fix_gamma=False,
+                            momentum=config.bn_mom)
+
+
+def ConvOnly(data,
+             num_filter=1,
+             kernel=(1, 1),
+             stride=(1, 1),
+             pad=(0, 0),
+             num_group=1,
+             name=None,
+             suffix=''):
+    """Bias-free convolution named ``<name><suffix>_conv2d``, nothing else."""
+    conv_args = dict(num_filter=num_filter,
+                     kernel=kernel,
+                     num_group=num_group,
+                     stride=stride,
+                     pad=pad,
+                     no_bias=True)
+    return mx.sym.Convolution(data=data,
+                              name='%s%s_conv2d' % (name, suffix),
+                              **conv_args)
+
+
+def DResidual(data,
+              num_out=1,
+              kernel=(3, 3),
+              stride=(2, 2),
+              pad=(1, 1),
+              num_group=1,
+              name=None,
+              suffix=''):
+    """Depthwise bottleneck without a shortcut.
+
+    1x1 ``Conv`` to ``num_group`` channels -> depthwise ``Conv`` (one group
+    per channel) with the given kernel/stride/pad -> 1x1 ``Linear``
+    projection to ``num_out`` channels.
+    """
+    expanded = Conv(data=data,
+                    num_filter=num_group,
+                    kernel=(1, 1),
+                    pad=(0, 0),
+                    stride=(1, 1),
+                    name='%s%s_conv_sep' % (name, suffix))
+    depthwise = Conv(data=expanded,
+                     num_filter=num_group,
+                     num_group=num_group,
+                     kernel=kernel,
+                     pad=pad,
+                     stride=stride,
+                     name='%s%s_conv_dw' % (name, suffix))
+    return Linear(data=depthwise,
+                  num_filter=num_out,
+                  kernel=(1, 1),
+                  pad=(0, 0),
+                  stride=(1, 1),
+                  name='%s%s_conv_proj' % (name, suffix))
+
+
+def Residual(data,
+             num_block=1,
+             num_out=1,
+             kernel=(3, 3),
+             stride=(1, 1),
+             pad=(1, 1),
+             num_group=1,
+             name=None,
+             suffix=''):
+    """Chain ``num_block`` ``DResidual`` units, each with an additive shortcut.
+
+    Unit ``i`` is named ``<name><suffix>_block`` with suffix ``i``. With
+    ``num_block == 0`` the input is returned unchanged.
+    """
+    out = data
+    block_index = 0
+    while block_index < num_block:
+        branch = DResidual(data=out,
+                           num_out=num_out,
+                           kernel=kernel,
+                           stride=stride,
+                           pad=pad,
+                           num_group=num_group,
+                           name='%s%s_block' % (name, suffix),
+                           suffix='%d' % block_index)
+        out = branch + out
+        block_index += 1
+    return out
+
+
+def get_symbol():
+    """Build the MobileFaceNet embedding symbol described by ``config``.
+
+    Reads ``config.emb_size`` (embedding width), ``config.net_output``
+    (head type passed to ``symbol_utils.get_fc1``) and ``config.net_blocks``
+    (four entries: residual block counts for stages 2 to 5). The input
+    variable ``data`` is shifted by 127.5 and scaled by 0.0078125.
+    """
+    num_classes = config.emb_size
+    print('in_network', config)
+    fc_type = config.net_output
+    data = mx.symbol.Variable(name="data")
+    data = data - 127.5
+    data = data * 0.0078125
+    blocks = config.net_blocks
+    # Stage 1: 3x3 stride-2 convolution to 64 channels.
+    conv_1 = Conv(data,
+                  num_filter=64,
+                  kernel=(3, 3),
+                  pad=(1, 1),
+                  stride=(2, 2),
+                  name="conv_1")
+    # Stage 2: one depthwise convolution when a single block is configured,
+    # otherwise a stack of residual units.
+    if blocks[0] == 1:
+        conv_2_dw = Conv(conv_1,
+                         num_group=64,
+                         num_filter=64,
+                         kernel=(3, 3),
+                         pad=(1, 1),
+                         stride=(1, 1),
+                         name="conv_2_dw")
+    else:
+        conv_2_dw = Residual(conv_1,
+                             num_block=blocks[0],
+                             num_out=64,
+                             kernel=(3, 3),
+                             stride=(1, 1),
+                             pad=(1, 1),
+                             num_group=64,
+                             name="res_2")
+    # Each following stage is a stride-2 DResidual followed by stride-1
+    # residual units.
+    conv_23 = DResidual(conv_2_dw,
+                        num_out=64,
+                        kernel=(3, 3),
+                        stride=(2, 2),
+                        pad=(1, 1),
+                        num_group=128,
+                        name="dconv_23")
+    conv_3 = Residual(conv_23,
+                      num_block=blocks[1],
+                      num_out=64,
+                      kernel=(3, 3),
+                      stride=(1, 1),
+                      pad=(1, 1),
+                      num_group=128,
+                      name="res_3")
+    conv_34 = DResidual(conv_3,
+                        num_out=128,
+                        kernel=(3, 3),
+                        stride=(2, 2),
+                        pad=(1, 1),
+                        num_group=256,
+                        name="dconv_34")
+    conv_4 = Residual(conv_34,
+                      num_block=blocks[2],
+                      num_out=128,
+                      kernel=(3, 3),
+                      stride=(1, 1),
+                      pad=(1, 1),
+                      num_group=256,
+                      name="res_4")
+    conv_45 = DResidual(conv_4,
+                        num_out=128,
+                        kernel=(3, 3),
+                        stride=(2, 2),
+                        pad=(1, 1),
+                        num_group=512,
+                        name="dconv_45")
+    conv_5 = Residual(conv_45,
+                      num_block=blocks[3],
+                      num_out=128,
+                      kernel=(3, 3),
+                      stride=(1, 1),
+                      pad=(1, 1),
+                      num_group=256,
+                      name="res_5")
+    # Final 1x1 convolution widens the feature map to 512 channels.
+    conv_6_sep = Conv(conv_5,
+                      num_filter=512,
+                      kernel=(1, 1),
+                      pad=(0, 0),
+                      stride=(1, 1),
+                      name="conv_6sep")
+
+    fc1 = symbol_utils.get_fc1(conv_6_sep, num_classes, fc_type)
+    return fc1
diff --git a/gender-age/fmobilenet.py b/recognition/subcenter_arcface/symbol/fmobilenet.py
similarity index 93%
rename from gender-age/fmobilenet.py
rename to recognition/subcenter_arcface/symbol/fmobilenet.py
index 6bb1c0a..bdbf8a5 100644
--- a/gender-age/fmobilenet.py
+++ b/recognition/subcenter_arcface/symbol/fmobilenet.py
@@ -15,14 +15,20 @@
# specific language governing permissions and limitations
# under the License.
+import sys
+import os
import mxnet as mx
import symbol_utils
+sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
+from config import config
def Act(data, act_type, name):
#ignore param act_type, set it in this function
- #body = mx.sym.LeakyReLU(data = data, act_type='prelu', name = name)
- body = mx.sym.Activation(data=data, act_type='relu', name=name)
+ if act_type == 'prelu':
+ body = mx.sym.LeakyReLU(data=data, act_type='prelu', name=name)
+ else:
+ body = mx.sym.Activation(data=data, act_type=act_type, name=name)
return body
@@ -45,7 +51,9 @@ def Conv(data,
bn = mx.sym.BatchNorm(data=conv,
name='%s%s_batchnorm' % (name, suffix),
fix_gamma=True)
- act = Act(data=bn, act_type='relu', name='%s%s_relu' % (name, suffix))
+ act = Act(data=bn,
+ act_type=config.net_act,
+ name='%s%s_relu' % (name, suffix))
return act
@@ -68,20 +76,16 @@ def ConvOnly(data,
return conv
-def get_symbol(num_classes, **kwargs):
+def get_symbol():
+ num_classes = config.emb_size
+ bn_mom = config.bn_mom
+ workspace = config.workspace
data = mx.symbol.Variable(name="data") # 224
data = data - 127.5
data = data * 0.0078125
- version_input = kwargs.get('version_input', 1)
- assert version_input >= 0
- version_output = kwargs.get('version_output', 'E')
- multiplier = kwargs.get('multiplier', 1.0)
- fc_type = version_output
- base_filter = int(32 * multiplier)
- bf = base_filter
- print(version_input, version_output, base_filter)
-
- if version_input == 0:
+ fc_type = config.net_output
+ bf = int(32 * config.net_multiplier)
+ if config.net_input == 0:
conv_1 = Conv(data,
num_filter=bf,
kernel=(3, 3),
diff --git a/gender-age/fresnet.py b/recognition/subcenter_arcface/symbol/fresnet.py
similarity index 94%
rename from gender-age/fresnet.py
rename to recognition/subcenter_arcface/symbol/fresnet.py
index 037a42b..7b17788 100644
--- a/gender-age/fresnet.py
+++ b/recognition/subcenter_arcface/symbol/fresnet.py
@@ -25,10 +25,15 @@ Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun. "Identity Mappings in Deep Re
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
+import sys
+import os
import mxnet as mx
import numpy as np
import symbol_utils
+import memonger
import sklearn
+sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
+from config import config
def Conv(**kwargs):
@@ -965,9 +970,19 @@ def residual_unit(data, num_filter, stride, dim_match, name, bottle_neck,
bottle_neck, **kwargs)
-def resnet(units, num_stages, filter_list, num_classes, bottle_neck, **kwargs):
- bn_mom = kwargs.get('bn_mom', 0.9)
- workspace = kwargs.get('workspace', 256)
+def resnet(units, num_stages, filter_list, num_classes, bottle_neck):
+ bn_mom = config.bn_mom
+ workspace = config.workspace
+ kwargs = {
+ 'version_se': config.net_se,
+ 'version_input': config.net_input,
+ 'version_output': config.net_output,
+ 'version_unit': config.net_unit,
+ 'version_act': config.net_act,
+ 'bn_mom': bn_mom,
+ 'workspace': workspace,
+ 'memonger': config.memonger,
+ }
"""Return ResNet symbol of
Parameters
----------
@@ -984,17 +999,21 @@ def resnet(units, num_stages, filter_list, num_classes, bottle_neck, **kwargs):
workspace : int
Workspace used in convolution operator
"""
- version_se = kwargs.get('version_se', 0)
+ version_se = kwargs.get('version_se', 1)
version_input = kwargs.get('version_input', 1)
assert version_input >= 0
- version_output = kwargs.get('version_output', 'GAP')
+ version_output = kwargs.get('version_output', 'E')
fc_type = version_output
version_unit = kwargs.get('version_unit', 3)
act_type = kwargs.get('version_act', 'prelu')
- print(version_se, version_input, version_output, version_unit, act_type)
+ memonger = kwargs.get('memonger', False)
+ print(version_se, version_input, version_output, version_unit, act_type,
+ memonger)
num_unit = len(units)
assert (num_unit == num_stages)
data = mx.sym.Variable(name='data')
+ if config.fp16:
+ data = mx.sym.Cast(data=data, dtype=np.float16)
if version_input == 0:
#data = mx.sym.BatchNorm(data=data, fix_gamma=True, eps=2e-5, momentum=bn_mom, name='bn_data')
data = mx.sym.identity(data=data, name='id')
@@ -1062,6 +1081,8 @@ def resnet(units, num_stages, filter_list, num_classes, bottle_neck, **kwargs):
#else:
# body = residual_unit(body, filter_list[i+1], (2, 2), False,
# name='stage%d_unit%d' % (i + 1, 1), bottle_neck=bottle_neck, **kwargs)
+ if i==num_stages-1 and config.fp16:
+ body = mx.sym.Cast(data=body, dtype=np.float32)
body = residual_unit(body,
filter_list[i + 1], (2, 2),
False,
@@ -1076,16 +1097,34 @@ def resnet(units, num_stages, filter_list, num_classes, bottle_neck, **kwargs):
bottle_neck=bottle_neck,
**kwargs)
+ if bottle_neck:
+ body = Conv(data=body,
+ num_filter=512,
+ kernel=(1, 1),
+ stride=(1, 1),
+ pad=(0, 0),
+ no_bias=True,
+ name="convd",
+ workspace=workspace)
+ body = mx.sym.BatchNorm(data=body,
+ fix_gamma=False,
+ eps=2e-5,
+ momentum=bn_mom,
+ name='bnd')
+ body = Act(data=body, act_type=act_type, name='relud')
+
fc1 = symbol_utils.get_fc1(body, num_classes, fc_type)
return fc1
-def get_symbol(num_classes, num_layers, **kwargs):
+def get_symbol():
"""
Adapted from https://github.com/tornadomeet/ResNet/blob/master/train_resnet.py
Original author Wei Wu
"""
- if num_layers >= 101:
+ num_classes = config.emb_size
+ num_layers = config.num_layers
+ if num_layers >= 500:
filter_list = [64, 256, 512, 1024, 2048]
bottle_neck = True
else:
@@ -1104,10 +1143,22 @@ def get_symbol(num_classes, num_layers, **kwargs):
units = [3, 6, 24, 3]
elif num_layers == 90:
units = [3, 8, 30, 3]
+ elif num_layers == 98:
+ units = [3, 4, 38, 3]
+ elif num_layers == 99:
+ units = [3, 8, 35, 3]
elif num_layers == 100:
units = [3, 13, 30, 3]
+ elif num_layers == 134:
+ units = [3, 10, 50, 3]
+ elif num_layers == 136:
+ units = [3, 13, 48, 3]
+ elif num_layers == 140:
+ units = [3, 15, 48, 3]
elif num_layers == 124:
units = [3, 13, 40, 5]
+ elif num_layers == 160:
+ units = [3, 24, 49, 3]
elif num_layers == 101:
units = [3, 4, 23, 3]
elif num_layers == 152:
@@ -1121,9 +1172,20 @@ def get_symbol(num_classes, num_layers, **kwargs):
"no experiments done on num_layers {}, you can do it yourself".
format(num_layers))
- return resnet(units=units,
- num_stages=num_stages,
- filter_list=filter_list,
- num_classes=num_classes,
- bottle_neck=bottle_neck,
- **kwargs)
+ net = resnet(units=units,
+ num_stages=num_stages,
+ filter_list=filter_list,
+ num_classes=num_classes,
+ bottle_neck=bottle_neck)
+
+ if config.memonger:
+ dshape = (config.per_batch_size, config.image_shape[2],
+ config.image_shape[0], config.image_shape[1])
+ net_mem_planned = memonger.search_plan(net, data=dshape)
+ old_cost = memonger.get_cost(net, data=dshape)
+ new_cost = memonger.get_cost(net_mem_planned, data=dshape)
+
+ print('Old feature map cost=%d MB' % old_cost)
+ print('New feature map cost=%d MB' % new_cost)
+ net = net_mem_planned
+ return net
diff --git a/recognition/subcenter_arcface/symbol/memonger.py b/recognition/subcenter_arcface/symbol/memonger.py
new file mode 100644
index 0000000..8ad610b
--- /dev/null
+++ b/recognition/subcenter_arcface/symbol/memonger.py
@@ -0,0 +1,175 @@
+import mxnet as mx
+import math
+
+
+def prod(shape):
+ """Get product of the shape.
+ """
+ ret = 1
+ for s in shape:
+ ret *= s
+ return ret
+
+
+def is_param(name):
+ """Quick script to check if name is a parameter.
+ """
+ if name == 'data':
+ return False
+ if name.endswith('weight'):
+ return True
+ if name.endswith('bias'):
+ return True
+ if name.endswith('beta'):
+ return True
+ if name.endswith('gamma'):
+ return True
+ return False
+
+
+def make_mirror_plan(sym, threshold, plan_info=None, **kwargs):
+ """Memory allocation planner with a given threshold.
+
+ The user can pass in a network configuration,
+ a threshold that limits memory per block.
+ And input shape configurations.
+
+ Parameters
+ ----------
+ sym : symbol
+ Input configuration of symbols.
+ The user needs to pre-mark the attribute "mirror_stage" on the nodes
+ that can be book-kept as stage
+
+ The algorithm will decide whether to disable mirror on the stage nodes.
+
+ threshold: integer
+ A tuning parameter to tune the approximate size of each stage blocks
+
+ plan_info: dict, optional
+ Used to hold plan information.
+
+ **kwargs:
+ The arguments to infer shape.
+
+ Returns
+ -------
+ alloc_sym: symbol
+ A symbol with force mirror tagged on the nodes for better allocation.
+ """
+ threshold = threshold << 20
+ sym = sym.__copy__()
+ internals = sym.get_internals()
+ _, out_shapes, _ = internals.infer_shape(**kwargs)
+ shape_dict = list(zip(internals.list_outputs(), out_shapes))
+ total_size = 0
+ param_size = 0
+ local_size = 0
+ save_size = 0
+ max_size = 0
+ last_sb = None
+ last_local = 0
+ period = 1
+ last_stage = ''
+ stage_decision = ''
+
+ for idx, item in enumerate(shape_dict):
+ sb = internals[idx]
+ name, shape = item
+ if is_param(name):
+ param_size += prod(shape) * 4
+ continue
+ else:
+ total_size += prod(shape) * 4
+ local_size += prod(shape) * 4
+ sb._set_attr(force_mirroring='True')
+
+ if sb.attr('mirror_stage') is not None:
+ stage = sb.attr('mirror_stage')
+ if stage == 'True' or stage != last_stage:
+ if local_size > threshold:
+ save_size += prod(shape) * 4
+ max_size = max(max_size, local_size)
+ local_size = 0
+ stage_decision = 'False'
+ sb._set_attr(force_mirroring=stage_decision)
+ else:
+ stage_decision = 'True'
+ pass
+ last_stage = stage
+ elif stage == last_stage and stage_decision == 'False':
+ save_size += prod(shape) * 4
+ sb._set_attr(force_mirroring=stage_decision)
+
+ if plan_info is not None:
+ plan_info['max_size'] = max_size
+ plan_info['save_size'] = save_size
+ return sym
+
+
+def get_cost(sym, type_dict=None, **kwargs):
+ """Get the cost of the current symbolic plan by running bind on GPU.
+
+ sym : Symbolic Variable
+
+ """
+ texec = sym.simple_bind(ctx=mx.gpu(),
+ grad_req='write',
+ type_dict=type_dict,
+ **kwargs)
+ return int(texec.debug_str().split('\n')[-3].split()[1])
+
+
+def search_plan(sym, ntrial=6, type_dict=None, **kwargs):
+ """Quick heuristic search over possible plans to find a good memory plan.
+
+ Parameters
+ ----------
+ sym : symbolic
+ Symbolic configurations
+
+ ntrial: integer
+ Additional grid search steps
+ """
+ history = []
+ threshold = 0
+ min_threshold = None
+ min_cost = None
+ nbegin = 3
+
+ for k in range(nbegin):
+ info = {}
+ sym = make_mirror_plan(sym,
+ threshold=threshold,
+ plan_info=info,
+ **kwargs)
+ cost = get_cost(sym, type_dict, **kwargs)
+ save_size = info['save_size'] >> 20
+ local_size = info['max_size'] >> 20
+ guess = int(math.sqrt(save_size * local_size / 2))
+ if min_cost is None or min_cost > cost:
+ min_cost = cost
+ if min_threshold is None or local_size < min_threshold:
+ min_threshold = local_size
+ print("Search threshold=%d MB, cost=%d MB" % (threshold, cost))
+ history.append((cost, threshold, sym))
+ threshold = guess
+
+ max_threshold = threshold * math.sqrt(2)
+ step = int((max_threshold - min_threshold) / ntrial)
+ threshold = min_threshold + step
+ if step > 0:
+ for k in range(ntrial):
+ sym = make_mirror_plan(sym,
+ threshold=threshold,
+ plan_info=info,
+ **kwargs)
+ cost = get_cost(sym, type_dict, **kwargs)
+ print("Search threshold=%d MB, cost=%d MB" % (threshold, cost))
+ history.append((cost, threshold, sym))
+ threshold += step
+
+ history.sort(key=lambda x: x[0])
+ cost, threshold, sym = history[0]
+ print('Find best plan with threshold=%d, cost=%d MB' % (threshold, cost))
+ return sym
diff --git a/recognition/subcenter_arcface/symbol/memonger_v2.py b/recognition/subcenter_arcface/symbol/memonger_v2.py
new file mode 100644
index 0000000..92963de
--- /dev/null
+++ b/recognition/subcenter_arcface/symbol/memonger_v2.py
@@ -0,0 +1,300 @@
+import mxnet as mx
+import math
+
+
+def prod(shape):
+ """Get product of the shape.
+ """
+ ret = 1
+ for s in shape:
+ ret *= s
+ return ret
+
+
+def is_param(name):
+ """Quick script to check if name is a parameter.
+ """
+ if name == 'data':
+ return False
+ if name.endswith('weight'):
+ return True
+ if name.endswith('bias'):
+ return True
+ if name.endswith('beta'):
+ return True
+ if name.endswith('gamma'):
+ return True
+ return False
+
+
+def make_mirror_plan(sym, threshold, plan_info=None, **kwargs):
+ """Memory allocation planner with a given threshold.
+
+ The user can pass in a network configuration,
+ a threshold that limits memory per block.
+ And input shape configurations.
+
+ Parameters
+ ----------
+ sym : symbol
+ Input configuration of symbols.
+ The user needs to pre-mark the attribute "mirror_stage" on the nodes
+ that can be book-kept as stage
+
+ The algorithm will decide whether to disable mirror on the stage nodes.
+
+ threshold: integer
+ A tuning parameter to tune the approximate size of each stage blocks
+
+ plan_info: dict, optional
+ Used to hold plan information.
+
+ **kwargs:
+ The arguments to infer shape.
+
+ Returns
+ -------
+ alloc_sym: symbol
+ A symbol with force mirror tagged on the nodes for better allocation.
+ """
+ threshold = threshold << 20
+ sym = sym.__copy__()
+ internals = sym.get_internals()
+ _, out_shapes, _ = internals.infer_shape(**kwargs)
+ shape_dict = list(zip(internals.list_outputs(), out_shapes))
+ total_size = 0
+ param_size = 0
+ local_size = 0
+ save_size = 0
+ max_size = 0
+ last_sb = None
+ last_local = 0
+ period = 1
+ last_stage = ''
+ stage_decision = ''
+
+ for idx, item in enumerate(shape_dict):
+ sb = internals[idx]
+ name, shape = item
+ if is_param(name):
+ param_size += prod(shape) * 4
+ continue
+ else:
+ total_size += prod(shape) * 4
+ local_size += prod(shape) * 4
+ sb._set_attr(force_mirroring='True')
+
+ if sb.attr('mirror_stage') is not None:
+ stage = sb.attr('mirror_stage')
+ if stage == 'True' or stage != last_stage:
+ if local_size > threshold:
+ save_size += prod(shape) * 4
+ max_size = max(max_size, local_size)
+ local_size = 0
+ stage_decision = 'False'
+ sb._set_attr(force_mirroring=stage_decision)
+ else:
+ stage_decision = 'True'
+ pass
+ last_stage = stage
+ elif stage == last_stage and stage_decision == 'False':
+ save_size += prod(shape) * 4
+ sb._set_attr(force_mirroring=stage_decision)
+
+ if plan_info is not None:
+ plan_info['max_size'] = max_size
+ plan_info['save_size'] = save_size
+ return sym
+
+
+def get_cost(sym, type_dict=None, **kwargs):
+ """Get the cost of the current symbolic plan by running bind on GPU.
+
+ sym : Symbolic Variable
+
+ """
+ texec = sym.simple_bind(ctx=mx.gpu(),
+ grad_req='write',
+ type_dict=type_dict,
+ **kwargs)
+ return int(texec.debug_str().split('\n')[-3].split()[1])
+
+
+def search_plan(sym, ntrial=6, type_dict=None, **kwargs):
+ """Quick heuristic search over possible plans to find a good memory plan.
+
+ Parameters
+ ----------
+ sym : symbolic
+ Symbolic configurations
+
+ ntrial: integer
+ Additional grid search steps
+ """
+ history = []
+ threshold = 0
+ min_threshold = None
+ min_cost = None
+ nbegin = 3
+
+ for k in range(nbegin):
+ info = {}
+ sym = make_mirror_plan(sym,
+ threshold=threshold,
+ plan_info=info,
+ **kwargs)
+ cost = get_cost(sym, type_dict, **kwargs)
+ save_size = info['save_size'] >> 20
+ local_size = info['max_size'] >> 20
+ guess = int(math.sqrt(save_size * local_size / 2))
+ if min_cost is None or min_cost > cost:
+ min_cost = cost
+ if min_threshold is None or local_size < min_threshold:
+ min_threshold = local_size
+ print("Search threshold=%d MB, cost=%d MB" % (threshold, cost))
+ history.append((cost, threshold, sym))
+ threshold = guess
+
+ max_threshold = threshold * math.sqrt(2)
+ step = int((max_threshold - min_threshold) / ntrial)
+ threshold = min_threshold + step
+ if step > 0:
+ for k in range(ntrial):
+ sym = make_mirror_plan(sym,
+ threshold=threshold,
+ plan_info=info,
+ **kwargs)
+ cost = get_cost(sym, type_dict, **kwargs)
+ print("Search threshold=%d MB, cost=%d MB" % (threshold, cost))
+ history.append((cost, threshold, sym))
+ threshold += step
+
+ history.sort(key=lambda x: x[0])
+ cost, threshold, sym = history[0]
+ print('Find best plan with threshold=%d, cost=%d MB' % (threshold, cost))
+ return sym
+
+
+def make_mirror_plan_to_layer(sym,
+ layer_name,
+ threshold,
+ plan_info=None,
+ **kwargs):
+ """
+ sym is the original symbol
+ layer_name is the name of the layer up to which the network should be set as mirror
+ threshold is the approximate size of each mirror block
+ """
+ threshold = threshold << 20
+ sym = sym.__copy__()
+ internals = sym.get_internals()
+ _, out_shapes, _ = internals.infer_shape(**kwargs)
+ shape_dict = list(zip(internals.list_outputs(), out_shapes))
+ total_size = 0
+ param_size = 0
+ local_size = 0
+ save_size = 0
+ max_size = 0
+ last_stage = ''
+ stage_decision = ''
+ switch = True
+
+ for idx, item in enumerate(shape_dict):
+ sb = internals[idx]
+ name, shape = item
+ #print(name, switch)
+ if is_param(name):
+ param_size += prod(shape) * 4
+ continue
+ elif switch and not 'bn' in name:
+ total_size += prod(shape) * 4
+ local_size += prod(shape) * 4
+ sb._set_attr(force_mirroring='True')
+ print('set force_mirroring', name, total_size, local_size)
+ if layer_name != '' and layer_name in name:
+ switch = False
+
+ if sb.attr('mirror_stage') is not None:
+ stage = sb.attr('mirror_stage')
+ #print(name, stage)
+ if stage == 'True' or stage != last_stage:
+ if local_size > threshold:
+ save_size += prod(shape) * 4
+ max_size = max(max_size, local_size)
+ local_size = 0
+ stage_decision = 'False'
+ sb._set_attr(force_mirroring=stage_decision)
+ else:
+ stage_decision = 'True'
+ pass
+ last_stage = stage
+ elif stage == last_stage and stage_decision == 'False':
+ save_size += prod(shape) * 4
+ sb._set_attr(force_mirroring=stage_decision)
+
+ if plan_info is not None:
+ plan_info['max_size'] = max_size
+ plan_info['save_size'] = save_size
+ return sym
+
+
+def search_plan_to_layer(sym,
+ layer_name=None,
+ threshold=500,
+ ntrial=6,
+ type_dict=None,
+ **kwargs):
+ """Quick heuristic search over possible plans to find a good memory plan.
+
+ Parameters
+ ----------
+ sym : symbolic
+ Symbolic configurations
+
+ ntrial: integer
+ Additional grid search steps
+ """
+ history = []
+ min_threshold = None
+ min_cost = None
+ nbegin = 10
+
+ for k in range(nbegin):
+ info = {}
+ sym = make_mirror_plan_to_layer(sym,
+ layer_name=layer_name,
+ threshold=threshold,
+ plan_info=info,
+ **kwargs)
+ cost = get_cost(sym, type_dict, **kwargs)
+ save_size = info['save_size'] >> 20
+ local_size = info['max_size'] >> 20
+ guess = 300 * (k + 1)
+ if min_cost is None or min_cost > cost:
+ min_cost = cost
+ if min_threshold is None or local_size < min_threshold:
+ min_threshold = local_size
+ print("Search threshold=%d MB, cost=%d MB" % (threshold, cost))
+ history.append((cost, threshold, sym))
+ threshold = guess
+
+ max_threshold = threshold * math.sqrt(2)
+ step = int((max_threshold - min_threshold) / ntrial)
+ print(min_threshold, max_threshold, step)
+ threshold = min_threshold + step
+ if step > 0:
+ for k in range(ntrial):
+ sym = make_mirror_plan_to_layer(sym,
+ layer_name=layer_name,
+ threshold=threshold,
+ plan_info=info,
+ **kwargs)
+ cost = get_cost(sym, type_dict, **kwargs)
+ print("Search threshold=%d MB, cost=%d MB" % (threshold, cost))
+ history.append((cost, threshold, sym))
+ threshold += step
+
+ history.sort(key=lambda x: x[0])
+ cost, threshold, sym = history[0]
+ print('Find best plan with threshold=%d, cost=%d MB' % (threshold, cost))
+ return sym
diff --git a/gender-age/symbol_utils.py b/recognition/subcenter_arcface/symbol/symbol_utils.py
similarity index 53%
rename from gender-age/symbol_utils.py
rename to recognition/subcenter_arcface/symbol/symbol_utils.py
index 0a48697..3eb6f1c 100644
--- a/gender-age/symbol_utils.py
+++ b/recognition/subcenter_arcface/symbol/symbol_utils.py
@@ -1,4 +1,8 @@
+import sys
+import os
import mxnet as mx
+sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
+from config import config
def Conv(**kwargs):
@@ -12,11 +16,14 @@ def Conv(**kwargs):
def Act(data, act_type, name):
#ignore param act_type, set it in this function
- body = mx.sym.LeakyReLU(data=data, act_type='prelu', name=name)
+ if act_type == 'prelu':
+ body = mx.sym.LeakyReLU(data=data, act_type='prelu', name=name)
+ else:
+ body = mx.sym.Activation(data=data, act_type=act_type, name=name)
return body
-bn_mom = 0.9
+bn_mom = config.bn_mom
def Linear(data,
@@ -42,8 +49,7 @@ def Linear(data,
return bn
-def get_fc1(last_conv, num_classes, fc_type):
- bn_mom = 0.9
+def get_fc1(last_conv, num_classes, fc_type, input_channel=512):
body = last_conv
if fc_type == 'Z':
body = mx.sym.BatchNorm(data=body,
@@ -68,13 +74,67 @@ def get_fc1(last_conv, num_classes, fc_type):
eps=2e-5,
momentum=bn_mom,
name='fc1')
+ elif fc_type == 'FC':
+ body = mx.sym.BatchNorm(data=body,
+ fix_gamma=False,
+ eps=2e-5,
+ momentum=bn_mom,
+ name='bn1')
+ fc1 = mx.sym.FullyConnected(data=body,
+ num_hidden=num_classes,
+ name='pre_fc1')
+ fc1 = mx.sym.BatchNorm(data=fc1,
+ fix_gamma=True,
+ eps=2e-5,
+ momentum=bn_mom,
+ name='fc1')
+ elif fc_type == 'SFC':
+ body = mx.sym.BatchNorm(data=body,
+ fix_gamma=False,
+ eps=2e-5,
+ momentum=bn_mom,
+ name='bn1')
+ body = Conv(data=body,
+ num_filter=input_channel,
+ kernel=(3, 3),
+ stride=(2, 2),
+ pad=(1, 1),
+ no_bias=True,
+ name="convf",
+ num_group=input_channel)
+ body = mx.sym.BatchNorm(data=body,
+ fix_gamma=False,
+ eps=2e-5,
+ momentum=bn_mom,
+ name='bnf')
+ body = Act(data=body, act_type=config.net_act, name='reluf')
+ body = Conv(data=body,
+ num_filter=input_channel,
+ kernel=(1, 1),
+ pad=(0, 0),
+ stride=(1, 1),
+ name="convf2")
+ body = mx.sym.BatchNorm(data=body,
+ fix_gamma=False,
+ eps=2e-5,
+ momentum=bn_mom,
+ name='bnf2')
+ body = Act(data=body, act_type=config.net_act, name='reluf2')
+ fc1 = mx.sym.FullyConnected(data=body,
+ num_hidden=num_classes,
+ name='pre_fc1')
+ fc1 = mx.sym.BatchNorm(data=fc1,
+ fix_gamma=True,
+ eps=2e-5,
+ momentum=bn_mom,
+ name='fc1')
elif fc_type == 'GAP':
bn1 = mx.sym.BatchNorm(data=body,
fix_gamma=False,
eps=2e-5,
momentum=bn_mom,
name='bn1')
- relu1 = Act(data=bn1, act_type='relu', name='relu1')
+ relu1 = Act(data=bn1, act_type=config.net_act, name='relu1')
# Although kernel is not used here when global_pool=True, we should put one
pool1 = mx.sym.Pooling(data=relu1,
global_pool=True,
@@ -105,7 +165,7 @@ def get_fc1(last_conv, num_classes, fc_type):
eps=2e-5,
momentum=0.9,
name='convx_bn')
- body = Act(data=body, act_type='relu', name='convx_relu')
+ body = Act(data=body, act_type=config.net_act, name='convx_relu')
filters_in = num_classes
else:
body = last_conv
@@ -149,8 +209,8 @@ def get_fc1(last_conv, num_classes, fc_type):
name='fc1')
elif fc_type == "GDC": #mobilefacenet_v1
conv_6_dw = Linear(last_conv,
- num_filter=512,
- num_group=512,
+ num_filter=input_channel,
+ num_group=input_channel,
kernel=(7, 7),
pad=(0, 0),
stride=(1, 1),
@@ -209,46 +269,6 @@ def get_fc1(last_conv, num_classes, fc_type):
eps=2e-5,
momentum=bn_mom,
name='fc1')
- else:
- bn1 = mx.sym.BatchNorm(data=body,
- fix_gamma=False,
- eps=2e-5,
- momentum=bn_mom,
- name='bn1')
- relu1 = Act(data=bn1, act_type='relu', name='relu1')
- # Although kernel is not used here when global_pool=True, we should put one
- pool1 = mx.sym.Pooling(data=relu1,
- global_pool=True,
- kernel=(7, 7),
- pool_type='avg',
- name='pool1')
- flat = mx.sym.Flatten(data=pool1)
- if len(fc_type) > 1:
- if fc_type[1] == 'X':
- print('dropout mode')
- flat = mx.symbol.Dropout(data=flat, p=0.2)
- fc_type = fc_type[0]
- if fc_type == 'A':
- fc1 = flat
- else:
- #B-D
- #B
- fc1 = mx.sym.FullyConnected(data=flat,
- num_hidden=num_classes,
- name='pre_fc1')
- if fc_type == 'C':
- fc1 = mx.sym.BatchNorm(data=fc1,
- fix_gamma=True,
- eps=2e-5,
- momentum=bn_mom,
- name='fc1')
- elif fc_type == 'D':
- fc1 = mx.sym.BatchNorm(data=fc1,
- fix_gamma=True,
- eps=2e-5,
- momentum=bn_mom,
- name='fc1')
- fc1 = Act(data=fc1, act_type='relu', name='fc1_relu')
return fc1
@@ -293,7 +313,7 @@ def residual_unit_v3(data, num_filter, stride, dim_match, name, **kwargs):
eps=2e-5,
momentum=bn_mom,
name=name + '_bn2')
- act1 = Act(data=bn2, act_type='relu', name=name + '_relu1')
+ act1 = Act(data=bn2, act_type=config.net_act, name=name + '_relu1')
conv2 = Conv(data=act1,
num_filter=num_filter,
kernel=(3, 3),
@@ -328,9 +348,203 @@ def residual_unit_v3(data, num_filter, stride, dim_match, name, **kwargs):
return bn3 + shortcut
+def residual_unit_v1l(data, num_filter, stride, dim_match, name, bottle_neck):
+ """Return ResNet Unit symbol for building ResNet
+ Parameters
+ ----------
+ data : str
+ Input data
+ num_filter : int
+ Number of output channels
+ bnf : int
+ Bottle neck channels factor with regard to num_filter
+ stride : tuple
+ Stride used in convolution
+ dim_match : Boolean
+ True means channel number between input and output is the same, otherwise means differ
+ name : str
+ Base name of the operators
+ workspace : int
+ Workspace used in convolution operator
+ """
+ workspace = config.workspace
+ bn_mom = config.bn_mom
+ memonger = False
+ use_se = config.net_se
+ act_type = config.net_act
+ #print('in unit1')
+ if bottle_neck:
+ conv1 = Conv(data=data,
+ num_filter=int(num_filter * 0.25),
+ kernel=(1, 1),
+ stride=(1, 1),
+ pad=(0, 0),
+ no_bias=True,
+ workspace=workspace,
+ name=name + '_conv1')
+ bn1 = mx.sym.BatchNorm(data=conv1,
+ fix_gamma=False,
+ eps=2e-5,
+ momentum=bn_mom,
+ name=name + '_bn1')
+ act1 = Act(data=bn1, act_type=act_type, name=name + '_relu1')
+ conv2 = Conv(data=act1,
+ num_filter=int(num_filter * 0.25),
+ kernel=(3, 3),
+ stride=(1, 1),
+ pad=(1, 1),
+ no_bias=True,
+ workspace=workspace,
+ name=name + '_conv2')
+ bn2 = mx.sym.BatchNorm(data=conv2,
+ fix_gamma=False,
+ eps=2e-5,
+ momentum=bn_mom,
+ name=name + '_bn2')
+ act2 = Act(data=bn2, act_type=act_type, name=name + '_relu2')
+ conv3 = Conv(data=act2,
+ num_filter=num_filter,
+ kernel=(1, 1),
+ stride=stride,
+ pad=(0, 0),
+ no_bias=True,
+ workspace=workspace,
+ name=name + '_conv3')
+ bn3 = mx.sym.BatchNorm(data=conv3,
+ fix_gamma=False,
+ eps=2e-5,
+ momentum=bn_mom,
+ name=name + '_bn3')
+
+ if use_se:
+ #se begin
+ body = mx.sym.Pooling(data=bn3,
+ global_pool=True,
+ kernel=(7, 7),
+ pool_type='avg',
+ name=name + '_se_pool1')
+ body = Conv(data=body,
+ num_filter=num_filter // 16,
+ kernel=(1, 1),
+ stride=(1, 1),
+ pad=(0, 0),
+ name=name + "_se_conv1",
+ workspace=workspace)
+ body = Act(data=body, act_type=act_type, name=name + '_se_relu1')
+ body = Conv(data=body,
+ num_filter=num_filter,
+ kernel=(1, 1),
+ stride=(1, 1),
+ pad=(0, 0),
+ name=name + "_se_conv2",
+ workspace=workspace)
+ body = mx.symbol.Activation(data=body,
+ act_type='sigmoid',
+ name=name + "_se_sigmoid")
+ bn3 = mx.symbol.broadcast_mul(bn3, body)
+ #se end
+
+ if dim_match:
+ shortcut = data
+ else:
+ conv1sc = Conv(data=data,
+ num_filter=num_filter,
+ kernel=(1, 1),
+ stride=stride,
+ no_bias=True,
+ workspace=workspace,
+ name=name + '_conv1sc')
+ shortcut = mx.sym.BatchNorm(data=conv1sc,
+ fix_gamma=False,
+ eps=2e-5,
+ momentum=bn_mom,
+ name=name + '_sc')
+ if memonger:
+ shortcut._set_attr(mirror_stage='True')
+ return Act(data=bn3 + shortcut,
+ act_type=act_type,
+ name=name + '_relu3')
+ else:
+ conv1 = Conv(data=data,
+ num_filter=num_filter,
+ kernel=(3, 3),
+ stride=(1, 1),
+ pad=(1, 1),
+ no_bias=True,
+ workspace=workspace,
+ name=name + '_conv1')
+ bn1 = mx.sym.BatchNorm(data=conv1,
+ fix_gamma=False,
+ momentum=bn_mom,
+ eps=2e-5,
+ name=name + '_bn1')
+ act1 = Act(data=bn1, act_type=act_type, name=name + '_relu1')
+ conv2 = Conv(data=act1,
+ num_filter=num_filter,
+ kernel=(3, 3),
+ stride=stride,
+ pad=(1, 1),
+ no_bias=True,
+ workspace=workspace,
+ name=name + '_conv2')
+ bn2 = mx.sym.BatchNorm(data=conv2,
+ fix_gamma=False,
+ momentum=bn_mom,
+ eps=2e-5,
+ name=name + '_bn2')
+ if use_se:
+ #se begin
+ body = mx.sym.Pooling(data=bn2,
+ global_pool=True,
+ kernel=(7, 7),
+ pool_type='avg',
+ name=name + '_se_pool1')
+ body = Conv(data=body,
+ num_filter=num_filter // 16,
+ kernel=(1, 1),
+ stride=(1, 1),
+ pad=(0, 0),
+ name=name + "_se_conv1",
+ workspace=workspace)
+ body = Act(data=body, act_type=act_type, name=name + '_se_relu1')
+ body = Conv(data=body,
+ num_filter=num_filter,
+ kernel=(1, 1),
+ stride=(1, 1),
+ pad=(0, 0),
+ name=name + "_se_conv2",
+ workspace=workspace)
+ body = mx.symbol.Activation(data=body,
+ act_type='sigmoid',
+ name=name + "_se_sigmoid")
+ bn2 = mx.symbol.broadcast_mul(bn2, body)
+ #se end
+
+ if dim_match:
+ shortcut = data
+ else:
+ conv1sc = Conv(data=data,
+ num_filter=num_filter,
+ kernel=(1, 1),
+ stride=stride,
+ no_bias=True,
+ workspace=workspace,
+ name=name + '_conv1sc')
+ shortcut = mx.sym.BatchNorm(data=conv1sc,
+ fix_gamma=False,
+ momentum=bn_mom,
+ eps=2e-5,
+ name=name + '_sc')
+ if memonger:
+ shortcut._set_attr(mirror_stage='True')
+ return Act(data=bn2 + shortcut,
+ act_type=act_type,
+ name=name + '_relu3')
+
+
def get_head(data, version_input, num_filter):
- bn_mom = 0.9
- workspace = 256
+ bn_mom = config.bn_mom
+ workspace = config.workspace
kwargs = {'bn_mom': bn_mom, 'workspace': workspace}
data = data - 127.5
data = data * 0.0078125
@@ -349,7 +563,7 @@ def get_head(data, version_input, num_filter):
eps=2e-5,
momentum=bn_mom,
name='bn0')
- body = Act(data=body, act_type='relu', name='relu0')
+ body = Act(data=body, act_type=config.net_act, name='relu0')
body = mx.sym.Pooling(data=body,
kernel=(3, 3),
stride=(2, 2),
@@ -371,10 +585,11 @@ def get_head(data, version_input, num_filter):
eps=2e-5,
momentum=bn_mom,
name='bn0')
- body = Act(data=body, act_type='relu', name='relu0')
- body = residual_unit_v3(body,
- _num_filter, (2, 2),
- False,
- name='head',
- **kwargs)
+ body = Act(data=body, act_type=config.net_act, name='relu0')
+ #body = residual_unit_v3(body, _num_filter, (2, 2), False, name='head', **kwargs)
+ body = residual_unit_v1l(body,
+ _num_filter, (2, 2),
+ False,
+ name='head',
+ bottle_neck=False)
return body
diff --git a/recognition/subcenter_arcface/symbol/vargfacenet.py b/recognition/subcenter_arcface/symbol/vargfacenet.py
new file mode 100644
index 0000000..434e8be
--- /dev/null
+++ b/recognition/subcenter_arcface/symbol/vargfacenet.py
@@ -0,0 +1,578 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+'''
+Author: Horizon Robotics Inc.
+The company is committed to be the global leader of edge AI platform.
+The model implemented in this script runs ~200fps on the Sunrise 2.
+Sunrise 2 is the second generation of an embedded AI chip designed by Horizon Robotics,
+targeting to empower AIoT devices by AI.
+
+Implemented the following paper:
+Mengjia Yan, Mengao Zhao, Zining Xu, Qian Zhang, Guoli Wang, Zhizhong Su. "VarGFaceNet: An Efficient Variable Group Convolutional Neural Network for Lightweight Face Recognition" (https://arxiv.org/abs/1910.04985)
+
+'''
+
+import os
+import sys
+
+import mxnet as mx
+import symbol_utils
+
+sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
+from config import config
+
+
+def Act(data, act_type, name):
+    """Wrap `data` in the activation `act_type`, naming the layer `name`."""
+    if act_type != 'prelu':
+        return mx.symbol.Activation(data=data, act_type=act_type, name=name)
+    # 'prelu' is built with mx.sym.LeakyReLU instead of Activation.
+    return mx.sym.LeakyReLU(data=data, act_type='prelu', name=name)
+
+
+def get_setting_params(**kwargs):
+    """Collect network hyper-parameters into a dict, applying defaults.
+
+    Recognised keyword arguments and their defaults are listed in
+    `defaults` below; any other keyword argument is ignored.
+
+    Returns a new dict with exactly those nine keys.
+    """
+    defaults = (
+        # bn_params
+        ('bn_mom', 0.9),
+        ('bn_eps', 2e-5),
+        ('fix_gamma', False),
+        ('use_global_stats', False),
+        # net_setting param
+        ('workspace', 512),
+        ('act_type', 'prelu'),
+        ('use_se', True),
+        ('se_ratio', 4),
+        ('group_base', 8),
+    )
+    setting_params = {}
+    for key, fallback in defaults:
+        setting_params[key] = kwargs.get(key, fallback)
+    return setting_params
+
+
+def se_block(data, num_filter, setting_params, name):
+    """Squeeze-and-excitation style gate applied to `data`.
+
+    Reads 'se_ratio' and 'act_type' from `setting_params`; `num_filter` is
+    the width of the second 1x1 convolution and `name` prefixes every
+    layer name. Returns `data` broadcast-multiplied by the sigmoid gate.
+    """
+    se_ratio = setting_params['se_ratio']
+    act_type = setting_params['act_type']
+    unit_conv = dict(kernel=(1, 1), stride=(1, 1), pad=(0, 0))
+
+    # Squeeze: global average pooling.
+    squeezed = mx.sym.Pooling(data=data, global_pool=True, pool_type='avg',
+                              name=name + '_se_pool1')
+    # Excite: bottleneck 1x1 conv, activation, 1x1 conv, sigmoid.
+    gate = mx.sym.Convolution(data=squeezed,
+                              num_filter=num_filter // se_ratio,
+                              name=name + "_se_conv1", **unit_conv)
+    gate = Act(data=gate, act_type=act_type, name=name + '_se_act1')
+    gate = mx.sym.Convolution(data=gate, num_filter=num_filter,
+                              name=name + "_se_conv2", **unit_conv)
+    gate = mx.symbol.Activation(data=gate,
+                                act_type='sigmoid',
+                                name=name + "_se_sigmoid")
+
+    # Apply the gate to the original input.
+    return mx.symbol.broadcast_mul(data, gate)
+
+
+def separable_conv2d(data,
+                     in_channels,
+                     out_channels,
+                     kernel,
+                     pad,
+                     setting_params,
+                     stride=(1, 1),
+                     factor=1,
+                     bias=False,
+                     bn_dw_out=True,
+                     act_dw_out=True,
+                     bn_pw_out=True,
+                     act_pw_out=True,
+                     dilate=1,
+                     name=None):
+    """Grouped convolution followed by a 1x1 pointwise convolution.
+
+    The first convolution emits `int(in_channels * factor)` filters in
+    `in_channels / group_base` groups; the second maps them to
+    `out_channels`. Each stage is optionally followed by BatchNorm and an
+    activation, switched by the `bn_*_out` / `act_*_out` flags. BN
+    settings, workspace, group size and activation type are read from
+    `setting_params`; `name` prefixes every layer name.
+    """
+    cfg = setting_params
+    bn_mom, bn_eps = cfg['bn_mom'], cfg['bn_eps']
+    fix_gamma, use_global_stats = cfg['fix_gamma'], cfg['use_global_stats']
+    workspace, group_base = cfg['workspace'], cfg['group_base']
+    act_type = cfg['act_type']
+    assert in_channels % group_base == 0
+    # Both convolutions share the bias switch and the BN hyper-parameters.
+    no_bias = not bias
+    bn_kwargs = dict(fix_gamma=fix_gamma, eps=bn_eps, momentum=bn_mom,
+                     use_global_stats=use_global_stats)
+
+    # depthwise
+    out = mx.sym.Convolution(data=data,
+                             num_filter=int(in_channels * factor),
+                             kernel=kernel, pad=pad, stride=stride,
+                             no_bias=no_bias,
+                             num_group=int(in_channels / group_base),
+                             dilate=(dilate, dilate),
+                             workspace=workspace,
+                             name=name + '_conv2d_depthwise')
+    if bn_dw_out:
+        out = mx.sym.BatchNorm(data=out,
+                               name=name + '_conv2d_depthwise_bn',
+                               **bn_kwargs)
+    if act_dw_out:
+        out = Act(data=out, act_type=act_type,
+                  name=name + '_conv2d_depthwise_act')
+
+    # pointwise
+    out = mx.sym.Convolution(data=out,
+                             num_filter=out_channels,
+                             kernel=(1, 1), stride=(1, 1), pad=(0, 0),
+                             num_group=1,
+                             no_bias=no_bias,
+                             workspace=workspace,
+                             name=name + '_conv2d_pointwise')
+    if bn_pw_out:
+        out = mx.sym.BatchNorm(data=out,
+                               name=name + '_conv2d_pointwise_bn',
+                               **bn_kwargs)
+    if act_pw_out:
+        out = Act(data=out, act_type=act_type,
+                  name=name + '_conv2d_pointwise_act')
+    return out
+
+
+def vargnet_block(data,
+                  n_out_ch1,
+                  n_out_ch2,
+                  n_out_ch3,
+                  setting_params,
+                  factor=2,
+                  dim_match=True,
+                  multiplier=1,
+                  kernel=(3, 3),
+                  stride=(1, 1),
+                  dilate=1,
+                  with_dilate=False,
+                  name=None):
+    """Residual VarGNet unit: two separable convolutions plus a shortcut.
+
+    Channel widths are `int(n_out_ch{1,2,3} * multiplier)`: the first is
+    passed as the input width of the first convolution and the shortcut,
+    the second is the intermediate width, the third the output width.
+    With `dim_match` the input itself is the shortcut; otherwise the
+    shortcut is a separable convolution using `stride`. `with_dilate`
+    overrides `stride` with (1, 1). The residual branch goes through
+    `se_block` when `setting_params['use_se']` is true.
+
+    Returns the activated sum of the residual branch and the shortcut.
+    """
+    use_se = setting_params['use_se']
+    act_type = setting_params['act_type']
+
+    ch_in, ch_mid, ch_out = (int(width * multiplier)
+                             for width in (n_out_ch1, n_out_ch2, n_out_ch3))
+
+    # Per-axis padding: half of the dilated kernel extent.
+    pad = tuple(((kernel[axis] - 1) * dilate + 1) // 2 for axis in (0, 1))
+
+    if with_dilate:
+        stride = (1, 1)
+    # Arguments shared by every separable convolution in this unit.
+    shared = dict(kernel=kernel, pad=pad, setting_params=setting_params,
+                  factor=factor, bias=False, dilate=dilate)
+
+    if dim_match:
+        short_cut = data
+    else:
+        short_cut = separable_conv2d(data=data,
+                                     in_channels=ch_in,
+                                     out_channels=ch_out,
+                                     stride=stride,
+                                     act_pw_out=False,
+                                     name=name + '_shortcut',
+                                     **shared)
+    branch = separable_conv2d(data=data,
+                              in_channels=ch_in,
+                              out_channels=ch_mid,
+                              stride=stride,
+                              name=name + '_sep1_data',
+                              **shared)
+    # The second convolution always uses stride (1, 1) and skips its final
+    # activation; the activation is applied after the addition below.
+    branch = separable_conv2d(data=branch,
+                              in_channels=ch_mid,
+                              out_channels=ch_out,
+                              stride=(1, 1),
+                              act_pw_out=False,
+                              name=name + '_sep2_data',
+                              **shared)
+
+    if use_se:
+        branch = se_block(data=branch,
+                          num_filter=ch_out,
+                          setting_params=setting_params,
+                          name=name)
+
+    return Act(data=branch + short_cut,
+               act_type=act_type,
+               name=name + '_out_data_act')
+
+
+def vargnet_branch_merge_block(data,
+                               n_out_ch1,
+                               n_out_ch2,
+                               n_out_ch3,
+                               setting_params,
+                               factor=2,
+                               dim_match=False,
+                               multiplier=1,
+                               kernel=(3, 3),
+                               stride=(2, 2),
+                               dilate=1,
+                               with_dilate=False,
+                               name=None):
+    """VarGNet unit whose first stage sums two parallel separable convs.
+
+    Channel widths are `int(n_out_ch{1,2,3} * multiplier)` (input,
+    intermediate, output). Two separable convolutions with identical
+    settings are applied to `data`, summed and activated; a third
+    separable convolution maps the result to the output width. With
+    `dim_match` the input itself is the shortcut; otherwise the shortcut
+    is a separable convolution using `stride`. `with_dilate` overrides
+    `stride` with (1, 1).
+
+    Returns the activated sum of the main path and the shortcut.
+    """
+    act_type = setting_params['act_type']
+
+    ch_in, ch_mid, ch_out = (int(width * multiplier)
+                             for width in (n_out_ch1, n_out_ch2, n_out_ch3))
+
+    # Per-axis padding: half of the dilated kernel extent.
+    pad = tuple(((kernel[axis] - 1) * dilate + 1) // 2 for axis in (0, 1))
+
+    if with_dilate:
+        stride = (1, 1)
+    # Every separable convolution here shares these arguments; none of them
+    # activates its pointwise output.
+    shared = dict(kernel=kernel,
+                  pad=pad,
+                  setting_params=setting_params,
+                  factor=factor,
+                  bias=False,
+                  dilate=dilate,
+                  act_pw_out=False)
+
+    if dim_match:
+        short_cut = data
+    else:
+        short_cut = separable_conv2d(data=data,
+                                     in_channels=ch_in,
+                                     out_channels=ch_out,
+                                     stride=stride,
+                                     name=name + '_shortcut',
+                                     **shared)
+
+    # Two parallel branches over the same input.
+    first_branch = separable_conv2d(data=data,
+                                    in_channels=ch_in,
+                                    out_channels=ch_mid,
+                                    stride=stride,
+                                    name=name + '_sep1_data_branch',
+                                    **shared)
+    second_branch = separable_conv2d(data=data,
+                                     in_channels=ch_in,
+                                     out_channels=ch_mid,
+                                     stride=stride,
+                                     name=name + '_sep2_data_branch',
+                                     **shared)
+    merged = Act(data=first_branch + second_branch,
+                 act_type=act_type,
+                 name=name + '_sep1_data_act')
+
+    # Map the merged branches to the output width (always stride (1, 1)).
+    main_path = separable_conv2d(data=merged,
+                                 in_channels=ch_mid,
+                                 out_channels=ch_out,
+                                 stride=(1, 1),
+                                 name=name + '_sep2_data',
+                                 **shared)
+
+    # Residual addition followed by the final activation.
+    return Act(data=main_path + short_cut,
+               act_type=act_type,
+               name=name + '_out_data_act')
+
+
+def add_vargnet_conv_block(data,
+                           stage,
+                           units,
+                           in_channels,
+                           out_channels,
+                           setting_params,
+                           kernel=(3, 3),
+                           stride=(2, 2),
+                           multiplier=1,
+                           factor=2,
+                           dilate=1,
+                           with_dilate=False,
+                           name=None):
+    """Build one stage: a branch-merge unit followed by `units - 1` blocks.
+
+    The first unit maps `in_channels` to `out_channels` using `stride`;
+    the remaining units keep `out_channels` at stride (1, 1). Layer names
+    are `<name>_stage_<stage>_unit_<k>` with k starting at 1.
+    """
+    assert stage >= 2, 'stage is {}, stage must be set >=2'.format(stage)
+    # Arguments common to every unit of the stage.
+    shared = dict(setting_params=setting_params, factor=factor,
+                  multiplier=multiplier, kernel=kernel, dilate=dilate,
+                  with_dilate=with_dilate)
+    data = vargnet_branch_merge_block(data=data,
+                                      n_out_ch1=in_channels,
+                                      n_out_ch2=out_channels,
+                                      n_out_ch3=out_channels,
+                                      dim_match=False,
+                                      stride=stride,
+                                      name=name +
+                                      '_stage_{}_unit_1'.format(stage),
+                                      **shared)
+    for unit_index in range(2, units + 1):
+        data = vargnet_block(data=data,
+                             n_out_ch1=out_channels,
+                             n_out_ch2=out_channels,
+                             n_out_ch3=out_channels,
+                             dim_match=True,
+                             stride=(1, 1),
+                             name=name +
+                             '_stage_{}_unit_{}'.format(stage, unit_index),
+                             **shared)
+    return data
+
+
+def add_head_block(data,
+                   num_filter,
+                   setting_params,
+                   multiplier,
+                   head_pooling=False,
+                   kernel=(3, 3),
+                   stride=(2, 2),
+                   pad=(1, 1),
+                   name=None):
+    """Network stem: conv + BatchNorm + activation, then pooling or a block.
+
+    The convolution has `int(num_filter * multiplier)` filters and uses
+    `kernel`, `stride` and `pad`. It is followed by a 3x3 max pooling with
+    stride (2, 2) when `head_pooling` is true, otherwise by a
+    `vargnet_block` with stride (2, 2), `factor=1` and `dim_match=False`.
+    """
+    cfg = setting_params
+    bn_mom, bn_eps = cfg['bn_mom'], cfg['bn_eps']
+    fix_gamma, use_global_stats = cfg['fix_gamma'], cfg['use_global_stats']
+    workspace, act_type = cfg['workspace'], cfg['act_type']
+    channels = int(num_filter * multiplier)
+
+    stem = mx.sym.Convolution(data=data,
+                              num_filter=channels,
+                              kernel=kernel, pad=pad, stride=stride,
+                              no_bias=True, num_group=1,
+                              workspace=workspace,
+                              name=name + '_conv1')
+    stem = mx.sym.BatchNorm(data=stem,
+                            fix_gamma=fix_gamma,
+                            eps=bn_eps,
+                            momentum=bn_mom,
+                            use_global_stats=use_global_stats,
+                            name=name + '_conv1_bn')
+    stem = Act(data=stem, act_type=act_type, name=name + '_conv1_act')
+
+    if head_pooling:
+        return mx.symbol.Pooling(data=stem,
+                                 kernel=(3, 3),
+                                 stride=(2, 2),
+                                 pad=(1, 1),
+                                 pool_type='max',
+                                 name=name + '_max_pooling')
+    # No pooling requested: use a VarGNet block with stride (2, 2) instead.
+    return vargnet_block(data=stem,
+                         n_out_ch1=num_filter,
+                         n_out_ch2=num_filter,
+                         n_out_ch3=num_filter,
+                         setting_params=setting_params,
+                         factor=1,
+                         dim_match=False,
+                         multiplier=multiplier,
+                         kernel=kernel,
+                         stride=(2, 2),
+                         dilate=1,
+                         with_dilate=False,
+                         name=name + '_head_pooling')
+
+
+def add_emb_block(data,
+                  input_channels,
+                  last_channels,
+                  emb_size,
+                  fc_type,
+                  setting_params,
+                  bias=False,
+                  name=None):
+    """Embedding head: optional 1x1 conv, 7x7 grouped conv, 1x1 conv, FC.
+
+    When `input_channels != last_channels` a 1x1 conv + BN + activation
+    first maps the input to `last_channels`. A 7x7 grouped convolution
+    without padding and a 1x1 convolution to `last_channels // 2` follow,
+    and `symbol_utils.get_fc1` produces the final output from `emb_size`
+    and `fc_type`.
+    NOTE(review): the unpadded 7x7 kernel presumably expects a 7x7 input
+    feature map - confirm against the caller's input size.
+    """
+    cfg = setting_params
+    bn_mom, bn_eps = cfg['bn_mom'], cfg['bn_eps']
+    fix_gamma, use_global_stats = cfg['fix_gamma'], cfg['use_global_stats']
+    workspace, act_type = cfg['workspace'], cfg['act_type']
+    group_base = cfg['group_base']
+    # Settings shared by the convolutions and BatchNorm layers below.
+    no_bias = not bias
+    bn_kwargs = dict(fix_gamma=fix_gamma,
+                     eps=bn_eps,
+                     momentum=bn_mom,
+                     use_global_stats=use_global_stats)
+    unit_conv = dict(pad=(0, 0), stride=(1, 1), no_bias=no_bias,
+                     workspace=workspace)
+
+    # last channels
+    if input_channels != last_channels:
+        data = mx.sym.Convolution(data=data,
+                                  num_filter=last_channels,
+                                  kernel=(1, 1),
+                                  name=name + '_convx',
+                                  **unit_conv)
+        data = mx.sym.BatchNorm(data=data,
+                                name=name + '_convx_bn',
+                                **bn_kwargs)
+        data = Act(data=data, act_type=act_type, name=name + '_convx_act')
+
+    # depthwise
+    feat = mx.sym.Convolution(data=data,
+                              num_filter=last_channels,
+                              num_group=int(last_channels / group_base),
+                              kernel=(7, 7),
+                              name=name + '_convx_depthwise',
+                              **unit_conv)
+    feat = mx.sym.BatchNorm(data=feat,
+                            name=name + '_convx_depthwise_bn',
+                            **bn_kwargs)
+
+    # pointwise
+    feat = mx.sym.Convolution(data=feat,
+                              num_filter=last_channels // 2,
+                              kernel=(1, 1),
+                              name=name + '_convx_pointwise',
+                              **unit_conv)
+    feat = mx.sym.BatchNorm(data=feat,
+                            name=name + '_convx_pointwise_bn',
+                            **bn_kwargs)
+    feat = Act(data=feat,
+               act_type=act_type,
+               name=name + '_convx_pointwise_act')
+
+    # The final embedding layer is delegated to symbol_utils.
+    return symbol_utils.get_fc1(feat, emb_size, fc_type)
+
+
+def get_symbol():
+    """Assemble the VarGFaceNet symbol from the global `config`.
+
+    The 'data' input is normalised with (x - 127.5) * 0.0078125, then
+    passed through the head block, three VarGNet stages and the embedding
+    block. Returns the embedding symbol.
+    """
+    multiplier = config.net_multiplier
+    emb_size = config.emb_size
+    fc_type = config.net_output
+    setting_params = get_setting_params(
+        use_se=config.net_se, act_type=config.net_act,
+        bn_mom=config.bn_mom, workspace=config.workspace)
+
+    factor = 2
+    head_pooling = False
+    stage_list = [2, 3, 4]
+    units = [3, 7, 4]
+    filter_list = [32, 64, 128, 256]
+    last_channels = 1024
+    dilate_list = [1, 1, 1]
+    with_dilate_list = [False, False, False]
+
+    data = mx.sym.Variable(name='data')
+    data = mx.sym.identity(data=data, name='id')
+    data = (data - 127.5) * 0.0078125
+
+    body = add_head_block(data=data,
+                          num_filter=filter_list[0],
+                          setting_params=setting_params,
+                          multiplier=multiplier,
+                          head_pooling=head_pooling,
+                          kernel=(3, 3),
+                          stride=(1, 1),
+                          pad=(1, 1),
+                          name="vargface_head")
+
+    # One stage per entry; consecutive filter_list entries give in/out widths.
+    stage_specs = zip(stage_list, units, filter_list[:-1], filter_list[1:],
+                      dilate_list, with_dilate_list)
+    for stage, n_units, ch_in, ch_out, dilate, with_dilate in stage_specs:
+        body = add_vargnet_conv_block(data=body,
+                                      stage=stage,
+                                      units=n_units,
+                                      in_channels=ch_in,
+                                      out_channels=ch_out,
+                                      setting_params=setting_params,
+                                      kernel=(3, 3),
+                                      stride=(2, 2),
+                                      multiplier=multiplier,
+                                      factor=factor,
+                                      dilate=dilate,
+                                      with_dilate=with_dilate,
+                                      name="vargface")
+    return add_emb_block(data=body,
+                         input_channels=filter_list[3],
+                         last_channels=last_channels,
+                         emb_size=emb_size,
+                         fc_type=fc_type,
+                         setting_params=setting_params,
+                         bias=False,
+                         name='embed')
+
+
+if __name__ == '__main__':
+    get_symbol()  # Build the symbol once when run as a script; result is discarded.
diff --git a/recognition/SubCenter-ArcFace/train_parall.py b/recognition/subcenter_arcface/train_parall.py
similarity index 99%
rename from recognition/SubCenter-ArcFace/train_parall.py
rename to recognition/subcenter_arcface/train_parall.py
index d444f6f..72b1ff7 100644
--- a/recognition/SubCenter-ArcFace/train_parall.py
+++ b/recognition/subcenter_arcface/train_parall.py
@@ -16,8 +16,10 @@ from mxnet import ndarray as nd
import argparse
import mxnet.optimizer as optimizer
from config import config, default, generate_config
+sys.path.append(os.path.join(os.path.dirname(__file__), 'symbol'))
+sys.path.append(os.path.join(os.path.dirname(__file__), 'common'))
import verification
-sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'symbol'))
+
import fresnet
import fmobilefacenet
import fmobilenet
diff --git a/recognition/tools/face2rec2.py b/recognition/tools/face2rec2.py
deleted file mode 100644
index a6af12c..0000000
--- a/recognition/tools/face2rec2.py
+++ /dev/null
@@ -1,320 +0,0 @@
-import os
-import sys
-import mxnet as mx
-import random
-import argparse
-import cv2
-import time
-import traceback
-from easydict import EasyDict as edict
-sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'common'))
-import face_align
-
-try:
- import multiprocessing
-except ImportError:
- multiprocessing = None
-
-
-def parse_lst_line(line):
- vec = line.strip().split("\t")
- assert len(vec) >= 3
- aligned = int(vec[0])
- image_path = vec[1]
- label = int(vec[2])
- bbox = None
- landmark = None
- #print(vec)
- if len(vec) > 3:
- bbox = np.zeros((4, ), dtype=np.int32)
- for i in xrange(3, 7):
- bbox[i - 3] = int(vec[i])
- landmark = None
- if len(vec) > 7:
- _l = []
- for i in xrange(7, 17):
- _l.append(float(vec[i]))
- landmark = np.array(_l).reshape((2, 5)).T
- #print(aligned)
- return image_path, label, bbox, landmark, aligned
-
-
-def read_list(path_in):
- with open(path_in) as fin:
- identities = []
- last = [-1, -1]
- _id = 1
- while True:
- line = fin.readline()
- if not line:
- break
- item = edict()
- item.flag = 0
- item.image_path, label, item.bbox, item.landmark, item.aligned = parse_lst_line(
- line)
- if not item.aligned and item.landmark is None:
- #print('ignore line', line)
- continue
- item.id = _id
- item.label = [label, item.aligned]
- yield item
- if label != last[0]:
- if last[1] >= 0:
- identities.append((last[1], _id))
- last[0] = label
- last[1] = _id
- _id += 1
- identities.append((last[1], _id))
- item = edict()
- item.flag = 2
- item.id = 0
- item.label = [float(_id), float(_id + len(identities))]
- yield item
- for identity in identities:
- item = edict()
- item.flag = 2
- item.id = _id
- _id += 1
- item.label = [float(identity[0]), float(identity[1])]
- yield item
-
-
-def image_encode(args, i, item, q_out):
- oitem = [item.id]
- #print('flag', item.flag)
- if item.flag == 0:
- fullpath = item.image_path
- header = mx.recordio.IRHeader(item.flag, item.label, item.id, 0)
- #print('write', item.flag, item.id, item.label)
- if item.aligned:
- with open(fullpath, 'rb') as fin:
- img = fin.read()
- s = mx.recordio.pack(header, img)
- q_out.put((i, s, oitem))
- else:
- img = cv2.imread(fullpath, args.color)
- assert item.landmark is not None
- img = face_align.norm_crop(img, item.landmark)
- s = mx.recordio.pack_img(header,
- img,
- quality=args.quality,
- img_fmt=args.encoding)
- q_out.put((i, s, oitem))
- else:
- header = mx.recordio.IRHeader(item.flag, item.label, item.id, 0)
- #print('write', item.flag, item.id, item.label)
- s = mx.recordio.pack(header, '')
- q_out.put((i, s, oitem))
-
-
-def read_worker(args, q_in, q_out):
- while True:
- deq = q_in.get()
- if deq is None:
- break
- i, item = deq
- image_encode(args, i, item, q_out)
-
-
-def write_worker(q_out, fname, working_dir):
- pre_time = time.time()
- count = 0
- fname = os.path.basename(fname)
- fname_rec = os.path.splitext(fname)[0] + '.rec'
- fname_idx = os.path.splitext(fname)[0] + '.idx'
- record = mx.recordio.MXIndexedRecordIO(
- os.path.join(working_dir, fname_idx),
- os.path.join(working_dir, fname_rec), 'w')
- buf = {}
- more = True
- while more:
- deq = q_out.get()
- if deq is not None:
- i, s, item = deq
- buf[i] = (s, item)
- else:
- more = False
- while count in buf:
- s, item = buf[count]
- del buf[count]
- if s is not None:
- #print('write idx', item[0])
- record.write_idx(item[0], s)
-
- if count % 1000 == 0:
- cur_time = time.time()
- print('time:', cur_time - pre_time, ' count:', count)
- pre_time = cur_time
- count += 1
-
-
-def parse_args():
- parser = argparse.ArgumentParser(
- formatter_class=argparse.ArgumentDefaultsHelpFormatter,
- description='Create an image list or \
- make a record database by reading from an image list')
- parser.add_argument('prefix',
- help='prefix of input/output lst and rec files.')
- #parser.add_argument('root', help='path to folder containing images.')
-
- cgroup = parser.add_argument_group('Options for creating image lists')
- cgroup.add_argument(
- '--list',
- type=bool,
- default=False,
- help=
- 'If this is set im2rec will create image list(s) by traversing root folder\
- and output to .lst.\
- Otherwise im2rec will read .lst and create a database at .rec'
- )
- cgroup.add_argument('--exts',
- nargs='+',
- default=['.jpeg', '.jpg'],
- help='list of acceptable image extensions.')
- cgroup.add_argument('--chunks',
- type=int,
- default=1,
- help='number of chunks.')
- cgroup.add_argument('--train-ratio',
- type=float,
- default=1.0,
- help='Ratio of images to use for training.')
- cgroup.add_argument('--test-ratio',
- type=float,
- default=0,
- help='Ratio of images to use for testing.')
- cgroup.add_argument(
- '--recursive',
- type=bool,
- default=False,
- help=
- 'If true recursively walk through subdirs and assign an unique label\
- to images in each folder. Otherwise only include images in the root folder\
- and give them label 0.')
- cgroup.add_argument('--shuffle',
- type=bool,
- default=True,
- help='If this is set as True, \
- im2rec will randomize the image order in .lst')
-
- rgroup = parser.add_argument_group('Options for creating database')
- rgroup.add_argument(
- '--quality',
- type=int,
- default=95,
- help=
- 'JPEG quality for encoding, 1-100; or PNG compression for encoding, 1-9'
- )
- rgroup.add_argument(
- '--num-thread',
- type=int,
- default=1,
- help=
- 'number of thread to use for encoding. order of images will be different\
- from the input list if >1. the input list will be modified to match the\
- resulting order.')
- rgroup.add_argument('--color',
- type=int,
- default=1,
- choices=[-1, 0, 1],
- help='specify the color mode of the loaded image.\
- 1: Loads a color image. Any transparency of image will be neglected. It is the default flag.\
- 0: Loads image in grayscale mode.\
- -1:Loads image as such including alpha channel.')
- rgroup.add_argument('--encoding',
- type=str,
- default='.jpg',
- choices=['.jpg', '.png'],
- help='specify the encoding of the images.')
- rgroup.add_argument(
- '--pack-label',
- type=bool,
- default=False,
- help='Whether to also pack multi dimensional label in the record file')
- args = parser.parse_args()
- args.prefix = os.path.abspath(args.prefix)
- #args.root = os.path.abspath(args.root)
- return args
-
-
-if __name__ == '__main__':
- args = parse_args()
- if args.list:
- pass
- #make_list(args)
- else:
- if os.path.isdir(args.prefix):
- working_dir = args.prefix
- else:
- working_dir = os.path.dirname(args.prefix)
- image_size = (112, 112)
- print('image_size', image_size)
- args.image_h = image_size[0]
- args.image_w = image_size[1]
- files = [
- os.path.join(working_dir, fname)
- for fname in os.listdir(working_dir)
- if os.path.isfile(os.path.join(working_dir, fname))
- ]
- count = 0
- for fname in files:
- if fname.startswith(args.prefix) and fname.endswith('.lst'):
- print('Creating .rec file from', fname, 'in', working_dir)
- count += 1
- image_list = read_list(fname)
- # -- write_record -- #
- if args.num_thread > 1 and multiprocessing is not None:
- q_in = [
- multiprocessing.Queue(1024)
- for i in range(args.num_thread)
- ]
- q_out = multiprocessing.Queue(1024)
- read_process = [multiprocessing.Process(target=read_worker, args=(args, q_in[i], q_out)) \
- for i in range(args.num_thread)]
- for p in read_process:
- p.start()
- write_process = multiprocessing.Process(
- target=write_worker, args=(q_out, fname, working_dir))
- write_process.start()
-
- for i, item in enumerate(image_list):
- q_in[i % len(q_in)].put((i, item))
- for q in q_in:
- q.put(None)
- for p in read_process:
- p.join()
-
- q_out.put(None)
- write_process.join()
- else:
- print(
- 'multiprocessing not available, fall back to single threaded encoding'
- )
- try:
- import Queue as queue
- except ImportError:
- import queue
- q_out = queue.Queue()
- fname = os.path.basename(fname)
- fname_rec = os.path.splitext(fname)[0] + '.rec'
- fname_idx = os.path.splitext(fname)[0] + '.idx'
- record = mx.recordio.MXIndexedRecordIO(
- os.path.join(working_dir, fname_idx),
- os.path.join(working_dir, fname_rec), 'w')
- cnt = 0
- pre_time = time.time()
- for i, item in enumerate(image_list):
- image_encode(args, i, item, q_out)
- if q_out.empty():
- continue
- _, s, item = q_out.get()
- #header, _ = mx.recordio.unpack(s)
- #print('write header label', header.label)
- record.write_idx(item[0], s)
- if cnt % 1000 == 0:
- cur_time = time.time()
- print('time:', cur_time - pre_time, ' count:', cnt)
- pre_time = cur_time
- cnt += 1
- if not count:
- print('Did not find and list file with prefix %s' % args.prefix)
diff --git a/resources/11513D05.jpg b/resources/11513D05.jpg
deleted file mode 100644
index c38bb0d..0000000
Binary files a/resources/11513D05.jpg and /dev/null differ
diff --git a/resources/arcface.png b/resources/arcface.png
deleted file mode 100644
index fa43f9e..0000000
Binary files a/resources/arcface.png and /dev/null differ
diff --git a/resources/cov_test.jpg b/resources/cov_test.jpg
deleted file mode 100644
index 8d5bbe5..0000000
Binary files a/resources/cov_test.jpg and /dev/null differ
diff --git a/resources/facerecognitionfromvideo.PNG b/resources/facerecognitionfromvideo.PNG
deleted file mode 100644
index 96f7724..0000000
Binary files a/resources/facerecognitionfromvideo.PNG and /dev/null differ
diff --git a/resources/lfr19_wechat1.jpg b/resources/lfr19_wechat1.jpg
deleted file mode 100644
index 3ba0dad..0000000
Binary files a/resources/lfr19_wechat1.jpg and /dev/null differ
diff --git a/resources/mainsteps.png b/resources/mainsteps.png
deleted file mode 100644
index 1d3d1ff..0000000
Binary files a/resources/mainsteps.png and /dev/null differ
diff --git a/resources/memoryspeed.png b/resources/memoryspeed.png
deleted file mode 100644
index d4e1a44..0000000
Binary files a/resources/memoryspeed.png and /dev/null differ
diff --git a/resources/mfrlogo.jpg b/resources/mfrlogo.jpg
deleted file mode 100644
index 112896c..0000000
Binary files a/resources/mfrlogo.jpg and /dev/null differ
diff --git a/resources/retina_R50_ex1.jpg b/resources/retina_R50_ex1.jpg
deleted file mode 100644
index 603cda3..0000000
Binary files a/resources/retina_R50_ex1.jpg and /dev/null differ
diff --git a/resources/retina_R50_ex2.jpg b/resources/retina_R50_ex2.jpg
deleted file mode 100644
index ec2a606..0000000
Binary files a/resources/retina_R50_ex2.jpg and /dev/null differ
diff --git a/resources/subcenterarcfacediff.png b/resources/subcenterarcfacediff.png
deleted file mode 100644
index 541fa76..0000000
Binary files a/resources/subcenterarcfacediff.png and /dev/null differ
diff --git a/resources/subcenterarcfaceframework.png b/resources/subcenterarcfaceframework.png
deleted file mode 100644
index b7be824..0000000
Binary files a/resources/subcenterarcfaceframework.png and /dev/null differ
diff --git a/resources/widerfacevaltest.png b/resources/widerfacevaltest.png
deleted file mode 100644
index 65807de..0000000
Binary files a/resources/widerfacevaltest.png and /dev/null differ
diff --git a/sample-images/t2.jpg b/sample-images/t2.jpg
deleted file mode 100644
index dcca930..0000000
Binary files a/sample-images/t2.jpg and /dev/null differ