diff --git a/src/operator/lsoftmax-inl.h b/3rdparty/operator/lsoftmax-inl.h
similarity index 100%
rename from src/operator/lsoftmax-inl.h
rename to 3rdparty/operator/lsoftmax-inl.h
diff --git a/src/operator/lsoftmax.cc b/3rdparty/operator/lsoftmax.cc
similarity index 100%
rename from src/operator/lsoftmax.cc
rename to 3rdparty/operator/lsoftmax.cc
diff --git a/src/operator/lsoftmax.cu b/3rdparty/operator/lsoftmax.cu
similarity index 100%
rename from src/operator/lsoftmax.cu
rename to 3rdparty/operator/lsoftmax.cu
diff --git a/src/align/__init__.py b/src/align/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/align/align_celeb.py b/src/align/align_celeb.py
new file mode 100644
index 0000000..fa2e2c5
--- /dev/null
+++ b/src/align/align_celeb.py
@@ -0,0 +1,223 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from scipy import misc
+import sys
+import os
+import cv2
+import argparse
+import tensorflow as tf
+import numpy as np
+import base64
+#import facenet
+import detect_face
+from easydict import EasyDict as edict
+import random
+from time import sleep
+sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'common'))
+import face_image
+import face_preprocess
+
+def to_rgb(img):
+    """Replicate a 2-D grayscale array into a (rows, cols, 3) uint8 array."""
+    rows, cols = img.shape  # unpacking fails unless img has exactly two axes
+    gray = np.asarray(img).astype(np.uint8).reshape(rows, cols, 1)
+    return np.repeat(gray, 3, axis=2)
+
+
+def IOU(Reframe,GTframe):
+  """Return the intersection-over-union of two corner-format boxes.
+
+  Each argument is indexed as [0], [1] for one corner and [2], [3] for the
+  opposite corner. The result is 0 when the overlap width or height is not
+  positive; otherwise it is intersection area / union area as a float.
+  NOTE(review): presumably [2] >= [0] and [3] >= [1] -- confirm with callers.
+  """
+  left1, top1 = Reframe[0], Reframe[1]
+  w1, h1 = Reframe[2] - left1, Reframe[3] - top1
+  left2, top2 = GTframe[0], GTframe[1]
+  w2, h2 = GTframe[2] - left2, GTframe[3] - top2
+
+  # Overlap along an axis = both extents added together minus the span
+  # that encloses the two boxes on that axis.
+  span_x = max(left1 + w1, left2 + w2) - min(left1, left2)
+  span_y = max(top1 + h1, top2 + h2) - min(top1, top2)
+  overlap_w = w1 + w2 - span_x
+  overlap_h = h1 + h2 - span_y
+
+  # Disjoint (or merely touching) boxes have no intersection area.
+  if overlap_w <= 0 or overlap_h <= 0:
+    return 0
+
+  inter = overlap_w * overlap_h
+  union = w1 * h1 + w2 * h2 - inter
+  return inter * 1. / union
+
+
+def main(args):
+    """Align the records kept by the clean list and write an index of the results.
+
+    Loads `<args.input_dir>_clean_list.txt` to learn which (person, image id) pairs
+    to keep, then for each kept record of the file `args.input_dir` decodes the
+    image, detects a face, and saves the crop returned by face_preprocess.preprocess.
+    """
+    output_dir = os.path.expanduser(args.output_dir)
+    if not os.path.exists(output_dir):
+        os.makedirs(output_dir)
+    datamap = {}
+    pp = 0
+    datasize = 0
+    verr = 0
+    with open(args.input_dir+"_clean_list.txt", 'r') as list_file:
+        for line in list_file:
+            pp+=1
+            if pp%10000==0:
+                print('loading list', pp)
+            line = line.strip()[2:]
+            if not line.startswith('m.'):
+                continue
+            vec = line.split('/')
+            assert len(vec)==2
+            person = vec[0]
+            img = vec[1]
+            try:
+                img_id = int(img.split('.')[0])
+            except ValueError:
+                verr+=1
+                continue
+            if not person in datamap:
+                labelid = len(datamap)
+                datamap[person] = [labelid, {img_id : 1}]
+            else:
+                datamap[person][1][img_id] = 1
+            datasize+=1
+
+    print('dataset size', args.name, datasize)
+    print('dataset err', verr)
+
+    print('Creating networks and loading parameters')
+
+    with tf.Graph().as_default():
+        sess = tf.Session()
+        with sess.as_default():
+            pnet, rnet, onet = detect_face.create_mtcnn(sess, None)
+
+    minsize = 100 # minimum size of face
+    threshold = [ 0.6, 0.7, 0.7 ]  # three steps's threshold
+    factor = 0.709 # scale factor
+
+    print(minsize)
+    print(threshold)
+    print(factor)
+
+    output_filename = os.path.join(output_dir, 'faceinsight_align_%s.lst' % args.name)
+
+    with open(output_filename, "w") as text_file, open(args.input_dir, 'r') as data_file:
+        nrof_images_total = 0
+        nrof_successfully_aligned = 0
+        nrof_changed = 0
+        nrof_iou3 = 0
+        nrof_force = 0
+        for line in data_file:
+            vec = line.strip().split()
+            person = vec[0]
+            img_id = int(vec[1])
+            v = datamap.get(person, None)
+            if v is None:
+              continue
+            if not img_id in v[1]:
+              continue
+            labelid = v[0]
+            img_str = base64.b64decode(vec[-1])
+            nparr = np.frombuffer(img_str, np.uint8)  # fromstring is deprecated for binary input
+            img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)  # CV_LOAD_IMAGE_COLOR is gone in OpenCV 3+
+            if img is None:
+              print('failed to decode image', person, img_id)
+              continue
+            img = img[...,::-1] #to rgb
+            if nrof_images_total%100==0:
+              print("Processing %d, (%d)" % (nrof_images_total, nrof_successfully_aligned))
+            nrof_images_total += 1
+            target_dir = os.path.join(output_dir, person)
+            if not os.path.exists(target_dir):
+              os.makedirs(target_dir)
+            target_path = os.path.join(target_dir, "%d.jpg"%img_id)
+            _minsize = minsize
+            fimage = edict()
+            fimage.bbox = None  # no prior box here, so the "bbox is not None" branches below never run
+            fimage.image_path = target_path
+            fimage.classname = str(labelid)
+            if fimage.bbox is not None:
+              _bb = fimage.bbox
+              _minsize = min( [_bb[2]-_bb[0], _bb[3]-_bb[1], img.shape[0]//2, img.shape[1]//2] )
+
+            bounding_boxes, points = detect_face.detect_face(img, _minsize, pnet, rnet, onet, threshold, factor)
+            bindex = -1
+            nrof_faces = bounding_boxes.shape[0]
+            if fimage.bbox is None and nrof_faces>0:
+              det = bounding_boxes[:,0:4]
+              img_size = np.asarray(img.shape)[0:2]
+              bindex = 0
+              if nrof_faces>1:
+                bounding_box_size = (det[:,2]-det[:,0])*(det[:,3]-det[:,1])
+                img_center = img_size / 2
+                offsets = np.vstack([ (det[:,0]+det[:,2])/2-img_center[1], (det[:,1]+det[:,3])/2-img_center[0] ])
+                offset_dist_squared = np.sum(np.power(offsets,2.0),0)
+                bindex = np.argmax(bounding_box_size-offset_dist_squared*2.0) # some extra weight on the centering
+            if fimage.bbox is not None:
+              if nrof_faces>0:
+                assert(bounding_boxes.shape[0]==points.shape[1])
+                det = bounding_boxes[:,0:4]
+                img_size = np.asarray(img.shape)[0:2]
+                index2 = [0.0, 0]
+                for i in range(det.shape[0]):
+                  _det = det[i]
+                  iou = IOU(fimage.bbox, _det)
+                  if iou>index2[0]:
+                    index2[0] = iou
+                    index2[1] = i
+                if index2[0]>-0.3:  # NOTE(review): always true since index2[0] >= 0.0 -- was 0.3 intended?
+                  bindex = index2[1]
+                  nrof_iou3+=1
+              if bindex<0:
+                bounding_boxes, points = detect_face.detect_face_force(img, fimage.bbox, pnet, rnet, onet)
+                bindex = 0
+                nrof_force+=1
+
+            if bindex>=0:
+
+                det = bounding_boxes[:,0:4]
+                det = det[bindex,:]
+                points = points[:, bindex]
+                landmark = points.reshape((2,5)).T  # reshape to (2,5), then transpose to (5,2)
+                det = np.squeeze(det)
+                bb = det
+                points = list(points.flatten())
+                assert(len(points)==10)
+                warped = face_preprocess.preprocess(img, bbox=bb, landmark = landmark, image_size="112,96")
+                misc.imsave(target_path, warped)
+                nrof_successfully_aligned += 1
+                oline = '%d\t%s\t%d' % (1,fimage.image_path, int(fimage.classname))
+                text_file.write("%s\n"%oline)
+
+    print('Total number of images: %d' % nrof_images_total)
+    print('Number of successfully aligned images: %d' % nrof_successfully_aligned)
+    print('Number of changed: %d' % nrof_changed)
+    print('Number of iou3: %d' % nrof_iou3)
+    print('Number of force: %d' % nrof_force)
+
+def parse_arguments(argv):
+    """Parse the command-line options in `argv` (program name excluded) into a namespace."""
+    parser = argparse.ArgumentParser()
+
+    # main() opens --input-dir as a file and also reads '<value>_clean_list.txt'.
+    parser.add_argument('--input-dir', type=str,
+        help='Path of the input data file; "<path>_clean_list.txt" is read as the clean list.')
+    parser.add_argument('--name', type=str, default='celeb',
+        help='Dataset name used in the output list filename.')
+    parser.add_argument('--output-dir', type=str, help='Directory with aligned face thumbnails.')
+    return parser.parse_args(argv)
+
+if __name__ == '__main__':  # run only when executed as a script, not on import
+    main(parse_arguments(sys.argv[1:]))
diff --git a/src/align/align_dataset.py b/src/align/align_dataset.py
new file mode 100644
index 0000000..e74224a
--- /dev/null
+++ b/src/align/align_dataset.py
@@ -0,0 +1,137 @@
+"""Performs face alignment and stores face thumbnails in the output directory."""
+
+# MIT License
+# 
+# Copyright (c) 2016 David Sandberg
+# 
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+# 
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+# 
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from scipy import misc
+import sys
+import os
+import argparse
+import random
+import align_dlib  # @UnresolvedImport
+import facenet
+
+def main(args):
+    """Align the images of the dataset under args.input_dir and write PNG results under args.output_dir."""
+    align = align_dlib.AlignDlib(os.path.expanduser(args.dlib_face_predictor))
+    landmarkIndices = align_dlib.AlignDlib.OUTER_EYES_AND_NOSE
+    output_dir = os.path.expanduser(args.output_dir)
+    if not os.path.exists(output_dir):
+        os.makedirs(output_dir)
+    # Store some git revision info in a text file in the log directory
+    src_path,_ = os.path.split(os.path.realpath(__file__))
+    facenet.store_revision_info(src_path, output_dir, ' '.join(sys.argv))
+    dataset = facenet.get_dataset(args.input_dir)
+    random.shuffle(dataset)
+    # Scale the image such that the face fills the frame when cropped to crop_size
+    scale = float(args.face_size) / args.image_size
+    nrof_images_total = 0
+    nrof_prealigned_images = 0
+    nrof_successfully_aligned = 0
+    for cls in dataset:
+        output_class_dir = os.path.join(output_dir, cls.name)
+        if not os.path.exists(output_class_dir):
+            os.makedirs(output_class_dir)
+        random.shuffle(cls.image_paths)
+        for image_path in cls.image_paths:
+            nrof_images_total += 1
+            filename = os.path.splitext(os.path.split(image_path)[1])[0]
+            output_filename = os.path.join(output_class_dir, filename+'.png')
+            if not os.path.exists(output_filename):
+                try:
+                    img = misc.imread(image_path)
+                except (IOError, ValueError, IndexError) as e:
+                    print('{}: {}'.format(image_path, e))
+                else:
+                    if img.ndim == 2:
+                        img = facenet.to_rgb(img)
+                    if args.use_center_crop:
+                        scaled = misc.imresize(img, args.prealigned_scale, interp='bilinear')
+                        # Floor division: '/' is true division in this file and floats are not valid slice bounds.
+                        sz1 = scaled.shape[1]//2
+                        sz2 = args.image_size//2
+                        aligned = scaled[(sz1-sz2):(sz1+sz2),(sz1-sz2):(sz1+sz2),:]
+                    else:
+                        aligned = align.align(args.image_size, img, landmarkIndices=landmarkIndices,
+                                              skipMulti=False, scale=scale)
+                    if aligned is not None:
+                        print(image_path)
+                        nrof_successfully_aligned += 1
+                        misc.imsave(output_filename, aligned)
+                    elif args.prealigned_dir:
+                        # Face detection failed. Use center crop from pre-aligned dataset
+                        class_name = os.path.split(output_class_dir)[1]
+                        image_path_without_ext = os.path.join(os.path.expanduser(args.prealigned_dir),
+                                                              class_name, filename)
+                        # Find the extension of the image
+                        exts = ('jpg', 'png')
+                        image_path = ''  # stays empty when no candidate file exists
+                        for ext in exts:
+                            temp_path = image_path_without_ext + '.' + ext
+                            if os.path.exists(temp_path):
+                                image_path = temp_path
+                                break
+                        try:
+                            img = misc.imread(image_path)
+                        except (IOError, ValueError, IndexError) as e:
+                            print('{}: {}'.format(image_path, e))
+                        else:
+                            scaled = misc.imresize(img, args.prealigned_scale, interp='bilinear')
+                            sz1 = scaled.shape[1]//2  # integer slice bounds, as in the center-crop path
+                            sz2 = args.image_size//2
+                            cropped = scaled[(sz1-sz2):(sz1+sz2),(sz1-sz2):(sz1+sz2),:]
+                            print(image_path)
+                            nrof_prealigned_images += 1
+                            misc.imsave(output_filename, cropped)
+                    else:
+                        print('Unable to align "%s"' % image_path)
+
+    print('Total number of images: %d' % nrof_images_total)
+    print('Number of successfully aligned images: %d' % nrof_successfully_aligned)
+    print('Number of pre-aligned images: %d' % nrof_prealigned_images)
+            
+
+def parse_arguments(argv):
+    """Build the dlib-alignment argument parser and parse `argv` with it."""
+    positional = (
+        ('input_dir', 'Directory with unaligned images.'),
+        ('output_dir', 'Directory with aligned face thumbnails.'))
+    cli = argparse.ArgumentParser()
+    for dest, text in positional:
+        cli.add_argument(dest, type=str, help=text)
+    cli.add_argument('--dlib_face_predictor', default='../data/shape_predictor_68_face_landmarks.dat',
+        type=str, help='File containing the dlib face predictor.')
+    cli.add_argument('--image_size', default=110, type=int, help='Image size (height, width) in pixels.')
+    cli.add_argument('--face_size', default=96, type=int, help='Size of the face thumbnail (height, width) in pixels.')
+    cli.add_argument('--use_center_crop', action='store_true',
+        help='Use the center crop of the original image after scaling the image using prealigned_scale.')
+    cli.add_argument('--prealigned_dir', default='', type=str, help='Replace image with a pre-aligned version when face detection fails.')
+    cli.add_argument('--prealigned_scale', default=0.87, type=float,
+        help='The amount of scaling to apply to prealigned images before taking the center crop.')
+    return cli.parse_args(argv)
+
+if __name__ == '__main__':  # run only when executed as a script, not on import
+    main(parse_arguments(sys.argv[1:]))
diff --git a/src/align/align_dataset_mtcnn.py b/src/align/align_dataset_mtcnn.py
new file mode 100644
index 0000000..d2a3eea
--- /dev/null
+++ b/src/align/align_dataset_mtcnn.py
@@ -0,0 +1,143 @@
+"""Performs face alignment and stores face thumbnails in the output directory."""
+# MIT License
+# 
+# Copyright (c) 2016 David Sandberg
+# 
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+# 
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+# 
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from scipy import misc
+import sys
+import os
+import argparse
+import tensorflow as tf
+import numpy as np
+import facenet
+import align.detect_face
+import random
+from time import sleep
+
+def main(args):
+    """Detect, crop and resize the face in each dataset image, logging the boxes to a text file."""
+    sleep(random.random())
+    output_dir = os.path.expanduser(args.output_dir)
+    if not os.path.exists(output_dir):
+        os.makedirs(output_dir)
+    # Store some git revision info in a text file in the log directory
+    src_path,_ = os.path.split(os.path.realpath(__file__))
+    facenet.store_revision_info(src_path, output_dir, ' '.join(sys.argv))
+    dataset = facenet.get_dataset(args.input_dir)
+
+    print('Creating networks and loading parameters')
+
+    with tf.Graph().as_default():
+        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_memory_fraction)
+        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
+        with sess.as_default():
+            pnet, rnet, onet = align.detect_face.create_mtcnn(sess, None)
+
+    minsize = 20 # minimum size of face
+    threshold = [ 0.6, 0.7, 0.7 ]  # three steps's threshold
+    factor = 0.709 # scale factor
+
+    # Add a random key to the filename to allow alignment using multiple processes
+    random_key = np.random.randint(0, high=99999)
+    bounding_boxes_filename = os.path.join(output_dir, 'bounding_boxes_%05d.txt' % random_key)
+
+    with open(bounding_boxes_filename, "w") as text_file:
+        nrof_images_total = 0
+        nrof_successfully_aligned = 0
+        if args.random_order:
+            random.shuffle(dataset)
+        for cls in dataset:
+            output_class_dir = os.path.join(output_dir, cls.name)
+            if not os.path.exists(output_class_dir):
+                os.makedirs(output_class_dir)
+            if args.random_order:  # shuffle even when another process already created the directory
+                random.shuffle(cls.image_paths)
+            for image_path in cls.image_paths:
+                nrof_images_total += 1
+                filename = os.path.splitext(os.path.split(image_path)[1])[0]
+                output_filename = os.path.join(output_class_dir, filename+'.png')
+                print(image_path)
+                if not os.path.exists(output_filename):
+                    try:
+                        img = misc.imread(image_path)
+                    except (IOError, ValueError, IndexError) as e:
+                        print('{}: {}'.format(image_path, e))
+                    else:
+                        if img.ndim<2:
+                            print('Unable to align "%s"' % image_path)
+                            text_file.write('%s\n' % (output_filename))
+                            continue
+                        if img.ndim == 2:
+                            img = facenet.to_rgb(img)
+                        img = img[:,:,0:3]
+
+                        bounding_boxes, _ = align.detect_face.detect_face(img, minsize, pnet, rnet, onet, threshold, factor)
+                        nrof_faces = bounding_boxes.shape[0]
+                        if nrof_faces>0:
+                            det = bounding_boxes[:,0:4]
+                            img_size = np.asarray(img.shape)[0:2]
+                            if nrof_faces>1:
+                                bounding_box_size = (det[:,2]-det[:,0])*(det[:,3]-det[:,1])
+                                img_center = img_size / 2
+                                offsets = np.vstack([ (det[:,0]+det[:,2])/2-img_center[1], (det[:,1]+det[:,3])/2-img_center[0] ])
+                                offset_dist_squared = np.sum(np.power(offsets,2.0),0)
+                                index = np.argmax(bounding_box_size-offset_dist_squared*2.0) # some extra weight on the centering
+                                det = det[index,:]
+                            det = np.squeeze(det)
+                            bb = np.zeros(4, dtype=np.int32)
+                            bb[0] = np.maximum(det[0]-args.margin/2, 0)
+                            bb[1] = np.maximum(det[1]-args.margin/2, 0)
+                            bb[2] = np.minimum(det[2]+args.margin/2, img_size[1])
+                            bb[3] = np.minimum(det[3]+args.margin/2, img_size[0])
+                            cropped = img[bb[1]:bb[3],bb[0]:bb[2],:]
+                            scaled = misc.imresize(cropped, (args.image_size, args.image_size), interp='bilinear')
+                            nrof_successfully_aligned += 1
+                            misc.imsave(output_filename, scaled)
+                            text_file.write('%s %d %d %d %d\n' % (output_filename, bb[0], bb[1], bb[2], bb[3]))
+                        else:
+                            print('Unable to align "%s"' % image_path)
+                            text_file.write('%s\n' % (output_filename))
+
+    print('Total number of images: %d' % nrof_images_total)
+    print('Number of successfully aligned images: %d' % nrof_successfully_aligned)
+            
+
+def parse_arguments(argv):
+    """Build the MTCNN-alignment argument parser and parse `argv` with it."""
+    cli = argparse.ArgumentParser()
+    cli.add_argument('input_dir', type=str, help='Directory with unaligned images.')
+    cli.add_argument('output_dir', type=str, help='Directory with aligned face thumbnails.')
+    cli.add_argument('--image_size', default=182, type=int,
+        help='Image size (height, width) in pixels.')
+    cli.add_argument('--margin', default=44, type=int,
+        help='Margin for the crop around the bounding box (height, width) in pixels.')
+    cli.add_argument('--random_order', action='store_true',
+        help='Shuffles the order of images to enable alignment using multiple processes.')
+    cli.add_argument('--gpu_memory_fraction', default=1.0, type=float,
+        help='Upper bound on the amount of GPU memory that will be used by the process.')
+    return cli.parse_args(argv)
+
+if __name__ == '__main__':  # run only when executed as a script, not on import
+    main(parse_arguments(sys.argv[1:]))
diff --git a/src/align/align_dlib.py b/src/align/align_dlib.py
new file mode 100644
index 0000000..e5e1337
--- /dev/null
+++ b/src/align/align_dlib.py
@@ -0,0 +1,204 @@
+# Copyright 2015-2016 Carnegie Mellon University
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Module for dlib-based alignment."""
+
+# NOTE: This file has been copied from the openface project.
+#  https://github.com/cmusatyalab/openface/blob/master/openface/align_dlib.py
+
+import cv2
+import dlib
+import numpy as np
+
+TEMPLATE = np.float32([  # 68 (x, y) rows; AlignDlib.align selects rows by landmark index via MINMAX_TEMPLATE
+    (0.0792396913815, 0.339223741112), (0.0829219487236, 0.456955367943),
+    (0.0967927109165, 0.575648016728), (0.122141515615, 0.691921601066),
+    (0.168687863544, 0.800341263616), (0.239789390707, 0.895732504778),
+    (0.325662452515, 0.977068762493), (0.422318282013, 1.04329000149),
+    (0.531777802068, 1.06080371126), (0.641296298053, 1.03981924107),
+    (0.738105872266, 0.972268833998), (0.824444363295, 0.889624082279),
+    (0.894792677532, 0.792494155836), (0.939395486253, 0.681546643421),
+    (0.96111933829, 0.562238253072), (0.970579841181, 0.441758925744),
+    (0.971193274221, 0.322118743967), (0.163846223133, 0.249151738053),
+    (0.21780354657, 0.204255863861), (0.291299351124, 0.192367318323),
+    (0.367460241458, 0.203582210627), (0.4392945113, 0.233135599851),
+    (0.586445962425, 0.228141644834), (0.660152671635, 0.195923841854),
+    (0.737466449096, 0.182360984545), (0.813236546239, 0.192828009114),
+    (0.8707571886, 0.235293377042), (0.51534533827, 0.31863546193),
+    (0.516221448289, 0.396200446263), (0.517118861835, 0.473797687758),
+    (0.51816430343, 0.553157797772), (0.433701156035, 0.604054457668),
+    (0.475501237769, 0.62076344024), (0.520712933176, 0.634268222208),
+    (0.565874114041, 0.618796581487), (0.607054002672, 0.60157671656),
+    (0.252418718401, 0.331052263829), (0.298663015648, 0.302646354002),
+    (0.355749724218, 0.303020650651), (0.403718978315, 0.33867711083),
+    (0.352507175597, 0.349987615384), (0.296791759886, 0.350478978225),
+    (0.631326076346, 0.334136672344), (0.679073381078, 0.29645404267),
+    (0.73597236153, 0.294721285802), (0.782865376271, 0.321305281656),
+    (0.740312274764, 0.341849376713), (0.68499850091, 0.343734332172),
+    (0.353167761422, 0.746189164237), (0.414587777921, 0.719053835073),
+    (0.477677654595, 0.706835892494), (0.522732900812, 0.717092275768),
+    (0.569832064287, 0.705414478982), (0.635195811927, 0.71565572516),
+    (0.69951672331, 0.739419187253), (0.639447159575, 0.805236879972),
+    (0.576410514055, 0.835436670169), (0.525398405766, 0.841706377792),
+    (0.47641545769, 0.837505914975), (0.41379548902, 0.810045601727),
+    (0.380084785646, 0.749979603086), (0.477955996282, 0.74513234612),
+    (0.523389793327, 0.748924302636), (0.571057789237, 0.74332894691),
+    (0.672409137852, 0.744177032192), (0.572539621444, 0.776609286626),
+    (0.5240106503, 0.783370783245), (0.477561227414, 0.778476346951)])
+
+INV_TEMPLATE = np.float32([  # 3x3 constant; NOTE(review): not referenced by the code visible in this module
+                            (-0.04099179660567834, -0.008425234314031194, 2.575498465013183),
+                            (0.04062510634554352, -0.009678089746831375, -1.2534351452524177),
+                            (0.0003666902601348179, 0.01810332406086298, -0.32206331976076663)])
+
+TPL_MIN, TPL_MAX = np.min(TEMPLATE, axis=0), np.max(TEMPLATE, axis=0)  # per-column minimum / maximum of TEMPLATE
+MINMAX_TEMPLATE = (TEMPLATE - TPL_MIN) / (TPL_MAX - TPL_MIN)  # TEMPLATE rescaled so each column spans [0, 1]
+
+
+class AlignDlib:
+    """
+    Use `dlib's landmark estimation <http://blog.dlib.net/2014/08/real-time-face-pose-estimation.html>`_ to align faces.
+
+    The alignment preprocesses faces for input into a neural network.
+    Faces are resized to the same size (such as 96x96) and transformed
+    to make landmarks (such as the eyes and nose) appear at the same
+    location on every image.
+
+    Normalized landmarks:
+
+    .. image:: ../images/dlib-landmark-mean.png
+    """
+
+    #: Landmark indices corresponding to the inner eyes and bottom lip.
+    INNER_EYES_AND_BOTTOM_LIP = [39, 42, 57]
+
+    #: Landmark indices corresponding to the outer eyes and nose.
+    OUTER_EYES_AND_NOSE = [36, 45, 33]
+
+    def __init__(self, facePredictor):
+        """
+        Instantiate an 'AlignDlib' object.
+
+        :param facePredictor: The path passed to dlib.shape_predictor (the landmark model file).
+        :type facePredictor: str
+        """
+        assert facePredictor is not None
+
+        #pylint: disable=no-member
+        self.detector = dlib.get_frontal_face_detector()
+        self.predictor = dlib.shape_predictor(facePredictor)
+
+    def getAllFaceBoundingBoxes(self, rgbImg):
+        """
+        Find all face bounding boxes in an image.
+
+        :param rgbImg: RGB image to process. Shape: (height, width, 3)
+        :type rgbImg: numpy.ndarray
+        :return: All face bounding boxes in an image (an empty list if the detector raises).
+        :rtype: dlib.rectangles
+        """
+        assert rgbImg is not None
+
+        try:
+            return self.detector(rgbImg, 1)
+        except Exception as e: #pylint: disable=broad-except
+            print("Warning: {}".format(e))
+            # In rare cases, exceptions are thrown.
+            return []
+
+    def getLargestFaceBoundingBox(self, rgbImg, skipMulti=False):
+        """
+        Find the largest face bounding box in an image.
+
+        :param rgbImg: RGB image to process. Shape: (height, width, 3)
+        :type rgbImg: numpy.ndarray
+        :param skipMulti: Skip image if more than one face detected.
+        :type skipMulti: bool
+        :return: The largest face bounding box in an image, or None.
+        :rtype: dlib.rectangle
+        """
+        assert rgbImg is not None
+
+        faces = self.getAllFaceBoundingBoxes(rgbImg)
+        if (not skipMulti and len(faces) > 0) or len(faces) == 1:
+            return max(faces, key=lambda rect: rect.width() * rect.height())
+        else:
+            return None
+
+    def findLandmarks(self, rgbImg, bb):
+        """
+        Find the landmarks of a face.
+
+        :param rgbImg: RGB image to process. Shape: (height, width, 3)
+        :type rgbImg: numpy.ndarray
+        :param bb: Bounding box around the face to find landmarks for.
+        :type bb: dlib.rectangle
+        :return: Detected landmark locations.
+        :rtype: list of (x,y) tuples
+        """
+        assert rgbImg is not None
+        assert bb is not None
+
+        points = self.predictor(rgbImg, bb)
+        # One (x, y) tuple per part of the predicted shape.
+        return [(p.x, p.y) for p in points.parts()]
+
+    #pylint: disable=dangerous-default-value
+    def align(self, imgDim, rgbImg, bb=None,
+              landmarks=None, landmarkIndices=INNER_EYES_AND_BOTTOM_LIP,
+              skipMulti=False, scale=1.0):
+        r"""align(imgDim, rgbImg, bb=None, landmarks=None, landmarkIndices=INNER_EYES_AND_BOTTOM_LIP, skipMulti=False, scale=1.0)
+
+        Transform and align a face in an image.
+
+        :param imgDim: The edge length in pixels of the square the image is resized to.
+        :type imgDim: int
+        :param rgbImg: RGB image to process. Shape: (height, width, 3)
+        :type rgbImg: numpy.ndarray
+        :param bb: Bounding box around the face to align. \
+                   Defaults to the largest face.
+        :type bb: dlib.rectangle
+        :param landmarks: Detected landmark locations. \
+                          Landmarks found on `bb` if not provided.
+        :type landmarks: list of (x,y) tuples
+        :param landmarkIndices: The indices to transform to.
+        :type landmarkIndices: list of ints
+        :param skipMulti: Skip image if more than one face detected.
+        :type skipMulti: bool
+        :param scale: Scale image before cropping to the size given by imgDim.
+        :type scale: float
+        :return: The aligned RGB image. Shape: (imgDim, imgDim, 3). None when no bounding box is found.
+        :rtype: numpy.ndarray
+        """
+        assert imgDim is not None
+        assert rgbImg is not None
+        assert landmarkIndices is not None
+
+        if bb is None:
+            bb = self.getLargestFaceBoundingBox(rgbImg, skipMulti)
+            if bb is None:
+                return  # no usable face: the caller receives None
+
+        if landmarks is None:
+            landmarks = self.findLandmarks(rgbImg, bb)
+
+        npLandmarks = np.float32(landmarks)
+        npLandmarkIndices = np.array(landmarkIndices)
+
+        #pylint: disable=maybe-no-member
+        H = cv2.getAffineTransform(npLandmarks[npLandmarkIndices],
+                                   imgDim * MINMAX_TEMPLATE[npLandmarkIndices]*scale + imgDim*(1-scale)/2)
+        thumbnail = cv2.warpAffine(rgbImg, H, (imgDim, imgDim))
+        
+        return thumbnail
diff --git a/src/align/align_facescrub.py b/src/align/align_facescrub.py
new file mode 100644
index 0000000..cd9fd03
--- /dev/null
+++ b/src/align/align_facescrub.py
@@ -0,0 +1,271 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from scipy import misc
+import sys
+import os
+import json
+import argparse
+import tensorflow as tf
+import numpy as np
+#import facenet
+import detect_face
+import random
+from time import sleep
+sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'common'))
+import face_image
+from skimage import transform as trans
+import cv2
+
+def to_rgb(img):
+    """Replicate a 2-D grayscale array into a (rows, cols, 3) uint8 image."""
+    rows, cols = img.shape  # unpacking fails unless img is exactly 2-D
+    channel = np.asarray(img, dtype=np.uint8).reshape(rows, cols, 1)
+    return np.repeat(channel, 3, axis=2)
+
+
+def IOU(Reframe,GTframe):
+  """Return the intersection-over-union of two boxes.
+
+  Both boxes are indexed as [x1, y1, x2, y2]; only indices 0..3 are read.
+
+  :param Reframe: first box.
+  :param GTframe: second box.
+  :return: 0 when the boxes share no area, otherwise
+           overlap / (area1 + area2 - overlap) as a float.
+  """
+  ax, ay = Reframe[0], Reframe[1]
+  aw, ah = Reframe[2]-Reframe[0], Reframe[3]-Reframe[1]
+  bx, by = GTframe[0], GTframe[1]
+  bw, bh = GTframe[2]-GTframe[0], GTframe[3]-GTframe[1]
+
+  # Overlap per axis = sum of both extents minus the extent of their hull.
+  overlap_w = aw+bw-(max(ax+aw,bx+bw)-min(ax,bx))
+  overlap_h = ah+bh-(max(ay+ah,by+bh)-min(ay,by))
+
+  # Touching or disjoint boxes yield a non-positive overlap on some axis.
+  if overlap_w <= 0 or overlap_h <= 0:
+    return 0
+
+  # Union = both areas minus the doubly counted overlap.
+  overlap = overlap_w*overlap_h
+  area_a, area_b = aw*ah, bw*bh
+  return overlap*1./(area_a+area_b-overlap)
+
+
+def main(args):
+    """Align every FaceScrub image to a 112x96 (height x width) crop.
+
+    The label files and the feature-list JSON under ``args.input_dir`` are
+    parsed only to print how many entries they hold.  Each dataset image is
+    then aligned with the first strategy that succeeds: annotated landmarks
+    refined by MTCNN, an MTCNN detection overlapping the annotated box
+    (IoU > 0.3), ``detect_face_force`` on the annotated box, or a plain
+    crop + resize.  Crops are written below ``args.output_dir`` and indexed
+    in its ``lst`` file; ``nrof`` counts how often each strategy was used.
+
+    :param args: namespace providing ``input_dir`` and ``output_dir``.
+    """
+    output_dir = os.path.expanduser(args.output_dir)
+    if not os.path.exists(output_dir):
+        os.makedirs(output_dir)
+    image_dir = os.path.join(args.input_dir, 'facescrub')
+    dataset = face_image.get_dataset('facescrub', image_dir)
+    print('dataset size', len(dataset))
+    # NOTE(review): bbox and valid_key are only counted here; the alignment loop reads fimage.bbox.
+    bbox = {}
+    for label_file in ['facescrub_actors.txt',  'facescrub_actresses.txt']:
+      label_file = os.path.join(args.input_dir, label_file)
+      pp = 0
+      # "with" closes the label file; iterating a bare open() leaked the handle.
+      with open(label_file, 'r') as label_lines:
+        for line in label_lines:
+          pp+=1
+          if pp==1:
+            continue
+          vec = line.split("\t")
+          key = (vec[0], int(vec[2]))
+          value = [int(x) for x in vec[4].split(',')]
+          bbox[key] = value
+    print('bbox size', len(bbox))
+
+    valid_key = {}
+    with open(os.path.join(args.input_dir, 'facescrub_uncropped_features_list.json')) as json_file:
+      json_data = json.load(json_file)['path']
+    for _data in json_data:
+      key = _data.split('/')[-1]
+      pos = key.rfind('.')
+      if pos<0:
+        print(_data)
+      else:
+        key = key[0:pos]
+      keys = key.split('_')
+      if len(keys)!=2:
+        print('err', key, _data)
+        continue
+      key = (keys[0], int(keys[1]))
+      valid_key[key] = 1
+    print('valid keys', len(valid_key))
+
+    print('Creating networks and loading parameters')
+
+    with tf.Graph().as_default():
+        sess = tf.Session()
+        with sess.as_default():
+            pnet, rnet, onet = detect_face.create_mtcnn(sess, None)
+
+    threshold = [ 0.6, 0.7, 0.7 ]  # three steps's threshold
+    factor = 0.709 # scale factor
+    image_size = [112,96]
+    # Target positions of the five landmarks inside the 112x96 output crop.
+    src = np.array([
+      [30.2946, 51.6963],
+      [65.5318, 51.5014],
+      [48.0252, 71.7366],
+      [33.5493, 92.3655],
+      [62.7299, 92.2041] ], dtype=np.float32 )
+
+    # Reuse the expanded output_dir: joining args.output_dir again would treat
+    # a leading "~" as a literal directory name.
+    output_filename = os.path.join(output_dir, 'lst')
+
+    with open(output_filename, "w") as text_file:
+        nrof_images_total = 0
+        nrof = np.zeros( (5,), dtype=np.int32)
+        for fimage in dataset:
+            if nrof_images_total%100==0:
+              print("Processing %d, (%s)" % (nrof_images_total, nrof))
+            nrof_images_total += 1
+            image_path = fimage.image_path
+            if not os.path.exists(image_path):
+              print('image not found (%s)'%image_path)
+              continue
+            # Unreadable images are reported and skipped.
+            try:
+                img = misc.imread(image_path)
+            except (IOError, ValueError, IndexError) as e:
+                errorMessage = '{}: {}'.format(image_path, e)
+                print(errorMessage)
+            else:
+                if img.ndim<2:
+                    print('Unable to align "%s", img dim error' % image_path)
+                    continue
+                if img.ndim == 2:
+                    img = to_rgb(img)
+                # Keep only the first three channels.
+                img = img[:,:,0:3]
+                _paths = fimage.image_path.split('/')
+                a,b,c = _paths[-3], _paths[-2], _paths[-1]
+                target_dir = os.path.join(output_dir, a, b)
+                if not os.path.exists(target_dir):
+                  os.makedirs(target_dir)
+                target_file = os.path.join(target_dir, c)
+                warped = None
+                # Strategy 0: annotated landmarks, refined by a second MTCNN pass.
+                if fimage.landmark is not None:
+                  dst = fimage.landmark.astype(np.float32)
+
+                  # Warp onto a canvas twice the output size (template scaled 1.5x and shifted).
+                  tform = trans.SimilarityTransform()
+                  tform.estimate(dst, src[0:3,:]*1.5+image_size[0]*0.25)
+                  M = tform.params[0:2,:]
+                  warped0 = cv2.warpAffine(img,M,(image_size[1]*2,image_size[0]*2), borderValue = 0.0)
+                  _minsize = image_size[0]
+                  bounding_boxes, points = detect_face.detect_face(warped0, _minsize, pnet, rnet, onet, threshold, factor)
+                  if bounding_boxes.shape[0]>0:
+                    bindex = 0
+                    #points need to be transpose, points = points.reshape( (5,2) ).transpose()
+                    dst = points[:, bindex].reshape( (2,5) ).T
+                    tform = trans.SimilarityTransform()
+                    tform.estimate(dst, src)
+                    M = tform.params[0:2,:]
+                    warped = cv2.warpAffine(warped0,M,(image_size[1],image_size[0]), borderValue = 0.0)
+                    nrof[0]+=1
+                # Strategy 1: MTCNN detection that best overlaps the annotated box.
+                if warped is None and fimage.bbox is not None:
+                  _minsize = img.shape[0]//4
+                  bounding_boxes, points = detect_face.detect_face(img, _minsize, pnet, rnet, onet, threshold, factor)
+                  if bounding_boxes.shape[0]>0:
+                    det = bounding_boxes[:,0:4]
+                    bindex = -1
+                    index2 = [0.0, 0]
+                    # range (not xrange) keeps this loop working on Python 3 as well.
+                    for i in range(det.shape[0]):
+                      _det = det[i]
+                      iou = IOU(fimage.bbox, _det)
+                      if iou>index2[0]:
+                        index2[0] = iou
+                        index2[1] = i
+                    if index2[0]>0.3:
+                      bindex = index2[1]
+                    if bindex>=0:
+                      dst = points[:, bindex].reshape( (2,5) ).T
+                      tform = trans.SimilarityTransform()
+                      tform.estimate(dst, src)
+                      M = tform.params[0:2,:]
+                      warped = cv2.warpAffine(img,M,(image_size[1],image_size[0]), borderValue = 0.0)
+                      nrof[1]+=1
+                # Strategy 2: force the detector onto the annotated box.
+                if warped is None and fimage.bbox is not None:
+                  bb = fimage.bbox
+                  bounding_boxes, points = detect_face.detect_face_force(img, bb, pnet, rnet, onet)
+                  assert bounding_boxes.shape[0]==1
+                  _box = bounding_boxes[0]
+                  # Column 4 is thresholded at 0.3 (presumably the detection score).
+                  if _box[4]>=0.3:
+                    dst = points[:, 0].reshape( (2,5) ).T
+                    tform = trans.SimilarityTransform()
+                    tform.estimate(dst, src)
+                    M = tform.params[0:2,:]
+                    warped = cv2.warpAffine(img,M,(image_size[1],image_size[0]), borderValue = 0.0)
+                    nrof[2]+=1
+
+                # Strategies 3/4: nothing usable was detected, so crop + resize instead.
+                if warped is None:
+                  # Default ROI trims a 6% border from every side.
+                  roi = np.zeros( (4,), dtype=np.int32)
+                  roi[0] = int(img.shape[1]*0.06)
+                  roi[1] = int(img.shape[0]*0.06)
+                  roi[2] = img.shape[1]-roi[0]
+                  roi[3] = img.shape[0]-roi[1]
+                  if fimage.bbox is not None:
+                    bb = fimage.bbox
+                    h = bb[3]-bb[1]
+                    w = bb[2]-bb[0]
+                    x = bb[0]
+                    y = bb[1]
+                    # Re-centre the box horizontally at the output aspect ratio, clamped to the image.
+                    _w = int( (float(h)/image_size[0])*image_size[1] )
+                    x += (w-_w)//2
+                    x = max(0,x)
+                    xw = x+_w
+                    xw = min(xw, img.shape[1])
+                    roi = np.array( (x, y, xw, y+h), dtype=np.int32)
+                    nrof[3]+=1
+                  else:
+                    nrof[4]+=1
+                  warped = img[roi[1]:roi[3],roi[0]:roi[2],:]
+                  warped = cv2.resize(warped, (image_size[1], image_size[0]))
+                # Reverse the channel axis (RGB -> BGR) before handing the crop to cv2.imwrite.
+                bgr = warped[...,::-1]
+                cv2.imwrite(target_file, bgr)
+                # Index line: a leading 1, the output path and the integer class label.
+                oline = '%d\t%s\t%d\n' % (1,target_file, int(fimage.classname))
+                text_file.write(oline)
+                            
+
+def parse_arguments(argv):
+    """Parse the FaceScrub alignment options from ``argv``.
+
+    :param argv: argument strings without the program name.
+    :return: argparse namespace with ``input_dir`` and ``output_dir``.
+    """
+    cli = argparse.ArgumentParser()
+    cli.add_argument('--input-dir', type=str, help='Directory with unaligned images.')
+    cli.add_argument('--output-dir', type=str, help='Directory with aligned face thumbnails.')
+    return cli.parse_args(argv)
+
+if __name__ == '__main__':  # run only when executed as a script
+    main(parse_arguments(sys.argv[1:]))  # sys.argv[0] (the script path) is not passed on
+
diff --git a/src/align/align_insight.py b/src/align/align_insight.py
new file mode 100644
index 0000000..a0e5d3a
--- /dev/null
+++ b/src/align/align_insight.py
@@ -0,0 +1,247 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from scipy import misc
+import sys
+import os
+import argparse
+import tensorflow as tf
+import numpy as np
+#import facenet
+import detect_face
+import random
+from time import sleep
+sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'common'))
+import face_image
+
+def to_rgb(img):
+    """Replicate a 2-D grayscale array into a (rows, cols, 3) uint8 image."""
+    rows, cols = img.shape  # unpacking fails unless img is exactly 2-D
+    channel = np.asarray(img, dtype=np.uint8).reshape(rows, cols, 1)
+    return np.repeat(channel, 3, axis=2)
+
+
+def IOU(Reframe,GTframe):
+  """Return the intersection-over-union of two boxes.
+
+  Both boxes are indexed as [x1, y1, x2, y2]; only indices 0..3 are read.
+
+  :param Reframe: first box.
+  :param GTframe: second box.
+  :return: 0 when the boxes share no area, otherwise
+           overlap / (area1 + area2 - overlap) as a float.
+  """
+  ax, ay = Reframe[0], Reframe[1]
+  aw, ah = Reframe[2]-Reframe[0], Reframe[3]-Reframe[1]
+  bx, by = GTframe[0], GTframe[1]
+  bw, bh = GTframe[2]-GTframe[0], GTframe[3]-GTframe[1]
+
+  # Overlap per axis = sum of both extents minus the extent of their hull.
+  overlap_w = aw+bw-(max(ax+aw,bx+bw)-min(ax,bx))
+  overlap_h = ah+bh-(max(ay+ah,by+bh)-min(ay,by))
+
+  # Touching or disjoint boxes yield a non-positive overlap on some axis.
+  if overlap_w <= 0 or overlap_h <= 0:
+    return 0
+
+  # Union = both areas minus the doubly counted overlap.
+  overlap = overlap_w*overlap_h
+  area_a, area_b = aw*ah, bw*bh
+  return overlap*1./(area_a+area_b-overlap)
+
+
+def main(args):
+    """Detect faces with MTCNN and write boxes + landmarks to a list file.
+
+    For every image of dataset ``args.name`` under ``args.input_dir`` one
+    detection is chosen: the large, centred one when the image carries no
+    annotated box, otherwise the detection with the highest IoU against that
+    box, falling back to ``detect_face_force`` when nothing was detected.
+    A tab-separated line (a leading 0, path, label, box, ten landmark values)
+    is written to ``faceinsight_align_<name>.lst`` in ``args.output_dir``;
+    with ``args.force`` > 0 images without a detection are still listed.
+
+    :param args: namespace with ``input_dir``, ``name``, ``output_dir``, ``force``.
+    """
+    output_dir = os.path.expanduser(args.output_dir)
+    if not os.path.exists(output_dir):
+        os.makedirs(output_dir)
+    dataset = face_image.get_dataset(args.name, args.input_dir)
+    print('dataset size', args.name, len(dataset))
+    
+    print('Creating networks and loading parameters')
+    
+    with tf.Graph().as_default():
+        sess = tf.Session()
+        with sess.as_default():
+            pnet, rnet, onet = detect_face.create_mtcnn(sess, None)
+    
+    minsize = 100 # minimum size of face
+    threshold = [ 0.6, 0.7, 0.7 ]  # three steps's threshold
+    factor = 0.709 # scale factor
+    if args.name=='lfw' or args.name=='webface' or args.name=='vgg':
+      minsize = 20
+      threshold = [0.6,0.7,0.9]
+      factor = 0.85
+
+    print(minsize)
+    print(threshold)
+    print(factor)
+
+    output_filename = os.path.join(output_dir, 'faceinsight_align_%s.lst' % args.name)
+    
+    with open(output_filename, "w") as text_file:
+        nrof_images_total = 0
+        nrof_successfully_aligned = 0
+        nrof_changed = 0
+        nrof_iou3 = 0
+        nrof_force = 0
+        for fimage in dataset:
+            if nrof_images_total%100==0:
+              print("Processing %d, (%d)" % (nrof_images_total, nrof_successfully_aligned))
+            nrof_images_total += 1
+            image_path = fimage.image_path
+            if not os.path.exists(image_path):
+              print('image not found (%s)'%image_path)
+              continue
+            try:
+                img = misc.imread(image_path)
+            except (IOError, ValueError, IndexError) as e:
+                errorMessage = '{}: {}'.format(image_path, e)
+                print(errorMessage)
+            else:
+                if img.ndim<2:
+                    print('Unable to align "%s", img dim error' % image_path)
+                    continue
+                if img.ndim == 2:
+                    img = to_rgb(img)
+                img = img[:,:,0:3]
+                _minsize = minsize
+                if fimage.bbox is not None:
+                  _bb = fimage.bbox
+                  _minsize = min( [_bb[2]-_bb[0], _bb[3]-_bb[1], img.shape[0]//2, img.shape[1]//2] )
+
+                bounding_boxes, points = detect_face.detect_face(img, _minsize, pnet, rnet, onet, threshold, factor)
+                bindex = -1
+                nrof_faces = bounding_boxes.shape[0]
+                if fimage.bbox is None and nrof_faces>0:
+                  det = bounding_boxes[:,0:4]
+                  img_size = np.asarray(img.shape)[0:2]
+                  bindex = 0
+                  if nrof_faces>1:
+                    bounding_box_size = (det[:,2]-det[:,0])*(det[:,3]-det[:,1])
+                    img_center = img_size / 2
+                    offsets = np.vstack([ (det[:,0]+det[:,2])/2-img_center[1], (det[:,1]+det[:,3])/2-img_center[0] ])
+                    offset_dist_squared = np.sum(np.power(offsets,2.0),0)
+                    bindex = np.argmax(bounding_box_size-offset_dist_squared*2.0) # some extra weight on the centering
+                if fimage.bbox is not None:
+                  if nrof_faces>0:
+                    assert(bounding_boxes.shape[0]==points.shape[1])
+                    det = bounding_boxes[:,0:4]
+                    index2 = [0.0, 0]
+                    # range (not xrange) keeps this loop working on Python 3 as well.
+                    for i in range(det.shape[0]):
+                      _det = det[i]
+                      iou = IOU(fimage.bbox, _det)
+                      if iou>index2[0]:
+                        index2[0] = iou
+                        index2[1] = i
+                    # NOTE(review): index2[0] starts at 0.0, so ">-0.3" is always true and the
+                    # best-IoU detection is always taken; align_facescrub.py uses ">0.3" -- confirm intent.
+                    if index2[0]>-0.3:
+                      bindex = index2[1]
+                      nrof_iou3+=1
+                  if bindex<0:
+                    bounding_boxes, points = detect_face.detect_face_force(img, fimage.bbox, pnet, rnet, onet)
+                    bindex = 0
+                    nrof_force+=1
+                  #if bindex<0:
+                  #  _img = img[fimage.bbox[1]:fimage.bbox[3], fimage.bbox[0]:fimage.bbox[2],:]
+                  #  woffset = fimage.bbox[0]
+                  #  hoffset = fimage.bbox[1]
+                  #  _minsize = min( [_img.shape[0]//3, _img.shape[1]//3] )
+                  #  bounding_boxes, points = detect_face.detect_face(_img, _minsize, pnet, rnet, onet, [0.6,0.7,0.01], factor)
+                  #  nrof_faces = bounding_boxes.shape[0]
+                  #  print(nrof_faces)
+                  #  if nrof_faces>0:
+                  #    #print(points.shape)
+                  #    #assert(nrof_faces>0)
+                  #    bounding_boxes[:,0]+=woffset
+                  #    bounding_boxes[:,2]+=woffset
+                  #    bounding_boxes[:,1]+=hoffset
+                  #    bounding_boxes[:,3]+=hoffset
+                  #    points[0:5,:] += woffset
+                  #    points[5:10,:] += hoffset
+                  #    bindex = 0
+                  #    score = bounding_boxes[bindex,4]
+                  #    print(score)
+                  #    if score<=0.0:
+                  #      bindex = -1
+                  #    else:
+                  #      nrof_force+=1
+                  #if bindex<0:
+                  #  _bb = fimage.bbox
+                  #  _minsize = min( [_bb[2]-_bb[0], _bb[3]-_bb[1], img.shape[0]//2, img.shape[1]//2] )
+                  #  bounding_boxes, points = detect_face.detect_face(img, _minsize, pnet, rnet, onet, [0.6,0.7,0.1], factor)
+                  #  nrof_faces = bounding_boxes.shape[0]
+                  #  print(nrof_faces)
+                  #  if nrof_faces>0:
+                  #    bindex = 0
+                #if fimage.bbox is not None and bounding_boxes.shape[0]==0:
+                #  bounding_boxes, points = detect_face.detect_face(img, _minsize, pnet, rnet, onet, [0.6,0.7,0.3], factor)
+
+
+                        
+                if bindex>=0:
+
+                    det = bounding_boxes[:,0:4]
+                    det = det[bindex,:]
+                    points = points[:, bindex]
+                    #points need to be transpose, points = points.reshape( (5,2) ).transpose()
+                    det = np.squeeze(det)
+                    #bb = np.zeros(4, dtype=np.int32)
+                    #bb[0] = np.maximum(det[0]-args.margin/2, 0)
+                    #bb[1] = np.maximum(det[1]-args.margin/2, 0)
+                    #bb[2] = np.minimum(det[2]+args.margin/2, img_size[1])
+                    #bb[3] = np.minimum(det[3]+args.margin/2, img_size[0])
+                    bb = det
+                    #print(points.shape)
+                    points = list(points.flatten())
+                    assert(len(points)==10)
+                    #cropped = img[bb[1]:bb[3],bb[0]:bb[2],:]
+                    #scaled = misc.imresize(cropped, (args.image_size, args.image_size), interp='bilinear')
+                    #misc.imsave(output_filename, scaled)
+                    nrof_successfully_aligned += 1
+                    oline = '%d\t%s\t%d\t%d\t%d\t%d\t%d\t' % (0,fimage.image_path, int(fimage.classname), bb[0], bb[1], bb[2], bb[3])
+                    oline += '\t'.join([str(x) for x in points])
+                    text_file.write("%s\n"%oline)
+                else:
+                    print('Unable to align "%s", no face detected' % image_path)
+                    if args.force>0:
+                      if fimage.bbox is None:
+                        oline = '%d\t%s\t%d\n' % (0,fimage.image_path, int(fimage.classname))
+                      else:
+                        bb = fimage.bbox
+                        oline = '%d\t%s\t%d\t%d\t%d\t%d\t%d\n' % (0,fimage.image_path, int(fimage.classname), bb[0], bb[1], bb[2], bb[3])
+                      text_file.write(oline)
+                            
+    print('Total number of images: %d' % nrof_images_total)
+    print('Number of successfully aligned images: %d' % nrof_successfully_aligned)
+    print('Number of changed: %d' % nrof_changed)
+    print('Number of iou3: %d' % nrof_iou3)
+    print('Number of force: %d' % nrof_force)
+
+def parse_arguments(argv):
+    """Parse ``argv`` (without the program name) into an argparse namespace.
+
+    The namespace carries ``input_dir``, ``name``, ``output_dir`` and ``force``."""
+    cli = argparse.ArgumentParser()
+    cli.add_argument('--input-dir', type=str, help='Directory with unaligned images.')
+    cli.add_argument('--name', type=str, help='dataset name, can be facescrub, megaface, webface, celeb.')
+    cli.add_argument('--output-dir', type=str, help='Directory with aligned face thumbnails.')
+    cli.add_argument('--force', type=int, help='force to output if no faces detected.', default=1)
+    return cli.parse_args(argv)
+
+if __name__ == '__main__':  # run only when executed as a script
+    main(parse_arguments(sys.argv[1:]))  # sys.argv[0] (the script path) is not passed on
diff --git a/src/align/align_lfw.py b/src/align/align_lfw.py
new file mode 100644
index 0000000..dd8ce94
--- /dev/null
+++ b/src/align/align_lfw.py
@@ -0,0 +1,161 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from scipy import misc
+import sys
+import os
+import argparse
+import tensorflow as tf
+import numpy as np
+#import facenet
+import detect_face
+import random
+from time import sleep
+sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'common'))
+import face_image
+import face_preprocess
+from skimage import transform as trans
+import cv2
+
+def to_rgb(img):
+    """Replicate a 2-D grayscale array into a (rows, cols, 3) uint8 image."""
+    rows, cols = img.shape  # unpacking fails unless img is exactly 2-D
+    channel = np.asarray(img, dtype=np.uint8).reshape(rows, cols, 1)
+    return np.repeat(channel, 3, axis=2)
+
+
+def IOU(Reframe,GTframe):
+  """Return the intersection-over-union of two boxes.
+
+  Both boxes are indexed as [x1, y1, x2, y2]; only indices 0..3 are read.
+
+  :param Reframe: first box.
+  :param GTframe: second box.
+  :return: 0 when the boxes share no area, otherwise
+           overlap / (area1 + area2 - overlap) as a float.
+  """
+  ax, ay = Reframe[0], Reframe[1]
+  aw, ah = Reframe[2]-Reframe[0], Reframe[3]-Reframe[1]
+  bx, by = GTframe[0], GTframe[1]
+  bw, bh = GTframe[2]-GTframe[0], GTframe[3]-GTframe[1]
+
+  # Overlap per axis = sum of both extents minus the extent of their hull.
+  overlap_w = aw+bw-(max(ax+aw,bx+bw)-min(ax,bx))
+  overlap_h = ah+bh-(max(ay+ah,by+bh)-min(ay,by))
+
+  # Touching or disjoint boxes yield a non-positive overlap on some axis.
+  if overlap_w <= 0 or overlap_h <= 0:
+    return 0
+
+  # Union = both areas minus the doubly counted overlap.
+  overlap = overlap_w*overlap_h
+  area_a, area_b = aw*ah, bw*bh
+  return overlap*1./(area_a+area_b-overlap)
+
+
+def main(args):
+    """Detect one face per LFW image, align it and write an index file.
+
+    Each image of the ``lfw`` dataset under ``args.input_dir`` is run through
+    MTCNN; with several detections the large, centred one is preferred.  The
+    chosen box and landmarks (or ``None`` for both when nothing was found) go to
+    ``face_preprocess.preprocess`` and the result is saved below
+    ``args.output_dir``, which also receives the ``lst`` index file.
+
+    :param args: namespace with ``input_dir``, ``output_dir`` and ``image_size``.
+    """
+    dataset = face_image.get_dataset('lfw', args.input_dir)
+    print('dataset size', 'lfw', len(dataset))
+    
+    print('Creating networks and loading parameters')
+    
+    with tf.Graph().as_default():
+        sess = tf.Session()
+        with sess.as_default():
+            pnet, rnet, onet = detect_face.create_mtcnn(sess, None)
+    
+    minsize = 20
+    threshold = [0.6,0.7,0.9]
+    factor = 0.85
+
+
+    if not os.path.exists(args.output_dir):
+      os.makedirs(args.output_dir)
+
+    output_filename = os.path.join(args.output_dir, 'lst')
+    
+    
+    with open(output_filename, "w") as text_file:
+        nrof_images_total = 0
+        nrof = np.zeros( (5,), dtype=np.int32)
+        for fimage in dataset:
+            if nrof_images_total%100==0:
+              print("Processing %d, (%s)" % (nrof_images_total, nrof))
+            nrof_images_total += 1
+            image_path = fimage.image_path
+            if not os.path.exists(image_path):
+              print('image not found (%s)'%image_path)
+              continue
+            filename = os.path.splitext(os.path.split(image_path)[1])[0]
+            try:
+                img = misc.imread(image_path)
+            except (IOError, ValueError, IndexError) as e:
+                errorMessage = '{}: {}'.format(image_path, e)
+                print(errorMessage)
+            else:
+                if img.ndim<2:
+                    print('Unable to align "%s", img dim error' % image_path)
+                    continue
+                if img.ndim == 2:
+                    img = to_rgb(img)
+                img = img[:,:,0:3]
+                _paths = fimage.image_path.split('/')
+                a,b = _paths[-2], _paths[-1]
+                target_dir = os.path.join(args.output_dir, a)
+                if not os.path.exists(target_dir):
+                  os.makedirs(target_dir)
+                target_file = os.path.join(target_dir, b)
+                _minsize = minsize
+                _bbox = None
+                _landmark = None
+                bounding_boxes, points = detect_face.detect_face(img, _minsize, pnet, rnet, onet, threshold, factor)
+                nrof_faces = bounding_boxes.shape[0]
+                if nrof_faces>0:
+                  det = bounding_boxes[:,0:4]
+                  img_size = np.asarray(img.shape)[0:2]
+                  bindex = 0
+                  if nrof_faces>1:
+                      bounding_box_size = (det[:,2]-det[:,0])*(det[:,3]-det[:,1])
+                      img_center = img_size / 2
+                      offsets = np.vstack([ (det[:,0]+det[:,2])/2-img_center[1], (det[:,1]+det[:,3])/2-img_center[0] ])
+                      offset_dist_squared = np.sum(np.power(offsets,2.0),0)
+                      bindex = np.argmax(bounding_box_size-offset_dist_squared*2.0) # some extra weight on the centering
+                  _bbox = bounding_boxes[bindex, 0:4]
+                  _landmark = points[:, bindex].reshape( (2,5) ).T
+                  nrof[0]+=1
+                else:
+                  nrof[1]+=1
+                # args.image_size is forwarded as the raw string (default '112,96'); presumably parsed by preprocess.
+                warped = face_preprocess.preprocess(img, bbox=_bbox, landmark = _landmark, image_size=args.image_size)
+                # Reverse the channel axis (RGB -> BGR) before handing the crop to cv2.imwrite.
+                bgr = warped[...,::-1]
+                cv2.imwrite(target_file, bgr)
+                oline = '%d\t%s\t%d\n' % (1,target_file, int(fimage.classname))
+                text_file.write(oline)
+                            
+
+def parse_arguments(argv):
+    """Parse ``argv`` (without the program name) into an argparse namespace.
+
+    The namespace carries ``input_dir``, ``output_dir`` and ``image_size``."""
+    cli = argparse.ArgumentParser()
+    cli.add_argument('--input-dir', type=str, help='Directory with unaligned images.')
+    cli.add_argument('--output-dir', type=str, help='Directory with aligned face thumbnails.')
+    cli.add_argument('--image-size', type=str, help='Image size (height, width) in pixels.', default='112,96')
+    return cli.parse_args(argv)
+
+if __name__ == '__main__':  # run only when executed as a script
+    main(parse_arguments(sys.argv[1:]))  # sys.argv[0] (the script path) is not passed on
+
+
diff --git a/src/align/align_megaface.py b/src/align/align_megaface.py
new file mode 100644
index 0000000..e43a295
--- /dev/null
+++ b/src/align/align_megaface.py
@@ -0,0 +1,237 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from scipy import misc
+import sys
+import os
+import argparse
+import tensorflow as tf
+import numpy as np
+#import facenet
+import detect_face
+import random
+from time import sleep
+sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'common'))
+import face_image
+from skimage import transform as trans
+import cv2
+
+def to_rgb(img):  # grayscale (2-D) array -> 3-channel uint8 array with identical channels
+    rows, cols = img.shape  # unpacking fails unless img is exactly 2-D
+    rgb = np.empty((rows, cols, 3), dtype=np.uint8)
+    rgb[...] = img[:, :, np.newaxis]  # broadcast the single channel into all three
+    return rgb
+
+
+def IOU(Reframe,GTframe):
+  """Return the intersection-over-union of two boxes given as (x1, y1, x2, y2).
+
+  The overlap extent on each axis is obtained as the sum of both side lengths
+  minus the span of their union; a non-positive extent means no overlap and
+  yields 0.
+  """
+  ax, ay = Reframe[0], Reframe[1]
+  aw = Reframe[2]-Reframe[0]
+  ah = Reframe[3]-Reframe[1]
+
+  bx, by = GTframe[0], GTframe[1]
+  bw = GTframe[2]-GTframe[0]
+  bh = GTframe[3]-GTframe[1]
+
+  # overlap along x, then along y
+  overlap_w = aw+bw-(max(ax+aw,bx+bw)-min(ax,bx))
+  overlap_h = ah+bh-(max(ay+ah,by+bh)-min(ay,by))
+
+  if overlap_w <= 0 or overlap_h <= 0:
+    return 0
+
+  inter = overlap_w*overlap_h
+  area_a = aw*ah
+  area_b = bw*bh
+  # union = both areas minus the part counted twice
+  return inter*1./(area_a+area_b-inter)
+
+
+def main(args):
+    """Align every image of the dataset ``args.name`` and write 112x96 crops.
+
+    Each image goes through a chain of fallbacks; ``nrof[i]`` counts how many
+    images were produced by strategy ``i``:
+      0: warp by the dataset landmarks, then re-detect landmarks with MTCNN
+      1: MTCNN detection whose box overlaps the dataset bbox with IoU > 0.3
+      2: forced landmark regression inside the dataset bbox (score >= 0.3)
+      3: resized crop derived from the dataset bbox
+      4: resized central crop (6% border removed) when no bbox exists
+    A tab-separated line per written image is appended to ``<output-dir>/lst``.
+    """
+    # Expand '~' once; every path below is built from the expanded directory so
+    # that the directory created here is the one the outputs are written to.
+    output_dir = os.path.expanduser(args.output_dir)
+    if not os.path.exists(output_dir):
+        os.makedirs(output_dir)
+    dataset = face_image.get_dataset(args.name, args.input_dir)
+    print('dataset size', args.name, len(dataset))
+
+    print('Creating networks and loading parameters')
+
+    # Build the three MTCNN stages in a dedicated graph and session.
+    with tf.Graph().as_default():
+        sess = tf.Session()
+        with sess.as_default():
+            pnet, rnet, onet = detect_face.create_mtcnn(sess, None)
+
+    threshold = [ 0.6, 0.7, 0.7 ]  # score thresholds of the three MTCNN stages
+    factor = 0.709 # scale factor
+    image_size = [112,96]  # (height, width) of the aligned output
+    # Target coordinates onto which the five landmark points are mapped.
+    src = np.array([
+      [30.2946, 51.6963],
+      [65.5318, 51.5014],
+      [48.0252, 71.7366],
+      [33.5493, 92.3655],
+      [62.7299, 92.2041] ], dtype=np.float32 )
+
+    output_filename = os.path.join(output_dir, 'lst')
+
+    with open(output_filename, "w") as text_file:
+        nrof_images_total = 0
+        nrof = np.zeros( (5,), dtype=np.int32)
+        for fimage in dataset:
+            # Progress report every 100 images, with the per-strategy counters.
+            if nrof_images_total%100==0:
+              print("Processing %d, (%s)" % (nrof_images_total, nrof))
+            nrof_images_total += 1
+            image_path = fimage.image_path
+            if not os.path.exists(image_path):
+              print('image not found (%s)'%image_path)
+              continue
+            # Unreadable images are reported and skipped.
+            try:
+                img = misc.imread(image_path)
+            except (IOError, ValueError, IndexError) as e:
+                errorMessage = '{}: {}'.format(image_path, e)
+                print(errorMessage)
+            else:
+                if img.ndim<2:
+                    print('Unable to align "%s", img dim error' % image_path)
+                    continue
+                if img.ndim == 2:
+                    img = to_rgb(img)
+                img = img[:,:,0:3]  # keep only the first three channels
+                # Mirror the last two directory levels of the input path.
+                _paths = fimage.image_path.split('/')
+                a,b,c = _paths[-3], _paths[-2], _paths[-1]
+                target_dir = os.path.join(output_dir, a, b)
+                if not os.path.exists(target_dir):
+                  os.makedirs(target_dir)
+                target_file = os.path.join(target_dir, c)
+                warped = None
+                # Strategy 0: use the dataset landmarks when available.
+                if fimage.landmark is not None:
+                  dst = fimage.landmark.astype(np.float32)
+
+                  # Coarse warp onto a canvas twice the output size, then let
+                  # MTCNN locate the five landmarks on that canvas.
+                  tform = trans.SimilarityTransform()
+                  tform.estimate(dst, src[0:3,:]*1.5+image_size[0]*0.25)
+                  M = tform.params[0:2,:]
+                  warped0 = cv2.warpAffine(img,M,(image_size[1]*2,image_size[0]*2), borderValue = 0.0)
+                  _minsize = image_size[0]
+                  bounding_boxes, points = detect_face.detect_face(warped0, _minsize, pnet, rnet, onet, threshold, factor)
+                  if bounding_boxes.shape[0]>0:
+                    bindex = 0
+                    # points holds 10 values per face; reshape into five rows.
+                    dst = points[:, bindex].reshape( (2,5) ).T
+                    tform = trans.SimilarityTransform()
+                    tform.estimate(dst, src)
+                    M = tform.params[0:2,:]
+                    warped = cv2.warpAffine(warped0,M,(image_size[1],image_size[0]), borderValue = 0.0)
+                    nrof[0]+=1
+                # Strategy 1: detect on the raw image, match against the dataset bbox.
+                if warped is None and fimage.bbox is not None:
+                  _minsize = img.shape[0]//4
+                  bounding_boxes, points = detect_face.detect_face(img, _minsize, pnet, rnet, onet, threshold, factor)
+                  if bounding_boxes.shape[0]>0:
+                    det = bounding_boxes[:,0:4]
+                    # Pick the detection that overlaps the dataset bbox most.
+                    bindex = -1
+                    index2 = [0.0, 0]
+                    for i in range(det.shape[0]):
+                      _det = det[i]
+                      iou = IOU(fimage.bbox, _det)
+                      if iou>index2[0]:
+                        index2[0] = iou
+                        index2[1] = i
+                    if index2[0]>0.3:
+                      bindex = index2[1]
+                    if bindex>=0:
+                      dst = points[:, bindex].reshape( (2,5) ).T
+                      tform = trans.SimilarityTransform()
+                      tform.estimate(dst, src)
+                      M = tform.params[0:2,:]
+                      warped = cv2.warpAffine(img,M,(image_size[1],image_size[0]), borderValue = 0.0)
+                      nrof[1]+=1
+                # Strategy 2: regress landmarks directly inside the dataset bbox.
+                if warped is None and fimage.bbox is not None:
+                  bb = fimage.bbox
+                  bounding_boxes, points = detect_face.detect_face_force(img, bb, pnet, rnet, onet)
+                  assert bounding_boxes.shape[0]==1
+                  _box = bounding_boxes[0]
+                  if _box[4]>=0.3:
+                    dst = points[:, 0].reshape( (2,5) ).T
+                    tform = trans.SimilarityTransform()
+                    tform.estimate(dst, src)
+                    M = tform.params[0:2,:]
+                    warped = cv2.warpAffine(img,M,(image_size[1],image_size[0]), borderValue = 0.0)
+                    nrof[2]+=1
+
+                # Strategies 3/4: no landmarks found, fall back to a plain crop.
+                if warped is None:
+                  # Default region: the image with a 6% border removed.
+                  roi = np.zeros( (4,), dtype=np.int32)
+                  roi[0] = int(img.shape[1]*0.06)
+                  roi[1] = int(img.shape[0]*0.06)
+                  roi[2] = img.shape[1]-roi[0]
+                  roi[3] = img.shape[0]-roi[1]
+                  if fimage.bbox is not None:
+                    bb = fimage.bbox
+                    h = bb[3]-bb[1]
+                    w = bb[2]-bb[0]
+                    x = bb[0]
+                    y = bb[1]
+                    # Width that gives the bbox height the output aspect ratio,
+                    # centred horizontally on the bbox and clipped to the image.
+                    _w = int( (float(h)/image_size[0])*image_size[1] )
+                    x += (w-_w)//2
+                    x = max(0,x)
+                    xw = x+_w
+                    xw = min(xw, img.shape[1])
+                    roi = np.array( (x, y, xw, y+h), dtype=np.int32)
+                    nrof[3]+=1
+                  else:
+                    nrof[4]+=1
+                  warped = img[roi[1]:roi[3],roi[0]:roi[2],:]
+                  warped = cv2.resize(warped, (image_size[1], image_size[0]))
+                bgr = warped[...,::-1]  # reverse the channel order before cv2.imwrite
+                cv2.imwrite(target_file, bgr)
+                # List line: constant 1, output path, integer class label.
+                oline = '%d\t%s\t%d\n' % (1,target_file, int(fimage.classname))
+                text_file.write(oline)
+                            
+
+def parse_arguments(argv):
+    """Parse the command-line options of the alignment script.
+
+    ``argv`` excludes the program name; every option is a string, None if omitted.
+    """
+    cli = argparse.ArgumentParser()
+    for flag, description in (('--input-dir', 'Directory with unaligned images.'),
+                              ('--name', 'dataset name, can be facescrub, megaface, webface, celeb.'),
+                              ('--output-dir', 'Directory with aligned face thumbnails.')):
+        cli.add_argument(flag, type=str, help=description)
+    return cli.parse_args(argv)
+
+if __name__ == '__main__':  # run the alignment only when executed as a script
+    main(parse_arguments(sys.argv[1:]))  # argv[0] (the program name) is dropped
+
diff --git a/src/align/det1.npy b/src/align/det1.npy
new file mode 100644
index 0000000..7c05a2c
Binary files /dev/null and b/src/align/det1.npy differ
diff --git a/src/align/det2.npy b/src/align/det2.npy
new file mode 100644
index 0000000..85d5bf0
Binary files /dev/null and b/src/align/det2.npy differ
diff --git a/src/align/det3.npy b/src/align/det3.npy
new file mode 100644
index 0000000..90d5ba9
Binary files /dev/null and b/src/align/det3.npy differ
diff --git a/src/align/detect_face.py b/src/align/detect_face.py
new file mode 100644
index 0000000..47af5f5
--- /dev/null
+++ b/src/align/detect_face.py
@@ -0,0 +1,848 @@
+""" Tensorflow implementation of the face detection / alignment algorithm found at
+https://github.com/kpzhang93/MTCNN_face_detection_alignment
+"""
+# MIT License
+# 
+# Copyright (c) 2016 David Sandberg
+# 
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+# 
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+# 
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from six import string_types, iteritems
+
+import numpy as np
+import tensorflow as tf
+#from math import floor
+import cv2
+import os
+
+def layer(op):
+    '''Decorator for composable network layers.
+
+    The wrapped method receives the current terminal node(s) as its input,
+    and its output is registered under ``name`` and becomes the new terminal,
+    so calls can be chained: ``net.feed('data').conv(...).prelu(...)``.
+    '''
+    import functools  # local import: keeps the module's import block untouched
+
+    @functools.wraps(op)  # keep the layer method's __name__ and docstring
+    def layer_decorated(self, *args, **kwargs):
+        # Automatically set a name if not provided.
+        name = kwargs.setdefault('name', self.get_unique_name(op.__name__))
+        if not self.terminals:
+            raise RuntimeError('No input variables found for layer %s.' % name)
+        # A single terminal is passed as-is, several as a list.
+        layer_input = self.terminals[0] if len(self.terminals) == 1 else list(self.terminals)
+        layer_output = op(self, layer_input, *args, **kwargs)
+        self.layers[name] = layer_output  # add to the layer look-up table
+        self.feed(layer_output)  # this output is the input of the next layer
+        return self  # allow chained calls
+
+    return layer_decorated
+
+class Network(object):
+    '''Base class for chainable layer graphs; subclasses implement ``setup``.'''
+
+    def __init__(self, inputs, trainable=True):
+        '''Store the named input nodes and build the graph by calling ``setup``.'''
+        self.inputs = inputs  # the input nodes for this network
+        self.terminals = []  # the current list of terminal nodes
+        self.layers = dict(inputs)  # mapping from layer names to layers
+        # If true, the resulting variables are set as trainable
+        self.trainable = trainable
+
+        self.setup()
+
+    def setup(self):
+        '''Construct the network. '''
+        raise NotImplementedError('Must be implemented by the subclass.')
+
+    def load(self, data_path, session, ignore_missing=False):
+        '''Load network weights.
+        data_path: The path to the numpy-serialized network weights
+        session: The current TensorFlow session
+        ignore_missing: If true, serialized weights for missing layers are ignored.
+        '''
+        # The file is expected to hold a dict {layer name: {param name: array}}.
+        # NumPy >= 1.16.3 refuses such object payloads unless allow_pickle=True.
+        # NOTE: unpickling can execute code from the file; load trusted weights only.
+        data_dict = np.load(data_path, encoding='latin1', allow_pickle=True).item() #pylint: disable=no-member
+
+        for op_name in data_dict:
+            with tf.variable_scope(op_name, reuse=True):
+                for param_name, data in iteritems(data_dict[op_name]):
+                    try:
+                        var = tf.get_variable(param_name)
+                        session.run(var.assign(data))
+                    except ValueError:
+                        if not ignore_missing:
+                            raise
+
+    def feed(self, *args):
+        '''Set the input(s) for the next operation by replacing the terminal nodes.
+        The arguments can be either layer names or the actual layers.
+        '''
+        assert len(args) != 0
+        self.terminals = []
+        for fed_layer in args:
+            if isinstance(fed_layer, string_types):
+                try:
+                    fed_layer = self.layers[fed_layer]
+                except KeyError:
+                    raise KeyError('Unknown layer name fed: %s' % fed_layer)
+            self.terminals.append(fed_layer)
+        return self
+
+    def get_output(self):
+        '''Returns the current network output.'''
+        return self.terminals[-1]
+
+    def get_unique_name(self, prefix):
+        '''Returns an index-suffixed unique name for the given prefix.
+        This is used for auto-generating layer names based on the type-prefix.
+        '''
+        ident = sum(t.startswith(prefix) for t in self.layers) + 1
+        return '%s_%d' % (prefix, ident)
+
+    def make_var(self, name, shape):
+        '''Creates a new TensorFlow variable.'''
+        return tf.get_variable(name, shape, trainable=self.trainable)
+
+    def validate_padding(self, padding):
+        '''Verifies that the padding is one of the supported ones.'''
+        assert padding in ('SAME', 'VALID')
+
+    @layer
+    def conv(self, inp, k_h, k_w, c_o, s_h, s_w, name,
+             relu=True, padding='SAME', group=1, biased=True):
+        '''Convolve ``inp`` with a k_h x k_w kernel producing ``c_o`` channels.
+
+        s_h/s_w are the strides; ``biased`` adds a bias term and ``relu`` a ReLU.
+        ``group`` is validated and divides the kernel's input depth, but no
+        split/concat of channel groups is performed here.
+        '''
+        # Verify that the padding is acceptable
+        self.validate_padding(padding)
+        # Get the number of channels in the input
+        c_i = int(inp.get_shape()[-1])
+        # Verify that the grouping parameter is valid
+        assert c_i % group == 0
+        assert c_o % group == 0
+        # Convolution for a given input and kernel
+        convolve = lambda i, k: tf.nn.conv2d(i, k, [1, s_h, s_w, 1], padding=padding)
+        with tf.variable_scope(name) as scope:
+            kernel = self.make_var('weights', shape=[k_h, k_w, c_i // group, c_o])
+            # This is the common-case. Convolve the input without any further complications.
+            output = convolve(inp, kernel)
+            # Add the biases
+            if biased:
+                biases = self.make_var('biases', [c_o])
+                output = tf.nn.bias_add(output, biases)
+            if relu:
+                # ReLU non-linearity
+                output = tf.nn.relu(output, name=scope.name)
+            return output
+
+    @layer
+    def prelu(self, inp, name):
+        '''Parametric ReLU with one variable ``alpha`` per channel of ``inp``.'''
+        with tf.variable_scope(name):
+            i = int(inp.get_shape()[-1])
+            alpha = self.make_var('alpha', shape=(i,))
+            # identity for positive inputs, alpha * inp for negative ones
+            output = tf.nn.relu(inp) + tf.multiply(alpha, -tf.nn.relu(-inp))
+        return output
+
+    @layer
+    def max_pool(self, inp, k_h, k_w, s_h, s_w, name, padding='SAME'):
+        '''Max-pool ``inp`` with a k_h x k_w window and strides s_h, s_w.'''
+        self.validate_padding(padding)
+        return tf.nn.max_pool(inp,
+                              ksize=[1, k_h, k_w, 1],
+                              strides=[1, s_h, s_w, 1],
+                              padding=padding,
+                              name=name)
+
+    @layer
+    def fc(self, inp, num_out, name, relu=True):
+        '''Fully connected layer with ``num_out`` outputs, optionally with ReLU.'''
+        with tf.variable_scope(name):
+            input_shape = inp.get_shape()
+            if input_shape.ndims == 4:
+                # The input is spatial. Vectorize it first.
+                dim = 1
+                for d in input_shape[1:].as_list():
+                    dim *= int(d)
+                feed_in = tf.reshape(inp, [-1, dim])
+            else:
+                feed_in, dim = (inp, input_shape[-1].value)
+            weights = self.make_var('weights', shape=[dim, num_out])
+            biases = self.make_var('biases', [num_out])
+            op = tf.nn.relu_layer if relu else tf.nn.xw_plus_b
+            fc = op(feed_in, weights, biases, name=name)
+            return fc
+
+    @layer
+    def softmax(self, target, axis, name=None):
+        '''Multi dimensional softmax along dimension ``axis`` of ``target``.
+
+        Refer to https://github.com/tensorflow/tensorflow/issues/210 -- the
+        native softmax only supports batch_size x dimension.
+        '''
+        max_axis = tf.reduce_max(target, axis, keep_dims=True)
+        target_exp = tf.exp(target-max_axis)  # the maximum is subtracted before exp
+        normalize = tf.reduce_sum(target_exp, axis, keep_dims=True)
+        softmax = tf.div(target_exp, normalize, name)
+        return softmax
+    
+class PNet(Network):  # MTCNN stage 1: conv/prelu/pool layers only, no fc layer
+    def setup(self):
+        (self.feed('data') #pylint: disable=no-value-for-parameter, no-member
+             .conv(3, 3, 10, 1, 1, padding='VALID', relu=False, name='conv1')
+             .prelu(name='PReLU1')
+             .max_pool(2, 2, 2, 2, name='pool1')
+             .conv(3, 3, 16, 1, 1, padding='VALID', relu=False, name='conv2')
+             .prelu(name='PReLU2')
+             .conv(3, 3, 32, 1, 1, padding='VALID', relu=False, name='conv3')
+             .prelu(name='PReLU3')
+             .conv(1, 1, 2, 1, 1, relu=False, name='conv4-1')
+             .softmax(3,name='prob1'))  # 2-channel softmax taken over axis 3
+        # Second head on the shared PReLU3 features: a 1x1 conv with 4 channels.
+        (self.feed('PReLU3') #pylint: disable=no-value-for-parameter
+             .conv(1, 1, 4, 1, 1, relu=False, name='conv4-2'))
+        
+class RNet(Network):  # MTCNN stage 2; create_mtcnn feeds it 24x24x3 inputs
+    def setup(self):
+        (self.feed('data') #pylint: disable=no-value-for-parameter, no-member
+             .conv(3, 3, 28, 1, 1, padding='VALID', relu=False, name='conv1')
+             .prelu(name='prelu1')
+             .max_pool(3, 3, 2, 2, name='pool1')
+             .conv(3, 3, 48, 1, 1, padding='VALID', relu=False, name='conv2')
+             .prelu(name='prelu2')
+             .max_pool(3, 3, 2, 2, padding='VALID', name='pool2')
+             .conv(2, 2, 64, 1, 1, padding='VALID', relu=False, name='conv3')
+             .prelu(name='prelu3')
+             .fc(128, relu=False, name='conv4')
+             .prelu(name='prelu4')
+             .fc(2, relu=False, name='conv5-1')
+             .softmax(1,name='prob1'))  # 2-way softmax over the fc outputs
+        # Second head on the shared prelu4 features: an fc layer with 4 outputs.
+        (self.feed('prelu4') #pylint: disable=no-value-for-parameter
+             .fc(4, relu=False, name='conv5-2'))
+
+class ONet(Network):  # MTCNN stage 3; create_mtcnn feeds it 48x48x3 inputs
+    def setup(self):
+        (self.feed('data') #pylint: disable=no-value-for-parameter, no-member
+             .conv(3, 3, 32, 1, 1, padding='VALID', relu=False, name='conv1')
+             .prelu(name='prelu1')
+             .max_pool(3, 3, 2, 2, name='pool1')
+             .conv(3, 3, 64, 1, 1, padding='VALID', relu=False, name='conv2')
+             .prelu(name='prelu2')
+             .max_pool(3, 3, 2, 2, padding='VALID', name='pool2')
+             .conv(3, 3, 64, 1, 1, padding='VALID', relu=False, name='conv3')
+             .prelu(name='prelu3')
+             .max_pool(2, 2, 2, 2, name='pool3')
+             .conv(2, 2, 128, 1, 1, padding='VALID', relu=False, name='conv4')
+             .prelu(name='prelu4')
+             .fc(256, relu=False, name='conv5')
+             .prelu(name='prelu5')
+             .fc(2, relu=False, name='conv6-1')
+             .softmax(1, name='prob1'))  # 2-way softmax over the fc outputs
+        # Second head on prelu5: 4 outputs, consumed by detect_face via bbreg.
+        (self.feed('prelu5') #pylint: disable=no-value-for-parameter
+             .fc(4, relu=False, name='conv6-2'))
+        # Third head on prelu5: 10 outputs, used by detect_face as landmark points.
+        (self.feed('prelu5') #pylint: disable=no-value-for-parameter
+             .fc(10, relu=False, name='conv6-3'))
+
+def create_mtcnn(sess, model_path):
+    """Build the three MTCNN nets in ``sess``; return (pnet, rnet, onet) callables.
+
+    Weights come from det1/det2/det3.npy in ``model_path``; an empty
+    ``model_path`` means the directory containing this file.
+    """
+    if not model_path:
+        model_path = os.path.split(os.path.realpath(__file__))[0]
+    stages = (('pnet', PNet, (None,None,None,3), 'det1.npy'),
+              ('rnet', RNet, (None,24,24,3), 'det2.npy'),
+              ('onet', ONet, (None,48,48,3), 'det3.npy'))
+    for scope_name, net_cls, in_shape, weights_file in stages:
+        with tf.variable_scope(scope_name):
+            net = net_cls({'data': tf.placeholder(tf.float32, in_shape, 'input')})
+            net.load(os.path.join(model_path, weights_file), sess)
+
+    # Each callable feeds an image batch and fetches the stage outputs by tensor name.
+    pnet_fun = lambda img : sess.run(('pnet/conv4-2/BiasAdd:0', 'pnet/prob1:0'), feed_dict={'pnet/input:0':img})
+    rnet_fun = lambda img : sess.run(('rnet/conv5-2/conv5-2:0', 'rnet/prob1:0'), feed_dict={'rnet/input:0':img})
+    onet_fun = lambda img : sess.run(('onet/conv6-2/conv6-2:0', 'onet/conv6-3/conv6-3:0', 'onet/prob1:0'), feed_dict={'onet/input:0':img})
+    return pnet_fun, rnet_fun, onet_fun
+
+def detect_face(img, minsize, pnet, rnet, onet, threshold, factor):
+    """Run the three-stage MTCNN cascade on ``img`` (an H x W x 3 array).
+
+    minsize: minimum face size; threshold: [th1, th2, th3] score thresholds of
+    the three stages; factor: scale step of the image pyramid.
+    Returns ``(boxes, points)``: one ``[x1, y1, x2, y2, score]`` row and one
+    10-value landmark column (five x, then five y) per face; when nothing is
+    found ``boxes`` has zero rows.
+    """
+    factor_count=0
+    total_boxes=np.empty((0,9))
+    points=[]
+    h, w = img.shape[0], img.shape[1]
+    m=12.0/minsize
+    minl=np.amin([h, w])*m
+    # create scale pyramid
+    scales=[]
+    while minl>=12:
+        scales += [m*np.power(factor, factor_count)]
+        minl = minl*factor
+        factor_count += 1
+
+    # first stage
+    for scale in scales:
+        hs=int(np.ceil(h*scale))
+        ws=int(np.ceil(w*scale))
+        im_data = imresample(img, (hs, ws))
+        im_data = (im_data-127.5)*0.0078125
+        img_x = np.expand_dims(im_data, 0)
+        img_y = np.transpose(img_x, (0,2,1,3))
+        out = pnet(img_y)
+        out0 = np.transpose(out[0], (0,2,1,3))
+        out1 = np.transpose(out[1], (0,2,1,3))
+
+        boxes, _ = generateBoundingBox(out1[0,:,:,1].copy(), out0[0,:,:,:].copy(), scale, threshold[0])
+
+        # non-maximum suppression among the boxes of this scale
+        pick = nms(boxes.copy(), 0.5, 'Union')
+        if boxes.size>0 and pick.size>0:
+            boxes = boxes[pick,:]
+            total_boxes = np.append(total_boxes, boxes, axis=0)
+
+    numbox = total_boxes.shape[0]
+    if numbox>0:
+        pick = nms(total_boxes.copy(), 0.7, 'Union')
+        total_boxes = total_boxes[pick,:]
+        regw = total_boxes[:,2]-total_boxes[:,0]
+        regh = total_boxes[:,3]-total_boxes[:,1]
+        qq1 = total_boxes[:,0]+total_boxes[:,5]*regw  # columns 5..8 shift the corners
+        qq2 = total_boxes[:,1]+total_boxes[:,6]*regh
+        qq3 = total_boxes[:,2]+total_boxes[:,7]*regw
+        qq4 = total_boxes[:,3]+total_boxes[:,8]*regh
+        total_boxes = np.transpose(np.vstack([qq1, qq2, qq3, qq4, total_boxes[:,4]]))
+        total_boxes = rerec(total_boxes.copy())
+        total_boxes[:,0:4] = np.fix(total_boxes[:,0:4]).astype(np.int32)
+        dy,edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h)
+
+    numbox = total_boxes.shape[0]
+    if numbox>0:
+        # second stage
+        tempimg = np.zeros((24,24,3,numbox))
+        for k in range(0,numbox):
+            tmp = np.zeros((int(tmph[k]),int(tmpw[k]),3))
+            tmp[dy[k]-1:edy[k],dx[k]-1:edx[k],:] = img[y[k]-1:ey[k],x[k]-1:ex[k],:]
+            if tmp.shape[0]>0 and tmp.shape[1]>0 or tmp.shape[0]==0 and tmp.shape[1]==0:
+                tempimg[:,:,:,k] = imresample(tmp, (24, 24))
+            else:
+                return np.empty((0, 5)), np.empty((10, 0))  # degenerate crop: report no faces
+        tempimg = (tempimg-127.5)*0.0078125
+        tempimg1 = np.transpose(tempimg, (3,1,0,2))
+        out = rnet(tempimg1)
+        out0 = np.transpose(out[0])
+        out1 = np.transpose(out[1])
+        score = out1[1,:]
+        ipass = np.where(score>threshold[1])
+        total_boxes = np.hstack([total_boxes[ipass[0],0:4].copy(), np.expand_dims(score[ipass].copy(),1)])
+        mv = out0[:,ipass[0]]
+        if total_boxes.shape[0]>0:
+            pick = nms(total_boxes, 0.7, 'Union')
+            total_boxes = total_boxes[pick,:]
+            total_boxes = bbreg(total_boxes.copy(), np.transpose(mv[:,pick]))
+            total_boxes = rerec(total_boxes.copy())
+
+    numbox = total_boxes.shape[0]
+    if numbox>0:
+        # third stage
+        total_boxes = np.fix(total_boxes).astype(np.int32)
+        dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h)
+        tempimg = np.zeros((48,48,3,numbox))
+        for k in range(0,numbox):
+            tmp = np.zeros((int(tmph[k]),int(tmpw[k]),3))
+            tmp[dy[k]-1:edy[k],dx[k]-1:edx[k],:] = img[y[k]-1:ey[k],x[k]-1:ex[k],:]
+            if tmp.shape[0]>0 and tmp.shape[1]>0 or tmp.shape[0]==0 and tmp.shape[1]==0:
+                tempimg[:,:,:,k] = imresample(tmp, (48, 48))
+            else:
+                return np.empty((0, 5)), np.empty((10, 0))  # degenerate crop: report no faces
+        tempimg = (tempimg-127.5)*0.0078125
+        tempimg1 = np.transpose(tempimg, (3,1,0,2))
+        out = onet(tempimg1)
+        out0 = np.transpose(out[0])
+        out1 = np.transpose(out[1])
+        out2 = np.transpose(out[2])
+        score = out2[1,:]
+        points = out1
+        ipass = np.where(score>threshold[2])
+        points = points[:,ipass[0]]
+        total_boxes = np.hstack([total_boxes[ipass[0],0:4].copy(), np.expand_dims(score[ipass].copy(),1)])
+        mv = out0[:,ipass[0]]
+
+        w = total_boxes[:,2]-total_boxes[:,0]+1  # from here on w and h are per-box sizes
+        h = total_boxes[:,3]-total_boxes[:,1]+1
+        points[0:5,:] = np.tile(w,(5, 1))*points[0:5,:] + np.tile(total_boxes[:,0],(5, 1))-1
+        points[5:10,:] = np.tile(h,(5, 1))*points[5:10,:] + np.tile(total_boxes[:,1],(5, 1))-1
+        if total_boxes.shape[0]>0:
+            total_boxes = bbreg(total_boxes.copy(), np.transpose(mv))
+            pick = nms(total_boxes.copy(), 0.7, 'Min')
+            total_boxes = total_boxes[pick,:]
+            points = points[:,pick]
+
+    return total_boxes, points
+
+def detect_face_force(img, bbox, pnet, rnet, onet):
+    """Run MTCNN stages two and three on the single caller-supplied ``bbox``.
+    All stage thresholds are 0.0. Returns ``(boxes, points)``: rows of
+    ``[x1, y1, x2, y2, score]`` and columns of 10 landmark values."""
+    total_boxes = np.zeros( (1,5), dtype=np.float32)
+    total_boxes[0,0:4] = bbox
+    threshold = [0.0,0.0,0.0]
+    h, w = img.shape[0], img.shape[1]
+    numbox = total_boxes.shape[0]
+    if numbox>0:
+        dy,edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h)
+        # second stage
+        tempimg = np.zeros((24,24,3,numbox))
+        for k in range(0,numbox):
+            tmp = np.zeros((int(tmph[k]),int(tmpw[k]),3))
+            tmp[dy[k]-1:edy[k],dx[k]-1:edx[k],:] = img[y[k]-1:ey[k],x[k]-1:ex[k],:]
+            if tmp.shape[0]>0 and tmp.shape[1]>0 or tmp.shape[0]==0 and tmp.shape[1]==0:
+                tempimg[:,:,:,k] = imresample(tmp, (24, 24))
+            else:
+                return np.empty((0, 5)), np.empty((10, 0))  # degenerate crop: report no faces
+        tempimg = (tempimg-127.5)*0.0078125
+        tempimg1 = np.transpose(tempimg, (3,1,0,2))
+        out = rnet(tempimg1)
+        out0 = np.transpose(out[0])
+        out1 = np.transpose(out[1])
+        score = out1[1,:]
+        ipass = np.where(score>threshold[1])
+        total_boxes = np.hstack([total_boxes[ipass[0],0:4].copy(), np.expand_dims(score[ipass].copy(),1)])
+        mv = out0[:,ipass[0]]
+        if total_boxes.shape[0]>0:
+            pick = nms(total_boxes, 0.7, 'Union')
+            total_boxes = total_boxes[pick,:]
+            total_boxes = bbreg(total_boxes.copy(), np.transpose(mv[:,pick]))
+            total_boxes = rerec(total_boxes.copy())
+
+    numbox = total_boxes.shape[0]
+    if numbox>0:
+        # third stage
+        total_boxes = np.fix(total_boxes).astype(np.int32)
+        dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h)
+        tempimg = np.zeros((48,48,3,numbox))
+        for k in range(0,numbox):
+            tmp = np.zeros((int(tmph[k]),int(tmpw[k]),3))
+            tmp[dy[k]-1:edy[k],dx[k]-1:edx[k],:] = img[y[k]-1:ey[k],x[k]-1:ex[k],:]
+            if tmp.shape[0]>0 and tmp.shape[1]>0 or tmp.shape[0]==0 and tmp.shape[1]==0:
+                tempimg[:,:,:,k] = imresample(tmp, (48, 48))
+            else:
+                return np.empty((0, 5)), np.empty((10, 0))  # degenerate crop: report no faces
+        tempimg = (tempimg-127.5)*0.0078125
+        tempimg1 = np.transpose(tempimg, (3,1,0,2))
+        out = onet(tempimg1)
+        out0 = np.transpose(out[0])
+        out1 = np.transpose(out[1])
+        out2 = np.transpose(out[2])
+        score = out2[1,:]
+        points = out1
+        ipass = np.where(score>threshold[2])
+        points = points[:,ipass[0]]
+        total_boxes = np.hstack([total_boxes[ipass[0],0:4].copy(), np.expand_dims(score[ipass].copy(),1)])
+        mv = out0[:,ipass[0]]
+        w = total_boxes[:,2]-total_boxes[:,0]+1  # from here on w and h are per-box sizes
+        h = total_boxes[:,3]-total_boxes[:,1]+1
+        points[0:5,:] = np.tile(w,(5, 1))*points[0:5,:] + np.tile(total_boxes[:,0],(5, 1))-1
+        points[5:10,:] = np.tile(h,(5, 1))*points[5:10,:] + np.tile(total_boxes[:,1],(5, 1))-1
+        if total_boxes.shape[0]>0:
+            total_boxes = bbreg(total_boxes.copy(), np.transpose(mv))
+            pick = nms(total_boxes.copy(), 0.7, 'Min')
+            total_boxes = total_boxes[pick,:]
+            points = points[:,pick]
+    return total_boxes, points
+
+def bulk_detect_face(images, detection_window_size_ratio, pnet, rnet, onet, threshold, factor):
+    """Detect faces in several images at once with the pnet/rnet/onet cascade.
+    images: sequence of arrays indexed (row, column, channel); detection_window_size_ratio: minimum face
+    size as a fraction of the shorter image side (never below 12 px); threshold: three per-stage score
+    cut-offs; factor: scale step of the pyramid. Returns one entry per image: None when no box survives,
+    else (total_boxes, points). A degenerate crop ends the whole call with None for every image."""
+    all_scales = [None] * len(images)
+    images_with_boxes = [None] * len(images)
+
+    for i in range(len(images)):
+        images_with_boxes[i] = {'total_boxes': np.empty((0, 9))}
+
+    # create scale pyramid
+    for index, img in enumerate(images):
+        all_scales[index] = []
+        h = img.shape[0]
+        w = img.shape[1]
+        minsize = int(detection_window_size_ratio * np.minimum(w, h))
+        factor_count = 0
+        minl = np.amin([h, w])
+        if minsize <= 12:
+            minsize = 12
+
+        m = 12.0 / minsize
+        minl = minl * m
+        while minl >= 12:
+            all_scales[index].append(m * np.power(factor, factor_count))
+            minl = minl * factor
+            factor_count += 1
+
+    # # # # # # # # # # # # #
+    # first stage - fast proposal network (pnet) to obtain face candidates
+    # # # # # # # # # # # # #
+
+    images_obj_per_resolution = {}
+
+    # TODO: use some type of rounding to number module 8 to increase probability that pyramid images will have the same resolution across input images
+
+    for index, scales in enumerate(all_scales):
+        h = images[index].shape[0]
+        w = images[index].shape[1]
+
+        for scale in scales:
+            hs = int(np.ceil(h * scale))
+            ws = int(np.ceil(w * scale))
+
+            if (ws, hs) not in images_obj_per_resolution:
+                images_obj_per_resolution[(ws, hs)] = []
+
+            im_data = imresample(images[index], (hs, ws))
+            im_data = (im_data - 127.5) * 0.0078125
+            img_y = np.transpose(im_data, (1, 0, 2))  # caffe uses different dimensions ordering
+            images_obj_per_resolution[(ws, hs)].append({'scale': scale, 'image': img_y, 'index': index})
+    # pnet runs once per distinct (ws, hs), on all pyramid levels that share that resolution
+    for resolution in images_obj_per_resolution:
+        images_per_resolution = [i['image'] for i in images_obj_per_resolution[resolution]]
+        outs = pnet(images_per_resolution)
+
+        for index in range(len(outs[0])):
+            scale = images_obj_per_resolution[resolution][index]['scale']
+            image_index = images_obj_per_resolution[resolution][index]['index']
+            out0 = np.transpose(outs[0][index], (1, 0, 2))
+            out1 = np.transpose(outs[1][index], (1, 0, 2))
+
+            boxes, _ = generateBoundingBox(out1[:, :, 1].copy(), out0[:, :, :].copy(), scale, threshold[0])
+
+            # inter-scale nms
+            pick = nms(boxes.copy(), 0.5, 'Union')
+            if boxes.size > 0 and pick.size > 0:
+                boxes = boxes[pick, :]
+                images_with_boxes[image_index]['total_boxes'] = np.append(images_with_boxes[image_index]['total_boxes'],
+                                                                          boxes,
+                                                                          axis=0)
+    # per image: nms across scales, apply the regression columns 5-8, square the boxes, cut 24x24 rnet crops
+    for index, image_obj in enumerate(images_with_boxes):
+        numbox = image_obj['total_boxes'].shape[0]
+        if numbox > 0:
+            h = images[index].shape[0]
+            w = images[index].shape[1]
+            pick = nms(image_obj['total_boxes'].copy(), 0.7, 'Union')
+            image_obj['total_boxes'] = image_obj['total_boxes'][pick, :]
+            regw = image_obj['total_boxes'][:, 2] - image_obj['total_boxes'][:, 0]
+            regh = image_obj['total_boxes'][:, 3] - image_obj['total_boxes'][:, 1]
+            qq1 = image_obj['total_boxes'][:, 0] + image_obj['total_boxes'][:, 5] * regw
+            qq2 = image_obj['total_boxes'][:, 1] + image_obj['total_boxes'][:, 6] * regh
+            qq3 = image_obj['total_boxes'][:, 2] + image_obj['total_boxes'][:, 7] * regw
+            qq4 = image_obj['total_boxes'][:, 3] + image_obj['total_boxes'][:, 8] * regh
+            image_obj['total_boxes'] = np.transpose(np.vstack([qq1, qq2, qq3, qq4, image_obj['total_boxes'][:, 4]]))
+            image_obj['total_boxes'] = rerec(image_obj['total_boxes'].copy())
+            image_obj['total_boxes'][:, 0:4] = np.fix(image_obj['total_boxes'][:, 0:4]).astype(np.int32)
+            dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(image_obj['total_boxes'].copy(), w, h)
+
+            numbox = image_obj['total_boxes'].shape[0]
+            tempimg = np.zeros((24, 24, 3, numbox))
+
+            if numbox > 0:
+                for k in range(0, numbox):
+                    tmp = np.zeros((int(tmph[k]), int(tmpw[k]), 3))
+                    tmp[dy[k] - 1:edy[k], dx[k] - 1:edx[k], :] = images[index][y[k] - 1:ey[k], x[k] - 1:ex[k], :]
+                    if tmp.shape[0] > 0 and tmp.shape[1] > 0 or tmp.shape[0] == 0 and tmp.shape[1] == 0:
+                        tempimg[:, :, :, k] = imresample(tmp, (24, 24))
+                    else:
+                        return [None] * len(images)  # was np.empty(), which raises TypeError (shape is required)
+
+                tempimg = (tempimg - 127.5) * 0.0078125
+                image_obj['rnet_input'] = np.transpose(tempimg, (3, 1, 0, 2))
+
+    # # # # # # # # # # # # #
+    # second stage - refinement of face candidates with rnet
+    # # # # # # # # # # # # #
+
+    bulk_rnet_input = np.empty((0, 24, 24, 3))
+    for index, image_obj in enumerate(images_with_boxes):
+        if 'rnet_input' in image_obj:
+            bulk_rnet_input = np.append(bulk_rnet_input, image_obj['rnet_input'], axis=0)
+
+    out = rnet(bulk_rnet_input)
+    out0 = np.transpose(out[0])
+    out1 = np.transpose(out[1])
+    score = out1[1, :]
+    # i: offset of the current image's rows inside the concatenated rnet output
+    i = 0
+    for index, image_obj in enumerate(images_with_boxes):
+        if 'rnet_input' not in image_obj:
+            continue
+
+        rnet_input_count = image_obj['rnet_input'].shape[0]
+        score_per_image = score[i:i + rnet_input_count]
+        out0_per_image = out0[:, i:i + rnet_input_count]
+
+        ipass = np.where(score_per_image > threshold[1])
+        image_obj['total_boxes'] = np.hstack([image_obj['total_boxes'][ipass[0], 0:4].copy(),
+                                              np.expand_dims(score_per_image[ipass].copy(), 1)])
+
+        mv = out0_per_image[:, ipass[0]]
+
+        if image_obj['total_boxes'].shape[0] > 0:
+            h = images[index].shape[0]
+            w = images[index].shape[1]
+            pick = nms(image_obj['total_boxes'], 0.7, 'Union')
+            image_obj['total_boxes'] = image_obj['total_boxes'][pick, :]
+            image_obj['total_boxes'] = bbreg(image_obj['total_boxes'].copy(), np.transpose(mv[:, pick]))
+            image_obj['total_boxes'] = rerec(image_obj['total_boxes'].copy())
+
+            numbox = image_obj['total_boxes'].shape[0]
+
+            if numbox > 0:
+                tempimg = np.zeros((48, 48, 3, numbox))
+                image_obj['total_boxes'] = np.fix(image_obj['total_boxes']).astype(np.int32)
+                dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(image_obj['total_boxes'].copy(), w, h)
+
+                for k in range(0, numbox):
+                    tmp = np.zeros((int(tmph[k]), int(tmpw[k]), 3))
+                    tmp[dy[k] - 1:edy[k], dx[k] - 1:edx[k], :] = images[index][y[k] - 1:ey[k], x[k] - 1:ex[k], :]
+                    if tmp.shape[0] > 0 and tmp.shape[1] > 0 or tmp.shape[0] == 0 and tmp.shape[1] == 0:
+                        tempimg[:, :, :, k] = imresample(tmp, (48, 48))
+                    else:
+                        return [None] * len(images)  # was np.empty(), which raises TypeError (shape is required)
+                tempimg = (tempimg - 127.5) * 0.0078125
+                image_obj['onet_input'] = np.transpose(tempimg, (3, 1, 0, 2))
+
+        i += rnet_input_count
+
+    # # # # # # # # # # # # #
+    # third stage - further refinement and facial landmarks positions with onet
+    # # # # # # # # # # # # #
+
+    bulk_onet_input = np.empty((0, 48, 48, 3))
+    for index, image_obj in enumerate(images_with_boxes):
+        if 'onet_input' in image_obj:
+            bulk_onet_input = np.append(bulk_onet_input, image_obj['onet_input'], axis=0)
+
+    out = onet(bulk_onet_input)
+
+    out0 = np.transpose(out[0])
+    out1 = np.transpose(out[1])
+    out2 = np.transpose(out[2])
+    score = out2[1, :]
+    points = out1
+    # i: offset of the current image's rows inside the concatenated onet output
+    i = 0
+    ret = []
+    for index, image_obj in enumerate(images_with_boxes):
+        if 'onet_input' not in image_obj:
+            ret.append(None)
+            continue
+
+        onet_input_count = image_obj['onet_input'].shape[0]
+
+        out0_per_image = out0[:, i:i + onet_input_count]
+        score_per_image = score[i:i + onet_input_count]
+        points_per_image = points[:, i:i + onet_input_count]
+
+        ipass = np.where(score_per_image > threshold[2])
+        points_per_image = points_per_image[:, ipass[0]]
+
+        image_obj['total_boxes'] = np.hstack([image_obj['total_boxes'][ipass[0], 0:4].copy(),
+                                              np.expand_dims(score_per_image[ipass].copy(), 1)])
+        mv = out0_per_image[:, ipass[0]]
+
+        w = image_obj['total_boxes'][:, 2] - image_obj['total_boxes'][:, 0] + 1
+        h = image_obj['total_boxes'][:, 3] - image_obj['total_boxes'][:, 1] + 1
+        points_per_image[0:5, :] = np.tile(w, (5, 1)) * points_per_image[0:5, :] + np.tile(
+            image_obj['total_boxes'][:, 0], (5, 1)) - 1
+        points_per_image[5:10, :] = np.tile(h, (5, 1)) * points_per_image[5:10, :] + np.tile(
+            image_obj['total_boxes'][:, 1], (5, 1)) - 1
+
+        if image_obj['total_boxes'].shape[0] > 0:
+            image_obj['total_boxes'] = bbreg(image_obj['total_boxes'].copy(), np.transpose(mv))
+            pick = nms(image_obj['total_boxes'].copy(), 0.7, 'Min')
+            image_obj['total_boxes'] = image_obj['total_boxes'][pick, :]
+            points_per_image = points_per_image[:, pick]
+
+            ret.append((image_obj['total_boxes'], points_per_image))
+        else:
+            ret.append(None)
+
+        i += onet_input_count
+
+    return ret
+
+
+# function [boundingbox] = bbreg(boundingbox,reg)
+def bbreg(boundingbox,reg):
+    """Calibrate bounding boxes in place with regression offsets and return them.
+
+    Corners in columns 0-3 of boundingbox are shifted by the matching column of
+    reg scaled by the box width (x corners) or height (y corners), both +1.
+    """
+    if reg.shape[1] == 1:
+        # singleton second axis: rebuild reg from its third and fourth axes only
+        reg = np.reshape(reg, (reg.shape[2], reg.shape[3]))
+    box_w = boundingbox[:, 2] - boundingbox[:, 0] + 1
+    box_h = boundingbox[:, 3] - boundingbox[:, 1] + 1
+    boundingbox[:, 0:4] = np.column_stack([boundingbox[:, c] + reg[:, c] * (box_h if c % 2 else box_w) for c in range(4)])
+    return boundingbox
+ 
+def generateBoundingBox(imap, reg, scale, t):
+    """Use the heatmap to generate bounding boxes.
+
+    Every imap cell scoring >= t yields one row: q1 (2 cols), q2 (2 cols), the
+    score and four regression values, i.e. 9 columns, mapped back through scale.
+    Returns (boundingbox, reg); with no hit both are empty, 9 and 4 columns wide.
+    """
+    stride=2
+    cellsize=12
+
+    imap = np.transpose(imap)
+    dx1, dy1, dx2, dy2 = [np.transpose(reg[:,:,c]) for c in range(4)]
+    y, x = np.where(imap >= t)
+    if y.shape[0]==1:
+        # NOTE(review): single-hit flip kept as-is; looks inherited from the MATLAB original - confirm
+        dx1, dy1, dx2, dy2 = [np.flipud(a) for a in (dx1, dy1, dx2, dy2)]
+    score = imap[(y,x)]
+    reg = np.transpose(np.vstack([ dx1[(y,x)], dy1[(y,x)], dx2[(y,x)], dy2[(y,x)] ]))
+    if reg.size==0:
+        reg = np.empty((0,4))  # was (0,3): one column short of the non-empty layout
+    bb = np.transpose(np.vstack([y,x]))
+    q1 = np.fix((stride*bb+1)/scale)
+    q2 = np.fix((stride*bb+cellsize-1+1)/scale)
+    boundingbox = np.hstack([q1, q2, np.expand_dims(score,1), reg])
+    return boundingbox, reg
+ 
+# function pick = nms(boxes,threshold,type)
+def nms(boxes, threshold, method):
+    """Greedy non-maximum suppression; returns indices of the kept rows of boxes.
+
+    boxes holds x1, y1, x2, y2, score in columns 0-4. Rows are visited from the
+    highest score down; every remaining row whose overlap with the current one
+    exceeds threshold is dropped. method 'Min' divides the intersection by the
+    smaller area, anything else by the union.
+    """
+    if boxes.size==0:
+        return np.empty((0,), dtype=np.intp)  # 1-D index array, like the non-empty result
+    x1, y1, x2, y2, s = [boxes[:,c] for c in range(5)]
+    area = (x2-x1+1) * (y2-y1+1)
+    I = np.argsort(s)
+    pick = np.zeros_like(s, dtype=np.intp)  # int16 would wrap past 32767 boxes
+    counter = 0
+    while I.size>0:
+        i = I[-1]
+        pick[counter] = i
+        counter += 1
+        idx = I[0:-1]
+        xx1 = np.maximum(x1[i], x1[idx])
+        yy1 = np.maximum(y1[i], y1[idx])
+        xx2 = np.minimum(x2[i], x2[idx])
+        yy2 = np.minimum(y2[i], y2[idx])
+        inter = np.maximum(0.0, xx2-xx1+1) * np.maximum(0.0, yy2-yy1+1)
+        if method == 'Min':  # was `is`, which compares identity rather than value
+            o = inter / np.minimum(area[i], area[idx])
+        else:
+            o = inter / (area[i] + area[idx] - inter)
+        I = I[np.where(o<=threshold)]
+    return pick[0:counter]
+
+# function [dy edy dx edx y ey x ex tmpw tmph] = pad(total_boxes,w,h)
+def pad(total_boxes, w, h):
+    """Compute the crop/paste coordinates that clip boxes to a w-by-h image.
+
+    Coordinates are 1-based and inclusive. Returns, in this order,
+    dy, edy, dx, edx (target rows/columns inside the tmph-by-tmpw patch),
+    y, ey, x, ex (source rows/columns inside the image), then tmpw, tmph
+    (patch width and height), each as an int32 array with one entry per box.
+    """
+    left, top, right, bottom = [total_boxes[:, c] for c in range(4)]
+    tmpw = (right - left + 1).astype(np.int32)
+    tmph = (bottom - top + 1).astype(np.int32)
+    count = total_boxes.shape[0]
+
+    # by default the whole patch is filled: target spans 1..tmpw and 1..tmph
+    dx = np.ones((count), dtype=np.int32)
+    dy = np.ones((count), dtype=np.int32)
+    edx = tmpw.copy().astype(np.int32)
+    edy = tmph.copy().astype(np.int32)
+    x, y, ex, ey = [edge.copy().astype(np.int32) for edge in (left, top, right, bottom)]
+
+    past_right = ex > w    # box sticks out beyond the last column
+    edx[past_right] = -ex[past_right] + w + tmpw[past_right]
+    ex[past_right] = w
+    past_bottom = ey > h   # ... beyond the last row
+    edy[past_bottom] = -ey[past_bottom] + h + tmph[past_bottom]
+    ey[past_bottom] = h
+    past_left = x < 1      # ... before the first column
+    dx[past_left] = 2 - x[past_left]
+    x[past_left] = 1
+    past_top = y < 1       # ... before the first row
+    dy[past_top] = 2 - y[past_top]
+    y[past_top] = 1
+    return dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph
+
+# function [bboxA] = rerec(bboxA)
+def rerec(bboxA):
+    """Turn each box of bboxA into a square (in place) and return bboxA.
+    The square keeps the box centre and takes the longer side as its edge."""
+    width, height = bboxA[:, 2] - bboxA[:, 0], bboxA[:, 3] - bboxA[:, 1]
+    side = np.maximum(width, height)
+    bboxA[:, 0] = bboxA[:, 0] + width * 0.5 - side * 0.5
+    bboxA[:, 1] = bboxA[:, 1] + height * 0.5 - side * 0.5
+    bboxA[:, 2:4] = bboxA[:, 0:2] + side[:, np.newaxis]
+    return bboxA
+
+def imresample(img, sz):
+    # sz is (height, width); cv2.resize takes (width, height), hence the swap
+    return cv2.resize(img, (sz[1], sz[0]), interpolation=cv2.INTER_AREA) #@UndefinedVariable
+
+    # This method is kept for debugging purpose
+#     h=img.shape[0]
+#     w=img.shape[1]
+#     hs, ws = sz
+#     dx = float(w) / ws
+#     dy = float(h) / hs
+#     im_data = np.zeros((hs,ws,3))
+#     for a1 in range(0,hs):
+#         for a2 in range(0,ws):
+#             for a3 in range(0,3):
+#                 im_data[a1,a2,a3] = img[int(floor(a1*dy)),int(floor(a2*dx)),a3]
+#     return im_data
+
diff --git a/src/common/face2rec2.py b/src/common/face2rec2.py
new file mode 100644
index 0000000..46b6115
--- /dev/null
+++ b/src/common/face2rec2.py
@@ -0,0 +1,251 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# -*- coding: utf-8 -*-
+from __future__ import print_function
+import os
+import sys
+
+#curr_path = os.path.abspath(os.path.dirname(__file__))
+#sys.path.append(os.path.join(curr_path, "../python"))
+import mxnet as mx
+import random
+import argparse
+import cv2
+import time
+import traceback
+#from builtins import range
+from easydict import EasyDict as edict
+import face_preprocess
+
+try:
+    import multiprocessing
+except ImportError:
+    multiprocessing = None
+
+
+
+def read_list(path_in):
+    """Yield one record per line of the .lst file at path_in, then index records.
+
+    Image records (flag 0) get ids 1..N in file order. They are followed by one
+    flag-1 record (id 0) whose label is [first id, end id) of the flag-2 records,
+    and by one flag-2 record per run of consecutive lines sharing a label, whose
+    label is the [start id, end id) range of that run.
+    """
+    with open(path_in) as fin:
+        runs = []                 # (start id, end id) of every finished label run
+        run_label, run_start = -1, -1
+        next_id = 1
+        for line in fin:
+            item = edict()
+            item.flag = 0
+            item.image_path, item.label, item.bbox, item.landmark, item.aligned = face_preprocess.parse_lst_line(line)
+            item.id = next_id
+            yield item
+            if item.label != run_label:
+                if run_start >= 0:  # nothing to close before the first line
+                    runs.append((run_start, next_id))
+                run_label, run_start = item.label, next_id
+            next_id += 1
+        runs.append((run_start, next_id))
+        header = edict()
+        header.flag, header.id = 1, 0
+        header.label = [float(next_id), float(next_id + len(runs))]
+        yield header
+        for offset, (start, end) in enumerate(runs):
+            item = edict()
+            item.flag, item.id = 2, next_id + offset
+            item.label = [float(start), float(end)]
+            yield item
+
+
+
+def image_encode(args, i, item, q_out):
+    """Pack item with mx.recordio and put (i, packed, [item.id]) on q_out.
+
+    flag 0 items carry an image: already-aligned files are packed byte for byte,
+    the others are read with cv2, passed through face_preprocess.preprocess and
+    re-encoded with args.quality / args.encoding. Other flags are header-only.
+    """
+    header = mx.recordio.IRHeader(item.flag, item.label, item.id, 0)
+    if item.flag != 0:
+        # header-only record (flag 1 or 2); the payload must be bytes (b''), because
+        # a str '' cannot be appended to the packed header on Python 3
+        s = mx.recordio.pack(header, b'')
+    elif item.aligned:
+        with open(item.image_path, 'rb') as fin:
+            s = mx.recordio.pack(header, fin.read())
+    else:
+        img = cv2.imread(item.image_path, args.color)
+        assert item.landmark is not None
+        # NOTE(review): output size is fixed at 112,112 here; args.image_size is not consulted
+        img = face_preprocess.preprocess(img, bbox = item.bbox, landmark=item.landmark, image_size='112,112')
+        s = mx.recordio.pack_img(header, img, quality=args.quality, img_fmt=args.encoding)
+    q_out.put((i, s, [item.id]))
+
+
+def read_worker(args, q_in, q_out):
+    """Encode (i, item) pairs taken from q_in onto q_out until None arrives.
+
+    None is the end-of-input marker and is consumed without being forwarded.
+    """
+    for i, item in iter(q_in.get, None):
+        image_encode(args, i, item, q_out)
+
+def write_worker(q_out, fname, working_dir):
+    """Write (i, packed, [id]) tuples from q_out to <stem of fname>.rec/.idx in working_dir.
+
+    Tuples may arrive in any order: they are buffered and written in increasing i,
+    starting at 0. A None on the queue ends the worker, and the record file is
+    closed on the way out, also when writing fails.
+    """
+    pre_time = time.time()
+    count = 0
+    stem = os.path.join(working_dir, os.path.splitext(os.path.basename(fname))[0])
+    record = mx.recordio.MXIndexedRecordIO(stem + '.idx', stem + '.rec', 'w')
+    buf = {}
+    try:
+        for i, s, item in iter(q_out.get, None):
+            buf[i] = (s, item)
+            # drain every record that is now next in sequence
+            while count in buf:
+                s, item = buf.pop(count)
+                if s is not None:
+                    record.write_idx(item[0], s)
+                if count % 1000 == 0:
+                    cur_time = time.time()
+                    print('time:', cur_time - pre_time, ' count:', count)
+                    pre_time = cur_time
+                count += 1
+    finally:
+        # close explicitly so .rec/.idx are flushed without relying on garbage collection
+        record.close()
+
+def parse_args():
+    """Parse the command line and return the namespace, with args.prefix made absolute."""
+    def _str2bool(value):
+        # type=bool would turn every non-empty string, 'False' included, into True
+        return str(value).strip().lower() not in ('', '0', 'false', 'f', 'no', 'n')
+
+    parser = argparse.ArgumentParser(
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+        description='Create an image list or \
+        make a record database by reading from an image list')
+    parser.add_argument('prefix', help='prefix of input/output lst and rec files.')
+
+    cgroup = parser.add_argument_group('Options for creating image lists')
+    cgroup.add_argument('--list', type=_str2bool, default=False,
+                        help='If this is set im2rec will create image list(s) by traversing root folder\
+        and output to <prefix>.lst.\
+        Otherwise im2rec will read <prefix>.lst and create a database at <prefix>.rec')
+    cgroup.add_argument('--exts', nargs='+', default=['.jpeg', '.jpg'], help='list of acceptable image extensions.')
+    cgroup.add_argument('--chunks', type=int, default=1, help='number of chunks.')
+    cgroup.add_argument('--train-ratio', type=float, default=1.0, help='Ratio of images to use for training.')
+    cgroup.add_argument('--test-ratio', type=float, default=0, help='Ratio of images to use for testing.')
+    cgroup.add_argument('--recursive', type=_str2bool, default=False,
+                        help='If true recursively walk through subdirs and assign an unique label\
+        to images in each folder. Otherwise only include images in the root folder\
+        and give them label 0.')
+    cgroup.add_argument('--shuffle', type=_str2bool, default=True, help='If this is set as True, \
+        im2rec will randomize the image order in <prefix>.lst')
+
+    rgroup = parser.add_argument_group('Options for creating database')
+    rgroup.add_argument('--quality', type=int, default=95,
+                        help='JPEG quality for encoding, 1-100; or PNG compression for encoding, 1-9')
+    rgroup.add_argument('--num-thread', type=int, default=1,
+                        help='number of thread to use for encoding. order of images will be different\
+        from the input list if >1. the input list will be modified to match the\
+        resulting order.')
+    rgroup.add_argument('--color', type=int, default=1, choices=[-1, 0, 1],
+                        help='specify the color mode of the loaded image.\
+        1: Loads a color image. Any transparency of image will be neglected. It is the default flag.\
+        0: Loads image in grayscale mode.\
+        -1:Loads image as such including alpha channel.')
+    rgroup.add_argument('--encoding', type=str, default='.jpg', choices=['.jpg', '.png'],
+                        help='specify the encoding of the images.')
+    rgroup.add_argument('--pack-label', type=_str2bool, default=False,
+        help='Whether to also pack multi dimensional label in the record file')
+    rgroup.add_argument('--image-size', type=str, default='112,96', choices=['112,96', '112,112'], help='image size, set to 112,96 or 112,112')
+    args = parser.parse_args()
+    args.prefix = os.path.abspath(args.prefix)
+    return args
+
+if __name__ == '__main__':
+    args = parse_args()
+    if args.list:
+        make_list(args)  # NOTE(review): make_list is neither defined nor imported in this file
+    else:
+        if os.path.isdir(args.prefix):
+            working_dir = args.prefix
+        else:
+            working_dir = os.path.dirname(args.prefix)
+        files = [os.path.join(working_dir, fname) for fname in os.listdir(working_dir)
+                    if os.path.isfile(os.path.join(working_dir, fname))]
+        count = 0
+        for fname in files:
+            if fname.startswith(args.prefix) and fname.endswith('.lst'):
+                print('Creating .rec file from', fname, 'in', working_dir)
+                count += 1
+                image_list = read_list(fname)
+                # -- write_record -- #
+                if args.num_thread > 1 and multiprocessing is not None:
+                    q_in = [multiprocessing.Queue(1024) for i in range(args.num_thread)]
+                    q_out = multiprocessing.Queue(1024)
+                    read_process = [multiprocessing.Process(target=read_worker, args=(args, q_in[i], q_out)) \
+                                    for i in range(args.num_thread)]
+                    for p in read_process:
+                        p.start()
+                    write_process = multiprocessing.Process(target=write_worker, args=(q_out, fname, working_dir))
+                    write_process.start()
+                    # deal the items round-robin to the readers; one None per reader marks end of input
+                    for i, item in enumerate(image_list):
+                        q_in[i % len(q_in)].put((i, item))
+                    for q in q_in:
+                        q.put(None)
+                    for p in read_process:
+                        p.join()
+                    # every reader has finished, so the writer can be told to stop
+                    q_out.put(None)
+                    write_process.join()
+                else:
+                    print('multiprocessing not available, fall back to single threaded encoding')
+                    try:
+                        import Queue as queue
+                    except ImportError:
+                        import queue
+                    q_out = queue.Queue()
+                    stem = os.path.join(working_dir, os.path.splitext(os.path.basename(fname))[0])
+                    record = mx.recordio.MXIndexedRecordIO(stem + '.idx', stem + '.rec', 'w')
+                    cnt = 0
+                    pre_time = time.time()
+                    try:
+                        for i, item in enumerate(image_list):
+                            image_encode(args, i, item, q_out)
+                            if q_out.empty():
+                                continue
+                            _, s, oitem = q_out.get()  # oitem is [item.id]; item is an EasyDict with no key 0
+                            record.write_idx(oitem[0], s)
+                            if cnt % 1000 == 0:
+                                cur_time = time.time()
+                                print('time:', cur_time - pre_time, ' count:', cnt)
+                                pre_time = cur_time
+                            cnt += 1
+                    finally:
+                        record.close()
+        if not count:
+            print('Did not find and list file with prefix %s'%args.prefix)
diff --git a/src/inceptions.py b/src/inceptions.py
deleted file mode 100644
index 2f1a8bd..0000000
--- a/src/inceptions.py
+++ /dev/null
@@ -1,720 +0,0 @@
-# -*- coding:utf-8 -*-
-__author__ = 'zhangshuai'
-modified_date = '16/7/5'
-__modify__ = 'anchengwu'
-modified_date = '17/2/22'
-__modify2__ = 'weiyangwang'
-modified_date = '17/9/20'
-
-
-'''
-Inception v4 , suittable for image with around 299 x 299
-
-Reference:
-    Inception-v4, Inception-ResNet and the Impact of Residual Connections on Learning
-    Christian Szegedy, Sergey Ioffe, Vincent Vanhoucke
-    arXiv.1602.07261
-    
-    
-Inception V3, suitable for images with around 299 x 299
-
-Reference:
-
-Szegedy, Christian, et al. "Rethinking the Inception Architecture for Computer Vision." arXiv preprint arXiv:1512.00567 (2015).
-    
-'''
-
-
-# --------------------------------------------------------
-
-# Modified By DeepInsight
-
-#  0. Make Code Tidier (with exec)
-#  1. Scalable Inception V3, V4, -resnetV2
-#  2. Todo: Modified For XCeption, make Conv11 num_group_11 and Other Conv num_group independent.
-#  3. Todo: Module Options: Deformable, Attention Along Features/Along Image
-#  4. Todo: Adaptive Encoder-Decoder Symbol For Segmenter
-#  5. Todo: Adaptive Symbol For Detector
-
-# --------------------------------------------------------
-
-
-import mxnet as mx
-import numpy as np
-
-######## Inception Common:
-
-## Todo: Deformable, Attention
-
-def Conv(data, num_filter, num_group = 1, kernel=(1, 1), stride=(1, 1), pad=(0, 0), \
-         act_type="relu", mirror_attr={}, with_act=True, name=None, suffix=''):
-    
-    conv = mx.sym.Convolution(data=data, num_filter=num_filter, num_group=num_group, kernel=kernel, stride=stride, pad=pad, no_bias=True, name='%s%s_conv2d' %(name, suffix))
-    bn = mx.sym.BatchNorm(data=conv, name='%s%s_batchnorm' %(name, suffix), fix_gamma=True)
-    if with_act:
-        act = mx.sym.Activation(data=bn, act_type=act_type, name='%s%s_relu' %(name, suffix))
-        return act
-    else:
-        return bn
-    
-def get_input_size(lastout=8):
-    input_size = 2*lastout + 1 # 17
-    input_size = 2*input_size + 1 # 35
-    input_size = 2*input_size + 1 # 71
-    input_size = input_size + 2 # 73
-    input_size = 2*input_size + 1 # 147
-    input_size = input_size + 2 # 149
-    input_size = 2*input_size + 1 # 299
-    return input_size
-    
-
-######## Inception ResNetv2: Scalable, XCeptionized
-
-# Todo Scalable and XCeptionized
-
-''' Fade-away ConvFactory
-
-def ConvFactory(data, num_filter, kernel, stride=(1, 1), pad=(0, 0), act_type="relu", mirror_attr={}, with_act=True):
-    conv = mx.symbol.Convolution(
-        data=data, num_filter=num_filter, kernel=kernel, stride=stride, pad=pad)
-    bn = mx.symbol.BatchNorm(data=conv)
-    if with_act:
-        act = mx.symbol.Activation(
-            data=bn, act_type=act_type, attr=mirror_attr)
-        return act
-    else:
-        return bn
-'''
-
-def block35_irv2(net, input_num_channels, 
-                 basefilter=16, num_group=1 ,num_group_11=1, scale=1.0,
-                 with_act=True, act_type='relu', mirror_attr={}, name=None):
-    #Conv11
-    tower_conv = Conv(net, basefilter*2, num_group=num_group_11, kernel=(1, 1), name=name+'_35b11')
-    #Conv11-Conv33
-    tower_conv1_0 = Conv(net, basefilter*2, num_group=num_group_11, kernel=(1, 1), name=name+'_35b21')
-    tower_conv1_1 = Conv(tower_conv1_0, basefilter*2, num_group=num_group, kernel=(3, 3), pad=(1, 1), name=name+'_35b22')
-    #Conv11-Conv33-Conv33
-    tower_conv2_0 = Conv(net, basefilter*2, num_group=num_group_11,kernel=(1, 1), name=name+'_35b31')
-    tower_conv2_1 = Conv(tower_conv2_0, basefilter*3, num_group=num_group, kernel=(3, 3), pad=(1, 1), name=name+'_35b32')
-    tower_conv2_2 = Conv(tower_conv2_1, basefilter*4, num_group=num_group, kernel=(3, 3), pad=(1, 1), name=name+'_35b33')
-    #Concat
-    tower_mixed = mx.symbol.Concat(*[tower_conv, tower_conv1_1, tower_conv2_2])
-    tower_out = Conv(tower_mixed, input_num_channels, num_group=num_group_11, kernel=(1, 1), with_act=False, name=name+'_35out')
-    
-    
-    net = net + tower_out * scale
-    if with_act:
-        act = mx.symbol.Activation(
-            data=net, act_type=act_type, attr=mirror_attr)
-        return act
-    else:
-        return net
-
-
-def block17_irv2(net, input_num_channels,
-                 basefilter=32, num_group=1 ,num_group_11=1, scale=1.0,
-                 with_act=True, act_type='relu', mirror_attr={}, name=None):
-    # Conv11
-    tower_conv = Conv(net, basefilter*6, num_group=num_group_11, kernel=(1, 1), name=name+'_17b11')
-    # Conv11-Conv17-Conv71
-    tower_conv1_0 = Conv(net, basefilter*6, num_group=num_group_11, kernel=(1, 1), name=name+'_17b21')
-    tower_conv1_1 = Conv(tower_conv1_0, basefilter*5, num_group=num_group, kernel=(1, 7), pad=(1, 2), name=name+'_17b22')
-    tower_conv1_2 = Conv(tower_conv1_1, basefilter*6, num_group=num_group, kernel=(7, 1), pad=(2, 1), name=name+'_17b23')
-    # Concat
-    tower_mixed = mx.symbol.Concat(*[tower_conv, tower_conv1_2])
-    # Conv11
-    tower_out = Conv(
-        tower_mixed, input_num_channels, num_group=num_group_11, kernel=(1, 1), with_act=False, name=name+'_17out')
-    net = net + tower_out * scale
-    if with_act:
-        act = mx.symbol.Activation(
-            data=net, act_type=act_type, attr=mirror_attr)
-        return act
-    else:
-        return net
-
-
-def block8_irv2(net, input_num_channels,
-                basefilter=32, num_group=1 ,num_group_11=1, scale=1.0,
-                with_act=True, act_type='relu', mirror_attr={}, name=None):
-    # Conv11
-    tower_conv = Conv(net, basefilter*6, num_group=num_group_11, kernel=(1, 1), name=name+'_8b11')
-    # Conv11-Conv13-Conv31
-    tower_conv1_0 = Conv(net, basefilter*6, num_group=num_group_11, kernel=(1, 1), name=name+'_8b21')
-    tower_conv1_1 = Conv(tower_conv1_0, basefilter*7, num_group=num_group, kernel=(1, 3), pad=(0, 1), name=name+'_8b22')
-    tower_conv1_2 = Conv(tower_conv1_1, basefilter*8, num_group=num_group, kernel=(3, 1), pad=(1, 0), name=name+'_8b23')
-    #Concat
-    tower_mixed = mx.symbol.Concat(*[tower_conv, tower_conv1_2])
-    #Conv11
-    tower_out = Conv(
-        tower_mixed, input_num_channels, num_group=num_group_11, kernel=(1, 1), with_act=False, name=name+'_8out')
-    
-    net = net + tower_out * scale
-    if with_act:
-        act = mx.symbol.Activation(
-            data=net, act_type=act_type, attr=mirror_attr)
-        return act
-    else:
-        return net
-
-
-def repeat(inputs, repetitions, layer, name=None, *args, **kwargs):
-    outputs = inputs
-    for i in range(repetitions):
-        outputs = layer(outputs, name=name+'_'+str(i), *args, **kwargs)
-    return outputs
-
-
-def get_symbol_irv2(num_classes=1000, 
-               basefilter=16, num_group=1 ,num_group_11=1, scale=1.0,
-               lastout = 8,
-               units = [10,20,9],
-               **kwargs):
-    data = mx.symbol.Variable(name='data')
-    # Size 299
-    # Stem 1 And Downsampling
-    conv1a_3_3 = Conv(data,
-                      basefilter*2, num_group=num_group,
-                      kernel=(3, 3), stride=(2, 2), name='conv1a')
-    # Size 149
-    conv2a_3_3 = Conv(conv1a_3_3, basefilter*2, num_group=num_group, kernel=(3, 3), name='conv2a')
-    # Size 147
-    conv2b_3_3 = Conv(conv2a_3_3, basefilter*4, num_group=num_group, kernel=(3, 3), pad=(1, 1), name='conv2b')
-    # Size 147
-    maxpool3a_3_3 = mx.symbol.Pooling(
-        data=conv2b_3_3, kernel=(3, 3), stride=(2, 2), pool_type='max')
-    # Stem 2 And Downsampling
-    conv3b_1_1 = Conv(maxpool3a_3_3, basefilter*5, num_group=num_group_11, kernel=(1, 1), name='conv3b')
-    # 73
-    conv4a_3_3 = Conv(conv3b_1_1, basefilter*12, num_group=num_group, kernel=(3, 3), name='conv4a')
-    # 71
-    maxpool5a_3_3 = mx.symbol.Pooling(
-        data=conv4a_3_3, kernel=(3, 3), stride=(2, 2), pool_type='max')
-  
-    # Size 35
-    # Stem 3 And Downsampling
-    # Branch31: Conv11
-    tower_conv = Conv(maxpool5a_3_3, basefilter*6, num_group=num_group_11, kernel=(1, 1), name='branch31')
-    # Branch32: Conv11-Conv55
-    tower_conv1_0 = Conv(maxpool5a_3_3, basefilter*3, num_group=num_group_11, kernel=(1, 1), name='branch321')
-    tower_conv1_1 = Conv(tower_conv1_0, basefilter*4, num_group=num_group, kernel=(5, 5), pad=(2, 2), name='branch322')
-    # Branch33: Conv11-Conv33-Conv33
-    tower_conv2_0 = Conv(maxpool5a_3_3, basefilter*4, num_group=num_group_11, kernel=(1, 1), name='branch331')
-    tower_conv2_1 = Conv(tower_conv2_0, basefilter*6, num_group=num_group, kernel=(3, 3), pad=(1, 1), name='branch332')
-    tower_conv2_2 = Conv(tower_conv2_1, basefilter*6, num_group=num_group, kernel=(3, 3), pad=(1, 1), name='branch333')
-    # Branch34: Pool-Conv11
-    tower_pool3_0 = mx.symbol.Pooling(data=maxpool5a_3_3, kernel=(
-        3, 3), stride=(1, 1), pad=(1, 1), pool_type='avg')
-    tower_conv3_1 = Conv(tower_pool3_0, basefilter*4, num_group=num_group_11, kernel=(1, 1),name='branch34')
-    # Concat
-    tower_5b_out = mx.symbol.Concat(
-        *[tower_conv, tower_conv1_1, tower_conv2_2, tower_conv3_1])
-    
-    # Repeat 1
-    net = repeat(tower_5b_out, units[0], block35_irv2, scale=0.17, input_num_channels=basefilter*20,\
-                basefilter=basefilter, num_group=num_group ,num_group_11=num_group_11, name='repeat1')
-    
-    # Size 35
-    
-    # Branch 41
-    tower_conv = Conv(net, basefilter*24, num_group=num_group, kernel=(3, 3), stride=(2, 2), name='branch41')
-    # Branch 42
-    tower_conv1_0 = Conv(net, basefilter*16, num_group=num_group_11, kernel=(1, 1), name='branch421')
-    tower_conv1_1 = Conv(tower_conv1_0, basefilter*16, num_group=num_group, kernel=(3, 3), pad=(1, 1), name='branch422')
-    tower_conv1_2 = Conv(tower_conv1_1, basefilter*24, num_group=num_group, kernel=(3, 3), stride=(2, 2), name='branch423')
-    tower_pool = mx.symbol.Pooling(net, kernel=(
-        3, 3), stride=(2, 2), pool_type='max')
-    
-    # Concat 
-    net = mx.symbol.Concat(*[tower_conv, tower_conv1_2, tower_pool])
-    # Repeat 2
-    net = repeat(net, units[1], block17_irv2, scale=0.1, input_num_channels=basefilter*68,\
-                basefilter=basefilter*2, num_group=num_group, num_group_11=num_group_11,name='repeat2')
-    
-    
-    # Size 17
-    
-    # Branch51: Conv11-Conv33
-    tower_conv = Conv(net, basefilter*16, num_group=num_group_11, kernel=(1, 1) ,name='branch511')
-    tower_conv0_1 = Conv(tower_conv, basefilter*24, num_group=num_group, kernel=(3, 3), stride=(2, 2) ,name='branch512')
-    # Branch52: Conv11-Conv33 ? Is this XCeption
-    tower_conv1 = Conv(net, basefilter*16, num_group=num_group_11, kernel=(1, 1) ,name='branch521')
-    tower_conv1_1 = Conv(tower_conv1, basefilter*18, num_group=num_group, kernel=(3, 3), stride=(2, 2) ,name='branch522')
-    # Branch53: Conv11-Conv33-Conv33
-    tower_conv2 = Conv(net, basefilter*16, num_group=num_group_11, kernel=(1, 1) ,name='branch531')
-    tower_conv2_1 = Conv(tower_conv2, basefilter*18,  num_group=num_group, kernel=(3, 3), pad=(1, 1) ,name='branch532')
-    tower_conv2_2 = Conv(tower_conv2_1, basefilter*20,  num_group=num_group, kernel=(3, 3),  stride=(2, 2) ,name='branch533')
-    # Pool33
-    tower_pool = mx.symbol.Pooling(net, kernel=(
-        3, 3), stride=(2, 2), pool_type='max')
-    net = mx.symbol.Concat(
-        *[tower_conv0_1, tower_conv1_1, tower_conv2_2, tower_pool])
-
-    # Size 8 
-    net = repeat(net, units[2], block8_irv2, scale=0.2, input_num_channels=basefilter*130,\
-                basefilter=basefilter*2, num_group=num_group ,num_group_11=num_group_11,name='repeat3')
-    net = block8_irv2(net, with_act=False, input_num_channels=basefilter*130,
-                     basefilter=basefilter*2, num_group=num_group ,num_group_11=num_group_11,name='block8')
-    
-    # Trailing
-    net = Conv(net, basefilter*96, num_group=num_group_11, kernel=(1, 1), name='trailing')
-    net = mx.symbol.Pooling(net, kernel=(
-        1, 1), global_pool=True, stride=(2, 2), pool_type='avg')
-    net = mx.symbol.Flatten(net)
-    net = mx.symbol.Dropout(data=net, p=0.2)
-    net = mx.symbol.FullyConnected(data=net, num_hidden=num_classes)
-    softmax = mx.symbol.SoftmaxOutput(data=net, name='softmax')
-    return net, softmax
-
-
-
-######## Inception V4: Scalable, XCeptionized
-
-def Inception_stem_V4(data, basefilter=32, stem_num_group=1, stem_num_group_11=1, name= None):
-    
-    # Size 299
-    c = Conv(data, basefilter, num_group=stem_num_group, kernel=(3, 3), stride=(2, 2), name='%s_conv1_3*3' %name)
-    # 149
-    c = Conv(c, basefilter, num_group=stem_num_group, kernel=(3, 3), name='%s_conv2_3*3' %name)
-    # 147
-    c = Conv(c, basefilter, num_group=stem_num_group, kernel=(3, 3), pad=(1, 1), name='%s_conv3_3*3' %name)
-    # 147
-    p1 = mx.sym.Pooling(c, kernel=(3, 3), stride=(2, 2), pool_type='max', name='%s_maxpool_1' %name)
-    # 73 
-    c2 = Conv(c, basefilter*3, num_group=stem_num_group, kernel=(3, 3), stride=(2, 2), name='%s_conv4_3*3' %name)
-    concat = mx.sym.Concat(*[p1, c2], name='%s_concat_1' %name)
-
-    c1 = Conv(concat, basefilter*2, num_group=stem_num_group_11, kernel=(1, 1), pad=(0, 0), name='%s_conv5_1*1' %name)
-    c1 = Conv(c1, basefilter*3, num_group=stem_num_group, kernel=(3, 3), name='%s_conv6_3*3' %name)
-    
-    # 71
-    
-    c2 = Conv(concat, basefilter*2, num_group=stem_num_group_11, kernel=(1, 1), pad=(0, 0), name='%s_conv7_1*1' %name)
-    c2 = Conv(c2, basefilter*2, num_group=stem_num_group, kernel=(7, 1), pad=(3, 0), name='%s_conv8_7*1' %name)
-    c2 = Conv(c2, basefilter*2, num_group=stem_num_group, kernel=(1, 7), pad=(0, 3), name='%s_conv9_1*7' %name)
-    c2 = Conv(c2, basefilter*3, num_group=stem_num_group, kernel=(3, 3), pad=(0, 0), name='%s_conv10_3*3' %name)
-
-    concat = mx.sym.Concat(*[c1, c2], name='%s_concat_2' %name)
-
-    c1 = Conv(concat, basefilter*6, num_group=stem_num_group, kernel=(3, 3), stride=(2, 2), name='%s_conv11_3*3' %name)
-    p1 = mx.sym.Pooling(concat, kernel=(3, 3), stride=(2, 2), pool_type='max', name='%s_maxpool_2' %name)
-
-    # 35
-    
-    concat = mx.sym.Concat(*[c1, p1], name='%s_concat_3' %name)
-    return concat
-
-
-def InceptionA_V4(input, basefilter=32, num_group=1 ,num_group_11=1,  name=None):
-    # Pool33-Conv11
-    p1 = mx.sym.Pooling(input, kernel=(3, 3), pad=(1, 1), pool_type='avg', name='%s_avgpool_1' %name)
-    c1 = Conv(p1, basefilter*3, kernel=(1, 1), num_group=num_group_11, pad=(0, 0), name='%s_conv1_1*1' %name)
-    # Conv11
-    c2 = Conv(input, basefilter*3, kernel=(1, 1), num_group=num_group_11, pad=(0, 0), name='%s_conv2_1*1' %name)
-    # Conv11-Conv33
-    c3 = Conv(input, basefilter*2, kernel=(1, 1), num_group=num_group_11, pad=(0, 0), name='%s_conv3_1*1' %name)
-    c3 = Conv(c3, basefilter*3, kernel=(3, 3), num_group=num_group, pad=(1, 1), name='%s_conv4_3*3' %name)
-    # Conv11-Conv33-Conv33
-    c4 = Conv(input, basefilter*2, kernel=(1, 1), num_group=num_group_11, pad=(0, 0), name='%s_conv5_1*1' % name)
-    c4 = Conv(c4, basefilter*3, kernel=(3, 3), num_group=num_group, pad=(1, 1), name='%s_conv6_3*3' % name)
-    c4 = Conv(c4, basefilter*3, kernel=(3, 3), num_group=num_group, pad=(1, 1), name='%s_conv7_3*3' %name)
-    
-    concat = mx.sym.Concat(*[c1, c2, c3, c4], name='%s_concat_1' %name)
-    return concat
-
-
-def ReductionA_V4(input, basefilter=32, num_group=1, num_group_11=1, name=None):
-    # Pool33
-    p1 = mx.sym.Pooling(input, kernel=(3, 3), stride=(2, 2), pool_type='max', name='%s_maxpool_1' %name)
-    # Conv33
-    c2 = Conv(input, basefilter*12, num_group=num_group, kernel=(3, 3), stride=(2, 2), name='%s_conv1_3*3' %name)
-    # Conv11-Conv33-Conv33
-    c3 = Conv(input, basefilter*6, num_group=num_group_11, kernel=(1, 1), pad=(0, 0), name='%s_conv2_1*1' %name)
-    c3 = Conv(c3, basefilter*7, num_group=num_group, kernel=(3, 3), pad=(1, 1), name='%s_conv3_3*3' %name)
-    c3 = Conv(c3, basefilter*8, num_group=num_group, kernel=(3, 3), stride=(2, 2), pad=(0, 0), name='%s_conv4_3*3' %name)
-
-    concat = mx.sym.Concat(*[p1, c2, c3], name='%s_concat_1' %name)
-
-    return concat
-
-def InceptionB_V4(input, basefilter=32, num_group=1, num_group_11=1, name=None):
-    # Pool33-Conv11
-    p1 = mx.sym.Pooling(input, kernel=(3, 3), pad=(1, 1), pool_type='avg', name='%s_avgpool_1' %name)
-    c1 = Conv(p1, basefilter*4, num_group=num_group_11, kernel=(1, 1), pad=(0, 0), name='%s_conv1_1*1' %name)
-    # Conv11
-    c2 = Conv(input, basefilter*12, num_group=num_group_11, kernel=(1, 1), pad=(0, 0), name='%s_conv2_1*1' %name)
-    # Conv11-Conv17-Conv71
-    c3 = Conv(input, basefilter*6, num_group=num_group_11, kernel=(1, 1), pad=(0, 0), name='%s_conv3_1*1' %name)
-    c3 = Conv(c3, basefilter*7, num_group=num_group, kernel=(1, 7), pad=(0, 3), name='%s_conv4_1*7' %name)
-    #paper wrong
-    c3 = Conv(c3, basefilter*8, num_group=num_group, kernel=(7, 1), pad=(3, 0), name='%s_conv5_1*7' %name)
-    
-    # COnv11-Conv17-Conv71-Conv17-Conv71
-    c4 = Conv(input, basefilter*6, kernel=(1, 1), pad=(0, 0), name='%s_conv6_1*1' %name)
-    c4 = Conv(c4, basefilter*6, num_group=num_group, kernel=(1, 7), pad=(0, 3), name='%s_conv7_1*7' %name)
-    c4 = Conv(c4, basefilter*7, num_group=num_group, kernel=(7, 1), pad=(3, 0), name='%s_conv8_7*1' %name)
-    c4 = Conv(c4, basefilter*7, num_group=num_group, kernel=(1, 7), pad=(0, 3), name='%s_conv9_1*7' %name)
-    c4 = Conv(c4, basefilter*8, num_group=num_group, kernel=(7, 1), pad=(3, 0), name='%s_conv10_7*1' %name)
-
-    concat = mx.sym.Concat(*[c1, c2, c3, c4], name='%s_concat_1' %name)
-
-    return concat
-
-def ReductionB_V4(input, basefilter=64, num_group=1, num_group_11=1,  name=None):
-    # Pool33
-    p1 = mx.sym.Pooling(input, kernel=(3, 3), stride=(2, 2), pool_type='max', name='%s_maxpool_1' %name)
-    # Conv11-Conv33
-    c2 = Conv(input, basefilter*3 , num_group=num_group_11, kernel=(1, 1), pad=(0, 0), name='%s_conv1_1*1' %name)
-    c2 = Conv(c2, basefilter*3, num_group=num_group, kernel=(3, 3), stride=(2, 2), name='%s_conv2_3*3' %name)
-    # Conv11-Conv17-Conv71-Conv33
-    c3 = Conv(input, basefilter*3, num_group=num_group_11, kernel=(1, 1), pad=(0, 0), name='%s_conv3_1*1' %name)
-    c3 = Conv(c3, basefilter*4, num_group=num_group, kernel=(1, 7), pad=(0, 3), name='%s_conv4_1*7' %name)
-    c3 = Conv(c3, basefilter*5, num_group=num_group, kernel=(7, 1), pad=(3, 0), name='%s_conv5_7*1' %name)
-    c3 = Conv(c3, basefilter*5, num_group=num_group, kernel=(3, 3), stride=(2, 2), name='%s_conv6_3*3' %name)
-
-    concat = mx.sym.Concat(*[p1, c2, c3], name='%s_concat_1' %name)
-
-    return concat
-
-
-def InceptionC_V4(input, basefilter=64, num_group=1, num_group_11=1, name=None):
-    # Pool33-Conv11
-    p1 = mx.sym.Pooling(input, kernel=(3, 3), pad=(1, 1), pool_type='avg', name='%s_avgpool_1' %name)
-    c1 = Conv(p1, basefilter*4, num_group=num_group_11, kernel=(1, 1), pad=(0, 0), name='%s_conv1_1*1' %name)
-    # Conv11
-    c2 = Conv(input, basefilter*4, num_group=num_group_11, kernel=(1, 1), pad=(0, 0), name='%s_conv2_1*1' %name)
-    # Conv11-[Conv13;Conv31]
-    c3 = Conv(input, basefilter*6, num_group=num_group_11, kernel=(1, 1), pad=(0, 0), name='%s_conv3_1*1' %name)
-    c3_1 = Conv(c3, basefilter*4, num_group=num_group, kernel=(1, 3), pad=(0, 1), name='%s_conv4_3*1' %name)
-    c3_2 = Conv(c3, basefilter*4, num_group=num_group, kernel=(3, 1), pad=(1, 0), name='%s_conv5_1*3' %name)
-    # Conv11-Conv13-Conv31-[Conv13;Conv31]
-    c4 = Conv(input, basefilter*6, num_group=num_group_11, kernel=(1, 1), pad=(0, 0), name='%s_conv6_1*1' %name)
-    c4 = Conv(c4, basefilter*7, num_group=num_group, kernel=(1, 3), pad=(0, 1), name='%s_conv7_1*3' %name)
-    c4 = Conv(c4, basefilter*8, num_group=num_group, kernel=(3, 1), pad=(1, 0), name='%s_conv8_3*1' %name)
-    c4_1 = Conv(c4, basefilter*4, num_group=num_group, kernel=(3, 1), pad=(1, 0), name='%s_conv9_1*3' %name)
-    c4_2 = Conv(c4, basefilter*4, num_group=num_group, kernel=(1, 3), pad=(0, 1), name='%s_conv10_3*1' %name)
-
-    concat = mx.sym.Concat(*[c1, c2, c3_1, c3_2, c4_1, c4_2], name='%s_concat' %name)
-
-    return concat
-
-
-def get_symbol_V4(num_classes=1000, \
-                  units=[4,7,3], basefilter=32, num_group=1, num_group_11=1, \
-                  lastout=8,
-                  dtype='float32', **kwargs):
-    data = mx.sym.Variable(name="data")
-    if dtype == 'float32':
-        data = mx.sym.identity(data=data, name='id')
-    else:
-        if dtype == 'float16':
-            data = mx.sym.Cast(data=data, dtype=np.float16)
-    x = Inception_stem_V4(data, 
-                          basefilter=basefilter,
-                          stem_num_group=num_group,
-                          stem_num_group_11=num_group_11,
-                          name='in_stem')
-
-    #4 * InceptionA By Default
-
-    for i in range(units[0]):
-        x = InceptionA_V4(x,
-                          basefilter=basefilter,
-                          num_group=num_group,
-                          num_group_11=num_group_11,
-                          name='in%dA' %(i+1))
-
-    #Reduction A : Size 35-17
-    x = ReductionA_V4(x,
-                      basefilter=basefilter,
-                      num_group=num_group,
-                      num_group_11=num_group_11,
-                      name='re1A')
-
-    #7 * InceptionB By Default
-
-    for i in range(units[1]):
-        x = InceptionB_V4(x,
-                          basefilter=basefilter,
-                          num_group=num_group,
-                          num_group_11=num_group_11,
-                          name='in%dB' %(i+1))
-
-    #ReductionB : Size 17-8
-    x = ReductionB_V4(x,
-                      basefilter=basefilter*2,
-                      num_group=num_group,
-                      num_group_11=num_group_11,
-                      name='re1B')
-
-    #3 * InceptionC By Default
-
-    for i in range(units[2]):
-        x = InceptionC_V4(x,
-                          basefilter=basefilter*2,
-                          num_group=num_group,
-                          num_group_11=num_group_11,
-                          name='in%dC' %(i+1))
-
-    #Average Pooling
-    x = mx.sym.Pooling(x, kernel=(lastout, lastout), pad=(1, 1), pool_type='avg', name='global_avgpool')
-
-    #Dropout
-    x = mx.sym.Dropout(x, p=0.2)
-
-    flatten = mx.sym.Flatten(x, name='flatten')
-    fc1 = mx.sym.FullyConnected(flatten, num_hidden=num_classes, name='fc1')
-    if dtype == 'float16':
-        fc1 = mx.sym.Cast(data=fc1, dtype=np.float32)
-    softmax = mx.sym.SoftmaxOutput(fc1, name='softmax')
-
-    return softmax
-
-
-
-######## Inception V3: Scalable, XCeptionized
-
-# First Stage
-def Inception7A_V3(data,
-                   basefilter=16,  #
-                   num_filters=[], # Length-7
-                   num_group=1, num_group_11=1,
-                   pool='avg', name=''):
-    assert len(num_filters)==7
-    num_1x1, num_3x3_red, num_3x3_1, num_3x3_2, num_5x5_red, num_5x5, proj = tuple( num_filters )
-    # Branch 1 : Conv11
-    tower_1x1 = Conv(data, basefilter*num_1x1, num_group=num_group_11,  name=('%s_conv' % name))
-    # Branch 2 : Conv11-Conv55
-    tower_5x5 = Conv(data, basefilter*num_5x5_red, num_group=num_group_11,  name=('%s_tower' % name), suffix='_conv')
-    tower_5x5 = Conv(tower_5x5, basefilter*num_5x5, num_group=num_group, kernel=(5, 5), pad=(2, 2), name=('%s_tower' % name), suffix='_conv_1')
-    # Branch 3 : Conv11-Conv33-Conv33
-    tower_3x3 = Conv(data, basefilter*num_3x3_red, num_group=num_group_11, name=('%s_tower_1' % name), suffix='_conv')
-    tower_3x3 = Conv(tower_3x3, basefilter*num_3x3_1, num_group=num_group, kernel=(3, 3), pad=(1, 1), name=('%s_tower_1' % name), suffix='_conv_1')
-    tower_3x3 = Conv(tower_3x3, basefilter*num_3x3_2, num_group=num_group, kernel=(3, 3), pad=(1, 1), name=('%s_tower_1' % name), suffix='_conv_2')
-    # Branch 4: Pool33-Conv11
-    pooling = mx.sym.Pooling(data=data, kernel=(3, 3), stride=(1, 1), pad=(1, 1), pool_type=pool, name=('%s_pool_%s_pool' % (pool, name)))
-    cproj = Conv(pooling, basefilter*proj, num_group=num_group_11, name=('%s_tower_2' %  name), suffix='_conv')
-    concat = mx.sym.Concat(*[tower_1x1, tower_5x5, tower_3x3, cproj], name='ch_concat_%s_chconcat' % name)
-    return concat
-
-
-# First Downsample
-
-# Field: (x-2)/2, original 38
-
-def Inception7B_V3(data,
-                basefilter=32, # Base=32
-                num_filters=[], # Length-4
-                num_group=1, num_group_11=1,
-                pool="max",
-                name=''):
-    
-    assert len(num_filters)==4          
-    num_3x3, num_d3x3_red, num_d3x3_1, num_d3x3_2 = tuple(num_filters)
-    
-    # Branch 1: Conv33
-    tower_3x3 = Conv(data, basefilter*num_3x3, num_group=num_group, kernel=(3, 3), pad=(0, 0), stride=(2, 2), name=('%s_conv' % name))
-    # Branch 2: Conv11-Conv33-Conv33
-    tower_d3x3 = Conv(data, basefilter*num_d3x3_red, num_group=num_group_11, name=('%s_tower' % name), suffix='_conv')
-    tower_d3x3 = Conv(tower_d3x3, basefilter*num_d3x3_1, num_group=num_group, kernel=(3, 3), pad=(1, 1), stride=(1, 1), name=('%s_tower' % name), suffix='_conv_1')
-    tower_d3x3 = Conv(tower_d3x3, basefilter*num_d3x3_2, num_group=num_group, kernel=(3, 3), pad=(0, 0), stride=(2, 2), name=('%s_tower' % name), suffix='_conv_2')
-    # Branch 3: Pool33
-    pooling = mx.sym.Pooling(data=data, kernel=(3, 3), stride=(2, 2), pad=(0,0), pool_type="max", name=('max_pool_%s_pool' % name))
-    concat = mx.sym.Concat(*[tower_3x3, tower_d3x3, pooling], name='ch_concat_%s_chconcat' % name)
-    return concat
-
-
-# Second Stage
-
-def Inception7C_V3(data,
-                basefilter=32, 
-                num_filters=[], # Length-10
-                num_group=1, num_group_11=1,
-                pool = 'avg',
-                name = ''):
-    
-    assert len(num_filters)==10
-    num_1x1, num_d7_red, num_d7_1, num_d7_2, num_q7_red, \
-        num_q7_1, num_q7_2, num_q7_3, num_q7_4, proj = tuple(num_filters)
-    
-    # Branch 1 : Conv11
-    tower_1x1 = Conv(data=data, num_filter=basefilter*num_1x1, kernel=(1, 1), name=('%s_conv' % name))
-    # Branch 2: Conv11-Conv17-Conv71
-    tower_d7 = Conv(data=data, num_filter=basefilter*num_d7_red, name=('%s_tower' % name), suffix='_conv')
-    tower_d7 = Conv(data=tower_d7, num_filter=basefilter*num_d7_1, num_group=num_group, kernel=(1, 7), pad=(0, 3), name=('%s_tower' % name), suffix='_conv_1')
-    tower_d7 = Conv(data=tower_d7, num_filter=basefilter*num_d7_2, num_group=num_group, kernel=(7, 1), pad=(3, 0), name=('%s_tower' % name), suffix='_conv_2')
-    # Branch 3:Conv11-Conv17-Conv71-Conv17-Conv71
-    tower_q7 = Conv(data=data, num_filter=basefilter*num_q7_red, num_group=num_group_11, name=('%s_tower_1' % name), suffix='_conv')
-    tower_q7 = Conv(data=tower_q7, num_filter=basefilter*num_q7_1, num_group=num_group, kernel=(7, 1), pad=(3, 0), name=('%s_tower_1' % name), suffix='_conv_1')
-    tower_q7 = Conv(data=tower_q7, num_filter=basefilter*num_q7_2, num_group=num_group, kernel=(1, 7), pad=(0, 3), name=('%s_tower_1' % name), suffix='_conv_2')
-    tower_q7 = Conv(data=tower_q7, num_filter=basefilter*num_q7_3, num_group=num_group, kernel=(7, 1), pad=(3, 0), name=('%s_tower_1' % name), suffix='_conv_3')
-    tower_q7 = Conv(data=tower_q7, num_filter=basefilter*num_q7_4, num_group=num_group, kernel=(1, 7), pad=(0, 3), name=('%s_tower_1' % name), suffix='_conv_4')
-    # Branch4: Pooling-Conv11
-    pooling = mx.sym.Pooling(data=data, kernel=(3, 3), stride=(1, 1), pad=(1, 1), pool_type=pool, name=('%s_pool_%s_pool' % (pool, name)))
-    cproj = Conv(data=pooling, num_filter=basefilter*proj, num_group=num_group_11, kernel=(1, 1), name=('%s_tower_2' %  name), suffix='_conv')
-    # concat
-    concat = mx.sym.Concat(*[tower_1x1, tower_d7, tower_q7, cproj], name='ch_concat_%s_chconcat' % name)
-    return concat
-
-
-# Second Downsample
-
-# Field Change: (x-2)/2, original 18
-
-def Inception7D_V3(data,
-                basefilter=64, 
-                num_filters=[], # Length-6
-                num_group=1, num_group_11=1,
-                pool='max',
-                name=''):
-    
-    assert len(num_filters)==6
-    
-    num_3x3_red, num_3x3,\
-                num_d7_3x3_red, num_d7_1, num_d7_2, num_d7_3x3 = tuple(num_filters)
-    
-    # Branch 1: Conv11-Conv33
-    tower_3x3 = Conv(data=data, num_filter=basefilter*num_3x3_red, num_group=num_group_11, name=('%s_tower' % name), suffix='_conv')
-    tower_3x3 = Conv(data=tower_3x3, num_filter=basefilter*num_3x3, num_group=num_group, kernel=(3, 3), pad=(0,0), stride=(2, 2), name=('%s_tower' % name), suffix='_conv_1')
-    # Branch 2: Conv11-Conv17-Conv71-Conv33
-    tower_d7_3x3 = Conv(data=data, num_filter=basefilter*num_d7_3x3_red, num_group=num_group_11,  name=('%s_tower_1' % name), suffix='_conv')
-    tower_d7_3x3 = Conv(data=tower_d7_3x3, num_filter=basefilter*num_d7_1, num_group=num_group, kernel=(1, 7), pad=(0, 3), name=('%s_tower_1' % name), suffix='_conv_1')
-    tower_d7_3x3 = Conv(data=tower_d7_3x3, num_filter=basefilter*num_d7_2, num_group=num_group, kernel=(7, 1), pad=(3, 0), name=('%s_tower_1' % name), suffix='_conv_2')
-    tower_d7_3x3 = Conv(data=tower_d7_3x3, num_filter=basefilter*num_d7_3x3, num_group=num_group, kernel=(3, 3), stride=(2, 2), name=('%s_tower_1' % name), suffix='_conv_3')
-    # Branch 3: Pool33
-    pooling = mx.sym.Pooling(data=data, kernel=(3, 3), stride=(2, 2), pool_type=pool, name=('%s_pool_%s_pool' % (pool, name)))
-    # concat
-    concat = mx.sym.Concat(*[tower_3x3, tower_d7_3x3, pooling], name='ch_concat_%s_chconcat' % name)
-    return concat
-
-
-# Doesn't change field
-
-def Inception7E_V3(data,
-                basefilter=64,
-                num_filters=[], # Length-9
-                num_group=1, num_group_11=1,
-                pool='max', 
-                name=''):
-    
-    assert len(num_filters)==9
-    
-    num_1x1, num_d3_red, num_d3_1, num_d3_2,\
-                num_3x3_d3_red, num_3x3, num_3x3_d3_1, num_3x3_d3_2, proj = tuple(num_filters)
-    
-    
-    # Branch 1: Conv11
-    tower_1x1 = Conv(data=data, num_filter=basefilter*num_1x1, num_group=num_group_11, kernel=(1, 1), name=('%s_conv' % name))
-    # Branch 2: Conv11-Conv13-Conv31
-    tower_d3 = Conv(data=data, num_filter=basefilter*num_d3_red, num_group=num_group_11, name=('%s_tower' % name), suffix='_conv')
-    tower_d3_a = Conv(data=tower_d3, num_filter=basefilter*num_d3_1, num_group=num_group, kernel=(1, 3), pad=(0, 1), name=('%s_tower' % name), suffix='_mixed_conv')
-    tower_d3_b = Conv(data=tower_d3, num_filter=basefilter*num_d3_2, num_group=num_group, kernel=(3, 1), pad=(1, 0), name=('%s_tower' % name), suffix='_mixed_conv_1')
-    # Branch 3: Conv11-Conv33-Conv13-Conv31
-    tower_3x3_d3 = Conv(data=data, num_filter=basefilter*num_3x3_d3_red, num_group=num_group_11, name=('%s_tower_1' % name), suffix='_conv')
-    tower_3x3_d3 = Conv(data=tower_3x3_d3, num_filter=basefilter*num_3x3, num_group=num_group, kernel=(3, 3), pad=(1, 1), name=('%s_tower_1' % name), suffix='_conv_1')
-    tower_3x3_d3_a = Conv(data=tower_3x3_d3, num_filter=basefilter*num_3x3_d3_1, num_group=num_group, kernel=(1, 3), pad=(0, 1), name=('%s_tower_1' % name), suffix='_mixed_conv')
-    tower_3x3_d3_b = Conv(data=tower_3x3_d3, num_filter=basefilter*num_3x3_d3_2, num_group=num_group, kernel=(3, 1), pad=(1, 0), name=('%s_tower_1' % name), suffix='_mixed_conv_1')
-    # Branch 4: Pool33-Conv11
-    pooling = mx.sym.Pooling(data=data, kernel=(3, 3), stride=(1, 1), pad=(1, 1), pool_type=pool, name=('%s_pool_%s_pool' % (pool, name)))
-    cproj = Conv(data=pooling, num_filter=basefilter*proj, kernel=(1, 1), num_group=num_group_11, name=('%s_tower_2' %  name), suffix='_conv')
-    # concat
-    concat = mx.sym.Concat(*[tower_1x1, tower_d3_a, tower_d3_b, tower_3x3_d3_a, tower_3x3_d3_b, cproj], name='ch_concat_%s_chconcat' % name)
-    return concat
-
-
-
-def get_symbol_V3(num_classes=1000, 
-                  basefilter=16, num_group=1, num_group_11=1, num_group_stem=1,
-                  lastout = 8,
-                  dtype='float32', **kwargs):
-    data = mx.sym.Variable(name="data")
-    if dtype == 'float32':
-        data = mx.sym.identity(data=data, name='id')
-    else:
-        if dtype == 'float16':
-            data = mx.sym.Cast(data=data, dtype=np.float16)
-    # Stem Stage 1
-    
-    # 299 
-    conv = Conv(data, basefilter*2, num_group=num_group_stem, kernel=(3, 3), stride=(2, 2), name="conv")
-    # 149
-    conv_1 = Conv(conv, basefilter*2, num_group=num_group_stem,  kernel=(3, 3), name="conv_1")
-    # 147
-    conv_2 = Conv(conv_1, basefilter*4, num_group=num_group_stem, kernel=(3, 3), pad=(1, 1), name="conv_2")
-    # 147
-    pool = mx.sym.Pooling(data=conv_2, kernel=(3, 3), stride=(2, 2), pool_type="max", name="pool")
-    # 73
-    # Stem Stage 2
-    conv_3 = Conv(pool, basefilter*5, num_group=num_group_11, kernel=(1, 1), name="conv_3")
-    conv_4 = Conv(conv_3, basefilter*12, num_group=num_group_stem, kernel=(3, 3), name="conv_4")
-    # 71
-    pool1 = mx.sym.Pooling(data=conv_4, kernel=(3, 3), stride=(2, 2), pool_type="max", name="pool1")
-    # 35
-    # Main Stage 1
-    in3a = Inception7A_V3(pool1, 
-                       basefilter=basefilter*1,
-                       num_filters=[4,4,6,6,3,4,2],
-                       num_group=num_group, num_group_11=num_group_11,
-                       pool="avg", name="mixed")
-    in3b = Inception7A_V3(in3a, 
-                       basefilter=basefilter*1,
-                       num_filters=[4,4,6,6,3,4,2],
-                       num_group=num_group, num_group_11=num_group_11,
-                       pool="avg", name="mixed_1")
-    in3c = Inception7A_V3(in3b,
-                       basefilter=basefilter*1,
-                       num_filters=[4,4,6,6,3,4,2],
-                       num_group=num_group, num_group_11=num_group_11,
-                       pool="avg", name="mixed_2")
-    in3d = Inception7B_V3(in3c,
-                       basefilter=basefilter*2,
-                       num_filters=[12,2,3,3],
-                       num_group=num_group, num_group_11=num_group_11,
-                       pool="max", name="mixed_3")
-    # Main Stage2
-    in4a = Inception7C_V3(in3d, 
-                       basefilter=basefilter*2,
-                       num_filters=[6,4,4,6,4,4,4,4,6,6],
-                       num_group=num_group, num_group_11=num_group_11,
-                       pool="avg", name="mixed_4")
-    in4b = Inception7C_V3(in4a, 
-                       basefilter=basefilter*2,
-                       num_filters=[6,5,5,6,5,5,5,5,6,6],
-                       num_group=num_group, num_group_11=num_group_11,
-                       pool="avg", name="mixed_5")
-    in4c = Inception7C_V3(in4b, 
-                       basefilter=basefilter*2,
-                       num_filters=[6,5,5,6,5,5,5,5,6,6],
-                       num_group=num_group, num_group_11=num_group_11,
-                       pool="avg", name="mixed_6")
-    in4d = Inception7C_V3(in4c,
-                       basefilter=basefilter*2,
-                       num_filters=[6,6,6,6,6,6,6,6,6,6],
-                       num_group=num_group, num_group_11=num_group_11,
-                       pool="avg", name="mixed_7")
-    in4e = Inception7D_V3(in4d, 
-                       basefilter=basefilter*4,
-                       num_filters=[3,5,3,3,3,3],
-                       num_group=num_group, num_group_11=num_group_11,
-                       pool="max", name="mixed_8")
-    # Main Stage3
-    in5a = Inception7E_V3(in4e,
-                       basefilter=basefilter*4,
-                       num_filters=[5,6,6,6,7,6,6,6,3],
-                       num_group=num_group, num_group_11=num_group_11,
-                       pool="avg", name="mixed_9")
-    in5b = Inception7E_V3(in5a, 
-                       basefilter=basefilter*4,
-                       num_filters=[5,6,6,6,7,6,6,6,3],
-                       num_group=num_group, num_group_11=num_group_11,
-                       pool="max", name="mixed_10")
-    # pool 
-    pool = mx.sym.Pooling(data=in5b, kernel=(lastout, lastout), stride=(1, 1), pool_type="avg", name="global_pool") # last=8
-    flatten = mx.sym.Flatten(data=pool, name="flatten")
-    fc1 = mx.sym.FullyConnected(data=flatten, num_hidden=num_classes, name='fc1')
-    if dtype == 'float16':
-        fc1 = mx.sym.Cast(data=fc1, dtype=np.float32)
-    softmax = mx.sym.SoftmaxOutput(data=fc1, name='softmax')
-    return softmax
-
diff --git a/src/marginalnet.py b/src/marginalnet.py
deleted file mode 100644
index dc44482..0000000
--- a/src/marginalnet.py
+++ /dev/null
@@ -1,310 +0,0 @@
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-import mxnet as mx
-import numpy as np
-
-
-
-def _Conv(data, num_filter, kernel, stride, pad, name, no_bias=False, workspace=256):
-    _weight = mx.symbol.Variable(name+'_weight')
-    _bias = mx.symbol.Variable(name+'_bias', lr_mult=2.0, wd_mult=0.0)
-    body = mx.sym.Convolution(data=data, weight = _weight, bias = _bias, num_filter=num_filter, kernel=kernel, stride=stride, pad=pad, no_bias = no_bias, workspace = workspace, name = name)
-    return body
-
-def Conv(**kwargs):
-    name = kwargs.get('name')
-    _weight = mx.symbol.Variable(name+'_weight')
-    _bias = mx.symbol.Variable(name+'_bias', lr_mult=2.0, wd_mult=0.0)
-    body = mx.sym.Convolution(weight = _weight, bias = _bias, **kwargs)
-    return body
-
-
-def Act(data, name):
-    body = mx.sym.LeakyReLU(data = data, act_type='prelu', name = name)
-    return body
-
-def resnet_unit0(data, num_filter, name, workspace = 256):
-  bn_mom = 0.9
-  body = Conv(data=data, num_filter=num_filter, kernel=(3,3), stride=(1,1), pad=(1, 1),
-                            name=name+"_conv", workspace=workspace)
-  body = mx.sym.BatchNorm(data=body, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name+'_bn')
-  body = Act(data=body, name=name+'_relu')
-  return body
-
-def resnet_unit1(data, num_filter, name, dim_match=True, workspace = 256):
-  bn_mom = 0.9
-  shortcut = data
-  body = Conv(data=data, num_filter=num_filter, kernel=(3,3), stride=(1,1), pad=(1, 1),
-                            name=name+"_conv1", workspace=workspace)
-  body = mx.sym.BatchNorm(data=body, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name+'_bn1')
-  body = Act(data=body, name=name+'_relu1')
-  body = Conv(data=body, num_filter=num_filter, kernel=(3,3), stride=(1,1), pad=(1, 1),
-                            name=name+"_conv2", workspace=workspace)
-  body = mx.sym.BatchNorm(data=body, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name+'_bn2')
-  if dim_match:
-    body = body+shortcut
-  body = Act(data=body, name=name+'_relu2')
-  return body
-
-def resnet_unit2(data, num_filter, name, dim_match=True, workspace = 256):
-  bn_mom = 0.9
-  shortcut = data
-  body = mx.sym.BatchNorm(data=data, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name+'_bn1')
-  body = Act(data=body, name=name+'_relu1')
-  body = Conv(data=body, num_filter=num_filter, kernel=(3,3), stride=(1,1), pad=(1, 1),
-                            name=name+"_conv1", workspace=workspace)
-  #body = mx.symbol.Dropout(data=body, p=0.2)
-  body = mx.sym.BatchNorm(data=body, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name+'_bn2')
-  body = Act(data=body, name=name+'_relu2')
-  body = Conv(data=body, num_filter=num_filter, kernel=(3,3), stride=(1,1), pad=(1, 1),
-                            name=name+"_conv2", workspace=workspace)
-  if dim_match:
-    body = body+shortcut
-  return body
-
-def resnet_unit3(data, num_filter, name, dim_match=True, workspace = 256):
-  bn_mom = 0.9
-  shortcut = data
-  body = mx.sym.BatchNorm(data=data, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name+'_bn1')
-  body = Conv(data=body, num_filter=num_filter, kernel=(3,3), stride=(1,1), pad=(1, 1),
-                            name=name+"_conv1", workspace=workspace)
-  body = mx.sym.BatchNorm(data=body, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name+'_bn2')
-  body = Act(data=body, name=name+'_relu1')
-  body = Conv(data=body, num_filter=num_filter, kernel=(3,3), stride=(1,1), pad=(1, 1),
-                            name=name+"_conv2", workspace=workspace)
-  body = mx.sym.BatchNorm(data=body, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name+'_bn3')
-  if dim_match:
-    body = body+shortcut
-  return body
-
-def resnet_unit4(data, num_filter, name, dim_match=True, workspace = 256):
-  bn_mom = 0.9
-  shortcut = data
-  body = mx.sym.BatchNorm(data=data, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name+'_bn1')
-  body = Act(data=body, name=name+'_relu1')
-  body = Conv(data=data, num_filter=int(num_filter*0.25), kernel=(1,1), stride=(1,1), pad=(0,0),
-                            name=name+"_conv1", workspace=workspace)
-  body = mx.sym.BatchNorm(data=body, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name+'_bn2')
-  body = Act(data=body, name=name+'_relu2')
-  body = Conv(data=body, num_filter=int(num_filter*0.25), kernel=(3,3), stride=(1,1), pad=(1, 1),
-                            name=name+"_conv2", workspace=workspace)
-  body = mx.sym.BatchNorm(data=body, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name+'_bn3')
-  body = Act(data=body, name=name+'_relu3')
-  body = Conv(data=body, num_filter=num_filter, kernel=(1,1), stride=(1,1), pad=(0, 0),
-                            name=name+"_conv3", workspace=workspace)
-  if dim_match:
-    body = body+shortcut
-  return body
-
-def resnet_unit5(data, num_filter, name, dim_match=True, workspace = 256):
-  bn_mom = 0.9
-  shortcut = data
-  body = Conv(data=data, num_filter=int(num_filter*0.5), kernel=(1,1), stride=(1,1), pad=(0,0),
-                            name=name+"_conv1", workspace=workspace)
-  body = mx.sym.BatchNorm(data=body, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name+'_bn1')
-  body = Act(data=body, name=name+'_relu1')
-  body = Conv(data=body, num_filter=int(num_filter*0.5), kernel=(3,3), stride=(1,1), pad=(1, 1), num_group=32,
-                            name=name+"_conv2", workspace=workspace)
-  body = mx.sym.BatchNorm(data=body, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name+'_bn2')
-  body = Act(data=body, name=name+'_relu2')
-  body = Conv(data=body, num_filter=num_filter, kernel=(1,1), stride=(1,1), pad=(0,0),
-                            name=name+"_conv3", workspace=workspace)
-  body = mx.sym.BatchNorm(data=body, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name+'_bn3')
-  if dim_match:
-    body = body+shortcut
-  body = Act(data=body, name=name+'_relu3')
-  return body
-
-def resnet_unit6(data, num_filter, name, dim_match=True, workspace = 256):
-  bn_mom = 0.9
-  shortcut = data
-  body = Conv(data=data, num_filter=num_filter*4, kernel=(1,1), stride=(1,1), pad=(0,0),
-                            name=name+"_conv1", workspace=workspace)
-  body = mx.sym.BatchNorm(data=body, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name+'_bn1')
-  body = Act(data=body, name=name+'_relu1')
-  body = Conv(data=body, num_filter=num_filter*4, kernel=(3,3), stride=(1,1), pad=(1, 1), num_group=32,
-                            name=name+"_conv2", workspace=workspace)
-  body = mx.sym.BatchNorm(data=body, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name+'_bn2')
-  body = Act(data=body, name=name+'_relu2')
-  body = Conv(data=body, num_filter=num_filter, kernel=(1,1), stride=(1,1), pad=(0,0),
-                            name=name+"_conv3", workspace=workspace)
-  body = mx.sym.BatchNorm(data=body, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name+'_bn3')
-  if dim_match:
-    body = body+shortcut
-  body = Act(data=body, name=name+'_relu3')
-  return body
-
-def resnet_unit7(data, num_filter, name, dim_match=True, workspace = 256):
-  #se block
-  bn_mom = 0.9
-  shortcut = data
-  body = mx.sym.BatchNorm(data=data, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name+'_bn1')
-  body = Conv(data=body, num_filter=num_filter, kernel=(3,3), stride=(1,1), pad=(1, 1),
-                            name=name+"_conv1", workspace=workspace)
-  body = mx.sym.BatchNorm(data=body, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name+'_bn2')
-  body = Act(data=body, name=name+'_relu1')
-  body = Conv(data=body, num_filter=num_filter, kernel=(3,3), stride=(1,1), pad=(1, 1),
-                            name=name+"_conv2", workspace=workspace)
-  res = mx.sym.BatchNorm(data=body, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name+'_bn3')
-
-  body = mx.sym.Pooling(data=res, global_pool=True, kernel=(7, 7), pool_type='avg', name=name+'_se_pool1')
-  body = Conv(data=body, num_filter=num_filter//16, kernel=(1,1), stride=(1,1), pad=(0,0),
-                            name=name+"_se_conv1", workspace=workspace)
-  body = Act(data=body, name=name+'_se_relu1')
-  body = Conv(data=body, num_filter=num_filter, kernel=(1,1), stride=(1,1), pad=(0,0),
-                            name=name+"_se_conv2", workspace=workspace)
-  body = mx.symbol.Activation(data=body, act_type='sigmoid', name=name+"_se_sigmoid")
-  body = mx.symbol.broadcast_mul(res, body)
-
-  if dim_match:
-    body = body+shortcut
-  return body
-
-def resnet_unit100(data, num_filter, name, dim_match=True, workspace = 256):
-  bn_mom = 0.9
-  body = mx.sym.BatchNorm(data=data, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name+'_bn1')
-  body = Conv(data=body, num_filter=num_filter, kernel=(3,3), stride=(1,1), pad=(1, 1),
-                            name=name+"_conv1", workspace=workspace)
-  body = mx.sym.BatchNorm(data=body, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name+'_bn2')
-  act = Act(data=body, name=name+'_relu1')
-  body = Conv(data=act, num_filter=num_filter, kernel=(3,3), stride=(1,1), pad=(1, 1),
-                            name=name+"_conv2", workspace=workspace)
-  body = mx.sym.BatchNorm(data=body, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name+'_bn3')
-  if not dim_match:
-    shortcut = Conv(data=act, num_filter=num_filter, kernel=(1,1), pad=(0,0), name=name+"_shortcut", workspace=workspace)
-  else:
-    shortcut = data
-  body = body+shortcut
-  return body
-
-def resnet_unit(rtype, data, num_filter, name, dim_match=True, workspace = 256):
-  if rtype==1:
-    return resnet_unit1(data=data, num_filter=num_filter, name=name, dim_match=dim_match, workspace=workspace)
-  elif rtype==2:
-    return resnet_unit2(data=data, num_filter=num_filter, name=name, dim_match=dim_match, workspace=workspace)
-  elif rtype==3:
-    return resnet_unit3(data=data, num_filter=num_filter, name=name, dim_match=dim_match, workspace=workspace)
-  elif rtype==4:
-    return resnet_unit4(data=data, num_filter=num_filter, name=name, dim_match=dim_match, workspace=workspace)
-  elif rtype==5:
-    return resnet_unit5(data=data, num_filter=num_filter, name=name, dim_match=dim_match, workspace=workspace)
-  elif rtype==6:
-    return resnet_unit6(data=data, num_filter=num_filter, name=name, dim_match=dim_match, workspace=workspace)
-  elif rtype==7:
-    return resnet_unit7(data=data, num_filter=num_filter, name=name, dim_match=dim_match, workspace=workspace)
-  elif rtype==100:
-    return resnet_unit100(data=data, num_filter=num_filter, name=name, dim_match=dim_match, workspace=workspace)
-  else:
-    assert(False)
-
-def resnet(data, units, filters, rtype, workspace):
-  body = resnet_unit0(data=data, num_filter=32, name="stage%d_unit%d"%(0, 0))
-  for i in xrange(len(units)):
-    f = filters[i]
-    dim_match = False
-    if i==0:
-      dim_match = True
-    if rtype>=100:
-      body = resnet_unit(rtype=rtype, data=body, num_filter=f, name="stage%d_unit%d"%(i+1, 0), dim_match=dim_match) # do not connect to last layer, dim not match
-    else:
-      body = resnet_unit0(data=body, num_filter=f, name="stage%d_unit%d"%(i+1, 0)) # do not connect to last layer, dim not match
-    body = mx.sym.Pooling(data=body, kernel=(2, 2), stride=(2,2), pad=(0,0), pool_type='max', name="stage%d_pool"%(i+1))
-    for j in xrange(units[i]):
-      body = resnet_unit(rtype=rtype, data=body, num_filter=f, name="stage%d_unit%d"%(i+1, j+1), dim_match=True)
-
-  return body
-
-def get_symbol(num_classes, num_layers, conv_workspace=256):
-    data = mx.symbol.Variable('data')
-    bn_mom = 0.9
-    if num_layers<29:
-      data = mx.sym.BatchNorm(data=data, fix_gamma=True, eps=2e-5, momentum=bn_mom, name='bn_data')
-    else:
-      data = data-127.5
-      data = data*0.0078125
-    units = [1,2,5,3] # all number of layers = sum(units)*2+len(units)+1
-    filter_list = [64, 128, 256, 512]
-    rtype = 1
-    ftype = 1
-    if num_layers==27:
-      rtype = 1
-    elif num_layers==28:
-      rtype = 2
-    elif num_layers==29:
-      rtype = 3
-      #use_last_bn = False
-      #use_dropout = False
-    elif num_layers==30:
-      filter_list = [64, 256, 512, 1024]
-      rtype = 3
-    elif num_layers==31:
-      rtype = 100
-    elif num_layers==51:
-      units = [2,3,15,3]
-      rtype = 3
-    elif num_layers==52:
-      filter_list = [64, 256, 512, 1024]
-      units = [2,3,15,3]
-      rtype = 3
-    elif num_layers==53: #se block
-      units = [2,3,15,3]
-      rtype = 7
-    elif num_layers==74:
-      units = [2,3,15,3]
-      rtype = 4
-    elif num_layers==75:
-      units = [2,3,15,3]
-      rtype = 5
-    elif num_layers==76:
-      filter_list = [16, 32, 64, 128]
-      units = [2,3,15,3]
-      rtype = 6
-    else:
-      assert(False)
-
-    body = resnet(data = data, units = units, filters = filter_list, rtype=rtype, workspace = conv_workspace)
-    _weight = mx.symbol.Variable("fc1_weight")
-    _bias = mx.symbol.Variable("fc1_bias", lr_mult=2.0, wd_mult=0.0)
-    if ftype==0:
-      fc1 = mx.sym.FullyConnected(data=body, weight=_weight, bias=_bias, num_hidden=num_classes, name='fc1')
-    elif ftype==1:
-      body = mx.symbol.Dropout(data=body, p=0.4)
-      fc1 = mx.sym.FullyConnected(data=body, weight=_weight, bias=_bias, num_hidden=num_classes, name='pre_fc1')
-      fc1 = mx.sym.BatchNorm(data=fc1, fix_gamma=True, eps=2e-5, momentum=bn_mom, name='fc1')
-    else:
-      body = mx.sym.BatchNorm(data=body, fix_gamma=False, eps=2e-5, momentum=bn_mom, name='bn1')
-      body = mx.sym.Activation(data=body, act_type='relu', name='relu1')
-      body = mx.sym.Pooling(data=body, global_pool=True, kernel=(7, 7), pool_type='avg', name='pool1')
-      body = mx.sym.Flatten(data=body)
-      fc1 = mx.sym.FullyConnected(data=body, weight=_weight, bias=_bias, num_hidden=num_classes, name='fc1')
-
-    return fc1
-
-def init_weights(sym, data_shape_dict, num_layers):
-  arg_name = sym.list_arguments()
-  aux_name = sym.list_auxiliary_states()
-  arg_shape, aaa, aux_shape = sym.infer_shape(**data_shape_dict)
-  #print(data_shape_dict)
-  #print(arg_name)
-  #print(arg_shape)
-  arg_params = {}
-  aux_params = None
-  #print(aaa)
-  #print(aux_shape)
-  arg_shape_dict = dict(zip(arg_name, arg_shape))
-  aux_shape_dict = dict(zip(aux_name, aux_shape))
-  #print(aux_shape)
-  #print(aux_params)
-  #print(arg_shape_dict)
-  for k,v in arg_shape_dict.iteritems():
-    #print('find', k)
-    if k.endswith('_weight') and k.find('_conv')>=0:
-      if not k.find('_unit0_')>=0:
-        arg_params[k] = mx.random.normal(0, 0.01, shape=v)
-        print('init', k)
-    if k.endswith('_bias'):
-      arg_params[k] = mx.nd.zeros(shape=v)
-      print('init', k)
-  return arg_params, aux_params
-
diff --git a/src/resnet.py b/src/resnet.py
deleted file mode 100644
index be49860..0000000
--- a/src/resnet.py
+++ /dev/null
@@ -1,196 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-'''
-Adapted from https://github.com/tornadomeet/ResNet/blob/master/symbol_resnet.py
-Original author Wei Wu
-
-Implemented the following paper:
-
-Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun. "Identity Mappings in Deep Residual Networks"
-'''
-import mxnet as mx
-import numpy as np
-
-def residual_unit(data, num_filter, stride, dim_match, name, bottle_neck=True, bn_mom=0.9, workspace=256, memonger=False):
-    """Return ResNet Unit symbol for building ResNet
-    Parameters
-    ----------
-    data : str
-        Input data
-    num_filter : int
-        Number of output channels
-    bnf : int
-        Bottle neck channels factor with regard to num_filter
-    stride : tuple
-        Stride used in convolution
-    dim_match : Boolean
-        True means channel number between input and output is the same, otherwise means differ
-    name : str
-        Base name of the operators
-    workspace : int
-        Workspace used in convolution operator
-    """
-    if bottle_neck:
-        # the same as https://github.com/facebook/fb.resnet.torch#notes, a bit difference with origin paper
-        bn1 = mx.sym.BatchNorm(data=data, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name + '_bn1')
-        act1 = mx.sym.Activation(data=bn1, act_type='relu', name=name + '_relu1')
-        conv1 = mx.sym.Convolution(data=act1, num_filter=int(num_filter*0.25), kernel=(1,1), stride=(1,1), pad=(0,0),
-                                   no_bias=True, workspace=workspace, name=name + '_conv1')
-        bn2 = mx.sym.BatchNorm(data=conv1, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name + '_bn2')
-        act2 = mx.sym.Activation(data=bn2, act_type='relu', name=name + '_relu2')
-        conv2 = mx.sym.Convolution(data=act2, num_filter=int(num_filter*0.25), kernel=(3,3), stride=stride, pad=(1,1),
-                                   no_bias=True, workspace=workspace, name=name + '_conv2')
-        bn3 = mx.sym.BatchNorm(data=conv2, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name + '_bn3')
-        act3 = mx.sym.Activation(data=bn3, act_type='relu', name=name + '_relu3')
-        conv3 = mx.sym.Convolution(data=act3, num_filter=num_filter, kernel=(1,1), stride=(1,1), pad=(0,0), no_bias=True,
-                                   workspace=workspace, name=name + '_conv3')
-        if dim_match:
-            shortcut = data
-        else:
-            shortcut = mx.sym.Convolution(data=act1, num_filter=num_filter, kernel=(1,1), stride=stride, no_bias=True,
-                                            workspace=workspace, name=name+'_sc')
-        if memonger:
-            shortcut._set_attr(mirror_stage='True')
-        return conv3 + shortcut
-    else:
-        bn1 = mx.sym.BatchNorm(data=data, fix_gamma=False, momentum=bn_mom, eps=2e-5, name=name + '_bn1')
-        act1 = mx.sym.Activation(data=bn1, act_type='relu', name=name + '_relu1')
-        conv1 = mx.sym.Convolution(data=act1, num_filter=num_filter, kernel=(3,3), stride=stride, pad=(1,1),
-                                      no_bias=True, workspace=workspace, name=name + '_conv1')
-        bn2 = mx.sym.BatchNorm(data=conv1, fix_gamma=False, momentum=bn_mom, eps=2e-5, name=name + '_bn2')
-        act2 = mx.sym.Activation(data=bn2, act_type='relu', name=name + '_relu2')
-        conv2 = mx.sym.Convolution(data=act2, num_filter=num_filter, kernel=(3,3), stride=(1,1), pad=(1,1),
-                                      no_bias=True, workspace=workspace, name=name + '_conv2')
-        if dim_match:
-            shortcut = data
-        else:
-            shortcut = mx.sym.Convolution(data=act1, num_filter=num_filter, kernel=(1,1), stride=stride, no_bias=True,
-                                            workspace=workspace, name=name+'_sc')
-        if memonger:
-            shortcut._set_attr(mirror_stage='True')
-        return conv2 + shortcut
-
-def resnet(units, num_stages, filter_list, num_classes, image_shape, bottle_neck=True, bn_mom=0.9, workspace=256, dtype='float32', memonger=False):
-    """Return ResNet symbol of
-    Parameters
-    ----------
-    units : list
-        Number of units in each stage
-    num_stages : int
-        Number of stage
-    filter_list : list
-        Channel size of each stage
-    num_classes : int
-        Ouput size of symbol
-    dataset : str
-        Dataset type, only cifar10 and imagenet supports
-    workspace : int
-        Workspace used in convolution operator
-    dtype : str
-        Precision (float32 or float16)
-    """
-    num_unit = len(units)
-    assert(num_unit == num_stages)
-    data = mx.sym.Variable(name='data')
-    if dtype == 'float32':
-        data = mx.sym.identity(data=data, name='id')
-    else:
-        if dtype == 'float16':
-            data = mx.sym.Cast(data=data, dtype=np.float16)
-    data = mx.sym.BatchNorm(data=data, fix_gamma=True, eps=2e-5, momentum=bn_mom, name='bn_data')
-    (nchannel, height, width) = image_shape
-    if height <= 32:            # such as cifar10
-        body = mx.sym.Convolution(data=data, num_filter=filter_list[0], kernel=(3, 3), stride=(1,1), pad=(1, 1),
-                                  no_bias=True, name="conv0", workspace=workspace)
-    else:                       # often expected to be 224 such as imagenet
-        body = mx.sym.Convolution(data=data, num_filter=filter_list[0], kernel=(7, 7), stride=(2,2), pad=(3, 3),
-                                  no_bias=True, name="conv0", workspace=workspace)
-        body = mx.sym.BatchNorm(data=body, fix_gamma=False, eps=2e-5, momentum=bn_mom, name='bn0')
-        body = mx.sym.Activation(data=body, act_type='relu', name='relu0')
-        body = mx.sym.Pooling(data=body, kernel=(3, 3), stride=(2,2), pad=(1,1), pool_type='max')
-
-    for i in range(num_stages):
-        body = residual_unit(body, filter_list[i+1], (1 if i==0 else 2, 1 if i==0 else 2), False,
-                             name='stage%d_unit%d' % (i + 1, 1), bottle_neck=bottle_neck, workspace=workspace,
-                             memonger=memonger)
-        for j in range(units[i]-1):
-            body = residual_unit(body, filter_list[i+1], (1,1), True, name='stage%d_unit%d' % (i + 1, j + 2),
-                                 bottle_neck=bottle_neck, workspace=workspace, memonger=memonger)
-    bn1 = mx.sym.BatchNorm(data=body, fix_gamma=False, eps=2e-5, momentum=bn_mom, name='bn1')
-    relu1 = mx.sym.Activation(data=bn1, act_type='relu', name='relu1')
-    # Although kernel is not used here when global_pool=True, we should put one
-    pool1 = mx.sym.Pooling(data=relu1, global_pool=True, kernel=(7, 7), pool_type='avg', name='pool1')
-    flat = mx.sym.Flatten(data=pool1)
-    fc1 = mx.sym.FullyConnected(data=flat, num_hidden=num_classes, name='fc1')
-    if dtype == 'float16':
-        fc1 = mx.sym.Cast(data=fc1, dtype=np.float32)
-    return mx.sym.SoftmaxOutput(data=fc1, name='softmax')
-
-def get_symbol(num_classes, num_layers, image_shape, conv_workspace=256, dtype='float32', **kwargs):
-    """
-    Adapted from https://github.com/tornadomeet/ResNet/blob/master/train_resnet.py
-    Original author Wei Wu
-    """
-    image_shape = [int(l) for l in image_shape.split(',')]
-    (nchannel, height, width) = image_shape
-    if height <= 28:
-        num_stages = 3
-        if (num_layers-2) % 9 == 0 and num_layers >= 164:
-            per_unit = [(num_layers-2)//9]
-            filter_list = [16, 64, 128, 256]
-            bottle_neck = True
-        elif (num_layers-2) % 6 == 0 and num_layers < 164:
-            per_unit = [(num_layers-2)//6]
-            filter_list = [16, 16, 32, 64]
-            bottle_neck = False
-        else:
-            raise ValueError("no experiments done on num_layers {}, you can do it yourself".format(num_layers))
-        units = per_unit * num_stages
-    else:
-        if num_layers >= 50:
-            filter_list = [64, 256, 512, 1024, 2048]
-            bottle_neck = True
-        else:
-            filter_list = [64, 64, 128, 256, 512]
-            bottle_neck = False
-        num_stages = 4
-        if num_layers == 18:
-            units = [2, 2, 2, 2]
-        elif num_layers == 34:
-            units = [3, 4, 6, 3]
-        elif num_layers == 50:
-            units = [3, 4, 6, 3]
-        elif num_layers == 101:
-            units = [3, 4, 23, 3]
-        elif num_layers == 152:
-            units = [3, 8, 36, 3]
-        elif num_layers == 200:
-            units = [3, 24, 36, 3]
-        elif num_layers == 269:
-            units = [3, 30, 48, 8]
-        else:
-            raise ValueError("no experiments done on num_layers {}, you can do it yourself".format(num_layers))
-
-    return resnet(units       = units,
-                  num_stages  = num_stages,
-                  filter_list = filter_list,
-                  num_classes = num_classes,
-                  image_shape = image_shape,
-                  bottle_neck = bottle_neck,
-                  workspace   = conv_workspace,
-                  dtype       = dtype)
diff --git a/src/resnext.py b/src/resnext.py
deleted file mode 100644
index 5974943..0000000
--- a/src/resnext.py
+++ /dev/null
@@ -1,210 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-'''
-Adapted from https://github.com/tornadomeet/ResNet/blob/master/symbol_resnet.py
-Original author Wei Wu
-
-Implemented the following paper:
-Saining Xie, Ross Girshick, Piotr Dollar, Zhuowen Tu, Kaiming He. "Aggregated Residual Transformations for Deep Neural Network"
-'''
-import mxnet as mx
-import numpy as np
-
-def residual_unit(data, num_filter, stride, dim_match, name, bottle_neck=True, num_group=32, bn_mom=0.9, workspace=256, memonger=False):
-    """Return ResNet Unit symbol for building ResNet
-    Parameters
-    ----------
-    data : str
-        Input data
-    num_filter : int
-        Number of output channels
-    bnf : int
-        Bottle neck channels factor with regard to num_filter
-    stride : tuple
-        Stride used in convolution
-    dim_match : Boolean
-        True means channel number between input and output is the same, otherwise means differ
-    name : str
-        Base name of the operators
-    workspace : int
-        Workspace used in convolution operator
-    """
-    if bottle_neck:
-        # the same as https://github.com/facebook/fb.resnet.torch#notes, a bit difference with origin paper
-
-        conv1 = mx.sym.Convolution(data=data, num_filter=int(num_filter*0.5), kernel=(1,1), stride=(1,1), pad=(0,0),
-                                      no_bias=True, workspace=workspace, name=name + '_conv1')
-        bn1 = mx.sym.BatchNorm(data=conv1, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name + '_bn1')
-        act1 = mx.sym.Activation(data=bn1, act_type='relu', name=name + '_relu1')
-
-
-        conv2 = mx.sym.Convolution(data=act1, num_filter=int(num_filter*0.5), num_group=num_group, kernel=(3,3), stride=stride, pad=(1,1),
-                                      no_bias=True, workspace=workspace, name=name + '_conv2')
-        bn2 = mx.sym.BatchNorm(data=conv2, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name + '_bn2')
-        act2 = mx.sym.Activation(data=bn2, act_type='relu', name=name + '_relu2')
-
-
-        conv3 = mx.sym.Convolution(data=act2, num_filter=num_filter, kernel=(1,1), stride=(1,1), pad=(0,0), no_bias=True,
-                                   workspace=workspace, name=name + '_conv3')
-        bn3 = mx.sym.BatchNorm(data=conv3, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name + '_bn3')
-
-        if dim_match:
-            shortcut = data
-        else:
-            shortcut_conv = mx.sym.Convolution(data=data, num_filter=num_filter, kernel=(1,1), stride=stride, no_bias=True,
-                                            workspace=workspace, name=name+'_sc')
-            shortcut = mx.sym.BatchNorm(data=shortcut_conv, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name + '_sc_bn')
-
-        if memonger:
-            shortcut._set_attr(mirror_stage='True')
-        eltwise =  bn3 + shortcut
-        return mx.sym.Activation(data=eltwise, act_type='relu', name=name + '_relu')
-    else:
-
-        conv1 = mx.sym.Convolution(data=data, num_filter=num_filter, kernel=(3,3), stride=stride, pad=(1,1),
-                                      no_bias=True, workspace=workspace, name=name + '_conv1')
-        bn1 = mx.sym.BatchNorm(data=conv1, fix_gamma=False, momentum=bn_mom, eps=2e-5, name=name + '_bn1')
-        act1 = mx.sym.Activation(data=bn1, act_type='relu', name=name + '_relu1')
-
-
-        conv2 = mx.sym.Convolution(data=act1, num_filter=num_filter, kernel=(3,3), stride=(1,1), pad=(1,1),
-                                      no_bias=True, workspace=workspace, name=name + '_conv2')
-        bn2 = mx.sym.BatchNorm(data=conv2, fix_gamma=False, momentum=bn_mom, eps=2e-5, name=name + '_bn2')
-
-        if dim_match:
-            shortcut = data
-        else:
-            shortcut_conv = mx.sym.Convolution(data=data, num_filter=num_filter, kernel=(1,1), stride=stride, no_bias=True,
-                                            workspace=workspace, name=name+'_sc')
-            shortcut = mx.sym.BatchNorm(data=shortcut_conv, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name + '_sc_bn')
-
-        if memonger:
-            shortcut._set_attr(mirror_stage='True')
-        eltwise = bn2 + shortcut
-        return mx.sym.Activation(data=eltwise, act_type='relu', name=name + '_relu')
-
-def resnext(units, num_stages, filter_list, num_classes, num_group, image_shape, bottle_neck=True, bn_mom=0.9, workspace=256, dtype='float32', memonger=False):
-    """Return ResNeXt symbol of
-    Parameters
-    ----------
-    units : list
-        Number of units in each stage
-    num_stages : int
-        Number of stage
-    filter_list : list
-        Channel size of each stage
-    num_classes : int
-        Ouput size of symbol
-    num_groupes: int
-    Number of conv groups
-    dataset : str
-        Dataset type, only cifar10 and imagenet supports
-    workspace : int
-        Workspace used in convolution operator
-    dtype : str
-        Precision (float32 or float16)
-    """
-    num_unit = len(units)
-    assert(num_unit == num_stages)
-    data = mx.sym.Variable(name='data')
-    if dtype == 'float32':
-        data = mx.sym.identity(data=data, name='id')
-    else:
-        if dtype == 'float16':
-            data = mx.sym.Cast(data=data, dtype=np.float16)
-    data = mx.sym.BatchNorm(data=data, fix_gamma=True, eps=2e-5, momentum=bn_mom, name='bn_data')
-    (nchannel, height, width) = image_shape
-    if height <= 32:            # such as cifar10
-        body = mx.sym.Convolution(data=data, num_filter=filter_list[0], kernel=(3, 3), stride=(1,1), pad=(1, 1),
-                                  no_bias=True, name="conv0", workspace=workspace)
-    else:                       # often expected to be 224 such as imagenet
-        body = mx.sym.Convolution(data=data, num_filter=filter_list[0], kernel=(7, 7), stride=(2,2), pad=(3, 3),
-                                  no_bias=True, name="conv0", workspace=workspace)
-        body = mx.sym.BatchNorm(data=body, fix_gamma=False, eps=2e-5, momentum=bn_mom, name='bn0')
-        body = mx.sym.Activation(data=body, act_type='relu', name='relu0')
-        body = mx.sym.Pooling(data=body, kernel=(3, 3), stride=(2,2), pad=(1,1), pool_type='max')
-
-    for i in range(num_stages):
-        body = residual_unit(body, filter_list[i+1], (1 if i==0 else 2, 1 if i==0 else 2), False,
-                             name='stage%d_unit%d' % (i + 1, 1), bottle_neck=bottle_neck, num_group=num_group,
-                             bn_mom=bn_mom, workspace=workspace, memonger=memonger)
-        for j in range(units[i]-1):
-            body = residual_unit(body, filter_list[i+1], (1,1), True, name='stage%d_unit%d' % (i + 1, j + 2),
-                                 bottle_neck=bottle_neck, num_group=num_group, bn_mom=bn_mom, workspace=workspace, memonger=memonger)
-
-    pool1 = mx.sym.Pooling(data=body, global_pool=True, kernel=(7, 7), pool_type='avg', name='pool1')
-    flat = mx.sym.Flatten(data=pool1)
-    fc1 = mx.sym.FullyConnected(data=flat, num_hidden=num_classes, name='fc1')
-    if dtype == 'float16':
-        fc1 = mx.sym.Cast(data=fc1, dtype=np.float32)
-    return mx.sym.SoftmaxOutput(data=fc1, name='softmax')
-
-def get_symbol(num_classes, num_layers, image_shape, num_group=32, conv_workspace=256, dtype='float32', **kwargs):
-    """
-    Adapted from https://github.com/tornadomeet/ResNet/blob/master/train_resnet.py
-    Original author Wei Wu
-    """
-    image_shape = [int(l) for l in image_shape.split(',')]
-    (nchannel, height, width) = image_shape
-    if height <= 32:
-        num_stages = 3
-        if (num_layers-2) % 9 == 0 and num_layers >= 164:
-            per_unit = [(num_layers-2)//9]
-            filter_list = [16, 64, 128, 256]
-            bottle_neck = True
-        elif (num_layers-2) % 6 == 0 and num_layers < 164:
-            per_unit = [(num_layers-2)//6]
-            filter_list = [16, 16, 32, 64]
-            bottle_neck = False
-        else:
-            raise ValueError("no experiments done on num_layers {}, you can do it yourself".format(num_layers))
-        units = per_unit * num_stages
-    else:
-        if num_layers >= 50:
-            filter_list = [64, 256, 512, 1024, 2048]
-            bottle_neck = True
-        else:
-            filter_list = [64, 64, 128, 256, 512]
-            bottle_neck = False
-        num_stages = 4
-        if num_layers == 18:
-            units = [2, 2, 2, 2]
-        elif num_layers == 34:
-            units = [3, 4, 6, 3]
-        elif num_layers == 50:
-            units = [3, 4, 6, 3]
-        elif num_layers == 101:
-            units = [3, 4, 23, 3]
-        elif num_layers == 152:
-            units = [3, 8, 36, 3]
-        elif num_layers == 200:
-            units = [3, 24, 36, 3]
-        elif num_layers == 269:
-            units = [3, 30, 48, 8]
-        else:
-            raise ValueError("no experiments done on num_layers {}, you can do it yourself".format(num_layers))
-
-    return resnext(units      = units,
-                  num_stages  = num_stages,
-                  filter_list = filter_list,
-                  num_classes = num_classes,
-                  num_group   = num_group,
-                  image_shape = image_shape,
-                  bottle_neck = bottle_neck,
-                  workspace   = conv_workspace,
-                  dtype       = dtype)
diff --git a/src/spherenet.py b/src/spherenet.py
deleted file mode 100644
index 43d37cb..0000000
--- a/src/spherenet.py
+++ /dev/null
@@ -1,102 +0,0 @@
-import mxnet as mx
-import numpy as np
-import math
-from mxnet.base import _Null
-
-def conv_main(data, units, filters, workspace):
-  body = data
-  for i in xrange(len(units)):
-    f = filters[i]
-    _weight = mx.symbol.Variable("conv%d_%d_weight"%(i+1, 1), lr_mult=1.0)
-    _bias = mx.symbol.Variable("conv%d_%d_bias"%(i+1, 1), lr_mult=2.0, wd_mult=0.0)
-    body = mx.sym.Convolution(data=body, weight = _weight, bias = _bias, num_filter=f, kernel=(3, 3), stride=(2,2), pad=(1, 1),
-                              name= "conv%d_%d"%(i+1, 1), workspace=workspace)
-
-
-    #body = mx.sym.BatchNorm(data=body, fix_gamma=False, eps=2e-5, momentum=0.9, name='bn%d_%d'%(i+1, 1))
-
-    body = mx.sym.LeakyReLU(data = body, act_type='prelu', name = "relu%d_%d" % (i+1, 1))
-    idx = 2
-    for j in xrange(units[i]):
-      _body = mx.sym.Convolution(data=body, no_bias=True, num_filter=f, kernel=(3, 3), stride=(1,1), pad=(1, 1),
-                                name= "conv%d_%d"%(i+1, idx), workspace=workspace)
-
-      #_body = mx.sym.BatchNorm(data=_body, fix_gamma=False, eps=2e-5, momentum=0.9, name='bn%d_%d'%(i+1, idx))
-
-      _body = mx.sym.LeakyReLU(data = _body, act_type='prelu', name = "relu%d_%d" % (i+1, idx))
-      idx+=1
-      _body = mx.sym.Convolution(data=_body, no_bias=True, num_filter=f, kernel=(3, 3), stride=(1,1), pad=(1, 1),
-                                name= "conv%d_%d"%(i+1, idx), workspace=workspace)
-      #_body = mx.sym.BatchNorm(data=_body, fix_gamma=False, eps=2e-5, momentum=0.9, name='bn%d_%d'%(i+1, idx))
-      _body = mx.sym.LeakyReLU(data = _body, act_type='prelu', name = "relu%d_%d" % (i+1, idx))
-      idx+=1
-      body = body+_body
-
-      #body = mx.sym.LeakyReLU(data = body, act_type='prelu', name = "relu%d_%d" % (i+1, idx)) #modify
-      #idx+=1
-
-
-  return body
-
-def get_symbol(num_classes, num_layers, conv_workspace=256, **kwargs):
-  if num_layers==64:
-    units = [3,8,16,3]
-    filters = [64,128,256,512]
-  elif num_layers==20:
-    units = [1,2,4,1]
-    filters = [64,128,256,512]
-    #filters = [64, 256, 512, 1024]
-  elif num_layers==36:
-    units = [2,4,8,2]
-    filters = [64,128,256,512]
-    #filters = [64, 256, 512, 1024]
-  elif num_layers==60:
-    units = [3,8,14,3]
-    filters = [64,128,256,512]
-  elif num_layers==104:
-    units = [3,8,36,3]
-    filters = [64,128,256,512]
-    #filters = [64, 256, 512, 1024]
-  data = mx.symbol.Variable('data')
-  data = data-127.5
-  data = data*0.0078125
-  body = conv_main(data = data, units = units, filters = filters, workspace = conv_workspace)
-  #modify begin
-
-  #body = mx.sym.Pooling(data=body, global_pool=True, kernel=(7, 7), pool_type='avg', name='pool1')
-  #body = mx.sym.Flatten(data=body)
-
-  #modify end
-
-  _weight = mx.symbol.Variable("fc1_weight", lr_mult=1.0)
-  _bias = mx.symbol.Variable("fc1_bias", lr_mult=2.0, wd_mult=0.0)
-  fc1 = mx.sym.FullyConnected(data=body, weight=_weight, bias=_bias, num_hidden=num_classes, name='fc1')
-  return fc1
-  
-def init_weights(sym, data_shape_dict, num_layers):
-  arg_name = sym.list_arguments()
-  aux_name = sym.list_auxiliary_states()
-  arg_shape, aaa, aux_shape = sym.infer_shape(**data_shape_dict)
-  #print(data_shape_dict)
-  #print(arg_name)
-  #print(arg_shape)
-  arg_params = {}
-  aux_params = None
-  #print(aaa)
-  #print(aux_shape)
-  arg_shape_dict = dict(zip(arg_name, arg_shape))
-  aux_shape_dict = dict(zip(aux_name, aux_shape))
-  #print(aux_shape)
-  #print(aux_params)
-  #print(arg_shape_dict)
-  for k,v in arg_shape_dict.iteritems():
-    if k.startswith('conv') and k.endswith('_weight'):
-      if not k.find('_1_')>=0:
-        if num_layers<100:
-          arg_params[k] = mx.random.normal(0, 0.01, shape=v)
-          print('init', k)
-    if k.endswith('_bias'):
-      arg_params[k] = mx.nd.zeros(shape=v)
-      print('init', k)
-  return arg_params, aux_params
-
diff --git a/src/train_softmax.py b/src/train_softmax.py
index d74b77c..c45000e 100644
--- a/src/train_softmax.py
+++ b/src/train_softmax.py
@@ -19,6 +19,7 @@ import argparse
 import mxnet.optimizer as optimizer
 #sys.path.append(os.path.join(os.path.dirname(__file__), 'common'))
 sys.path.append(os.path.join(os.path.dirname(__file__), 'eval'))
+sys.path.append(os.path.join(os.path.dirname(__file__), 'symbols'))
 import spherenet
 import marginalnet
 import inceptions
diff --git a/src/xception.py b/src/xception.py
deleted file mode 100644
index f9d17f3..0000000
--- a/src/xception.py
+++ /dev/null
@@ -1,154 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-
-Xception network, suitable for images with around 299 x 299 (original version)
-
-Reference:
-
-François Chollet. Xception: Deep Learning with Depthwise Separable Convlutions. arXiv preprint. https://arxiv.org/pdf/1610.02357v3.pdf
-
-I refered one version of MXNet from u1234x1234 https://github.com/u1234x1234/mxnet-xception/blob/master/symbol_xception.py
-
-Modified by Lin Xiong, Sep-3, 2017 for images 224 x 224
-There are some slightly differences with u1234x1234's version (pooling layer) and original version (no dropout layer).
-
-In order to accelerate computation, we use smaller parameters than original paper.
-
-"""
-
-import mxnet as mx
-
-def Conv(data, num_filter, kernel=(1, 1), stride=(1, 1), pad=(0, 0), name=None, suffix='', withRelu=False, withBn=True, bn_mom=0.9, workspace=256):
-    conv = mx.sym.Convolution(data=data, num_filter=num_filter, kernel=kernel, stride=stride, pad=pad,
-                              name='%s%s_conv2d' % (name, suffix), workspace=workspace)
-    if withBn:
-        conv = mx.sym.BatchNorm(data=conv, fix_gamma=False, eps=2e-5, momentum=bn_mom, name='%s%s_bn' % (name, suffix))
-    if withRelu:
-        conv = mx.sym.Activation(data=conv, act_type='relu', name='%s%s_relu' % (name, suffix))
-    return conv
-
-def Separable_Conv(data, num_in_channel, num_out_channel, kernel=(3, 3), stride=(1, 1), pad=(1, 1), name=None, suffix='', depth_mult=1, withBn=True, bn_mom=0.9, workspace=256):
-    # original version of Separable Convolution
-    # depthwise convolution
-    #channels       = mx.sym.split(data=data, axis=1, num_outputs=num_in_channel) # for new version of mxnet > 0.8
-    channels       = mx.sym.SliceChannel(data=data, axis=1, num_outputs=num_in_channel) # for old version of mxnet <= 0.8
-    depthwise_outs = [mx.sym.Convolution(data=channels[i], num_filter=depth_mult, kernel=kernel, 
-                           stride=stride, pad=pad, name=name+'_depthwise_kernel_'+str(i), workspace=workspace)
-                           for i in range(num_in_channel)]
-    depthwise_out = mx.sym.Concat(*depthwise_outs)
-    # pointwise convolution
-    pointwise_out = Conv(data=depthwise_out, num_filter=num_out_channel, name=name+'_pointwise_kernel', withBn=False, bn_mom=0.9, workspace=256)
-    if withBn:
-        pointwise_out = mx.sym.BatchNorm(data=pointwise_out, fix_gamma=False, eps=2e-5, momentum=bn_mom, name='%s%s_bn' % (name, suffix))
-    return pointwise_out
-
-
-
-def Circle_Middle(name, data, 
-                  num_filter,
-                  bn_mom=0.9,
-                  round=8):
-    b = data
-    for i in xrange(round):
-        residual = b
-        prefix = name + '_block' + ('_%d' % i)
-
-        b = mx.sym.Activation(data=b, act_type='relu', name=prefix + '_sepconv1_relu')
-        b = Separable_Conv(data=b, num_in_channel=num_filter, num_out_channel=num_filter, name=prefix + '_sepconv1', withBn=True, bn_mom=bn_mom, workspace=256)
-        b = mx.sym.Activation(data=b, act_type='relu', name=prefix + '_sepconv2_relu')
-        b = Separable_Conv(data=b, num_in_channel=num_filter, num_out_channel=num_filter, name=prefix + '_sepconv2', withBn=True, bn_mom=bn_mom, workspace=256)
-        b = mx.sym.Activation(data=b, act_type='relu', name=prefix + '_sepconv3_relu')
-        b = Separable_Conv(data=b, num_in_channel=num_filter, num_out_channel=num_filter, name=prefix + '_sepconv3', withBn=True, bn_mom=bn_mom, workspace=256)
-
-        b = b + residual
-
-    return b
-
-
-def get_xception_symbol(num_classes=1000):
-    # input shape 229*229*3 (old)
-    # input shape 224*224*3 (new)
-    
-    #filter_list=[64, 128, 256, 728, 1024, 1536, 2048]     # original version
-    filter_list=[64, 64, 128, 364, 512, 768, 1024]  # smaller one
-
-    # Entry flow
-    data = mx.sym.Variable('data')
-
-    # block 1
-    block1 = Conv(data=data, num_filter=int(filter_list[0]*0.5), kernel=(3, 3), stride=(2, 2), pad=(1, 1), name='Entry_flow_b1_conv1', 
-                  withRelu=True, withBn=True, bn_mom=0.9, workspace=256)
-    block1 = Conv(data=block1, num_filter=filter_list[0], kernel=(3, 3), pad=(1, 1), name='Entry_flow_b1_conv2',
-                  withRelu=True, withBn=True, bn_mom=0.9, workspace=256)
-
-    # block 2
-    rs2    = Conv(data=block1, num_filter=filter_list[1], stride=(2, 2), name='Entry_flow_b2_conv1',
-                  withBn=True, bn_mom=0.9, workspace=256)
-    block2 = Separable_Conv(block1, num_in_channel=filter_list[0], num_out_channel=filter_list[1], name='Entry_flow_b2_sepconv1', withBn=True, bn_mom=0.9, workspace=256)
-    block2 = mx.sym.Activation(data=block2, act_type='relu', name='Entry_flow_b2_sepconv1_relu')
-    block2 = Separable_Conv(block2, num_in_channel=filter_list[1], num_out_channel=filter_list[1], name='Entry_flow_b2_sepconv2', withBn=True, bn_mom=0.9, workspace=256)
-    block2 = mx.sym.Pooling(data=block2, kernel=(3, 3), stride=(2, 2), pad=(1, 1), pool_type='max', name='Entry_flow_b2_pool')
-    block2 = block2 + rs2
-
-    # block 3
-    rs3    = Conv(data=block2, num_filter=filter_list[2], stride=(2, 2), name='Entry_flow_b3_conv1',
-                  withBn=True, bn_mom=0.9, workspace=256)
-    block3 = mx.sym.Activation(data=block2, act_type='relu', name='Entry_flow_b3_sepconv1_relu')
-    block3 = Separable_Conv(block3, num_in_channel=filter_list[1], num_out_channel=filter_list[2], name='Entry_flow_b3_sepconv1', withBn=True, bn_mom=0.9, workspace=256)
-    block3 = mx.sym.Activation(data=block3, act_type='relu', name='Entry_flow_b3_sepconv2_relu')
-    block3 = Separable_Conv(block3, num_in_channel=filter_list[2], num_out_channel=filter_list[2], name='Entry_flow_b3_sepconv2', withBn=True, bn_mom=0.9, workspace=256)
-    block3 = mx.sym.Pooling(data=block3, kernel=(3, 3), stride=(2, 2), pad=(1, 1), pool_type='max', name='Entry_flow_b3_pool')
-    block3 = block3 + rs3
-
-    # block 4
-    rs4    = Conv(data=block3, num_filter=filter_list[3], stride=(2, 2), name='Entry_flow_b4_conv1',
-                  withBn=True, bn_mom=0.9, workspace=256)
-    block4 = mx.sym.Activation(data=block3, act_type='relu', name='Entry_flow_b4_sepconv1_relu')
-    block4 = Separable_Conv(block4, num_in_channel=filter_list[2], num_out_channel=filter_list[3], name='Entry_flow_b4_sepconv1', withBn=True, bn_mom=0.9, workspace=256)
-    block4 = mx.sym.Activation(data=block4, act_type='relu', name='Entry_flow_b4_sepconv2_relu')
-    block4 = Separable_Conv(block4, num_in_channel=filter_list[3], num_out_channel=filter_list[3], name='Entry_flow_b4_sepconv2', withBn=True, bn_mom=0.9, workspace=256)
-    block4 = mx.sym.Pooling(data=block4, kernel=(3, 3), stride=(2, 2), pad=(1, 1), pool_type='max', name='Entry_flow_b4_pool')
-    block4 = block4 + rs4
-
-    # Middle flow
-    block_m_f = Circle_Middle('Middle_flow', block4,
-                      filter_list[3],
-                      0.9,
-                      8)
-    # Exit flow
-    rs5    = Conv(data=block_m_f, num_filter=filter_list[4], stride=(2, 2), name='Exit_flow_b5_conv1',
-                  withBn=True, bn_mom=0.9, workspace=256)
-    block5 = mx.sym.Activation(data=block_m_f, act_type='relu', name='Exit_flow_b5_sepconv1_relu')
-    block5 = Separable_Conv(block5, num_in_channel=filter_list[3], num_out_channel=filter_list[3], name='Exit_flow_b5_sepconv1', withBn=True, bn_mom=0.9, workspace=256)
-    block5 = mx.sym.Activation(data=block5, act_type='relu', name='Exit_flow_b5_sepconv2_relu')
-    block5 = Separable_Conv(block5, num_in_channel=filter_list[3], num_out_channel=filter_list[4], name='Exit_flow_b5_sepconv2', withBn=True, bn_mom=0.9, workspace=256)
-    block5 = mx.sym.Pooling(data=block5, kernel=(3, 3), stride=(2, 2), pad=(1, 1), pool_type='max', name='Entry_flow_b5_pool')
-    block5 = block5 + rs5
-
-    block6 = Separable_Conv(block5, num_in_channel=filter_list[4], num_out_channel=filter_list[5], name='Exit_flow_b6_sepconv1', withBn=True, bn_mom=0.9, workspace=256)
-    block6 = mx.sym.Activation(data=block6, act_type='relu', name='Exit_flow_b6_sepconv1_relu')
-    block6 = Separable_Conv(block6, num_in_channel=filter_list[5], num_out_channel=filter_list[6], name='Exit_flow_b6_sepconv2', withBn=True, bn_mom=0.9, workspace=256)
-    block6 = mx.sym.Activation(data=block6, act_type='relu', name='Exit_flow_b6_sepconv2_relu')
-
-    pool    = mx.sym.Pooling(data=block6, global_pool=True, kernel=(7, 7), stride=(1, 1), pad=(0, 0), pool_type="avg", name="global_pool")
-    dropout = mx.sym.Dropout(data=pool, p=0.2)
-    flatten = mx.sym.Flatten(data=dropout)
-
-    # output
-    fc1 = mx.symbol.FullyConnected(data=flatten, num_hidden=num_classes, name='fc1')
-    softmax = mx.symbol.SoftmaxOutput(data=fc1, name='softmax')
-    return fc1, softmax
-
-
-
-
-
-
-
-
-
-
-
-
-
-