diff --git a/common/flops_counter.py b/common/flops_counter.py
index 46f682c..64469ec 100644
--- a/common/flops_counter.py
+++ b/common/flops_counter.py
@@ -83,6 +83,17 @@ def count_flops(sym, **data_shapes):
 
   return FLOPs
 
+def flops_str(FLOPs):
+  """Format a FLOPs count as a one-decimal string with a T/G/M/K suffix."""
+  # Units are tried from largest to smallest; the first that fits is used.
+  units = ((1e12, 'T'), (1e9, 'G'), (1e6, 'M'), (1e3, 'K'))
+  for scale, suffix in units:
+    whole_units = FLOPs//scale
+    if whole_units > 0:
+      return "%.1f%s"%(FLOPs/scale, suffix)
+  # No unit fits (value below 1e3): print the raw value without a suffix.
+  return "%.1f"%(FLOPs)
+
 if __name__ == '__main__':
   parser = argparse.ArgumentParser(description='flops counter')
   # general
diff --git a/recognition/sample_config.py b/recognition/sample_config.py
index b6963ee..5b3cda7 100644
--- a/recognition/sample_config.py
+++ b/recognition/sample_config.py
@@ -26,6 +26,7 @@ config.data_cutoff = False
 config.data_color = 0
 config.data_images_filter = 0
 config.count_flops = True
+config.memonger = False  # not working yet
 
 
 # network settings
@@ -35,6 +36,11 @@ network.r100 = edict()
 network.r100.net_name = 'fresnet'
 network.r100.num_layers = 100
 
+network.r100fc = edict()
+network.r100fc.net_name = 'fresnet'
+network.r100fc.num_layers = 100
+network.r100fc.net_output = 'FC'
+
 network.r50 = edict()
 network.r50.net_name = 'fresnet'
 network.r50.num_layers = 50
@@ -107,6 +113,13 @@ dataset.emore.num_classes = 85742
 dataset.emore.image_shape = (112,112,3)
 dataset.emore.val_targets = ['lfw', 'cfp_fp', 'agedb_30']
 
+dataset.retina = edict()
+dataset.retina.dataset = 'retina'
+dataset.retina.dataset_path = '../datasets/ms1m-retinaface-t1'
+dataset.retina.num_classes = 93431
+dataset.retina.image_shape = (112,112,3)
+dataset.retina.val_targets = ['lfw', 'cfp_fp', 'agedb_30']
+
 loss = edict()
 loss.softmax = edict()
 loss.softmax.loss_name = 'softmax'
diff --git a/recognition/symbol/fresnet.py b/recognition/symbol/fresnet.py
index 3f7fc14..1df4ffe 100644
--- a/recognition/symbol/fresnet.py
+++ b/recognition/symbol/fresnet.py
@@ -31,6 +31,7 @@ import os
 import mxnet as mx
 import numpy as np
 import symbol_utils
+import memonger
 import sklearn
 sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
 from config import config
@@ -495,6 +496,7 @@ def resnet(units, num_stages, filter_list, num_classes, bottle_neck):
         'version_act': config.net_act,
         'bn_mom': bn_mom,
         'workspace': workspace,
+        'memonger': config.memonger,
         }
     """Return ResNet symbol of
     Parameters
@@ -519,7 +521,8 @@ def resnet(units, num_stages, filter_list, num_classes, bottle_neck):
     fc_type = version_output
     version_unit = kwargs.get('version_unit', 3)
     act_type = kwargs.get('version_act', 'prelu')
-    print(version_se, version_input, version_output, version_unit, act_type)
+    memonger = kwargs.get('memonger', False)
+    print(version_se, version_input, version_output, version_unit, act_type, memonger)
     num_unit = len(units)
     assert(num_unit == num_stages)
     data = mx.sym.Variable(name='data')
@@ -618,11 +621,22 @@ def get_symbol():
     else:
         raise ValueError("no experiments done on num_layers {}, you can do it yourself".format(num_layers))
 
-    return resnet(units       = units,
+    net = resnet(units       = units,
                   num_stages  = num_stages,
                   filter_list = filter_list,
                   num_classes = num_classes,
                   bottle_neck = bottle_neck)
 
+    if config.memonger:
+      dshape = (config.per_batch_size, config.image_shape[2], config.image_shape[0], config.image_shape[1])
+      net_mem_planned = memonger.search_plan(net, data=dshape)
+      old_cost = memonger.get_cost(net, data=dshape)
+      new_cost = memonger.get_cost(net_mem_planned, data=dshape)
+
+      print('Old feature map cost=%d MB' % old_cost)
+      print('New feature map cost=%d MB' % new_cost)
+      net = net_mem_planned
+    return net
+
 
 
diff --git a/recognition/train.py b/recognition/train.py
index fd50a1a..017a4ee 100644
--- a/recognition/train.py
+++ b/recognition/train.py
@@ -165,6 +165,8 @@ def train_net(args):
     args.batch_size = args.per_batch_size*args.ctx_num
     args.rescale_threshold = 0
     args.image_channel = config.image_shape[2]
+    config.batch_size = args.batch_size
+    config.per_batch_size = args.per_batch_size
 
     data_dir = config.dataset_path
     path_imgrec = None
@@ -197,7 +199,8 @@ def train_net(args):
       all_layers = sym.get_internals()
       _sym = all_layers['fc1_output']
       FLOPs = flops_counter.count_flops(_sym, data=(1,3,image_size[0],image_size[1]))
-      print('Network FLOPs: %d'%FLOPs)
+      _str = flops_counter.flops_str(FLOPs)
+      print('Network FLOPs: %s'%_str)
 
     #label_name = 'softmax_label'
     #label_shape = (args.batch_size,)
diff --git a/recognition/train_parall.py b/recognition/train_parall.py
index bfce15a..fd9a436 100644
--- a/recognition/train_parall.py
+++ b/recognition/train_parall.py
@@ -144,6 +144,8 @@ def train_net(args):
     args.batch_size = args.per_batch_size*args.ctx_num
     args.rescale_threshold = 0
     args.image_channel = config.image_shape[2]
+    config.batch_size = args.batch_size
+    config.per_batch_size = args.per_batch_size
     data_dir = config.dataset_path
     path_imgrec = None
     path_imglist = None
@@ -193,7 +195,8 @@ def train_net(args):
       all_layers = esym.get_internals()
       _sym = all_layers['fc1_output']
       FLOPs = flops_counter.count_flops(_sym, data=(1,3,image_size[0],image_size[1]))
-      print('Network FLOPs: %d'%FLOPs)
+      _str = flops_counter.flops_str(FLOPs)
+      print('Network FLOPs: %s'%_str)
 
     if config.num_workers==1:
       from parall_module_local_v1 import ParallModule