diff --git a/gluon/train.py b/gluon/train.py
index a51e2ec..2a1d5db 100644
--- a/gluon/train.py
+++ b/gluon/train.py
@@ -555,8 +555,12 @@ def train_net(args):
         fc7 = net(data, label)
       else:
         fc7 = net(data)
-      softmax = mx.symbol.SoftmaxOutput(data=fc7, label = label, name='softmax', normalization='valid')
-      sym = softmax
+      #sym = mx.symbol.SoftmaxOutput(data=fc7, label = label, name='softmax', normalization='valid')
+      ceop = gluon.loss.SoftmaxCrossEntropyLoss()
+      loss = ceop(fc7, label) 
+      #loss = loss/args.per_batch_size
+      loss = mx.sym.mean(loss)
+      sym = mx.sym.Group( [mx.symbol.BlockGrad(fc7), mx.symbol.MakeLoss(loss, name='softmax')] )
 
     def _batch_callback():
       mbatch = global_step[0]
@@ -643,7 +647,8 @@ def train_net(args):
       loss_weight = 1.0
       if args.task=='age':
         loss_weight = 1.0/AGE
-      loss = gluon.loss.SoftmaxCrossEntropyLoss(weight = loss_weight)
+      #loss = gluon.loss.SoftmaxCrossEntropyLoss(weight = loss_weight)
+      loss = nd.SoftmaxOutput
       #loss = gluon.loss.SoftmaxCrossEntropyLoss()
       while True:
           #trainer = update_learning_rate(opt.lr, trainer, epoch, opt.lr_factor, lr_steps)
@@ -695,7 +700,9 @@ def train_net(args):
                   ag.backward(Ls)
               #trainer.step(batch.data[0].shape[0], ignore_stale_grad=True)
               #trainer.step(args.ctx_num)
-              trainer.step(batch.data[0].shape[0])
+              n = batch.data[0].shape[0]
+              #print(n,n)
+              trainer.step(n)
               metric.update(label, outputs)
               if i>0 and i%20==0:
                   name, acc = metric.get()
@@ -705,7 +712,7 @@ def train_net(args):
                   else:
                     logger.info('Epoch[%d] Batch [%d]\tSpeed: %f samples/sec\t%s=%f'%(
                                    num_epochs, i, args.batch_size/(time.time()-btic), name[0], acc[0]))
-                  metric.reset()
+                  #metric.reset()
               btic = time.time()
 
           epoch_time = time.time()-tic