diff --git a/recognition/arcface_paddle/README_ch.md b/recognition/arcface_paddle/README_ch.md index 93d0803..88df669 100644 --- a/recognition/arcface_paddle/README_ch.md +++ b/recognition/arcface_paddle/README_ch.md @@ -75,6 +75,7 @@ Arcface-Paddle/MSiM_bin 注意: * 这里为了更加方便`Dataloader`读取数据,将原始的`train.rec`文件转化为很多`bin文件`,每个`bin文件`都唯一对应一张原始图像。如果您采集得到的文件均为原始的图像文件,那么可以参考`3.3节`中的内容完成原始图像文件到bin文件的转换。 +* 如果你的训练数据为原始的图像文件列表格式,那么在训练的时候,只需要将`is_bin`修改为`False`即可,下面的训练脚本中也会有具体的使用说明。 ### 3.3 原始图像文件与bin文件的转换 @@ -95,6 +96,7 @@ python3.7 tools/convert_image_bin.py --image_path="your/input/bin/path" --bin_pa 准备好配置文件后,可以通过以下方式开始训练过程。 ```bash +# 如果你的训练数据为bin文件格式的图像文件,可以使用下面的命令进行训练 python3.7 train.py \ --network 'MobileFaceNet_128' \ --lr=0.1 \ @@ -104,6 +106,18 @@ python3.7 train.py \ --logdir="log" \ --output "emore_arcface" \ --resume 0 + +# 如果你的训练数据为原始图像文件,可以将`is_bin`指定为False,进行训练 +python3.7 train.py \ + --network 'MobileFaceNet_128' \ + --lr=0.1 \ + --batch_size 512 \ + --weight_decay 2e-4 \ + --embedding_size 128 \ + --logdir="log" \ + --output "emore_arcface" \ + --resume 0 \ + --is_bin False ``` 上述命令中,需要传入如下参数: @@ -116,6 +130,7 @@ python3.7 train.py \ + `logdir`: VDL 输出 log 的存储路径, 默认值为 `"log"`; + `output`: 训练过程中的模型文件存储路径, 默认值为 `"emore_arcface"`; + `resume`: 是否恢复分类层的模型权重。 `1` 表示使用之前好的权重文件进行初始化, `0` 代表重新初始化。 如果想要恢复分类层的模型权重, 需要保证 `output` 目录下包含: `rank:0_softmax_weight_mom.pkl` 和 `rank:0_softmax_weight.pkl` 两个文件。 ++ `is_bin`: 训练数据是否为bin文件格式,默认为True。 * 训练过程中的输出 log 示例如下: diff --git a/recognition/arcface_paddle/README_en.md b/recognition/arcface_paddle/README_en.md index 0f13e38..8e8ac0d 100644 --- a/recognition/arcface_paddle/README_en.md +++ b/recognition/arcface_paddle/README_en.md @@ -79,7 +79,7 @@ If you want to use customed dataset, you can arrange your data according to the **Note:** * For using `Dataloader` api for reading data, we convert `train.rec` into many little `bin` files, each `bin` file denotes a single image. If your dataset just contains origin image files. 
You can either rewrite the dataloader file or refer to section 3.3 to convert the original image files to `bin` files. - +* If your training data consists of original image files rather than `bin` files, you only need to set the parameter `is_bin` to `False` when training. See the training commands below for details. ### 3.3 Transform between original image files and bin files @@ -100,6 +100,7 @@ python3.7 tools/convert_image_bin.py --image_path="your/input/bin/path" --bin_pa After preparing the configuration file, The training process can be started in the following way. ```bash +# for the bin format training data python3.7 train.py \ --network 'MobileFaceNet_128' \ --lr=0.1 \ @@ -109,6 +110,18 @@ python3.7 train.py \ --logdir="log" \ --output "emore_arcface" \ --resume 0 + +# for the original image format training data +python3.7 train.py \ + --network 'MobileFaceNet_128' \ + --lr=0.1 \ + --batch_size 512 \ + --weight_decay 2e-4 \ + --embedding_size 128 \ + --logdir="log" \ + --output "emore_arcface" \ + --resume 0 \ + --is_bin False ``` Among them: @@ -121,6 +134,7 @@ Among them: + `logdir`: VDL log storage directory, default by `"log"`; + `output`: Model stored path, default by: `"emore_arcface"`; + `resume`: Restore the classification layer parameters. `1` represents recovery parameters, and `0` represents reinitialization. If you need to resume training, you need to ensure that there are `rank:0_softmax_weight_mom.pkl` and `rank:0_softmax_weight.pkl` in the output directory. ++ `is_bin`: Whether the training data is in `bin` format; defaults to `True`. 
* The output log examples are as follows: diff --git a/recognition/arcface_paddle/config.py b/recognition/arcface_paddle/config.py index 1fa4f39..49ee19b 100644 --- a/recognition/arcface_paddle/config.py +++ b/recognition/arcface_paddle/config.py @@ -15,19 +15,16 @@ from easydict import EasyDict as edict config = edict() -config.dataset = "emore" config.sample_rate = 1 config.momentum = 0.9 config.data_dir = "./MS1M_bin" config.file_list = "MS1M_bin/label.txt" config.num_classes = 85742 -config.num_image = 5822653 config.num_epoch = 32 config.warmup_epoch = 1 config.val_targets = ["lfw", "cfp_fp", "agedb_30"] - def lr_step_func(epoch): return ((epoch + 1) / (4 + 1))**2 if epoch < -1 else 0.1**len( [m for m in [6, 12, 18, 24] if m - 1 <= epoch]) diff --git a/recognition/arcface_paddle/dataloader/common_dataset.py b/recognition/arcface_paddle/dataloader/common_dataset.py index 5a70d2a..754d4f5 100644 --- a/recognition/arcface_paddle/dataloader/common_dataset.py +++ b/recognition/arcface_paddle/dataloader/common_dataset.py @@ -15,6 +15,7 @@ from paddle.io import Dataset from paddle.vision import transforms import os +import cv2 from PIL import Image import random import paddle @@ -24,12 +25,13 @@ from dataloader.kv_helper import read_img_from_bin class CommonDataset(Dataset): - def __init__(self, root_dir, label_file): + def __init__(self, root_dir, label_file, is_bin=True): super(CommonDataset, self).__init__() self.root_dir = root_dir self.label_file = label_file self.full_lines = self.get_file_list(label_file) self.delimiter = "\t" + self.is_bin = is_bin self.transform = transforms.Compose([ transforms.RandomHorizontalFlip(), transforms.ToTensor(), @@ -54,7 +56,11 @@ class CommonDataset(Dataset): label = int(label) label = paddle.to_tensor(label, dtype='int64') img_path = os.path.join(self.root_dir, img_path) - img = read_img_from_bin(img_path)[:, :, ::-1] + if self.is_bin: + img = read_img_from_bin(img_path) + else: + img = cv2.imread(img_path) + img = img[:, :, 
::-1] img = self.transform(img) return img, label diff --git a/recognition/arcface_paddle/shell/train.sh b/recognition/arcface_paddle/shell/train.sh index b05783f..cd86540 100644 --- a/recognition/arcface_paddle/shell/train.sh +++ b/recognition/arcface_paddle/shell/train.sh @@ -8,8 +8,9 @@ log_name="log" python3.7 train.py \ --network 'MobileFaceNet_128' \ --lr=0.1 \ - --batch_size 512 \ + --batch_size 16 \ --weight_decay 2e-4 \ --embedding_size 128 \ --logdir="${log_name}" \ - --output "emore_arcface" + --output "emore_arcface" \ + --is_bin=False diff --git a/recognition/arcface_paddle/tools/convert_image_bin.py b/recognition/arcface_paddle/tools/convert_image_bin.py index 45516ba..c0855e9 100644 --- a/recognition/arcface_paddle/tools/convert_image_bin.py +++ b/recognition/arcface_paddle/tools/convert_image_bin.py @@ -14,6 +14,10 @@ import os import sys + +__dir__ = os.path.dirname(os.path.abspath(__file__)) +sys.path.append(os.path.abspath(os.path.join(__dir__, '../'))) + import argparse import cv2 from dataloader.kv_helper import read_img_from_bin @@ -29,7 +33,7 @@ def get_file_list(img_file, end=('jpg', 'png', 'jpeg', 'JPEG', 'JPG', 'bmp')): imgs_lists.append(img_file) elif os.path.isdir(img_file): for single_file in os.listdir(img_file): - if single_file.split('.')[-1] in img_end: + if single_file.split('.')[-1] in end: imgs_lists.append(os.path.join(img_file, single_file)) if len(imgs_lists) == 0: raise Exception("not found any img file in {}".format(img_file)) @@ -53,20 +57,20 @@ def parse_args(): def main(args): assert args.mode in ["image2bin", "bin2image"] - assert os.path.isdir(args.image_path) - assert os.path.isfile(args.bin_path) os.makedirs(args.image_path, exist_ok=True) os.makedirs(args.bin_path, exist_ok=True) + assert os.path.isdir(args.image_path) + assert os.path.isdir(args.bin_path) if args.mode == "image2bin": - img_list = get_file_list(args.bin_path) + img_list = get_file_list(args.image_path) for idx, img_fp in enumerate(img_list): if idx % 
len(img_list) == 1000: print("conversion process: [{}]/[{}]".format(idx, len(img_list))) img_name = os.path.basename(img_fp) output_path = os.path.join(args.bin_path, - os.path.splitext(img_name) + ".bin") + os.path.splitext(img_name)[0] + ".bin") trans_img_to_bin(img_fp, output_path) elif args.mode == "bin2image": bin_list = get_file_list(args.bin_path, end=("bin", )) @@ -74,9 +78,9 @@ def main(args): if idx % len(bin_list) == 1000: print("conversion process: [{}]/[{}]".format(idx, len(bin_list))) - bin_name = os.path.basename(img_fp) + bin_name = os.path.basename(bin_fp) output_path = os.path.join(args.image_path, - os.path.splitext(bin_name) + ".jpg") + os.path.splitext(bin_name)[0] + ".jpg") img = read_img_from_bin(bin_fp) cv2.imwrite(output_path, img) diff --git a/recognition/arcface_paddle/train.py b/recognition/arcface_paddle/train.py index 3b09e83..c150cc9 100644 --- a/recognition/arcface_paddle/train.py +++ b/recognition/arcface_paddle/train.py @@ -1,169 +1,171 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from dataloader import CommonDataset - -from paddle.io import DataLoader -from config import config as cfg -from partial_fc import PartialFC -from utils.utils_callbacks import CallBackVerification, CallBackLogging, CallBackModelCheckpoint -from utils.utils_logging import AverageMeter -import paddle.nn.functional as F -from paddle.nn import ClipGradByNorm -from visualdl import LogWriter -import paddle -import backbones -import argparse -import losses -import time -import os -import sys - - -def main(args): - world_size = int(1.0) - rank = int(0.0) - - if not os.path.exists(args.output): - os.makedirs(args.output) - else: - time.sleep(2) - - writer = LogWriter(logdir=args.logdir) - - trainset = CommonDataset(root_dir=cfg.data_dir, label_file=cfg.file_list) - # trainset = BinDataset(root_dir=cfg.data_dir) - train_loader = DataLoader( - dataset=trainset, - batch_size=args.batch_size, - shuffle=True, - drop_last=True, - num_workers=0) - - backbone = eval("backbones.{}".format(args.network))() - backbone.train() - - clip_by_norm = ClipGradByNorm(5.0) - margin_softmax = eval("losses.{}".format(args.loss))() - - module_partial_fc = PartialFC( - rank=0, - world_size=1, - resume=0, - batch_size=args.batch_size, - margin_softmax=margin_softmax, - num_classes=cfg.num_classes, - sample_rate=cfg.sample_rate, - embedding_size=args.embedding_size, - prefix=args.output) - - scheduler_backbone_decay = paddle.optimizer.lr.LambdaDecay( - learning_rate=args.lr, lr_lambda=cfg.lr_func, verbose=True) - scheduler_backbone = paddle.optimizer.lr.LinearWarmup( - learning_rate=scheduler_backbone_decay, - warmup_steps=cfg.warmup_epoch, - start_lr=0, - end_lr=args.lr / 512 * args.batch_size, - verbose=True) - opt_backbone = paddle.optimizer.Momentum( - parameters=backbone.parameters(), - learning_rate=scheduler_backbone, - momentum=0.9, - weight_decay=args.weight_decay, - grad_clip=clip_by_norm) - - scheduler_pfc_decay = paddle.optimizer.lr.LambdaDecay( - learning_rate=args.lr, 
lr_lambda=cfg.lr_func, verbose=True) - scheduler_pfc = paddle.optimizer.lr.LinearWarmup( - learning_rate=scheduler_pfc_decay, - warmup_steps=cfg.warmup_epoch, - start_lr=0, - end_lr=args.lr / 512 * args.batch_size, - verbose=True) - opt_pfc = paddle.optimizer.Momentum( - parameters=module_partial_fc.parameters(), - learning_rate=scheduler_pfc, - momentum=0.9, - weight_decay=args.weight_decay, - grad_clip=clip_by_norm) - - start_epoch = 0 - total_step = int( - len(trainset) / args.batch_size / world_size * cfg.num_epoch) - if rank == 0: - print("Total Step is: %d" % total_step) - - callback_verification = CallBackVerification(2000, rank, cfg.val_targets, - cfg.data_dir) - callback_logging = CallBackLogging(10, rank, total_step, args.batch_size, - world_size, writer) - callback_checkpoint = CallBackModelCheckpoint(rank, args.output, - args.network) - - loss = AverageMeter() - global_step = 0 - for epoch in range(start_epoch, cfg.num_epoch): - for step, (img, label) in enumerate(train_loader): - label = label.flatten() - global_step += 1 - sys.stdout.flush() - features = F.normalize(backbone(img)) - x_grad, loss_v = module_partial_fc.forward_backward( - label, features, opt_pfc) - sys.stdout.flush() - (features.multiply(x_grad)).backward() - sys.stdout.flush() - opt_backbone.step() - opt_pfc.step() - module_partial_fc.update() - opt_backbone.clear_gradients() - opt_pfc.clear_gradients() - sys.stdout.flush() - - lr_backbone_value = opt_backbone._global_learning_rate().numpy()[0] - lr_pfc_value = opt_backbone._global_learning_rate().numpy()[0] - - loss.update(loss_v, 1) - callback_logging(global_step, loss, epoch, lr_backbone_value, - lr_pfc_value) - sys.stdout.flush() - callback_verification(global_step, backbone) - callback_checkpoint(global_step, backbone, module_partial_fc) - scheduler_backbone.step() - scheduler_pfc.step() - writer.close() - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='Paddle ArcFace Training') - 
parser.add_argument( - '--network', - type=str, - default='MobileFaceNet_128', - help='backbone network') - parser.add_argument( - '--loss', type=str, default='ArcFace', help='loss function') - parser.add_argument('--lr', type=float, default=0.1, help='learning rate') - parser.add_argument( - '--batch_size', type=int, default=512, help='batch size') - parser.add_argument( - '--weight_decay', type=float, default=2e-4, help='weight decay') - parser.add_argument( - '--embedding_size', type=int, default=128, help='embedding size') - parser.add_argument('--logdir', type=str, default='./log', help='log dir') - parser.add_argument( - '--output', type=str, default='emore_arcface', help='output dir') - parser.add_argument('--resume', type=int, default=0, help='model resuming') - args = parser.parse_args() - main(args) +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+from dataloader import CommonDataset
+
+from paddle.io import DataLoader
+from config import config as cfg
+from partial_fc import PartialFC
+from utils.utils_callbacks import CallBackVerification, CallBackLogging, CallBackModelCheckpoint
+from utils.utils_logging import AverageMeter
+import paddle.nn.functional as F
+from paddle.nn import ClipGradByNorm
+from visualdl import LogWriter
+import paddle
+import backbones
+import argparse
+import losses
+import time
+import os
+import sys
+
+
+def main(args):
+    """Train the backbone and PartialFC head in one process from parsed CLI args."""
+    world_size, rank = 1, 0  # fixed: this script trains in a single process
+
+    if not os.path.exists(args.output):
+        os.makedirs(args.output)
+    else:
+        time.sleep(2)
+
+    writer = LogWriter(logdir=args.logdir)
+    trainset = CommonDataset(root_dir=cfg.data_dir, label_file=cfg.file_list, is_bin=args.is_bin)
+    train_loader = DataLoader(
+        dataset=trainset,
+        batch_size=args.batch_size,
+        shuffle=True,
+        drop_last=True,
+        num_workers=0)
+
+    backbone = getattr(backbones, args.network)()
+    backbone.train()
+
+    clip_by_norm = ClipGradByNorm(5.0)
+    margin_softmax = getattr(losses, args.loss)()
+
+    module_partial_fc = PartialFC(
+        rank=rank,
+        world_size=world_size,
+        resume=args.resume,  # honour --resume instead of always passing 0
+        batch_size=args.batch_size,
+        margin_softmax=margin_softmax,
+        num_classes=cfg.num_classes,
+        sample_rate=cfg.sample_rate,
+        embedding_size=args.embedding_size,
+        prefix=args.output)
+
+    # Warm up to args.lr scaled linearly by batch size (512 is the reference).
+    scheduler_backbone_decay = paddle.optimizer.lr.LambdaDecay(
+        learning_rate=args.lr, lr_lambda=cfg.lr_func, verbose=True)
+    scheduler_backbone = paddle.optimizer.lr.LinearWarmup(
+        learning_rate=scheduler_backbone_decay,
+        warmup_steps=cfg.warmup_epoch,
+        start_lr=0,
+        end_lr=args.lr / 512 * args.batch_size,
+        verbose=True)
+    opt_backbone = paddle.optimizer.Momentum(
+        parameters=backbone.parameters(),
+        learning_rate=scheduler_backbone,
+        momentum=0.9,
+        weight_decay=args.weight_decay,
+        grad_clip=clip_by_norm)
+
+    scheduler_pfc_decay = paddle.optimizer.lr.LambdaDecay(
+        learning_rate=args.lr, lr_lambda=cfg.lr_func, verbose=True)
+    scheduler_pfc = paddle.optimizer.lr.LinearWarmup(
+        learning_rate=scheduler_pfc_decay,
+        warmup_steps=cfg.warmup_epoch,
+        start_lr=0,
+        end_lr=args.lr / 512 * args.batch_size,
+        verbose=True)
+    opt_pfc = paddle.optimizer.Momentum(
+        parameters=module_partial_fc.parameters(),
+        learning_rate=scheduler_pfc,
+        momentum=0.9,
+        weight_decay=args.weight_decay,
+        grad_clip=clip_by_norm)
+
+    start_epoch = 0
+    total_step = int(
+        len(trainset) / args.batch_size / world_size * cfg.num_epoch)
+    if rank == 0:
+        print("Total Step is: %d" % total_step)
+
+    callback_verification = CallBackVerification(2000, rank, cfg.val_targets,
+                                                 cfg.data_dir)
+    callback_logging = CallBackLogging(10, rank, total_step, args.batch_size,
+                                       world_size, writer)
+    callback_checkpoint = CallBackModelCheckpoint(rank, args.output,
+                                                  args.network)
+
+    loss = AverageMeter()
+    global_step = 0
+    for epoch in range(start_epoch, cfg.num_epoch):
+        for step, (img, label) in enumerate(train_loader):
+            label = label.flatten()
+            global_step += 1
+            sys.stdout.flush()
+            features = F.normalize(backbone(img))
+            x_grad, loss_v = module_partial_fc.forward_backward(
+                label, features, opt_pfc)
+            sys.stdout.flush()
+            (features.multiply(x_grad)).backward()
+            sys.stdout.flush()
+            opt_backbone.step()
+            opt_pfc.step()
+            module_partial_fc.update()
+            opt_backbone.clear_gradients()
+            opt_pfc.clear_gradients()
+            sys.stdout.flush()
+
+            lr_backbone_value = opt_backbone._global_learning_rate().numpy()[0]
+            lr_pfc_value = opt_pfc._global_learning_rate().numpy()[0]
+
+            loss.update(loss_v, 1)
+            callback_logging(global_step, loss, epoch, lr_backbone_value,
+                             lr_pfc_value)
+            sys.stdout.flush()
+            callback_verification(global_step, backbone)
+        callback_checkpoint(global_step, backbone, module_partial_fc)
+        scheduler_backbone.step()
+        scheduler_pfc.step()
+    writer.close()
+
+
+if __name__ == '__main__':
+    def str2bool(v):
+        """Return True for "true", "t" or "1" (case-insensitive), else False."""
+        return v.lower() in ("true", "t", "1")
+
+    parser = argparse.ArgumentParser(description='Paddle ArcFace Training')
+    parser.add_argument(
+        '--network', type=str, default='MobileFaceNet_128',
+        help='backbone network')
+    parser.add_argument(
+        '--loss', type=str, default='ArcFace', help='loss function')
+    parser.add_argument('--lr', type=float, default=0.1, help='learning rate')
+    parser.add_argument(
+        '--batch_size', type=int, default=512, help='batch size')
+    parser.add_argument(
+        '--weight_decay', type=float, default=2e-4, help='weight decay')
+    parser.add_argument(
+        '--embedding_size', type=int, default=128, help='embedding size')
+    parser.add_argument('--logdir', type=str, default='./log', help='log dir')
+    parser.add_argument(
+        '--output', type=str, default='emore_arcface', help='output dir')
+    parser.add_argument('--resume', type=int, default=0, help='model resuming')
+    parser.add_argument('--is_bin', type=str2bool, default=True, help='whether the train data is bin or original image file')
+    args = parser.parse_args()
+    main(args)