Merge pull request #1675 from littletomatodonkey/fix_convert_img

fix some error for arcface paddle
This commit is contained in:
Jia Guo
2021-07-24 11:32:47 +08:00
committed by GitHub
7 changed files with 223 additions and 184 deletions

View File

@@ -75,6 +75,7 @@ Arcface-Paddle/MSiM_bin
注意:
* 这里为了更加方便`Dataloader`读取数据,将原始的`train.rec`文件转化为很多`bin文件`,每个`bin文件`都唯一对应一张原始图像。如果您采集得到的文件均为原始的图像文件,那么可以参考`3.3节`中的内容完成原始图像文件到bin文件的转换。
* 如果你的训练数据为原始的图像文件列表格式,那么在训练的时候,只需要将`is_bin`修改为`False`即可,下面的训练脚本中也会有具体的使用说明。
### 3.3 原始图像文件与bin文件的转换
@@ -95,6 +96,7 @@ python3.7 tools/convert_image_bin.py --image_path="your/input/bin/path" --bin_pa
准备好配置文件后,可以通过以下方式开始训练过程。
```bash
# 如果你的训练数据为bin文件格式的图像文件,可以使用下面的命令进行训练
python3.7 train.py \
--network 'MobileFaceNet_128' \
--lr=0.1 \
@@ -104,6 +106,18 @@ python3.7 train.py \
--logdir="log" \
--output "emore_arcface" \
--resume 0
# 如果你的训练数据为原始图像文件,可以将`is_bin`指定为False进行训练
python3.7 train.py \
--network 'MobileFaceNet_128' \
--lr=0.1 \
--batch_size 512 \
--weight_decay 2e-4 \
--embedding_size 128 \
--logdir="log" \
--output "emore_arcface" \
--resume 0 \
--is_bin False
```
上述命令中,需要传入如下参数:
@@ -116,6 +130,7 @@ python3.7 train.py \
+ `logdir`: VDL 输出 log 的存储路径, 默认值为 `"log"`;
+ `output`: 训练过程中的模型文件存储路径, 默认值为 `"emore_arcface"`;
+ `resume`: 是否恢复分类层的模型权重。 `1` 表示使用之前好的权重文件进行初始化, `0` 代表重新初始化。 如果想要恢复分类层的模型权重, 需要保证 `output` 目录下包含: `rank:0_softmax_weight_mom.pkl` 和 `rank:0_softmax_weight.pkl` 两个文件。
+ `is_bin`: 训练数据是否为 bin 文件格式, 默认值为 `True`。
* 训练过程中的输出 log 示例如下:

View File

@@ -79,7 +79,7 @@ If you want to use customed dataset, you can arrange your data according to the
**Note:**
* To read data through the `Dataloader` API, we convert `train.rec` into many small `bin` files, each of which holds a single image. If your dataset contains only original image files, you can either rewrite the dataloader or refer to section 3.3 to convert the original image files to `bin` files.
* If your training data consists of original image files rather than `bin` files, you only need to set the parameter `is_bin` to `False` for training. More details can be found in the training script below.
### 3.3 Transform between original image files and bin files
@@ -100,6 +100,7 @@ python3.7 tools/convert_image_bin.py --image_path="your/input/bin/path" --bin_pa
After preparing the configuration file, the training process can be started in the following way.
```bash
# for the bin format training data
python3.7 train.py \
--network 'MobileFaceNet_128' \
--lr=0.1 \
@@ -109,6 +110,18 @@ python3.7 train.py \
--logdir="log" \
--output "emore_arcface" \
--resume 0
# for the original image format training data
python3.7 train.py \
--network 'MobileFaceNet_128' \
--lr=0.1 \
--batch_size 512 \
--weight_decay 2e-4 \
--embedding_size 128 \
--logdir="log" \
--output "emore_arcface" \
--resume 0 \
--is_bin False
```
Among them:
@@ -121,6 +134,7 @@ Among them:
+ `logdir`: VDL log storage directory, default is `"log"`;
+ `output`: Path where the model files are stored, default is `"emore_arcface"`;
+ `resume`: Restore the classification layer parameters. `1` represents recovery parameters, and `0` represents reinitialization. If you need to resume training, you need to ensure that there are `rank:0_softmax_weight_mom.pkl` and `rank:0_softmax_weight.pkl` in the output directory.
+ `is_bin`: Whether the training data is in `bin` format, default is `True`.
* The output log examples are as follows:

View File

@@ -15,19 +15,16 @@
from easydict import EasyDict as edict
config = edict()
config.dataset = "emore"
config.sample_rate = 1
config.momentum = 0.9
config.data_dir = "./MS1M_bin"
config.file_list = "MS1M_bin/label.txt"
config.num_classes = 85742
config.num_image = 5822653
config.num_epoch = 32
config.warmup_epoch = 1
config.val_targets = ["lfw", "cfp_fp", "agedb_30"]
def lr_step_func(epoch):
    """Return the learning-rate multiplier for ``epoch``.

    Epochs below -1 get the quadratic ramp ``((epoch + 1) / 5) ** 2``.
    Every other epoch gets ``0.1`` raised to the number of milestones in
    ``(6, 12, 18, 24)`` whose value minus one is less than or equal to
    ``epoch``.
    """
    if epoch < -1:
        return ((epoch + 1) / (4 + 1))**2

    milestones = (6, 12, 18, 24)
    reached = sum(1 for boundary in milestones if boundary - 1 <= epoch)
    return 0.1**reached

View File

@@ -15,6 +15,7 @@
from paddle.io import Dataset
from paddle.vision import transforms
import os
import cv2
from PIL import Image
import random
import paddle
@@ -24,12 +25,13 @@ from dataloader.kv_helper import read_img_from_bin
class CommonDataset(Dataset):
def __init__(self, root_dir, label_file):
def __init__(self, root_dir, label_file, is_bin=True):
super(CommonDataset, self).__init__()
self.root_dir = root_dir
self.label_file = label_file
self.full_lines = self.get_file_list(label_file)
self.delimiter = "\t"
self.is_bin = is_bin
self.transform = transforms.Compose([
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
@@ -54,7 +56,11 @@ class CommonDataset(Dataset):
label = int(label)
label = paddle.to_tensor(label, dtype='int64')
img_path = os.path.join(self.root_dir, img_path)
img = read_img_from_bin(img_path)[:, :, ::-1]
if self.is_bin:
img = read_img_from_bin(img_path)
else:
img = cv2.imread(img_path)
img = img[:, :, ::-1]
img = self.transform(img)
return img, label

View File

@@ -8,8 +8,9 @@ log_name="log"
python3.7 train.py \
--network 'MobileFaceNet_128' \
--lr=0.1 \
--batch_size 512 \
--batch_size 16 \
--weight_decay 2e-4 \
--embedding_size 128 \
--logdir="${log_name}" \
--output "emore_arcface"
--output "emore_arcface" \
--is_bin=False

View File

@@ -14,6 +14,10 @@
import os
import sys
__dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.abspath(os.path.join(__dir__, '../')))
import argparse
import cv2
from dataloader.kv_helper import read_img_from_bin
@@ -29,7 +33,7 @@ def get_file_list(img_file, end=('jpg', 'png', 'jpeg', 'JPEG', 'JPG', 'bmp')):
imgs_lists.append(img_file)
elif os.path.isdir(img_file):
for single_file in os.listdir(img_file):
if single_file.split('.')[-1] in img_end:
if single_file.split('.')[-1] in end:
imgs_lists.append(os.path.join(img_file, single_file))
if len(imgs_lists) == 0:
raise Exception("not found any img file in {}".format(img_file))
@@ -53,20 +57,20 @@ def parse_args():
def main(args):
assert args.mode in ["image2bin", "bin2image"]
assert os.path.isdir(args.image_path)
assert os.path.isfile(args.bin_path)
os.makedirs(args.image_path, exist_ok=True)
os.makedirs(args.bin_path, exist_ok=True)
assert os.path.isdir(args.image_path)
assert os.path.isdir(args.bin_path)
if args.mode == "image2bin":
img_list = get_file_list(args.bin_path)
img_list = get_file_list(args.image_path)
for idx, img_fp in enumerate(img_list):
if idx % len(img_list) == 1000:
print("conversion process: [{}]/[{}]".format(idx,
len(img_list)))
img_name = os.path.basename(img_fp)
output_path = os.path.join(args.bin_path,
os.path.splitext(img_name) + ".bin")
os.path.splitext(img_name)[0] + ".bin")
trans_img_to_bin(img_fp, output_path)
elif args.mode == "bin2image":
bin_list = get_file_list(args.bin_path, end=("bin", ))
@@ -74,9 +78,9 @@ def main(args):
if idx % len(bin_list) == 1000:
print("conversion process: [{}]/[{}]".format(idx,
len(bin_list)))
bin_name = os.path.basename(img_fp)
bin_name = os.path.basename(bin_fp)
output_path = os.path.join(args.image_path,
os.path.splitext(bin_name) + ".jpg")
os.path.splitext(bin_name)[0] + ".jpg")
img = read_img_from_bin(bin_fp)
cv2.imwrite(output_path, img)

View File

@@ -1,169 +1,171 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from dataloader import CommonDataset
from paddle.io import DataLoader
from config import config as cfg
from partial_fc import PartialFC
from utils.utils_callbacks import CallBackVerification, CallBackLogging, CallBackModelCheckpoint
from utils.utils_logging import AverageMeter
import paddle.nn.functional as F
from paddle.nn import ClipGradByNorm
from visualdl import LogWriter
import paddle
import backbones
import argparse
import losses
import time
import os
import sys
def _build_optimizer(parameters, args, grad_clip):
    """Create a Momentum optimizer and the LR scheduler that drives it.

    The schedule is ``cfg.lr_func`` (via ``LambdaDecay``) wrapped in a
    ``LinearWarmup`` whose target rate is scaled by ``batch_size / 512``.

    Args:
        parameters: Parameters the optimizer should update.
        args: Parsed command-line namespace (``lr``, ``batch_size`` and
            ``weight_decay`` are read).
        grad_clip: Gradient clipping strategy handed to the optimizer.

    Returns:
        tuple: ``(optimizer, scheduler)``.
    """
    decay = paddle.optimizer.lr.LambdaDecay(
        learning_rate=args.lr, lr_lambda=cfg.lr_func, verbose=True)
    scheduler = paddle.optimizer.lr.LinearWarmup(
        learning_rate=decay,
        warmup_steps=cfg.warmup_epoch,
        start_lr=0,
        end_lr=args.lr / 512 * args.batch_size,
        verbose=True)
    optimizer = paddle.optimizer.Momentum(
        parameters=parameters,
        learning_rate=scheduler,
        momentum=0.9,
        weight_decay=args.weight_decay,
        grad_clip=grad_clip)
    return optimizer, scheduler


def _train(args, writer, rank, world_size):
    """Build the data pipeline, model and optimizers, then run all epochs.

    Args:
        args: Parsed command-line namespace.
        writer: VisualDL ``LogWriter`` passed on to the logging callback.
        rank: Index of this process.
        world_size: Total number of processes.
    """
    trainset = CommonDataset(root_dir=cfg.data_dir, label_file=cfg.file_list)
    train_loader = DataLoader(
        dataset=trainset,
        batch_size=args.batch_size,
        shuffle=True,
        drop_last=True,
        num_workers=0)

    # Look the classes up by attribute instead of eval()-ing a string that
    # comes straight from the command line.
    backbone = getattr(backbones, args.network)()
    backbone.train()

    clip_by_norm = ClipGradByNorm(5.0)
    margin_softmax = getattr(losses, args.loss)()

    module_partial_fc = PartialFC(
        rank=rank,
        world_size=world_size,
        # Honour the --resume flag; fall back to 0 (the previously
        # hard-coded value) when the namespace does not carry it.
        resume=getattr(args, "resume", 0),
        batch_size=args.batch_size,
        margin_softmax=margin_softmax,
        num_classes=cfg.num_classes,
        sample_rate=cfg.sample_rate,
        embedding_size=args.embedding_size,
        prefix=args.output)

    opt_backbone, scheduler_backbone = _build_optimizer(
        backbone.parameters(), args, clip_by_norm)
    opt_pfc, scheduler_pfc = _build_optimizer(
        module_partial_fc.parameters(), args, clip_by_norm)

    start_epoch = 0
    total_step = int(
        len(trainset) / args.batch_size / world_size * cfg.num_epoch)
    if rank == 0:
        print("Total Step is: %d" % total_step)

    callback_verification = CallBackVerification(2000, rank, cfg.val_targets,
                                                 cfg.data_dir)
    callback_logging = CallBackLogging(10, rank, total_step, args.batch_size,
                                       world_size, writer)
    callback_checkpoint = CallBackModelCheckpoint(rank, args.output,
                                                  args.network)

    loss = AverageMeter()
    global_step = 0
    for epoch in range(start_epoch, cfg.num_epoch):
        for img, label in train_loader:
            label = label.flatten()
            global_step += 1
            sys.stdout.flush()
            features = F.normalize(backbone(img))
            # PartialFC returns the gradient w.r.t. the features; it is
            # pushed back through the backbone by the multiply/backward
            # call below.
            x_grad, loss_v = module_partial_fc.forward_backward(
                label, features, opt_pfc)
            sys.stdout.flush()
            (features.multiply(x_grad)).backward()
            sys.stdout.flush()
            opt_backbone.step()
            opt_pfc.step()
            module_partial_fc.update()
            opt_backbone.clear_gradients()
            opt_pfc.clear_gradients()
            sys.stdout.flush()

            lr_backbone_value = (
                opt_backbone._global_learning_rate().numpy()[0])
            # Read the classifier LR from its own optimizer (it was
            # previously read from opt_backbone by mistake).
            lr_pfc_value = opt_pfc._global_learning_rate().numpy()[0]

            loss.update(loss_v, 1)
            callback_logging(global_step, loss, epoch, lr_backbone_value,
                             lr_pfc_value)
            sys.stdout.flush()
            callback_verification(global_step, backbone)
        callback_checkpoint(global_step, backbone, module_partial_fc)
        scheduler_backbone.step()
        scheduler_pfc.step()


def main(args):
    """Train an ArcFace backbone together with its PartialFC classifier.

    Args:
        args: Parsed command-line namespace. The attributes read are
            ``output``, ``logdir``, ``batch_size``, ``network``, ``loss``,
            ``embedding_size``, ``lr``, ``weight_decay`` and, when present,
            ``resume``.
    """
    # World size and rank are fixed constants in this script.
    world_size = 1
    rank = 0

    if not os.path.exists(args.output):
        os.makedirs(args.output)
    else:
        # NOTE(review): the reason for pausing when the output directory
        # already exists is not visible here; kept as in the original.
        time.sleep(2)

    writer = LogWriter(logdir=args.logdir)
    try:
        _train(args, writer, rank, world_size)
    finally:
        # Close the log writer even when training aborts with an error.
        writer.close()
if __name__ == '__main__':
    # One (flag, type, default, help) row per command-line option.
    cli_options = (
        ('--network', str, 'MobileFaceNet_128', 'backbone network'),
        ('--loss', str, 'ArcFace', 'loss function'),
        ('--lr', float, 0.1, 'learning rate'),
        ('--batch_size', int, 512, 'batch size'),
        ('--weight_decay', float, 2e-4, 'weight decay'),
        ('--embedding_size', int, 128, 'embedding size'),
        ('--logdir', str, './log', 'log dir'),
        ('--output', str, 'emore_arcface', 'output dir'),
        ('--resume', int, 0, 'model resuming'),
    )

    parser = argparse.ArgumentParser(description='Paddle ArcFace Training')
    for flag, value_type, default_value, help_text in cli_options:
        parser.add_argument(
            flag, type=value_type, default=default_value, help=help_text)

    # Parse the command line and start training.
    args = parser.parse_args()
    main(args)
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from dataloader import CommonDataset
from paddle.io import DataLoader
from config import config as cfg
from partial_fc import PartialFC
from utils.utils_callbacks import CallBackVerification, CallBackLogging, CallBackModelCheckpoint
from utils.utils_logging import AverageMeter
import paddle.nn.functional as F
from paddle.nn import ClipGradByNorm
from visualdl import LogWriter
import paddle
import backbones
import argparse
import losses
import time
import os
import sys
def _build_optimizer(parameters, args, grad_clip):
    """Create a Momentum optimizer and the LR scheduler that drives it.

    The schedule is ``cfg.lr_func`` (via ``LambdaDecay``) wrapped in a
    ``LinearWarmup`` whose target rate is scaled by ``batch_size / 512``.

    Args:
        parameters: Parameters the optimizer should update.
        args: Parsed command-line namespace (``lr``, ``batch_size`` and
            ``weight_decay`` are read).
        grad_clip: Gradient clipping strategy handed to the optimizer.

    Returns:
        tuple: ``(optimizer, scheduler)``.
    """
    decay = paddle.optimizer.lr.LambdaDecay(
        learning_rate=args.lr, lr_lambda=cfg.lr_func, verbose=True)
    scheduler = paddle.optimizer.lr.LinearWarmup(
        learning_rate=decay,
        warmup_steps=cfg.warmup_epoch,
        start_lr=0,
        end_lr=args.lr / 512 * args.batch_size,
        verbose=True)
    optimizer = paddle.optimizer.Momentum(
        parameters=parameters,
        learning_rate=scheduler,
        momentum=0.9,
        weight_decay=args.weight_decay,
        grad_clip=grad_clip)
    return optimizer, scheduler


def _train(args, writer, rank, world_size):
    """Build the data pipeline, model and optimizers, then run all epochs.

    Args:
        args: Parsed command-line namespace.
        writer: VisualDL ``LogWriter`` passed on to the logging callback.
        rank: Index of this process.
        world_size: Total number of processes.
    """
    trainset = CommonDataset(
        root_dir=cfg.data_dir,
        label_file=cfg.file_list,
        is_bin=args.is_bin)
    train_loader = DataLoader(
        dataset=trainset,
        batch_size=args.batch_size,
        shuffle=True,
        drop_last=True,
        num_workers=0)

    # Look the classes up by attribute instead of eval()-ing a string that
    # comes straight from the command line.
    backbone = getattr(backbones, args.network)()
    backbone.train()

    clip_by_norm = ClipGradByNorm(5.0)
    margin_softmax = getattr(losses, args.loss)()

    module_partial_fc = PartialFC(
        rank=rank,
        world_size=world_size,
        # Honour the --resume flag; fall back to 0 (the previously
        # hard-coded value) when the namespace does not carry it.
        resume=getattr(args, "resume", 0),
        batch_size=args.batch_size,
        margin_softmax=margin_softmax,
        num_classes=cfg.num_classes,
        sample_rate=cfg.sample_rate,
        embedding_size=args.embedding_size,
        prefix=args.output)

    opt_backbone, scheduler_backbone = _build_optimizer(
        backbone.parameters(), args, clip_by_norm)
    opt_pfc, scheduler_pfc = _build_optimizer(
        module_partial_fc.parameters(), args, clip_by_norm)

    start_epoch = 0
    total_step = int(
        len(trainset) / args.batch_size / world_size * cfg.num_epoch)
    if rank == 0:
        print("Total Step is: %d" % total_step)

    callback_verification = CallBackVerification(2000, rank, cfg.val_targets,
                                                 cfg.data_dir)
    callback_logging = CallBackLogging(10, rank, total_step, args.batch_size,
                                       world_size, writer)
    callback_checkpoint = CallBackModelCheckpoint(rank, args.output,
                                                  args.network)

    loss = AverageMeter()
    global_step = 0
    for epoch in range(start_epoch, cfg.num_epoch):
        for img, label in train_loader:
            label = label.flatten()
            global_step += 1
            sys.stdout.flush()
            features = F.normalize(backbone(img))
            # PartialFC returns the gradient w.r.t. the features; it is
            # pushed back through the backbone by the multiply/backward
            # call below.
            x_grad, loss_v = module_partial_fc.forward_backward(
                label, features, opt_pfc)
            sys.stdout.flush()
            (features.multiply(x_grad)).backward()
            sys.stdout.flush()
            opt_backbone.step()
            opt_pfc.step()
            module_partial_fc.update()
            opt_backbone.clear_gradients()
            opt_pfc.clear_gradients()
            sys.stdout.flush()

            lr_backbone_value = (
                opt_backbone._global_learning_rate().numpy()[0])
            # Read the classifier LR from its own optimizer (it was
            # previously read from opt_backbone by mistake).
            lr_pfc_value = opt_pfc._global_learning_rate().numpy()[0]

            loss.update(loss_v, 1)
            callback_logging(global_step, loss, epoch, lr_backbone_value,
                             lr_pfc_value)
            sys.stdout.flush()
            callback_verification(global_step, backbone)
        callback_checkpoint(global_step, backbone, module_partial_fc)
        scheduler_backbone.step()
        scheduler_pfc.step()


def main(args):
    """Train an ArcFace backbone together with its PartialFC classifier.

    Args:
        args: Parsed command-line namespace. The attributes read are
            ``output``, ``logdir``, ``is_bin``, ``batch_size``, ``network``,
            ``loss``, ``embedding_size``, ``lr``, ``weight_decay`` and, when
            present, ``resume``.
    """
    # World size and rank are fixed constants in this script.
    world_size = 1
    rank = 0

    if not os.path.exists(args.output):
        os.makedirs(args.output)
    else:
        # NOTE(review): the reason for pausing when the output directory
        # already exists is not visible here; kept as in the original.
        time.sleep(2)

    writer = LogWriter(logdir=args.logdir)
    try:
        _train(args, writer, rank, world_size)
    finally:
        # Close the log writer even when training aborts with an error.
        writer.close()
if __name__ == '__main__':

    def str2bool(v):
        """Convert a command-line token into a bool.

        Accepts ``true``/``t``/``1`` and ``false``/``f``/``0`` in any letter
        case.

        Raises:
            argparse.ArgumentTypeError: If ``v`` is none of the accepted
                tokens, so that a typo is reported instead of being
                silently treated as ``False``.
        """
        token = v.lower()
        if token in ("true", "t", "1"):
            return True
        if token in ("false", "f", "0"):
            return False
        raise argparse.ArgumentTypeError(
            "expected a boolean (true/false, t/f, 1/0), got {!r}".format(v))

    parser = argparse.ArgumentParser(description='Paddle ArcFace Training')
    parser.add_argument(
        '--network',
        type=str,
        default='MobileFaceNet_128',
        help='backbone network')
    parser.add_argument(
        '--loss', type=str, default='ArcFace', help='loss function')
    parser.add_argument('--lr', type=float, default=0.1, help='learning rate')
    parser.add_argument(
        '--batch_size', type=int, default=512, help='batch size')
    parser.add_argument(
        '--weight_decay', type=float, default=2e-4, help='weight decay')
    parser.add_argument(
        '--embedding_size', type=int, default=128, help='embedding size')
    parser.add_argument('--logdir', type=str, default='./log', help='log dir')
    parser.add_argument(
        '--output', type=str, default='emore_arcface', help='output dir')
    parser.add_argument('--resume', type=int, default=0, help='model resuming')
    parser.add_argument(
        '--is_bin',
        type=str2bool,
        default=True,
        help='whether the train data is bin or original image file')

    # Parse the command line and start training.
    args = parser.parse_args()
    main(args)