Merge pull request #1675 from littletomatodonkey/fix_convert_img

fix some error for arcface paddle
2026-05-18 22:57:49 +00:00 · 2021-07-24 11:32:47 +08:00
parent 01840153a2 011c279197
commit cfea8ccffd
7 changed files with 223 additions and 184 deletions
--- a/recognition/arcface_paddle/README_ch.md
+++ b/recognition/arcface_paddle/README_ch.md
@@ -75,6 +75,7 @@ Arcface-Paddle/MSiM_bin

 注意：
 * 这里为了更加方便`Dataloader`读取数据，将原始的`train.rec`文件转化为很多`bin文件`，每个`bin文件`都唯一对应一张原始图像。如果您采集得到的文件均为原始的图像文件，那么可以参考`3.3节`中的内容完成原始图像文件到bin文件的转换。
+* 如果你的训练数据为原始的图像文件列表格式，那么在训练的时候，只需要将`is_bin`修改为`False`即可，下面的训练脚本中也会有具体的使用说明。

 ### 3.3 原始图像文件与bin文件的转换

@@ -95,6 +96,7 @@ python3.7 tools/convert_image_bin.py --image_path="your/input/bin/path" --bin_pa
 准备好配置文件后，可以通过以下方式开始训练过程。

 ```bash
+# 如果你的训练数据为bin文件格式的图像文件，可以使用下面的命令进行训练
 python3.7 train.py \
    --network 'MobileFaceNet_128' \
    --lr=0.1 \
@@ -104,6 +106,18 @@ python3.7 train.py \
    --logdir="log" \
    --output "emore_arcface" \
    --resume 0
+
+# 如果你的训练数据为原始图像文件，可以将`is_bin`指定为False，进行训练
+python3.7 train.py \
+    --network 'MobileFaceNet_128' \
+    --lr=0.1 \
+    --batch_size 512 \
+    --weight_decay 2e-4 \
+    --embedding_size 128 \
+    --logdir="log" \
+    --output "emore_arcface" \
+    --resume 0 \
+    --is_bin False
 ```

 上述命令中，需要传入如下参数:
@@ -116,6 +130,7 @@ python3.7 train.py \
 + `logdir`: VDL 输出 log 的存储路径, 默认值为 `"log"`;
 + `output`: 训练过程中的模型文件存储路径, 默认值为 `"emore_arcface"`;
 + `resume`: 是否恢复分类层的模型权重。 `1` 表示使用之前好的权重文件进行初始化，  `0` 代表重新初始化。 如果想要恢复分类层的模型权重， 需要保证 `output` 目录下包含： `rank:0_softmax_weight_mom.pkl` 和 `rank:0_softmax_weight.pkl` 两个文件。
++ `is_bin`: 训练数据是否为bin文件格式，默认为True。

 * 训练过程中的输出 log 示例如下:

--- a/recognition/arcface_paddle/README_en.md
+++ b/recognition/arcface_paddle/README_en.md
@@ -79,7 +79,7 @@ If you want to use customed dataset, you can arrange your data according to the

 **Note:**
 * For using `Dataloader` api for reading data, we convert `train.rec` into many little `bin` files, each `bin` file denotes a single image. If your dataset just contains origin image files. You can either rewrite the dataloader file or refer to section 3.3 to convert the original image files to `bin` files.
-
+* If your training data consists of original image files rather than `bin` files, you only need to set the parameter `is_bin` to `False` for training. More details can be found in the training script below.

 ### 3.3 Transform between original image files and bin files

@@ -100,6 +100,7 @@ python3.7 tools/convert_image_bin.py --image_path="your/input/bin/path" --bin_pa
 After preparing the configuration file, The training process can be started in the following way.

 ```bash
+# for the bin format training data
 python3.7 train.py \
    --network 'MobileFaceNet_128' \
    --lr=0.1 \
@@ -109,6 +110,18 @@ python3.7 train.py \
    --logdir="log" \
    --output "emore_arcface" \
    --resume 0
+
+# for the original image format training data
+python3.7 train.py \
+    --network 'MobileFaceNet_128' \
+    --lr=0.1 \
+    --batch_size 512 \
+    --weight_decay 2e-4 \
+    --embedding_size 128 \
+    --logdir="log" \
+    --output "emore_arcface" \
+    --resume 0 \
+    --is_bin False
 ```

 Among them:
@@ -121,6 +134,7 @@ Among them:
 + `logdir`: VDL log storage directory, default by `"log"`;
 + `output`: Model stored path, default by: `"emore_arcface"`;
 + `resume`: Restore the classification layer parameters. `1` represents recovery parameters, and `0` represents reinitialization. If you need to resume training, you need to ensure that there are `rank:0_softmax_weight_mom.pkl` and `rank:0_softmax_weight.pkl` in the output directory.
++ `is_bin`: Whether the training data is in `bin` format, default by `True`.

 * The output log examples are as follows:

--- a/recognition/arcface_paddle/config.py
+++ b/recognition/arcface_paddle/config.py
@@ -15,19 +15,16 @@
 from easydict import EasyDict as edict

 config = edict()
-config.dataset = "emore"
 config.sample_rate = 1
 config.momentum = 0.9

 config.data_dir = "./MS1M_bin"
 config.file_list = "MS1M_bin/label.txt"
 config.num_classes = 85742
-config.num_image = 5822653
 config.num_epoch = 32
 config.warmup_epoch = 1
 config.val_targets = ["lfw", "cfp_fp", "agedb_30"]

-
 def lr_step_func(epoch):
    return ((epoch + 1) / (4 + 1))**2 if epoch < -1 else 0.1**len(
        [m for m in [6, 12, 18, 24] if m - 1 <= epoch])
--- a/recognition/arcface_paddle/dataloader/common_dataset.py
+++ b/recognition/arcface_paddle/dataloader/common_dataset.py
@@ -15,6 +15,7 @@
 from paddle.io import Dataset
 from paddle.vision import transforms
 import os
+import cv2
 from PIL import Image
 import random
 import paddle
@@ -24,12 +25,13 @@ from dataloader.kv_helper import read_img_from_bin


 class CommonDataset(Dataset):
-    def __init__(self, root_dir, label_file):
+    def __init__(self, root_dir, label_file, is_bin=True):
        super(CommonDataset, self).__init__()
        self.root_dir = root_dir
        self.label_file = label_file
        self.full_lines = self.get_file_list(label_file)
        self.delimiter = "\t"
+        self.is_bin = is_bin
        self.transform = transforms.Compose([
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
@@ -54,7 +56,11 @@ class CommonDataset(Dataset):
            label = int(label)
            label = paddle.to_tensor(label, dtype='int64')
            img_path = os.path.join(self.root_dir, img_path)
-            img = read_img_from_bin(img_path)[:, :, ::-1]
+            if self.is_bin:
+                img = read_img_from_bin(img_path)
+            else:
+                img = cv2.imread(img_path)
+            img = img[:, :, ::-1]
            img = self.transform(img)
            return img, label

--- a/recognition/arcface_paddle/shell/train.sh
+++ b/recognition/arcface_paddle/shell/train.sh
@@ -8,8 +8,9 @@ log_name="log"
 python3.7 train.py \
    --network 'MobileFaceNet_128' \
    --lr=0.1 \
-    --batch_size 512 \
+    --batch_size 16 \
    --weight_decay 2e-4 \
    --embedding_size 128 \
    --logdir="${log_name}" \
-    --output "emore_arcface"
+    --output "emore_arcface" \
+    --is_bin=False
--- a/recognition/arcface_paddle/tools/convert_image_bin.py
+++ b/recognition/arcface_paddle/tools/convert_image_bin.py
@@ -14,6 +14,10 @@

 import os
 import sys
+
+__dir__ = os.path.dirname(os.path.abspath(__file__))
+sys.path.append(os.path.abspath(os.path.join(__dir__, '../')))
+
 import argparse
 import cv2
 from dataloader.kv_helper import read_img_from_bin
@@ -29,7 +33,7 @@ def get_file_list(img_file, end=('jpg', 'png', 'jpeg', 'JPEG', 'JPG', 'bmp')):
        imgs_lists.append(img_file)
    elif os.path.isdir(img_file):
        for single_file in os.listdir(img_file):
-            if single_file.split('.')[-1] in img_end:
+            if single_file.split('.')[-1] in end:
                imgs_lists.append(os.path.join(img_file, single_file))
    if len(imgs_lists) == 0:
        raise Exception("not found any img file in {}".format(img_file))
@@ -53,20 +57,20 @@ def parse_args():

 def main(args):
    assert args.mode in ["image2bin", "bin2image"]
-    assert os.path.isdir(args.image_path)
-    assert os.path.isfile(args.bin_path)
    os.makedirs(args.image_path, exist_ok=True)
    os.makedirs(args.bin_path, exist_ok=True)
+    assert os.path.isdir(args.image_path)
+    assert os.path.isdir(args.bin_path)

    if args.mode == "image2bin":
-        img_list = get_file_list(args.bin_path)
+        img_list = get_file_list(args.image_path)
        for idx, img_fp in enumerate(img_list):
            if idx % len(img_list) == 1000:
                print("conversion process: [{}]/[{}]".format(idx,
                                                             len(img_list)))
            img_name = os.path.basename(img_fp)
            output_path = os.path.join(args.bin_path,
-                                       os.path.splitext(img_name) + ".bin")
+                                       os.path.splitext(img_name)[0] + ".bin")
            trans_img_to_bin(img_fp, output_path)
    elif args.mode == "bin2image":
        bin_list = get_file_list(args.bin_path, end=("bin", ))
@@ -74,9 +78,9 @@ def main(args):
            if idx % len(bin_list) == 1000:
                print("conversion process: [{}]/[{}]".format(idx,
                                                             len(bin_list)))
-            bin_name = os.path.basename(img_fp)
+            bin_name = os.path.basename(bin_fp)
            output_path = os.path.join(args.image_path,
-                                       os.path.splitext(bin_name) + ".jpg")
+                                       os.path.splitext(bin_name)[0] + ".jpg")
            img = read_img_from_bin(bin_fp)
            cv2.imwrite(output_path, img)

--- a/recognition/arcface_paddle/train.py
+++ b/recognition/arcface_paddle/train.py
@@ -1,169 +1,171 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from dataloader import CommonDataset
-
-from paddle.io import DataLoader
-from config import config as cfg
-from partial_fc import PartialFC
-from utils.utils_callbacks import CallBackVerification, CallBackLogging, CallBackModelCheckpoint
-from utils.utils_logging import AverageMeter
-import paddle.nn.functional as F
-from paddle.nn import ClipGradByNorm
-from visualdl import LogWriter
-import paddle
-import backbones
-import argparse
-import losses
-import time
-import os
-import sys
-
-
-def main(args):
-    world_size = int(1.0)
-    rank = int(0.0)
-
-    if not os.path.exists(args.output):
-        os.makedirs(args.output)
-    else:
-        time.sleep(2)
-
-    writer = LogWriter(logdir=args.logdir)
-
-    trainset = CommonDataset(root_dir=cfg.data_dir, label_file=cfg.file_list)
-    # trainset = BinDataset(root_dir=cfg.data_dir)
-    train_loader = DataLoader(
-        dataset=trainset,
-        batch_size=args.batch_size,
-        shuffle=True,
-        drop_last=True,
-        num_workers=0)
-
-    backbone = eval("backbones.{}".format(args.network))()
-    backbone.train()
-
-    clip_by_norm = ClipGradByNorm(5.0)
-    margin_softmax = eval("losses.{}".format(args.loss))()
-
-    module_partial_fc = PartialFC(
-        rank=0,
-        world_size=1,
-        resume=0,
-        batch_size=args.batch_size,
-        margin_softmax=margin_softmax,
-        num_classes=cfg.num_classes,
-        sample_rate=cfg.sample_rate,
-        embedding_size=args.embedding_size,
-        prefix=args.output)
-
-    scheduler_backbone_decay = paddle.optimizer.lr.LambdaDecay(
-        learning_rate=args.lr, lr_lambda=cfg.lr_func, verbose=True)
-    scheduler_backbone = paddle.optimizer.lr.LinearWarmup(
-        learning_rate=scheduler_backbone_decay,
-        warmup_steps=cfg.warmup_epoch,
-        start_lr=0,
-        end_lr=args.lr / 512 * args.batch_size,
-        verbose=True)
-    opt_backbone = paddle.optimizer.Momentum(
-        parameters=backbone.parameters(),
-        learning_rate=scheduler_backbone,
-        momentum=0.9,
-        weight_decay=args.weight_decay,
-        grad_clip=clip_by_norm)
-
-    scheduler_pfc_decay = paddle.optimizer.lr.LambdaDecay(
-        learning_rate=args.lr, lr_lambda=cfg.lr_func, verbose=True)
-    scheduler_pfc = paddle.optimizer.lr.LinearWarmup(
-        learning_rate=scheduler_pfc_decay,
-        warmup_steps=cfg.warmup_epoch,
-        start_lr=0,
-        end_lr=args.lr / 512 * args.batch_size,
-        verbose=True)
-    opt_pfc = paddle.optimizer.Momentum(
-        parameters=module_partial_fc.parameters(),
-        learning_rate=scheduler_pfc,
-        momentum=0.9,
-        weight_decay=args.weight_decay,
-        grad_clip=clip_by_norm)
-
-    start_epoch = 0
-    total_step = int(
-        len(trainset) / args.batch_size / world_size * cfg.num_epoch)
-    if rank == 0:
-        print("Total Step is: %d" % total_step)
-
-    callback_verification = CallBackVerification(2000, rank, cfg.val_targets,
-                                                 cfg.data_dir)
-    callback_logging = CallBackLogging(10, rank, total_step, args.batch_size,
-                                       world_size, writer)
-    callback_checkpoint = CallBackModelCheckpoint(rank, args.output,
-                                                  args.network)
-
-    loss = AverageMeter()
-    global_step = 0
-    for epoch in range(start_epoch, cfg.num_epoch):
-        for step, (img, label) in enumerate(train_loader):
-            label = label.flatten()
-            global_step += 1
-            sys.stdout.flush()
-            features = F.normalize(backbone(img))
-            x_grad, loss_v = module_partial_fc.forward_backward(
-                label, features, opt_pfc)
-            sys.stdout.flush()
-            (features.multiply(x_grad)).backward()
-            sys.stdout.flush()
-            opt_backbone.step()
-            opt_pfc.step()
-            module_partial_fc.update()
-            opt_backbone.clear_gradients()
-            opt_pfc.clear_gradients()
-            sys.stdout.flush()
-
-            lr_backbone_value = opt_backbone._global_learning_rate().numpy()[0]
-            lr_pfc_value = opt_backbone._global_learning_rate().numpy()[0]
-
-            loss.update(loss_v, 1)
-            callback_logging(global_step, loss, epoch, lr_backbone_value,
-                             lr_pfc_value)
-            sys.stdout.flush()
-            callback_verification(global_step, backbone)
-        callback_checkpoint(global_step, backbone, module_partial_fc)
-        scheduler_backbone.step()
-        scheduler_pfc.step()
-    writer.close()
-
-
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser(description='Paddle ArcFace Training')
-    parser.add_argument(
-        '--network',
-        type=str,
-        default='MobileFaceNet_128',
-        help='backbone network')
-    parser.add_argument(
-        '--loss', type=str, default='ArcFace', help='loss function')
-    parser.add_argument('--lr', type=float, default=0.1, help='learning rate')
-    parser.add_argument(
-        '--batch_size', type=int, default=512, help='batch size')
-    parser.add_argument(
-        '--weight_decay', type=float, default=2e-4, help='weight decay')
-    parser.add_argument(
-        '--embedding_size', type=int, default=128, help='embedding size')
-    parser.add_argument('--logdir', type=str, default='./log', help='log dir')
-    parser.add_argument(
-        '--output', type=str, default='emore_arcface', help='output dir')
-    parser.add_argument('--resume', type=int, default=0, help='model resuming')
-    args = parser.parse_args()
-    main(args)
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from dataloader import CommonDataset
+
+from paddle.io import DataLoader
+from config import config as cfg
+from partial_fc import PartialFC
+from utils.utils_callbacks import CallBackVerification, CallBackLogging, CallBackModelCheckpoint
+from utils.utils_logging import AverageMeter
+import paddle.nn.functional as F
+from paddle.nn import ClipGradByNorm
+from visualdl import LogWriter
+import paddle
+import backbones
+import argparse
+import losses
+import time
+import os
+import sys
+
+
+def main(args):
+    world_size = int(1.0)
+    rank = int(0.0)
+
+    if not os.path.exists(args.output):
+        os.makedirs(args.output)
+    else:
+        time.sleep(2)
+
+    writer = LogWriter(logdir=args.logdir)
+    trainset = CommonDataset(root_dir=cfg.data_dir, label_file=cfg.file_list, is_bin=args.is_bin)
+    train_loader = DataLoader(
+        dataset=trainset,
+        batch_size=args.batch_size,
+        shuffle=True,
+        drop_last=True,
+        num_workers=0)
+
+    backbone = eval("backbones.{}".format(args.network))()
+    backbone.train()
+
+    clip_by_norm = ClipGradByNorm(5.0)
+    margin_softmax = eval("losses.{}".format(args.loss))()
+
+    module_partial_fc = PartialFC(
+        rank=0,
+        world_size=1,
+        resume=0,
+        batch_size=args.batch_size,
+        margin_softmax=margin_softmax,
+        num_classes=cfg.num_classes,
+        sample_rate=cfg.sample_rate,
+        embedding_size=args.embedding_size,
+        prefix=args.output)
+
+    scheduler_backbone_decay = paddle.optimizer.lr.LambdaDecay(
+        learning_rate=args.lr, lr_lambda=cfg.lr_func, verbose=True)
+    scheduler_backbone = paddle.optimizer.lr.LinearWarmup(
+        learning_rate=scheduler_backbone_decay,
+        warmup_steps=cfg.warmup_epoch,
+        start_lr=0,
+        end_lr=args.lr / 512 * args.batch_size,
+        verbose=True)
+    opt_backbone = paddle.optimizer.Momentum(
+        parameters=backbone.parameters(),
+        learning_rate=scheduler_backbone,
+        momentum=0.9,
+        weight_decay=args.weight_decay,
+        grad_clip=clip_by_norm)
+
+    scheduler_pfc_decay = paddle.optimizer.lr.LambdaDecay(
+        learning_rate=args.lr, lr_lambda=cfg.lr_func, verbose=True)
+    scheduler_pfc = paddle.optimizer.lr.LinearWarmup(
+        learning_rate=scheduler_pfc_decay,
+        warmup_steps=cfg.warmup_epoch,
+        start_lr=0,
+        end_lr=args.lr / 512 * args.batch_size,
+        verbose=True)
+    opt_pfc = paddle.optimizer.Momentum(
+        parameters=module_partial_fc.parameters(),
+        learning_rate=scheduler_pfc,
+        momentum=0.9,
+        weight_decay=args.weight_decay,
+        grad_clip=clip_by_norm)
+
+    start_epoch = 0
+    total_step = int(
+        len(trainset) / args.batch_size / world_size * cfg.num_epoch)
+    if rank == 0:
+        print("Total Step is: %d" % total_step)
+
+    callback_verification = CallBackVerification(2000, rank, cfg.val_targets,
+                                                 cfg.data_dir)
+    callback_logging = CallBackLogging(10, rank, total_step, args.batch_size,
+                                       world_size, writer)
+    callback_checkpoint = CallBackModelCheckpoint(rank, args.output,
+                                                  args.network)
+
+    loss = AverageMeter()
+    global_step = 0
+    for epoch in range(start_epoch, cfg.num_epoch):
+        for step, (img, label) in enumerate(train_loader):
+            label = label.flatten()
+            global_step += 1
+            sys.stdout.flush()
+            features = F.normalize(backbone(img))
+            x_grad, loss_v = module_partial_fc.forward_backward(
+                label, features, opt_pfc)
+            sys.stdout.flush()
+            (features.multiply(x_grad)).backward()
+            sys.stdout.flush()
+            opt_backbone.step()
+            opt_pfc.step()
+            module_partial_fc.update()
+            opt_backbone.clear_gradients()
+            opt_pfc.clear_gradients()
+            sys.stdout.flush()
+
+            lr_backbone_value = opt_backbone._global_learning_rate().numpy()[0]
+            lr_pfc_value = opt_backbone._global_learning_rate().numpy()[0]
+
+            loss.update(loss_v, 1)
+            callback_logging(global_step, loss, epoch, lr_backbone_value,
+                             lr_pfc_value)
+            sys.stdout.flush()
+            callback_verification(global_step, backbone)
+        callback_checkpoint(global_step, backbone, module_partial_fc)
+        scheduler_backbone.step()
+        scheduler_pfc.step()
+    writer.close()
+
+
+if __name__ == '__main__':
+    def str2bool(v):
+        return v.lower() in ("true", "t", "1")
+    
+    parser = argparse.ArgumentParser(description='Paddle ArcFace Training')
+    parser.add_argument(
+        '--network',
+        type=str,
+        default='MobileFaceNet_128',
+        help='backbone network')
+    parser.add_argument(
+        '--loss', type=str, default='ArcFace', help='loss function')
+    parser.add_argument('--lr', type=float, default=0.1, help='learning rate')
+    parser.add_argument(
+        '--batch_size', type=int, default=512, help='batch size')
+    parser.add_argument(
+        '--weight_decay', type=float, default=2e-4, help='weight decay')
+    parser.add_argument(
+        '--embedding_size', type=int, default=128, help='embedding size')
+    parser.add_argument('--logdir', type=str, default='./log', help='log dir')
+    parser.add_argument(
+        '--output', type=str, default='emore_arcface', help='output dir')
+    parser.add_argument('--resume', type=int, default=0, help='model resuming')
+    parser.add_argument('--is_bin', type=str2bool, default=True, help='whether the train data is bin or original image file')
+    args = parser.parse_args()
+    main(args)